Update Linux to v5.4.2

Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 671d078..6f7fdcb 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # SPI driver configuration
 #
@@ -63,7 +64,7 @@
 
 config SPI_ATH79
 	tristate "Atheros AR71XX/AR724X/AR913X SPI controller driver"
-	depends on ATH79 && GPIOLIB
+	depends on ATH79 || COMPILE_TEST
 	select SPI_BITBANG
 	help
 	  This enables support for the SPI controller present on the
@@ -83,6 +84,23 @@
 	  This selects a driver for the Atmel SPI Controller, present on
 	  many AT91 ARM chips.
 
+config SPI_AT91_USART
+	tristate "Atmel USART Controller SPI driver"
+	depends on (ARCH_AT91 || COMPILE_TEST)
+	depends on MFD_AT91_USART
+	help
+	  This selects a driver for the AT91 USART Controller as SPI Master,
+	  present on AT91 and SAMA5 SoC series.
+
+config SPI_ATMEL_QUADSPI
+	tristate "Atmel Quad SPI Controller"
+	depends on ARCH_AT91 || (ARM && COMPILE_TEST && !ARCH_EBSA110)
+	depends on OF && HAS_IOMEM
+	help
+	  This enables support for the Quad SPI controller in master mode.
+	  This driver does not support generic SPI. The implementation only
+	  supports spi-mem interface.
+
 config SPI_AU1550
 	tristate "Au1550/Au1200/Au1300 SPI Controller"
 	depends on MIPS_ALCHEMY
@@ -102,7 +120,7 @@
 config SPI_BCM2835
 	tristate "BCM2835 SPI controller"
 	depends on GPIOLIB
-	depends on ARCH_BCM2835 || COMPILE_TEST
+	depends on ARCH_BCM2835 || ARCH_BRCMSTB || COMPILE_TEST
 	help
 	  This selects a driver for the Broadcom BCM2835 SPI master.
 
@@ -113,7 +131,7 @@
 
 config SPI_BCM2835AUX
 	tristate "BCM2835 SPI auxiliary controller"
-	depends on (ARCH_BCM2835 && GPIOLIB) || COMPILE_TEST
+	depends on ((ARCH_BCM2835 || ARCH_BRCMSTB) && GPIOLIB) || COMPILE_TEST
 	help
 	  This selects a driver for the Broadcom BCM2835 SPI aux master.
 
@@ -129,7 +147,7 @@
 
 config SPI_BCM63XX_HSSPI
 	tristate "Broadcom BCM63XX HS SPI controller driver"
-	depends on BCM63XX || COMPILE_TEST
+	depends on BCM63XX || ARCH_BCM_63XX || COMPILE_TEST
 	help
 	  This enables support for the High Speed SPI controller present on
 	  newer Broadcom BCM63XX SoCs.
@@ -251,6 +269,27 @@
 	help
 	  This enables Freescale i.MX LPSPI controllers in master mode.
 
+config SPI_FSL_QUADSPI
+	tristate "Freescale QSPI controller"
+	depends on ARCH_MXC || SOC_LS1021A || ARCH_LAYERSCAPE || COMPILE_TEST
+	depends on HAS_IOMEM
+	help
+	  This enables support for the Quad SPI controller in master mode.
+	  Up to four flash chips can be connected on two buses with two
+	  chipselects each.
+	  This controller does not support generic SPI messages. It only
+	  supports the high-level SPI memory interface.
+
+config SPI_NXP_FLEXSPI
+	tristate "NXP Flex SPI controller"
+	depends on ARCH_LAYERSCAPE || HAS_IOMEM
+	help
+	  This enables support for the Flex SPI controller in master mode.
+	  Up to four slave devices can be connected on two buses with two
+	  chipselects each.
+	  This controller does not support generic SPI messages and only
+	  supports the high-level SPI memory interface.
+
 config SPI_GPIO
 	tristate "GPIO-based bitbanging SPI Master"
 	depends on GPIOLIB || COMPILE_TEST
@@ -279,8 +318,7 @@
 	depends on ARCH_MXC || COMPILE_TEST
 	select SPI_BITBANG
 	help
-	  This enables using the Freescale i.MX SPI controllers in master
-	  mode.
+	  This enables support for the Freescale i.MX SPI controllers.
 
 config SPI_JCORE
 	tristate "J-Core SPI Master"
@@ -355,7 +393,7 @@
 	depends on SOC_VF610 || SOC_LS1021A || ARCH_LAYERSCAPE || M5441x || COMPILE_TEST
 	help
 	  This enables support for the Freescale DSPI controller in master
-	  mode. VF610 platform uses the controller.
+	  mode. VF610, LS1021A and ColdFire platforms use the controller.
 
 config SPI_FSL_ESPI
 	tristate "Freescale eSPI controller"
@@ -389,12 +427,28 @@
 	  say Y or M here.If you are not sure, say N.
 	  SPI drivers for Mediatek MT65XX and MT81XX series ARM SoCs.
 
-config SPI_NUC900
-	tristate "Nuvoton NUC900 series SPI"
-	depends on ARCH_W90X900
-	select SPI_BITBANG
+config SPI_MT7621
+	tristate "MediaTek MT7621 SPI Controller"
+	depends on RALINK || COMPILE_TEST
 	help
-	  SPI driver for Nuvoton NUC900 series ARM SoCs
+	  This selects a driver for the MediaTek MT7621 SPI Controller.
+
+config SPI_NPCM_FIU
+	tristate "Nuvoton NPCM FLASH Interface Unit"
+	depends on ARCH_NPCM || COMPILE_TEST
+	depends on OF && HAS_IOMEM
+	help
+	  This enables support for the Flash Interface Unit SPI controller
+	  in master mode.
+	  This driver does not support generic SPI. The implementation only
+	  supports spi-mem interface.
+
+config SPI_NPCM_PSPI
+	tristate "Nuvoton NPCM PSPI Controller"
+	depends on ARCH_NPCM || COMPILE_TEST
+	help
+	  This driver provides support for Nuvoton NPCM BMC
+	  Peripheral SPI controller in master mode.
 
 config SPI_LANTIQ_SSC
 	tristate "Lantiq SSC SPI controller"
@@ -427,7 +481,7 @@
 
 config SPI_OMAP24XX
 	tristate "McSPI driver for OMAP"
-	depends on ARCH_OMAP2PLUS || COMPILE_TEST
+	depends on ARCH_OMAP2PLUS || ARCH_K3 || COMPILE_TEST
 	select SG_SPLIT
 	help
 	  SPI master controller for OMAP24XX and later Multichannel SPI
@@ -492,7 +546,7 @@
 	help
 	  This enables using a PXA2xx or Sodaville SSP port as a SPI master
 	  controller. The driver can be configured to use any SSP port and
-	  additional documentation can be found a Documentation/spi/pxa2xx.
+	  additional documentation can be found at Documentation/spi/pxa2xx.rst.
 
 config SPI_PXA2XX_PCI
 	def_tristate SPI_PXA2XX && PCI && COMMON_CLK
@@ -520,6 +574,12 @@
 	help
 	  SPI driver for Renesas RSPI and QSPI blocks.
 
+config SPI_QCOM_QSPI
+	tristate "QTI QSPI controller"
+	depends on ARCH_QCOM
+	help
+	  QSPI (Quad SPI) driver for the Qualcomm QSPI controller.
+
 config SPI_QUP
 	tristate "Qualcomm SPI controller with QUP interface"
 	depends on ARCH_QCOM || (ARM && COMPILE_TEST)
@@ -533,6 +593,18 @@
 	  This driver can also be built as a module.  If so, the module
 	  will be called spi_qup.
 
+config SPI_QCOM_GENI
+	tristate "Qualcomm GENI based SPI controller"
+	depends on QCOM_GENI_SE
+	help
+	  This driver supports GENI serial engine based SPI controller in
+	  master mode on the Qualcomm Technologies Inc.'s SoCs. If you say
+	  yes to this option, support will be included for the built-in SPI
+	  interface on the Qualcomm Technologies Inc.'s SoCs.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called spi-geni-qcom.
+
 config SPI_S3C24XX
 	tristate "Samsung S3C24XX series SPI"
 	depends on ARCH_S3C24XX
@@ -589,6 +661,12 @@
 	help
 	  SPI driver for SuperH HSPI blocks.
 
+config SPI_SIFIVE
+	tristate "SiFive SPI controller"
+	depends on HAS_IOMEM
+	help
+	  This exposes the SPI controller IP from SiFive.
+
 config SPI_SIRF
 	tristate "CSR SiRFprimaII SPI controller"
 	depends on SIRF_DMA
@@ -596,6 +674,22 @@
 	help
 	  SPI driver for CSR SiRFprimaII SoCs
 
+config SPI_SLAVE_MT27XX
+	tristate "MediaTek SPI slave device"
+	depends on ARCH_MEDIATEK || COMPILE_TEST
+	depends on SPI_SLAVE
+	help
+	  This selects the MediaTek(R) SPI slave device driver.
+	  If you want to use MediaTek(R) SPI slave interface,
+	  say Y or M here. If you are not sure, say N.
+	  SPI slave drivers for Mediatek MT27XX series ARM SoCs.
+
+config SPI_SPRD
+	tristate "Spreadtrum SPI controller"
+	depends on ARCH_SPRD || COMPILE_TEST
+	help
+	  SPI driver for Spreadtrum SoCs.
+
 config SPI_SPRD_ADI
 	tristate "Spreadtrum ADI controller"
 	depends on ARCH_SPRD || COMPILE_TEST
@@ -607,12 +701,21 @@
 	tristate "STMicroelectronics STM32 SPI controller"
 	depends on ARCH_STM32 || COMPILE_TEST
 	help
-	  SPI driver for STMicroelectonics STM32 SoCs.
+	  SPI driver for STMicroelectronics STM32 SoCs.
 
 	  STM32 SPI controller supports DMA and PIO modes. When DMA
 	  is not available, the driver automatically falls back to
 	  PIO mode.
 
+config SPI_STM32_QSPI
+	tristate "STMicroelectronics STM32 QUAD SPI controller"
+	depends on ARCH_STM32 || COMPILE_TEST
+	depends on OF
+	help
+	  This enables support for the Quad SPI controller in master mode.
+	  This driver does not support generic SPI. The implementation only
+	  supports spi-mem interface.
+
 config SPI_ST_SSC4
 	tristate "STMicroelectronics SPI SSC-based driver"
 	depends on ARCH_STI || COMPILE_TEST
@@ -633,6 +736,22 @@
 	help
 	  This enables using the SPI controller on the Allwinner A31 SoCs.
 
+config SPI_SYNQUACER
+	tristate "Socionext's SynQuacer HighSpeed SPI controller"
+	depends on ARCH_SYNQUACER || COMPILE_TEST
+	help
+	  SPI driver for Socionext's High speed SPI controller which provides
+	  various operating modes for interfacing to serial peripheral devices
+	  that use the de-facto standard SPI protocol.
+
+	  It also supports the new dual-bit and quad-bit SPI protocol.
+
+config SPI_MXIC
+	tristate "Macronix MX25F0A SPI controller"
+	depends on SPI_MASTER
+	help
+	  This selects the Macronix MX25F0A SPI controller driver.
+
 config SPI_MXS
 	tristate "Freescale MXS SPI controller"
 	depends on ARCH_MXS
@@ -743,9 +862,17 @@
 	  16 bit words in SPI mode 0, automatically asserting CS on transfer
 	  start and deasserting on end.
 
+config SPI_ZYNQ_QSPI
+	tristate "Xilinx Zynq QSPI controller"
+	depends on ARCH_ZYNQ || COMPILE_TEST
+	help
+	  This enables support for the Zynq Quad SPI controller
+	  in master mode.
+	  This controller only supports SPI memory interface.
+
 config SPI_ZYNQMP_GQSPI
 	tristate "Xilinx ZynqMP GQSPI controller"
-	depends on SPI_MASTER && HAS_DMA
+	depends on (SPI_MASTER && HAS_DMA) || COMPILE_TEST
 	help
 	  Enables Xilinx GQSPI controller driver for Zynq UltraScale+ MPSoC.
 
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index a90d559..bb49c9e 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -16,6 +16,8 @@
 obj-$(CONFIG_SPI_ALTERA)		+= spi-altera.o
 obj-$(CONFIG_SPI_ARMADA_3700)		+= spi-armada-3700.o
 obj-$(CONFIG_SPI_ATMEL)			+= spi-atmel.o
+obj-$(CONFIG_SPI_ATMEL_QUADSPI)		+= atmel-quadspi.o
+obj-$(CONFIG_SPI_AT91_USART)		+= spi-at91-usart.o
 obj-$(CONFIG_SPI_ATH79)			+= spi-ath79.o
 obj-$(CONFIG_SPI_AU1550)		+= spi-au1550.o
 obj-$(CONFIG_SPI_AXI_SPI_ENGINE)	+= spi-axi-spi-engine.o
@@ -43,6 +45,7 @@
 obj-$(CONFIG_SPI_FSL_LIB)		+= spi-fsl-lib.o
 obj-$(CONFIG_SPI_FSL_ESPI)		+= spi-fsl-espi.o
 obj-$(CONFIG_SPI_FSL_LPSPI)		+= spi-fsl-lpspi.o
+obj-$(CONFIG_SPI_FSL_QUADSPI)		+= spi-fsl-qspi.o
 obj-$(CONFIG_SPI_FSL_SPI)		+= spi-fsl-spi.o
 obj-$(CONFIG_SPI_GPIO)			+= spi-gpio.o
 obj-$(CONFIG_SPI_IMG_SPFI)		+= spi-img-spfi.o
@@ -57,8 +60,12 @@
 obj-$(CONFIG_SPI_MPC52xx_PSC)		+= spi-mpc52xx-psc.o
 obj-$(CONFIG_SPI_MPC52xx)		+= spi-mpc52xx.o
 obj-$(CONFIG_SPI_MT65XX)                += spi-mt65xx.o
+obj-$(CONFIG_SPI_MT7621)		+= spi-mt7621.o
+obj-$(CONFIG_SPI_MXIC)			+= spi-mxic.o
 obj-$(CONFIG_SPI_MXS)			+= spi-mxs.o
-obj-$(CONFIG_SPI_NUC900)		+= spi-nuc900.o
+obj-$(CONFIG_SPI_NPCM_FIU)		+= spi-npcm-fiu.o
+obj-$(CONFIG_SPI_NPCM_PSPI)		+= spi-npcm-pspi.o
+obj-$(CONFIG_SPI_NXP_FLEXSPI)		+= spi-nxp-fspi.o
 obj-$(CONFIG_SPI_OC_TINY)		+= spi-oc-tiny.o
 spi-octeon-objs				:= spi-cavium.o spi-cavium-octeon.o
 obj-$(CONFIG_SPI_OCTEON)		+= spi-octeon.o
@@ -74,6 +81,8 @@
 spi-pxa2xx-platform-objs		:= spi-pxa2xx.o spi-pxa2xx-dma.o
 obj-$(CONFIG_SPI_PXA2XX)		+= spi-pxa2xx-platform.o
 obj-$(CONFIG_SPI_PXA2XX_PCI)		+= spi-pxa2xx-pci.o
+obj-$(CONFIG_SPI_QCOM_GENI)		+= spi-geni-qcom.o
+obj-$(CONFIG_SPI_QCOM_QSPI)		+= spi-qcom-qspi.o
 obj-$(CONFIG_SPI_QUP)			+= spi-qup.o
 obj-$(CONFIG_SPI_ROCKCHIP)		+= spi-rockchip.o
 obj-$(CONFIG_SPI_RB4XX)			+= spi-rb4xx.o
@@ -87,12 +96,17 @@
 obj-$(CONFIG_SPI_SH_HSPI)		+= spi-sh-hspi.o
 obj-$(CONFIG_SPI_SH_MSIOF)		+= spi-sh-msiof.o
 obj-$(CONFIG_SPI_SH_SCI)		+= spi-sh-sci.o
+obj-$(CONFIG_SPI_SIFIVE)		+= spi-sifive.o
 obj-$(CONFIG_SPI_SIRF)		+= spi-sirf.o
+obj-$(CONFIG_SPI_SLAVE_MT27XX)          += spi-slave-mt27xx.o
+obj-$(CONFIG_SPI_SPRD)			+= spi-sprd.o
 obj-$(CONFIG_SPI_SPRD_ADI)		+= spi-sprd-adi.o
 obj-$(CONFIG_SPI_STM32) 		+= spi-stm32.o
+obj-$(CONFIG_SPI_STM32_QSPI) 		+= spi-stm32-qspi.o
 obj-$(CONFIG_SPI_ST_SSC4)		+= spi-st-ssc4.o
 obj-$(CONFIG_SPI_SUN4I)			+= spi-sun4i.o
 obj-$(CONFIG_SPI_SUN6I)			+= spi-sun6i.o
+obj-$(CONFIG_SPI_SYNQUACER)		+= spi-synquacer.o
 obj-$(CONFIG_SPI_TEGRA114)		+= spi-tegra114.o
 obj-$(CONFIG_SPI_TEGRA20_SFLASH)	+= spi-tegra20-sflash.o
 obj-$(CONFIG_SPI_TEGRA20_SLINK)		+= spi-tegra20-slink.o
@@ -106,6 +120,7 @@
 obj-$(CONFIG_SPI_XILINX)		+= spi-xilinx.o
 obj-$(CONFIG_SPI_XLP)			+= spi-xlp.o
 obj-$(CONFIG_SPI_XTENSA_XTFPGA)		+= spi-xtensa-xtfpga.o
+obj-$(CONFIG_SPI_ZYNQ_QSPI)		+= spi-zynq-qspi.o
 obj-$(CONFIG_SPI_ZYNQMP_GQSPI)		+= spi-zynqmp-gqspi.o
 
 # SPI slave protocol handlers
diff --git a/drivers/spi/atmel-quadspi.c b/drivers/spi/atmel-quadspi.c
new file mode 100644
index 0000000..fd8007e
--- /dev/null
+++ b/drivers/spi/atmel-quadspi.c
@@ -0,0 +1,631 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for Atmel QSPI Controller
+ *
+ * Copyright (C) 2015 Atmel Corporation
+ * Copyright (C) 2018 Cryptera A/S
+ *
+ * Author: Cyrille Pitchen <cyrille.pitchen@atmel.com>
+ * Author: Piotr Bugalski <bugalski.piotr@gmail.com>
+ *
+ * This driver is based on drivers/mtd/spi-nor/fsl-quadspi.c from Freescale.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/spi/spi-mem.h>
+
+/* QSPI register offsets */
+#define QSPI_CR      0x0000  /* Control Register */
+#define QSPI_MR      0x0004  /* Mode Register */
+#define QSPI_RD      0x0008  /* Receive Data Register */
+#define QSPI_TD      0x000c  /* Transmit Data Register */
+#define QSPI_SR      0x0010  /* Status Register */
+#define QSPI_IER     0x0014  /* Interrupt Enable Register */
+#define QSPI_IDR     0x0018  /* Interrupt Disable Register */
+#define QSPI_IMR     0x001c  /* Interrupt Mask Register */
+#define QSPI_SCR     0x0020  /* Serial Clock Register */
+
+#define QSPI_IAR     0x0030  /* Instruction Address Register */
+#define QSPI_ICR     0x0034  /* Instruction Code Register */
+#define QSPI_WICR    0x0034  /* Write Instruction Code Register */
+#define QSPI_IFR     0x0038  /* Instruction Frame Register */
+#define QSPI_RICR    0x003C  /* Read Instruction Code Register */
+
+#define QSPI_SMR     0x0040  /* Scrambling Mode Register */
+#define QSPI_SKR     0x0044  /* Scrambling Key Register */
+
+#define QSPI_WPMR    0x00E4  /* Write Protection Mode Register */
+#define QSPI_WPSR    0x00E8  /* Write Protection Status Register */
+
+#define QSPI_VERSION 0x00FC  /* Version Register */
+
+
+/* Bitfields in QSPI_CR (Control Register) */
+#define QSPI_CR_QSPIEN                  BIT(0)
+#define QSPI_CR_QSPIDIS                 BIT(1)
+#define QSPI_CR_SWRST                   BIT(7)
+#define QSPI_CR_LASTXFER                BIT(24)
+
+/* Bitfields in QSPI_MR (Mode Register) */
+#define QSPI_MR_SMM                     BIT(0)
+#define QSPI_MR_LLB                     BIT(1)
+#define QSPI_MR_WDRBT                   BIT(2)
+#define QSPI_MR_SMRM                    BIT(3)
+#define QSPI_MR_CSMODE_MASK             GENMASK(5, 4)
+#define QSPI_MR_CSMODE_NOT_RELOADED     (0 << 4)
+#define QSPI_MR_CSMODE_LASTXFER         (1 << 4)
+#define QSPI_MR_CSMODE_SYSTEMATICALLY   (2 << 4)
+#define QSPI_MR_NBBITS_MASK             GENMASK(11, 8)
+#define QSPI_MR_NBBITS(n)               ((((n) - 8) << 8) & QSPI_MR_NBBITS_MASK)
+#define QSPI_MR_DLYBCT_MASK             GENMASK(23, 16)
+#define QSPI_MR_DLYBCT(n)               (((n) << 16) & QSPI_MR_DLYBCT_MASK)
+#define QSPI_MR_DLYCS_MASK              GENMASK(31, 24)
+#define QSPI_MR_DLYCS(n)                (((n) << 24) & QSPI_MR_DLYCS_MASK)
+
+/* Bitfields in QSPI_SR/QSPI_IER/QSPI_IDR/QSPI_IMR  */
+#define QSPI_SR_RDRF                    BIT(0)
+#define QSPI_SR_TDRE                    BIT(1)
+#define QSPI_SR_TXEMPTY                 BIT(2)
+#define QSPI_SR_OVRES                   BIT(3)
+#define QSPI_SR_CSR                     BIT(8)
+#define QSPI_SR_CSS                     BIT(9)
+#define QSPI_SR_INSTRE                  BIT(10)
+#define QSPI_SR_QSPIENS                 BIT(24)
+
+#define QSPI_SR_CMD_COMPLETED	(QSPI_SR_INSTRE | QSPI_SR_CSR)
+
+/* Bitfields in QSPI_SCR (Serial Clock Register) */
+#define QSPI_SCR_CPOL                   BIT(0)
+#define QSPI_SCR_CPHA                   BIT(1)
+#define QSPI_SCR_SCBR_MASK              GENMASK(15, 8)
+#define QSPI_SCR_SCBR(n)                (((n) << 8) & QSPI_SCR_SCBR_MASK)
+#define QSPI_SCR_DLYBS_MASK             GENMASK(23, 16)
+#define QSPI_SCR_DLYBS(n)               (((n) << 16) & QSPI_SCR_DLYBS_MASK)
+
+/* Bitfields in QSPI_ICR (Read/Write Instruction Code Register) */
+#define QSPI_ICR_INST_MASK              GENMASK(7, 0)
+#define QSPI_ICR_INST(inst)             (((inst) << 0) & QSPI_ICR_INST_MASK)
+#define QSPI_ICR_OPT_MASK               GENMASK(23, 16)
+#define QSPI_ICR_OPT(opt)               (((opt) << 16) & QSPI_ICR_OPT_MASK)
+
+/* Bitfields in QSPI_IFR (Instruction Frame Register) */
+#define QSPI_IFR_WIDTH_MASK             GENMASK(2, 0)
+#define QSPI_IFR_WIDTH_SINGLE_BIT_SPI   (0 << 0)
+#define QSPI_IFR_WIDTH_DUAL_OUTPUT      (1 << 0)
+#define QSPI_IFR_WIDTH_QUAD_OUTPUT      (2 << 0)
+#define QSPI_IFR_WIDTH_DUAL_IO          (3 << 0)
+#define QSPI_IFR_WIDTH_QUAD_IO          (4 << 0)
+#define QSPI_IFR_WIDTH_DUAL_CMD         (5 << 0)
+#define QSPI_IFR_WIDTH_QUAD_CMD         (6 << 0)
+#define QSPI_IFR_INSTEN                 BIT(4)
+#define QSPI_IFR_ADDREN                 BIT(5)
+#define QSPI_IFR_OPTEN                  BIT(6)
+#define QSPI_IFR_DATAEN                 BIT(7)
+#define QSPI_IFR_OPTL_MASK              GENMASK(9, 8)
+#define QSPI_IFR_OPTL_1BIT              (0 << 8)
+#define QSPI_IFR_OPTL_2BIT              (1 << 8)
+#define QSPI_IFR_OPTL_4BIT              (2 << 8)
+#define QSPI_IFR_OPTL_8BIT              (3 << 8)
+#define QSPI_IFR_ADDRL                  BIT(10)
+#define QSPI_IFR_TFRTYP_MEM		BIT(12)
+#define QSPI_IFR_SAMA5D2_WRITE_TRSFR	BIT(13)
+#define QSPI_IFR_CRM                    BIT(14)
+#define QSPI_IFR_NBDUM_MASK             GENMASK(20, 16)
+#define QSPI_IFR_NBDUM(n)               (((n) << 16) & QSPI_IFR_NBDUM_MASK)
+#define QSPI_IFR_APBTFRTYP_READ		BIT(24)	/* Defined in SAM9X60 */
+
+/* Bitfields in QSPI_SMR (Scrambling Mode Register) */
+#define QSPI_SMR_SCREN                  BIT(0)
+#define QSPI_SMR_RVDIS                  BIT(1)
+
+/* Bitfields in QSPI_WPMR (Write Protection Mode Register) */
+#define QSPI_WPMR_WPEN                  BIT(0)
+#define QSPI_WPMR_WPKEY_MASK            GENMASK(31, 8)
+#define QSPI_WPMR_WPKEY(wpkey)          (((wpkey) << 8) & QSPI_WPMR_WPKEY_MASK)
+
+/* Bitfields in QSPI_WPSR (Write Protection Status Register) */
+#define QSPI_WPSR_WPVS                  BIT(0)
+#define QSPI_WPSR_WPVSRC_MASK           GENMASK(15, 8)
+#define QSPI_WPSR_WPVSRC(src)           (((src) << 8) & QSPI_WPSR_WPVSRC_MASK)
+
+struct atmel_qspi_caps {
+	bool has_qspick;
+	bool has_ricr;
+};
+
+struct atmel_qspi {
+	void __iomem		*regs;
+	void __iomem		*mem;
+	struct clk		*pclk;
+	struct clk		*qspick;
+	struct platform_device	*pdev;
+	const struct atmel_qspi_caps *caps;
+	u32			pending;
+	u32			mr;
+	u32			scr;
+	struct completion	cmd_completion;
+};
+
+struct atmel_qspi_mode {
+	u8 cmd_buswidth;
+	u8 addr_buswidth;
+	u8 data_buswidth;
+	u32 config;
+};
+
+static const struct atmel_qspi_mode atmel_qspi_modes[] = {
+	{ 1, 1, 1, QSPI_IFR_WIDTH_SINGLE_BIT_SPI },
+	{ 1, 1, 2, QSPI_IFR_WIDTH_DUAL_OUTPUT },
+	{ 1, 1, 4, QSPI_IFR_WIDTH_QUAD_OUTPUT },
+	{ 1, 2, 2, QSPI_IFR_WIDTH_DUAL_IO },
+	{ 1, 4, 4, QSPI_IFR_WIDTH_QUAD_IO },
+	{ 2, 2, 2, QSPI_IFR_WIDTH_DUAL_CMD },
+	{ 4, 4, 4, QSPI_IFR_WIDTH_QUAD_CMD },
+};
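+
+/*
+ * Example (opcode only for illustration): a 1-1-4 fast read such as 0x6B
+ * matches the { 1, 1, 4 } entry and is issued as
+ * QSPI_IFR_WIDTH_QUAD_OUTPUT; a fully quad 4-4-4 command maps to
+ * QSPI_IFR_WIDTH_QUAD_CMD. Fields are the cmd/addr/data bus widths, in the
+ * order checked by atmel_qspi_is_compatible() below.
+ */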
+
+static inline bool atmel_qspi_is_compatible(const struct spi_mem_op *op,
+					    const struct atmel_qspi_mode *mode)
+{
+	if (op->cmd.buswidth != mode->cmd_buswidth)
+		return false;
+
+	if (op->addr.nbytes && op->addr.buswidth != mode->addr_buswidth)
+		return false;
+
+	if (op->data.nbytes && op->data.buswidth != mode->data_buswidth)
+		return false;
+
+	return true;
+}
+
+static int atmel_qspi_find_mode(const struct spi_mem_op *op)
+{
+	u32 i;
+
+	for (i = 0; i < ARRAY_SIZE(atmel_qspi_modes); i++)
+		if (atmel_qspi_is_compatible(op, &atmel_qspi_modes[i]))
+			return i;
+
+	return -ENOTSUPP;
+}
+
+static bool atmel_qspi_supports_op(struct spi_mem *mem,
+				   const struct spi_mem_op *op)
+{
+	if (atmel_qspi_find_mode(op) < 0)
+		return false;
+
+	/* special case not supported by hardware */
+	if (op->addr.nbytes == 2 && op->cmd.buswidth != op->addr.buswidth &&
+		op->dummy.nbytes == 0)
+		return false;
+
+	return true;
+}
+
+static int atmel_qspi_set_cfg(struct atmel_qspi *aq,
+			      const struct spi_mem_op *op, u32 *offset)
+{
+	u32 iar, icr, ifr;
+	u32 dummy_cycles = 0;
+	int mode;
+
+	iar = 0;
+	icr = QSPI_ICR_INST(op->cmd.opcode);
+	ifr = QSPI_IFR_INSTEN;
+
+	mode = atmel_qspi_find_mode(op);
+	if (mode < 0)
+		return mode;
+	ifr |= atmel_qspi_modes[mode].config;
+
+	if (op->dummy.buswidth && op->dummy.nbytes)
+		dummy_cycles = op->dummy.nbytes * 8 / op->dummy.buswidth;
+
+	/*
+	 * The controller allows 24 and 32-bit addressing, while NAND flash
+	 * requires 16-bit addresses. Handling 8-bit addresses is done using
+	 * the option field. For 16-bit addresses, the workaround depends on
+	 * the number of requested dummy bits. If there are 8 or more dummy
+	 * cycles, the address is shifted and sent with the first dummy byte.
+	 * Otherwise opcode is disabled and the first byte of the address
+	 * contains the command opcode (works only if the opcode and address
+	 * use the same buswidth). The limitation is when the 16-bit address is
+	 * used without enough dummy cycles and the opcode is using a different
+	 * buswidth than the address.
+	 */
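+	/*
+	 * Worked example (illustrative values, not mandated by the hardware):
+	 * a 16-bit address 0x1234 sent with opcode 0x5A and 8 dummy cycles at
+	 * addr.buswidth == 1 becomes iar = 0x123400, and the padding byte
+	 * stands in for the first dummy byte (dummy_cycles drops to 0). With
+	 * no dummy cycles, INSTEN is cleared instead and iar = 0x5A1234, so
+	 * the opcode goes out as the top byte of a 24-bit "address".
+	 */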
+	if (op->addr.buswidth) {
+		switch (op->addr.nbytes) {
+		case 0:
+			break;
+		case 1:
+			ifr |= QSPI_IFR_OPTEN | QSPI_IFR_OPTL_8BIT;
+			icr |= QSPI_ICR_OPT(op->addr.val & 0xff);
+			break;
+		case 2:
+			if (dummy_cycles < 8 / op->addr.buswidth) {
+				ifr &= ~QSPI_IFR_INSTEN;
+				ifr |= QSPI_IFR_ADDREN;
+				iar = (op->cmd.opcode << 16) |
+					(op->addr.val & 0xffff);
+			} else {
+				ifr |= QSPI_IFR_ADDREN;
+				iar = (op->addr.val << 8) & 0xffffff;
+				dummy_cycles -= 8 / op->addr.buswidth;
+			}
+			break;
+		case 3:
+			ifr |= QSPI_IFR_ADDREN;
+			iar = op->addr.val & 0xffffff;
+			break;
+		case 4:
+			ifr |= QSPI_IFR_ADDREN | QSPI_IFR_ADDRL;
+			iar = op->addr.val & 0x7ffffff;
+			break;
+		default:
+			return -ENOTSUPP;
+		}
+	}
+
+	/* offset of the data access in the QSPI memory space */
+	*offset = iar;
+
+	/* Set number of dummy cycles */
+	if (dummy_cycles)
+		ifr |= QSPI_IFR_NBDUM(dummy_cycles);
+
+	/* Set data enable */
+	if (op->data.nbytes)
+		ifr |= QSPI_IFR_DATAEN;
+
+	/*
+	 * If the QSPI controller is set in regular SPI mode, set it in
+	 * Serial Memory Mode (SMM).
+	 */
+	if (aq->mr != QSPI_MR_SMM) {
+		writel_relaxed(QSPI_MR_SMM, aq->regs + QSPI_MR);
+		aq->mr = QSPI_MR_SMM;
+	}
+
+	/* Clear pending interrupts */
+	(void)readl_relaxed(aq->regs + QSPI_SR);
+
+	if (aq->caps->has_ricr) {
+		if (!op->addr.nbytes && op->data.dir == SPI_MEM_DATA_IN)
+			ifr |= QSPI_IFR_APBTFRTYP_READ;
+
+		/* Set QSPI Instruction Frame registers */
+		writel_relaxed(iar, aq->regs + QSPI_IAR);
+		if (op->data.dir == SPI_MEM_DATA_IN)
+			writel_relaxed(icr, aq->regs + QSPI_RICR);
+		else
+			writel_relaxed(icr, aq->regs + QSPI_WICR);
+		writel_relaxed(ifr, aq->regs + QSPI_IFR);
+	} else {
+		if (op->data.dir == SPI_MEM_DATA_OUT)
+			ifr |= QSPI_IFR_SAMA5D2_WRITE_TRSFR;
+
+		/* Set QSPI Instruction Frame registers */
+		writel_relaxed(iar, aq->regs + QSPI_IAR);
+		writel_relaxed(icr, aq->regs + QSPI_ICR);
+		writel_relaxed(ifr, aq->regs + QSPI_IFR);
+	}
+
+	return 0;
+}
+
+static int atmel_qspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
+{
+	struct atmel_qspi *aq = spi_controller_get_devdata(mem->spi->master);
+	u32 sr, offset;
+	int err;
+
+	err = atmel_qspi_set_cfg(aq, op, &offset);
+	if (err)
+		return err;
+
+	/* Skip to the final steps if there is no data */
+	if (op->data.nbytes) {
+		/* Dummy read of QSPI_IFR to synchronize APB and AHB accesses */
+		(void)readl_relaxed(aq->regs + QSPI_IFR);
+
+		/* Send/Receive data */
+		if (op->data.dir == SPI_MEM_DATA_IN)
+			_memcpy_fromio(op->data.buf.in, aq->mem + offset,
+				       op->data.nbytes);
+		else
+			_memcpy_toio(aq->mem + offset, op->data.buf.out,
+				     op->data.nbytes);
+
+		/* Release the chip-select */
+		writel_relaxed(QSPI_CR_LASTXFER, aq->regs + QSPI_CR);
+	}
+
+	/* Poll INSTRuction End status */
+	sr = readl_relaxed(aq->regs + QSPI_SR);
+	if ((sr & QSPI_SR_CMD_COMPLETED) == QSPI_SR_CMD_COMPLETED)
+		return err;
+
+	/* Wait for INSTRuction End interrupt */
+	reinit_completion(&aq->cmd_completion);
+	aq->pending = sr & QSPI_SR_CMD_COMPLETED;
+	writel_relaxed(QSPI_SR_CMD_COMPLETED, aq->regs + QSPI_IER);
+	if (!wait_for_completion_timeout(&aq->cmd_completion,
+					 msecs_to_jiffies(1000)))
+		err = -ETIMEDOUT;
+	writel_relaxed(QSPI_SR_CMD_COMPLETED, aq->regs + QSPI_IDR);
+
+	return err;
+}
+
+static const char *atmel_qspi_get_name(struct spi_mem *spimem)
+{
+	return dev_name(spimem->spi->dev.parent);
+}
+
+static const struct spi_controller_mem_ops atmel_qspi_mem_ops = {
+	.supports_op = atmel_qspi_supports_op,
+	.exec_op = atmel_qspi_exec_op,
+	.get_name = atmel_qspi_get_name
+};
+
+static int atmel_qspi_setup(struct spi_device *spi)
+{
+	struct spi_controller *ctrl = spi->master;
+	struct atmel_qspi *aq = spi_controller_get_devdata(ctrl);
+	unsigned long src_rate;
+	u32 scbr;
+
+	if (ctrl->busy)
+		return -EBUSY;
+
+	if (!spi->max_speed_hz)
+		return -EINVAL;
+
+	src_rate = clk_get_rate(aq->pclk);
+	if (!src_rate)
+		return -EINVAL;
+
+	/* Compute the QSPI baudrate */
+	scbr = DIV_ROUND_UP(src_rate, spi->max_speed_hz);
+	if (scbr > 0)
+		scbr--;
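+	/*
+	 * The controller divides the source clock by SCBR + 1 (hence the
+	 * decrement above): e.g. with an illustrative 133 MHz pclk and a
+	 * 50 MHz device, DIV_ROUND_UP() yields 3, scbr becomes 2 and SCK
+	 * runs at 133 MHz / 3 ~= 44.3 MHz, never above max_speed_hz.
+	 */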
+
+	aq->scr = QSPI_SCR_SCBR(scbr);
+	writel_relaxed(aq->scr, aq->regs + QSPI_SCR);
+
+	return 0;
+}
+
+static void atmel_qspi_init(struct atmel_qspi *aq)
+{
+	/* Reset the QSPI controller */
+	writel_relaxed(QSPI_CR_SWRST, aq->regs + QSPI_CR);
+
+	/* Set the QSPI controller by default in Serial Memory Mode */
+	writel_relaxed(QSPI_MR_SMM, aq->regs + QSPI_MR);
+	aq->mr = QSPI_MR_SMM;
+
+	/* Enable the QSPI controller */
+	writel_relaxed(QSPI_CR_QSPIEN, aq->regs + QSPI_CR);
+}
+
+static irqreturn_t atmel_qspi_interrupt(int irq, void *dev_id)
+{
+	struct atmel_qspi *aq = dev_id;
+	u32 status, mask, pending;
+
+	status = readl_relaxed(aq->regs + QSPI_SR);
+	mask = readl_relaxed(aq->regs + QSPI_IMR);
+	pending = status & mask;
+
+	if (!pending)
+		return IRQ_NONE;
+
+	aq->pending |= pending;
+	if ((aq->pending & QSPI_SR_CMD_COMPLETED) == QSPI_SR_CMD_COMPLETED)
+		complete(&aq->cmd_completion);
+
+	return IRQ_HANDLED;
+}
+
+static int atmel_qspi_probe(struct platform_device *pdev)
+{
+	struct spi_controller *ctrl;
+	struct atmel_qspi *aq;
+	struct resource *res;
+	int irq, err = 0;
+
+	ctrl = spi_alloc_master(&pdev->dev, sizeof(*aq));
+	if (!ctrl)
+		return -ENOMEM;
+
+	ctrl->mode_bits = SPI_RX_DUAL | SPI_RX_QUAD | SPI_TX_DUAL | SPI_TX_QUAD;
+	ctrl->setup = atmel_qspi_setup;
+	ctrl->bus_num = -1;
+	ctrl->mem_ops = &atmel_qspi_mem_ops;
+	ctrl->num_chipselect = 1;
+	ctrl->dev.of_node = pdev->dev.of_node;
+	platform_set_drvdata(pdev, ctrl);
+
+	aq = spi_controller_get_devdata(ctrl);
+
+	init_completion(&aq->cmd_completion);
+	aq->pdev = pdev;
+
+	/* Map the registers */
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "qspi_base");
+	aq->regs = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(aq->regs)) {
+		dev_err(&pdev->dev, "missing registers\n");
+		err = PTR_ERR(aq->regs);
+		goto exit;
+	}
+
+	/* Map the AHB memory */
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "qspi_mmap");
+	aq->mem = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(aq->mem)) {
+		dev_err(&pdev->dev, "missing AHB memory\n");
+		err = PTR_ERR(aq->mem);
+		goto exit;
+	}
+
+	/* Get the peripheral clock */
+	aq->pclk = devm_clk_get(&pdev->dev, "pclk");
+	if (IS_ERR(aq->pclk))
+		aq->pclk = devm_clk_get(&pdev->dev, NULL);
+
+	if (IS_ERR(aq->pclk)) {
+		dev_err(&pdev->dev, "missing peripheral clock\n");
+		err = PTR_ERR(aq->pclk);
+		goto exit;
+	}
+
+	/* Enable the peripheral clock */
+	err = clk_prepare_enable(aq->pclk);
+	if (err) {
+		dev_err(&pdev->dev, "failed to enable the peripheral clock\n");
+		goto exit;
+	}
+
+	aq->caps = of_device_get_match_data(&pdev->dev);
+	if (!aq->caps) {
+		dev_err(&pdev->dev, "Could not retrieve QSPI caps\n");
+		err = -EINVAL;
+		goto exit;
+	}
+
+	if (aq->caps->has_qspick) {
+		/* Get the QSPI system clock */
+		aq->qspick = devm_clk_get(&pdev->dev, "qspick");
+		if (IS_ERR(aq->qspick)) {
+			dev_err(&pdev->dev, "missing system clock\n");
+			err = PTR_ERR(aq->qspick);
+			goto disable_pclk;
+		}
+
+		/* Enable the QSPI system clock */
+		err = clk_prepare_enable(aq->qspick);
+		if (err) {
+			dev_err(&pdev->dev,
+				"failed to enable the QSPI system clock\n");
+			goto disable_pclk;
+		}
+	}
+
+	/* Request the IRQ */
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		err = irq;
+		goto disable_qspick;
+	}
+	err = devm_request_irq(&pdev->dev, irq, atmel_qspi_interrupt,
+			       0, dev_name(&pdev->dev), aq);
+	if (err)
+		goto disable_qspick;
+
+	atmel_qspi_init(aq);
+
+	err = spi_register_controller(ctrl);
+	if (err)
+		goto disable_qspick;
+
+	return 0;
+
+disable_qspick:
+	clk_disable_unprepare(aq->qspick);
+disable_pclk:
+	clk_disable_unprepare(aq->pclk);
+exit:
+	spi_controller_put(ctrl);
+
+	return err;
+}
+
+static int atmel_qspi_remove(struct platform_device *pdev)
+{
+	struct spi_controller *ctrl = platform_get_drvdata(pdev);
+	struct atmel_qspi *aq = spi_controller_get_devdata(ctrl);
+
+	spi_unregister_controller(ctrl);
+	writel_relaxed(QSPI_CR_QSPIDIS, aq->regs + QSPI_CR);
+	clk_disable_unprepare(aq->qspick);
+	clk_disable_unprepare(aq->pclk);
+	return 0;
+}
+
+static int __maybe_unused atmel_qspi_suspend(struct device *dev)
+{
+	struct spi_controller *ctrl = dev_get_drvdata(dev);
+	struct atmel_qspi *aq = spi_controller_get_devdata(ctrl);
+
+	clk_disable_unprepare(aq->qspick);
+	clk_disable_unprepare(aq->pclk);
+
+	return 0;
+}
+
+static int __maybe_unused atmel_qspi_resume(struct device *dev)
+{
+	struct spi_controller *ctrl = dev_get_drvdata(dev);
+	struct atmel_qspi *aq = spi_controller_get_devdata(ctrl);
+
+	clk_prepare_enable(aq->pclk);
+	clk_prepare_enable(aq->qspick);
+
+	atmel_qspi_init(aq);
+
+	writel_relaxed(aq->scr, aq->regs + QSPI_SCR);
+
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(atmel_qspi_pm_ops, atmel_qspi_suspend,
+			 atmel_qspi_resume);
+
+static const struct atmel_qspi_caps atmel_sama5d2_qspi_caps = {};
+
+static const struct atmel_qspi_caps atmel_sam9x60_qspi_caps = {
+	.has_qspick = true,
+	.has_ricr = true,
+};
+
+static const struct of_device_id atmel_qspi_dt_ids[] = {
+	{
+		.compatible = "atmel,sama5d2-qspi",
+		.data = &atmel_sama5d2_qspi_caps,
+	},
+	{
+		.compatible = "microchip,sam9x60-qspi",
+		.data = &atmel_sam9x60_qspi_caps,
+	},
+	{ /* sentinel */ }
+};
+
+MODULE_DEVICE_TABLE(of, atmel_qspi_dt_ids);
+
+static struct platform_driver atmel_qspi_driver = {
+	.driver = {
+		.name	= "atmel_qspi",
+		.of_match_table	= atmel_qspi_dt_ids,
+		.pm	= &atmel_qspi_pm_ops,
+	},
+	.probe		= atmel_qspi_probe,
+	.remove		= atmel_qspi_remove,
+};
+module_platform_driver(atmel_qspi_driver);
+
+MODULE_AUTHOR("Cyrille Pitchen <cyrille.pitchen@atmel.com>");
+MODULE_AUTHOR("Piotr Bugalski <bugalski.piotr@gmail.com>");
+MODULE_DESCRIPTION("Atmel QSPI Controller driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/spi/spi-altera.c b/drivers/spi/spi-altera.c
index a5adf0d..41d71ba 100644
--- a/drivers/spi/spi-altera.c
+++ b/drivers/spi/spi-altera.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Altera SPI driver
  *
@@ -7,10 +8,6 @@
  * Copyright (c) 2006 Ben Dooks
  * Copyright (c) 2006 Simtec Electronics
  *	Ben Dooks <ben@simtec.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/interrupt.h>
@@ -173,7 +170,6 @@
 {
 	struct altera_spi *hw;
 	struct spi_master *master;
-	struct resource *res;
 	int err = -ENODEV;
 
 	master = spi_alloc_master(&pdev->dev, sizeof(struct altera_spi));
@@ -192,8 +188,7 @@
 	hw = spi_master_get_devdata(master);
 
 	/* find and map our resources */
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	hw->base = devm_ioremap_resource(&pdev->dev, res);
+	hw->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(hw->base)) {
 		err = PTR_ERR(hw->base);
 		goto exit;
diff --git a/drivers/spi/spi-armada-3700.c b/drivers/spi/spi-armada-3700.c
index 7dcb14d..e450ee1 100644
--- a/drivers/spi/spi-armada-3700.c
+++ b/drivers/spi/spi-armada-3700.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Marvell Armada-3700 SPI controller driver
  *
@@ -5,10 +6,6 @@
  *
  * Author: Wilson Ding <dingwei@marvell.com>
  * Author: Romain Perier <romain.perier@free-electrons.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/clk.h>
@@ -820,7 +817,6 @@
 {
 	struct device *dev = &pdev->dev;
 	struct device_node *of_node = dev->of_node;
-	struct resource *res;
 	struct spi_master *master;
 	struct a3700_spi *spi;
 	u32 num_cs = 0;
@@ -858,8 +854,7 @@
 
 	spi->master = master;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	spi->base = devm_ioremap_resource(dev, res);
+	spi->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(spi->base)) {
 		ret = PTR_ERR(spi->base);
 		goto error;
@@ -867,7 +862,6 @@
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0) {
-		dev_err(dev, "could not get irq: %d\n", irq);
 		ret = -ENXIO;
 		goto error;
 	}
diff --git a/drivers/spi/spi-at91-usart.c b/drivers/spi/spi-at91-usart.c
new file mode 100644
index 0000000..a40bb2e
--- /dev/null
+++ b/drivers/spi/spi-at91-usart.c
@@ -0,0 +1,705 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Driver for AT91 USART Controllers as SPI
+//
+// Copyright (C) 2018 Microchip Technology Inc.
+//
+// Author: Radu Pirea <radu.pirea@microchip.com>
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-direction.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/of_gpio.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+
+#include <linux/spi/spi.h>
+
+#define US_CR			0x00
+#define US_MR			0x04
+#define US_IER			0x08
+#define US_IDR			0x0C
+#define US_CSR			0x14
+#define US_RHR			0x18
+#define US_THR			0x1C
+#define US_BRGR			0x20
+#define US_VERSION		0xFC
+
+#define US_CR_RSTRX		BIT(2)
+#define US_CR_RSTTX		BIT(3)
+#define US_CR_RXEN		BIT(4)
+#define US_CR_RXDIS		BIT(5)
+#define US_CR_TXEN		BIT(6)
+#define US_CR_TXDIS		BIT(7)
+
+#define US_MR_SPI_MASTER	0x0E
+#define US_MR_CHRL		GENMASK(7, 6)
+#define US_MR_CPHA		BIT(8)
+#define US_MR_CPOL		BIT(16)
+#define US_MR_CLKO		BIT(18)
+#define US_MR_WRDBT		BIT(20)
+#define US_MR_LOOP		BIT(15)
+
+#define US_IR_RXRDY		BIT(0)
+#define US_IR_TXRDY		BIT(1)
+#define US_IR_OVRE		BIT(5)
+
+#define US_BRGR_SIZE		BIT(16)
+
+#define US_MIN_CLK_DIV		0x06
+#define US_MAX_CLK_DIV		BIT(16)
+
+#define US_RESET		(US_CR_RSTRX | US_CR_RSTTX)
+#define US_DISABLE		(US_CR_RXDIS | US_CR_TXDIS)
+#define US_ENABLE		(US_CR_RXEN | US_CR_TXEN)
+#define US_OVRE_RXRDY_IRQS	(US_IR_OVRE | US_IR_RXRDY)
+
+#define US_INIT \
+	(US_MR_SPI_MASTER | US_MR_CHRL | US_MR_CLKO | US_MR_WRDBT)
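+/*
+ * US_INIT selects SPI master mode, drives the clock out on SCK (CLKO) and
+ * sets WRDBT; ORing in the full US_MR_CHRL mask selects the largest
+ * character length, i.e. 8-bit words, matching the SPI_BPW_MASK(8)
+ * advertised at probe time.
+ */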
+#define US_DMA_MIN_BYTES       16
+#define US_DMA_TIMEOUT         (msecs_to_jiffies(1000))
+
+/* Register access macros */
+#define at91_usart_spi_readl(port, reg) \
+	readl_relaxed((port)->regs + US_##reg)
+#define at91_usart_spi_writel(port, reg, value) \
+	writel_relaxed((value), (port)->regs + US_##reg)
+
+#define at91_usart_spi_readb(port, reg) \
+	readb_relaxed((port)->regs + US_##reg)
+#define at91_usart_spi_writeb(port, reg, value) \
+	writeb_relaxed((value), (port)->regs + US_##reg)
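+
+/*
+ * These helpers paste the short register name onto the US_ prefix, e.g.
+ * at91_usart_spi_writel(aus, CR, US_ENABLE) expands to
+ * writel_relaxed(US_ENABLE, aus->regs + US_CR).
+ */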
+
+struct at91_usart_spi {
+	struct platform_device  *mpdev;
+	struct spi_transfer	*current_transfer;
+	void __iomem		*regs;
+	struct device		*dev;
+	struct clk		*clk;
+
+	struct completion	xfer_completion;
+
+	/* used in interrupt to protect data reading */
+	spinlock_t		lock;
+
+	phys_addr_t		phybase;
+
+	int			irq;
+	unsigned int		current_tx_remaining_bytes;
+	unsigned int		current_rx_remaining_bytes;
+
+	u32			spi_clk;
+	u32			status;
+
+	bool			xfer_failed;
+	bool			use_dma;
+};
+
+static void dma_callback(void *data)
+{
+	struct spi_controller   *ctlr = data;
+	struct at91_usart_spi   *aus = spi_master_get_devdata(ctlr);
+
+	at91_usart_spi_writel(aus, IER, US_IR_RXRDY);
+	aus->current_rx_remaining_bytes = 0;
+	complete(&aus->xfer_completion);
+}
+
+static bool at91_usart_spi_can_dma(struct spi_controller *ctrl,
+				   struct spi_device *spi,
+				   struct spi_transfer *xfer)
+{
+	struct at91_usart_spi *aus = spi_master_get_devdata(ctrl);
+
+	return aus->use_dma && xfer->len >= US_DMA_MIN_BYTES;
+}
+
+static int at91_usart_spi_configure_dma(struct spi_controller *ctlr,
+					struct at91_usart_spi *aus)
+{
+	struct dma_slave_config slave_config;
+	struct device *dev = &aus->mpdev->dev;
+	phys_addr_t phybase = aus->phybase;
+	dma_cap_mask_t mask;
+	int err = 0;
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+
+	ctlr->dma_tx = dma_request_slave_channel_reason(dev, "tx");
+	if (IS_ERR_OR_NULL(ctlr->dma_tx)) {
+		if (IS_ERR(ctlr->dma_tx)) {
+			err = PTR_ERR(ctlr->dma_tx);
+			goto at91_usart_spi_error_clear;
+		}
+
+		dev_dbg(dev,
+			"DMA TX channel not available, SPI unable to use DMA\n");
+		err = -EBUSY;
+		goto at91_usart_spi_error_clear;
+	}
+
+	ctlr->dma_rx = dma_request_slave_channel_reason(dev, "rx");
+	if (IS_ERR_OR_NULL(ctlr->dma_rx)) {
+		if (IS_ERR(ctlr->dma_rx)) {
+			err = PTR_ERR(ctlr->dma_rx);
+			goto at91_usart_spi_error;
+		}
+
+		dev_dbg(dev,
+			"DMA RX channel not available, SPI unable to use DMA\n");
+		err = -EBUSY;
+		goto at91_usart_spi_error;
+	}
+
+	slave_config.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+	slave_config.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+	slave_config.dst_addr = (dma_addr_t)phybase + US_THR;
+	slave_config.src_addr = (dma_addr_t)phybase + US_RHR;
+	slave_config.src_maxburst = 1;
+	slave_config.dst_maxburst = 1;
+	slave_config.device_fc = false;
+
+	slave_config.direction = DMA_DEV_TO_MEM;
+	if (dmaengine_slave_config(ctlr->dma_rx, &slave_config)) {
+		dev_err(&ctlr->dev,
+			"failed to configure rx dma channel\n");
+		err = -EINVAL;
+		goto at91_usart_spi_error;
+	}
+
+	slave_config.direction = DMA_MEM_TO_DEV;
+	if (dmaengine_slave_config(ctlr->dma_tx, &slave_config)) {
+		dev_err(&ctlr->dev,
+			"failed to configure tx dma channel\n");
+		err = -EINVAL;
+		goto at91_usart_spi_error;
+	}
+
+	aus->use_dma = true;
+	return 0;
+
+at91_usart_spi_error:
+	if (!IS_ERR_OR_NULL(ctlr->dma_tx))
+		dma_release_channel(ctlr->dma_tx);
+	if (!IS_ERR_OR_NULL(ctlr->dma_rx))
+		dma_release_channel(ctlr->dma_rx);
+	ctlr->dma_tx = NULL;
+	ctlr->dma_rx = NULL;
+
+at91_usart_spi_error_clear:
+	return err;
+}
+
+static void at91_usart_spi_release_dma(struct spi_controller *ctlr)
+{
+	if (ctlr->dma_rx)
+		dma_release_channel(ctlr->dma_rx);
+	if (ctlr->dma_tx)
+		dma_release_channel(ctlr->dma_tx);
+}
+
+static void at91_usart_spi_stop_dma(struct spi_controller *ctlr)
+{
+	if (ctlr->dma_rx)
+		dmaengine_terminate_all(ctlr->dma_rx);
+	if (ctlr->dma_tx)
+		dmaengine_terminate_all(ctlr->dma_tx);
+}
+
+static int at91_usart_spi_dma_transfer(struct spi_controller *ctlr,
+				       struct spi_transfer *xfer)
+{
+	struct at91_usart_spi *aus = spi_master_get_devdata(ctlr);
+	struct dma_chan	 *rxchan = ctlr->dma_rx;
+	struct dma_chan *txchan = ctlr->dma_tx;
+	struct dma_async_tx_descriptor *rxdesc;
+	struct dma_async_tx_descriptor *txdesc;
+	dma_cookie_t cookie;
+
+	/* Disable RX interrupt */
+	at91_usart_spi_writel(aus, IDR, US_IR_RXRDY);
+
+	rxdesc = dmaengine_prep_slave_sg(rxchan,
+					 xfer->rx_sg.sgl,
+					 xfer->rx_sg.nents,
+					 DMA_DEV_TO_MEM,
+					 DMA_PREP_INTERRUPT |
+					 DMA_CTRL_ACK);
+	if (!rxdesc)
+		goto at91_usart_spi_err_dma;
+
+	txdesc = dmaengine_prep_slave_sg(txchan,
+					 xfer->tx_sg.sgl,
+					 xfer->tx_sg.nents,
+					 DMA_MEM_TO_DEV,
+					 DMA_PREP_INTERRUPT |
+					 DMA_CTRL_ACK);
+	if (!txdesc)
+		goto at91_usart_spi_err_dma;
+
+	rxdesc->callback = dma_callback;
+	rxdesc->callback_param = ctlr;
+
+	cookie = rxdesc->tx_submit(rxdesc);
+	if (dma_submit_error(cookie))
+		goto at91_usart_spi_err_dma;
+
+	cookie = txdesc->tx_submit(txdesc);
+	if (dma_submit_error(cookie))
+		goto at91_usart_spi_err_dma;
+
+	rxchan->device->device_issue_pending(rxchan);
+	txchan->device->device_issue_pending(txchan);
+
+	return 0;
+
+at91_usart_spi_err_dma:
+	/* Enable RX interrupt if something fails and fall back to PIO */
+	at91_usart_spi_writel(aus, IER, US_IR_RXRDY);
+	at91_usart_spi_stop_dma(ctlr);
+
+	return -ENOMEM;
+}
+
+static unsigned long at91_usart_spi_dma_timeout(struct at91_usart_spi *aus)
+{
+	return wait_for_completion_timeout(&aus->xfer_completion,
+					   US_DMA_TIMEOUT);
+}
+
+static inline u32 at91_usart_spi_tx_ready(struct at91_usart_spi *aus)
+{
+	return aus->status & US_IR_TXRDY;
+}
+
+static inline u32 at91_usart_spi_rx_ready(struct at91_usart_spi *aus)
+{
+	return aus->status & US_IR_RXRDY;
+}
+
+static inline u32 at91_usart_spi_check_overrun(struct at91_usart_spi *aus)
+{
+	return aus->status & US_IR_OVRE;
+}
+
+static inline u32 at91_usart_spi_read_status(struct at91_usart_spi *aus)
+{
+	aus->status = at91_usart_spi_readl(aus, CSR);
+	return aus->status;
+}
+
+static inline void at91_usart_spi_tx(struct at91_usart_spi *aus)
+{
+	unsigned int len = aus->current_transfer->len;
+	unsigned int remaining = aus->current_tx_remaining_bytes;
+	const u8  *tx_buf = aus->current_transfer->tx_buf;
+
+	if (!remaining)
+		return;
+
+	if (at91_usart_spi_tx_ready(aus)) {
+		at91_usart_spi_writeb(aus, THR, tx_buf[len - remaining]);
+		aus->current_tx_remaining_bytes--;
+	}
+}
+
+static inline void at91_usart_spi_rx(struct at91_usart_spi *aus)
+{
+	int len = aus->current_transfer->len;
+	int remaining = aus->current_rx_remaining_bytes;
+	u8  *rx_buf = aus->current_transfer->rx_buf;
+
+	if (!remaining)
+		return;
+
+	rx_buf[len - remaining] = at91_usart_spi_readb(aus, RHR);
+	aus->current_rx_remaining_bytes--;
+}
+
+static inline void
+at91_usart_spi_set_xfer_speed(struct at91_usart_spi *aus,
+			      struct spi_transfer *xfer)
+{
+	at91_usart_spi_writel(aus, BRGR,
+			      DIV_ROUND_UP(aus->spi_clk, xfer->speed_hz));
+}
+
+static irqreturn_t at91_usart_spi_interrupt(int irq, void *dev_id)
+{
+	struct spi_controller *controller = dev_id;
+	struct at91_usart_spi *aus = spi_master_get_devdata(controller);
+
+	spin_lock(&aus->lock);
+	at91_usart_spi_read_status(aus);
+
+	if (at91_usart_spi_check_overrun(aus)) {
+		aus->xfer_failed = true;
+		at91_usart_spi_writel(aus, IDR, US_IR_OVRE | US_IR_RXRDY);
+		spin_unlock(&aus->lock);
+		return IRQ_HANDLED;
+	}
+
+	if (at91_usart_spi_rx_ready(aus)) {
+		at91_usart_spi_rx(aus);
+		spin_unlock(&aus->lock);
+		return IRQ_HANDLED;
+	}
+
+	spin_unlock(&aus->lock);
+
+	return IRQ_NONE;
+}
+
+static int at91_usart_spi_setup(struct spi_device *spi)
+{
+	struct at91_usart_spi *aus = spi_master_get_devdata(spi->controller);
+	u32 *ausd = spi->controller_state;
+	unsigned int mr = at91_usart_spi_readl(aus, MR);
+
+	if (spi->mode & SPI_CPOL)
+		mr |= US_MR_CPOL;
+	else
+		mr &= ~US_MR_CPOL;
+
+	if (spi->mode & SPI_CPHA)
+		mr |= US_MR_CPHA;
+	else
+		mr &= ~US_MR_CPHA;
+
+	if (spi->mode & SPI_LOOP)
+		mr |= US_MR_LOOP;
+	else
+		mr &= ~US_MR_LOOP;
+
+	if (!ausd) {
+		ausd = kzalloc(sizeof(*ausd), GFP_KERNEL);
+		if (!ausd)
+			return -ENOMEM;
+
+		spi->controller_state = ausd;
+	}
+
+	*ausd = mr;
+
+	dev_dbg(&spi->dev,
+		"setup: bpw %u mode 0x%x -> mr %d %08x\n",
+		spi->bits_per_word, spi->mode, spi->chip_select, mr);
+
+	return 0;
+}
+
+static int at91_usart_spi_transfer_one(struct spi_controller *ctlr,
+				       struct spi_device *spi,
+				       struct spi_transfer *xfer)
+{
+	struct at91_usart_spi *aus = spi_master_get_devdata(ctlr);
+	unsigned long dma_timeout = 0;
+	int ret = 0;
+
+	at91_usart_spi_set_xfer_speed(aus, xfer);
+	aus->xfer_failed = false;
+	aus->current_transfer = xfer;
+	aus->current_tx_remaining_bytes = xfer->len;
+	aus->current_rx_remaining_bytes = xfer->len;
+
+	while ((aus->current_tx_remaining_bytes ||
+		aus->current_rx_remaining_bytes) && !aus->xfer_failed) {
+		reinit_completion(&aus->xfer_completion);
+		if (at91_usart_spi_can_dma(ctlr, spi, xfer) &&
+		    !ret) {
+			ret = at91_usart_spi_dma_transfer(ctlr, xfer);
+			if (ret)
+				continue;
+
+			dma_timeout = at91_usart_spi_dma_timeout(aus);
+
+			if (WARN_ON(dma_timeout == 0)) {
+				dev_err(&spi->dev, "DMA transfer timeout\n");
+				return -EIO;
+			}
+			aus->current_tx_remaining_bytes = 0;
+		} else {
+			at91_usart_spi_read_status(aus);
+			at91_usart_spi_tx(aus);
+		}
+
+		cpu_relax();
+	}
+
+	if (aus->xfer_failed) {
+		dev_err(aus->dev, "Overrun!\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int at91_usart_spi_prepare_message(struct spi_controller *ctlr,
+					  struct spi_message *message)
+{
+	struct at91_usart_spi *aus = spi_master_get_devdata(ctlr);
+	struct spi_device *spi = message->spi;
+	u32 *ausd = spi->controller_state;
+
+	at91_usart_spi_writel(aus, CR, US_ENABLE);
+	at91_usart_spi_writel(aus, IER, US_OVRE_RXRDY_IRQS);
+	at91_usart_spi_writel(aus, MR, *ausd);
+
+	return 0;
+}
+
+static int at91_usart_spi_unprepare_message(struct spi_controller *ctlr,
+					    struct spi_message *message)
+{
+	struct at91_usart_spi *aus = spi_master_get_devdata(ctlr);
+
+	at91_usart_spi_writel(aus, CR, US_RESET | US_DISABLE);
+	at91_usart_spi_writel(aus, IDR, US_OVRE_RXRDY_IRQS);
+
+	return 0;
+}
+
+static void at91_usart_spi_cleanup(struct spi_device *spi)
+{
+	struct at91_usart_spi_device *ausd = spi->controller_state;
+
+	spi->controller_state = NULL;
+	kfree(ausd);
+}
+
+static void at91_usart_spi_init(struct at91_usart_spi *aus)
+{
+	at91_usart_spi_writel(aus, MR, US_INIT);
+	at91_usart_spi_writel(aus, CR, US_RESET | US_DISABLE);
+}
+
+static int at91_usart_gpio_setup(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.parent->of_node;
+	int i;
+	int ret;
+	int nb;
+
+	if (!np)
+		return -EINVAL;
+
+	nb = of_gpio_named_count(np, "cs-gpios");
+	for (i = 0; i < nb; i++) {
+		int cs_gpio = of_get_named_gpio(np, "cs-gpios", i);
+
+		if (cs_gpio < 0)
+			return cs_gpio;
+
+		if (gpio_is_valid(cs_gpio)) {
+			ret = devm_gpio_request_one(&pdev->dev, cs_gpio,
+						    GPIOF_DIR_OUT,
+						    dev_name(&pdev->dev));
+			if (ret)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int at91_usart_spi_probe(struct platform_device *pdev)
+{
+	struct resource *regs;
+	struct spi_controller *controller;
+	struct at91_usart_spi *aus;
+	struct clk *clk;
+	int irq;
+	int ret;
+
+	regs = platform_get_resource(to_platform_device(pdev->dev.parent),
+				     IORESOURCE_MEM, 0);
+	if (!regs)
+		return -EINVAL;
+
+	irq = platform_get_irq(to_platform_device(pdev->dev.parent), 0);
+	if (irq < 0)
+		return irq;
+
+	clk = devm_clk_get(pdev->dev.parent, "usart");
+	if (IS_ERR(clk))
+		return PTR_ERR(clk);
+
+	ret = -ENOMEM;
+	controller = spi_alloc_master(&pdev->dev, sizeof(*aus));
+	if (!controller)
+		goto at91_usart_spi_probe_fail;
+
+	ret = at91_usart_gpio_setup(pdev);
+	if (ret)
+		goto at91_usart_spi_probe_fail;
+
+	controller->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LOOP | SPI_CS_HIGH;
+	controller->dev.of_node = pdev->dev.parent->of_node;
+	controller->bits_per_word_mask = SPI_BPW_MASK(8);
+	controller->setup = at91_usart_spi_setup;
+	controller->flags = SPI_MASTER_MUST_RX | SPI_MASTER_MUST_TX;
+	controller->transfer_one = at91_usart_spi_transfer_one;
+	controller->prepare_message = at91_usart_spi_prepare_message;
+	controller->unprepare_message = at91_usart_spi_unprepare_message;
+	controller->can_dma = at91_usart_spi_can_dma;
+	controller->cleanup = at91_usart_spi_cleanup;
+	controller->max_speed_hz = DIV_ROUND_UP(clk_get_rate(clk),
+						US_MIN_CLK_DIV);
+	controller->min_speed_hz = DIV_ROUND_UP(clk_get_rate(clk),
+						US_MAX_CLK_DIV);
+	platform_set_drvdata(pdev, controller);
+
+	aus = spi_master_get_devdata(controller);
+
+	aus->dev = &pdev->dev;
+	aus->regs = devm_ioremap_resource(&pdev->dev, regs);
+	if (IS_ERR(aus->regs)) {
+		ret = PTR_ERR(aus->regs);
+		goto at91_usart_spi_probe_fail;
+	}
+
+	aus->irq = irq;
+	aus->clk = clk;
+
+	ret = devm_request_irq(&pdev->dev, irq, at91_usart_spi_interrupt, 0,
+			       dev_name(&pdev->dev), controller);
+	if (ret)
+		goto at91_usart_spi_probe_fail;
+
+	ret = clk_prepare_enable(clk);
+	if (ret)
+		goto at91_usart_spi_probe_fail;
+
+	aus->spi_clk = clk_get_rate(clk);
+	at91_usart_spi_init(aus);
+
+	aus->phybase = regs->start;
+
+	aus->mpdev = to_platform_device(pdev->dev.parent);
+
+	ret = at91_usart_spi_configure_dma(controller, aus);
+	if (ret)
+		goto at91_usart_fail_dma;
+
+	spin_lock_init(&aus->lock);
+	init_completion(&aus->xfer_completion);
+
+	ret = devm_spi_register_master(&pdev->dev, controller);
+	if (ret)
+		goto at91_usart_fail_register_master;
+
+	dev_info(&pdev->dev,
+		 "AT91 USART SPI Controller version 0x%x at %pa (irq %d)\n",
+		 at91_usart_spi_readl(aus, VERSION),
+		 &regs->start, irq);
+
+	return 0;
+
+at91_usart_fail_register_master:
+	at91_usart_spi_release_dma(controller);
+at91_usart_fail_dma:
+	clk_disable_unprepare(clk);
+at91_usart_spi_probe_fail:
+	spi_master_put(controller);
+	return ret;
+}
+
+__maybe_unused static int at91_usart_spi_runtime_suspend(struct device *dev)
+{
+	struct spi_controller *ctlr = dev_get_drvdata(dev);
+	struct at91_usart_spi *aus = spi_master_get_devdata(ctlr);
+
+	clk_disable_unprepare(aus->clk);
+	pinctrl_pm_select_sleep_state(dev);
+
+	return 0;
+}
+
+__maybe_unused static int at91_usart_spi_runtime_resume(struct device *dev)
+{
+	struct spi_controller *ctrl = dev_get_drvdata(dev);
+	struct at91_usart_spi *aus = spi_master_get_devdata(ctrl);
+
+	pinctrl_pm_select_default_state(dev);
+
+	return clk_prepare_enable(aus->clk);
+}
+
+__maybe_unused static int at91_usart_spi_suspend(struct device *dev)
+{
+	struct spi_controller *ctrl = dev_get_drvdata(dev);
+	int ret;
+
+	ret = spi_controller_suspend(ctrl);
+	if (ret)
+		return ret;
+
+	if (!pm_runtime_suspended(dev))
+		at91_usart_spi_runtime_suspend(dev);
+
+	return 0;
+}
+
+__maybe_unused static int at91_usart_spi_resume(struct device *dev)
+{
+	struct spi_controller *ctrl = dev_get_drvdata(dev);
+	struct at91_usart_spi *aus = spi_master_get_devdata(ctrl);
+	int ret;
+
+	if (!pm_runtime_suspended(dev)) {
+		ret = at91_usart_spi_runtime_resume(dev);
+		if (ret)
+			return ret;
+	}
+
+	at91_usart_spi_init(aus);
+
+	return spi_controller_resume(ctrl);
+}
+
+static int at91_usart_spi_remove(struct platform_device *pdev)
+{
+	struct spi_controller *ctlr = platform_get_drvdata(pdev);
+	struct at91_usart_spi *aus = spi_master_get_devdata(ctlr);
+
+	at91_usart_spi_release_dma(ctlr);
+	clk_disable_unprepare(aus->clk);
+
+	return 0;
+}
+
+static const struct dev_pm_ops at91_usart_spi_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(at91_usart_spi_suspend, at91_usart_spi_resume)
+	SET_RUNTIME_PM_OPS(at91_usart_spi_runtime_suspend,
+			   at91_usart_spi_runtime_resume, NULL)
+};
+
+static const struct of_device_id at91_usart_spi_dt_ids[] = {
+	{ .compatible = "microchip,at91sam9g45-usart-spi"},
+	{ /* sentinel */}
+};
+
+MODULE_DEVICE_TABLE(of, at91_usart_spi_dt_ids);
+
+static struct platform_driver at91_usart_spi_driver = {
+	.driver = {
+		.name = "at91_usart_spi",
+		.pm = &at91_usart_spi_pm_ops,
+	},
+	.probe = at91_usart_spi_probe,
+	.remove = at91_usart_spi_remove,
+};
+
+module_platform_driver(at91_usart_spi_driver);
+
+MODULE_DESCRIPTION("Microchip AT91 USART SPI Controller driver");
+MODULE_AUTHOR("Radu Pirea <radu.pirea@microchip.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:at91_usart_spi");
diff --git a/drivers/spi/spi-ath79.c b/drivers/spi/spi-ath79.c
index 3f6b657..eb9a243 100644
--- a/drivers/spi/spi-ath79.c
+++ b/drivers/spi/spi-ath79.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * SPI controller driver for the Atheros AR71XX/AR724X/AR913X SoCs
  *
@@ -5,11 +6,6 @@
  *
  * This driver has been based on the spi-gpio.c:
  *	Copyright (C) 2006,2008 David Brownell
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  */
 
 #include <linux/kernel.h>
@@ -21,18 +17,26 @@
 #include <linux/spi/spi.h>
 #include <linux/spi/spi_bitbang.h>
 #include <linux/bitops.h>
-#include <linux/gpio.h>
 #include <linux/clk.h>
 #include <linux/err.h>
-
-#include <asm/mach-ath79/ar71xx_regs.h>
-#include <asm/mach-ath79/ath79_spi_platform.h>
+#include <linux/platform_data/spi-ath79.h>
 
 #define DRV_NAME	"ath79-spi"
 
 #define ATH79_SPI_RRW_DELAY_FACTOR	12000
 #define MHZ				(1000 * 1000)
 
+#define AR71XX_SPI_REG_FS		0x00	/* Function Select */
+#define AR71XX_SPI_REG_CTRL		0x04	/* SPI Control */
+#define AR71XX_SPI_REG_IOC		0x08	/* SPI I/O Control */
+#define AR71XX_SPI_REG_RDS		0x0c	/* Read Data Shift */
+
+#define AR71XX_SPI_FS_GPIO		BIT(0)	/* Enable GPIO mode */
+
+#define AR71XX_SPI_IOC_DO		BIT(0)	/* Data Out pin */
+#define AR71XX_SPI_IOC_CLK		BIT(8)	/* CLK pin */
+#define AR71XX_SPI_IOC_CS(n)		BIT(16 + (n))
+
 struct ath79_spi {
 	struct spi_bitbang	bitbang;
 	u32			ioc_base;
@@ -67,31 +71,14 @@
 {
 	struct ath79_spi *sp = ath79_spidev_to_sp(spi);
 	int cs_high = (spi->mode & SPI_CS_HIGH) ? is_active : !is_active;
+	u32 cs_bit = AR71XX_SPI_IOC_CS(spi->chip_select);
 
-	if (is_active) {
-		/* set initial clock polarity */
-		if (spi->mode & SPI_CPOL)
-			sp->ioc_base |= AR71XX_SPI_IOC_CLK;
-		else
-			sp->ioc_base &= ~AR71XX_SPI_IOC_CLK;
+	if (cs_high)
+		sp->ioc_base |= cs_bit;
+	else
+		sp->ioc_base &= ~cs_bit;
 
-		ath79_spi_wr(sp, AR71XX_SPI_REG_IOC, sp->ioc_base);
-	}
-
-	if (gpio_is_valid(spi->cs_gpio)) {
-		/* SPI is normally active-low */
-		gpio_set_value_cansleep(spi->cs_gpio, cs_high);
-	} else {
-		u32 cs_bit = AR71XX_SPI_IOC_CS(spi->chip_select);
-
-		if (cs_high)
-			sp->ioc_base |= cs_bit;
-		else
-			sp->ioc_base &= ~cs_bit;
-
-		ath79_spi_wr(sp, AR71XX_SPI_REG_IOC, sp->ioc_base);
-	}
-
+	ath79_spi_wr(sp, AR71XX_SPI_REG_IOC, sp->ioc_base);
 }
 
 static void ath79_spi_enable(struct ath79_spi *sp)
@@ -103,6 +90,9 @@
 	sp->reg_ctrl = ath79_spi_rr(sp, AR71XX_SPI_REG_CTRL);
 	sp->ioc_base = ath79_spi_rr(sp, AR71XX_SPI_REG_IOC);
 
+	/* clear clk and mosi in the base state */
+	sp->ioc_base &= ~(AR71XX_SPI_IOC_DO | AR71XX_SPI_IOC_CLK);
+
 	/* TODO: setup speed? */
 	ath79_spi_wr(sp, AR71XX_SPI_REG_CTRL, 0x43);
 }
@@ -115,66 +105,6 @@
 	ath79_spi_wr(sp, AR71XX_SPI_REG_FS, 0);
 }
 
-static int ath79_spi_setup_cs(struct spi_device *spi)
-{
-	struct ath79_spi *sp = ath79_spidev_to_sp(spi);
-	int status;
-
-	status = 0;
-	if (gpio_is_valid(spi->cs_gpio)) {
-		unsigned long flags;
-
-		flags = GPIOF_DIR_OUT;
-		if (spi->mode & SPI_CS_HIGH)
-			flags |= GPIOF_INIT_LOW;
-		else
-			flags |= GPIOF_INIT_HIGH;
-
-		status = gpio_request_one(spi->cs_gpio, flags,
-					  dev_name(&spi->dev));
-	} else {
-		u32 cs_bit = AR71XX_SPI_IOC_CS(spi->chip_select);
-
-		if (spi->mode & SPI_CS_HIGH)
-			sp->ioc_base &= ~cs_bit;
-		else
-			sp->ioc_base |= cs_bit;
-
-		ath79_spi_wr(sp, AR71XX_SPI_REG_IOC, sp->ioc_base);
-	}
-
-	return status;
-}
-
-static void ath79_spi_cleanup_cs(struct spi_device *spi)
-{
-	if (gpio_is_valid(spi->cs_gpio))
-		gpio_free(spi->cs_gpio);
-}
-
-static int ath79_spi_setup(struct spi_device *spi)
-{
-	int status = 0;
-
-	if (!spi->controller_state) {
-		status = ath79_spi_setup_cs(spi);
-		if (status)
-			return status;
-	}
-
-	status = spi_bitbang_setup(spi);
-	if (status && !spi->controller_state)
-		ath79_spi_cleanup_cs(spi);
-
-	return status;
-}
-
-static void ath79_spi_cleanup(struct spi_device *spi)
-{
-	ath79_spi_cleanup_cs(spi);
-	spi_bitbang_cleanup(spi);
-}
-
 static u32 ath79_spi_txrx_mode0(struct spi_device *spi, unsigned int nsecs,
 			       u32 word, u8 bits, unsigned flags)
 {
@@ -209,7 +139,6 @@
 	struct spi_master *master;
 	struct ath79_spi *sp;
 	struct ath79_spi_platform_data *pdata;
-	struct resource	*r;
 	unsigned long rate;
 	int ret;
 
@@ -225,9 +154,10 @@
 
 	pdata = dev_get_platdata(&pdev->dev);
 
+	master->use_gpio_descriptors = true;
 	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 32);
-	master->setup = ath79_spi_setup;
-	master->cleanup = ath79_spi_cleanup;
+	master->setup = spi_bitbang_setup;
+	master->cleanup = spi_bitbang_cleanup;
 	if (pdata) {
 		master->bus_num = pdata->bus_num;
 		master->num_chipselect = pdata->num_chipselect;
@@ -236,11 +166,9 @@
 	sp->bitbang.master = master;
 	sp->bitbang.chipselect = ath79_spi_chipselect;
 	sp->bitbang.txrx_word[SPI_MODE_0] = ath79_spi_txrx_mode0;
-	sp->bitbang.setup_transfer = spi_bitbang_setup_transfer;
 	sp->bitbang.flags = SPI_CS_HIGH;
 
-	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	sp->base = devm_ioremap_resource(&pdev->dev, r);
+	sp->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(sp->base)) {
 		ret = PTR_ERR(sp->base);
 		goto err_put_master;
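Not part of the patch, for reference only: with the register definitions now inlined above, chip-select handling in spi-ath79.c reduces to flipping one IOC bit per chip select, BIT(16 + n). A minimal standalone sketch of that logic (hypothetical helper name, operating on a shadow copy of the register):

#include <stdint.h>
#include <stdbool.h>

#define AR71XX_SPI_IOC_CS(n)	(1u << (16 + (n)))

/* Illustrative only: mirrors the simplified ath79_spi_chipselect() above
 * on a shadow copy of the IOC register.  cs_high is the electrical level
 * wanted on the CS line, i.e. the value derived from SPI_CS_HIGH.
 */
static uint32_t ath79_ioc_update(uint32_t ioc, unsigned int chip_select,
				 bool cs_high)
{
	uint32_t cs_bit = AR71XX_SPI_IOC_CS(chip_select);

	if (cs_high)
		ioc |= cs_bit;		/* drive the CS line high */
	else
		ioc &= ~cs_bit;		/* drive the CS line low */

	return ioc;			/* value to write back to IOC */
}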
diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c
index 3f890d1..acf318e 100644
--- a/drivers/spi/spi-atmel.c
+++ b/drivers/spi/spi-atmel.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Driver for Atmel AT32 and AT91 SPI Controllers
  *
  * Copyright (C) 2006 Atmel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/kernel.h>
@@ -23,10 +20,10 @@
 #include <linux/of.h>
 
 #include <linux/io.h>
-#include <linux/gpio.h>
-#include <linux/of_gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/pm_runtime.h>
+#include <trace/events/spi.h>
 
 /* SPI register offsets */
 #define SPI_CR					0x0000
@@ -312,7 +309,7 @@
 
 /* Controller-specific per-slave state */
 struct atmel_spi_device {
-	unsigned int		npcs_pin;
+	struct gpio_desc	*npcs_pin;
 	u32			csr;
 };
 
@@ -355,7 +352,6 @@
 static void cs_activate(struct atmel_spi *as, struct spi_device *spi)
 {
 	struct atmel_spi_device *asd = spi->controller_state;
-	unsigned active = spi->mode & SPI_CS_HIGH;
 	u32 mr;
 
 	if (atmel_spi_is_v2(as)) {
@@ -379,7 +375,7 @@
 
 		mr = spi_readl(as, MR);
 		if (as->use_cs_gpios)
-			gpio_set_value(asd->npcs_pin, active);
+			gpiod_set_value(asd->npcs_pin, 1);
 	} else {
 		u32 cpol = (spi->mode & SPI_CPOL) ? SPI_BIT(CPOL) : 0;
 		int i;
@@ -396,19 +392,16 @@
 		mr = spi_readl(as, MR);
 		mr = SPI_BFINS(PCS, ~(1 << spi->chip_select), mr);
 		if (as->use_cs_gpios && spi->chip_select != 0)
-			gpio_set_value(asd->npcs_pin, active);
+			gpiod_set_value(asd->npcs_pin, 1);
 		spi_writel(as, MR, mr);
 	}
 
-	dev_dbg(&spi->dev, "activate %u%s, mr %08x\n",
-			asd->npcs_pin, active ? " (high)" : "",
-			mr);
+	dev_dbg(&spi->dev, "activate NPCS, mr %08x\n", mr);
 }
 
 static void cs_deactivate(struct atmel_spi *as, struct spi_device *spi)
 {
 	struct atmel_spi_device *asd = spi->controller_state;
-	unsigned active = spi->mode & SPI_CS_HIGH;
 	u32 mr;
 
 	/* only deactivate *this* device; sometimes transfers to
@@ -420,14 +413,12 @@
 		spi_writel(as, MR, mr);
 	}
 
-	dev_dbg(&spi->dev, "DEactivate %u%s, mr %08x\n",
-			asd->npcs_pin, active ? " (low)" : "",
-			mr);
+	dev_dbg(&spi->dev, "DEactivate NPCS, mr %08x\n", mr);
 
 	if (!as->use_cs_gpios)
 		spi_writel(as, CR, SPI_BIT(LASTXFER));
 	else if (atmel_spi_is_v2(as) || spi->chip_select != 0)
-		gpio_set_value(asd->npcs_pin, !active);
+		gpiod_set_value(asd->npcs_pin, 0);
 }
 
 static void atmel_spi_lock(struct atmel_spi *as) __acquires(&as->lock)
@@ -1188,7 +1179,6 @@
 	struct atmel_spi_device	*asd;
 	u32			csr;
 	unsigned int		bits = spi->bits_per_word;
-	unsigned int		npcs_pin;
 
 	as = spi_master_get_devdata(spi->master);
 
@@ -1209,21 +1199,14 @@
 		csr |= SPI_BIT(CSAAT);
 
 	/* DLYBS is mostly irrelevant since we manage chipselect using GPIOs.
-	 *
-	 * DLYBCT would add delays between words, slowing down transfers.
-	 * It could potentially be useful to cope with DMA bottlenecks, but
-	 * in those cases it's probably best to just use a lower bitrate.
 	 */
 	csr |= SPI_BF(DLYBS, 0);
-	csr |= SPI_BF(DLYBCT, 0);
 
-	/* chipselect must have been muxed as GPIO (e.g. in board setup) */
-	npcs_pin = (unsigned long)spi->controller_data;
-
-	if (!as->use_cs_gpios)
-		npcs_pin = spi->chip_select;
-	else if (gpio_is_valid(spi->cs_gpio))
-		npcs_pin = spi->cs_gpio;
+	/* DLYBCT adds delays between words.  This is useful for slow devices
+	 * that need a bit of time to set up the next transfer.
+	 */
+	csr |= SPI_BF(DLYBCT,
+			(as->spi_clk / 1000000 * spi->word_delay_usecs) >> 5);
 
 	asd = spi->controller_state;
 	if (!asd) {
@@ -1231,11 +1214,21 @@
 		if (!asd)
 			return -ENOMEM;
 
-		if (as->use_cs_gpios)
-			gpio_direction_output(npcs_pin,
-					      !(spi->mode & SPI_CS_HIGH));
+		/*
+		 * If use_cs_gpios is true, this means that we have "cs-gpios"
+		 * defined in the device tree node, so we should have
+		 * gotten the GPIO lines from the device tree inside the
+		 * SPI core. Warn if this is not the case but continue since
+		 * CS GPIOs are after all optional.
+		 */
+		if (as->use_cs_gpios) {
+			if (!spi->cs_gpiod) {
+				dev_err(&spi->dev,
+					"host claims to use CS GPIOs but no CS found in DT by the SPI core\n");
+			}
+			asd->npcs_pin = spi->cs_gpiod;
+		}
 
-		asd->npcs_pin = npcs_pin;
 		spi->controller_state = asd;
 	}
 
@@ -1417,9 +1410,13 @@
 	msg->actual_length = 0;
 
 	list_for_each_entry(xfer, &msg->transfers, transfer_list) {
+		trace_spi_transfer_start(msg, xfer);
+
 		ret = atmel_spi_one_transfer(master, msg, xfer);
 		if (ret)
 			goto msg_done;
+
+		trace_spi_transfer_stop(msg, xfer);
 	}
 
 	if (as->use_pdc)
@@ -1473,41 +1470,6 @@
 	as->caps.has_pdc_support = version < 0x212;
 }
 
-/*-------------------------------------------------------------------------*/
-static int atmel_spi_gpio_cs(struct platform_device *pdev)
-{
-	struct spi_master	*master = platform_get_drvdata(pdev);
-	struct atmel_spi	*as = spi_master_get_devdata(master);
-	struct device_node	*np = master->dev.of_node;
-	int			i;
-	int			ret = 0;
-	int			nb = 0;
-
-	if (!as->use_cs_gpios)
-		return 0;
-
-	if (!np)
-		return 0;
-
-	nb = of_gpio_named_count(np, "cs-gpios");
-	for (i = 0; i < nb; i++) {
-		int cs_gpio = of_get_named_gpio(pdev->dev.of_node,
-						"cs-gpios", i);
-
-		if (cs_gpio == -EPROBE_DEFER)
-			return cs_gpio;
-
-		if (gpio_is_valid(cs_gpio)) {
-			ret = devm_gpio_request(&pdev->dev, cs_gpio,
-						dev_name(&pdev->dev));
-			if (ret)
-				return ret;
-		}
-	}
-
-	return 0;
-}
-
 static void atmel_spi_init(struct atmel_spi *as)
 {
 	spi_writel(as, CR, SPI_BIT(SWRST));
@@ -1560,6 +1522,7 @@
 		goto out_free;
 
 	/* the spi->mode bits understood by this driver: */
+	master->use_gpio_descriptors = true;
 	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
 	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(8, 16);
 	master->dev.of_node = pdev->dev.of_node;
@@ -1592,6 +1555,11 @@
 
 	atmel_get_caps(as);
 
+	/*
+	 * If there are chip selects in the device tree, those will be
+	 * discovered by the SPI core when registering the SPI master
+	 * and assigned to each SPI device.
+	 */
 	as->use_cs_gpios = true;
 	if (atmel_spi_is_v2(as) &&
 	    pdev->dev.of_node &&
@@ -1600,10 +1568,6 @@
 		master->num_chipselect = 4;
 	}
 
-	ret = atmel_spi_gpio_cs(pdev);
-	if (ret)
-		goto out_unmap_regs;
-
 	as->use_dma = false;
 	as->use_pdc = false;
 	if (as->caps.has_dma_support) {
@@ -1767,10 +1731,8 @@
 
 	/* Stop the queue running */
 	ret = spi_master_suspend(master);
-	if (ret) {
-		dev_warn(dev, "cannot suspend master\n");
+	if (ret)
 		return ret;
-	}
 
 	if (!pm_runtime_suspended(dev))
 		atmel_spi_runtime_suspend(dev);
@@ -1799,11 +1761,7 @@
 	}
 
 	/* Start the queue running */
-	ret = spi_master_resume(master);
-	if (ret)
-		dev_err(dev, "problem starting queue (%d)\n", ret);
-
-	return ret;
+	return spi_master_resume(master);
 }
 #endif
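Not part of the patch, for reference only: the DLYBCT hunk earlier in this file programs the field as (spi_clk / 1000000 * word_delay_usecs) >> 5, i.e. the requested inter-word delay expressed in units of 32 peripheral clock cycles. A standalone sketch of that arithmetic; the 84 MHz clock and 5 us delay below are assumptions for illustration, not values from the patch:

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: recomputes the DLYBCT field exactly as the hunk
 * above does, then converts it back to an effective delay.
 */
static uint32_t atmel_dlybct(unsigned long spi_clk_hz,
			     unsigned int word_delay_usecs)
{
	return (spi_clk_hz / 1000000 * word_delay_usecs) >> 5;
}

int main(void)
{
	unsigned long clk = 84000000;	/* assumed peripheral clock */
	uint32_t dlybct = atmel_dlybct(clk, 5);

	/* 84 * 5 = 420, 420 >> 5 = 13; 13 * 32 cycles at 84 MHz is about 4.95 us */
	printf("DLYBCT = %u, delay = %llu ns\n", (unsigned int)dlybct,
	       (unsigned long long)dlybct * 32 * 1000000000ULL / clk);
	return 0;
}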
 
diff --git a/drivers/spi/spi-au1550.c b/drivers/spi/spi-au1550.c
index afd239d..dfb7196 100644
--- a/drivers/spi/spi-au1550.c
+++ b/drivers/spi/spi-au1550.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * au1550 psc spi controller driver
  * may work also with au1200, au1210, au1250
@@ -5,16 +6,6 @@
  *
  * Copyright (c) 2006 ATRON electronic GmbH
  * Author: Jan Nikitenko <jan.nikitenko@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/init.h>
diff --git a/drivers/spi/spi-axi-spi-engine.c b/drivers/spi/spi-axi-spi-engine.c
index 68cfc35..74842f6 100644
--- a/drivers/spi/spi-axi-spi-engine.c
+++ b/drivers/spi/spi-axi-spi-engine.c
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * SPI-Engine SPI controller driver
  * Copyright 2015 Analog Devices Inc.
  *  Author: Lars-Peter Clausen <lars@metafoo.de>
- *
- * Licensed under the GPL-2.
  */
 
 #include <linux/clk.h>
@@ -461,7 +460,6 @@
 	struct spi_engine *spi_engine;
 	struct spi_master *master;
 	unsigned int version;
-	struct resource *res;
 	int irq;
 	int ret;
 
@@ -481,8 +479,7 @@
 
 	spin_lock_init(&spi_engine->lock);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	spi_engine->base = devm_ioremap_resource(&pdev->dev, res);
+	spi_engine->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(spi_engine->base)) {
 		ret = PTR_ERR(spi_engine->base);
 		goto err_put_master;
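Not part of the patch, for reference only: the same mechanical conversion appears here and in spi-ath79.c above, where devm_platform_ioremap_resource(pdev, 0) replaces the platform_get_resource() plus devm_ioremap_resource() pair. A sketch of the pattern in a hypothetical probe function (kernel context, not a standalone program):

#include <linux/err.h>
#include <linux/io.h>
#include <linux/platform_device.h>

/* Illustrative only, hypothetical driver: the two-step lookup that this
 * patch removes is kept as a comment for comparison.
 */
static int demo_probe(struct platform_device *pdev)
{
	void __iomem *base;

	/*
	 * Previously:
	 *   struct resource *r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	 *   base = devm_ioremap_resource(&pdev->dev, r);
	 */
	base = devm_platform_ioremap_resource(pdev, 0);
	if (IS_ERR(base))
		return PTR_ERR(base);

	return 0;
}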
diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c
index 584bcb0..7a35318 100644
--- a/drivers/spi/spi-bcm-qspi.c
+++ b/drivers/spi/spi-bcm-qspi.c
@@ -1,19 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Driver for Broadcom BRCMSTB, NSP,  NS2, Cygnus SPI Controllers
  *
  * Copyright 2016 Broadcom
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation (the "GPL").
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 (GPLv2) for more details.
- *
- * You should have received a copy of the GNU General Public License
- * version 2 (GPLv2) along with this source code.
  */
 
 #include <linux/clk.h>
@@ -354,7 +343,7 @@
 {
 	int bpc = 0, bpp = 0;
 	u8 command = op->cmd.opcode;
-	int width  = op->cmd.buswidth ? op->cmd.buswidth : SPI_NBITS_SINGLE;
+	int width = op->data.buswidth ? op->data.buswidth : SPI_NBITS_SINGLE;
 	int addrlen = op->addr.nbytes;
 	int flex_mode = 1;
 
@@ -908,6 +897,7 @@
 
 		read_from_hw(qspi, slots);
 	}
+	bcm_qspi_enable_bspi(qspi);
 
 	return 0;
 }
@@ -992,7 +982,7 @@
 	if (mspi_read)
 		return bcm_qspi_mspi_exec_mem_op(spi, op);
 
-	ret = bcm_qspi_bspi_set_mode(qspi, op, -1);
+	ret = bcm_qspi_bspi_set_mode(qspi, op, 0);
 
 	if (!ret)
 		ret = bcm_qspi_bspi_exec_mem_op(spi, op);
diff --git a/drivers/spi/spi-bcm-qspi.h b/drivers/spi/spi-bcm-qspi.h
index 7abfc75..01aec64 100644
--- a/drivers/spi/spi-bcm-qspi.h
+++ b/drivers/spi/spi-bcm-qspi.h
@@ -1,17 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright 2016 Broadcom
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation (the "GPL").
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 (GPLv2) for more details.
- *
- * You should have received a copy of the GNU General Public License
- * version 2 (GPLv2) along with this source code.
  */
 
 #ifndef __SPI_BCM_QSPI_H__
diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c
index f35cc10..b4070c0 100644
--- a/drivers/spi/spi-bcm2835.c
+++ b/drivers/spi/spi-bcm2835.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Driver for Broadcom BCM2835 SPI Controllers
  *
@@ -8,21 +9,11 @@
  * This driver is inspired by:
  * spi-ath79.c, Copyright (C) 2009-2011 Gabor Juhos <juhosg@openwrt.org>
  * spi-atmel.c, Copyright (C) 2006 Atmel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
-#include <asm/page.h>
 #include <linux/clk.h>
 #include <linux/completion.h>
+#include <linux/debugfs.h>
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
 #include <linux/dmaengine.h>
@@ -34,7 +25,9 @@
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_device.h>
-#include <linux/of_gpio.h>
+#include <linux/gpio/consumer.h>
+#include <linux/gpio/machine.h> /* FIXME: using chip internals */
+#include <linux/gpio/driver.h> /* FIXME: using chip internals */
 #include <linux/of_irq.h>
 #include <linux/spi/spi.h>
 
@@ -72,25 +65,132 @@
 #define BCM2835_SPI_CS_CS_10		0x00000002
 #define BCM2835_SPI_CS_CS_01		0x00000001
 
-#define BCM2835_SPI_POLLING_LIMIT_US	30
-#define BCM2835_SPI_POLLING_JIFFIES	2
+#define BCM2835_SPI_FIFO_SIZE		64
+#define BCM2835_SPI_FIFO_SIZE_3_4	48
 #define BCM2835_SPI_DMA_MIN_LENGTH	96
+#define BCM2835_SPI_NUM_CS		3   /* raise as necessary */
 #define BCM2835_SPI_MODE_BITS	(SPI_CPOL | SPI_CPHA | SPI_CS_HIGH \
 				| SPI_NO_CS | SPI_3WIRE)
 
 #define DRV_NAME	"spi-bcm2835"
 
+/* define polling limits */
+unsigned int polling_limit_us = 30;
+module_param(polling_limit_us, uint, 0664);
+MODULE_PARM_DESC(polling_limit_us,
+		 "time in us to run a transfer in polling mode\n");
+
+/**
+ * struct bcm2835_spi - BCM2835 SPI controller
+ * @regs: base address of register map
+ * @clk: core clock, divided to calculate serial clock
+ * @irq: interrupt, signals TX FIFO empty or RX FIFO ¾ full
+ * @tfr: SPI transfer currently processed
+ * @tx_buf: pointer whence next transmitted byte is read
+ * @rx_buf: pointer where next received byte is written
+ * @tx_len: remaining bytes to transmit
+ * @rx_len: remaining bytes to receive
+ * @tx_prologue: bytes transmitted without DMA if first TX sglist entry's
+ *	length is not a multiple of 4 (to overcome hardware limitation)
+ * @rx_prologue: bytes received without DMA if first RX sglist entry's
+ *	length is not a multiple of 4 (to overcome hardware limitation)
+ * @tx_spillover: whether @tx_prologue spills over to second TX sglist entry
+ * @prepare_cs: precalculated CS register value for ->prepare_message()
+ *	(uses slave-specific clock polarity and phase settings)
+ * @debugfs_dir: the debugfs directory - needed to remove debugfs when
+ *      unloading the module
+ * @count_transfer_polling: count of how often polling mode is used
+ * @count_transfer_irq: count of how often interrupt mode is used
+ * @count_transfer_irq_after_polling: count of how often we fall back to
+ *      interrupt mode after starting in polling mode.
+ *      These are counted as well in @count_transfer_polling and
+ *      @count_transfer_irq
+ * @count_transfer_dma: count how often dma mode is used
+ * @chip_select: SPI slave currently selected
+ *	(used by bcm2835_spi_dma_tx_done() to write @clear_rx_cs)
+ * @tx_dma_active: whether a TX DMA descriptor is in progress
+ * @rx_dma_active: whether a RX DMA descriptor is in progress
+ *	(used by bcm2835_spi_dma_tx_done() to handle a race)
+ * @fill_tx_desc: preallocated TX DMA descriptor used for RX-only transfers
+ *	(cyclically copies from zero page to TX FIFO)
+ * @fill_tx_addr: bus address of zero page
+ * @clear_rx_desc: preallocated RX DMA descriptor used for TX-only transfers
+ *	(cyclically clears RX FIFO by writing @clear_rx_cs to CS register)
+ * @clear_rx_addr: bus address of @clear_rx_cs
+ * @clear_rx_cs: precalculated CS register value to clear RX FIFO
+ *	(uses slave-specific clock polarity and phase settings)
+ */
 struct bcm2835_spi {
 	void __iomem *regs;
 	struct clk *clk;
 	int irq;
+	struct spi_transfer *tfr;
 	const u8 *tx_buf;
 	u8 *rx_buf;
 	int tx_len;
 	int rx_len;
-	bool dma_pending;
+	int tx_prologue;
+	int rx_prologue;
+	unsigned int tx_spillover;
+	u32 prepare_cs[BCM2835_SPI_NUM_CS];
+
+	struct dentry *debugfs_dir;
+	u64 count_transfer_polling;
+	u64 count_transfer_irq;
+	u64 count_transfer_irq_after_polling;
+	u64 count_transfer_dma;
+
+	u8 chip_select;
+	unsigned int tx_dma_active;
+	unsigned int rx_dma_active;
+	struct dma_async_tx_descriptor *fill_tx_desc;
+	dma_addr_t fill_tx_addr;
+	struct dma_async_tx_descriptor *clear_rx_desc[BCM2835_SPI_NUM_CS];
+	dma_addr_t clear_rx_addr;
+	u32 clear_rx_cs[BCM2835_SPI_NUM_CS] ____cacheline_aligned;
 };
 
+#if defined(CONFIG_DEBUG_FS)
+static void bcm2835_debugfs_create(struct bcm2835_spi *bs,
+				   const char *dname)
+{
+	char name[64];
+	struct dentry *dir;
+
+	/* get full name */
+	snprintf(name, sizeof(name), "spi-bcm2835-%s", dname);
+
+	/* the base directory */
+	dir = debugfs_create_dir(name, NULL);
+	bs->debugfs_dir = dir;
+
+	/* the counters */
+	debugfs_create_u64("count_transfer_polling", 0444, dir,
+			   &bs->count_transfer_polling);
+	debugfs_create_u64("count_transfer_irq", 0444, dir,
+			   &bs->count_transfer_irq);
+	debugfs_create_u64("count_transfer_irq_after_polling", 0444, dir,
+			   &bs->count_transfer_irq_after_polling);
+	debugfs_create_u64("count_transfer_dma", 0444, dir,
+			   &bs->count_transfer_dma);
+}
+
+static void bcm2835_debugfs_remove(struct bcm2835_spi *bs)
+{
+	debugfs_remove_recursive(bs->debugfs_dir);
+	bs->debugfs_dir = NULL;
+}
+#else
+static void bcm2835_debugfs_create(struct bcm2835_spi *bs,
+				   const char *dname)
+{
+}
+
+static void bcm2835_debugfs_remove(struct bcm2835_spi *bs)
+{
+}
+#endif /* CONFIG_DEBUG_FS */
+
 static inline u32 bcm2835_rd(struct bcm2835_spi *bs, unsigned reg)
 {
 	return readl(bs->regs + reg);
@@ -126,9 +226,118 @@
 	}
 }
 
-static void bcm2835_spi_reset_hw(struct spi_master *master)
+/**
+ * bcm2835_rd_fifo_count() - blindly read exactly @count bytes from RX FIFO
+ * @bs: BCM2835 SPI controller
+ * @count: bytes to read from RX FIFO
+ *
+ * The caller must ensure that @bs->rx_len is greater than or equal to @count,
+ * that the RX FIFO contains at least @count bytes and that the DMA Enable flag
+ * in the CS register is set (such that a read from the FIFO register receives
+ * 32-bit instead of just 8-bit).  Moreover @bs->rx_buf must not be %NULL.
+ */
+static inline void bcm2835_rd_fifo_count(struct bcm2835_spi *bs, int count)
 {
-	struct bcm2835_spi *bs = spi_master_get_devdata(master);
+	u32 val;
+	int len;
+
+	bs->rx_len -= count;
+
+	while (count > 0) {
+		val = bcm2835_rd(bs, BCM2835_SPI_FIFO);
+		len = min(count, 4);
+		memcpy(bs->rx_buf, &val, len);
+		bs->rx_buf += len;
+		count -= 4;
+	}
+}
+
+/**
+ * bcm2835_wr_fifo_count() - blindly write exactly @count bytes to TX FIFO
+ * @bs: BCM2835 SPI controller
+ * @count: bytes to write to TX FIFO
+ *
+ * The caller must ensure that @bs->tx_len is greater than or equal to @count,
+ * that the TX FIFO can accommodate @count bytes and that the DMA Enable flag
+ * in the CS register is set (such that a write to the FIFO register transmits
+ * 32-bit instead of just 8-bit).
+ */
+static inline void bcm2835_wr_fifo_count(struct bcm2835_spi *bs, int count)
+{
+	u32 val;
+	int len;
+
+	bs->tx_len -= count;
+
+	while (count > 0) {
+		if (bs->tx_buf) {
+			len = min(count, 4);
+			memcpy(&val, bs->tx_buf, len);
+			bs->tx_buf += len;
+		} else {
+			val = 0;
+		}
+		bcm2835_wr(bs, BCM2835_SPI_FIFO, val);
+		count -= 4;
+	}
+}
+
+/**
+ * bcm2835_wait_tx_fifo_empty() - busy-wait for TX FIFO to empty
+ * @bs: BCM2835 SPI controller
+ *
+ * The caller must ensure that the RX FIFO can accommodate as many bytes
+ * as have been written to the TX FIFO:  Transmission is halted once the
+ * RX FIFO is full, causing this function to spin forever.
+ */
+static inline void bcm2835_wait_tx_fifo_empty(struct bcm2835_spi *bs)
+{
+	while (!(bcm2835_rd(bs, BCM2835_SPI_CS) & BCM2835_SPI_CS_DONE))
+		cpu_relax();
+}
+
+/**
+ * bcm2835_rd_fifo_blind() - blindly read up to @count bytes from RX FIFO
+ * @bs: BCM2835 SPI controller
+ * @count: bytes available for reading in RX FIFO
+ */
+static inline void bcm2835_rd_fifo_blind(struct bcm2835_spi *bs, int count)
+{
+	u8 val;
+
+	count = min(count, bs->rx_len);
+	bs->rx_len -= count;
+
+	while (count) {
+		val = bcm2835_rd(bs, BCM2835_SPI_FIFO);
+		if (bs->rx_buf)
+			*bs->rx_buf++ = val;
+		count--;
+	}
+}
+
+/**
+ * bcm2835_wr_fifo_blind() - blindly write up to @count bytes to TX FIFO
+ * @bs: BCM2835 SPI controller
+ * @count: bytes available for writing in TX FIFO
+ */
+static inline void bcm2835_wr_fifo_blind(struct bcm2835_spi *bs, int count)
+{
+	u8 val;
+
+	count = min(count, bs->tx_len);
+	bs->tx_len -= count;
+
+	while (count) {
+		val = bs->tx_buf ? *bs->tx_buf++ : 0;
+		bcm2835_wr(bs, BCM2835_SPI_FIFO, val);
+		count--;
+	}
+}
+
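Not part of the patch, for reference only: the *_count helpers above move data in 4-byte chunks because, with the DMA Enable flag set, the FIFO register is accessed 32 bits at a time and only the final access may be short. A runnable restatement of the packing loop, with a printf standing in for the FIFO write:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Illustrative only: same chunking as bcm2835_wr_fifo_count(), with the
 * hardware FIFO replaced by a printf so the 4 + 3 byte split is visible.
 */
static void wr_fifo_count_demo(const uint8_t *buf, int count)
{
	uint32_t val;
	int len;

	while (count > 0) {
		val = 0;
		len = count < 4 ? count : 4;	/* only the last write is short */
		memcpy(&val, buf, len);
		buf += len;
		count -= 4;
		printf("FIFO <- 0x%08" PRIx32 " (%d payload bytes)\n", val, len);
	}
}

int main(void)
{
	const uint8_t msg[7] = { 1, 2, 3, 4, 5, 6, 7 };

	wr_fifo_count_demo(msg, sizeof(msg));	/* two writes: 4 bytes, then 3 */
	return 0;
}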
+static void bcm2835_spi_reset_hw(struct spi_controller *ctlr)
+{
+	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
 	u32 cs = bcm2835_rd(bs, BCM2835_SPI_CS);
 
 	/* Disable SPI interrupts and transfer */
@@ -136,6 +345,13 @@
 		BCM2835_SPI_CS_INTD |
 		BCM2835_SPI_CS_DMAEN |
 		BCM2835_SPI_CS_TA);
+	/*
+	 * Transmission sometimes breaks unless the DONE bit is written at the
+	 * end of every transfer.  The spec says it's a RO bit.  Either the
+	 * spec is wrong and the bit is actually of type RW1C, or it's a
+	 * hardware erratum.
+	 */
+	cs |= BCM2835_SPI_CS_DONE;
 	/* and reset RX/TX FIFOS */
 	cs |= BCM2835_SPI_CS_CLEAR_RX | BCM2835_SPI_CS_CLEAR_TX;
 
@@ -147,54 +363,59 @@
 
 static irqreturn_t bcm2835_spi_interrupt(int irq, void *dev_id)
 {
-	struct spi_master *master = dev_id;
-	struct bcm2835_spi *bs = spi_master_get_devdata(master);
+	struct spi_controller *ctlr = dev_id;
+	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
+	u32 cs = bcm2835_rd(bs, BCM2835_SPI_CS);
+
+	/*
+	 * An interrupt is signaled either if DONE is set (TX FIFO empty)
+	 * or if RXR is set (RX FIFO >= ¾ full).
+	 */
+	if (cs & BCM2835_SPI_CS_RXF)
+		bcm2835_rd_fifo_blind(bs, BCM2835_SPI_FIFO_SIZE);
+	else if (cs & BCM2835_SPI_CS_RXR)
+		bcm2835_rd_fifo_blind(bs, BCM2835_SPI_FIFO_SIZE_3_4);
+
+	if (bs->tx_len && cs & BCM2835_SPI_CS_DONE)
+		bcm2835_wr_fifo_blind(bs, BCM2835_SPI_FIFO_SIZE);
 
 	/* Read as many bytes as possible from FIFO */
 	bcm2835_rd_fifo(bs);
 	/* Write as many bytes as possible to FIFO */
 	bcm2835_wr_fifo(bs);
 
-	/* based on flags decide if we can finish the transfer */
-	if (bcm2835_rd(bs, BCM2835_SPI_CS) & BCM2835_SPI_CS_DONE) {
+	if (!bs->rx_len) {
 		/* Transfer complete - reset SPI HW */
-		bcm2835_spi_reset_hw(master);
+		bcm2835_spi_reset_hw(ctlr);
 		/* wake up the framework */
-		complete(&master->xfer_completion);
+		complete(&ctlr->xfer_completion);
 	}
 
 	return IRQ_HANDLED;
 }
 
-static int bcm2835_spi_transfer_one_irq(struct spi_master *master,
+static int bcm2835_spi_transfer_one_irq(struct spi_controller *ctlr,
 					struct spi_device *spi,
 					struct spi_transfer *tfr,
-					u32 cs)
+					u32 cs, bool fifo_empty)
 {
-	struct bcm2835_spi *bs = spi_master_get_devdata(master);
+	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
 
-	/* fill in fifo if we have gpio-cs
-	 * note that there have been rare events where the native-CS
-	 * flapped for <1us which may change the behaviour
-	 * with gpio-cs this does not happen, so it is implemented
-	 * only for this case
-	 */
-	if (gpio_is_valid(spi->cs_gpio)) {
-		/* enable HW block, but without interrupts enabled
-		 * this would triggern an immediate interrupt
-		 */
-		bcm2835_wr(bs, BCM2835_SPI_CS,
-			   cs | BCM2835_SPI_CS_TA);
-		/* fill in tx fifo as much as possible */
-		bcm2835_wr_fifo(bs);
-	}
+	/* update usage statistics */
+	bs->count_transfer_irq++;
 
 	/*
-	 * Enable the HW block. This will immediately trigger a DONE (TX
-	 * empty) interrupt, upon which we will fill the TX FIFO with the
-	 * first TX bytes. Pre-filling the TX FIFO here to avoid the
-	 * interrupt doesn't work:-(
+	 * Enable HW block, but with interrupts still disabled.
+	 * Otherwise the empty TX FIFO would immediately trigger an interrupt.
 	 */
+	bcm2835_wr(bs, BCM2835_SPI_CS, cs | BCM2835_SPI_CS_TA);
+
+	/* fill TX FIFO as much as possible */
+	if (fifo_empty)
+		bcm2835_wr_fifo_blind(bs, BCM2835_SPI_FIFO_SIZE);
+	bcm2835_wr_fifo(bs);
+
+	/* enable interrupts */
 	cs |= BCM2835_SPI_CS_INTR | BCM2835_SPI_CS_INTD | BCM2835_SPI_CS_TA;
 	bcm2835_wr(bs, BCM2835_SPI_CS, cs);
 
@@ -202,48 +423,243 @@
 	return 1;
 }
 
-/*
- * DMA support
+/**
+ * bcm2835_spi_transfer_prologue() - transfer first few bytes without DMA
+ * @ctlr: SPI master controller
+ * @tfr: SPI transfer
+ * @bs: BCM2835 SPI controller
+ * @cs: CS register
  *
- * this implementation has currently a few issues in so far as it does
- * not work arrount limitations of the HW.
+ * A limitation in DMA mode is that the FIFO must be accessed in 4 byte chunks.
+ * Only the final write access is permitted to transmit less than 4 bytes, the
+ * SPI controller deduces its intended size from the DLEN register.
  *
- * the main one being that DMA transfers are limited to 16 bit
- * (so 0 to 65535 bytes) by the SPI HW due to BCM2835_SPI_DLEN
+ * If a TX or RX sglist contains multiple entries, one per page, and the first
+ * entry starts in the middle of a page, that first entry's length may not be
+ * a multiple of 4.  Subsequent entries are fine because they span an entire
+ * page, hence do have a length that's a multiple of 4.
  *
- * also we currently assume that the scatter-gather fragments are
- * all multiple of 4 (except the last) - otherwise we would need
- * to reset the FIFO before subsequent transfers...
- * this also means that tx/rx transfers sg's need to be of equal size!
+ * This cannot happen with kmalloc'ed buffers (which is what most clients use)
+ * because they are contiguous in physical memory and therefore not split on
+ * page boundaries by spi_map_buf().  But it *can* happen with vmalloc'ed
+ * buffers.
  *
- * there may be a few more border-cases we may need to address as well
- * but unfortunately this would mean splitting up the scatter-gather
- * list making it slightly unpractical...
+ * The DMA engine is incapable of combining sglist entries into a continuous
+ * stream of 4 byte chunks, it treats every entry separately:  A TX entry is
+ * rounded up to a multiple of 4 bytes by transmitting surplus bytes, an RX
+ * entry is rounded up by throwing away received bytes.
+ *
+ * Overcome this limitation by transferring the first few bytes without DMA:
+ * E.g. if the first TX sglist entry's length is 23 and the first RX's is 42,
+ * write 3 bytes to the TX FIFO but read only 2 bytes from the RX FIFO.
+ * The residue of 1 byte in the RX FIFO is picked up by DMA.  Together with
+ * the rest of the first RX sglist entry it makes up a multiple of 4 bytes.
+ *
+ * Should the RX prologue be larger, say, 3 vis-à-vis a TX prologue of 1,
+ * write 1 + 4 = 5 bytes to the TX FIFO and read 3 bytes from the RX FIFO.
+ * Caution, the additional 4 bytes spill over to the second TX sglist entry
+ * if the length of the first is *exactly* 1.
+ *
+ * At most 6 bytes are written and at most 3 bytes read.  Do we know the
+ * transfer has this many bytes?  Yes, see BCM2835_SPI_DMA_MIN_LENGTH.
+ *
+ * The FIFO is normally accessed with 8-bit width by the CPU and 32-bit width
+ * by the DMA engine.  Toggling the DMA Enable flag in the CS register switches
+ * the width but also garbles the FIFO's contents.  The prologue must therefore
+ * be transmitted in 32-bit width to ensure that the following DMA transfer can
+ * pick up the residue in the RX FIFO in ungarbled form.
  */
-static void bcm2835_spi_dma_done(void *data)
+static void bcm2835_spi_transfer_prologue(struct spi_controller *ctlr,
+					  struct spi_transfer *tfr,
+					  struct bcm2835_spi *bs,
+					  u32 cs)
 {
-	struct spi_master *master = data;
-	struct bcm2835_spi *bs = spi_master_get_devdata(master);
+	int tx_remaining;
 
-	/* reset fifo and HW */
-	bcm2835_spi_reset_hw(master);
+	bs->tfr		 = tfr;
+	bs->tx_prologue  = 0;
+	bs->rx_prologue  = 0;
+	bs->tx_spillover = false;
 
-	/* and terminate tx-dma as we do not have an irq for it
+	if (bs->tx_buf && !sg_is_last(&tfr->tx_sg.sgl[0]))
+		bs->tx_prologue = sg_dma_len(&tfr->tx_sg.sgl[0]) & 3;
+
+	if (bs->rx_buf && !sg_is_last(&tfr->rx_sg.sgl[0])) {
+		bs->rx_prologue = sg_dma_len(&tfr->rx_sg.sgl[0]) & 3;
+
+		if (bs->rx_prologue > bs->tx_prologue) {
+			if (!bs->tx_buf || sg_is_last(&tfr->tx_sg.sgl[0])) {
+				bs->tx_prologue  = bs->rx_prologue;
+			} else {
+				bs->tx_prologue += 4;
+				bs->tx_spillover =
+					!(sg_dma_len(&tfr->tx_sg.sgl[0]) & ~3);
+			}
+		}
+	}
+
+	/* rx_prologue > 0 implies tx_prologue > 0, so check only the latter */
+	if (!bs->tx_prologue)
+		return;
+
+	/* Write and read RX prologue.  Adjust first entry in RX sglist. */
+	if (bs->rx_prologue) {
+		bcm2835_wr(bs, BCM2835_SPI_DLEN, bs->rx_prologue);
+		bcm2835_wr(bs, BCM2835_SPI_CS, cs | BCM2835_SPI_CS_TA
+						  | BCM2835_SPI_CS_DMAEN);
+		bcm2835_wr_fifo_count(bs, bs->rx_prologue);
+		bcm2835_wait_tx_fifo_empty(bs);
+		bcm2835_rd_fifo_count(bs, bs->rx_prologue);
+		bcm2835_wr(bs, BCM2835_SPI_CS, cs | BCM2835_SPI_CS_CLEAR_RX
+						  | BCM2835_SPI_CS_CLEAR_TX
+						  | BCM2835_SPI_CS_DONE);
+
+		dma_sync_single_for_device(ctlr->dma_rx->device->dev,
+					   sg_dma_address(&tfr->rx_sg.sgl[0]),
+					   bs->rx_prologue, DMA_FROM_DEVICE);
+
+		sg_dma_address(&tfr->rx_sg.sgl[0]) += bs->rx_prologue;
+		sg_dma_len(&tfr->rx_sg.sgl[0])     -= bs->rx_prologue;
+	}
+
+	if (!bs->tx_buf)
+		return;
+
+	/*
+	 * Write remaining TX prologue.  Adjust first entry in TX sglist.
+	 * Also adjust second entry if prologue spills over to it.
+	 */
+	tx_remaining = bs->tx_prologue - bs->rx_prologue;
+	if (tx_remaining) {
+		bcm2835_wr(bs, BCM2835_SPI_DLEN, tx_remaining);
+		bcm2835_wr(bs, BCM2835_SPI_CS, cs | BCM2835_SPI_CS_TA
+						  | BCM2835_SPI_CS_DMAEN);
+		bcm2835_wr_fifo_count(bs, tx_remaining);
+		bcm2835_wait_tx_fifo_empty(bs);
+		bcm2835_wr(bs, BCM2835_SPI_CS, cs | BCM2835_SPI_CS_CLEAR_TX
+						  | BCM2835_SPI_CS_DONE);
+	}
+
+	if (likely(!bs->tx_spillover)) {
+		sg_dma_address(&tfr->tx_sg.sgl[0]) += bs->tx_prologue;
+		sg_dma_len(&tfr->tx_sg.sgl[0])     -= bs->tx_prologue;
+	} else {
+		sg_dma_len(&tfr->tx_sg.sgl[0])      = 0;
+		sg_dma_address(&tfr->tx_sg.sgl[1]) += 4;
+		sg_dma_len(&tfr->tx_sg.sgl[1])     -= 4;
+	}
+}
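Not part of the patch, for reference only: the prologue arithmetic described in the kernel-doc above can be checked in isolation. The sketch below recomputes tx_prologue, rx_prologue and tx_spillover from the lengths of the first TX/RX sglist entries; the 'last' flags stand in for sg_is_last() and a length of 0 means the corresponding buffer is absent:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative only: same decisions as bcm2835_spi_transfer_prologue(),
 * reduced to the lengths of the first TX and RX sglist entries.
 */
static void prologue_demo(int tx_len0, bool tx_last, int rx_len0, bool rx_last)
{
	int tx_prologue = 0, rx_prologue = 0;
	bool tx_spillover = false;

	if (tx_len0 && !tx_last)
		tx_prologue = tx_len0 & 3;

	if (rx_len0 && !rx_last) {
		rx_prologue = rx_len0 & 3;
		if (rx_prologue > tx_prologue) {
			if (!tx_len0 || tx_last) {
				tx_prologue = rx_prologue;
			} else {
				tx_prologue += 4;
				tx_spillover = !(tx_len0 & ~3);
			}
		}
	}

	printf("tx_prologue=%d rx_prologue=%d spillover=%d\n",
	       tx_prologue, rx_prologue, tx_spillover);
}

int main(void)
{
	prologue_demo(23, false, 42, false);	/* write 3, read 2 */
	prologue_demo(1, false, 3, false);	/* write 1 + 4 = 5, read 3, spillover */
	return 0;
}

Running it reproduces the two cases from the comment: 3/2 with no spillover for first-entry lengths 23 and 42, and 5/3 with spillover for lengths 1 and 3.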
+
+/**
+ * bcm2835_spi_undo_prologue() - reconstruct original sglist state
+ * @bs: BCM2835 SPI controller
+ *
+ * Undo changes which were made to an SPI transfer's sglist when transmitting
+ * the prologue.  This is necessary to ensure the same memory ranges are
+ * unmapped that were originally mapped.
+ */
+static void bcm2835_spi_undo_prologue(struct bcm2835_spi *bs)
+{
+	struct spi_transfer *tfr = bs->tfr;
+
+	if (!bs->tx_prologue)
+		return;
+
+	if (bs->rx_prologue) {
+		sg_dma_address(&tfr->rx_sg.sgl[0]) -= bs->rx_prologue;
+		sg_dma_len(&tfr->rx_sg.sgl[0])     += bs->rx_prologue;
+	}
+
+	if (!bs->tx_buf)
+		goto out;
+
+	if (likely(!bs->tx_spillover)) {
+		sg_dma_address(&tfr->tx_sg.sgl[0]) -= bs->tx_prologue;
+		sg_dma_len(&tfr->tx_sg.sgl[0])     += bs->tx_prologue;
+	} else {
+		sg_dma_len(&tfr->tx_sg.sgl[0])      = bs->tx_prologue - 4;
+		sg_dma_address(&tfr->tx_sg.sgl[1]) -= 4;
+		sg_dma_len(&tfr->tx_sg.sgl[1])     += 4;
+	}
+out:
+	bs->tx_prologue = 0;
+}
+
+/**
+ * bcm2835_spi_dma_rx_done() - callback for DMA RX channel
+ * @data: SPI master controller
+ *
+ * Used for bidirectional and RX-only transfers.
+ */
+static void bcm2835_spi_dma_rx_done(void *data)
+{
+	struct spi_controller *ctlr = data;
+	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
+
+	/* terminate tx-dma as we do not have an irq for it
 	 * because when the rx dma will terminate and this callback
 	 * is called the tx-dma must have finished - can't get to this
 	 * situation otherwise...
 	 */
-	dmaengine_terminate_all(master->dma_tx);
+	dmaengine_terminate_async(ctlr->dma_tx);
+	bs->tx_dma_active = false;
+	bs->rx_dma_active = false;
+	bcm2835_spi_undo_prologue(bs);
 
-	/* mark as no longer pending */
-	bs->dma_pending = 0;
+	/* reset fifo and HW */
+	bcm2835_spi_reset_hw(ctlr);
 
 	/* and mark as completed */;
-	complete(&master->xfer_completion);
+	complete(&ctlr->xfer_completion);
 }
 
-static int bcm2835_spi_prepare_sg(struct spi_master *master,
+/**
+ * bcm2835_spi_dma_tx_done() - callback for DMA TX channel
+ * @data: SPI master controller
+ *
+ * Used for TX-only transfers.
+ */
+static void bcm2835_spi_dma_tx_done(void *data)
+{
+	struct spi_controller *ctlr = data;
+	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
+
+	/* busy-wait for TX FIFO to empty */
+	while (!(bcm2835_rd(bs, BCM2835_SPI_CS) & BCM2835_SPI_CS_DONE))
+		bcm2835_wr(bs, BCM2835_SPI_CS,
+			   bs->clear_rx_cs[bs->chip_select]);
+
+	bs->tx_dma_active = false;
+	smp_wmb();
+
+	/*
+	 * In case of a very short transfer, RX DMA may not have been
+	 * issued yet.  The onus is then on bcm2835_spi_transfer_one_dma()
+	 * to terminate it immediately after issuing.
+	 */
+	if (cmpxchg(&bs->rx_dma_active, true, false))
+		dmaengine_terminate_async(ctlr->dma_rx);
+
+	bcm2835_spi_undo_prologue(bs);
+	bcm2835_spi_reset_hw(ctlr);
+	complete(&ctlr->xfer_completion);
+}
+
+/**
+ * bcm2835_spi_prepare_sg() - prepare and submit DMA descriptor for sglist
+ * @ctlr: SPI master controller
+ * @spi: SPI slave
+ * @tfr: SPI transfer
+ * @bs: BCM2835 SPI controller
+ * @is_tx: whether to submit DMA descriptor for TX or RX sglist
+ *
+ * Prepare and submit a DMA descriptor for the TX or RX sglist of @tfr.
+ * Return 0 on success or a negative error number.
+ */
+static int bcm2835_spi_prepare_sg(struct spi_controller *ctlr,
+				  struct spi_device *spi,
 				  struct spi_transfer *tfr,
+				  struct bcm2835_spi *bs,
 				  bool is_tx)
 {
 	struct dma_chan *chan;
@@ -257,14 +673,13 @@
 
 	if (is_tx) {
 		dir   = DMA_MEM_TO_DEV;
-		chan  = master->dma_tx;
+		chan  = ctlr->dma_tx;
 		nents = tfr->tx_sg.nents;
 		sgl   = tfr->tx_sg.sgl;
-		flags = 0 /* no  tx interrupt */;
-
+		flags = tfr->rx_buf ? 0 : DMA_PREP_INTERRUPT;
 	} else {
 		dir   = DMA_DEV_TO_MEM;
-		chan  = master->dma_rx;
+		chan  = ctlr->dma_rx;
 		nents = tfr->rx_sg.nents;
 		sgl   = tfr->rx_sg.sgl;
 		flags = DMA_PREP_INTERRUPT;
@@ -274,10 +689,17 @@
 	if (!desc)
 		return -EINVAL;
 
-	/* set callback for rx */
+	/*
+	 * Completion is signaled by the RX channel for bidirectional and
+	 * RX-only transfers; else by the TX channel for TX-only transfers.
+	 */
 	if (!is_tx) {
-		desc->callback = bcm2835_spi_dma_done;
-		desc->callback_param = master;
+		desc->callback = bcm2835_spi_dma_rx_done;
+		desc->callback_param = ctlr;
+	} else if (!tfr->rx_buf) {
+		desc->callback = bcm2835_spi_dma_tx_done;
+		desc->callback_param = ctlr;
+		bs->chip_select = spi->chip_select;
 	}
 
 	/* submit it to DMA-engine */
@@ -286,144 +708,196 @@
 	return dma_submit_error(cookie);
 }
 
-static inline int bcm2835_check_sg_length(struct sg_table *sgt)
-{
-	int i;
-	struct scatterlist *sgl;
-
-	/* check that the sg entries are word-sized (except for last) */
-	for_each_sg(sgt->sgl, sgl, (int)sgt->nents - 1, i) {
-		if (sg_dma_len(sgl) % 4)
-			return -EFAULT;
-	}
-
-	return 0;
-}
-
-static int bcm2835_spi_transfer_one_dma(struct spi_master *master,
+/**
+ * bcm2835_spi_transfer_one_dma() - perform SPI transfer using DMA engine
+ * @ctlr: SPI master controller
+ * @spi: SPI slave
+ * @tfr: SPI transfer
+ * @cs: CS register
+ *
+ * For *bidirectional* transfers (both tx_buf and rx_buf are non-%NULL), set up
+ * the TX and RX DMA channel to copy between memory and FIFO register.
+ *
+ * For *TX-only* transfers (rx_buf is %NULL), copying the RX FIFO's contents to
+ * memory is pointless.  However, not reading the RX FIFO isn't an option either
+ * because transmission is halted once it's full.  As a workaround, cyclically
+ * clear the RX FIFO by setting the CLEAR_RX bit in the CS register.
+ *
+ * The CS register value is precalculated in bcm2835_spi_setup().  Normally
+ * this is called only once, on slave registration.  A DMA descriptor to write
+ * this value is preallocated in bcm2835_dma_init().  All that's left to do
+ * when performing a TX-only transfer is to submit this descriptor to the RX
+ * DMA channel.  Latency is thereby minimized.  The descriptor does not
+ * generate any interrupts while running.  It must be terminated once the
+ * TX DMA channel is done.
+ *
+ * Clearing the RX FIFO is paced by the DREQ signal.  The signal is asserted
+ * when the RX FIFO becomes half full, i.e. 32 bytes.  (Tuneable with the DC
+ * register.)  Reading 32 bytes from the RX FIFO would normally require 8 bus
+ * accesses, whereas clearing it requires only 1 bus access.  So an 8-fold
+ * reduction in bus traffic and thus energy consumption is achieved.
+ *
+ * For *RX-only* transfers (tx_buf is %NULL), fill the TX FIFO by cyclically
+ * copying from the zero page.  The DMA descriptor to do this is preallocated
+ * in bcm2835_dma_init().  It must be terminated once the RX DMA channel is
+ * done and can then be reused.
+ *
+ * The BCM2835 DMA driver autodetects when a transaction copies from the zero
+ * page and utilizes the DMA controller's ability to synthesize zeroes instead
+ * of copying them from memory.  This reduces traffic on the memory bus.  The
+ * feature is not available on so-called "lite" channels, but normally TX DMA
+ * is backed by a full-featured channel.
+ *
+ * Zero-filling the TX FIFO is paced by the DREQ signal.  Unfortunately the
+ * BCM2835 SPI controller continues to assert DREQ even after the DLEN register
+ * has been counted down to zero (hardware erratum).  Thus, when the transfer
+ * has finished, the DMA engine zero-fills the TX FIFO until it is half full.
+ * (Tuneable with the DC register.)  So up to 9 gratuitous bus accesses are
+ * performed at the end of an RX-only transfer.
+ */
+static int bcm2835_spi_transfer_one_dma(struct spi_controller *ctlr,
 					struct spi_device *spi,
 					struct spi_transfer *tfr,
 					u32 cs)
 {
-	struct bcm2835_spi *bs = spi_master_get_devdata(master);
+	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
+	dma_cookie_t cookie;
 	int ret;
 
-	/* check that the scatter gather segments are all a multiple of 4 */
-	if (bcm2835_check_sg_length(&tfr->tx_sg) ||
-	    bcm2835_check_sg_length(&tfr->rx_sg)) {
-		dev_warn_once(&spi->dev,
-			      "scatter gather segment length is not a multiple of 4 - falling back to interrupt mode\n");
-		return bcm2835_spi_transfer_one_irq(master, spi, tfr, cs);
-	}
+	/* update usage statistics */
+	bs->count_transfer_dma++;
+
+	/*
+	 * Transfer first few bytes without DMA if length of first TX or RX
+	 * sglist entry is not a multiple of 4 bytes (hardware limitation).
+	 */
+	bcm2835_spi_transfer_prologue(ctlr, tfr, bs, cs);
 
 	/* setup tx-DMA */
-	ret = bcm2835_spi_prepare_sg(master, tfr, true);
+	if (bs->tx_buf) {
+		ret = bcm2835_spi_prepare_sg(ctlr, spi, tfr, bs, true);
+	} else {
+		cookie = dmaengine_submit(bs->fill_tx_desc);
+		ret = dma_submit_error(cookie);
+	}
 	if (ret)
-		return ret;
-
-	/* start TX early */
-	dma_async_issue_pending(master->dma_tx);
-
-	/* mark as dma pending */
-	bs->dma_pending = 1;
+		goto err_reset_hw;
 
 	/* set the DMA length */
-	bcm2835_wr(bs, BCM2835_SPI_DLEN, tfr->len);
+	bcm2835_wr(bs, BCM2835_SPI_DLEN, bs->tx_len);
 
 	/* start the HW */
 	bcm2835_wr(bs, BCM2835_SPI_CS,
 		   cs | BCM2835_SPI_CS_TA | BCM2835_SPI_CS_DMAEN);
 
+	bs->tx_dma_active = true;
+	smp_wmb();
+
+	/* start TX early */
+	dma_async_issue_pending(ctlr->dma_tx);
+
 	/* setup rx-DMA late - to run transfers while
 	 * mapping of the rx buffers still takes place
 	 * this saves 10us or more.
 	 */
-	ret = bcm2835_spi_prepare_sg(master, tfr, false);
+	if (bs->rx_buf) {
+		ret = bcm2835_spi_prepare_sg(ctlr, spi, tfr, bs, false);
+	} else {
+		cookie = dmaengine_submit(bs->clear_rx_desc[spi->chip_select]);
+		ret = dma_submit_error(cookie);
+	}
 	if (ret) {
 		/* need to reset on errors */
-		dmaengine_terminate_all(master->dma_tx);
-		bcm2835_spi_reset_hw(master);
-		return ret;
+		dmaengine_terminate_sync(ctlr->dma_tx);
+		bs->tx_dma_active = false;
+		goto err_reset_hw;
 	}
 
 	/* start rx dma late */
-	dma_async_issue_pending(master->dma_rx);
+	dma_async_issue_pending(ctlr->dma_rx);
+	bs->rx_dma_active = true;
+	smp_mb();
+
+	/*
+	 * In case of a very short TX-only transfer, bcm2835_spi_dma_tx_done()
+	 * may run before RX DMA is issued.  Terminate RX DMA if so.
+	 */
+	if (!bs->rx_buf && !bs->tx_dma_active &&
+	    cmpxchg(&bs->rx_dma_active, true, false)) {
+		dmaengine_terminate_async(ctlr->dma_rx);
+		bcm2835_spi_reset_hw(ctlr);
+	}
 
 	/* wait for wakeup in framework */
 	return 1;
+
+err_reset_hw:
+	bcm2835_spi_reset_hw(ctlr);
+	bcm2835_spi_undo_prologue(bs);
+	return ret;
 }
 
-static bool bcm2835_spi_can_dma(struct spi_master *master,
+static bool bcm2835_spi_can_dma(struct spi_controller *ctlr,
 				struct spi_device *spi,
 				struct spi_transfer *tfr)
 {
-	/* only run for gpio_cs */
-	if (!gpio_is_valid(spi->cs_gpio))
-		return false;
-
 	/* we start DMA efforts only on bigger transfers */
 	if (tfr->len < BCM2835_SPI_DMA_MIN_LENGTH)
 		return false;
 
-	/* BCM2835_SPI_DLEN has defined a max transfer size as
-	 * 16 bit, so max is 65535
-	 * we can revisit this by using an alternative transfer
-	 * method - ideally this would get done without any more
-	 * interaction...
-	 */
-	if (tfr->len > 65535) {
-		dev_warn_once(&spi->dev,
-			      "transfer size of %d too big for dma-transfer\n",
-			      tfr->len);
-		return false;
-	}
-
-	/* if we run rx/tx_buf with word aligned addresses then we are OK */
-	if ((((size_t)tfr->rx_buf & 3) == 0) &&
-	    (((size_t)tfr->tx_buf & 3) == 0))
-		return true;
-
-	/* otherwise we only allow transfers within the same page
-	 * to avoid wasting time on dma_mapping when it is not practical
-	 */
-	if (((size_t)tfr->tx_buf & (PAGE_SIZE - 1)) + tfr->len > PAGE_SIZE) {
-		dev_warn_once(&spi->dev,
-			      "Unaligned spi tx-transfer bridging page\n");
-		return false;
-	}
-	if (((size_t)tfr->rx_buf & (PAGE_SIZE - 1)) + tfr->len > PAGE_SIZE) {
-		dev_warn_once(&spi->dev,
-			      "Unaligned spi rx-transfer bridging page\n");
-		return false;
-	}
-
 	/* return OK */
 	return true;
 }
 
-static void bcm2835_dma_release(struct spi_master *master)
+static void bcm2835_dma_release(struct spi_controller *ctlr,
+				struct bcm2835_spi *bs)
 {
-	if (master->dma_tx) {
-		dmaengine_terminate_all(master->dma_tx);
-		dma_release_channel(master->dma_tx);
-		master->dma_tx = NULL;
+	int i;
+
+	if (ctlr->dma_tx) {
+		dmaengine_terminate_sync(ctlr->dma_tx);
+
+		if (bs->fill_tx_desc)
+			dmaengine_desc_free(bs->fill_tx_desc);
+
+		if (bs->fill_tx_addr)
+			dma_unmap_page_attrs(ctlr->dma_tx->device->dev,
+					     bs->fill_tx_addr, sizeof(u32),
+					     DMA_TO_DEVICE,
+					     DMA_ATTR_SKIP_CPU_SYNC);
+
+		dma_release_channel(ctlr->dma_tx);
+		ctlr->dma_tx = NULL;
 	}
-	if (master->dma_rx) {
-		dmaengine_terminate_all(master->dma_rx);
-		dma_release_channel(master->dma_rx);
-		master->dma_rx = NULL;
+
+	if (ctlr->dma_rx) {
+		dmaengine_terminate_sync(ctlr->dma_rx);
+
+		for (i = 0; i < BCM2835_SPI_NUM_CS; i++)
+			if (bs->clear_rx_desc[i])
+				dmaengine_desc_free(bs->clear_rx_desc[i]);
+
+		if (bs->clear_rx_addr)
+			dma_unmap_single(ctlr->dma_rx->device->dev,
+					 bs->clear_rx_addr,
+					 sizeof(bs->clear_rx_cs),
+					 DMA_TO_DEVICE);
+
+		dma_release_channel(ctlr->dma_rx);
+		ctlr->dma_rx = NULL;
 	}
 }
 
-static void bcm2835_dma_init(struct spi_master *master, struct device *dev)
+static void bcm2835_dma_init(struct spi_controller *ctlr, struct device *dev,
+			     struct bcm2835_spi *bs)
 {
 	struct dma_slave_config slave_config;
 	const __be32 *addr;
 	dma_addr_t dma_reg_base;
-	int ret;
+	int ret, i;
 
 	/* base address in dma-space */
-	addr = of_get_address(master->dev.of_node, 0, NULL, NULL);
+	addr = of_get_address(ctlr->dev.of_node, 0, NULL, NULL);
 	if (!addr) {
 		dev_err(dev, "could not get DMA-register address - not using dma mode\n");
 		goto err;
@@ -431,39 +905,97 @@
 	dma_reg_base = be32_to_cpup(addr);
 
 	/* get tx/rx dma */
-	master->dma_tx = dma_request_slave_channel(dev, "tx");
-	if (!master->dma_tx) {
+	ctlr->dma_tx = dma_request_slave_channel(dev, "tx");
+	if (!ctlr->dma_tx) {
 		dev_err(dev, "no tx-dma configuration found - not using dma mode\n");
 		goto err;
 	}
-	master->dma_rx = dma_request_slave_channel(dev, "rx");
-	if (!master->dma_rx) {
+	ctlr->dma_rx = dma_request_slave_channel(dev, "rx");
+	if (!ctlr->dma_rx) {
 		dev_err(dev, "no rx-dma configuration found - not using dma mode\n");
 		goto err_release;
 	}
 
-	/* configure DMAs */
-	slave_config.direction = DMA_MEM_TO_DEV;
+	/*
+	 * The TX DMA channel either copies a transfer's TX buffer to the FIFO
+	 * or, in case of an RX-only transfer, cyclically copies from the zero
+	 * page to the FIFO using a preallocated, reusable descriptor.
+	 */
 	slave_config.dst_addr = (u32)(dma_reg_base + BCM2835_SPI_FIFO);
 	slave_config.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
 
-	ret = dmaengine_slave_config(master->dma_tx, &slave_config);
+	ret = dmaengine_slave_config(ctlr->dma_tx, &slave_config);
 	if (ret)
 		goto err_config;
 
-	slave_config.direction = DMA_DEV_TO_MEM;
+	bs->fill_tx_addr = dma_map_page_attrs(ctlr->dma_tx->device->dev,
+					      ZERO_PAGE(0), 0, sizeof(u32),
+					      DMA_TO_DEVICE,
+					      DMA_ATTR_SKIP_CPU_SYNC);
+	if (dma_mapping_error(ctlr->dma_tx->device->dev, bs->fill_tx_addr)) {
+		dev_err(dev, "cannot map zero page - not using DMA mode\n");
+		bs->fill_tx_addr = 0;
+		goto err_release;
+	}
+
+	bs->fill_tx_desc = dmaengine_prep_dma_cyclic(ctlr->dma_tx,
+						     bs->fill_tx_addr,
+						     sizeof(u32), 0,
+						     DMA_MEM_TO_DEV, 0);
+	if (!bs->fill_tx_desc) {
+		dev_err(dev, "cannot prepare fill_tx_desc - not using DMA mode\n");
+		goto err_release;
+	}
+
+	ret = dmaengine_desc_set_reuse(bs->fill_tx_desc);
+	if (ret) {
+		dev_err(dev, "cannot reuse fill_tx_desc - not using DMA mode\n");
+		goto err_release;
+	}
+
+	/*
+	 * The RX DMA channel is used bidirectionally:  It either reads the
+	 * RX FIFO or, in case of a TX-only transfer, cyclically writes a
+	 * precalculated value to the CS register to clear the RX FIFO.
+	 */
 	slave_config.src_addr = (u32)(dma_reg_base + BCM2835_SPI_FIFO);
 	slave_config.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	slave_config.dst_addr = (u32)(dma_reg_base + BCM2835_SPI_CS);
+	slave_config.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
 
-	ret = dmaengine_slave_config(master->dma_rx, &slave_config);
+	ret = dmaengine_slave_config(ctlr->dma_rx, &slave_config);
 	if (ret)
 		goto err_config;
 
+	bs->clear_rx_addr = dma_map_single(ctlr->dma_rx->device->dev,
+					   bs->clear_rx_cs,
+					   sizeof(bs->clear_rx_cs),
+					   DMA_TO_DEVICE);
+	if (dma_mapping_error(ctlr->dma_rx->device->dev, bs->clear_rx_addr)) {
+		dev_err(dev, "cannot map clear_rx_cs - not using DMA mode\n");
+		bs->clear_rx_addr = 0;
+		goto err_release;
+	}
+
+	for (i = 0; i < BCM2835_SPI_NUM_CS; i++) {
+		bs->clear_rx_desc[i] = dmaengine_prep_dma_cyclic(ctlr->dma_rx,
+					   bs->clear_rx_addr + i * sizeof(u32),
+					   sizeof(u32), 0,
+					   DMA_MEM_TO_DEV, 0);
+		if (!bs->clear_rx_desc[i]) {
+			dev_err(dev, "cannot prepare clear_rx_desc - not using DMA mode\n");
+			goto err_release;
+		}
+
+		ret = dmaengine_desc_set_reuse(bs->clear_rx_desc[i]);
+		if (ret) {
+			dev_err(dev, "cannot reuse clear_rx_desc - not using DMA mode\n");
+			goto err_release;
+		}
+	}
+
 	/* all went well, so set can_dma */
-	master->can_dma = bcm2835_spi_can_dma;
-	master->max_dma_len = 65535; /* limitation by BCM2835_SPI_DLEN */
-	/* need to do TX AND RX DMA, so we need dummy buffers */
-	master->flags = SPI_MASTER_MUST_RX | SPI_MASTER_MUST_TX;
+	ctlr->can_dma = bcm2835_spi_can_dma;
 
 	return;
 
@@ -471,20 +1003,22 @@
 	dev_err(dev, "issue configuring dma: %d - not using DMA mode\n",
 		ret);
 err_release:
-	bcm2835_dma_release(master);
+	bcm2835_dma_release(ctlr, bs);
 err:
 	return;
 }
 
-static int bcm2835_spi_transfer_one_poll(struct spi_master *master,
+static int bcm2835_spi_transfer_one_poll(struct spi_controller *ctlr,
 					 struct spi_device *spi,
 					 struct spi_transfer *tfr,
-					 u32 cs,
-					 unsigned long long xfer_time_us)
+					 u32 cs)
 {
-	struct bcm2835_spi *bs = spi_master_get_devdata(master);
+	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
 	unsigned long timeout;
 
+	/* update usage statistics */
+	bs->count_transfer_polling++;
+
 	/* enable HW block without interrupts */
 	bcm2835_wr(bs, BCM2835_SPI_CS, cs | BCM2835_SPI_CS_TA);
 
@@ -492,10 +1026,10 @@
 	 * if we are interrupted here, then the data is
 	 * getting transferred by the HW while we are interrupted
 	 */
-	bcm2835_wr_fifo(bs);
+	bcm2835_wr_fifo_blind(bs, BCM2835_SPI_FIFO_SIZE);
 
-	/* set the timeout */
-	timeout = jiffies + BCM2835_SPI_POLLING_JIFFIES;
+	/* set the timeout to at least 2 jiffies */
+	timeout = jiffies + 2 + HZ * polling_limit_us / 1000000;
 
 	/* loop until finished the transfer */
 	while (bs->rx_len) {
@@ -514,26 +1048,29 @@
 					    jiffies - timeout,
 					    bs->tx_len, bs->rx_len);
 			/* fall back to interrupt mode */
-			return bcm2835_spi_transfer_one_irq(master, spi,
-							    tfr, cs);
+
+			/* update usage statistics */
+			bs->count_transfer_irq_after_polling++;
+
+			return bcm2835_spi_transfer_one_irq(ctlr, spi,
+							    tfr, cs, false);
 		}
 	}
 
 	/* Transfer complete - reset SPI HW */
-	bcm2835_spi_reset_hw(master);
+	bcm2835_spi_reset_hw(ctlr);
 	/* and return without waiting for completion */
 	return 0;
 }
 
-static int bcm2835_spi_transfer_one(struct spi_master *master,
+static int bcm2835_spi_transfer_one(struct spi_controller *ctlr,
 				    struct spi_device *spi,
 				    struct spi_transfer *tfr)
 {
-	struct bcm2835_spi *bs = spi_master_get_devdata(master);
-	unsigned long spi_hz, clk_hz, cdiv;
-	unsigned long spi_used_hz;
-	unsigned long long xfer_time_us;
-	u32 cs = bcm2835_rd(bs, BCM2835_SPI_CS);
+	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
+	unsigned long spi_hz, clk_hz, cdiv, spi_used_hz;
+	unsigned long hz_per_byte, byte_limit;
+	u32 cs = bs->prepare_cs[spi->chip_select];
 
 	/* set clock */
 	spi_hz = tfr->speed_hz;
@@ -555,17 +1092,8 @@
 	bcm2835_wr(bs, BCM2835_SPI_CLK, cdiv);
 
 	/* handle all the 3-wire mode */
-	if ((spi->mode & SPI_3WIRE) && (tfr->rx_buf))
+	if (spi->mode & SPI_3WIRE && tfr->rx_buf)
 		cs |= BCM2835_SPI_CS_REN;
-	else
-		cs &= ~BCM2835_SPI_CS_REN;
-
-	/* for gpio_cs set dummy CS so that no HW-CS get changed
-	 * we can not run this in bcm2835_spi_set_cs, as it does
-	 * not get called for cs_gpio cases, so we need to do it here
-	 */
-	if (gpio_is_valid(spi->cs_gpio) || (spi->mode & SPI_NO_CS))
-		cs |= BCM2835_SPI_CS_CS_10 | BCM2835_SPI_CS_CS_01;
 
 	/* set transmit buffers and length */
 	bs->tx_buf = tfr->tx_buf;
@@ -573,110 +1101,72 @@
 	bs->tx_len = tfr->len;
 	bs->rx_len = tfr->len;
 
-	/* calculate the estimated time in us the transfer runs */
-	xfer_time_us = (unsigned long long)tfr->len
-		* 9 /* clocks/byte - SPI-HW waits 1 clock after each byte */
-		* 1000000;
-	do_div(xfer_time_us, spi_used_hz);
+	/* Calculate the estimated time in us the transfer runs.  Note that
+	 * there is 1 idle clock cycle after each byte getting transferred
+	 * so we have 9 cycles/byte.  This is used to find the number of Hz
+	 * per byte per polling limit.  E.g., we can transfer 1 byte in 30 us
+	 * per 300,000 Hz of bus clock.
+	 */
+	hz_per_byte = polling_limit_us ? (9 * 1000000) / polling_limit_us : 0;
+	byte_limit = hz_per_byte ? spi_used_hz / hz_per_byte : 1;
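Not part of the patch, for reference only: transfers with len < byte_limit run in polling mode. With the default polling_limit_us of 30 the cutover works out as below; the bus clocks in the example are assumptions:

#include <stdio.h>

/* Illustrative only: same formula as the two lines above. */
static unsigned long byte_limit(unsigned long spi_used_hz,
				unsigned int polling_limit_us)
{
	/* 9 clock cycles per byte: 8 data bits plus 1 idle cycle */
	unsigned long hz_per_byte =
		polling_limit_us ? (9 * 1000000) / polling_limit_us : 0;

	return hz_per_byte ? spi_used_hz / hz_per_byte : 1;
}

int main(void)
{
	/* 300 kHz bus: limit is 1, so even 1-byte transfers use interrupt mode */
	printf("%lu\n", byte_limit(300000, 30));
	/* 10 MHz bus: limit is 33, so transfers up to 32 bytes are polled */
	printf("%lu\n", byte_limit(10000000, 30));
	return 0;
}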
 
-	/* for short requests run polling*/
-	if (xfer_time_us <= BCM2835_SPI_POLLING_LIMIT_US)
-		return bcm2835_spi_transfer_one_poll(master, spi, tfr,
-						     cs, xfer_time_us);
+	/* run in polling mode for short transfers */
+	if (tfr->len < byte_limit)
+		return bcm2835_spi_transfer_one_poll(ctlr, spi, tfr, cs);
 
-	/* run in dma mode if conditions are right */
-	if (master->can_dma && bcm2835_spi_can_dma(master, spi, tfr))
-		return bcm2835_spi_transfer_one_dma(master, spi, tfr, cs);
+	/* run in DMA mode if conditions are right.
+	 * Note that unlike poll or interrupt mode, DMA mode does not have
+	 * this 1 idle clock cycle pattern but runs the SPI clock without gaps.
+	 */
+	if (ctlr->can_dma && bcm2835_spi_can_dma(ctlr, spi, tfr))
+		return bcm2835_spi_transfer_one_dma(ctlr, spi, tfr, cs);
 
 	/* run in interrupt-mode */
-	return bcm2835_spi_transfer_one_irq(master, spi, tfr, cs);
+	return bcm2835_spi_transfer_one_irq(ctlr, spi, tfr, cs, true);
 }
 
-static int bcm2835_spi_prepare_message(struct spi_master *master,
+static int bcm2835_spi_prepare_message(struct spi_controller *ctlr,
 				       struct spi_message *msg)
 {
 	struct spi_device *spi = msg->spi;
-	struct bcm2835_spi *bs = spi_master_get_devdata(master);
-	u32 cs = bcm2835_rd(bs, BCM2835_SPI_CS);
+	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
+	int ret;
 
-	cs &= ~(BCM2835_SPI_CS_CPOL | BCM2835_SPI_CS_CPHA);
+	if (ctlr->can_dma) {
+		/*
+		 * DMA transfers are limited to 16 bit (0 to 65535 bytes) by
+		 * the SPI HW due to DLEN. Split up transfers (32-bit FIFO
+		 * aligned) if the limit is exceeded.
+		 */
+		ret = spi_split_transfers_maxsize(ctlr, msg, 65532,
+						  GFP_KERNEL | GFP_DMA);
+		if (ret)
+			return ret;
+	}
 
-	if (spi->mode & SPI_CPOL)
-		cs |= BCM2835_SPI_CS_CPOL;
-	if (spi->mode & SPI_CPHA)
-		cs |= BCM2835_SPI_CS_CPHA;
-
-	bcm2835_wr(bs, BCM2835_SPI_CS, cs);
+	/*
+	 * Set up clock polarity before spi_transfer_one_message() asserts
+	 * chip select to avoid a gratuitous clock signal edge.
+	 */
+	bcm2835_wr(bs, BCM2835_SPI_CS, bs->prepare_cs[spi->chip_select]);
 
 	return 0;
 }
 
-static void bcm2835_spi_handle_err(struct spi_master *master,
+static void bcm2835_spi_handle_err(struct spi_controller *ctlr,
 				   struct spi_message *msg)
 {
-	struct bcm2835_spi *bs = spi_master_get_devdata(master);
+	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
 
 	/* if an error occurred and we have an active dma, then terminate */
-	if (bs->dma_pending) {
-		dmaengine_terminate_all(master->dma_tx);
-		dmaengine_terminate_all(master->dma_rx);
-		bs->dma_pending = 0;
-	}
+	dmaengine_terminate_sync(ctlr->dma_tx);
+	bs->tx_dma_active = false;
+	dmaengine_terminate_sync(ctlr->dma_rx);
+	bs->rx_dma_active = false;
+	bcm2835_spi_undo_prologue(bs);
+
 	/* and reset */
-	bcm2835_spi_reset_hw(master);
-}
-
-static void bcm2835_spi_set_cs(struct spi_device *spi, bool gpio_level)
-{
-	/*
-	 * we can assume that we are "native" as per spi_set_cs
-	 *   calling us ONLY when cs_gpio is not set
-	 * we can also assume that we are CS < 3 as per bcm2835_spi_setup
-	 *   we would not get called because of error handling there.
-	 * the level passed is the electrical level not enabled/disabled
-	 *   so it has to get translated back to enable/disable
-	 *   see spi_set_cs in spi.c for the implementation
-	 */
-
-	struct spi_master *master = spi->master;
-	struct bcm2835_spi *bs = spi_master_get_devdata(master);
-	u32 cs = bcm2835_rd(bs, BCM2835_SPI_CS);
-	bool enable;
-
-	/* calculate the enable flag from the passed gpio_level */
-	enable = (spi->mode & SPI_CS_HIGH) ? gpio_level : !gpio_level;
-
-	/* set flags for "reverse" polarity in the registers */
-	if (spi->mode & SPI_CS_HIGH) {
-		/* set the correct CS-bits */
-		cs |= BCM2835_SPI_CS_CSPOL;
-		cs |= BCM2835_SPI_CS_CSPOL0 << spi->chip_select;
-	} else {
-		/* clean the CS-bits */
-		cs &= ~BCM2835_SPI_CS_CSPOL;
-		cs &= ~(BCM2835_SPI_CS_CSPOL0 << spi->chip_select);
-	}
-
-	/* select the correct chip_select depending on disabled/enabled */
-	if (enable) {
-		/* set cs correctly */
-		if (spi->mode & SPI_NO_CS) {
-			/* use the "undefined" chip-select */
-			cs |= BCM2835_SPI_CS_CS_10 | BCM2835_SPI_CS_CS_01;
-		} else {
-			/* set the chip select */
-			cs &= ~(BCM2835_SPI_CS_CS_10 | BCM2835_SPI_CS_CS_01);
-			cs |= spi->chip_select;
-		}
-	} else {
-		/* disable CSPOL which puts HW-CS into deselected state */
-		cs &= ~BCM2835_SPI_CS_CSPOL;
-		/* use the "undefined" chip-select as precaution */
-		cs |= BCM2835_SPI_CS_CS_10 | BCM2835_SPI_CS_CS_01;
-	}
-
-	/* finally set the calculated flags in SPI_CS */
-	bcm2835_wr(bs, BCM2835_SPI_CS, cs);
+	bcm2835_spi_reset_hw(ctlr);
 }
 
 static int chip_match_name(struct gpio_chip *chip, void *data)
@@ -686,14 +1176,50 @@
 
 static int bcm2835_spi_setup(struct spi_device *spi)
 {
-	int err;
+	struct spi_controller *ctlr = spi->controller;
+	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
 	struct gpio_chip *chip;
+	enum gpio_lookup_flags lflags;
+	u32 cs;
+
+	/*
+	 * Precalculate SPI slave's CS register value for ->prepare_message():
+	 * The driver always uses software-controlled GPIO chip select, hence
+	 * set the hardware-controlled native chip select to an invalid value
+	 * to prevent it from interfering.
+	 */
+	cs = BCM2835_SPI_CS_CS_10 | BCM2835_SPI_CS_CS_01;
+	if (spi->mode & SPI_CPOL)
+		cs |= BCM2835_SPI_CS_CPOL;
+	if (spi->mode & SPI_CPHA)
+		cs |= BCM2835_SPI_CS_CPHA;
+	bs->prepare_cs[spi->chip_select] = cs;
+
+	/*
+	 * Precalculate SPI slave's CS register value to clear RX FIFO
+	 * in case of a TX-only DMA transfer.
+	 */
+	if (ctlr->dma_rx) {
+		bs->clear_rx_cs[spi->chip_select] = cs |
+						    BCM2835_SPI_CS_TA |
+						    BCM2835_SPI_CS_DMAEN |
+						    BCM2835_SPI_CS_CLEAR_RX;
+		dma_sync_single_for_device(ctlr->dma_rx->device->dev,
+					   bs->clear_rx_addr,
+					   sizeof(bs->clear_rx_cs),
+					   DMA_TO_DEVICE);
+	}
+
 	/*
 	 * sanity checking the native-chipselects
 	 */
 	if (spi->mode & SPI_NO_CS)
 		return 0;
-	if (gpio_is_valid(spi->cs_gpio))
+	/*
+	 * The SPI core has successfully requested the CS GPIO line from the
+	 * device tree, so we are done.
+	 */
+	if (spi->cs_gpiod)
 		return 0;
 	if (spi->chip_select > 1) {
 		/* error in the case of native CS requested with CS > 1
@@ -704,115 +1230,130 @@
 			"setup: only two native chip-selects are supported\n");
 		return -EINVAL;
 	}
-	/* now translate native cs to GPIO */
+
+	/*
+	 * Translate native CS to GPIO
+	 *
+	 * FIXME: poking around in the gpiolib internals like this is
+	 * not very good practice. Find a way to locate the real problem
+	 * and fix it. Why is the GPIO descriptor in spi->cs_gpiod
+	 * sometimes not assigned correctly? Erroneous device trees?
+	 */
 
 	/* get the gpio chip for the base */
 	chip = gpiochip_find("pinctrl-bcm2835", chip_match_name);
 	if (!chip)
 		return 0;
 
-	/* and calculate the real CS */
-	spi->cs_gpio = chip->base + 8 - spi->chip_select;
+	/*
+	 * Retrieve the corresponding GPIO line used for CS.
+	 * The inversion semantics will be handled by the GPIO core
+	 * code, so we pass GPIOD_OUT_LOW for "unasserted" and the
+	 * correct lookup flag for the polarity. SPI_CS_HIGH in
+	 * spi->mode cannot be checked for polarity in this case
+	 * as the flag use_gpio_descriptors enforces SPI_CS_HIGH.
+	 */
+	if (of_property_read_bool(spi->dev.of_node, "spi-cs-high"))
+		lflags = GPIO_ACTIVE_HIGH;
+	else
+		lflags = GPIO_ACTIVE_LOW;
+	spi->cs_gpiod = gpiochip_request_own_desc(chip, 8 - spi->chip_select,
+						  DRV_NAME,
+						  lflags,
+						  GPIOD_OUT_LOW);
+	if (IS_ERR(spi->cs_gpiod))
+		return PTR_ERR(spi->cs_gpiod);
 
 	/* and set up the "mode" and level */
-	dev_info(&spi->dev, "setting up native-CS%i as GPIO %i\n",
-		 spi->chip_select, spi->cs_gpio);
-
-	/* set up GPIO as output and pull to the correct level */
-	err = gpio_direction_output(spi->cs_gpio,
-				    (spi->mode & SPI_CS_HIGH) ? 0 : 1);
-	if (err) {
-		dev_err(&spi->dev,
-			"could not set CS%i gpio %i as output: %i",
-			spi->chip_select, spi->cs_gpio, err);
-		return err;
-	}
+	dev_info(&spi->dev, "setting up native-CS%i to use GPIO\n",
+		 spi->chip_select);
 
 	return 0;
 }
 
 static int bcm2835_spi_probe(struct platform_device *pdev)
 {
-	struct spi_master *master;
+	struct spi_controller *ctlr;
 	struct bcm2835_spi *bs;
-	struct resource *res;
 	int err;
 
-	master = spi_alloc_master(&pdev->dev, sizeof(*bs));
-	if (!master) {
-		dev_err(&pdev->dev, "spi_alloc_master() failed\n");
+	ctlr = spi_alloc_master(&pdev->dev, ALIGN(sizeof(*bs),
+						  dma_get_cache_alignment()));
+	if (!ctlr)
 		return -ENOMEM;
-	}
 
-	platform_set_drvdata(pdev, master);
+	platform_set_drvdata(pdev, ctlr);
 
-	master->mode_bits = BCM2835_SPI_MODE_BITS;
-	master->bits_per_word_mask = SPI_BPW_MASK(8);
-	master->num_chipselect = 3;
-	master->setup = bcm2835_spi_setup;
-	master->set_cs = bcm2835_spi_set_cs;
-	master->transfer_one = bcm2835_spi_transfer_one;
-	master->handle_err = bcm2835_spi_handle_err;
-	master->prepare_message = bcm2835_spi_prepare_message;
-	master->dev.of_node = pdev->dev.of_node;
+	ctlr->use_gpio_descriptors = true;
+	ctlr->mode_bits = BCM2835_SPI_MODE_BITS;
+	ctlr->bits_per_word_mask = SPI_BPW_MASK(8);
+	ctlr->num_chipselect = BCM2835_SPI_NUM_CS;
+	ctlr->setup = bcm2835_spi_setup;
+	ctlr->transfer_one = bcm2835_spi_transfer_one;
+	ctlr->handle_err = bcm2835_spi_handle_err;
+	ctlr->prepare_message = bcm2835_spi_prepare_message;
+	ctlr->dev.of_node = pdev->dev.of_node;
 
-	bs = spi_master_get_devdata(master);
+	bs = spi_controller_get_devdata(ctlr);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	bs->regs = devm_ioremap_resource(&pdev->dev, res);
+	bs->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(bs->regs)) {
 		err = PTR_ERR(bs->regs);
-		goto out_master_put;
+		goto out_controller_put;
 	}
 
 	bs->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(bs->clk)) {
 		err = PTR_ERR(bs->clk);
 		dev_err(&pdev->dev, "could not get clk: %d\n", err);
-		goto out_master_put;
+		goto out_controller_put;
 	}
 
 	bs->irq = platform_get_irq(pdev, 0);
 	if (bs->irq <= 0) {
-		dev_err(&pdev->dev, "could not get IRQ: %d\n", bs->irq);
 		err = bs->irq ? bs->irq : -ENODEV;
-		goto out_master_put;
+		goto out_controller_put;
 	}
 
 	clk_prepare_enable(bs->clk);
 
-	bcm2835_dma_init(master, &pdev->dev);
+	bcm2835_dma_init(ctlr, &pdev->dev, bs);
 
 	/* initialise the hardware with the default polarities */
 	bcm2835_wr(bs, BCM2835_SPI_CS,
 		   BCM2835_SPI_CS_CLEAR_RX | BCM2835_SPI_CS_CLEAR_TX);
 
 	err = devm_request_irq(&pdev->dev, bs->irq, bcm2835_spi_interrupt, 0,
-			       dev_name(&pdev->dev), master);
+			       dev_name(&pdev->dev), ctlr);
 	if (err) {
 		dev_err(&pdev->dev, "could not request IRQ: %d\n", err);
 		goto out_clk_disable;
 	}
 
-	err = devm_spi_register_master(&pdev->dev, master);
+	err = devm_spi_register_controller(&pdev->dev, ctlr);
 	if (err) {
-		dev_err(&pdev->dev, "could not register SPI master: %d\n", err);
+		dev_err(&pdev->dev, "could not register SPI controller: %d\n",
+			err);
 		goto out_clk_disable;
 	}
 
+	bcm2835_debugfs_create(bs, dev_name(&pdev->dev));
+
 	return 0;
 
 out_clk_disable:
 	clk_disable_unprepare(bs->clk);
-out_master_put:
-	spi_master_put(master);
+out_controller_put:
+	spi_controller_put(ctlr);
 	return err;
 }
 
 static int bcm2835_spi_remove(struct platform_device *pdev)
 {
-	struct spi_master *master = platform_get_drvdata(pdev);
-	struct bcm2835_spi *bs = spi_master_get_devdata(master);
+	struct spi_controller *ctlr = platform_get_drvdata(pdev);
+	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
+
+	bcm2835_debugfs_remove(bs);
 
 	/* Clear FIFOs, and disable the HW block */
 	bcm2835_wr(bs, BCM2835_SPI_CS,
@@ -820,7 +1361,7 @@
 
 	clk_disable_unprepare(bs->clk);
 
-	bcm2835_dma_release(master);
+	bcm2835_dma_release(ctlr, bs);
 
 	return 0;
 }
@@ -843,4 +1384,4 @@
 
 MODULE_DESCRIPTION("SPI controller driver for Broadcom BCM2835");
 MODULE_AUTHOR("Chris Boot <bootc@bootc.net>");
-MODULE_LICENSE("GPL v2");
+MODULE_LICENSE("GPL");
diff --git a/drivers/spi/spi-bcm2835aux.c b/drivers/spi/spi-bcm2835aux.c
index 3094d81..a2162ff 100644
--- a/drivers/spi/spi-bcm2835aux.c
+++ b/drivers/spi/spi-bcm2835aux.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Driver for Broadcom BCM2835 auxiliary SPI Controllers
  *
@@ -7,20 +8,11 @@
  * Based on: spi-bcm2835.c
  *
  * Copyright (C) 2015 Martin Sperl
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/clk.h>
 #include <linux/completion.h>
+#include <linux/debugfs.h>
 #include <linux/delay.h>
 #include <linux/err.h>
 #include <linux/interrupt.h>
@@ -36,6 +28,12 @@
 #include <linux/spi/spi.h>
 #include <linux/spinlock.h>
 
+/* define polling limits */
+static unsigned int polling_limit_us = 30;
+module_param(polling_limit_us, uint, 0664);
+MODULE_PARM_DESC(polling_limit_us,
+		 "time in us to run a transfer in polling mode - if zero no polling is used\n");
+
 /*
  * spi register defines
  *
@@ -88,10 +86,6 @@
 #define BCM2835_AUX_SPI_STAT_BUSY	0x00000040
 #define BCM2835_AUX_SPI_STAT_BITCOUNT	0x0000003F
 
-/* timeout values */
-#define BCM2835_AUX_SPI_POLLING_LIMIT_US	30
-#define BCM2835_AUX_SPI_POLLING_JIFFIES		2
-
 struct bcm2835aux_spi {
 	void __iomem *regs;
 	struct clk *clk;
@@ -102,8 +96,53 @@
 	int tx_len;
 	int rx_len;
 	int pending;
+
+	u64 count_transfer_polling;
+	u64 count_transfer_irq;
+	u64 count_transfer_irq_after_poll;
+
+	struct dentry *debugfs_dir;
 };
 
+#if defined(CONFIG_DEBUG_FS)
+static void bcm2835aux_debugfs_create(struct bcm2835aux_spi *bs,
+				      const char *dname)
+{
+	char name[64];
+	struct dentry *dir;
+
+	/* get full name */
+	snprintf(name, sizeof(name), "spi-bcm2835aux-%s", dname);
+
+	/* the base directory */
+	dir = debugfs_create_dir(name, NULL);
+	bs->debugfs_dir = dir;
+
+	/* the counters */
+	debugfs_create_u64("count_transfer_polling", 0444, dir,
+			   &bs->count_transfer_polling);
+	debugfs_create_u64("count_transfer_irq", 0444, dir,
+			   &bs->count_transfer_irq);
+	debugfs_create_u64("count_transfer_irq_after_poll", 0444, dir,
+			   &bs->count_transfer_irq_after_poll);
+}
+
+static void bcm2835aux_debugfs_remove(struct bcm2835aux_spi *bs)
+{
+	debugfs_remove_recursive(bs->debugfs_dir);
+	bs->debugfs_dir = NULL;
+}
+#else
+static void bcm2835aux_debugfs_create(struct bcm2835aux_spi *bs,
+				      const char *dname)
+{
+}
+
+static void bcm2835aux_debugfs_remove(struct bcm2835aux_spi *bs)
+{
+}
+#endif /* CONFIG_DEBUG_FS */
+
 static inline u32 bcm2835aux_rd(struct bcm2835aux_spi *bs, unsigned reg)
 {
 	return readl(bs->regs + reg);
@@ -123,9 +162,6 @@
 	data = bcm2835aux_rd(bs, BCM2835_AUX_SPI_IO);
 	if (bs->rx_buf) {
 		switch (count) {
-		case 4:
-			*bs->rx_buf++ = (data >> 24) & 0xff;
-			/* fallthrough */
 		case 3:
 			*bs->rx_buf++ = (data >> 16) & 0xff;
 			/* fallthrough */
@@ -178,24 +214,14 @@
 		      BCM2835_AUX_SPI_CNTL0_CLEARFIFO);
 }
 
-static irqreturn_t bcm2835aux_spi_interrupt(int irq, void *dev_id)
+static void bcm2835aux_spi_transfer_helper(struct bcm2835aux_spi *bs)
 {
-	struct spi_master *master = dev_id;
-	struct bcm2835aux_spi *bs = spi_master_get_devdata(master);
-	irqreturn_t ret = IRQ_NONE;
-
-	/* IRQ may be shared, so return if our interrupts are disabled */
-	if (!(bcm2835aux_rd(bs, BCM2835_AUX_SPI_CNTL1) &
-	      (BCM2835_AUX_SPI_CNTL1_TXEMPTY | BCM2835_AUX_SPI_CNTL1_IDLE)))
-		return ret;
+	u32 stat = bcm2835aux_rd(bs, BCM2835_AUX_SPI_STAT);
 
 	/* check if we have data to read */
-	while (bs->rx_len &&
-	       (!(bcm2835aux_rd(bs, BCM2835_AUX_SPI_STAT) &
-		  BCM2835_AUX_SPI_STAT_RX_EMPTY))) {
+	for (; bs->rx_len && (stat & BCM2835_AUX_SPI_STAT_RX_LVL);
+	     stat = bcm2835aux_rd(bs, BCM2835_AUX_SPI_STAT))
 		bcm2835aux_rd_fifo(bs);
-		ret = IRQ_HANDLED;
-	}
 
 	/* check if we have data to write */
 	while (bs->tx_len &&
@@ -203,16 +229,21 @@
 	       (!(bcm2835aux_rd(bs, BCM2835_AUX_SPI_STAT) &
 		  BCM2835_AUX_SPI_STAT_TX_FULL))) {
 		bcm2835aux_wr_fifo(bs);
-		ret = IRQ_HANDLED;
 	}
+}
 
-	/* and check if we have reached "done" */
-	while (bs->rx_len &&
-	       (!(bcm2835aux_rd(bs, BCM2835_AUX_SPI_STAT) &
-		  BCM2835_AUX_SPI_STAT_BUSY))) {
-		bcm2835aux_rd_fifo(bs);
-		ret = IRQ_HANDLED;
-	}
+static irqreturn_t bcm2835aux_spi_interrupt(int irq, void *dev_id)
+{
+	struct spi_master *master = dev_id;
+	struct bcm2835aux_spi *bs = spi_master_get_devdata(master);
+
+	/* IRQ may be shared, so return if our interrupts are disabled */
+	if (!(bcm2835aux_rd(bs, BCM2835_AUX_SPI_CNTL1) &
+	      (BCM2835_AUX_SPI_CNTL1_TXEMPTY | BCM2835_AUX_SPI_CNTL1_IDLE)))
+		return IRQ_NONE;
+
+	/* do common fifo handling */
+	bcm2835aux_spi_transfer_helper(bs);
 
 	if (!bs->tx_len) {
 		/* disable tx fifo empty interrupt */
@@ -226,8 +257,7 @@
 		complete(&master->xfer_completion);
 	}
 
-	/* and return */
-	return ret;
+	return IRQ_HANDLED;
 }
 
 static int __bcm2835aux_spi_transfer_one_irq(struct spi_master *master,
@@ -251,6 +281,9 @@
 {
 	struct bcm2835aux_spi *bs = spi_master_get_devdata(master);
 
+	/* update statistics */
+	bs->count_transfer_irq++;
+
 	/* fill in registers and fifos before enabling interrupts */
 	bcm2835aux_wr(bs, BCM2835_AUX_SPI_CNTL1, bs->cntl[1]);
 	bcm2835aux_wr(bs, BCM2835_AUX_SPI_CNTL0, bs->cntl[0]);
@@ -273,35 +306,22 @@
 {
 	struct bcm2835aux_spi *bs = spi_master_get_devdata(master);
 	unsigned long timeout;
-	u32 stat;
+
+	/* update statistics */
+	bs->count_transfer_polling++;
 
 	/* configure spi */
 	bcm2835aux_wr(bs, BCM2835_AUX_SPI_CNTL1, bs->cntl[1]);
 	bcm2835aux_wr(bs, BCM2835_AUX_SPI_CNTL0, bs->cntl[0]);
 
-	/* set the timeout */
-	timeout = jiffies + BCM2835_AUX_SPI_POLLING_JIFFIES;
+	/* set the timeout to at least 2 jiffies */
+	timeout = jiffies + 2 + HZ * polling_limit_us / 1000000;
 
 	/* loop until finished the transfer */
 	while (bs->rx_len) {
-		/* read status */
-		stat = bcm2835aux_rd(bs, BCM2835_AUX_SPI_STAT);
 
-		/* fill in tx fifo with remaining data */
-		if ((bs->tx_len) && (!(stat & BCM2835_AUX_SPI_STAT_TX_FULL))) {
-			bcm2835aux_wr_fifo(bs);
-			continue;
-		}
-
-		/* read data from fifo for both cases */
-		if (!(stat & BCM2835_AUX_SPI_STAT_RX_EMPTY)) {
-			bcm2835aux_rd_fifo(bs);
-			continue;
-		}
-		if (!(stat & BCM2835_AUX_SPI_STAT_BUSY)) {
-			bcm2835aux_rd_fifo(bs);
-			continue;
-		}
+		/* do common fifo handling */
+		bcm2835aux_spi_transfer_helper(bs);
 
 		/* there is still data pending to read check the timeout */
 		if (bs->rx_len && time_after(jiffies, timeout)) {
@@ -310,6 +330,7 @@
 					    jiffies - timeout,
 					    bs->tx_len, bs->rx_len);
 			/* forward to interrupt handler */
+			bs->count_transfer_irq_after_poll++;
 			return __bcm2835aux_spi_transfer_one_irq(master,
 							       spi, tfr);
 		}
@@ -324,8 +345,8 @@
 				       struct spi_transfer *tfr)
 {
 	struct bcm2835aux_spi *bs = spi_master_get_devdata(master);
-	unsigned long spi_hz, clk_hz, speed;
-	unsigned long spi_used_hz;
+	unsigned long spi_hz, clk_hz, speed, spi_used_hz;
+	unsigned long hz_per_byte, byte_limit;
 
 	/* calculate the registers to handle
 	 *
@@ -369,14 +390,15 @@
 	 * of Hz per byte per polling limit.  E.g., we can transfer 1 byte in
 	 * 30 µs per 300,000 Hz of bus clock.
 	 */
-#define HZ_PER_BYTE ((9 * 1000000) / BCM2835_AUX_SPI_POLLING_LIMIT_US)
+	hz_per_byte = polling_limit_us ? (9 * 1000000) / polling_limit_us : 0;
+	byte_limit = hz_per_byte ? spi_used_hz / hz_per_byte : 1;
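+	/*
+	 * Example, assuming the default polling_limit_us of 30:
+	 * hz_per_byte = 9000000 / 30 = 300000, so at a 3 MHz bus clock
+	 * byte_limit is 10 and transfers shorter than 10 bytes are polled.
+	 * A polling_limit_us of 0 gives byte_limit = 1, i.e. polling is
+	 * effectively disabled.
+	 */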
+
 	/* run in polling mode for short transfers */
-	if (tfr->len < spi_used_hz / HZ_PER_BYTE)
+	if (tfr->len < byte_limit)
 		return bcm2835aux_spi_transfer_one_poll(master, spi, tfr);
 
 	/* run in interrupt mode for all others */
 	return bcm2835aux_spi_transfer_one_irq(master, spi, tfr);
-#undef HZ_PER_BYTE
 }
 
 static int bcm2835aux_spi_prepare_message(struct spi_master *master,
@@ -421,24 +443,77 @@
 	bcm2835aux_spi_reset_hw(bs);
 }
 
+static int bcm2835aux_spi_setup(struct spi_device *spi)
+{
+	int ret;
+
+	/* sanity check for native cs */
+	if (spi->mode & SPI_NO_CS)
+		return 0;
+	if (gpio_is_valid(spi->cs_gpio)) {
+		/* with gpio-cs, set the GPIO to the correct level and
+		 * configure it as an output (in case the DT has the GPIO
+		 * set up as native CS instead of as an output)
+		 */
+		ret = gpio_direction_output(spi->cs_gpio,
+					    (spi->mode & SPI_CS_HIGH) ? 0 : 1);
+		if (ret)
+			dev_err(&spi->dev,
+				"could not set gpio %i as output: %i\n",
+				spi->cs_gpio, ret);
+
+		return ret;
+	}
+
+	/* for dt-backwards compatibility: only support native CS on CS0.
+	 * Known things not supported with the broken native CS:
+	 * * multiple chip-selects: cs0-cs2 are all
+	 *     simultaneously asserted whenever there is a transfer -
+	 *     this even includes SPI_NO_CS
+	 * * SPI_CS_HIGH: cs is always asserted low
+	 * * cs_change: cs is deasserted after each spi_transfer
+	 * * cs_delay_usec: cs is always deasserted one SCK cycle
+	 *     after the last transfer
+	 * probably more...
+	 */
+	dev_warn(&spi->dev,
+		 "Native CS is not supported - please configure cs-gpio in device-tree\n");
+
+	if (spi->chip_select == 0)
+		return 0;
+
+	dev_warn(&spi->dev, "Native CS is not working for cs > 0\n");
+
+	return -EINVAL;
+}
+
 static int bcm2835aux_spi_probe(struct platform_device *pdev)
 {
 	struct spi_master *master;
 	struct bcm2835aux_spi *bs;
-	struct resource *res;
 	unsigned long clk_hz;
 	int err;
 
 	master = spi_alloc_master(&pdev->dev, sizeof(*bs));
-	if (!master) {
-		dev_err(&pdev->dev, "spi_alloc_master() failed\n");
+	if (!master)
 		return -ENOMEM;
-	}
 
 	platform_set_drvdata(pdev, master);
 	master->mode_bits = (SPI_CPOL | SPI_CS_HIGH | SPI_NO_CS);
 	master->bits_per_word_mask = SPI_BPW_MASK(8);
-	master->num_chipselect = -1;
+	/* even though the driver never officially supported native CS,
+	 * allow a single native CS for legacy DT support purposes when
+	 * no cs-gpio is configured.
+	 * Known limitations for native cs are:
+	 * * multiple chip-selects: cs0-cs2 are all simultaneously asserted
+	 *     whenever there is a transfer - this even includes SPI_NO_CS
+	 * * SPI_CS_HIGH: is ignored - cs is always asserted low
+	 * * cs_change: cs is deasserted after each spi_transfer
+	 * * cs_delay_usec: cs is always deasserted one SCK cycle after
+	 *     a spi_transfer
+	 */
+	master->num_chipselect = 1;
+	master->setup = bcm2835aux_spi_setup;
 	master->transfer_one = bcm2835aux_spi_transfer_one;
 	master->handle_err = bcm2835aux_spi_handle_err;
 	master->prepare_message = bcm2835aux_spi_prepare_message;
@@ -448,15 +523,14 @@
 	bs = spi_master_get_devdata(master);
 
 	/* the main area */
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	bs->regs = devm_ioremap_resource(&pdev->dev, res);
+	bs->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(bs->regs)) {
 		err = PTR_ERR(bs->regs);
 		goto out_master_put;
 	}
 
 	bs->clk = devm_clk_get(&pdev->dev, NULL);
-	if ((!bs->clk) || (IS_ERR(bs->clk))) {
+	if (IS_ERR(bs->clk)) {
 		err = PTR_ERR(bs->clk);
 		dev_err(&pdev->dev, "could not get clk: %d\n", err);
 		goto out_master_put;
@@ -464,7 +538,6 @@
 
 	bs->irq = platform_get_irq(pdev, 0);
 	if (bs->irq <= 0) {
-		dev_err(&pdev->dev, "could not get IRQ: %d\n", bs->irq);
 		err = bs->irq ? bs->irq : -ENODEV;
 		goto out_master_put;
 	}
@@ -502,6 +575,8 @@
 		goto out_clk_disable;
 	}
 
+	bcm2835aux_debugfs_create(bs, dev_name(&pdev->dev));
+
 	return 0;
 
 out_clk_disable:
@@ -516,6 +591,8 @@
 	struct spi_master *master = platform_get_drvdata(pdev);
 	struct bcm2835aux_spi *bs = spi_master_get_devdata(master);
 
+	bcm2835aux_debugfs_remove(bs);
+
 	bcm2835aux_spi_reset_hw(bs);
 
 	/* disable the HW block by releasing the clock */
@@ -542,4 +619,4 @@
 
 MODULE_DESCRIPTION("SPI controller driver for Broadcom BCM2835 aux");
 MODULE_AUTHOR("Martin Sperl <kernel@martin.sperl.org>");
-MODULE_LICENSE("GPL v2");
+MODULE_LICENSE("GPL");
diff --git a/drivers/spi/spi-bcm63xx-hsspi.c b/drivers/spi/spi-bcm63xx-hsspi.c
index c23849f..c6836a9 100644
--- a/drivers/spi/spi-bcm63xx-hsspi.c
+++ b/drivers/spi/spi-bcm63xx-hsspi.c
@@ -101,6 +101,7 @@
 
 	struct platform_device *pdev;
 	struct clk *clk;
+	struct clk *pll_clk;
 	void __iomem *regs;
 	u8 __iomem *fifo;
 
@@ -329,21 +330,17 @@
 {
 	struct spi_master *master;
 	struct bcm63xx_hsspi *bs;
-	struct resource *res_mem;
 	void __iomem *regs;
 	struct device *dev = &pdev->dev;
-	struct clk *clk;
+	struct clk *clk, *pll_clk = NULL;
 	int irq, ret;
 	u32 reg, rate, num_cs = HSSPI_SPI_MAX_CS;
 
 	irq = platform_get_irq(pdev, 0);
-	if (irq < 0) {
-		dev_err(dev, "no irq: %d\n", irq);
+	if (irq < 0)
 		return irq;
-	}
 
-	res_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	regs = devm_ioremap_resource(dev, res_mem);
+	regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(regs))
 		return PTR_ERR(regs);
 
@@ -358,7 +355,7 @@
 
 	rate = clk_get_rate(clk);
 	if (!rate) {
-		struct clk *pll_clk = devm_clk_get(dev, "pll");
+		pll_clk = devm_clk_get(dev, "pll");
 
 		if (IS_ERR(pll_clk)) {
 			ret = PTR_ERR(pll_clk);
@@ -373,19 +370,20 @@
 		clk_disable_unprepare(pll_clk);
 		if (!rate) {
 			ret = -EINVAL;
-			goto out_disable_clk;
+			goto out_disable_pll_clk;
 		}
 	}
 
 	master = spi_alloc_master(&pdev->dev, sizeof(*bs));
 	if (!master) {
 		ret = -ENOMEM;
-		goto out_disable_clk;
+		goto out_disable_pll_clk;
 	}
 
 	bs = spi_master_get_devdata(master);
 	bs->pdev = pdev;
 	bs->clk = clk;
+	bs->pll_clk = pll_clk;
 	bs->regs = regs;
 	bs->speed_hz = rate;
 	bs->fifo = (u8 __iomem *)(bs->regs + HSSPI_FIFO_REG(0));
@@ -440,6 +438,8 @@
 
 out_put_master:
 	spi_master_put(master);
+out_disable_pll_clk:
+	clk_disable_unprepare(pll_clk);
 out_disable_clk:
 	clk_disable_unprepare(clk);
 	return ret;
@@ -453,6 +453,7 @@
 
 	/* reset the hardware and block queue progress */
 	__raw_writel(0, bs->regs + HSSPI_INT_MASK_REG);
+	clk_disable_unprepare(bs->pll_clk);
 	clk_disable_unprepare(bs->clk);
 
 	return 0;
@@ -465,6 +466,7 @@
 	struct bcm63xx_hsspi *bs = spi_master_get_devdata(master);
 
 	spi_master_suspend(master);
+	clk_disable_unprepare(bs->pll_clk);
 	clk_disable_unprepare(bs->clk);
 
 	return 0;
@@ -480,6 +482,12 @@
 	if (ret)
 		return ret;
 
+	if (bs->pll_clk) {
+		ret = clk_prepare_enable(bs->pll_clk);
+		if (ret)
+			return ret;
+	}
+
 	spi_master_resume(master);
 
 	return 0;
diff --git a/drivers/spi/spi-bcm63xx.c b/drivers/spi/spi-bcm63xx.c
index bfe5754..fdd7eaa 100644
--- a/drivers/spi/spi-bcm63xx.c
+++ b/drivers/spi/spi-bcm63xx.c
@@ -1,18 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Broadcom BCM63xx SPI controller support
  *
  * Copyright (C) 2009-2012 Florian Fainelli <florian@openwrt.org>
  * Copyright (C) 2010 Tanguy Bouzeloc <tanguy.bouzeloc@efixo.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/kernel.h>
@@ -529,10 +520,8 @@
 	}
 
 	irq = platform_get_irq(pdev, 0);
-	if (irq < 0) {
-		dev_err(dev, "no irq: %d\n", irq);
+	if (irq < 0)
 		return irq;
-	}
 
 	clk = devm_clk_get(dev, "spi");
 	if (IS_ERR(clk)) {
diff --git a/drivers/spi/spi-bitbang.c b/drivers/spi/spi-bitbang.c
index f291760..d84e22d 100644
--- a/drivers/spi/spi-bitbang.c
+++ b/drivers/spi/spi-bitbang.c
@@ -1,15 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * polling/bitbanging SPI master controller driver utilities
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/spinlock.h>
@@ -213,19 +204,6 @@
 
 	dev_dbg(&spi->dev, "%s, %u nsec/bit\n", __func__, 2 * cs->nsecs);
 
-	/* NOTE we _need_ to call chipselect() early, ideally with adapter
-	 * setup, unless the hardware defaults cooperate to avoid confusion
-	 * between normal (active low) and inverted chipselects.
-	 */
-
-	/* deselect chip (low or high) */
-	mutex_lock(&bitbang->lock);
-	if (!bitbang->busy) {
-		bitbang->chipselect(spi, BITBANG_CS_INACTIVE);
-		ndelay(cs->nsecs);
-	}
-	mutex_unlock(&bitbang->lock);
-
 	return 0;
 }
 EXPORT_SYMBOL_GPL(spi_bitbang_setup);
@@ -348,6 +326,42 @@
 
 /*----------------------------------------------------------------------*/
 
+int spi_bitbang_init(struct spi_bitbang *bitbang)
+{
+	struct spi_master *master = bitbang->master;
+
+	if (!master || !bitbang->chipselect)
+		return -EINVAL;
+
+	mutex_init(&bitbang->lock);
+
+	if (!master->mode_bits)
+		master->mode_bits = SPI_CPOL | SPI_CPHA | bitbang->flags;
+
+	if (master->transfer || master->transfer_one_message)
+		return -EINVAL;
+
+	master->prepare_transfer_hardware = spi_bitbang_prepare_hardware;
+	master->unprepare_transfer_hardware = spi_bitbang_unprepare_hardware;
+	master->transfer_one = spi_bitbang_transfer_one;
+	master->set_cs = spi_bitbang_set_cs;
+
+	if (!bitbang->txrx_bufs) {
+		bitbang->use_dma = 0;
+		bitbang->txrx_bufs = spi_bitbang_bufs;
+		if (!master->setup) {
+			if (!bitbang->setup_transfer)
+				bitbang->setup_transfer =
+					 spi_bitbang_setup_transfer;
+			master->setup = spi_bitbang_setup;
+			master->cleanup = spi_bitbang_cleanup;
+		}
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(spi_bitbang_init);
+
 /**
  * spi_bitbang_start - start up a polled/bitbanging SPI master driver
  * @bitbang: driver handle
@@ -381,33 +395,9 @@
 	struct spi_master *master = bitbang->master;
 	int ret;
 
-	if (!master || !bitbang->chipselect)
-		return -EINVAL;
-
-	mutex_init(&bitbang->lock);
-
-	if (!master->mode_bits)
-		master->mode_bits = SPI_CPOL | SPI_CPHA | bitbang->flags;
-
-	if (master->transfer || master->transfer_one_message)
-		return -EINVAL;
-
-	master->prepare_transfer_hardware = spi_bitbang_prepare_hardware;
-	master->unprepare_transfer_hardware = spi_bitbang_unprepare_hardware;
-	master->transfer_one = spi_bitbang_transfer_one;
-	master->set_cs = spi_bitbang_set_cs;
-
-	if (!bitbang->txrx_bufs) {
-		bitbang->use_dma = 0;
-		bitbang->txrx_bufs = spi_bitbang_bufs;
-		if (!master->setup) {
-			if (!bitbang->setup_transfer)
-				bitbang->setup_transfer =
-					 spi_bitbang_setup_transfer;
-			master->setup = spi_bitbang_setup;
-			master->cleanup = spi_bitbang_cleanup;
-		}
-	}
+	ret = spi_bitbang_init(bitbang);
+	if (ret)
+		return ret;
 
 	/* driver may get busy before register() returns, especially
 	 * if someone registered boardinfo for devices
@@ -416,7 +406,7 @@
 	if (ret)
 		spi_master_put(master);
 
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(spi_bitbang_start);
 
diff --git a/drivers/spi/spi-brcmstb-qspi.c b/drivers/spi/spi-brcmstb-qspi.c
index c7df92e..75e9b76 100644
--- a/drivers/spi/spi-brcmstb-qspi.c
+++ b/drivers/spi/spi-brcmstb-qspi.c
@@ -1,17 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright 2016 Broadcom
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation (the "GPL").
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 (GPLv2) for more details.
- *
- * You should have received a copy of the GNU General Public License
- * version 2 (GPLv2) along with this source code.
  */
 
 #include <linux/device.h>
diff --git a/drivers/spi/spi-butterfly.c b/drivers/spi/spi-butterfly.c
index 1a35102..7e71a35 100644
--- a/drivers/spi/spi-butterfly.c
+++ b/drivers/spi/spi-butterfly.c
@@ -1,17 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * parport-to-butterfly adapter
  *
  * Copyright (C) 2005 David Brownell
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -32,7 +23,7 @@
  * with a battery powered AVR microcontroller and lots of goodies.  You
  * can use GCC to develop firmware for this.
  *
- * See Documentation/spi/butterfly for information about how to build
+ * See Documentation/spi/butterfly.rst for information about how to build
  * and use this custom parallel port cable.
  */
 
diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c
index 7c88f74..c36587b 100644
--- a/drivers/spi/spi-cadence.c
+++ b/drivers/spi/spi-cadence.c
@@ -1,19 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Cadence SPI controller driver (master mode only)
  *
  * Copyright (C) 2008 - 2014 Xilinx, Inc.
  *
  * based on Blackfin On-Chip SPI Driver (spi_bfin5xx.c)
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
  */
 
 #include <linux/clk.h>
 #include <linux/delay.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/module.h>
@@ -128,10 +124,6 @@
 	u32 is_decoded_cs;
 };
 
-struct cdns_spi_device_data {
-	bool gpio_requested;
-};
-
 /* Macros for the SPI controller read/write */
 static inline u32 cdns_spi_read(struct cdns_spi *xspi, u32 offset)
 {
@@ -176,16 +168,16 @@
 /**
  * cdns_spi_chipselect - Select or deselect the chip select line
  * @spi:	Pointer to the spi_device structure
- * @is_high:	Select(0) or deselect (1) the chip select line
+ * @enable:	Select (1) or deselect (0) the chip select line
  */
-static void cdns_spi_chipselect(struct spi_device *spi, bool is_high)
+static void cdns_spi_chipselect(struct spi_device *spi, bool enable)
 {
 	struct cdns_spi *xspi = spi_master_get_devdata(spi->master);
 	u32 ctrl_reg;
 
 	ctrl_reg = cdns_spi_read(xspi, CDNS_SPI_CR);
 
-	if (is_high) {
+	if (!enable) {
 		/* Deselect the slave */
 		ctrl_reg |= CDNS_SPI_CR_SSCTRL;
 	} else {
@@ -469,64 +461,6 @@
 	return 0;
 }
 
-static int cdns_spi_setup(struct spi_device *spi)
-{
-
-	int ret = -EINVAL;
-	struct cdns_spi_device_data *cdns_spi_data = spi_get_ctldata(spi);
-
-	/* this is a pin managed by the controller, leave it alone */
-	if (spi->cs_gpio == -ENOENT)
-		return 0;
-
-	/* this seems to be the first time we're here */
-	if (!cdns_spi_data) {
-		cdns_spi_data = kzalloc(sizeof(*cdns_spi_data), GFP_KERNEL);
-		if (!cdns_spi_data)
-			return -ENOMEM;
-		cdns_spi_data->gpio_requested = false;
-		spi_set_ctldata(spi, cdns_spi_data);
-	}
-
-	/* if we haven't done so, grab the gpio */
-	if (!cdns_spi_data->gpio_requested && gpio_is_valid(spi->cs_gpio)) {
-		ret = gpio_request_one(spi->cs_gpio,
-				       (spi->mode & SPI_CS_HIGH) ?
-				       GPIOF_OUT_INIT_LOW : GPIOF_OUT_INIT_HIGH,
-				       dev_name(&spi->dev));
-		if (ret)
-			dev_err(&spi->dev, "can't request chipselect gpio %d\n",
-				spi->cs_gpio);
-		else
-			cdns_spi_data->gpio_requested = true;
-	} else {
-		if (gpio_is_valid(spi->cs_gpio)) {
-			int mode = ((spi->mode & SPI_CS_HIGH) ?
-				    GPIOF_OUT_INIT_LOW : GPIOF_OUT_INIT_HIGH);
-
-			ret = gpio_direction_output(spi->cs_gpio, mode);
-			if (ret)
-				dev_err(&spi->dev, "chipselect gpio %d setup failed (%d)\n",
-					spi->cs_gpio, ret);
-		}
-	}
-
-	return ret;
-}
-
-static void cdns_spi_cleanup(struct spi_device *spi)
-{
-	struct cdns_spi_device_data *cdns_spi_data = spi_get_ctldata(spi);
-
-	if (cdns_spi_data) {
-		if (cdns_spi_data->gpio_requested)
-			gpio_free(spi->cs_gpio);
-		kfree(cdns_spi_data);
-		spi_set_ctldata(spi, NULL);
-	}
-
-}
-
 /**
  * cdns_spi_probe - Probe method for the SPI driver
  * @pdev:	Pointer to the platform_device structure
@@ -540,7 +474,6 @@
 	int ret = 0, irq;
 	struct spi_master *master;
 	struct cdns_spi *xspi;
-	struct resource *res;
 	u32 num_cs;
 
 	master = spi_alloc_master(&pdev->dev, sizeof(*xspi));
@@ -551,8 +484,7 @@
 	master->dev.of_node = pdev->dev.of_node;
 	platform_set_drvdata(pdev, master);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	xspi->regs = devm_ioremap_resource(&pdev->dev, res);
+	xspi->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(xspi->regs)) {
 		ret = PTR_ERR(xspi->regs);
 		goto remove_master;
@@ -584,11 +516,6 @@
 		goto clk_dis_apb;
 	}
 
-	pm_runtime_use_autosuspend(&pdev->dev);
-	pm_runtime_set_autosuspend_delay(&pdev->dev, SPI_AUTOSUSPEND_TIMEOUT);
-	pm_runtime_set_active(&pdev->dev);
-	pm_runtime_enable(&pdev->dev);
-
 	ret = of_property_read_u32(pdev->dev.of_node, "num-cs", &num_cs);
 	if (ret < 0)
 		master->num_chipselect = CDNS_SPI_DEFAULT_NUM_CS;
@@ -603,13 +530,14 @@
 	/* SPI controller initializations */
 	cdns_spi_init_hw(xspi);
 
-	pm_runtime_mark_last_busy(&pdev->dev);
-	pm_runtime_put_autosuspend(&pdev->dev);
+	pm_runtime_set_active(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
+	pm_runtime_use_autosuspend(&pdev->dev);
+	pm_runtime_set_autosuspend_delay(&pdev->dev, SPI_AUTOSUSPEND_TIMEOUT);
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq <= 0) {
 		ret = -ENXIO;
-		dev_err(&pdev->dev, "irq number is invalid\n");
 		goto clk_dis_all;
 	}
 
@@ -621,13 +549,12 @@
 		goto clk_dis_all;
 	}
 
+	master->use_gpio_descriptors = true;
 	master->prepare_transfer_hardware = cdns_prepare_transfer_hardware;
 	master->prepare_message = cdns_prepare_message;
 	master->transfer_one = cdns_transfer_one;
 	master->unprepare_transfer_hardware = cdns_unprepare_transfer_hardware;
 	master->set_cs = cdns_spi_chipselect;
-	master->setup = cdns_spi_setup;
-	master->cleanup = cdns_spi_cleanup;
 	master->auto_runtime_pm = true;
 	master->mode_bits = SPI_CPOL | SPI_CPHA;
 
diff --git a/drivers/spi/spi-cavium-octeon.c b/drivers/spi/spi-cavium-octeon.c
index ee4703e..1a2de6c 100644
--- a/drivers/spi/spi-cavium-octeon.c
+++ b/drivers/spi/spi-cavium-octeon.c
@@ -18,7 +18,6 @@
 
 static int octeon_spi_probe(struct platform_device *pdev)
 {
-	struct resource *res_mem;
 	void __iomem *reg_base;
 	struct spi_master *master;
 	struct octeon_spi *p;
@@ -30,8 +29,7 @@
 	p = spi_master_get_devdata(master);
 	platform_set_drvdata(pdev, master);
 
-	res_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	reg_base = devm_ioremap_resource(&pdev->dev, res_mem);
+	reg_base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(reg_base)) {
 		err = PTR_ERR(reg_base);
 		goto fail;
diff --git a/drivers/spi/spi-cavium-thunderx.c b/drivers/spi/spi-cavium-thunderx.c
index 8779377..d12e149 100644
--- a/drivers/spi/spi-cavium-thunderx.c
+++ b/drivers/spi/spi-cavium-thunderx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Cavium ThunderX SPI driver.
  *
diff --git a/drivers/spi/spi-clps711x.c b/drivers/spi/spi-clps711x.c
index 18193df..5e900f2 100644
--- a/drivers/spi/spi-clps711x.c
+++ b/drivers/spi/spi-clps711x.c
@@ -1,17 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  CLPS711X SPI bus driver
  *
  *  Copyright (C) 2012-2016 Alexander Shiyan <shc_work@mail.ru>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/io.h>
 #include <linux/clk.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
@@ -36,25 +32,6 @@
 	int			len;
 };
 
-static int spi_clps711x_setup(struct spi_device *spi)
-{
-	if (!spi->controller_state) {
-		int ret;
-
-		ret = devm_gpio_request(&spi->master->dev, spi->cs_gpio,
-					dev_name(&spi->master->dev));
-		if (ret)
-			return ret;
-
-		spi->controller_state = spi;
-	}
-
-	/* We are expect that SPI-device is not selected */
-	gpio_direction_output(spi->cs_gpio, !(spi->mode & SPI_CS_HIGH));
-
-	return 0;
-}
-
 static int spi_clps711x_prepare_message(struct spi_master *master,
 					struct spi_message *msg)
 {
@@ -114,7 +91,6 @@
 {
 	struct spi_clps711x_data *hw;
 	struct spi_master *master;
-	struct resource *res;
 	int irq, ret;
 
 	irq = platform_get_irq(pdev, 0);
@@ -125,11 +101,11 @@
 	if (!master)
 		return -ENOMEM;
 
+	master->use_gpio_descriptors = true;
 	master->bus_num = -1;
 	master->mode_bits = SPI_CPHA | SPI_CS_HIGH;
 	master->bits_per_word_mask =  SPI_BPW_RANGE_MASK(1, 8);
 	master->dev.of_node = pdev->dev.of_node;
-	master->setup = spi_clps711x_setup;
 	master->prepare_message = spi_clps711x_prepare_message;
 	master->transfer_one = spi_clps711x_transfer_one;
 
@@ -148,8 +124,7 @@
 		goto err_out;
 	}
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	hw->syncio = devm_ioremap_resource(&pdev->dev, res);
+	hw->syncio = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(hw->syncio)) {
 		ret = PTR_ERR(hw->syncio);
 		goto err_out;
diff --git a/drivers/spi/spi-coldfire-qspi.c b/drivers/spi/spi-coldfire-qspi.c
index 23f6fff..f80e06c 100644
--- a/drivers/spi/spi-coldfire-qspi.c
+++ b/drivers/spi/spi-coldfire-qspi.c
@@ -1,17 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Freescale/Motorola Coldfire Queued SPI driver
  *
  * Copyright 2010 Steven King <sfking@fdwdc.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
 */
 
 #include <linux/kernel.h>
@@ -348,7 +339,6 @@
 {
 	struct spi_master *master;
 	struct mcfqspi *mcfqspi;
-	struct resource *res;
 	struct mcfqspi_platform_data *pdata;
 	int status;
 
@@ -371,8 +361,7 @@
 
 	mcfqspi = spi_master_get_devdata(master);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	mcfqspi->iobase = devm_ioremap_resource(&pdev->dev, res);
+	mcfqspi->iobase = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(mcfqspi->iobase)) {
 		status = PTR_ERR(mcfqspi->iobase);
 		goto fail0;
diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c
index a02099c..f71c497 100644
--- a/drivers/spi/spi-davinci.c
+++ b/drivers/spi/spi-davinci.c
@@ -1,21 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C) 2009 Texas Instruments.
  * Copyright (C) 2010 EF Johnson Technologies
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/interrupt.h>
 #include <linux/io.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/module.h>
 #include <linux/delay.h>
 #include <linux/platform_device.h>
@@ -25,7 +16,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
-#include <linux/of_gpio.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/spi_bitbang.h>
 #include <linux/slab.h>
@@ -208,13 +198,11 @@
 static void davinci_spi_chipselect(struct spi_device *spi, int value)
 {
 	struct davinci_spi *dspi;
-	struct davinci_spi_platform_data *pdata;
 	struct davinci_spi_config *spicfg = spi->controller_data;
 	u8 chip_sel = spi->chip_select;
 	u16 spidat1 = CS_DEFAULT;
 
 	dspi = spi_master_get_devdata(spi->master);
-	pdata = &dspi->pdata;
 
 	/* program delay transfers if tx_delay is non zero */
 	if (spicfg && spicfg->wdelay)
@@ -224,15 +212,21 @@
 	 * Board specific chip select logic decides the polarity and cs
 	 * line for the controller
 	 */
-	if (spi->cs_gpio >= 0) {
+	if (spi->cs_gpiod) {
+		/*
+		 * FIXME: is this code ever executed? This host does not
+		 * set SPI_MASTER_GPIO_SS so this chipselect callback should
+		 * not get called from the SPI core when we are using
+		 * GPIOs for chip select.
+		 */
 		if (value == BITBANG_CS_ACTIVE)
-			gpio_set_value(spi->cs_gpio, spi->mode & SPI_CS_HIGH);
+			gpiod_set_value(spi->cs_gpiod, 1);
 		else
-			gpio_set_value(spi->cs_gpio,
-				!(spi->mode & SPI_CS_HIGH));
+			gpiod_set_value(spi->cs_gpiod, 0);
 	} else {
 		if (value == BITBANG_CS_ACTIVE) {
-			spidat1 |= SPIDAT1_CSHOLD_MASK;
+			if (!(spi->mode & SPI_CS_WORD))
+				spidat1 |= SPIDAT1_CSHOLD_MASK;
 			spidat1 &= ~(0x1 << chip_sel);
 		}
 	}
@@ -419,35 +413,15 @@
  */
 static int davinci_spi_setup(struct spi_device *spi)
 {
-	int retval = 0;
 	struct davinci_spi *dspi;
-	struct davinci_spi_platform_data *pdata;
-	struct spi_master *master = spi->master;
 	struct device_node *np = spi->dev.of_node;
 	bool internal_cs = true;
 
 	dspi = spi_master_get_devdata(spi->master);
-	pdata = &dspi->pdata;
 
 	if (!(spi->mode & SPI_NO_CS)) {
-		if (np && (master->cs_gpios != NULL) && (spi->cs_gpio >= 0)) {
-			retval = gpio_direction_output(
-				      spi->cs_gpio, !(spi->mode & SPI_CS_HIGH));
+		if (np && spi->cs_gpiod)
 			internal_cs = false;
-		} else if (pdata->chip_sel &&
-			   spi->chip_select < pdata->num_chipselect &&
-			   pdata->chip_sel[spi->chip_select] != SPI_INTERN_CS) {
-			spi->cs_gpio = pdata->chip_sel[spi->chip_select];
-			retval = gpio_direction_output(
-				      spi->cs_gpio, !(spi->mode & SPI_CS_HIGH));
-			internal_cs = false;
-		}
-
-		if (retval) {
-			dev_err(&spi->dev, "GPIO %d setup failed (%d)\n",
-				spi->cs_gpio, retval);
-			return retval;
-		}
 
 		if (internal_cs)
 			set_io_bits(dspi->base + SPIPC0, 1 << spi->chip_select);
@@ -971,6 +945,7 @@
 	if (ret)
 		goto free_master;
 
+	master->use_gpio_descriptors = true;
 	master->dev.of_node = pdev->dev.of_node;
 	master->bus_num = pdev->id;
 	master->num_chipselect = pdata->num_chipselect;
@@ -985,31 +960,10 @@
 	dspi->prescaler_limit = pdata->prescaler_limit;
 	dspi->version = pdata->version;
 
-	dspi->bitbang.flags = SPI_NO_CS | SPI_LSB_FIRST | SPI_LOOP;
+	dspi->bitbang.flags = SPI_NO_CS | SPI_LSB_FIRST | SPI_LOOP | SPI_CS_WORD;
 	if (dspi->version == SPI_VERSION_2)
 		dspi->bitbang.flags |= SPI_READY;
 
-	if (pdev->dev.of_node) {
-		int i;
-
-		for (i = 0; i < pdata->num_chipselect; i++) {
-			int cs_gpio = of_get_named_gpio(pdev->dev.of_node,
-							"cs-gpios", i);
-
-			if (cs_gpio == -EPROBE_DEFER) {
-				ret = cs_gpio;
-				goto free_clk;
-			}
-
-			if (gpio_is_valid(cs_gpio)) {
-				ret = devm_gpio_request(&pdev->dev, cs_gpio,
-							dev_name(&pdev->dev));
-				if (ret)
-					goto free_clk;
-			}
-		}
-	}
-
 	dspi->bitbang.txrx_bufs = davinci_spi_bufs;
 
 	ret = davinci_spi_request_dma(dspi);
diff --git a/drivers/spi/spi-dln2.c b/drivers/spi/spi-dln2.c
index b62a99c..75b33d7 100644
--- a/drivers/spi/spi-dln2.c
+++ b/drivers/spi/spi-dln2.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Driver for the Diolan DLN-2 USB-SPI adapter
  *
  * Copyright (c) 2014 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation, version 2.
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/spi/spi-dw-mid.c b/drivers/spi/spi-dw-mid.c
index 3db905f..2663bb1 100644
--- a/drivers/spi/spi-dw-mid.c
+++ b/drivers/spi/spi-dw-mid.c
@@ -1,16 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Special handling for DW core on Intel MID platform
  *
  * Copyright (c) 2009, 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
  */
 
 #include <linux/dma-mapping.h>
diff --git a/drivers/spi/spi-dw-mmio.c b/drivers/spi/spi-dw-mmio.c
index e80f60e..bd46fca 100644
--- a/drivers/spi/spi-dw-mmio.c
+++ b/drivers/spi/spi-dw-mmio.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Memory-mapped interface driver for DW SPI Core
  *
  * Copyright (c) 2010, Octasic semiconductor.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 #include <linux/clk.h>
@@ -18,8 +15,8 @@
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_gpio.h>
 #include <linux/of_platform.h>
+#include <linux/acpi.h>
 #include <linux/property.h>
 #include <linux/regmap.h>
 
@@ -30,12 +27,14 @@
 struct dw_spi_mmio {
 	struct dw_spi  dws;
 	struct clk     *clk;
+	struct clk     *pclk;
 	void           *priv;
 };
 
 #define MSCC_CPU_SYSTEM_CTRL_GENERAL_CTRL	0x24
-#define OCELOT_IF_SI_OWNER_MASK			GENMASK(5, 4)
 #define OCELOT_IF_SI_OWNER_OFFSET		4
+#define JAGUAR2_IF_SI_OWNER_OFFSET		6
+#define MSCC_IF_SI_OWNER_MASK			GENMASK(1, 0)
 #define MSCC_IF_SI_OWNER_SISL			0
 #define MSCC_IF_SI_OWNER_SIBM			1
 #define MSCC_IF_SI_OWNER_SIMC			2
@@ -76,23 +75,22 @@
 }
 
 static int dw_spi_mscc_init(struct platform_device *pdev,
-			    struct dw_spi_mmio *dwsmmio)
+			    struct dw_spi_mmio *dwsmmio,
+			    const char *cpu_syscon, u32 if_si_owner_offset)
 {
 	struct dw_spi_mscc *dwsmscc;
-	struct resource *res;
 
 	dwsmscc = devm_kzalloc(&pdev->dev, sizeof(*dwsmscc), GFP_KERNEL);
 	if (!dwsmscc)
 		return -ENOMEM;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	dwsmscc->spi_mst = devm_ioremap_resource(&pdev->dev, res);
+	dwsmscc->spi_mst = devm_platform_ioremap_resource(pdev, 1);
 	if (IS_ERR(dwsmscc->spi_mst)) {
 		dev_err(&pdev->dev, "SPI_MST region map failed\n");
 		return PTR_ERR(dwsmscc->spi_mst);
 	}
 
-	dwsmscc->syscon = syscon_regmap_lookup_by_compatible("mscc,ocelot-cpu-syscon");
+	dwsmscc->syscon = syscon_regmap_lookup_by_compatible(cpu_syscon);
 	if (IS_ERR(dwsmscc->syscon))
 		return PTR_ERR(dwsmscc->syscon);
 
@@ -101,8 +99,8 @@
 
 	/* Select the owner of the SI interface */
 	regmap_update_bits(dwsmscc->syscon, MSCC_CPU_SYSTEM_CTRL_GENERAL_CTRL,
-			   OCELOT_IF_SI_OWNER_MASK,
-			   MSCC_IF_SI_OWNER_SIMC << OCELOT_IF_SI_OWNER_OFFSET);
+			   MSCC_IF_SI_OWNER_MASK << if_si_owner_offset,
+			   MSCC_IF_SI_OWNER_SIMC << if_si_owner_offset);
 
 	dwsmmio->dws.set_cs = dw_spi_mscc_set_cs;
 	dwsmmio->priv = dwsmscc;
@@ -110,13 +108,34 @@
 	return 0;
 }
 
+static int dw_spi_mscc_ocelot_init(struct platform_device *pdev,
+				   struct dw_spi_mmio *dwsmmio)
+{
+	return dw_spi_mscc_init(pdev, dwsmmio, "mscc,ocelot-cpu-syscon",
+				OCELOT_IF_SI_OWNER_OFFSET);
+}
+
+static int dw_spi_mscc_jaguar2_init(struct platform_device *pdev,
+				    struct dw_spi_mmio *dwsmmio)
+{
+	return dw_spi_mscc_init(pdev, dwsmmio, "mscc,jaguar2-cpu-syscon",
+				JAGUAR2_IF_SI_OWNER_OFFSET);
+}
+
+static int dw_spi_alpine_init(struct platform_device *pdev,
+			      struct dw_spi_mmio *dwsmmio)
+{
+	dwsmmio->dws.cs_override = 1;
+
+	return 0;
+}
+
 static int dw_spi_mmio_probe(struct platform_device *pdev)
 {
 	int (*init_func)(struct platform_device *pdev,
 			 struct dw_spi_mmio *dwsmmio);
 	struct dw_spi_mmio *dwsmmio;
 	struct dw_spi *dws;
-	struct resource *mem;
 	int ret;
 	int num_cs;
 
@@ -128,18 +147,15 @@
 	dws = &dwsmmio->dws;
 
 	/* Get basic io resource and map it */
-	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	dws->regs = devm_ioremap_resource(&pdev->dev, mem);
+	dws->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(dws->regs)) {
 		dev_err(&pdev->dev, "SPI region map failed\n");
 		return PTR_ERR(dws->regs);
 	}
 
 	dws->irq = platform_get_irq(pdev, 0);
-	if (dws->irq < 0) {
-		dev_err(&pdev->dev, "no irq resource?\n");
+	if (dws->irq < 0)
 		return dws->irq; /* -ENXIO */
-	}
 
 	dwsmmio->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(dwsmmio->clk))
@@ -148,6 +164,16 @@
 	if (ret)
 		return ret;
 
+	/* Optional clock needed to access the registers */
+	dwsmmio->pclk = devm_clk_get_optional(&pdev->dev, "pclk");
+	if (IS_ERR(dwsmmio->pclk)) {
+		ret = PTR_ERR(dwsmmio->pclk);
+		goto out_clk;
+	}
+	ret = clk_prepare_enable(dwsmmio->pclk);
+	if (ret)
+		goto out_clk;
+
 	dws->bus_num = pdev->id;
 
 	dws->max_freq = clk_get_rate(dwsmmio->clk);
@@ -160,27 +186,6 @@
 
 	dws->num_cs = num_cs;
 
-	if (pdev->dev.of_node) {
-		int i;
-
-		for (i = 0; i < dws->num_cs; i++) {
-			int cs_gpio = of_get_named_gpio(pdev->dev.of_node,
-					"cs-gpios", i);
-
-			if (cs_gpio == -EPROBE_DEFER) {
-				ret = cs_gpio;
-				goto out;
-			}
-
-			if (gpio_is_valid(cs_gpio)) {
-				ret = devm_gpio_request(&pdev->dev, cs_gpio,
-						dev_name(&pdev->dev));
-				if (ret)
-					goto out;
-			}
-		}
-	}
-
 	init_func = device_get_match_data(&pdev->dev);
 	if (init_func) {
 		ret = init_func(pdev, dwsmmio);
@@ -196,6 +201,8 @@
 	return 0;
 
 out:
+	clk_disable_unprepare(dwsmmio->pclk);
+out_clk:
 	clk_disable_unprepare(dwsmmio->clk);
 	return ret;
 }
@@ -205,6 +212,7 @@
 	struct dw_spi_mmio *dwsmmio = platform_get_drvdata(pdev);
 
 	dw_spi_remove_host(&dwsmmio->dws);
+	clk_disable_unprepare(dwsmmio->pclk);
 	clk_disable_unprepare(dwsmmio->clk);
 
 	return 0;
@@ -212,17 +220,26 @@
 
 static const struct of_device_id dw_spi_mmio_of_match[] = {
 	{ .compatible = "snps,dw-apb-ssi", },
-	{ .compatible = "mscc,ocelot-spi", .data = dw_spi_mscc_init},
+	{ .compatible = "mscc,ocelot-spi", .data = dw_spi_mscc_ocelot_init},
+	{ .compatible = "mscc,jaguar2-spi", .data = dw_spi_mscc_jaguar2_init},
+	{ .compatible = "amazon,alpine-dw-apb-ssi", .data = dw_spi_alpine_init},
 	{ /* end of table */}
 };
 MODULE_DEVICE_TABLE(of, dw_spi_mmio_of_match);
 
+static const struct acpi_device_id dw_spi_mmio_acpi_match[] = {
+	{"HISI0173", 0},
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, dw_spi_mmio_acpi_match);
+
 static struct platform_driver dw_spi_mmio_driver = {
 	.probe		= dw_spi_mmio_probe,
 	.remove		= dw_spi_mmio_remove,
 	.driver		= {
 		.name	= DRIVER_NAME,
 		.of_match_table = dw_spi_mmio_of_match,
+		.acpi_match_table = ACPI_PTR(dw_spi_mmio_acpi_match),
 	},
 };
 module_platform_driver(dw_spi_mmio_driver);
diff --git a/drivers/spi/spi-dw-pci.c b/drivers/spi/spi-dw-pci.c
index ef7db75..1406449 100644
--- a/drivers/spi/spi-dw-pci.c
+++ b/drivers/spi/spi-dw-pci.c
@@ -1,16 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * PCI interface driver for DW SPI Core
  *
  * Copyright (c) 2009, 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
  */
 
 #include <linux/interrupt.h>
@@ -27,6 +19,7 @@
 	int	(*setup)(struct dw_spi *);
 	u16	num_cs;
 	u16	bus_num;
+	u32	max_freq;
 };
 
 static struct spi_pci_desc spi_pci_mid_desc_1 = {
@@ -41,6 +34,12 @@
 	.bus_num = 1,
 };
 
+static struct spi_pci_desc spi_pci_ehl_desc = {
+	.num_cs = 1,
+	.bus_num = -1,
+	.max_freq = 100000000,
+};
+
 static int spi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	struct dw_spi *dws;
@@ -73,6 +72,7 @@
 	if (desc) {
 		dws->num_cs = desc->num_cs;
 		dws->bus_num = desc->bus_num;
+		dws->max_freq = desc->max_freq;
 
 		if (desc->setup) {
 			ret = desc->setup(dws);
@@ -106,16 +106,14 @@
 #ifdef CONFIG_PM_SLEEP
 static int spi_suspend(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct dw_spi *dws = pci_get_drvdata(pdev);
+	struct dw_spi *dws = dev_get_drvdata(dev);
 
 	return dw_spi_suspend_host(dws);
 }
 
 static int spi_resume(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct dw_spi *dws = pci_get_drvdata(pdev);
+	struct dw_spi *dws = dev_get_drvdata(dev);
 
 	return dw_spi_resume_host(dws);
 }
@@ -133,8 +131,14 @@
 	{ PCI_VDEVICE(INTEL, 0x0800), (kernel_ulong_t)&spi_pci_mid_desc_1},
 	/* Intel MID platform SPI controller 2 */
 	{ PCI_VDEVICE(INTEL, 0x0812), (kernel_ulong_t)&spi_pci_mid_desc_2},
+	/* Intel Elkhart Lake PSE SPI controllers */
+	{ PCI_VDEVICE(INTEL, 0x4b84), (kernel_ulong_t)&spi_pci_ehl_desc},
+	{ PCI_VDEVICE(INTEL, 0x4b85), (kernel_ulong_t)&spi_pci_ehl_desc},
+	{ PCI_VDEVICE(INTEL, 0x4b86), (kernel_ulong_t)&spi_pci_ehl_desc},
+	{ PCI_VDEVICE(INTEL, 0x4b87), (kernel_ulong_t)&spi_pci_ehl_desc},
 	{},
 };
+MODULE_DEVICE_TABLE(pci, pci_ids);
 
 static struct pci_driver dw_spi_driver = {
 	.name =		DRIVER_NAME,
diff --git a/drivers/spi/spi-dw.c b/drivers/spi/spi-dw.c
index ac2eb89..9a49e07 100644
--- a/drivers/spi/spi-dw.c
+++ b/drivers/spi/spi-dw.c
@@ -1,16 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Designware SPI core controller driver (refer pxa2xx_spi.c)
  *
  * Copyright (c) 2009, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
  */
 
 #include <linux/dma-mapping.h>
@@ -20,7 +12,6 @@
 #include <linux/delay.h>
 #include <linux/slab.h>
 #include <linux/spi/spi.h>
-#include <linux/gpio.h>
 
 #include "spi-dw.h"
 
@@ -54,41 +45,41 @@
 	if (!buf)
 		return 0;
 
-	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
 			"%s registers:\n", dev_name(&dws->master->dev));
-	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
 			"=================================\n");
-	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
 			"CTRL0: \t\t0x%08x\n", dw_readl(dws, DW_SPI_CTRL0));
-	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
 			"CTRL1: \t\t0x%08x\n", dw_readl(dws, DW_SPI_CTRL1));
-	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
 			"SSIENR: \t0x%08x\n", dw_readl(dws, DW_SPI_SSIENR));
-	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
 			"SER: \t\t0x%08x\n", dw_readl(dws, DW_SPI_SER));
-	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
 			"BAUDR: \t\t0x%08x\n", dw_readl(dws, DW_SPI_BAUDR));
-	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
 			"TXFTLR: \t0x%08x\n", dw_readl(dws, DW_SPI_TXFLTR));
-	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
 			"RXFTLR: \t0x%08x\n", dw_readl(dws, DW_SPI_RXFLTR));
-	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
 			"TXFLR: \t\t0x%08x\n", dw_readl(dws, DW_SPI_TXFLR));
-	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
 			"RXFLR: \t\t0x%08x\n", dw_readl(dws, DW_SPI_RXFLR));
-	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
 			"SR: \t\t0x%08x\n", dw_readl(dws, DW_SPI_SR));
-	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
 			"IMR: \t\t0x%08x\n", dw_readl(dws, DW_SPI_IMR));
-	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
 			"ISR: \t\t0x%08x\n", dw_readl(dws, DW_SPI_ISR));
-	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
 			"DMACR: \t\t0x%08x\n", dw_readl(dws, DW_SPI_DMACR));
-	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
 			"DMATDLR: \t0x%08x\n", dw_readl(dws, DW_SPI_DMATDLR));
-	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
 			"DMARDLR: \t0x%08x\n", dw_readl(dws, DW_SPI_DMARDLR));
-	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
 			"=================================\n");
 
 	ret = simple_read_from_buffer(user_buf, count, ppos, buf, len);
@@ -138,12 +129,13 @@
 	struct dw_spi *dws = spi_controller_get_devdata(spi->controller);
 	struct chip_data *chip = spi_get_ctldata(spi);
 
-	/* Chip select logic is inverted from spi_set_cs() */
 	if (chip && chip->cs_control)
-		chip->cs_control(!enable);
+		chip->cs_control(enable);
 
-	if (!enable)
+	if (enable)
 		dw_writel(dws, DW_SPI_SER, BIT(spi->chip_select));
+	else if (dws->cs_override)
+		dw_writel(dws, DW_SPI_SER, 0);
 }
 EXPORT_SYMBOL_GPL(dw_spi_set_cs);
 
@@ -308,19 +300,15 @@
 		dws->current_freq = transfer->speed_hz;
 		spi_set_clk(dws, chip->clk_div);
 	}
-	if (transfer->bits_per_word == 8) {
-		dws->n_bytes = 1;
-		dws->dma_width = 1;
-	} else if (transfer->bits_per_word == 16) {
-		dws->n_bytes = 2;
-		dws->dma_width = 2;
-	} else {
-		return -EINVAL;
-	}
+
+	dws->n_bytes = DIV_ROUND_UP(transfer->bits_per_word, BITS_PER_BYTE);
+	dws->dma_width = DIV_ROUND_UP(transfer->bits_per_word, BITS_PER_BYTE);
+
 	/* Default SPI mode is SCPOL = 0, SCPH = 0 */
 	cr0 = (transfer->bits_per_word - 1)
 		| (chip->type << SPI_FRF_OFFSET)
-		| (spi->mode << SPI_MODE_OFFSET)
+		| ((((spi->mode & SPI_CPOL) ? 1 : 0) << SPI_SCOL_OFFSET) |
+			(((spi->mode & SPI_CPHA) ? 1 : 0) << SPI_SCPH_OFFSET))
 		| (chip->tmode << SPI_TMOD_OFFSET);
 
 	/*
@@ -400,7 +388,6 @@
 {
 	struct dw_spi_chip *chip_info = NULL;
 	struct chip_data *chip;
-	int ret;
 
 	/* Only alloc on first setup */
 	chip = spi_get_ctldata(spi);
@@ -428,13 +415,6 @@
 
 	chip->tmode = SPI_TMOD_TR;
 
-	if (gpio_is_valid(spi->cs_gpio)) {
-		ret = gpio_direction_output(spi->cs_gpio,
-				!(spi->mode & SPI_CS_HIGH));
-		if (ret)
-			return ret;
-	}
-
 	return 0;
 }
 
@@ -468,6 +448,10 @@
 		dws->fifo_len = (fifo == 1) ? 0 : fifo;
 		dev_dbg(dev, "Detected FIFO size: %u bytes\n", dws->fifo_len);
 	}
+
+	/* Enable the HW fixup for explicit CS deselect on Amazon's Alpine chips */
+	if (dws->cs_override)
+		dw_writel(dws, DW_SPI_CS_OVERRIDE, 0xF);
 }
 
 int dw_spi_add_host(struct device *dev, struct dw_spi *dws)
@@ -495,8 +479,9 @@
 		goto err_free_master;
 	}
 
+	master->use_gpio_descriptors = true;
 	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LOOP;
-	master->bits_per_word_mask = SPI_BPW_MASK(8) | SPI_BPW_MASK(16);
+	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 16);
 	master->bus_num = dws->bus_num;
 	master->num_chipselect = dws->num_cs;
 	master->setup = dw_spi_setup;
@@ -506,6 +491,7 @@
 	master->handle_err = dw_spi_handle_err;
 	master->max_speed_hz = dws->max_freq;
 	master->dev.of_node = dev->of_node;
+	master->dev.fwnode = dev->fwnode;
 	master->flags = SPI_MASTER_GPIO_SS;
 
 	if (dws->set_cs)
@@ -572,13 +558,8 @@
 
 int dw_spi_resume_host(struct dw_spi *dws)
 {
-	int ret;
-
 	spi_hw_init(&dws->master->dev, dws);
-	ret = spi_controller_resume(dws->master);
-	if (ret)
-		dev_err(&dws->master->dev, "fail to start queue (%d)\n", ret);
-	return ret;
+	return spi_controller_resume(dws->master);
 }
 EXPORT_SYMBOL_GPL(dw_spi_resume_host);
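
Note: with bits_per_word_mask widened to SPI_BPW_RANGE_MASK(4, 16), the transfer path above derives n_bytes and dma_width with DIV_ROUND_UP(bits_per_word, BITS_PER_BYTE) instead of the old 8/16-only branches. A small standalone model of that mapping (plain userspace C, assuming BITS_PER_BYTE is 8; not driver code):

	#include <stdio.h>

	#define BITS_PER_BYTE		8
	/* same rounding helper the kernel provides in <linux/kernel.h> */
	#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

	int main(void)
	{
		unsigned int bpw;

		/* any word size from 4 to 16 bits maps to 1 or 2 bytes per word */
		for (bpw = 4; bpw <= 16; bpw++)
			printf("bits_per_word=%2u -> n_bytes=%u\n",
			       bpw, DIV_ROUND_UP(bpw, BITS_PER_BYTE));
		return 0;
	}
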
 
diff --git a/drivers/spi/spi-dw.h b/drivers/spi/spi-dw.h
index 0168b08..c9c1588 100644
--- a/drivers/spi/spi-dw.h
+++ b/drivers/spi/spi-dw.h
@@ -32,6 +32,7 @@
 #define DW_SPI_IDR			0x58
 #define DW_SPI_VERSION			0x5c
 #define DW_SPI_DR			0x60
+#define DW_SPI_CS_OVERRIDE		0xf4
 
 /* Bit fields in CTRLR0 */
 #define SPI_DFS_OFFSET			0
@@ -109,6 +110,7 @@
 	u32			fifo_len;	/* depth of the FIFO buffer */
 	u32			max_freq;	/* max bus freq supported */
 
+	int			cs_override;
 	u32			reg_io_width;	/* DR I/O width in bytes */
 	u16			bus_num;
 	u16			num_cs;		/* supported slave numbers */
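
Note: the new cs_override flag is only consumed in spi_hw_init() and dw_spi_set_cs() above; nothing in spi-dw.c sets it, so it is expected to be filled in by the platform glue before dw_spi_add_host() runs. A hypothetical probe fragment showing that hand-off (the compatible string and surrounding structure are made up for illustration):

	/* hypothetical glue-driver probe fragment */
	if (of_device_is_compatible(pdev->dev.of_node,
				    "vendor,example-dw-apb-ssi"))
		dws->cs_override = 1;	/* request the SER-clearing fixup */

	ret = dw_spi_add_host(&pdev->dev, dws);
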
diff --git a/drivers/spi/spi-efm32.c b/drivers/spi/spi-efm32.c
index 065fe87..64d4c44 100644
--- a/drivers/spi/spi-efm32.c
+++ b/drivers/spi/spi-efm32.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2012-2013 Uwe Kleine-Koenig for Pengutronix
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation.
  */
 #include <linux/kernel.h>
 #include <linux/io.h>
@@ -403,10 +400,8 @@
 	}
 
 	ret = platform_get_irq(pdev, 0);
-	if (ret <= 0) {
-		dev_err(&pdev->dev, "failed to get rx irq (%d)\n", ret);
+	if (ret <= 0)
 		goto err;
-	}
 
 	ddata->rxirq = ret;
 
diff --git a/drivers/spi/spi-ep93xx.c b/drivers/spi/spi-ep93xx.c
index 79fc394..4e1ccd4 100644
--- a/drivers/spi/spi-ep93xx.c
+++ b/drivers/spi/spi-ep93xx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Driver for Cirrus Logic EP93xx SPI controller.
  *
@@ -10,10 +11,6 @@
  * For more information about the SPI controller see documentation on Cirrus
  * Logic web site:
  *     http://www.cirrus.com/en/pubs/manual/EP93xx_Users_Guide_UM1.pdf
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/io.h>
@@ -28,7 +25,6 @@
 #include <linux/platform_device.h>
 #include <linux/sched.h>
 #include <linux/scatterlist.h>
-#include <linux/gpio.h>
 #include <linux/spi/spi.h>
 
 #include <linux/platform_data/dma-ep93xx.h>
@@ -652,7 +648,6 @@
 	struct resource *res;
 	int irq;
 	int error;
-	int i;
 
 	info = dev_get_platdata(&pdev->dev);
 	if (!info) {
@@ -661,10 +656,8 @@
 	}
 
 	irq = platform_get_irq(pdev, 0);
-	if (irq < 0) {
-		dev_err(&pdev->dev, "failed to get irq resources\n");
+	if (irq < 0)
 		return -EBUSY;
-	}
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!res) {
@@ -676,6 +669,7 @@
 	if (!master)
 		return -ENOMEM;
 
+	master->use_gpio_descriptors = true;
 	master->prepare_transfer_hardware = ep93xx_spi_prepare_hardware;
 	master->unprepare_transfer_hardware = ep93xx_spi_unprepare_hardware;
 	master->prepare_message = ep93xx_spi_prepare_message;
@@ -683,31 +677,11 @@
 	master->bus_num = pdev->id;
 	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
 	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 16);
-
-	master->num_chipselect = info->num_chipselect;
-	master->cs_gpios = devm_kcalloc(&master->dev,
-					master->num_chipselect, sizeof(int),
-					GFP_KERNEL);
-	if (!master->cs_gpios) {
-		error = -ENOMEM;
-		goto fail_release_master;
-	}
-
-	for (i = 0; i < master->num_chipselect; i++) {
-		master->cs_gpios[i] = info->chipselect[i];
-
-		if (!gpio_is_valid(master->cs_gpios[i]))
-			continue;
-
-		error = devm_gpio_request_one(&pdev->dev, master->cs_gpios[i],
-					      GPIOF_OUT_INIT_HIGH,
-					      "ep93xx-spi");
-		if (error) {
-			dev_err(&pdev->dev, "could not request cs gpio %d\n",
-				master->cs_gpios[i]);
-			goto fail_release_master;
-		}
-	}
+	/*
+	 * The SPI core will count the number of GPIO descriptors to figure
+	 * out the number of chip selects available on the platform.
+	 */
+	master->num_chipselect = 0;
 
 	platform_set_drvdata(pdev, master);
 
diff --git a/drivers/spi/spi-falcon.c b/drivers/spi/spi-falcon.c
index f8638e8..00f46c8 100644
--- a/drivers/spi/spi-falcon.c
+++ b/drivers/spi/spi-falcon.c
@@ -1,7 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
- *  This program is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License version 2 as published
- *  by the Free Software Foundation.
  *
  *  Copyright (C) 2012 Thomas Langer <thomas.langer@lantiq.com>
  */
diff --git a/drivers/spi/spi-fsl-cpm.c b/drivers/spi/spi-fsl-cpm.c
index 8f7b26e..858f054 100644
--- a/drivers/spi/spi-fsl-cpm.c
+++ b/drivers/spi/spi-fsl-cpm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Freescale SPI controller driver cpm functions.
  *
@@ -9,11 +10,6 @@
  * CPM SPI and QE buffer descriptors mode support:
  * Copyright (c) 2009  MontaVista Software, Inc.
  * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
  */
 #include <asm/cpm.h>
 #include <soc/fsl/qe/qe.h>
@@ -309,12 +305,10 @@
 	}
 
 	if (mspi->flags & SPI_CPM1) {
-		struct resource *res;
 		void *pram;
 
-		res = platform_get_resource(to_platform_device(dev),
-					    IORESOURCE_MEM, 1);
-		pram = devm_ioremap_resource(dev, res);
+		pram = devm_platform_ioremap_resource(to_platform_device(dev),
+						      1);
 		if (IS_ERR(pram))
 			mspi->pram = NULL;
 		else
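
Note: devm_platform_ioremap_resource(pdev, index) is equivalent to the removed platform_get_resource() + devm_ioremap_resource() pair, so this hunk is behaviour-preserving. The same helper in its most common form, for reference (a sketch of a generic probe, not this driver):

	/* map the first (index 0) memory resource of a platform device */
	void __iomem *base;

	base = devm_platform_ioremap_resource(pdev, 0);
	if (IS_ERR(base))
		return PTR_ERR(base);
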
diff --git a/drivers/spi/spi-fsl-cpm.h b/drivers/spi/spi-fsl-cpm.h
index c711158..160f999 100644
--- a/drivers/spi/spi-fsl-cpm.h
+++ b/drivers/spi/spi-fsl-cpm.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Freescale SPI controller driver cpm functions.
  *
@@ -9,11 +10,6 @@
  * CPM SPI and QE buffer descriptors mode support:
  * Copyright (c) 2009  MontaVista Software, Inc.
  * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
  */
 
 #ifndef __SPI_FSL_CPM_H__
diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
index 3082e72..bec758e 100644
--- a/drivers/spi/spi-fsl-dspi.c
+++ b/drivers/spi/spi-fsl-dspi.c
@@ -9,26 +9,16 @@
 #include <linux/delay.h>
 #include <linux/dmaengine.h>
 #include <linux/dma-mapping.h>
-#include <linux/err.h>
-#include <linux/errno.h>
 #include <linux/interrupt.h>
-#include <linux/io.h>
 #include <linux/kernel.h>
-#include <linux/math64.h>
 #include <linux/module.h>
-#include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/pinctrl/consumer.h>
-#include <linux/platform_device.h>
-#include <linux/pm_runtime.h>
 #include <linux/regmap.h>
-#include <linux/sched.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/spi-fsl-dspi.h>
-#include <linux/spi/spi_bitbang.h>
-#include <linux/time.h>
 
-#define DRIVER_NAME "fsl-dspi"
+#define DRIVER_NAME			"fsl-dspi"
 
 #ifdef CONFIG_M5441x
 #define DSPI_FIFO_SIZE			16
@@ -37,101 +27,97 @@
 #endif
 #define DSPI_DMA_BUFSIZE		(DSPI_FIFO_SIZE * 1024)
 
-#define SPI_MCR		0x00
-#define SPI_MCR_MASTER		(1 << 31)
-#define SPI_MCR_PCSIS		(0x3F << 16)
-#define SPI_MCR_CLR_TXF	(1 << 11)
-#define SPI_MCR_CLR_RXF	(1 << 10)
-#define SPI_MCR_XSPI		(1 << 3)
+#define SPI_MCR				0x00
+#define SPI_MCR_MASTER			BIT(31)
+#define SPI_MCR_PCSIS			(0x3F << 16)
+#define SPI_MCR_CLR_TXF			BIT(11)
+#define SPI_MCR_CLR_RXF			BIT(10)
+#define SPI_MCR_XSPI			BIT(3)
 
-#define SPI_TCR			0x08
-#define SPI_TCR_GET_TCNT(x)	(((x) & 0xffff0000) >> 16)
+#define SPI_TCR				0x08
+#define SPI_TCR_GET_TCNT(x)		(((x) & GENMASK(31, 16)) >> 16)
 
-#define SPI_CTAR(x)		(0x0c + (((x) & 0x3) * 4))
-#define SPI_CTAR_FMSZ(x)	(((x) & 0x0000000f) << 27)
-#define SPI_CTAR_CPOL(x)	((x) << 26)
-#define SPI_CTAR_CPHA(x)	((x) << 25)
-#define SPI_CTAR_LSBFE(x)	((x) << 24)
-#define SPI_CTAR_PCSSCK(x)	(((x) & 0x00000003) << 22)
-#define SPI_CTAR_PASC(x)	(((x) & 0x00000003) << 20)
-#define SPI_CTAR_PDT(x)	(((x) & 0x00000003) << 18)
-#define SPI_CTAR_PBR(x)	(((x) & 0x00000003) << 16)
-#define SPI_CTAR_CSSCK(x)	(((x) & 0x0000000f) << 12)
-#define SPI_CTAR_ASC(x)	(((x) & 0x0000000f) << 8)
-#define SPI_CTAR_DT(x)		(((x) & 0x0000000f) << 4)
-#define SPI_CTAR_BR(x)		((x) & 0x0000000f)
-#define SPI_CTAR_SCALE_BITS	0xf
+#define SPI_CTAR(x)			(0x0c + (((x) & GENMASK(1, 0)) * 4))
+#define SPI_CTAR_FMSZ(x)		(((x) << 27) & GENMASK(30, 27))
+#define SPI_CTAR_CPOL			BIT(26)
+#define SPI_CTAR_CPHA			BIT(25)
+#define SPI_CTAR_LSBFE			BIT(24)
+#define SPI_CTAR_PCSSCK(x)		(((x) << 22) & GENMASK(23, 22))
+#define SPI_CTAR_PASC(x)		(((x) << 20) & GENMASK(21, 20))
+#define SPI_CTAR_PDT(x)			(((x) << 18) & GENMASK(19, 18))
+#define SPI_CTAR_PBR(x)			(((x) << 16) & GENMASK(17, 16))
+#define SPI_CTAR_CSSCK(x)		(((x) << 12) & GENMASK(15, 12))
+#define SPI_CTAR_ASC(x)			(((x) << 8) & GENMASK(11, 8))
+#define SPI_CTAR_DT(x)			(((x) << 4) & GENMASK(7, 4))
+#define SPI_CTAR_BR(x)			((x) & GENMASK(3, 0))
+#define SPI_CTAR_SCALE_BITS		0xf
 
-#define SPI_CTAR0_SLAVE	0x0c
+#define SPI_CTAR0_SLAVE			0x0c
 
-#define SPI_SR			0x2c
-#define SPI_SR_EOQF		0x10000000
-#define SPI_SR_TCFQF		0x80000000
-#define SPI_SR_CLEAR		0xdaad0000
+#define SPI_SR				0x2c
+#define SPI_SR_TCFQF			BIT(31)
+#define SPI_SR_EOQF			BIT(28)
+#define SPI_SR_TFUF			BIT(27)
+#define SPI_SR_TFFF			BIT(25)
+#define SPI_SR_CMDTCF			BIT(23)
+#define SPI_SR_SPEF			BIT(21)
+#define SPI_SR_RFOF			BIT(19)
+#define SPI_SR_TFIWF			BIT(18)
+#define SPI_SR_RFDF			BIT(17)
+#define SPI_SR_CMDFFF			BIT(16)
+#define SPI_SR_CLEAR			(SPI_SR_TCFQF | SPI_SR_EOQF | \
+					SPI_SR_TFUF | SPI_SR_TFFF | \
+					SPI_SR_CMDTCF | SPI_SR_SPEF | \
+					SPI_SR_RFOF | SPI_SR_TFIWF | \
+					SPI_SR_RFDF | SPI_SR_CMDFFF)
 
-#define SPI_RSER_TFFFE		BIT(25)
-#define SPI_RSER_TFFFD		BIT(24)
-#define SPI_RSER_RFDFE		BIT(17)
-#define SPI_RSER_RFDFD		BIT(16)
+#define SPI_RSER_TFFFE			BIT(25)
+#define SPI_RSER_TFFFD			BIT(24)
+#define SPI_RSER_RFDFE			BIT(17)
+#define SPI_RSER_RFDFD			BIT(16)
 
-#define SPI_RSER		0x30
-#define SPI_RSER_EOQFE		0x10000000
-#define SPI_RSER_TCFQE		0x80000000
+#define SPI_RSER			0x30
+#define SPI_RSER_TCFQE			BIT(31)
+#define SPI_RSER_EOQFE			BIT(28)
 
-#define SPI_PUSHR		0x34
-#define SPI_PUSHR_CMD_CONT	(1 << 15)
-#define SPI_PUSHR_CONT		(SPI_PUSHR_CMD_CONT << 16)
-#define SPI_PUSHR_CMD_CTAS(x)	(((x) & 0x0003) << 12)
-#define SPI_PUSHR_CTAS(x)	(SPI_PUSHR_CMD_CTAS(x) << 16)
-#define SPI_PUSHR_CMD_EOQ	(1 << 11)
-#define SPI_PUSHR_EOQ		(SPI_PUSHR_CMD_EOQ << 16)
-#define SPI_PUSHR_CMD_CTCNT	(1 << 10)
-#define SPI_PUSHR_CTCNT		(SPI_PUSHR_CMD_CTCNT << 16)
-#define SPI_PUSHR_CMD_PCS(x)	((1 << x) & 0x003f)
-#define SPI_PUSHR_PCS(x)	(SPI_PUSHR_CMD_PCS(x) << 16)
-#define SPI_PUSHR_TXDATA(x)	((x) & 0x0000ffff)
+#define SPI_PUSHR			0x34
+#define SPI_PUSHR_CMD_CONT		BIT(15)
+#define SPI_PUSHR_CMD_CTAS(x)		(((x) << 12) & GENMASK(14, 12))
+#define SPI_PUSHR_CMD_EOQ		BIT(11)
+#define SPI_PUSHR_CMD_CTCNT		BIT(10)
+#define SPI_PUSHR_CMD_PCS(x)		(BIT(x) & GENMASK(5, 0))
 
-#define SPI_PUSHR_SLAVE	0x34
+#define SPI_PUSHR_SLAVE			0x34
 
-#define SPI_POPR		0x38
-#define SPI_POPR_RXDATA(x)	((x) & 0x0000ffff)
+#define SPI_POPR			0x38
 
-#define SPI_TXFR0		0x3c
-#define SPI_TXFR1		0x40
-#define SPI_TXFR2		0x44
-#define SPI_TXFR3		0x48
-#define SPI_RXFR0		0x7c
-#define SPI_RXFR1		0x80
-#define SPI_RXFR2		0x84
-#define SPI_RXFR3		0x88
+#define SPI_TXFR0			0x3c
+#define SPI_TXFR1			0x40
+#define SPI_TXFR2			0x44
+#define SPI_TXFR3			0x48
+#define SPI_RXFR0			0x7c
+#define SPI_RXFR1			0x80
+#define SPI_RXFR2			0x84
+#define SPI_RXFR3			0x88
 
-#define SPI_CTARE(x)		(0x11c + (((x) & 0x3) * 4))
-#define SPI_CTARE_FMSZE(x)	(((x) & 0x1) << 16)
-#define SPI_CTARE_DTCP(x)	((x) & 0x7ff)
+#define SPI_CTARE(x)			(0x11c + (((x) & GENMASK(1, 0)) * 4))
+#define SPI_CTARE_FMSZE(x)		(((x) & 0x1) << 16)
+#define SPI_CTARE_DTCP(x)		((x) & 0x7ff)
 
-#define SPI_SREX		0x13c
+#define SPI_SREX			0x13c
 
-#define SPI_FRAME_BITS(bits)	SPI_CTAR_FMSZ((bits) - 1)
-#define SPI_FRAME_BITS_MASK	SPI_CTAR_FMSZ(0xf)
-#define SPI_FRAME_BITS_16	SPI_CTAR_FMSZ(0xf)
-#define SPI_FRAME_BITS_8	SPI_CTAR_FMSZ(0x7)
-
-#define SPI_FRAME_EBITS(bits)	SPI_CTARE_FMSZE(((bits) - 1) >> 4)
-#define SPI_FRAME_EBITS_MASK	SPI_CTARE_FMSZE(1)
+#define SPI_FRAME_BITS(bits)		SPI_CTAR_FMSZ((bits) - 1)
+#define SPI_FRAME_EBITS(bits)		SPI_CTARE_FMSZE(((bits) - 1) >> 4)
 
 /* Register offsets for regmap_pushr */
-#define PUSHR_CMD		0x0
-#define PUSHR_TX		0x2
+#define PUSHR_CMD			0x0
+#define PUSHR_TX			0x2
 
-#define SPI_CS_INIT		0x01
-#define SPI_CS_ASSERT		0x02
-#define SPI_CS_DROP		0x04
-
-#define DMA_COMPLETION_TIMEOUT	msecs_to_jiffies(3000)
+#define DMA_COMPLETION_TIMEOUT		msecs_to_jiffies(3000)
 
 struct chip_data {
-	u32 ctar_val;
-	u16 void_write_data;
+	u32			ctar_val;
+	u16			void_write_data;
 };
 
 enum dspi_trans_mode {
@@ -141,75 +127,75 @@
 };
 
 struct fsl_dspi_devtype_data {
-	enum dspi_trans_mode trans_mode;
-	u8 max_clock_factor;
-	bool xspi_mode;
+	enum dspi_trans_mode	trans_mode;
+	u8			max_clock_factor;
+	bool			xspi_mode;
 };
 
 static const struct fsl_dspi_devtype_data vf610_data = {
-	.trans_mode = DSPI_DMA_MODE,
-	.max_clock_factor = 2,
+	.trans_mode		= DSPI_DMA_MODE,
+	.max_clock_factor	= 2,
 };
 
 static const struct fsl_dspi_devtype_data ls1021a_v1_data = {
-	.trans_mode = DSPI_TCFQ_MODE,
-	.max_clock_factor = 8,
-	.xspi_mode = true,
+	.trans_mode		= DSPI_TCFQ_MODE,
+	.max_clock_factor	= 8,
+	.xspi_mode		= true,
 };
 
 static const struct fsl_dspi_devtype_data ls2085a_data = {
-	.trans_mode = DSPI_TCFQ_MODE,
-	.max_clock_factor = 8,
+	.trans_mode		= DSPI_TCFQ_MODE,
+	.max_clock_factor	= 8,
 };
 
 static const struct fsl_dspi_devtype_data coldfire_data = {
-	.trans_mode = DSPI_EOQ_MODE,
-	.max_clock_factor = 8,
+	.trans_mode		= DSPI_EOQ_MODE,
+	.max_clock_factor	= 8,
 };
 
 struct fsl_dspi_dma {
 	/* Length of transfer in words of DSPI_FIFO_SIZE */
-	u32 curr_xfer_len;
+	u32					curr_xfer_len;
 
-	u32 *tx_dma_buf;
-	struct dma_chan *chan_tx;
-	dma_addr_t tx_dma_phys;
-	struct completion cmd_tx_complete;
-	struct dma_async_tx_descriptor *tx_desc;
+	u32					*tx_dma_buf;
+	struct dma_chan				*chan_tx;
+	dma_addr_t				tx_dma_phys;
+	struct completion			cmd_tx_complete;
+	struct dma_async_tx_descriptor		*tx_desc;
 
-	u32 *rx_dma_buf;
-	struct dma_chan *chan_rx;
-	dma_addr_t rx_dma_phys;
-	struct completion cmd_rx_complete;
-	struct dma_async_tx_descriptor *rx_desc;
+	u32					*rx_dma_buf;
+	struct dma_chan				*chan_rx;
+	dma_addr_t				rx_dma_phys;
+	struct completion			cmd_rx_complete;
+	struct dma_async_tx_descriptor		*rx_desc;
 };
 
 struct fsl_dspi {
-	struct spi_master	*master;
-	struct platform_device	*pdev;
+	struct spi_controller			*ctlr;
+	struct platform_device			*pdev;
 
-	struct regmap		*regmap;
-	struct regmap		*regmap_pushr;
-	int			irq;
-	struct clk		*clk;
+	struct regmap				*regmap;
+	struct regmap				*regmap_pushr;
+	int					irq;
+	struct clk				*clk;
 
-	struct spi_transfer	*cur_transfer;
-	struct spi_message	*cur_msg;
-	struct chip_data	*cur_chip;
-	size_t			len;
-	const void		*tx;
-	void			*rx;
-	void			*rx_end;
-	u16			void_write_data;
-	u16			tx_cmd;
-	u8			bits_per_word;
-	u8			bytes_per_word;
-	const struct fsl_dspi_devtype_data *devtype_data;
+	struct spi_transfer			*cur_transfer;
+	struct spi_message			*cur_msg;
+	struct chip_data			*cur_chip;
+	size_t					len;
+	const void				*tx;
+	void					*rx;
+	void					*rx_end;
+	u16					void_write_data;
+	u16					tx_cmd;
+	u8					bits_per_word;
+	u8					bytes_per_word;
+	const struct fsl_dspi_devtype_data	*devtype_data;
 
-	wait_queue_head_t	waitq;
-	u32			waitflags;
+	wait_queue_head_t			waitq;
+	u32					waitflags;
 
-	struct fsl_dspi_dma	*dma;
+	struct fsl_dspi_dma			*dma;
 };
 
 static u32 dspi_pop_tx(struct fsl_dspi *dspi)
@@ -233,6 +219,9 @@
 {
 	u16 cmd = dspi->tx_cmd, data = dspi_pop_tx(dspi);
 
+	if (spi_controller_is_slave(dspi->ctlr))
+		return data;
+
 	if (dspi->len > 0)
 		cmd |= SPI_PUSHR_CMD_CONT;
 	return cmd << 16 | data;
@@ -243,7 +232,7 @@
 	if (!dspi->rx)
 		return;
 
-	/* Mask of undefined bits */
+	/* Mask off undefined bits */
 	rxdata &= (1 << dspi->bits_per_word) - 1;
 
 	if (dspi->bytes_per_word == 1)
@@ -279,8 +268,8 @@
 
 static int dspi_next_xfer_dma_submit(struct fsl_dspi *dspi)
 {
-	struct fsl_dspi_dma *dma = dspi->dma;
 	struct device *dev = &dspi->pdev->dev;
+	struct fsl_dspi_dma *dma = dspi->dma;
 	int time_left;
 	int i;
 
@@ -329,8 +318,13 @@
 	dma_async_issue_pending(dma->chan_rx);
 	dma_async_issue_pending(dma->chan_tx);
 
+	if (spi_controller_is_slave(dspi->ctlr)) {
+		wait_for_completion_interruptible(&dspi->dma->cmd_rx_complete);
+		return 0;
+	}
+
 	time_left = wait_for_completion_timeout(&dspi->dma->cmd_tx_complete,
-					DMA_COMPLETION_TIMEOUT);
+						DMA_COMPLETION_TIMEOUT);
 	if (time_left == 0) {
 		dev_err(dev, "DMA tx timeout\n");
 		dmaengine_terminate_all(dma->chan_tx);
@@ -339,7 +333,7 @@
 	}
 
 	time_left = wait_for_completion_timeout(&dspi->dma->cmd_rx_complete,
-					DMA_COMPLETION_TIMEOUT);
+						DMA_COMPLETION_TIMEOUT);
 	if (time_left == 0) {
 		dev_err(dev, "DMA rx timeout\n");
 		dmaengine_terminate_all(dma->chan_tx);
@@ -352,9 +346,9 @@
 
 static int dspi_dma_xfer(struct fsl_dspi *dspi)
 {
-	struct fsl_dspi_dma *dma = dspi->dma;
-	struct device *dev = &dspi->pdev->dev;
 	struct spi_message *message = dspi->cur_msg;
+	struct device *dev = &dspi->pdev->dev;
+	struct fsl_dspi_dma *dma = dspi->dma;
 	int curr_remaining_bytes;
 	int bytes_per_buffer;
 	int ret = 0;
@@ -389,9 +383,9 @@
 
 static int dspi_request_dma(struct fsl_dspi *dspi, phys_addr_t phy_addr)
 {
-	struct fsl_dspi_dma *dma;
-	struct dma_slave_config cfg;
 	struct device *dev = &dspi->pdev->dev;
+	struct dma_slave_config cfg;
+	struct fsl_dspi_dma *dma;
 	int ret;
 
 	dma = devm_kzalloc(dev, sizeof(*dma), GFP_KERNEL);
@@ -413,14 +407,14 @@
 	}
 
 	dma->tx_dma_buf = dma_alloc_coherent(dev, DSPI_DMA_BUFSIZE,
-					&dma->tx_dma_phys, GFP_KERNEL);
+					     &dma->tx_dma_phys, GFP_KERNEL);
 	if (!dma->tx_dma_buf) {
 		ret = -ENOMEM;
 		goto err_tx_dma_buf;
 	}
 
 	dma->rx_dma_buf = dma_alloc_coherent(dev, DSPI_DMA_BUFSIZE,
-					&dma->rx_dma_phys, GFP_KERNEL);
+					     &dma->rx_dma_phys, GFP_KERNEL);
 	if (!dma->rx_dma_buf) {
 		ret = -ENOMEM;
 		goto err_rx_dma_buf;
@@ -477,30 +471,31 @@
 	struct fsl_dspi_dma *dma = dspi->dma;
 	struct device *dev = &dspi->pdev->dev;
 
-	if (dma) {
-		if (dma->chan_tx) {
-			dma_unmap_single(dev, dma->tx_dma_phys,
-					DSPI_DMA_BUFSIZE, DMA_TO_DEVICE);
-			dma_release_channel(dma->chan_tx);
-		}
+	if (!dma)
+		return;
 
-		if (dma->chan_rx) {
-			dma_unmap_single(dev, dma->rx_dma_phys,
-					DSPI_DMA_BUFSIZE, DMA_FROM_DEVICE);
-			dma_release_channel(dma->chan_rx);
-		}
+	if (dma->chan_tx) {
+		dma_unmap_single(dev, dma->tx_dma_phys,
+				 DSPI_DMA_BUFSIZE, DMA_TO_DEVICE);
+		dma_release_channel(dma->chan_tx);
+	}
+
+	if (dma->chan_rx) {
+		dma_unmap_single(dev, dma->rx_dma_phys,
+				 DSPI_DMA_BUFSIZE, DMA_FROM_DEVICE);
+		dma_release_channel(dma->chan_rx);
 	}
 }
 
 static void hz_to_spi_baud(char *pbr, char *br, int speed_hz,
-		unsigned long clkrate)
+			   unsigned long clkrate)
 {
 	/* Valid baud rate pre-scaler values */
 	int pbr_tbl[4] = {2, 3, 5, 7};
 	int brs[16] = {	2,	4,	6,	8,
-		16,	32,	64,	128,
-		256,	512,	1024,	2048,
-		4096,	8192,	16384,	32768 };
+			16,	32,	64,	128,
+			256,	512,	1024,	2048,
+			4096,	8192,	16384,	32768 };
 	int scale_needed, scale, minscale = INT_MAX;
 	int i, j;
 
@@ -530,15 +525,15 @@
 }
 
 static void ns_delay_scale(char *psc, char *sc, int delay_ns,
-		unsigned long clkrate)
+			   unsigned long clkrate)
 {
-	int pscale_tbl[4] = {1, 3, 5, 7};
 	int scale_needed, scale, minscale = INT_MAX;
-	int i, j;
+	int pscale_tbl[4] = {1, 3, 5, 7};
 	u32 remainder;
+	int i, j;
 
 	scale_needed = div_u64_rem((u64)delay_ns * clkrate, NSEC_PER_SEC,
-			&remainder);
+				   &remainder);
 	if (remainder)
 		scale_needed++;
 
@@ -593,7 +588,7 @@
 		 */
 		u32 data = dspi_pop_tx(dspi);
 
-		if (dspi->cur_chip->ctar_val & SPI_CTAR_LSBFE(1)) {
+		if (dspi->cur_chip->ctar_val & SPI_CTAR_LSBFE) {
 			/* LSB */
 			tx_fifo_write(dspi, data & 0xFFFF);
 			tx_fifo_write(dspi, data >> 16);
@@ -647,19 +642,90 @@
 {
 	int fifo_size = DSPI_FIFO_SIZE;
 
-	/* Read one FIFO entry at and push to rx buffer */
+	/* Read one FIFO entry and push to rx buffer */
 	while ((dspi->rx < dspi->rx_end) && fifo_size--)
 		dspi_push_rx(dspi, fifo_read(dspi));
 }
 
-static int dspi_transfer_one_message(struct spi_master *master,
-		struct spi_message *message)
+static int dspi_rxtx(struct fsl_dspi *dspi)
 {
-	struct fsl_dspi *dspi = spi_master_get_devdata(master);
+	struct spi_message *msg = dspi->cur_msg;
+	enum dspi_trans_mode trans_mode;
+	u16 spi_tcnt;
+	u32 spi_tcr;
+
+	/* Get transfer counter (in number of SPI transfers). It was
+	 * reset to 0 when transfer(s) were started.
+	 */
+	regmap_read(dspi->regmap, SPI_TCR, &spi_tcr);
+	spi_tcnt = SPI_TCR_GET_TCNT(spi_tcr);
+	/* Update total number of bytes that were transferred */
+	msg->actual_length += spi_tcnt * dspi->bytes_per_word;
+
+	trans_mode = dspi->devtype_data->trans_mode;
+	if (trans_mode == DSPI_EOQ_MODE)
+		dspi_eoq_read(dspi);
+	else if (trans_mode == DSPI_TCFQ_MODE)
+		dspi_tcfq_read(dspi);
+
+	if (!dspi->len)
+		/* Success! */
+		return 0;
+
+	if (trans_mode == DSPI_EOQ_MODE)
+		dspi_eoq_write(dspi);
+	else if (trans_mode == DSPI_TCFQ_MODE)
+		dspi_tcfq_write(dspi);
+
+	return -EINPROGRESS;
+}
+
+static int dspi_poll(struct fsl_dspi *dspi)
+{
+	int tries = 1000;
+	u32 spi_sr;
+
+	do {
+		regmap_read(dspi->regmap, SPI_SR, &spi_sr);
+		regmap_write(dspi->regmap, SPI_SR, spi_sr);
+
+		if (spi_sr & (SPI_SR_EOQF | SPI_SR_TCFQF))
+			break;
+	} while (--tries);
+
+	if (!tries)
+		return -ETIMEDOUT;
+
+	return dspi_rxtx(dspi);
+}
+
+static irqreturn_t dspi_interrupt(int irq, void *dev_id)
+{
+	struct fsl_dspi *dspi = (struct fsl_dspi *)dev_id;
+	u32 spi_sr;
+
+	regmap_read(dspi->regmap, SPI_SR, &spi_sr);
+	regmap_write(dspi->regmap, SPI_SR, spi_sr);
+
+	if (!(spi_sr & (SPI_SR_EOQF | SPI_SR_TCFQF)))
+		return IRQ_NONE;
+
+	if (dspi_rxtx(dspi) == 0) {
+		dspi->waitflags = 1;
+		wake_up_interruptible(&dspi->waitq);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int dspi_transfer_one_message(struct spi_controller *ctlr,
+				     struct spi_message *message)
+{
+	struct fsl_dspi *dspi = spi_controller_get_devdata(ctlr);
 	struct spi_device *spi = message->spi;
+	enum dspi_trans_mode trans_mode;
 	struct spi_transfer *transfer;
 	int status = 0;
-	enum dspi_trans_mode trans_mode;
 
 	message->actual_length = 0;
 
@@ -669,7 +735,7 @@
 		dspi->cur_chip = spi_get_ctldata(spi);
 		/* Prepare command word for CMD FIFO */
 		dspi->tx_cmd = SPI_PUSHR_CMD_CTAS(0) |
-			SPI_PUSHR_CMD_PCS(spi->chip_select);
+			       SPI_PUSHR_CMD_PCS(spi->chip_select);
 		if (list_is_last(&dspi->cur_transfer->transfer_list,
 				 &dspi->cur_msg->transfers)) {
 			/* Leave PCS activated after last transfer when
@@ -710,8 +776,8 @@
 			     SPI_FRAME_BITS(transfer->bits_per_word));
 		if (dspi->devtype_data->xspi_mode)
 			regmap_write(dspi->regmap, SPI_CTARE(0),
-				     SPI_FRAME_EBITS(transfer->bits_per_word)
-				     | SPI_CTARE_DTCP(1));
+				     SPI_FRAME_EBITS(transfer->bits_per_word) |
+				     SPI_CTARE_DTCP(1));
 
 		trans_mode = dspi->devtype_data->trans_mode;
 		switch (trans_mode) {
@@ -725,8 +791,8 @@
 			break;
 		case DSPI_DMA_MODE:
 			regmap_write(dspi->regmap, SPI_RSER,
-				SPI_RSER_TFFFE | SPI_RSER_TFFFD |
-				SPI_RSER_RFDFE | SPI_RSER_RFDFD);
+				     SPI_RSER_TFFFE | SPI_RSER_TFFFD |
+				     SPI_RSER_RFDFE | SPI_RSER_RFDFD);
 			status = dspi_dma_xfer(dspi);
 			break;
 		default:
@@ -736,13 +802,18 @@
 			goto out;
 		}
 
-		if (trans_mode != DSPI_DMA_MODE) {
-			if (wait_event_interruptible(dspi->waitq,
-						dspi->waitflags))
-				dev_err(&dspi->pdev->dev,
-					"wait transfer complete fail!\n");
+		if (!dspi->irq) {
+			do {
+				status = dspi_poll(dspi);
+			} while (status == -EINPROGRESS);
+		} else if (trans_mode != DSPI_DMA_MODE) {
+			status = wait_event_interruptible(dspi->waitq,
+							  dspi->waitflags);
 			dspi->waitflags = 0;
 		}
+		if (status)
+			dev_err(&dspi->pdev->dev,
+				"Waiting for transfer to complete failed!\n");
 
 		if (transfer->delay_usecs)
 			udelay(transfer->delay_usecs);
@@ -750,19 +821,19 @@
 
 out:
 	message->status = status;
-	spi_finalize_current_message(master);
+	spi_finalize_current_message(ctlr);
 
 	return status;
 }
 
 static int dspi_setup(struct spi_device *spi)
 {
-	struct chip_data *chip;
-	struct fsl_dspi *dspi = spi_master_get_devdata(spi->master);
-	struct fsl_dspi_platform_data *pdata;
-	u32 cs_sck_delay = 0, sck_cs_delay = 0;
+	struct fsl_dspi *dspi = spi_controller_get_devdata(spi->controller);
 	unsigned char br = 0, pbr = 0, pcssck = 0, cssck = 0;
+	u32 cs_sck_delay = 0, sck_cs_delay = 0;
+	struct fsl_dspi_platform_data *pdata;
 	unsigned char pasc = 0, asc = 0;
+	struct chip_data *chip;
 	unsigned long clkrate;
 
 	/* Only alloc on first setup */
@@ -777,10 +848,10 @@
 
 	if (!pdata) {
 		of_property_read_u32(spi->dev.of_node, "fsl,spi-cs-sck-delay",
-				&cs_sck_delay);
+				     &cs_sck_delay);
 
 		of_property_read_u32(spi->dev.of_node, "fsl,spi-sck-cs-delay",
-				&sck_cs_delay);
+				     &sck_cs_delay);
 	} else {
 		cs_sck_delay = pdata->cs_sck_delay;
 		sck_cs_delay = pdata->sck_cs_delay;
@@ -797,15 +868,23 @@
 	/* Set After SCK delay scale values */
 	ns_delay_scale(&pasc, &asc, sck_cs_delay, clkrate);
 
-	chip->ctar_val = SPI_CTAR_CPOL(spi->mode & SPI_CPOL ? 1 : 0)
-		| SPI_CTAR_CPHA(spi->mode & SPI_CPHA ? 1 : 0)
-		| SPI_CTAR_LSBFE(spi->mode & SPI_LSB_FIRST ? 1 : 0)
-		| SPI_CTAR_PCSSCK(pcssck)
-		| SPI_CTAR_CSSCK(cssck)
-		| SPI_CTAR_PASC(pasc)
-		| SPI_CTAR_ASC(asc)
-		| SPI_CTAR_PBR(pbr)
-		| SPI_CTAR_BR(br);
+	chip->ctar_val = 0;
+	if (spi->mode & SPI_CPOL)
+		chip->ctar_val |= SPI_CTAR_CPOL;
+	if (spi->mode & SPI_CPHA)
+		chip->ctar_val |= SPI_CTAR_CPHA;
+
+	if (!spi_controller_is_slave(dspi->ctlr)) {
+		chip->ctar_val |= SPI_CTAR_PCSSCK(pcssck) |
+				  SPI_CTAR_CSSCK(cssck) |
+				  SPI_CTAR_PASC(pasc) |
+				  SPI_CTAR_ASC(asc) |
+				  SPI_CTAR_PBR(pbr) |
+				  SPI_CTAR_BR(br);
+
+		if (spi->mode & SPI_LSB_FIRST)
+			chip->ctar_val |= SPI_CTAR_LSBFE;
+	}
 
 	spi_set_ctldata(spi, chip);
 
@@ -817,68 +896,11 @@
 	struct chip_data *chip = spi_get_ctldata((struct spi_device *)spi);
 
 	dev_dbg(&spi->dev, "spi_device %u.%u cleanup\n",
-			spi->master->bus_num, spi->chip_select);
+		spi->controller->bus_num, spi->chip_select);
 
 	kfree(chip);
 }
 
-static irqreturn_t dspi_interrupt(int irq, void *dev_id)
-{
-	struct fsl_dspi *dspi = (struct fsl_dspi *)dev_id;
-	struct spi_message *msg = dspi->cur_msg;
-	enum dspi_trans_mode trans_mode;
-	u32 spi_sr, spi_tcr;
-	u16 spi_tcnt;
-
-	regmap_read(dspi->regmap, SPI_SR, &spi_sr);
-	regmap_write(dspi->regmap, SPI_SR, spi_sr);
-
-
-	if (spi_sr & (SPI_SR_EOQF | SPI_SR_TCFQF)) {
-		/* Get transfer counter (in number of SPI transfers). It was
-		 * reset to 0 when transfer(s) were started.
-		 */
-		regmap_read(dspi->regmap, SPI_TCR, &spi_tcr);
-		spi_tcnt = SPI_TCR_GET_TCNT(spi_tcr);
-		/* Update total number of bytes that were transferred */
-		msg->actual_length += spi_tcnt * dspi->bytes_per_word;
-
-		trans_mode = dspi->devtype_data->trans_mode;
-		switch (trans_mode) {
-		case DSPI_EOQ_MODE:
-			dspi_eoq_read(dspi);
-			break;
-		case DSPI_TCFQ_MODE:
-			dspi_tcfq_read(dspi);
-			break;
-		default:
-			dev_err(&dspi->pdev->dev, "unsupported trans_mode %u\n",
-				trans_mode);
-				return IRQ_HANDLED;
-		}
-
-		if (!dspi->len) {
-			dspi->waitflags = 1;
-			wake_up_interruptible(&dspi->waitq);
-		} else {
-			switch (trans_mode) {
-			case DSPI_EOQ_MODE:
-				dspi_eoq_write(dspi);
-				break;
-			case DSPI_TCFQ_MODE:
-				dspi_tcfq_write(dspi);
-				break;
-			default:
-				dev_err(&dspi->pdev->dev,
-					"unsupported trans_mode %u\n",
-					trans_mode);
-			}
-		}
-	}
-
-	return IRQ_HANDLED;
-}
-
 static const struct of_device_id fsl_dspi_dt_ids[] = {
 	{ .compatible = "fsl,vf610-dspi", .data = &vf610_data, },
 	{ .compatible = "fsl,ls1021a-v1.0-dspi", .data = &ls1021a_v1_data, },
@@ -890,10 +912,10 @@
 #ifdef CONFIG_PM_SLEEP
 static int dspi_suspend(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct fsl_dspi *dspi = spi_master_get_devdata(master);
+	struct spi_controller *ctlr = dev_get_drvdata(dev);
+	struct fsl_dspi *dspi = spi_controller_get_devdata(ctlr);
 
-	spi_master_suspend(master);
+	spi_controller_suspend(ctlr);
 	clk_disable_unprepare(dspi->clk);
 
 	pinctrl_pm_select_sleep_state(dev);
@@ -903,8 +925,8 @@
 
 static int dspi_resume(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct fsl_dspi *dspi = spi_master_get_devdata(master);
+	struct spi_controller *ctlr = dev_get_drvdata(dev);
+	struct fsl_dspi *dspi = spi_controller_get_devdata(ctlr);
 	int ret;
 
 	pinctrl_pm_select_default_state(dev);
@@ -912,7 +934,7 @@
 	ret = clk_prepare_enable(dspi->clk);
 	if (ret)
 		return ret;
-	spi_master_resume(master);
+	spi_controller_resume(ctlr);
 
 	return 0;
 }
@@ -927,16 +949,16 @@
 };
 
 static const struct regmap_access_table dspi_volatile_table = {
-	.yes_ranges     = dspi_volatile_ranges,
-	.n_yes_ranges   = ARRAY_SIZE(dspi_volatile_ranges),
+	.yes_ranges	= dspi_volatile_ranges,
+	.n_yes_ranges	= ARRAY_SIZE(dspi_volatile_ranges),
 };
 
 static const struct regmap_config dspi_regmap_config = {
-	.reg_bits = 32,
-	.val_bits = 32,
-	.reg_stride = 4,
-	.max_register = 0x88,
-	.volatile_table = &dspi_volatile_table,
+	.reg_bits	= 32,
+	.val_bits	= 32,
+	.reg_stride	= 4,
+	.max_register	= 0x88,
+	.volatile_table	= &dspi_volatile_table,
 };
 
 static const struct regmap_range dspi_xspi_volatile_ranges[] = {
@@ -947,31 +969,37 @@
 };
 
 static const struct regmap_access_table dspi_xspi_volatile_table = {
-	.yes_ranges     = dspi_xspi_volatile_ranges,
-	.n_yes_ranges   = ARRAY_SIZE(dspi_xspi_volatile_ranges),
+	.yes_ranges	= dspi_xspi_volatile_ranges,
+	.n_yes_ranges	= ARRAY_SIZE(dspi_xspi_volatile_ranges),
 };
 
 static const struct regmap_config dspi_xspi_regmap_config[] = {
 	{
-		.reg_bits = 32,
-		.val_bits = 32,
-		.reg_stride = 4,
-		.max_register = 0x13c,
-		.volatile_table = &dspi_xspi_volatile_table,
+		.reg_bits	= 32,
+		.val_bits	= 32,
+		.reg_stride	= 4,
+		.max_register	= 0x13c,
+		.volatile_table	= &dspi_xspi_volatile_table,
 	},
 	{
-		.name = "pushr",
-		.reg_bits = 16,
-		.val_bits = 16,
-		.reg_stride = 2,
-		.max_register = 0x2,
+		.name		= "pushr",
+		.reg_bits	= 16,
+		.val_bits	= 16,
+		.reg_stride	= 2,
+		.max_register	= 0x2,
 	},
 };
 
 static void dspi_init(struct fsl_dspi *dspi)
 {
-	regmap_write(dspi->regmap, SPI_MCR, SPI_MCR_MASTER | SPI_MCR_PCSIS |
-		     (dspi->devtype_data->xspi_mode ? SPI_MCR_XSPI : 0));
+	unsigned int mcr = SPI_MCR_PCSIS;
+
+	if (dspi->devtype_data->xspi_mode)
+		mcr |= SPI_MCR_XSPI;
+	if (!spi_controller_is_slave(dspi->ctlr))
+		mcr |= SPI_MCR_MASTER;
+
+	regmap_write(dspi->regmap, SPI_MCR, mcr);
 	regmap_write(dspi->regmap, SPI_SR, SPI_SR_CLEAR);
 	if (dspi->devtype_data->xspi_mode)
 		regmap_write(dspi->regmap, SPI_CTARE(0),
@@ -981,34 +1009,33 @@
 static int dspi_probe(struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
-	struct spi_master *master;
+	const struct regmap_config *regmap_config;
+	struct fsl_dspi_platform_data *pdata;
+	struct spi_controller *ctlr;
+	int ret, cs_num, bus_num;
 	struct fsl_dspi *dspi;
 	struct resource *res;
-	const struct regmap_config *regmap_config;
 	void __iomem *base;
-	struct fsl_dspi_platform_data *pdata;
-	int ret = 0, cs_num, bus_num;
 
-	master = spi_alloc_master(&pdev->dev, sizeof(struct fsl_dspi));
-	if (!master)
+	ctlr = spi_alloc_master(&pdev->dev, sizeof(struct fsl_dspi));
+	if (!ctlr)
 		return -ENOMEM;
 
-	dspi = spi_master_get_devdata(master);
+	dspi = spi_controller_get_devdata(ctlr);
 	dspi->pdev = pdev;
-	dspi->master = master;
+	dspi->ctlr = ctlr;
 
-	master->transfer = NULL;
-	master->setup = dspi_setup;
-	master->transfer_one_message = dspi_transfer_one_message;
-	master->dev.of_node = pdev->dev.of_node;
+	ctlr->setup = dspi_setup;
+	ctlr->transfer_one_message = dspi_transfer_one_message;
+	ctlr->dev.of_node = pdev->dev.of_node;
 
-	master->cleanup = dspi_cleanup;
-	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LSB_FIRST;
+	ctlr->cleanup = dspi_cleanup;
+	ctlr->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LSB_FIRST;
 
 	pdata = dev_get_platdata(&pdev->dev);
 	if (pdata) {
-		master->num_chipselect = pdata->cs_num;
-		master->bus_num = pdata->bus_num;
+		ctlr->num_chipselect = pdata->cs_num;
+		ctlr->bus_num = pdata->bus_num;
 
 		dspi->devtype_data = &coldfire_data;
 	} else {
@@ -1016,35 +1043,38 @@
 		ret = of_property_read_u32(np, "spi-num-chipselects", &cs_num);
 		if (ret < 0) {
 			dev_err(&pdev->dev, "can't get spi-num-chipselects\n");
-			goto out_master_put;
+			goto out_ctlr_put;
 		}
-		master->num_chipselect = cs_num;
+		ctlr->num_chipselect = cs_num;
 
 		ret = of_property_read_u32(np, "bus-num", &bus_num);
 		if (ret < 0) {
 			dev_err(&pdev->dev, "can't get bus-num\n");
-			goto out_master_put;
+			goto out_ctlr_put;
 		}
-		master->bus_num = bus_num;
+		ctlr->bus_num = bus_num;
+
+		if (of_property_read_bool(np, "spi-slave"))
+			ctlr->slave = true;
 
 		dspi->devtype_data = of_device_get_match_data(&pdev->dev);
 		if (!dspi->devtype_data) {
 			dev_err(&pdev->dev, "can't get devtype_data\n");
 			ret = -EFAULT;
-			goto out_master_put;
+			goto out_ctlr_put;
 		}
 	}
 
 	if (dspi->devtype_data->xspi_mode)
-		master->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32);
+		ctlr->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32);
 	else
-		master->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 16);
+		ctlr->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 16);
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	base = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(base)) {
 		ret = PTR_ERR(base);
-		goto out_master_put;
+		goto out_ctlr_put;
 	}
 
 	if (dspi->devtype_data->xspi_mode)
@@ -1056,7 +1086,7 @@
 		dev_err(&pdev->dev, "failed to init regmap: %ld\n",
 				PTR_ERR(dspi->regmap));
 		ret = PTR_ERR(dspi->regmap);
-		goto out_master_put;
+		goto out_ctlr_put;
 	}
 
 	if (dspi->devtype_data->xspi_mode) {
@@ -1068,7 +1098,7 @@
 				"failed to init pushr regmap: %ld\n",
 				PTR_ERR(dspi->regmap_pushr));
 			ret = PTR_ERR(dspi->regmap_pushr);
-			goto out_master_put;
+			goto out_ctlr_put;
 		}
 	}
 
@@ -1076,27 +1106,32 @@
 	if (IS_ERR(dspi->clk)) {
 		ret = PTR_ERR(dspi->clk);
 		dev_err(&pdev->dev, "unable to get clock\n");
-		goto out_master_put;
+		goto out_ctlr_put;
 	}
 	ret = clk_prepare_enable(dspi->clk);
 	if (ret)
-		goto out_master_put;
+		goto out_ctlr_put;
 
 	dspi_init(dspi);
+
 	dspi->irq = platform_get_irq(pdev, 0);
-	if (dspi->irq < 0) {
-		dev_err(&pdev->dev, "can't get platform irq\n");
-		ret = dspi->irq;
-		goto out_clk_put;
+	if (dspi->irq <= 0) {
+		dev_info(&pdev->dev,
+			 "can't get platform irq, using poll mode\n");
+		dspi->irq = 0;
+		goto poll_mode;
 	}
 
-	ret = devm_request_irq(&pdev->dev, dspi->irq, dspi_interrupt, 0,
-			pdev->name, dspi);
+	ret = devm_request_irq(&pdev->dev, dspi->irq, dspi_interrupt,
+			       IRQF_SHARED, pdev->name, dspi);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "Unable to attach DSPI interrupt\n");
 		goto out_clk_put;
 	}
 
+	init_waitqueue_head(&dspi->waitq);
+
+poll_mode:
 	if (dspi->devtype_data->trans_mode == DSPI_DMA_MODE) {
 		ret = dspi_request_dma(dspi, res->start);
 		if (ret < 0) {
@@ -1105,15 +1140,14 @@
 		}
 	}
 
-	master->max_speed_hz =
+	ctlr->max_speed_hz =
 		clk_get_rate(dspi->clk) / dspi->devtype_data->max_clock_factor;
 
-	init_waitqueue_head(&dspi->waitq);
-	platform_set_drvdata(pdev, master);
+	platform_set_drvdata(pdev, ctlr);
 
-	ret = spi_register_master(master);
+	ret = spi_register_controller(ctlr);
 	if (ret != 0) {
-		dev_err(&pdev->dev, "Problem registering DSPI master\n");
+		dev_err(&pdev->dev, "Problem registering DSPI ctlr\n");
 		goto out_clk_put;
 	}
 
@@ -1121,32 +1155,32 @@
 
 out_clk_put:
 	clk_disable_unprepare(dspi->clk);
-out_master_put:
-	spi_master_put(master);
+out_ctlr_put:
+	spi_controller_put(ctlr);
 
 	return ret;
 }
 
 static int dspi_remove(struct platform_device *pdev)
 {
-	struct spi_master *master = platform_get_drvdata(pdev);
-	struct fsl_dspi *dspi = spi_master_get_devdata(master);
+	struct spi_controller *ctlr = platform_get_drvdata(pdev);
+	struct fsl_dspi *dspi = spi_controller_get_devdata(ctlr);
 
 	/* Disconnect from the SPI framework */
 	dspi_release_dma(dspi);
 	clk_disable_unprepare(dspi->clk);
-	spi_unregister_master(dspi->master);
+	spi_unregister_controller(dspi->ctlr);
 
 	return 0;
 }
 
 static struct platform_driver fsl_dspi_driver = {
-	.driver.name    = DRIVER_NAME,
-	.driver.of_match_table = fsl_dspi_dt_ids,
-	.driver.owner   = THIS_MODULE,
-	.driver.pm = &dspi_pm,
-	.probe          = dspi_probe,
-	.remove		= dspi_remove,
+	.driver.name		= DRIVER_NAME,
+	.driver.of_match_table	= fsl_dspi_dt_ids,
+	.driver.owner		= THIS_MODULE,
+	.driver.pm		= &dspi_pm,
+	.probe			= dspi_probe,
+	.remove			= dspi_remove,
 };
 module_platform_driver(fsl_dspi_driver);
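
Note: the reindented hz_to_spi_baud() above keeps the original algorithm: compute the total divider needed (clkrate / speed_hz, rounded up) and pick the smallest prescaler/scaler product that is not below it. A standalone model of that search which compiles and runs in userspace (it mirrors the tables shown in the hunk; the real driver additionally falls back to the largest dividers when nothing fits):

	#include <limits.h>
	#include <stdio.h>

	/* userspace model of the DSPI baud-rate scaler search, not driver code */
	static void hz_to_spi_baud(int *pbr, int *br, int speed_hz,
				   unsigned long clkrate)
	{
		/* valid baud rate pre-scaler and scaler values, as in the driver */
		int pbr_tbl[4] = { 2, 3, 5, 7 };
		int brs[16] = {	2,	4,	6,	8,
				16,	32,	64,	128,
				256,	512,	1024,	2048,
				4096,	8192,	16384,	32768 };
		int scale_needed = (clkrate + speed_hz - 1) / speed_hz;
		int minscale = INT_MAX;
		int i, j;

		for (i = 0; i < 4; i++)
			for (j = 0; j < 16; j++)
				if (pbr_tbl[i] * brs[j] >= scale_needed &&
				    pbr_tbl[i] * brs[j] < minscale) {
					minscale = pbr_tbl[i] * brs[j];
					*pbr = i;	/* index into CTAR[PBR] */
					*br = j;	/* index into CTAR[BR] */
				}
	}

	int main(void)
	{
		int pbr = 0, br = 0;

		/* e.g. request a 10 MHz SCK from a 100 MHz protocol clock */
		hz_to_spi_baud(&pbr, &br, 10000000, 100000000);
		printf("PBR index = %d, BR index = %d\n", pbr, br);
		return 0;
	}
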
 
diff --git a/drivers/spi/spi-fsl-espi.c b/drivers/spi/spi-fsl-espi.c
index 1e8ff62..f203267 100644
--- a/drivers/spi/spi-fsl-espi.c
+++ b/drivers/spi/spi-fsl-espi.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Freescale eSPI controller driver.
  *
  * Copyright 2010 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
  */
 #include <linux/delay.h>
 #include <linux/err.h>
@@ -798,10 +794,8 @@
 	int ret;
 
 	ret = spi_master_suspend(master);
-	if (ret) {
-		dev_warn(dev, "cannot suspend master\n");
+	if (ret)
 		return ret;
-	}
 
 	return pm_runtime_force_suspend(dev);
 }
diff --git a/drivers/spi/spi-fsl-lib.c b/drivers/spi/spi-fsl-lib.c
index 1e43412..76e1192 100644
--- a/drivers/spi/spi-fsl-lib.c
+++ b/drivers/spi/spi-fsl-lib.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Freescale SPI/eSPI controller driver library.
  *
@@ -10,11 +11,6 @@
  * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
  *
  * Copyright 2010 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
  */
 #include <linux/dma-mapping.h>
 #include <linux/fsl_devices.h>
diff --git a/drivers/spi/spi-fsl-lib.h b/drivers/spi/spi-fsl-lib.h
index f303f30..015a1ab 100644
--- a/drivers/spi/spi-fsl-lib.h
+++ b/drivers/spi/spi-fsl-lib.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Freescale SPI/eSPI controller driver library.
  *
@@ -9,11 +10,6 @@
  * CPM SPI and QE buffer descriptors mode support:
  * Copyright (c) 2009  MontaVista Software, Inc.
  * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
  */
 #ifndef __SPI_FSL_LIB_H__
 #define __SPI_FSL_LIB_H__
@@ -95,8 +91,7 @@
 
 struct mpc8xxx_spi_probe_info {
 	struct fsl_spi_platform_data pdata;
-	int *gpios;
-	bool *alow_flags;
+	__be32 __iomem *immr_spi_cs;
 };
 
 extern u32 mpc8xxx_spi_tx_buf_u8(struct mpc8xxx_spi *mpc8xxx_spi);
diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c
index e6d5cc6..d08e932 100644
--- a/drivers/spi/spi-fsl-lpspi.c
+++ b/drivers/spi/spi-fsl-lpspi.c
@@ -3,11 +3,15 @@
 // Freescale i.MX7ULP LPSPI driver
 //
 // Copyright 2016 Freescale Semiconductor, Inc.
+// Copyright 2018 NXP Semiconductors
 
 #include <linux/clk.h>
 #include <linux/completion.h>
 #include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
 #include <linux/err.h>
+#include <linux/gpio.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/irq.h>
@@ -15,7 +19,12 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
+#include <linux/of_gpio.h>
+#include <linux/pinctrl/consumer.h>
 #include <linux/platform_device.h>
+#include <linux/platform_data/dma-imx.h>
+#include <linux/platform_data/spi-imx.h>
+#include <linux/pm_runtime.h>
 #include <linux/slab.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/spi_bitbang.h>
@@ -23,6 +32,11 @@
 
 #define DRIVER_NAME "fsl_lpspi"
 
+#define FSL_LPSPI_RPM_TIMEOUT 50 /* 50ms */
+
+/* The maximum number of bytes that eDMA can transfer at once. */
+#define FSL_LPSPI_MAX_EDMA_BYTES ((1 << 15) - 1)
+
 /* i.MX7ULP LPSPI registers */
 #define IMX7ULP_VERID	0x0
 #define IMX7ULP_PARAM	0x4
@@ -47,16 +61,23 @@
 #define CR_RTF		BIT(8)
 #define CR_RST		BIT(1)
 #define CR_MEN		BIT(0)
+#define SR_MBF		BIT(24)
 #define SR_TCF		BIT(10)
+#define SR_FCF		BIT(9)
 #define SR_RDF		BIT(1)
 #define SR_TDF		BIT(0)
 #define IER_TCIE	BIT(10)
+#define IER_FCIE	BIT(9)
 #define IER_RDIE	BIT(1)
 #define IER_TDIE	BIT(0)
+#define DER_RDDE	BIT(1)
+#define DER_TDDE	BIT(0)
 #define CFGR1_PCSCFG	BIT(27)
+#define CFGR1_PINCFG	(BIT(24) | BIT(25))
 #define CFGR1_PCSPOL	BIT(8)
 #define CFGR1_NOSTALL	BIT(3)
 #define CFGR1_MASTER	BIT(0)
+#define FSR_TXCOUNT	(0xFF)
 #define RSR_RXEMPTY	BIT(1)
 #define TCR_CPOL	BIT(31)
 #define TCR_CPHA	BIT(30)
@@ -78,7 +99,11 @@
 struct fsl_lpspi_data {
 	struct device *dev;
 	void __iomem *base;
-	struct clk *clk;
+	unsigned long base_phys;
+	struct clk *clk_ipg;
+	struct clk *clk_per;
+	bool is_slave;
+	bool is_first_byte;
 
 	void *rx_buf;
 	const void *tx_buf;
@@ -86,11 +111,21 @@
 	void (*rx)(struct fsl_lpspi_data *);
 
 	u32 remain;
+	u8 watermark;
 	u8 txfifosize;
 	u8 rxfifosize;
 
 	struct lpspi_config config;
 	struct completion xfer_done;
+
+	bool slave_aborted;
+
+	/* DMA */
+	bool usedma;
+	struct completion dma_rx_completion;
+	struct completion dma_tx_completion;
+
+	int chipselect[0];
 };
 
 static const struct of_device_id fsl_lpspi_dt_ids[] = {
@@ -137,37 +172,71 @@
 	writel(enable, fsl_lpspi->base + IMX7ULP_IER);
 }
 
-static int lpspi_prepare_xfer_hardware(struct spi_master *master)
+static int fsl_lpspi_bytes_per_word(const int bpw)
 {
-	struct fsl_lpspi_data *fsl_lpspi = spi_master_get_devdata(master);
-
-	return clk_prepare_enable(fsl_lpspi->clk);
+	return DIV_ROUND_UP(bpw, BITS_PER_BYTE);
 }
 
-static int lpspi_unprepare_xfer_hardware(struct spi_master *master)
+static bool fsl_lpspi_can_dma(struct spi_controller *controller,
+			      struct spi_device *spi,
+			      struct spi_transfer *transfer)
 {
-	struct fsl_lpspi_data *fsl_lpspi = spi_master_get_devdata(master);
+	unsigned int bytes_per_word;
 
-	clk_disable_unprepare(fsl_lpspi->clk);
+	if (!controller->dma_rx)
+		return false;
+
+	bytes_per_word = fsl_lpspi_bytes_per_word(transfer->bits_per_word);
+
+	switch (bytes_per_word) {
+	case 1:
+	case 2:
+	case 4:
+		break;
+	default:
+		return false;
+	}
+
+	return true;
+}
+
+static int lpspi_prepare_xfer_hardware(struct spi_controller *controller)
+{
+	struct fsl_lpspi_data *fsl_lpspi =
+				spi_controller_get_devdata(controller);
+	int ret;
+
+	ret = pm_runtime_get_sync(fsl_lpspi->dev);
+	if (ret < 0) {
+		dev_err(fsl_lpspi->dev, "failed to enable clock\n");
+		return ret;
+	}
 
 	return 0;
 }
 
-static int fsl_lpspi_txfifo_empty(struct fsl_lpspi_data *fsl_lpspi)
+static int lpspi_unprepare_xfer_hardware(struct spi_controller *controller)
 {
-	u32 txcnt;
-	unsigned long orig_jiffies = jiffies;
+	struct fsl_lpspi_data *fsl_lpspi =
+				spi_controller_get_devdata(controller);
 
-	do {
-		txcnt = readl(fsl_lpspi->base + IMX7ULP_FSR) & 0xff;
+	pm_runtime_mark_last_busy(fsl_lpspi->dev);
+	pm_runtime_put_autosuspend(fsl_lpspi->dev);
 
-		if (time_after(jiffies, orig_jiffies + msecs_to_jiffies(500))) {
-			dev_dbg(fsl_lpspi->dev, "txfifo empty timeout\n");
-			return -ETIMEDOUT;
-		}
-		cond_resched();
+	return 0;
+}
 
-	} while (txcnt);
+static int fsl_lpspi_prepare_message(struct spi_controller *controller,
+				     struct spi_message *msg)
+{
+	struct fsl_lpspi_data *fsl_lpspi =
+					spi_controller_get_devdata(controller);
+	struct spi_device *spi = msg->spi;
+	int gpio = fsl_lpspi->chipselect[spi->chip_select];
+
+	if (gpio_is_valid(gpio))
+		gpio_direction_output(gpio, spi->mode & SPI_CS_HIGH ? 0 : 1);
 
 	return 0;
 }
@@ -175,6 +244,7 @@
 static void fsl_lpspi_write_tx_fifo(struct fsl_lpspi_data *fsl_lpspi)
 {
 	u8 txfifo_cnt;
+	u32 temp;
 
 	txfifo_cnt = readl(fsl_lpspi->base + IMX7ULP_FSR) & 0xff;
 
@@ -185,9 +255,15 @@
 		txfifo_cnt++;
 	}
 
-	if (!fsl_lpspi->remain && (txfifo_cnt < fsl_lpspi->txfifosize))
-		writel(0, fsl_lpspi->base + IMX7ULP_TDR);
-	else
+	if (txfifo_cnt < fsl_lpspi->txfifosize) {
+		if (!fsl_lpspi->is_slave) {
+			temp = readl(fsl_lpspi->base + IMX7ULP_TCR);
+			temp &= ~TCR_CONTC;
+			writel(temp, fsl_lpspi->base + IMX7ULP_TCR);
+		}
+
+		fsl_lpspi_intctrl(fsl_lpspi, IER_FCIE);
+	} else
 		fsl_lpspi_intctrl(fsl_lpspi, IER_TDIE);
 }
 
@@ -197,27 +273,29 @@
 		fsl_lpspi->rx(fsl_lpspi);
 }
 
-static void fsl_lpspi_set_cmd(struct fsl_lpspi_data *fsl_lpspi,
-			      bool is_first_xfer)
+static void fsl_lpspi_set_cmd(struct fsl_lpspi_data *fsl_lpspi)
 {
 	u32 temp = 0;
 
 	temp |= fsl_lpspi->config.bpw - 1;
-	temp |= fsl_lpspi->config.prescale << 27;
 	temp |= (fsl_lpspi->config.mode & 0x3) << 30;
-	temp |= (fsl_lpspi->config.chip_select & 0x3) << 24;
+	if (!fsl_lpspi->is_slave) {
+		temp |= fsl_lpspi->config.prescale << 27;
+		temp |= (fsl_lpspi->config.chip_select & 0x3) << 24;
 
-	/*
-	 * Set TCR_CONT will keep SS asserted after current transfer.
-	 * For the first transfer, clear TCR_CONTC to assert SS.
-	 * For subsequent transfer, set TCR_CONTC to keep SS asserted.
-	 */
-	temp |= TCR_CONT;
-	if (is_first_xfer)
-		temp &= ~TCR_CONTC;
-	else
-		temp |= TCR_CONTC;
-
+		/*
+		 * Set TCR_CONT will keep SS asserted after current transfer.
+		 * For the first transfer, clear TCR_CONTC to assert SS.
+		 * For subsequent transfer, set TCR_CONTC to keep SS asserted.
+		 */
+		if (!fsl_lpspi->usedma) {
+			temp |= TCR_CONT;
+			if (fsl_lpspi->is_first_byte)
+				temp &= ~TCR_CONTC;
+			else
+				temp |= TCR_CONTC;
+		}
+	}
 	writel(temp, fsl_lpspi->base + IMX7ULP_TCR);
 
 	dev_dbg(fsl_lpspi->dev, "TCR=0x%x\n", temp);
@@ -227,7 +305,11 @@
 {
 	u32 temp;
 
-	temp = fsl_lpspi->txfifosize >> 1 | (fsl_lpspi->rxfifosize >> 1) << 16;
+	if (!fsl_lpspi->usedma)
+		temp = fsl_lpspi->watermark >> 1 |
+		       (fsl_lpspi->watermark >> 1) << 16;
+	else
+		temp = fsl_lpspi->watermark >> 1;
 
 	writel(temp, fsl_lpspi->base + IMX7ULP_FCR);
 
@@ -240,7 +322,14 @@
 	unsigned int perclk_rate, scldiv;
 	u8 prescale;
 
-	perclk_rate = clk_get_rate(fsl_lpspi->clk);
+	perclk_rate = clk_get_rate(fsl_lpspi->clk_per);
+
+	if (config.speed_hz > perclk_rate / 2) {
+		dev_err(fsl_lpspi->dev,
+			"per-clk should be at least two times the transfer speed\n");
+		return -EINVAL;
+	}
+
 	for (prescale = 0; prescale < 8; prescale++) {
 		scldiv = perclk_rate /
 			 (clkdivs[prescale] * config.speed_hz) - 2;
@@ -253,30 +342,79 @@
 	if (prescale == 8 && scldiv >= 256)
 		return -EINVAL;
 
-	writel(scldiv, fsl_lpspi->base + IMX7ULP_CCR);
+	writel(scldiv | (scldiv << 8) | ((scldiv >> 1) << 16),
+					fsl_lpspi->base + IMX7ULP_CCR);
 
-	dev_dbg(fsl_lpspi->dev, "perclk=%d, speed=%d, prescale =%d, scldiv=%d\n",
+	dev_dbg(fsl_lpspi->dev, "perclk=%d, speed=%d, prescale=%d, scldiv=%d\n",
 		perclk_rate, config.speed_hz, prescale, scldiv);
 
 	return 0;
 }
 
+static int fsl_lpspi_dma_configure(struct spi_controller *controller)
+{
+	int ret;
+	enum dma_slave_buswidth buswidth;
+	struct dma_slave_config rx = {}, tx = {};
+	struct fsl_lpspi_data *fsl_lpspi =
+				spi_controller_get_devdata(controller);
+
+	switch (fsl_lpspi_bytes_per_word(fsl_lpspi->config.bpw)) {
+	case 4:
+		buswidth = DMA_SLAVE_BUSWIDTH_4_BYTES;
+		break;
+	case 2:
+		buswidth = DMA_SLAVE_BUSWIDTH_2_BYTES;
+		break;
+	case 1:
+		buswidth = DMA_SLAVE_BUSWIDTH_1_BYTE;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	tx.direction = DMA_MEM_TO_DEV;
+	tx.dst_addr = fsl_lpspi->base_phys + IMX7ULP_TDR;
+	tx.dst_addr_width = buswidth;
+	tx.dst_maxburst = 1;
+	ret = dmaengine_slave_config(controller->dma_tx, &tx);
+	if (ret) {
+		dev_err(fsl_lpspi->dev, "TX dma configuration failed with %d\n",
+			ret);
+		return ret;
+	}
+
+	rx.direction = DMA_DEV_TO_MEM;
+	rx.src_addr = fsl_lpspi->base_phys + IMX7ULP_RDR;
+	rx.src_addr_width = buswidth;
+	rx.src_maxburst = 1;
+	ret = dmaengine_slave_config(controller->dma_rx, &rx);
+	if (ret) {
+		dev_err(fsl_lpspi->dev, "RX dma configuration failed with %d\n",
+			ret);
+		return ret;
+	}
+
+	return 0;
+}
+
 static int fsl_lpspi_config(struct fsl_lpspi_data *fsl_lpspi)
 {
 	u32 temp;
 	int ret;
 
-	temp = CR_RST;
-	writel(temp, fsl_lpspi->base + IMX7ULP_CR);
-	writel(0, fsl_lpspi->base + IMX7ULP_CR);
-
-	ret = fsl_lpspi_set_bitrate(fsl_lpspi);
-	if (ret)
-		return ret;
+	if (!fsl_lpspi->is_slave) {
+		ret = fsl_lpspi_set_bitrate(fsl_lpspi);
+		if (ret)
+			return ret;
+	}
 
 	fsl_lpspi_set_watermark(fsl_lpspi);
 
-	temp = CFGR1_PCSCFG | CFGR1_MASTER | CFGR1_NOSTALL;
+	if (!fsl_lpspi->is_slave)
+		temp = CFGR1_MASTER;
+	else
+		temp = CFGR1_PINCFG;
 	if (fsl_lpspi->config.mode & SPI_CS_HIGH)
 		temp |= CFGR1_PCSPOL;
 	writel(temp, fsl_lpspi->base + IMX7ULP_CFGR1);
@@ -285,17 +423,27 @@
 	temp |= CR_RRF | CR_RTF | CR_MEN;
 	writel(temp, fsl_lpspi->base + IMX7ULP_CR);
 
+	temp = 0;
+	if (fsl_lpspi->usedma)
+		temp = DER_TDDE | DER_RDDE;
+	writel(temp, fsl_lpspi->base + IMX7ULP_DER);
+
 	return 0;
 }
 
-static void fsl_lpspi_setup_transfer(struct spi_device *spi,
+static int fsl_lpspi_setup_transfer(struct spi_controller *controller,
+				     struct spi_device *spi,
 				     struct spi_transfer *t)
 {
-	struct fsl_lpspi_data *fsl_lpspi = spi_master_get_devdata(spi->master);
+	struct fsl_lpspi_data *fsl_lpspi =
+				spi_controller_get_devdata(spi->controller);
+
+	if (t == NULL)
+		return -EINVAL;
 
 	fsl_lpspi->config.mode = spi->mode;
-	fsl_lpspi->config.bpw = t ? t->bits_per_word : spi->bits_per_word;
-	fsl_lpspi->config.speed_hz = t ? t->speed_hz : spi->max_speed_hz;
+	fsl_lpspi->config.bpw = t->bits_per_word;
+	fsl_lpspi->config.speed_hz = t->speed_hz;
 	fsl_lpspi->config.chip_select = spi->chip_select;
 
 	if (!fsl_lpspi->config.speed_hz)
@@ -315,14 +463,251 @@
 		fsl_lpspi->tx = fsl_lpspi_buf_tx_u32;
 	}
 
-	fsl_lpspi_config(fsl_lpspi);
+	if (t->len <= fsl_lpspi->txfifosize)
+		fsl_lpspi->watermark = t->len;
+	else
+		fsl_lpspi->watermark = fsl_lpspi->txfifosize;
+
+	if (fsl_lpspi_can_dma(controller, spi, t))
+		fsl_lpspi->usedma = 1;
+	else
+		fsl_lpspi->usedma = 0;
+
+	return fsl_lpspi_config(fsl_lpspi);
 }
 
-static int fsl_lpspi_transfer_one(struct spi_master *master,
-				  struct spi_device *spi,
+static int fsl_lpspi_slave_abort(struct spi_controller *controller)
+{
+	struct fsl_lpspi_data *fsl_lpspi =
+				spi_controller_get_devdata(controller);
+
+	fsl_lpspi->slave_aborted = true;
+	if (!fsl_lpspi->usedma)
+		complete(&fsl_lpspi->xfer_done);
+	else {
+		complete(&fsl_lpspi->dma_tx_completion);
+		complete(&fsl_lpspi->dma_rx_completion);
+	}
+
+	return 0;
+}
+
+static int fsl_lpspi_wait_for_completion(struct spi_controller *controller)
+{
+	struct fsl_lpspi_data *fsl_lpspi =
+				spi_controller_get_devdata(controller);
+
+	if (fsl_lpspi->is_slave) {
+		if (wait_for_completion_interruptible(&fsl_lpspi->xfer_done) ||
+			fsl_lpspi->slave_aborted) {
+			dev_dbg(fsl_lpspi->dev, "interrupted\n");
+			return -EINTR;
+		}
+	} else {
+		if (!wait_for_completion_timeout(&fsl_lpspi->xfer_done, HZ)) {
+			dev_dbg(fsl_lpspi->dev, "wait for completion timeout\n");
+			return -ETIMEDOUT;
+		}
+	}
+
+	return 0;
+}
+
+static int fsl_lpspi_reset(struct fsl_lpspi_data *fsl_lpspi)
+{
+	u32 temp;
+
+	if (!fsl_lpspi->usedma) {
+		/* Disable all interrupts */
+		fsl_lpspi_intctrl(fsl_lpspi, 0);
+	}
+
+	/* W1C for all flags in SR */
+	temp = 0x3F << 8;
+	writel(temp, fsl_lpspi->base + IMX7ULP_SR);
+
+	/* Clear FIFO and disable module */
+	temp = CR_RRF | CR_RTF;
+	writel(temp, fsl_lpspi->base + IMX7ULP_CR);
+
+	return 0;
+}
+
+static void fsl_lpspi_dma_rx_callback(void *cookie)
+{
+	struct fsl_lpspi_data *fsl_lpspi = (struct fsl_lpspi_data *)cookie;
+
+	complete(&fsl_lpspi->dma_rx_completion);
+}
+
+static void fsl_lpspi_dma_tx_callback(void *cookie)
+{
+	struct fsl_lpspi_data *fsl_lpspi = (struct fsl_lpspi_data *)cookie;
+
+	complete(&fsl_lpspi->dma_tx_completion);
+}
+
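+/*
+ * Illustrative note (not part of the original driver comments): with the
+ * formula below, a hypothetical 4096-byte transfer at 1 MHz gives
+ * (8 + 4) * 4096 / 1000000 = 0 s (integer division), plus the extra
+ * second for scheduling = 1 s, doubled to 2 s, i.e. msecs_to_jiffies(2000).
+ * The extra second dominates for small or fast transfers.
+ */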
+static int fsl_lpspi_calculate_timeout(struct fsl_lpspi_data *fsl_lpspi,
+				       int size)
+{
+	unsigned long timeout = 0;
+
+	/* Time for the actual data transfer plus the HW-related CS change delay */
+	timeout = (8 + 4) * size / fsl_lpspi->config.speed_hz;
+
+	/* Add extra second for scheduler related activities */
+	timeout += 1;
+
+	/* Double calculated timeout */
+	return msecs_to_jiffies(2 * timeout * MSEC_PER_SEC);
+}
+
+static int fsl_lpspi_dma_transfer(struct spi_controller *controller,
+				struct fsl_lpspi_data *fsl_lpspi,
+				struct spi_transfer *transfer)
+{
+	struct dma_async_tx_descriptor *desc_tx, *desc_rx;
+	unsigned long transfer_timeout;
+	unsigned long timeout;
+	struct sg_table *tx = &transfer->tx_sg, *rx = &transfer->rx_sg;
+	int ret;
+
+	ret = fsl_lpspi_dma_configure(controller);
+	if (ret)
+		return ret;
+
+	desc_rx = dmaengine_prep_slave_sg(controller->dma_rx,
+				rx->sgl, rx->nents, DMA_DEV_TO_MEM,
+				DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	if (!desc_rx)
+		return -EINVAL;
+
+	desc_rx->callback = fsl_lpspi_dma_rx_callback;
+	desc_rx->callback_param = (void *)fsl_lpspi;
+	dmaengine_submit(desc_rx);
+	reinit_completion(&fsl_lpspi->dma_rx_completion);
+	dma_async_issue_pending(controller->dma_rx);
+
+	desc_tx = dmaengine_prep_slave_sg(controller->dma_tx,
+				tx->sgl, tx->nents, DMA_MEM_TO_DEV,
+				DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	if (!desc_tx) {
+		dmaengine_terminate_all(controller->dma_tx);
+		return -EINVAL;
+	}
+
+	desc_tx->callback = fsl_lpspi_dma_tx_callback;
+	desc_tx->callback_param = (void *)fsl_lpspi;
+	dmaengine_submit(desc_tx);
+	reinit_completion(&fsl_lpspi->dma_tx_completion);
+	dma_async_issue_pending(controller->dma_tx);
+
+	fsl_lpspi->slave_aborted = false;
+
+	if (!fsl_lpspi->is_slave) {
+		transfer_timeout = fsl_lpspi_calculate_timeout(fsl_lpspi,
+							       transfer->len);
+
+		/* Wait for eDMA to finish the data transfer. */
+		timeout = wait_for_completion_timeout(&fsl_lpspi->dma_tx_completion,
+						      transfer_timeout);
+		if (!timeout) {
+			dev_err(fsl_lpspi->dev, "I/O Error in DMA TX\n");
+			dmaengine_terminate_all(controller->dma_tx);
+			dmaengine_terminate_all(controller->dma_rx);
+			fsl_lpspi_reset(fsl_lpspi);
+			return -ETIMEDOUT;
+		}
+
+		timeout = wait_for_completion_timeout(&fsl_lpspi->dma_rx_completion,
+						      transfer_timeout);
+		if (!timeout) {
+			dev_err(fsl_lpspi->dev, "I/O Error in DMA RX\n");
+			dmaengine_terminate_all(controller->dma_tx);
+			dmaengine_terminate_all(controller->dma_rx);
+			fsl_lpspi_reset(fsl_lpspi);
+			return -ETIMEDOUT;
+		}
+	} else {
+		if (wait_for_completion_interruptible(&fsl_lpspi->dma_tx_completion) ||
+			fsl_lpspi->slave_aborted) {
+			dev_dbg(fsl_lpspi->dev,
+				"I/O Error in DMA TX interrupted\n");
+			dmaengine_terminate_all(controller->dma_tx);
+			dmaengine_terminate_all(controller->dma_rx);
+			fsl_lpspi_reset(fsl_lpspi);
+			return -EINTR;
+		}
+
+		if (wait_for_completion_interruptible(&fsl_lpspi->dma_rx_completion) ||
+			fsl_lpspi->slave_aborted) {
+			dev_dbg(fsl_lpspi->dev,
+				"I/O Error in DMA RX interrupted\n");
+			dmaengine_terminate_all(controller->dma_tx);
+			dmaengine_terminate_all(controller->dma_rx);
+			fsl_lpspi_reset(fsl_lpspi);
+			return -EINTR;
+		}
+	}
+
+	fsl_lpspi_reset(fsl_lpspi);
+
+	return 0;
+}
+
+static void fsl_lpspi_dma_exit(struct spi_controller *controller)
+{
+	if (controller->dma_rx) {
+		dma_release_channel(controller->dma_rx);
+		controller->dma_rx = NULL;
+	}
+
+	if (controller->dma_tx) {
+		dma_release_channel(controller->dma_tx);
+		controller->dma_tx = NULL;
+	}
+}
+
+static int fsl_lpspi_dma_init(struct device *dev,
+			      struct fsl_lpspi_data *fsl_lpspi,
+			      struct spi_controller *controller)
+{
+	int ret;
+
+	/* Prepare for TX DMA: */
+	controller->dma_tx = dma_request_slave_channel_reason(dev, "tx");
+	if (IS_ERR(controller->dma_tx)) {
+		ret = PTR_ERR(controller->dma_tx);
+		dev_dbg(dev, "can't get the TX DMA channel, error %d!\n", ret);
+		controller->dma_tx = NULL;
+		goto err;
+	}
+
+	/* Prepare for RX DMA: */
+	controller->dma_rx = dma_request_slave_channel_reason(dev, "rx");
+	if (IS_ERR(controller->dma_rx)) {
+		ret = PTR_ERR(controller->dma_rx);
+		dev_dbg(dev, "can't get the RX DMA channel, error %d\n", ret);
+		controller->dma_rx = NULL;
+		goto err;
+	}
+
+	init_completion(&fsl_lpspi->dma_rx_completion);
+	init_completion(&fsl_lpspi->dma_tx_completion);
+	controller->can_dma = fsl_lpspi_can_dma;
+	controller->max_dma_len = FSL_LPSPI_MAX_EDMA_BYTES;
+
+	return 0;
+err:
+	fsl_lpspi_dma_exit(controller);
+	return ret;
+}
+
+static int fsl_lpspi_pio_transfer(struct spi_controller *controller,
 				  struct spi_transfer *t)
 {
-	struct fsl_lpspi_data *fsl_lpspi = spi_master_get_devdata(master);
+	struct fsl_lpspi_data *fsl_lpspi =
+				spi_controller_get_devdata(controller);
 	int ret;
 
 	fsl_lpspi->tx_buf = t->tx_buf;
@@ -330,110 +715,185 @@
 	fsl_lpspi->remain = t->len;
 
 	reinit_completion(&fsl_lpspi->xfer_done);
+	fsl_lpspi->slave_aborted = false;
+
 	fsl_lpspi_write_tx_fifo(fsl_lpspi);
 
-	ret = wait_for_completion_timeout(&fsl_lpspi->xfer_done, HZ);
-	if (!ret) {
-		dev_dbg(fsl_lpspi->dev, "wait for completion timeout\n");
-		return -ETIMEDOUT;
-	}
-
-	ret = fsl_lpspi_txfifo_empty(fsl_lpspi);
+	ret = fsl_lpspi_wait_for_completion(controller);
 	if (ret)
 		return ret;
 
-	fsl_lpspi_read_rx_fifo(fsl_lpspi);
+	fsl_lpspi_reset(fsl_lpspi);
 
 	return 0;
 }
 
-static int fsl_lpspi_transfer_one_msg(struct spi_master *master,
-				      struct spi_message *msg)
+static int fsl_lpspi_transfer_one(struct spi_controller *controller,
+				  struct spi_device *spi,
+				  struct spi_transfer *t)
 {
-	struct fsl_lpspi_data *fsl_lpspi = spi_master_get_devdata(master);
-	struct spi_device *spi = msg->spi;
-	struct spi_transfer *xfer;
-	bool is_first_xfer = true;
-	u32 temp;
-	int ret = 0;
+	struct fsl_lpspi_data *fsl_lpspi =
+					spi_controller_get_devdata(controller);
+	int ret;
 
-	msg->status = 0;
-	msg->actual_length = 0;
+	fsl_lpspi->is_first_byte = true;
+	ret = fsl_lpspi_setup_transfer(controller, spi, t);
+	if (ret < 0)
+		return ret;
 
-	list_for_each_entry(xfer, &msg->transfers, transfer_list) {
-		fsl_lpspi_setup_transfer(spi, xfer);
-		fsl_lpspi_set_cmd(fsl_lpspi, is_first_xfer);
+	fsl_lpspi_set_cmd(fsl_lpspi);
+	fsl_lpspi->is_first_byte = false;
 
-		is_first_xfer = false;
+	if (fsl_lpspi->usedma)
+		ret = fsl_lpspi_dma_transfer(controller, fsl_lpspi, t);
+	else
+		ret = fsl_lpspi_pio_transfer(controller, t);
+	if (ret < 0)
+		return ret;
 
-		ret = fsl_lpspi_transfer_one(master, spi, xfer);
-		if (ret < 0)
-			goto complete;
-
-		msg->actual_length += xfer->len;
-	}
-
-complete:
-	/* de-assert SS, then finalize current message */
-	temp = readl(fsl_lpspi->base + IMX7ULP_TCR);
-	temp &= ~TCR_CONTC;
-	writel(temp, fsl_lpspi->base + IMX7ULP_TCR);
-
-	msg->status = ret;
-	spi_finalize_current_message(master);
-
-	return ret;
+	return 0;
 }
 
 static irqreturn_t fsl_lpspi_isr(int irq, void *dev_id)
 {
+	u32 temp_SR, temp_IER;
 	struct fsl_lpspi_data *fsl_lpspi = dev_id;
-	u32 temp;
 
+	temp_IER = readl(fsl_lpspi->base + IMX7ULP_IER);
 	fsl_lpspi_intctrl(fsl_lpspi, 0);
-	temp = readl(fsl_lpspi->base + IMX7ULP_SR);
+	temp_SR = readl(fsl_lpspi->base + IMX7ULP_SR);
 
 	fsl_lpspi_read_rx_fifo(fsl_lpspi);
 
-	if (temp & SR_TDF) {
+	if ((temp_SR & SR_TDF) && (temp_IER & IER_TDIE)) {
 		fsl_lpspi_write_tx_fifo(fsl_lpspi);
+		return IRQ_HANDLED;
+	}
 
-		if (!fsl_lpspi->remain)
+	if (temp_SR & SR_MBF ||
+	    readl(fsl_lpspi->base + IMX7ULP_FSR) & FSR_TXCOUNT) {
+		writel(SR_FCF, fsl_lpspi->base + IMX7ULP_SR);
+		fsl_lpspi_intctrl(fsl_lpspi, IER_FCIE);
+		return IRQ_HANDLED;
+	}
+
+	if (temp_SR & SR_FCF && (temp_IER & IER_FCIE)) {
+		writel(SR_FCF, fsl_lpspi->base + IMX7ULP_SR);
 		complete(&fsl_lpspi->xfer_done);
-
 		return IRQ_HANDLED;
 	}
 
 	return IRQ_NONE;
 }
 
+#ifdef CONFIG_PM
+static int fsl_lpspi_runtime_resume(struct device *dev)
+{
+	struct spi_controller *controller = dev_get_drvdata(dev);
+	struct fsl_lpspi_data *fsl_lpspi;
+	int ret;
+
+	fsl_lpspi = spi_controller_get_devdata(controller);
+
+	ret = clk_prepare_enable(fsl_lpspi->clk_per);
+	if (ret)
+		return ret;
+
+	ret = clk_prepare_enable(fsl_lpspi->clk_ipg);
+	if (ret) {
+		clk_disable_unprepare(fsl_lpspi->clk_per);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int fsl_lpspi_runtime_suspend(struct device *dev)
+{
+	struct spi_controller *controller = dev_get_drvdata(dev);
+	struct fsl_lpspi_data *fsl_lpspi;
+
+	fsl_lpspi = spi_controller_get_devdata(controller);
+
+	clk_disable_unprepare(fsl_lpspi->clk_per);
+	clk_disable_unprepare(fsl_lpspi->clk_ipg);
+
+	return 0;
+}
+#endif
+
+static int fsl_lpspi_init_rpm(struct fsl_lpspi_data *fsl_lpspi)
+{
+	struct device *dev = fsl_lpspi->dev;
+
+	pm_runtime_enable(dev);
+	pm_runtime_set_autosuspend_delay(dev, FSL_LPSPI_RPM_TIMEOUT);
+	pm_runtime_use_autosuspend(dev);
+
+	return 0;
+}
+
 static int fsl_lpspi_probe(struct platform_device *pdev)
 {
+	struct device_node *np = pdev->dev.of_node;
 	struct fsl_lpspi_data *fsl_lpspi;
-	struct spi_master *master;
+	struct spi_controller *controller;
+	struct spi_imx_master *lpspi_platform_info =
+		dev_get_platdata(&pdev->dev);
 	struct resource *res;
-	int ret, irq;
+	int i, ret, irq;
 	u32 temp;
+	bool is_slave;
 
-	master = spi_alloc_master(&pdev->dev, sizeof(struct fsl_lpspi_data));
-	if (!master)
+	is_slave = of_property_read_bool(np, "spi-slave");
+	if (is_slave)
+		controller = spi_alloc_slave(&pdev->dev,
+					sizeof(struct fsl_lpspi_data));
+	else
+		controller = spi_alloc_master(&pdev->dev,
+					sizeof(struct fsl_lpspi_data));
+
+	if (!controller)
 		return -ENOMEM;
 
-	platform_set_drvdata(pdev, master);
+	platform_set_drvdata(pdev, controller);
 
-	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(8, 32);
-	master->bus_num = pdev->id;
-
-	fsl_lpspi = spi_master_get_devdata(master);
+	fsl_lpspi = spi_controller_get_devdata(controller);
 	fsl_lpspi->dev = &pdev->dev;
+	fsl_lpspi->is_slave = is_slave;
 
-	master->transfer_one_message = fsl_lpspi_transfer_one_msg;
-	master->prepare_transfer_hardware = lpspi_prepare_xfer_hardware;
-	master->unprepare_transfer_hardware = lpspi_unprepare_xfer_hardware;
-	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
-	master->flags = SPI_MASTER_MUST_RX | SPI_MASTER_MUST_TX;
-	master->dev.of_node = pdev->dev.of_node;
-	master->bus_num = pdev->id;
+	if (!fsl_lpspi->is_slave) {
+		for (i = 0; i < controller->num_chipselect; i++) {
+			int cs_gpio = of_get_named_gpio(np, "cs-gpios", i);
+
+			if (!gpio_is_valid(cs_gpio) && lpspi_platform_info)
+				cs_gpio = lpspi_platform_info->chipselect[i];
+
+			fsl_lpspi->chipselect[i] = cs_gpio;
+			if (!gpio_is_valid(cs_gpio))
+				continue;
+
+			ret = devm_gpio_request(&pdev->dev,
+						fsl_lpspi->chipselect[i],
+						DRIVER_NAME);
+			if (ret) {
+				dev_err(&pdev->dev, "can't get cs gpios\n");
+				goto out_controller_put;
+			}
+		}
+		controller->cs_gpios = fsl_lpspi->chipselect;
+		controller->prepare_message = fsl_lpspi_prepare_message;
+	}
+
+	controller->bits_per_word_mask = SPI_BPW_RANGE_MASK(8, 32);
+	controller->transfer_one = fsl_lpspi_transfer_one;
+	controller->prepare_transfer_hardware = lpspi_prepare_xfer_hardware;
+	controller->unprepare_transfer_hardware = lpspi_unprepare_xfer_hardware;
+	controller->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
+	controller->flags = SPI_MASTER_MUST_RX | SPI_MASTER_MUST_TX;
+	controller->dev.of_node = pdev->dev.of_node;
+	controller->bus_num = pdev->id;
+	controller->slave_abort = fsl_lpspi_slave_abort;
 
 	init_completion(&fsl_lpspi->xfer_done);
 
@@ -441,74 +901,127 @@
 	fsl_lpspi->base = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(fsl_lpspi->base)) {
 		ret = PTR_ERR(fsl_lpspi->base);
-		goto out_master_put;
+		goto out_controller_put;
 	}
+	fsl_lpspi->base_phys = res->start;
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0) {
 		ret = irq;
-		goto out_master_put;
+		goto out_controller_put;
 	}
 
 	ret = devm_request_irq(&pdev->dev, irq, fsl_lpspi_isr, 0,
 			       dev_name(&pdev->dev), fsl_lpspi);
 	if (ret) {
 		dev_err(&pdev->dev, "can't get irq%d: %d\n", irq, ret);
-		goto out_master_put;
+		goto out_controller_put;
 	}
 
-	fsl_lpspi->clk = devm_clk_get(&pdev->dev, "ipg");
-	if (IS_ERR(fsl_lpspi->clk)) {
-		ret = PTR_ERR(fsl_lpspi->clk);
-		goto out_master_put;
+	fsl_lpspi->clk_per = devm_clk_get(&pdev->dev, "per");
+	if (IS_ERR(fsl_lpspi->clk_per)) {
+		ret = PTR_ERR(fsl_lpspi->clk_per);
+		goto out_controller_put;
 	}
 
-	ret = clk_prepare_enable(fsl_lpspi->clk);
-	if (ret) {
-		dev_err(&pdev->dev, "can't enable lpspi clock, ret=%d\n", ret);
-		goto out_master_put;
+	fsl_lpspi->clk_ipg = devm_clk_get(&pdev->dev, "ipg");
+	if (IS_ERR(fsl_lpspi->clk_ipg)) {
+		ret = PTR_ERR(fsl_lpspi->clk_ipg);
+		goto out_controller_put;
+	}
+
+	/* enable the clock */
+	ret = fsl_lpspi_init_rpm(fsl_lpspi);
+	if (ret)
+		goto out_controller_put;
+
+	ret = pm_runtime_get_sync(fsl_lpspi->dev);
+	if (ret < 0) {
+		dev_err(fsl_lpspi->dev, "failed to enable clock\n");
+		return ret;
 	}
 
 	temp = readl(fsl_lpspi->base + IMX7ULP_PARAM);
 	fsl_lpspi->txfifosize = 1 << (temp & 0x0f);
 	fsl_lpspi->rxfifosize = 1 << ((temp >> 8) & 0x0f);
 
-	clk_disable_unprepare(fsl_lpspi->clk);
+	ret = fsl_lpspi_dma_init(&pdev->dev, fsl_lpspi, controller);
+	if (ret == -EPROBE_DEFER)
+		goto out_controller_put;
 
-	ret = devm_spi_register_master(&pdev->dev, master);
+	if (ret < 0)
+		dev_err(&pdev->dev, "dma setup error %d, use pio\n", ret);
+
+	ret = devm_spi_register_controller(&pdev->dev, controller);
 	if (ret < 0) {
-		dev_err(&pdev->dev, "spi_register_master error.\n");
-		goto out_master_put;
+		dev_err(&pdev->dev, "spi_register_controller error.\n");
+		goto out_controller_put;
 	}
 
 	return 0;
 
-out_master_put:
-	spi_master_put(master);
+out_controller_put:
+	spi_controller_put(controller);
 
 	return ret;
 }
 
 static int fsl_lpspi_remove(struct platform_device *pdev)
 {
-	struct spi_master *master = platform_get_drvdata(pdev);
-	struct fsl_lpspi_data *fsl_lpspi = spi_master_get_devdata(master);
+	struct spi_controller *controller = platform_get_drvdata(pdev);
+	struct fsl_lpspi_data *fsl_lpspi =
+				spi_controller_get_devdata(controller);
 
-	clk_disable_unprepare(fsl_lpspi->clk);
+	pm_runtime_disable(fsl_lpspi->dev);
+
+	spi_master_put(controller);
 
 	return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
+static int fsl_lpspi_suspend(struct device *dev)
+{
+	int ret;
+
+	pinctrl_pm_select_sleep_state(dev);
+	ret = pm_runtime_force_suspend(dev);
+	return ret;
+}
+
+static int fsl_lpspi_resume(struct device *dev)
+{
+	int ret;
+
+	ret = pm_runtime_force_resume(dev);
+	if (ret) {
+		dev_err(dev, "Error in resume: %d\n", ret);
+		return ret;
+	}
+
+	pinctrl_pm_select_default_state(dev);
+
+	return 0;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+static const struct dev_pm_ops fsl_lpspi_pm_ops = {
+	SET_RUNTIME_PM_OPS(fsl_lpspi_runtime_suspend,
+				fsl_lpspi_runtime_resume, NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(fsl_lpspi_suspend, fsl_lpspi_resume)
+};
+
 static struct platform_driver fsl_lpspi_driver = {
 	.driver = {
 		.name = DRIVER_NAME,
 		.of_match_table = fsl_lpspi_dt_ids,
+		.pm = &fsl_lpspi_pm_ops,
 	},
 	.probe = fsl_lpspi_probe,
 	.remove = fsl_lpspi_remove,
 };
 module_platform_driver(fsl_lpspi_driver);
 
-MODULE_DESCRIPTION("LPSPI Master Controller driver");
+MODULE_DESCRIPTION("LPSPI Controller driver");
 MODULE_AUTHOR("Gao Pan <pandy.gao@nxp.com>");
 MODULE_LICENSE("GPL");
diff --git a/drivers/spi/spi-fsl-qspi.c b/drivers/spi/spi-fsl-qspi.c
new file mode 100644
index 0000000..c02e24c
--- /dev/null
+++ b/drivers/spi/spi-fsl-qspi.c
@@ -0,0 +1,964 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Freescale QuadSPI driver.
+ *
+ * Copyright (C) 2013 Freescale Semiconductor, Inc.
+ * Copyright (C) 2018 Bootlin
+ * Copyright (C) 2018 exceet electronics GmbH
+ * Copyright (C) 2018 Kontron Electronics GmbH
+ *
+ * Transition to SPI MEM interface:
+ * Authors:
+ *     Boris Brezillon <bbrezillon@kernel.org>
+ *     Frieder Schrempf <frieder.schrempf@kontron.de>
+ *     Yogesh Gaur <yogeshnarayan.gaur@nxp.com>
+ *     Suresh Gupta <suresh.gupta@nxp.com>
+ *
+ * Based on the original fsl-quadspi.c spi-nor driver:
+ * Author: Freescale Semiconductor, Inc.
+ *
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pm_qos.h>
+#include <linux/sizes.h>
+
+#include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
+
+/*
+ * The driver only uses one single LUT entry, that is updated on
+ * each call of exec_op(). Index 0 is preset at boot with a basic
+ * read operation, so let's use the last entry (15).
+ */
+#define	SEQID_LUT			15
+
+/* Registers used by the driver */
+#define QUADSPI_MCR			0x00
+#define QUADSPI_MCR_RESERVED_MASK	GENMASK(19, 16)
+#define QUADSPI_MCR_MDIS_MASK		BIT(14)
+#define QUADSPI_MCR_CLR_TXF_MASK	BIT(11)
+#define QUADSPI_MCR_CLR_RXF_MASK	BIT(10)
+#define QUADSPI_MCR_DDR_EN_MASK		BIT(7)
+#define QUADSPI_MCR_END_CFG_MASK	GENMASK(3, 2)
+#define QUADSPI_MCR_SWRSTHD_MASK	BIT(1)
+#define QUADSPI_MCR_SWRSTSD_MASK	BIT(0)
+
+#define QUADSPI_IPCR			0x08
+#define QUADSPI_IPCR_SEQID(x)		((x) << 24)
+
+#define QUADSPI_BUF3CR			0x1c
+#define QUADSPI_BUF3CR_ALLMST_MASK	BIT(31)
+#define QUADSPI_BUF3CR_ADATSZ(x)	((x) << 8)
+#define QUADSPI_BUF3CR_ADATSZ_MASK	GENMASK(15, 8)
+
+#define QUADSPI_BFGENCR			0x20
+#define QUADSPI_BFGENCR_SEQID(x)	((x) << 12)
+
+#define QUADSPI_BUF0IND			0x30
+#define QUADSPI_BUF1IND			0x34
+#define QUADSPI_BUF2IND			0x38
+#define QUADSPI_SFAR			0x100
+
+#define QUADSPI_SMPR			0x108
+#define QUADSPI_SMPR_DDRSMP_MASK	GENMASK(18, 16)
+#define QUADSPI_SMPR_FSDLY_MASK		BIT(6)
+#define QUADSPI_SMPR_FSPHS_MASK		BIT(5)
+#define QUADSPI_SMPR_HSENA_MASK		BIT(0)
+
+#define QUADSPI_RBCT			0x110
+#define QUADSPI_RBCT_WMRK_MASK		GENMASK(4, 0)
+#define QUADSPI_RBCT_RXBRD_USEIPS	BIT(8)
+
+#define QUADSPI_TBDR			0x154
+
+#define QUADSPI_SR			0x15c
+#define QUADSPI_SR_IP_ACC_MASK		BIT(1)
+#define QUADSPI_SR_AHB_ACC_MASK		BIT(2)
+
+#define QUADSPI_FR			0x160
+#define QUADSPI_FR_TFF_MASK		BIT(0)
+
+#define QUADSPI_SPTRCLR			0x16c
+#define QUADSPI_SPTRCLR_IPPTRC		BIT(8)
+#define QUADSPI_SPTRCLR_BFPTRC		BIT(0)
+
+#define QUADSPI_SFA1AD			0x180
+#define QUADSPI_SFA2AD			0x184
+#define QUADSPI_SFB1AD			0x188
+#define QUADSPI_SFB2AD			0x18c
+#define QUADSPI_RBDR(x)			(0x200 + ((x) * 4))
+
+#define QUADSPI_LUTKEY			0x300
+#define QUADSPI_LUTKEY_VALUE		0x5AF05AF0
+
+#define QUADSPI_LCKCR			0x304
+#define QUADSPI_LCKER_LOCK		BIT(0)
+#define QUADSPI_LCKER_UNLOCK		BIT(1)
+
+#define QUADSPI_RSER			0x164
+#define QUADSPI_RSER_TFIE		BIT(0)
+
+#define QUADSPI_LUT_BASE		0x310
+#define QUADSPI_LUT_OFFSET		(SEQID_LUT * 4 * 4)
+#define QUADSPI_LUT_REG(idx) \
+	(QUADSPI_LUT_BASE + QUADSPI_LUT_OFFSET + (idx) * 4)
+
+/* Instruction set for the LUT register */
+#define LUT_STOP		0
+#define LUT_CMD			1
+#define LUT_ADDR		2
+#define LUT_DUMMY		3
+#define LUT_MODE		4
+#define LUT_MODE2		5
+#define LUT_MODE4		6
+#define LUT_FSL_READ		7
+#define LUT_FSL_WRITE		8
+#define LUT_JMP_ON_CS		9
+#define LUT_ADDR_DDR		10
+#define LUT_MODE_DDR		11
+#define LUT_MODE2_DDR		12
+#define LUT_MODE4_DDR		13
+#define LUT_FSL_READ_DDR	14
+#define LUT_FSL_WRITE_DDR	15
+#define LUT_DATA_LEARN		16
+
+/*
+ * The PAD definitions for LUT register.
+ *
+ * The pad stands for the number of IO lines [0:3].
+ * For example, the quad read needs four IO lines,
+ * so you should use LUT_PAD(4).
+ */
+#define LUT_PAD(x) (fls(x) - 1)
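+/*
+ * Illustrative values: fls(1) = 1, fls(2) = 2, fls(4) = 3, so
+ * LUT_PAD(1) = 0 (single line), LUT_PAD(2) = 1 (dual) and
+ * LUT_PAD(4) = 2 (quad).
+ */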
+
+/*
+ * Macro for constructing the LUT entries with the following
+ * register layout:
+ *
+ *  ---------------------------------------------------
+ *  | INSTR1 | PAD1 | OPRND1 | INSTR0 | PAD0 | OPRND0 |
+ *  ---------------------------------------------------
+ */
+#define LUT_DEF(idx, ins, pad, opr)					\
+	((((ins) << 10) | ((pad) << 8) | (opr)) << (((idx) % 2) * 16))
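+/*
+ * Worked example (hypothetical opcode, not taken from this driver): a
+ * single-line 0x03 command followed by a quad-pad read instruction packs
+ * into one LUT register as
+ *
+ *   LUT_DEF(0, LUT_CMD, LUT_PAD(1), 0x03)    = 0x00000403  (bits 15:0)
+ *   LUT_DEF(1, LUT_FSL_READ, LUT_PAD(4), 0)  = 0x1e000000  (bits 31:16)
+ *
+ * giving a combined register value of 0x1e000403.
+ */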
+
+/* Controller needs driver to swap endianness */
+#define QUADSPI_QUIRK_SWAP_ENDIAN	BIT(0)
+
+/* Controller needs 4x internal clock */
+#define QUADSPI_QUIRK_4X_INT_CLK	BIT(1)
+
+/*
+ * TKT253890, the controller needs the driver to fill the txfifo with
+ * 16 bytes at least to trigger a data transfer, even though the extra
+ * data won't be transferred.
+ */
+#define QUADSPI_QUIRK_TKT253890		BIT(2)
+
+/* TKT245618: the controller cannot wake up from wait mode */
+#define QUADSPI_QUIRK_TKT245618		BIT(3)
+
+/*
+ * Controller adds QSPI_AMBA_BASE (base address of the mapped memory)
+ * internally. No need to add it when setting SFXXAD and SFAR registers
+ */
+#define QUADSPI_QUIRK_BASE_INTERNAL	BIT(4)
+
+struct fsl_qspi_devtype_data {
+	unsigned int rxfifo;
+	unsigned int txfifo;
+	unsigned int ahb_buf_size;
+	unsigned int quirks;
+	bool little_endian;
+};
+
+static const struct fsl_qspi_devtype_data vybrid_data = {
+	.rxfifo = SZ_128,
+	.txfifo = SZ_64,
+	.ahb_buf_size = SZ_1K,
+	.quirks = QUADSPI_QUIRK_SWAP_ENDIAN,
+	.little_endian = true,
+};
+
+static const struct fsl_qspi_devtype_data imx6sx_data = {
+	.rxfifo = SZ_128,
+	.txfifo = SZ_512,
+	.ahb_buf_size = SZ_1K,
+	.quirks = QUADSPI_QUIRK_4X_INT_CLK | QUADSPI_QUIRK_TKT245618,
+	.little_endian = true,
+};
+
+static const struct fsl_qspi_devtype_data imx7d_data = {
+	.rxfifo = SZ_128,
+	.txfifo = SZ_512,
+	.ahb_buf_size = SZ_1K,
+	.quirks = QUADSPI_QUIRK_TKT253890 | QUADSPI_QUIRK_4X_INT_CLK,
+	.little_endian = true,
+};
+
+static const struct fsl_qspi_devtype_data imx6ul_data = {
+	.rxfifo = SZ_128,
+	.txfifo = SZ_512,
+	.ahb_buf_size = SZ_1K,
+	.quirks = QUADSPI_QUIRK_TKT253890 | QUADSPI_QUIRK_4X_INT_CLK,
+	.little_endian = true,
+};
+
+static const struct fsl_qspi_devtype_data ls1021a_data = {
+	.rxfifo = SZ_128,
+	.txfifo = SZ_64,
+	.ahb_buf_size = SZ_1K,
+	.quirks = 0,
+	.little_endian = false,
+};
+
+static const struct fsl_qspi_devtype_data ls2080a_data = {
+	.rxfifo = SZ_128,
+	.txfifo = SZ_64,
+	.ahb_buf_size = SZ_1K,
+	.quirks = QUADSPI_QUIRK_TKT253890 | QUADSPI_QUIRK_BASE_INTERNAL,
+	.little_endian = true,
+};
+
+struct fsl_qspi {
+	void __iomem *iobase;
+	void __iomem *ahb_addr;
+	u32 memmap_phy;
+	struct clk *clk, *clk_en;
+	struct device *dev;
+	struct completion c;
+	const struct fsl_qspi_devtype_data *devtype_data;
+	struct mutex lock;
+	struct pm_qos_request pm_qos_req;
+	int selected;
+};
+
+static inline int needs_swap_endian(struct fsl_qspi *q)
+{
+	return q->devtype_data->quirks & QUADSPI_QUIRK_SWAP_ENDIAN;
+}
+
+static inline int needs_4x_clock(struct fsl_qspi *q)
+{
+	return q->devtype_data->quirks & QUADSPI_QUIRK_4X_INT_CLK;
+}
+
+static inline int needs_fill_txfifo(struct fsl_qspi *q)
+{
+	return q->devtype_data->quirks & QUADSPI_QUIRK_TKT253890;
+}
+
+static inline int needs_wakeup_wait_mode(struct fsl_qspi *q)
+{
+	return q->devtype_data->quirks & QUADSPI_QUIRK_TKT245618;
+}
+
+static inline int needs_amba_base_offset(struct fsl_qspi *q)
+{
+	return !(q->devtype_data->quirks & QUADSPI_QUIRK_BASE_INTERNAL);
+}
+
+/*
+ * An IC bug makes it necessary to rearrange the 32-bit data.
+ * Later chips, such as IMX6SLX, have fixed this bug.
+ */
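+/*
+ * For example, on an affected controller a payload word of 0x12345678 is
+ * written/read as __swab32(0x12345678) = 0x78563412.
+ */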
+static inline u32 fsl_qspi_endian_xchg(struct fsl_qspi *q, u32 a)
+{
+	return needs_swap_endian(q) ? __swab32(a) : a;
+}
+
+/*
+ * R/W functions for big- or little-endian registers:
+ * The QSPI controller's endianness is independent of
+ * the CPU core's endianness. So far, even though the CPU
+ * core is little-endian, the QSPI controller can be either
+ * big- or little-endian.
+ */
+static void qspi_writel(struct fsl_qspi *q, u32 val, void __iomem *addr)
+{
+	if (q->devtype_data->little_endian)
+		iowrite32(val, addr);
+	else
+		iowrite32be(val, addr);
+}
+
+static u32 qspi_readl(struct fsl_qspi *q, void __iomem *addr)
+{
+	if (q->devtype_data->little_endian)
+		return ioread32(addr);
+
+	return ioread32be(addr);
+}
+
+static irqreturn_t fsl_qspi_irq_handler(int irq, void *dev_id)
+{
+	struct fsl_qspi *q = dev_id;
+	u32 reg;
+
+	/* clear interrupt */
+	reg = qspi_readl(q, q->iobase + QUADSPI_FR);
+	qspi_writel(q, reg, q->iobase + QUADSPI_FR);
+
+	if (reg & QUADSPI_FR_TFF_MASK)
+		complete(&q->c);
+
+	dev_dbg(q->dev, "QUADSPI_FR : 0x%.8x:0x%.8x\n", 0, reg);
+	return IRQ_HANDLED;
+}
+
+static int fsl_qspi_check_buswidth(struct fsl_qspi *q, u8 width)
+{
+	switch (width) {
+	case 1:
+	case 2:
+	case 4:
+		return 0;
+	}
+
+	return -ENOTSUPP;
+}
+
+static bool fsl_qspi_supports_op(struct spi_mem *mem,
+				 const struct spi_mem_op *op)
+{
+	struct fsl_qspi *q = spi_controller_get_devdata(mem->spi->master);
+	int ret;
+
+	ret = fsl_qspi_check_buswidth(q, op->cmd.buswidth);
+
+	if (op->addr.nbytes)
+		ret |= fsl_qspi_check_buswidth(q, op->addr.buswidth);
+
+	if (op->dummy.nbytes)
+		ret |= fsl_qspi_check_buswidth(q, op->dummy.buswidth);
+
+	if (op->data.nbytes)
+		ret |= fsl_qspi_check_buswidth(q, op->data.buswidth);
+
+	if (ret)
+		return false;
+
+	/*
+	 * The number of instructions needed for the op needs to
+	 * fit into a single LUT entry.
+	 */
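+	/*
+	 * Illustrative budget: one LUT sequence is 4 registers with 2
+	 * instruction slots each (8 total); the command takes one slot and
+	 * STOP another, leaving 6 for address bytes, dummy and data. E.g. a
+	 * 3-byte address + dummy + data op uses 3 + 1 + 1 = 5 and fits.
+	 */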
+	if (op->addr.nbytes +
+	   (op->dummy.nbytes ? 1:0) +
+	   (op->data.nbytes ? 1:0) > 6)
+		return false;
+
+	/* Max 64 dummy clock cycles supported */
+	if (op->dummy.nbytes &&
+	    (op->dummy.nbytes * 8 / op->dummy.buswidth > 64))
+		return false;
+
+	/* Max data length, check controller limits and alignment */
+	if (op->data.dir == SPI_MEM_DATA_IN &&
+	    (op->data.nbytes > q->devtype_data->ahb_buf_size ||
+	     (op->data.nbytes > q->devtype_data->rxfifo - 4 &&
+	      !IS_ALIGNED(op->data.nbytes, 8))))
+		return false;
+
+	if (op->data.dir == SPI_MEM_DATA_OUT &&
+	    op->data.nbytes > q->devtype_data->txfifo)
+		return false;
+
+	return true;
+}
+
+static void fsl_qspi_prepare_lut(struct fsl_qspi *q,
+				 const struct spi_mem_op *op)
+{
+	void __iomem *base = q->iobase;
+	u32 lutval[4] = {};
+	int lutidx = 1, i;
+
+	lutval[0] |= LUT_DEF(0, LUT_CMD, LUT_PAD(op->cmd.buswidth),
+			     op->cmd.opcode);
+
+	/*
+	 * For some unknown reason, using LUT_ADDR doesn't work in some
+	 * cases (at least with only one byte long addresses), so
+	 * let's use LUT_MODE to write the address bytes one by one
+	 */
+	for (i = 0; i < op->addr.nbytes; i++) {
+		u8 addrbyte = op->addr.val >> (8 * (op->addr.nbytes - i - 1));
+
+		lutval[lutidx / 2] |= LUT_DEF(lutidx, LUT_MODE,
+					      LUT_PAD(op->addr.buswidth),
+					      addrbyte);
+		lutidx++;
+	}
+
+	if (op->dummy.nbytes) {
+		lutval[lutidx / 2] |= LUT_DEF(lutidx, LUT_DUMMY,
+					      LUT_PAD(op->dummy.buswidth),
+					      op->dummy.nbytes * 8 /
+					      op->dummy.buswidth);
+		lutidx++;
+	}
+
+	if (op->data.nbytes) {
+		lutval[lutidx / 2] |= LUT_DEF(lutidx,
+					      op->data.dir == SPI_MEM_DATA_IN ?
+					      LUT_FSL_READ : LUT_FSL_WRITE,
+					      LUT_PAD(op->data.buswidth),
+					      0);
+		lutidx++;
+	}
+
+	lutval[lutidx / 2] |= LUT_DEF(lutidx, LUT_STOP, 0, 0);
+
+	/* unlock LUT */
+	qspi_writel(q, QUADSPI_LUTKEY_VALUE, q->iobase + QUADSPI_LUTKEY);
+	qspi_writel(q, QUADSPI_LCKER_UNLOCK, q->iobase + QUADSPI_LCKCR);
+
+	/* fill LUT */
+	for (i = 0; i < ARRAY_SIZE(lutval); i++)
+		qspi_writel(q, lutval[i], base + QUADSPI_LUT_REG(i));
+
+	/* lock LUT */
+	qspi_writel(q, QUADSPI_LUTKEY_VALUE, q->iobase + QUADSPI_LUTKEY);
+	qspi_writel(q, QUADSPI_LCKER_LOCK, q->iobase + QUADSPI_LCKCR);
+}
+
+static int fsl_qspi_clk_prep_enable(struct fsl_qspi *q)
+{
+	int ret;
+
+	ret = clk_prepare_enable(q->clk_en);
+	if (ret)
+		return ret;
+
+	ret = clk_prepare_enable(q->clk);
+	if (ret) {
+		clk_disable_unprepare(q->clk_en);
+		return ret;
+	}
+
+	if (needs_wakeup_wait_mode(q))
+		pm_qos_add_request(&q->pm_qos_req, PM_QOS_CPU_DMA_LATENCY, 0);
+
+	return 0;
+}
+
+static void fsl_qspi_clk_disable_unprep(struct fsl_qspi *q)
+{
+	if (needs_wakeup_wait_mode(q))
+		pm_qos_remove_request(&q->pm_qos_req);
+
+	clk_disable_unprepare(q->clk);
+	clk_disable_unprepare(q->clk_en);
+}
+
+/*
+ * If we have changed the content of the flash by writing or erasing, or if we
+ * read from flash with a different offset into the page buffer, we need to
+ * invalidate the AHB buffer. If we do not do so, we may read out the wrong
+ * data. The spec tells us to reset the AHB domain and the Serial Flash
+ * domain at the same time.
+ */
+static void fsl_qspi_invalidate(struct fsl_qspi *q)
+{
+	u32 reg;
+
+	reg = qspi_readl(q, q->iobase + QUADSPI_MCR);
+	reg |= QUADSPI_MCR_SWRSTHD_MASK | QUADSPI_MCR_SWRSTSD_MASK;
+	qspi_writel(q, reg, q->iobase + QUADSPI_MCR);
+
+	/*
+	 * The minimum delay: 1 AHB + 2 SFCK clocks.
+	 * A delay of 1 us is enough.
+	 */
+	udelay(1);
+
+	reg &= ~(QUADSPI_MCR_SWRSTHD_MASK | QUADSPI_MCR_SWRSTSD_MASK);
+	qspi_writel(q, reg, q->iobase + QUADSPI_MCR);
+}
+
+static void fsl_qspi_select_mem(struct fsl_qspi *q, struct spi_device *spi)
+{
+	unsigned long rate = spi->max_speed_hz;
+	int ret;
+
+	if (q->selected == spi->chip_select)
+		return;
+
+	if (needs_4x_clock(q))
+		rate *= 4;
+
+	fsl_qspi_clk_disable_unprep(q);
+
+	ret = clk_set_rate(q->clk, rate);
+	if (ret)
+		return;
+
+	ret = fsl_qspi_clk_prep_enable(q);
+	if (ret)
+		return;
+
+	q->selected = spi->chip_select;
+
+	fsl_qspi_invalidate(q);
+}
+
+static void fsl_qspi_read_ahb(struct fsl_qspi *q, const struct spi_mem_op *op)
+{
+	memcpy_fromio(op->data.buf.in,
+		      q->ahb_addr + q->selected * q->devtype_data->ahb_buf_size,
+		      op->data.nbytes);
+}
+
+static void fsl_qspi_fill_txfifo(struct fsl_qspi *q,
+				 const struct spi_mem_op *op)
+{
+	void __iomem *base = q->iobase;
+	int i;
+	u32 val;
+
+	for (i = 0; i < ALIGN_DOWN(op->data.nbytes, 4); i += 4) {
+		memcpy(&val, op->data.buf.out + i, 4);
+		val = fsl_qspi_endian_xchg(q, val);
+		qspi_writel(q, val, base + QUADSPI_TBDR);
+	}
+
+	if (i < op->data.nbytes) {
+		memcpy(&val, op->data.buf.out + i, op->data.nbytes - i);
+		val = fsl_qspi_endian_xchg(q, val);
+		qspi_writel(q, val, base + QUADSPI_TBDR);
+	}
+
+	if (needs_fill_txfifo(q)) {
+		for (i = op->data.nbytes; i < 16; i += 4)
+			qspi_writel(q, 0, base + QUADSPI_TBDR);
+	}
+}
+
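+/*
+ * Illustrative example of the partial-word handling below: a 10-byte IP
+ * read copies two full 32-bit words from QUADSPI_RBDR(0)/QUADSPI_RBDR(1)
+ * and the remaining 2 bytes from QUADSPI_RBDR(2), since
+ * ALIGN_DOWN(10, 4) = 8.
+ */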
+static void fsl_qspi_read_rxfifo(struct fsl_qspi *q,
+			  const struct spi_mem_op *op)
+{
+	void __iomem *base = q->iobase;
+	int i;
+	u8 *buf = op->data.buf.in;
+	u32 val;
+
+	for (i = 0; i < ALIGN_DOWN(op->data.nbytes, 4); i += 4) {
+		val = qspi_readl(q, base + QUADSPI_RBDR(i / 4));
+		val = fsl_qspi_endian_xchg(q, val);
+		memcpy(buf + i, &val, 4);
+	}
+
+	if (i < op->data.nbytes) {
+		val = qspi_readl(q, base + QUADSPI_RBDR(i / 4));
+		val = fsl_qspi_endian_xchg(q, val);
+		memcpy(buf + i, &val, op->data.nbytes - i);
+	}
+}
+
+static int fsl_qspi_do_op(struct fsl_qspi *q, const struct spi_mem_op *op)
+{
+	void __iomem *base = q->iobase;
+	int err = 0;
+
+	init_completion(&q->c);
+
+	/*
+	 * Always start the sequence at the same index since we update
+	 * the LUT at each exec_op() call. Also specify the DATA
+	 * length, since it has not been specified in the LUT.
+	 */
+	qspi_writel(q, op->data.nbytes | QUADSPI_IPCR_SEQID(SEQID_LUT),
+		    base + QUADSPI_IPCR);
+
+	/* Wait for the interrupt. */
+	if (!wait_for_completion_timeout(&q->c, msecs_to_jiffies(1000)))
+		err = -ETIMEDOUT;
+
+	if (!err && op->data.nbytes && op->data.dir == SPI_MEM_DATA_IN)
+		fsl_qspi_read_rxfifo(q, op);
+
+	return err;
+}
+
+static int fsl_qspi_readl_poll_tout(struct fsl_qspi *q, void __iomem *base,
+				    u32 mask, u32 delay_us, u32 timeout_us)
+{
+	u32 reg;
+
+	if (!q->devtype_data->little_endian)
+		mask = (u32)cpu_to_be32(mask);
+
+	return readl_poll_timeout(base, reg, !(reg & mask), delay_us,
+				  timeout_us);
+}
+
+static int fsl_qspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
+{
+	struct fsl_qspi *q = spi_controller_get_devdata(mem->spi->master);
+	void __iomem *base = q->iobase;
+	u32 addr_offset = 0;
+	int err = 0;
+
+	mutex_lock(&q->lock);
+
+	/* Wait for the controller to be ready */
+	fsl_qspi_readl_poll_tout(q, base + QUADSPI_SR, (QUADSPI_SR_IP_ACC_MASK |
+				 QUADSPI_SR_AHB_ACC_MASK), 10, 1000);
+
+	fsl_qspi_select_mem(q, mem->spi);
+
+	if (needs_amba_base_offset(q))
+		addr_offset = q->memmap_phy;
+
+	qspi_writel(q,
+		    q->selected * q->devtype_data->ahb_buf_size + addr_offset,
+		    base + QUADSPI_SFAR);
+
+	qspi_writel(q, qspi_readl(q, base + QUADSPI_MCR) |
+		    QUADSPI_MCR_CLR_RXF_MASK | QUADSPI_MCR_CLR_TXF_MASK,
+		    base + QUADSPI_MCR);
+
+	qspi_writel(q, QUADSPI_SPTRCLR_BFPTRC | QUADSPI_SPTRCLR_IPPTRC,
+		    base + QUADSPI_SPTRCLR);
+
+	fsl_qspi_prepare_lut(q, op);
+
+	/*
+	 * If we have large chunks of data, we read them through the AHB bus
+	 * by accessing the mapped memory. In all other cases we use
+	 * IP commands to access the flash.
+	 */
+	if (op->data.nbytes > (q->devtype_data->rxfifo - 4) &&
+	    op->data.dir == SPI_MEM_DATA_IN) {
+		fsl_qspi_read_ahb(q, op);
+	} else {
+		qspi_writel(q, QUADSPI_RBCT_WMRK_MASK |
+			    QUADSPI_RBCT_RXBRD_USEIPS, base + QUADSPI_RBCT);
+
+		if (op->data.nbytes && op->data.dir == SPI_MEM_DATA_OUT)
+			fsl_qspi_fill_txfifo(q, op);
+
+		err = fsl_qspi_do_op(q, op);
+	}
+
+	/* Invalidate the data in the AHB buffer. */
+	fsl_qspi_invalidate(q);
+
+	mutex_unlock(&q->lock);
+
+	return err;
+}
+
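+/*
+ * Illustrative sizing, assuming the imx7d limits above (128-byte RX FIFO,
+ * 512-byte TX FIFO, 1 KiB AHB buffer): a 2000-byte read is clipped to
+ * 1024 bytes, a 300-byte read is rounded down to 296 (a multiple of 8),
+ * and a 600-byte write is clipped to 512 bytes.
+ */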
+static int fsl_qspi_adjust_op_size(struct spi_mem *mem, struct spi_mem_op *op)
+{
+	struct fsl_qspi *q = spi_controller_get_devdata(mem->spi->master);
+
+	if (op->data.dir == SPI_MEM_DATA_OUT) {
+		if (op->data.nbytes > q->devtype_data->txfifo)
+			op->data.nbytes = q->devtype_data->txfifo;
+	} else {
+		if (op->data.nbytes > q->devtype_data->ahb_buf_size)
+			op->data.nbytes = q->devtype_data->ahb_buf_size;
+		else if (op->data.nbytes > (q->devtype_data->rxfifo - 4))
+			op->data.nbytes = ALIGN_DOWN(op->data.nbytes, 8);
+	}
+
+	return 0;
+}
+
+static int fsl_qspi_default_setup(struct fsl_qspi *q)
+{
+	void __iomem *base = q->iobase;
+	u32 reg, addr_offset = 0;
+	int ret;
+
+	/* Disable and unprepare clocks so no glitch reaches the controller */
+	fsl_qspi_clk_disable_unprep(q);
+
+	/* Set the default frequency; we will change it later if necessary. */
+	ret = clk_set_rate(q->clk, 66000000);
+	if (ret)
+		return ret;
+
+	ret = fsl_qspi_clk_prep_enable(q);
+	if (ret)
+		return ret;
+
+	/* Reset the module */
+	qspi_writel(q, QUADSPI_MCR_SWRSTSD_MASK | QUADSPI_MCR_SWRSTHD_MASK,
+		    base + QUADSPI_MCR);
+	udelay(1);
+
+	/* Disable the module */
+	qspi_writel(q, QUADSPI_MCR_MDIS_MASK | QUADSPI_MCR_RESERVED_MASK,
+		    base + QUADSPI_MCR);
+
+	reg = qspi_readl(q, base + QUADSPI_SMPR);
+	qspi_writel(q, reg & ~(QUADSPI_SMPR_FSDLY_MASK
+			| QUADSPI_SMPR_FSPHS_MASK
+			| QUADSPI_SMPR_HSENA_MASK
+			| QUADSPI_SMPR_DDRSMP_MASK), base + QUADSPI_SMPR);
+
+	/* We only use buffer3 for AHB reads */
+	qspi_writel(q, 0, base + QUADSPI_BUF0IND);
+	qspi_writel(q, 0, base + QUADSPI_BUF1IND);
+	qspi_writel(q, 0, base + QUADSPI_BUF2IND);
+
+	qspi_writel(q, QUADSPI_BFGENCR_SEQID(SEQID_LUT),
+		    q->iobase + QUADSPI_BFGENCR);
+	qspi_writel(q, QUADSPI_RBCT_WMRK_MASK, base + QUADSPI_RBCT);
+	qspi_writel(q, QUADSPI_BUF3CR_ALLMST_MASK |
+		    QUADSPI_BUF3CR_ADATSZ(q->devtype_data->ahb_buf_size / 8),
+		    base + QUADSPI_BUF3CR);
+
+	if (needs_amba_base_offset(q))
+		addr_offset = q->memmap_phy;
+
+	/*
+	 * In HW there can be a maximum of four chips on two buses with
+	 * two chip selects on each bus. We use four chip selects in SW
+	 * to differentiate between the four chips.
+	 * We use ahb_buf_size for each chip and set SFA1AD, SFA2AD, SFB1AD,
+	 * SFB2AD accordingly.
+	 */
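+	/*
+	 * E.g. with a 1 KiB ahb_buf_size and a hypothetical memmap_phy of
+	 * 0x60000000, the top addresses become SFA1AD = 0x60000400,
+	 * SFA2AD = 0x60000800, SFB1AD = 0x60000c00 and SFB2AD = 0x60001000,
+	 * so chip select n is read through AHB at memmap_phy + n * 1 KiB.
+	 */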
+	qspi_writel(q, q->devtype_data->ahb_buf_size + addr_offset,
+		    base + QUADSPI_SFA1AD);
+	qspi_writel(q, q->devtype_data->ahb_buf_size * 2 + addr_offset,
+		    base + QUADSPI_SFA2AD);
+	qspi_writel(q, q->devtype_data->ahb_buf_size * 3 + addr_offset,
+		    base + QUADSPI_SFB1AD);
+	qspi_writel(q, q->devtype_data->ahb_buf_size * 4 + addr_offset,
+		    base + QUADSPI_SFB2AD);
+
+	q->selected = -1;
+
+	/* Enable the module */
+	qspi_writel(q, QUADSPI_MCR_RESERVED_MASK | QUADSPI_MCR_END_CFG_MASK,
+		    base + QUADSPI_MCR);
+
+	/* clear all interrupt status */
+	qspi_writel(q, 0xffffffff, q->iobase + QUADSPI_FR);
+
+	/* enable the interrupt */
+	qspi_writel(q, QUADSPI_RSER_TFIE, q->iobase + QUADSPI_RSER);
+
+	return 0;
+}
+
+static const char *fsl_qspi_get_name(struct spi_mem *mem)
+{
+	struct fsl_qspi *q = spi_controller_get_devdata(mem->spi->master);
+	struct device *dev = &mem->spi->dev;
+	const char *name;
+
+	/*
+	 * In order to keep mtdparts compatible with the old MTD driver at
+	 * mtd/spi-nor/fsl-quadspi.c, we set a custom name derived from the
+	 * platform_device of the controller.
+	 */
+	if (of_get_available_child_count(q->dev->of_node) == 1)
+		return dev_name(q->dev);
+
+	name = devm_kasprintf(dev, GFP_KERNEL,
+			      "%s-%d", dev_name(q->dev),
+			      mem->spi->chip_select);
+
+	if (!name) {
+		dev_err(dev, "failed to get memory for custom flash name\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	return name;
+}
+
+static const struct spi_controller_mem_ops fsl_qspi_mem_ops = {
+	.adjust_op_size = fsl_qspi_adjust_op_size,
+	.supports_op = fsl_qspi_supports_op,
+	.exec_op = fsl_qspi_exec_op,
+	.get_name = fsl_qspi_get_name,
+};
+
+static int fsl_qspi_probe(struct platform_device *pdev)
+{
+	struct spi_controller *ctlr;
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	struct resource *res;
+	struct fsl_qspi *q;
+	int ret;
+
+	ctlr = spi_alloc_master(&pdev->dev, sizeof(*q));
+	if (!ctlr)
+		return -ENOMEM;
+
+	ctlr->mode_bits = SPI_RX_DUAL | SPI_RX_QUAD |
+			  SPI_TX_DUAL | SPI_TX_QUAD;
+
+	q = spi_controller_get_devdata(ctlr);
+	q->dev = dev;
+	q->devtype_data = of_device_get_match_data(dev);
+	if (!q->devtype_data) {
+		ret = -ENODEV;
+		goto err_put_ctrl;
+	}
+
+	platform_set_drvdata(pdev, q);
+
+	/* find the resources */
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "QuadSPI");
+	q->iobase = devm_ioremap_resource(dev, res);
+	if (IS_ERR(q->iobase)) {
+		ret = PTR_ERR(q->iobase);
+		goto err_put_ctrl;
+	}
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+					"QuadSPI-memory");
+	q->ahb_addr = devm_ioremap_resource(dev, res);
+	if (IS_ERR(q->ahb_addr)) {
+		ret = PTR_ERR(q->ahb_addr);
+		goto err_put_ctrl;
+	}
+
+	q->memmap_phy = res->start;
+
+	/* find the clocks */
+	q->clk_en = devm_clk_get(dev, "qspi_en");
+	if (IS_ERR(q->clk_en)) {
+		ret = PTR_ERR(q->clk_en);
+		goto err_put_ctrl;
+	}
+
+	q->clk = devm_clk_get(dev, "qspi");
+	if (IS_ERR(q->clk)) {
+		ret = PTR_ERR(q->clk);
+		goto err_put_ctrl;
+	}
+
+	ret = fsl_qspi_clk_prep_enable(q);
+	if (ret) {
+		dev_err(dev, "can not enable the clock\n");
+		goto err_put_ctrl;
+	}
+
+	/* find the irq */
+	ret = platform_get_irq(pdev, 0);
+	if (ret < 0)
+		goto err_disable_clk;
+
+	ret = devm_request_irq(dev, ret,
+			fsl_qspi_irq_handler, 0, pdev->name, q);
+	if (ret) {
+		dev_err(dev, "failed to request irq: %d\n", ret);
+		goto err_disable_clk;
+	}
+
+	mutex_init(&q->lock);
+
+	ctlr->bus_num = -1;
+	ctlr->num_chipselect = 4;
+	ctlr->mem_ops = &fsl_qspi_mem_ops;
+
+	fsl_qspi_default_setup(q);
+
+	ctlr->dev.of_node = np;
+
+	ret = devm_spi_register_controller(dev, ctlr);
+	if (ret)
+		goto err_destroy_mutex;
+
+	return 0;
+
+err_destroy_mutex:
+	mutex_destroy(&q->lock);
+
+err_disable_clk:
+	fsl_qspi_clk_disable_unprep(q);
+
+err_put_ctrl:
+	spi_controller_put(ctlr);
+
+	dev_err(dev, "Freescale QuadSPI probe failed\n");
+	return ret;
+}
+
+static int fsl_qspi_remove(struct platform_device *pdev)
+{
+	struct fsl_qspi *q = platform_get_drvdata(pdev);
+
+	/* disable the hardware */
+	qspi_writel(q, QUADSPI_MCR_MDIS_MASK, q->iobase + QUADSPI_MCR);
+	qspi_writel(q, 0x0, q->iobase + QUADSPI_RSER);
+
+	fsl_qspi_clk_disable_unprep(q);
+
+	mutex_destroy(&q->lock);
+
+	return 0;
+}
+
+static int fsl_qspi_suspend(struct device *dev)
+{
+	return 0;
+}
+
+static int fsl_qspi_resume(struct device *dev)
+{
+	struct fsl_qspi *q = dev_get_drvdata(dev);
+
+	fsl_qspi_default_setup(q);
+
+	return 0;
+}
+
+static const struct of_device_id fsl_qspi_dt_ids[] = {
+	{ .compatible = "fsl,vf610-qspi", .data = &vybrid_data, },
+	{ .compatible = "fsl,imx6sx-qspi", .data = &imx6sx_data, },
+	{ .compatible = "fsl,imx7d-qspi", .data = &imx7d_data, },
+	{ .compatible = "fsl,imx6ul-qspi", .data = &imx6ul_data, },
+	{ .compatible = "fsl,ls1021a-qspi", .data = &ls1021a_data, },
+	{ .compatible = "fsl,ls2080a-qspi", .data = &ls2080a_data, },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, fsl_qspi_dt_ids);
+
+static const struct dev_pm_ops fsl_qspi_pm_ops = {
+	.suspend	= fsl_qspi_suspend,
+	.resume		= fsl_qspi_resume,
+};
+
+static struct platform_driver fsl_qspi_driver = {
+	.driver = {
+		.name	= "fsl-quadspi",
+		.of_match_table = fsl_qspi_dt_ids,
+		.pm =   &fsl_qspi_pm_ops,
+	},
+	.probe          = fsl_qspi_probe,
+	.remove		= fsl_qspi_remove,
+};
+module_platform_driver(fsl_qspi_driver);
+
+MODULE_DESCRIPTION("Freescale QuadSPI Controller Driver");
+MODULE_AUTHOR("Freescale Semiconductor Inc.");
+MODULE_AUTHOR("Boris Brezillon <bbrezillon@kernel.org>");
+MODULE_AUTHOR("Frieder Schrempf <frieder.schrempf@kontron.de>");
+MODULE_AUTHOR("Yogesh Gaur <yogeshnarayan.gaur@nxp.com>");
+MODULE_AUTHOR("Suresh Gupta <suresh.gupta@nxp.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/spi/spi-fsl-spi.c b/drivers/spi/spi-fsl-spi.c
index 8f2e978..4b80ace 100644
--- a/drivers/spi/spi-fsl-spi.c
+++ b/drivers/spi/spi-fsl-spi.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Freescale SPI controller driver.
  *
@@ -13,16 +14,11 @@
  * GRLIB support:
  * Copyright (c) 2012 Aeroflex Gaisler AB.
  * Author: Andreas Larsson <andreas@gaisler.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
  */
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
 #include <linux/fsl_devices.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/kernel.h>
@@ -32,13 +28,20 @@
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
-#include <linux/of_gpio.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/spi_bitbang.h>
 #include <linux/types.h>
 
+#ifdef CONFIG_FSL_SOC
+#include <sysdev/fsl_soc.h>
+#endif
+
+/* Specific to the MPC8306/MPC8309 */
+#define IMMR_SPI_CS_OFFSET 0x14c
+#define SPI_BOOT_SEL_BIT   0x80000000
+
 #include "spi-fsl-lib.h"
 #include "spi-fsl-cpm.h"
 #include "spi-fsl-spi.h"
@@ -355,33 +358,50 @@
 static int fsl_spi_do_one_msg(struct spi_master *master,
 			      struct spi_message *m)
 {
+	struct mpc8xxx_spi *mpc8xxx_spi = spi_master_get_devdata(master);
 	struct spi_device *spi = m->spi;
 	struct spi_transfer *t, *first;
 	unsigned int cs_change;
 	const int nsecs = 50;
-	int status;
+	int status, last_bpw;
+
+	/*
+	 * In CPU mode, optimize large byte transfers to use larger
+	 * bits_per_word values to reduce the number of interrupts taken.
+	 */
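+	/*
+	 * For instance, a 1024-byte transfer at 8 bits_per_word is promoted
+	 * to 32 bits_per_word (1024 & 3 == 0), cutting the interrupt count
+	 * roughly by four; a 1022-byte transfer only qualifies for 16 bits.
+	 */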
+	if (!(mpc8xxx_spi->flags & SPI_CPM_MODE)) {
+		list_for_each_entry(t, &m->transfers, transfer_list) {
+			if (t->len < 256 || t->bits_per_word != 8)
+				continue;
+			if ((t->len & 3) == 0)
+				t->bits_per_word = 32;
+			else if ((t->len & 1) == 0)
+				t->bits_per_word = 16;
+		}
+	}
 
 	/* Don't allow changes if CS is active */
-	first = list_first_entry(&m->transfers, struct spi_transfer,
-			transfer_list);
+	cs_change = 1;
 	list_for_each_entry(t, &m->transfers, transfer_list) {
-		if ((first->bits_per_word != t->bits_per_word) ||
-			(first->speed_hz != t->speed_hz)) {
+		if (cs_change)
+			first = t;
+		cs_change = t->cs_change;
+		if (first->speed_hz != t->speed_hz) {
 			dev_err(&spi->dev,
-				"bits_per_word/speed_hz should be same for the same SPI transfer\n");
+				"speed_hz cannot change while CS is active\n");
 			return -EINVAL;
 		}
 	}
 
+	last_bpw = -1;
 	cs_change = 1;
 	status = -EINVAL;
 	list_for_each_entry(t, &m->transfers, transfer_list) {
-		if (t->bits_per_word || t->speed_hz) {
-			if (cs_change)
-				status = fsl_spi_setup_transfer(spi, t);
-			if (status < 0)
-				break;
-		}
+		if (cs_change || last_bpw != t->bits_per_word)
+			status = fsl_spi_setup_transfer(spi, t);
+		if (status < 0)
+			break;
+		last_bpw = t->bits_per_word;
 
 		if (cs_change) {
 			fsl_spi_chipselect(spi, BITBANG_CS_ACTIVE);
@@ -407,7 +427,6 @@
 	}
 
 	m->status = status;
-	spi_finalize_current_message(master);
 
 	if (status || !cs_change) {
 		ndelay(nsecs);
@@ -415,6 +434,7 @@
 	}
 
 	fsl_spi_setup_transfer(spi, NULL);
+	spi_finalize_current_message(master);
 	return 0;
 }
 
@@ -460,32 +480,6 @@
 		return retval;
 	}
 
-	if (mpc8xxx_spi->type == TYPE_GRLIB) {
-		if (gpio_is_valid(spi->cs_gpio)) {
-			int desel;
-
-			retval = gpio_request(spi->cs_gpio,
-					      dev_name(&spi->dev));
-			if (retval)
-				return retval;
-
-			desel = !(spi->mode & SPI_CS_HIGH);
-			retval = gpio_direction_output(spi->cs_gpio, desel);
-			if (retval) {
-				gpio_free(spi->cs_gpio);
-				return retval;
-			}
-		} else if (spi->cs_gpio != -ENOENT) {
-			if (spi->cs_gpio < 0)
-				return spi->cs_gpio;
-			return -EINVAL;
-		}
-		/* When spi->cs_gpio == -ENOENT, a hole in the phandle list
-		 * indicates to use native chipselect if present, or allow for
-		 * an always selected chip
-		 */
-	}
-
 	/* Initialize chipselect - might be active for SPI_CS_HIGH mode */
 	fsl_spi_chipselect(spi, BITBANG_CS_INACTIVE);
 
@@ -494,12 +488,8 @@
 
 static void fsl_spi_cleanup(struct spi_device *spi)
 {
-	struct mpc8xxx_spi *mpc8xxx_spi = spi_master_get_devdata(spi->master);
 	struct spi_mpc8xxx_cs *cs = spi_get_ctldata(spi);
 
-	if (mpc8xxx_spi->type == TYPE_GRLIB && gpio_is_valid(spi->cs_gpio))
-		gpio_free(spi->cs_gpio);
-
 	kfree(cs);
 	spi_set_ctldata(spi, NULL);
 }
@@ -565,8 +555,8 @@
 	u32 slvsel;
 	u16 cs = spi->chip_select;
 
-	if (gpio_is_valid(spi->cs_gpio)) {
-		gpio_set_value(spi->cs_gpio, on);
+	if (spi->cs_gpiod) {
+		gpiod_set_value(spi->cs_gpiod, on);
 	} else if (cs < mpc8xxx_spi->native_chipselects) {
 		slvsel = mpc8xxx_spi_read_reg(&reg_base->slvsel);
 		slvsel = on ? (slvsel | (1 << cs)) : (slvsel & ~(1 << cs));
@@ -697,115 +687,17 @@
 
 static void fsl_spi_cs_control(struct spi_device *spi, bool on)
 {
-	struct device *dev = spi->dev.parent->parent;
-	struct fsl_spi_platform_data *pdata = dev_get_platdata(dev);
-	struct mpc8xxx_spi_probe_info *pinfo = to_of_pinfo(pdata);
-	u16 cs = spi->chip_select;
-	int gpio = pinfo->gpios[cs];
-	bool alow = pinfo->alow_flags[cs];
+	if (spi->cs_gpiod) {
+		gpiod_set_value(spi->cs_gpiod, on);
+	} else {
+		struct device *dev = spi->dev.parent->parent;
+		struct fsl_spi_platform_data *pdata = dev_get_platdata(dev);
+		struct mpc8xxx_spi_probe_info *pinfo = to_of_pinfo(pdata);
 
-	gpio_set_value(gpio, on ^ alow);
-}
-
-static int of_fsl_spi_get_chipselects(struct device *dev)
-{
-	struct device_node *np = dev->of_node;
-	struct fsl_spi_platform_data *pdata = dev_get_platdata(dev);
-	struct mpc8xxx_spi_probe_info *pinfo = to_of_pinfo(pdata);
-	int ngpios;
-	int i = 0;
-	int ret;
-
-	ngpios = of_gpio_count(np);
-	if (ngpios <= 0) {
-		/*
-		 * SPI w/o chip-select line. One SPI device is still permitted
-		 * though.
-		 */
-		pdata->max_chipselect = 1;
-		return 0;
+		if (WARN_ON_ONCE(!pinfo->immr_spi_cs))
+			return;
+		iowrite32be(on ? SPI_BOOT_SEL_BIT : 0, pinfo->immr_spi_cs);
 	}
-
-	pinfo->gpios = kmalloc_array(ngpios, sizeof(*pinfo->gpios),
-				     GFP_KERNEL);
-	if (!pinfo->gpios)
-		return -ENOMEM;
-	memset(pinfo->gpios, -1, ngpios * sizeof(*pinfo->gpios));
-
-	pinfo->alow_flags = kcalloc(ngpios, sizeof(*pinfo->alow_flags),
-				    GFP_KERNEL);
-	if (!pinfo->alow_flags) {
-		ret = -ENOMEM;
-		goto err_alloc_flags;
-	}
-
-	for (; i < ngpios; i++) {
-		int gpio;
-		enum of_gpio_flags flags;
-
-		gpio = of_get_gpio_flags(np, i, &flags);
-		if (!gpio_is_valid(gpio)) {
-			dev_err(dev, "invalid gpio #%d: %d\n", i, gpio);
-			ret = gpio;
-			goto err_loop;
-		}
-
-		ret = gpio_request(gpio, dev_name(dev));
-		if (ret) {
-			dev_err(dev, "can't request gpio #%d: %d\n", i, ret);
-			goto err_loop;
-		}
-
-		pinfo->gpios[i] = gpio;
-		pinfo->alow_flags[i] = flags & OF_GPIO_ACTIVE_LOW;
-
-		ret = gpio_direction_output(pinfo->gpios[i],
-					    pinfo->alow_flags[i]);
-		if (ret) {
-			dev_err(dev,
-				"can't set output direction for gpio #%d: %d\n",
-				i, ret);
-			goto err_loop;
-		}
-	}
-
-	pdata->max_chipselect = ngpios;
-	pdata->cs_control = fsl_spi_cs_control;
-
-	return 0;
-
-err_loop:
-	while (i >= 0) {
-		if (gpio_is_valid(pinfo->gpios[i]))
-			gpio_free(pinfo->gpios[i]);
-		i--;
-	}
-
-	kfree(pinfo->alow_flags);
-	pinfo->alow_flags = NULL;
-err_alloc_flags:
-	kfree(pinfo->gpios);
-	pinfo->gpios = NULL;
-	return ret;
-}
-
-static int of_fsl_spi_free_chipselects(struct device *dev)
-{
-	struct fsl_spi_platform_data *pdata = dev_get_platdata(dev);
-	struct mpc8xxx_spi_probe_info *pinfo = to_of_pinfo(pdata);
-	int i;
-
-	if (!pinfo->gpios)
-		return 0;
-
-	for (i = 0; i < pdata->max_chipselect; i++) {
-		if (gpio_is_valid(pinfo->gpios[i]))
-			gpio_free(pinfo->gpios[i]);
-	}
-
-	kfree(pinfo->gpios);
-	kfree(pinfo->alow_flags);
-	return 0;
 }
 
 static int of_fsl_spi_probe(struct platform_device *ofdev)
@@ -823,9 +715,21 @@
 
 	type = fsl_spi_get_type(&ofdev->dev);
 	if (type == TYPE_FSL) {
-		ret = of_fsl_spi_get_chipselects(dev);
-		if (ret)
-			goto err;
+		struct fsl_spi_platform_data *pdata = dev_get_platdata(dev);
+#if IS_ENABLED(CONFIG_FSL_SOC)
+		struct mpc8xxx_spi_probe_info *pinfo = to_of_pinfo(pdata);
+		bool spisel_boot = of_property_read_bool(np, "fsl,spisel_boot");
+
+		if (spisel_boot) {
+			pinfo->immr_spi_cs = ioremap(get_immrbase() + IMMR_SPI_CS_OFFSET, 4);
+			if (!pinfo->immr_spi_cs) {
+				ret = -ENOMEM;
+				goto err;
+			}
+		}
+#endif
+
+		pdata->cs_control = fsl_spi_cs_control;
 	}
 
 	ret = of_address_to_resource(np, 0, &mem);
@@ -848,8 +752,6 @@
 
 err:
 	irq_dispose_mapping(irq);
-	if (type == TYPE_FSL)
-		of_fsl_spi_free_chipselects(dev);
 	return ret;
 }
 
@@ -859,8 +761,6 @@
 	struct mpc8xxx_spi *mpc8xxx_spi = spi_master_get_devdata(master);
 
 	fsl_spi_cpm_free(mpc8xxx_spi);
-	if (mpc8xxx_spi->type == TYPE_FSL)
-		of_fsl_spi_free_chipselects(&ofdev->dev);
 	return 0;
 }
 
diff --git a/drivers/spi/spi-fsl-spi.h b/drivers/spi/spi-fsl-spi.h
index 9a6dae0..fd8a5be 100644
--- a/drivers/spi/spi-fsl-spi.h
+++ b/drivers/spi/spi-fsl-spi.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Freescale SPI controller driver.
  *
@@ -13,11 +14,6 @@
  * GRLIB support:
  * Copyright (c) 2012 Aeroflex Gaisler AB.
  * Author: Andreas Larsson <andreas@gaisler.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
  */
 
 #ifndef __SPI_FSL_SPI_H__
diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c
new file mode 100644
index 0000000..6f3d64a
--- /dev/null
+++ b/drivers/spi/spi-geni-qcom.c
@@ -0,0 +1,691 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017-2018, The Linux foundation. All rights reserved.
+
+#include <linux/clk.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/log2.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/qcom-geni-se.h>
+#include <linux/spi/spi.h>
+#include <linux/spinlock.h>
+
+/* SPI SE specific registers and respective register fields */
+#define SE_SPI_CPHA		0x224
+#define CPHA			BIT(0)
+
+#define SE_SPI_LOOPBACK		0x22c
+#define LOOPBACK_ENABLE		0x1
+#define NORMAL_MODE		0x0
+#define LOOPBACK_MSK		GENMASK(1, 0)
+
+#define SE_SPI_CPOL		0x230
+#define CPOL			BIT(2)
+
+#define SE_SPI_DEMUX_OUTPUT_INV	0x24c
+#define CS_DEMUX_OUTPUT_INV_MSK	GENMASK(3, 0)
+
+#define SE_SPI_DEMUX_SEL	0x250
+#define CS_DEMUX_OUTPUT_SEL	GENMASK(3, 0)
+
+#define SE_SPI_TRANS_CFG	0x25c
+#define CS_TOGGLE		BIT(0)
+
+#define SE_SPI_WORD_LEN		0x268
+#define WORD_LEN_MSK		GENMASK(9, 0)
+#define MIN_WORD_LEN		4
+
+#define SE_SPI_TX_TRANS_LEN	0x26c
+#define SE_SPI_RX_TRANS_LEN	0x270
+#define TRANS_LEN_MSK		GENMASK(23, 0)
+
+#define SE_SPI_PRE_POST_CMD_DLY	0x274
+
+#define SE_SPI_DELAY_COUNTERS	0x278
+#define SPI_INTER_WORDS_DELAY_MSK	GENMASK(9, 0)
+#define SPI_CS_CLK_DELAY_MSK		GENMASK(19, 10)
+#define SPI_CS_CLK_DELAY_SHFT		10
+
+/* M_CMD OP codes for SPI */
+#define SPI_TX_ONLY		1
+#define SPI_RX_ONLY		2
+#define SPI_FULL_DUPLEX		3
+#define SPI_TX_RX		7
+#define SPI_CS_ASSERT		8
+#define SPI_CS_DEASSERT		9
+#define SPI_SCK_ONLY		10
+/* M_CMD params for SPI */
+#define SPI_PRE_CMD_DELAY	BIT(0)
+#define TIMESTAMP_BEFORE	BIT(1)
+#define FRAGMENTATION		BIT(2)
+#define TIMESTAMP_AFTER		BIT(3)
+#define POST_CMD_DELAY		BIT(4)
+
+enum spi_m_cmd_opcode {
+	CMD_NONE,
+	CMD_XFER,
+	CMD_CS,
+	CMD_CANCEL,
+};
+
+struct spi_geni_master {
+	struct geni_se se;
+	struct device *dev;
+	u32 tx_fifo_depth;
+	u32 fifo_width_bits;
+	u32 tx_wm;
+	unsigned long cur_speed_hz;
+	unsigned int cur_bits_per_word;
+	unsigned int tx_rem_bytes;
+	unsigned int rx_rem_bytes;
+	const struct spi_transfer *cur_xfer;
+	struct completion xfer_done;
+	unsigned int oversampling;
+	spinlock_t lock;
+	enum spi_m_cmd_opcode cur_mcmd;
+	int irq;
+};
+
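+/*
+ * Illustrative numbers only: if geni_se_clk_freq_match() picks a
+ * hypothetical 100 MHz source clock for a 5 MHz request with an
+ * oversampling factor of 2, then *clk_div = DIV_ROUND_UP(100 MHz,
+ * 2 * 5 MHz) = 10 and the actual word clock is 100 MHz / (2 * 10) = 5 MHz.
+ */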
+static int get_spi_clk_cfg(unsigned int speed_hz,
+			struct spi_geni_master *mas,
+			unsigned int *clk_idx,
+			unsigned int *clk_div)
+{
+	unsigned long sclk_freq;
+	unsigned int actual_hz;
+	struct geni_se *se = &mas->se;
+	int ret;
+
+	ret = geni_se_clk_freq_match(&mas->se,
+				speed_hz * mas->oversampling,
+				clk_idx, &sclk_freq, false);
+	if (ret) {
+		dev_err(mas->dev, "Failed(%d) to find src clk for %dHz\n",
+							ret, speed_hz);
+		return ret;
+	}
+
+	*clk_div = DIV_ROUND_UP(sclk_freq, mas->oversampling * speed_hz);
+	actual_hz = sclk_freq / (mas->oversampling * *clk_div);
+
+	dev_dbg(mas->dev, "req %u=>%u sclk %lu, idx %d, div %d\n", speed_hz,
+				actual_hz, sclk_freq, *clk_idx, *clk_div);
+	ret = clk_set_rate(se->clk, sclk_freq);
+	if (ret)
+		dev_err(mas->dev, "clk_set_rate failed %d\n", ret);
+	return ret;
+}
+
+static void handle_fifo_timeout(struct spi_master *spi,
+				struct spi_message *msg)
+{
+	struct spi_geni_master *mas = spi_master_get_devdata(spi);
+	unsigned long time_left, flags;
+	struct geni_se *se = &mas->se;
+
+	spin_lock_irqsave(&mas->lock, flags);
+	reinit_completion(&mas->xfer_done);
+	mas->cur_mcmd = CMD_CANCEL;
+	geni_se_cancel_m_cmd(se);
+	writel(0, se->base + SE_GENI_TX_WATERMARK_REG);
+	spin_unlock_irqrestore(&mas->lock, flags);
+	time_left = wait_for_completion_timeout(&mas->xfer_done, HZ);
+	if (time_left)
+		return;
+
+	spin_lock_irqsave(&mas->lock, flags);
+	reinit_completion(&mas->xfer_done);
+	geni_se_abort_m_cmd(se);
+	spin_unlock_irqrestore(&mas->lock, flags);
+	time_left = wait_for_completion_timeout(&mas->xfer_done, HZ);
+	if (!time_left)
+		dev_err(mas->dev, "Failed to cancel/abort m_cmd\n");
+}
+
+static void spi_geni_set_cs(struct spi_device *slv, bool set_flag)
+{
+	struct spi_geni_master *mas = spi_master_get_devdata(slv->master);
+	struct spi_master *spi = dev_get_drvdata(mas->dev);
+	struct geni_se *se = &mas->se;
+	unsigned long time_left;
+
+	reinit_completion(&mas->xfer_done);
+	pm_runtime_get_sync(mas->dev);
+	if (!(slv->mode & SPI_CS_HIGH))
+		set_flag = !set_flag;
+
+	mas->cur_mcmd = CMD_CS;
+	if (set_flag)
+		geni_se_setup_m_cmd(se, SPI_CS_ASSERT, 0);
+	else
+		geni_se_setup_m_cmd(se, SPI_CS_DEASSERT, 0);
+
+	time_left = wait_for_completion_timeout(&mas->xfer_done, HZ);
+	if (!time_left)
+		handle_fifo_timeout(spi, NULL);
+
+	pm_runtime_put(mas->dev);
+}
+
+static void spi_setup_word_len(struct spi_geni_master *mas, u16 mode,
+					unsigned int bits_per_word)
+{
+	unsigned int pack_words;
+	bool msb_first = (mode & SPI_LSB_FIRST) ? false : true;
+	struct geni_se *se = &mas->se;
+	u32 word_len;
+
+	word_len = readl(se->base + SE_SPI_WORD_LEN);
+
+	/*
+	 * If the FIFO width isn't an integer multiple of bits_per_word,
+	 * fall back to packing one SPI word per FIFO word.
+	 */
+	if (!(mas->fifo_width_bits % bits_per_word))
+		pack_words = mas->fifo_width_bits / bits_per_word;
+	else
+		pack_words = 1;
+	word_len &= ~WORD_LEN_MSK;
+	word_len |= ((bits_per_word - MIN_WORD_LEN) & WORD_LEN_MSK);
+	geni_se_config_packing(&mas->se, bits_per_word, pack_words, msb_first,
+								true, true);
+	writel(word_len, se->base + SE_SPI_WORD_LEN);
+}
+
+static int setup_fifo_params(struct spi_device *spi_slv,
+					struct spi_master *spi)
+{
+	struct spi_geni_master *mas = spi_master_get_devdata(spi);
+	struct geni_se *se = &mas->se;
+	u32 loopback_cfg, cpol, cpha, demux_output_inv;
+	u32 demux_sel, clk_sel, m_clk_cfg, idx, div;
+	int ret;
+
+	loopback_cfg = readl(se->base + SE_SPI_LOOPBACK);
+	cpol = readl(se->base + SE_SPI_CPOL);
+	cpha = readl(se->base + SE_SPI_CPHA);
+	demux_output_inv = 0;
+	loopback_cfg &= ~LOOPBACK_MSK;
+	cpol &= ~CPOL;
+	cpha &= ~CPHA;
+
+	if (spi_slv->mode & SPI_LOOP)
+		loopback_cfg |= LOOPBACK_ENABLE;
+
+	if (spi_slv->mode & SPI_CPOL)
+		cpol |= CPOL;
+
+	if (spi_slv->mode & SPI_CPHA)
+		cpha |= CPHA;
+
+	if (spi_slv->mode & SPI_CS_HIGH)
+		demux_output_inv = BIT(spi_slv->chip_select);
+
+	demux_sel = spi_slv->chip_select;
+	mas->cur_speed_hz = spi_slv->max_speed_hz;
+	mas->cur_bits_per_word = spi_slv->bits_per_word;
+
+	ret = get_spi_clk_cfg(mas->cur_speed_hz, mas, &idx, &div);
+	if (ret) {
+		dev_err(mas->dev, "Err setting clks ret(%d) for %ld\n",
+							ret, mas->cur_speed_hz);
+		return ret;
+	}
+
+	clk_sel = idx & CLK_SEL_MSK;
+	m_clk_cfg = (div << CLK_DIV_SHFT) | SER_CLK_EN;
+	spi_setup_word_len(mas, spi_slv->mode, spi_slv->bits_per_word);
+	writel(loopback_cfg, se->base + SE_SPI_LOOPBACK);
+	writel(demux_sel, se->base + SE_SPI_DEMUX_SEL);
+	writel(cpha, se->base + SE_SPI_CPHA);
+	writel(cpol, se->base + SE_SPI_CPOL);
+	writel(demux_output_inv, se->base + SE_SPI_DEMUX_OUTPUT_INV);
+	writel(clk_sel, se->base + SE_GENI_CLK_SEL);
+	writel(m_clk_cfg, se->base + GENI_SER_M_CLK_CFG);
+	return 0;
+}
+
+static int spi_geni_prepare_message(struct spi_master *spi,
+					struct spi_message *spi_msg)
+{
+	int ret;
+	struct spi_geni_master *mas = spi_master_get_devdata(spi);
+	struct geni_se *se = &mas->se;
+
+	geni_se_select_mode(se, GENI_SE_FIFO);
+	ret = setup_fifo_params(spi_msg->spi, spi);
+	if (ret)
+		dev_err(mas->dev, "Couldn't select mode %d\n", ret);
+	return ret;
+}
+
+static int spi_geni_init(struct spi_geni_master *mas)
+{
+	struct geni_se *se = &mas->se;
+	unsigned int proto, major, minor, ver;
+
+	pm_runtime_get_sync(mas->dev);
+
+	proto = geni_se_read_proto(se);
+	if (proto != GENI_SE_SPI) {
+		dev_err(mas->dev, "Invalid proto %d\n", proto);
+		pm_runtime_put(mas->dev);
+		return -ENXIO;
+	}
+	mas->tx_fifo_depth = geni_se_get_tx_fifo_depth(se);
+
+	/* Width of Tx and Rx FIFO is the same */
+	mas->fifo_width_bits = geni_se_get_tx_fifo_width(se);
+
+	/*
+	 * The hardware programming guide suggests configuring the
+	 * RX FIFO RFR level to fifo_depth - 2.
+	 */
+	geni_se_init(se, 0x0, mas->tx_fifo_depth - 2);
+	/* Transmit an entire FIFO worth of data per IRQ */
+	mas->tx_wm = 1;
+	ver = geni_se_get_qup_hw_version(se);
+	major = GENI_SE_VERSION_MAJOR(ver);
+	minor = GENI_SE_VERSION_MINOR(ver);
+
+	if (major == 1 && minor == 0)
+		mas->oversampling = 2;
+	else
+		mas->oversampling = 1;
+
+	pm_runtime_put(mas->dev);
+	return 0;
+}
+
+static void setup_fifo_xfer(struct spi_transfer *xfer,
+				struct spi_geni_master *mas,
+				u16 mode, struct spi_master *spi)
+{
+	u32 m_cmd = 0;
+	u32 spi_tx_cfg, len;
+	struct geni_se *se = &mas->se;
+
+	spi_tx_cfg = readl(se->base + SE_SPI_TRANS_CFG);
+	if (xfer->bits_per_word != mas->cur_bits_per_word) {
+		spi_setup_word_len(mas, mode, xfer->bits_per_word);
+		mas->cur_bits_per_word = xfer->bits_per_word;
+	}
+
+	/* Speed and bits per word can be overridden per transfer */
+	if (xfer->speed_hz != mas->cur_speed_hz) {
+		int ret;
+		u32 clk_sel, m_clk_cfg;
+		unsigned int idx, div;
+
+		ret = get_spi_clk_cfg(xfer->speed_hz, mas, &idx, &div);
+		if (ret) {
+			dev_err(mas->dev, "Err setting clks:%d\n", ret);
+			return;
+		}
+		/*
+		 * The SPI core clock gets configured with the requested
+		 * frequency or the closest achievable frequency. For that
+		 * reason the requested frequency is stored in cur_speed_hz
+		 * and reused for subsequent transfers instead of calling
+		 * the clk_get_rate() API.
+		 */
+		mas->cur_speed_hz = xfer->speed_hz;
+		clk_sel = idx & CLK_SEL_MSK;
+		m_clk_cfg = (div << CLK_DIV_SHFT) | SER_CLK_EN;
+		writel(clk_sel, se->base + SE_GENI_CLK_SEL);
+		writel(m_clk_cfg, se->base + GENI_SER_M_CLK_CFG);
+	}
+
+	mas->tx_rem_bytes = 0;
+	mas->rx_rem_bytes = 0;
+	if (xfer->tx_buf && xfer->rx_buf)
+		m_cmd = SPI_FULL_DUPLEX;
+	else if (xfer->tx_buf)
+		m_cmd = SPI_TX_ONLY;
+	else if (xfer->rx_buf)
+		m_cmd = SPI_RX_ONLY;
+
+	spi_tx_cfg &= ~CS_TOGGLE;
+
+	if (!(mas->cur_bits_per_word % MIN_WORD_LEN))
+		len = xfer->len * BITS_PER_BYTE / mas->cur_bits_per_word;
+	else
+		len = xfer->len / (mas->cur_bits_per_word / BITS_PER_BYTE + 1);
+	len &= TRANS_LEN_MSK;
+
+	mas->cur_xfer = xfer;
+	if (m_cmd & SPI_TX_ONLY) {
+		mas->tx_rem_bytes = xfer->len;
+		writel(len, se->base + SE_SPI_TX_TRANS_LEN);
+	}
+
+	if (m_cmd & SPI_RX_ONLY) {
+		writel(len, se->base + SE_SPI_RX_TRANS_LEN);
+		mas->rx_rem_bytes = xfer->len;
+	}
+	writel(spi_tx_cfg, se->base + SE_SPI_TRANS_CFG);
+	mas->cur_mcmd = CMD_XFER;
+	geni_se_setup_m_cmd(se, m_cmd, FRAGMENTATION);
+
+	/*
+	 * TX_WATERMARK_REG should be set after the SPI configuration and
+	 * the GENI SE engine have been set up, because the driver starts
+	 * the data transfer from the watermark interrupt.
+	 */
+	if (m_cmd & SPI_TX_ONLY)
+		writel(mas->tx_wm, se->base + SE_GENI_TX_WATERMARK_REG);
+}
+
+static int spi_geni_transfer_one(struct spi_master *spi,
+				struct spi_device *slv,
+				struct spi_transfer *xfer)
+{
+	struct spi_geni_master *mas = spi_master_get_devdata(spi);
+
+	/* Terminate and return success for 0 byte length transfer */
+	if (!xfer->len)
+		return 0;
+
+	setup_fifo_xfer(xfer, mas, slv->mode, spi);
+	return 1;
+}
+
+static unsigned int geni_byte_per_fifo_word(struct spi_geni_master *mas)
+{
+	/*
+	 * Calculate how many bytes we'll put in each FIFO word.  If the
+	 * transfer words don't pack cleanly into a FIFO word we'll just put
+	 * one transfer word in each FIFO word.  If they do pack we'll pack 'em.
+	 */
+	if (mas->fifo_width_bits % mas->cur_bits_per_word)
+		return roundup_pow_of_two(DIV_ROUND_UP(mas->cur_bits_per_word,
+						       BITS_PER_BYTE));
+
+	return mas->fifo_width_bits / BITS_PER_BYTE;
+}
+
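A small self-contained illustration of the packing rule implemented by geni_byte_per_fifo_word(): word sizes that divide the FIFO width are packed, anything else occupies a whole FIFO word, rounded up to a power-of-two number of bytes. The 32-bit FIFO width is only an assumed example value.

#include <stdio.h>

/* Mirrors geni_byte_per_fifo_word() for a given FIFO width (in bits). */
static unsigned int example_bytes_per_fifo_word(unsigned int fifo_width_bits,
						unsigned int bits_per_word)
{
	unsigned int bytes = 1;

	if (fifo_width_bits % bits_per_word) {
		/* roundup_pow_of_two(DIV_ROUND_UP(bits_per_word, 8)) */
		while (bytes * 8 < bits_per_word)
			bytes *= 2;
		return bytes;
	}
	return fifo_width_bits / 8;
}

int main(void)
{
	/* 32-bit FIFO words assumed for the example */
	printf("8 bpw  -> %u bytes/FIFO word\n",
	       example_bytes_per_fifo_word(32, 8));	/* packs: 4 */
	printf("20 bpw -> %u bytes/FIFO word\n",
	       example_bytes_per_fifo_word(32, 20));	/* no pack: 4 */
	printf("12 bpw -> %u bytes/FIFO word\n",
	       example_bytes_per_fifo_word(32, 12));	/* no pack: 2 */
	return 0;
}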
+static void geni_spi_handle_tx(struct spi_geni_master *mas)
+{
+	struct geni_se *se = &mas->se;
+	unsigned int max_bytes;
+	const u8 *tx_buf;
+	unsigned int bytes_per_fifo_word = geni_byte_per_fifo_word(mas);
+	unsigned int i = 0;
+
+	max_bytes = (mas->tx_fifo_depth - mas->tx_wm) * bytes_per_fifo_word;
+	if (mas->tx_rem_bytes < max_bytes)
+		max_bytes = mas->tx_rem_bytes;
+
+	tx_buf = mas->cur_xfer->tx_buf + mas->cur_xfer->len - mas->tx_rem_bytes;
+	while (i < max_bytes) {
+		unsigned int j;
+		unsigned int bytes_to_write;
+		u32 fifo_word = 0;
+		u8 *fifo_byte = (u8 *)&fifo_word;
+
+		bytes_to_write = min(bytes_per_fifo_word, max_bytes - i);
+		for (j = 0; j < bytes_to_write; j++)
+			fifo_byte[j] = tx_buf[i++];
+		iowrite32_rep(se->base + SE_GENI_TX_FIFOn, &fifo_word, 1);
+	}
+	mas->tx_rem_bytes -= max_bytes;
+	if (!mas->tx_rem_bytes)
+		writel(0, se->base + SE_GENI_TX_WATERMARK_REG);
+}
+
+static void geni_spi_handle_rx(struct spi_geni_master *mas)
+{
+	struct geni_se *se = &mas->se;
+	u32 rx_fifo_status;
+	unsigned int rx_bytes;
+	unsigned int rx_last_byte_valid;
+	u8 *rx_buf;
+	unsigned int bytes_per_fifo_word = geni_byte_per_fifo_word(mas);
+	unsigned int i = 0;
+
+	rx_fifo_status = readl(se->base + SE_GENI_RX_FIFO_STATUS);
+	rx_bytes = (rx_fifo_status & RX_FIFO_WC_MSK) * bytes_per_fifo_word;
+	if (rx_fifo_status & RX_LAST) {
+		rx_last_byte_valid = rx_fifo_status & RX_LAST_BYTE_VALID_MSK;
+		rx_last_byte_valid >>= RX_LAST_BYTE_VALID_SHFT;
+		if (rx_last_byte_valid && rx_last_byte_valid < 4)
+			rx_bytes -= bytes_per_fifo_word - rx_last_byte_valid;
+	}
+	if (mas->rx_rem_bytes < rx_bytes)
+		rx_bytes = mas->rx_rem_bytes;
+
+	rx_buf = mas->cur_xfer->rx_buf + mas->cur_xfer->len - mas->rx_rem_bytes;
+	while (i < rx_bytes) {
+		u32 fifo_word = 0;
+		u8 *fifo_byte = (u8 *)&fifo_word;
+		unsigned int bytes_to_read;
+		unsigned int j;
+
+		bytes_to_read = min(bytes_per_fifo_word, rx_bytes - i);
+		ioread32_rep(se->base + SE_GENI_RX_FIFOn, &fifo_word, 1);
+		for (j = 0; j < bytes_to_read; j++)
+			rx_buf[i++] = fifo_byte[j];
+	}
+	mas->rx_rem_bytes -= rx_bytes;
+}
+
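A worked example of the byte-count trimming done in geni_spi_handle_rx() when RX_LAST is set; the word count and last-byte-valid values are hypothetical, not a statement about the register layout.

#include <stdio.h>

int main(void)
{
	/* Hypothetical example values */
	unsigned int bytes_per_fifo_word = 4;
	unsigned int word_count = 5;		/* RX FIFO word count field */
	unsigned int last_byte_valid = 2;	/* valid bytes in the last word */
	unsigned int rx_bytes = word_count * bytes_per_fifo_word;

	/* Same trimming as geni_spi_handle_rx() when RX_LAST is set */
	if (last_byte_valid && last_byte_valid < 4)
		rx_bytes -= bytes_per_fifo_word - last_byte_valid;

	printf("rx_bytes = %u\n", rx_bytes);	/* 5 * 4 - (4 - 2) = 18 */
	return 0;
}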
+static irqreturn_t geni_spi_isr(int irq, void *data)
+{
+	struct spi_master *spi = data;
+	struct spi_geni_master *mas = spi_master_get_devdata(spi);
+	struct geni_se *se = &mas->se;
+	u32 m_irq;
+	unsigned long flags;
+
+	if (mas->cur_mcmd == CMD_NONE)
+		return IRQ_NONE;
+
+	spin_lock_irqsave(&mas->lock, flags);
+	m_irq = readl(se->base + SE_GENI_M_IRQ_STATUS);
+
+	if ((m_irq & M_RX_FIFO_WATERMARK_EN) || (m_irq & M_RX_FIFO_LAST_EN))
+		geni_spi_handle_rx(mas);
+
+	if (m_irq & M_TX_FIFO_WATERMARK_EN)
+		geni_spi_handle_tx(mas);
+
+	if (m_irq & M_CMD_DONE_EN) {
+		if (mas->cur_mcmd == CMD_XFER)
+			spi_finalize_current_transfer(spi);
+		else if (mas->cur_mcmd == CMD_CS)
+			complete(&mas->xfer_done);
+		mas->cur_mcmd = CMD_NONE;
+		/*
+		 * If tx_rem_bytes is still non-zero here, a CMD_DONE arrived
+		 * before all the Tx buffer bytes were sent out. This is
+		 * unusual: log the condition and disable the WM interrupt to
+		 * prevent the system from stalling due to an interrupt storm.
+		 * Likewise, log the condition if not all Rx bytes have been
+		 * received.
+		 * The only known way this can happen is if bits_per_word != 8
+		 * and some register that expects transfer lengths in SPI words
+		 * wasn't written correctly.
+		 */
+		if (mas->tx_rem_bytes) {
+			writel(0, se->base + SE_GENI_TX_WATERMARK_REG);
+			dev_err(mas->dev, "Premature done. tx_rem = %d bpw%d\n",
+				mas->tx_rem_bytes, mas->cur_bits_per_word);
+		}
+		if (mas->rx_rem_bytes)
+			dev_err(mas->dev, "Premature done. rx_rem = %d bpw%d\n",
+				mas->rx_rem_bytes, mas->cur_bits_per_word);
+	}
+
+	if ((m_irq & M_CMD_CANCEL_EN) || (m_irq & M_CMD_ABORT_EN)) {
+		mas->cur_mcmd = CMD_NONE;
+		complete(&mas->xfer_done);
+	}
+
+	writel(m_irq, se->base + SE_GENI_M_IRQ_CLEAR);
+	spin_unlock_irqrestore(&mas->lock, flags);
+	return IRQ_HANDLED;
+}
+
+static int spi_geni_probe(struct platform_device *pdev)
+{
+	int ret, irq;
+	struct spi_master *spi;
+	struct spi_geni_master *mas;
+	void __iomem *base;
+	struct clk *clk;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	clk = devm_clk_get(&pdev->dev, "se");
+	if (IS_ERR(clk)) {
+		dev_err(&pdev->dev, "Err getting SE Core clk %ld\n",
+						PTR_ERR(clk));
+		return PTR_ERR(clk);
+	}
+
+	spi = spi_alloc_master(&pdev->dev, sizeof(*mas));
+	if (!spi)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, spi);
+	mas = spi_master_get_devdata(spi);
+	mas->irq = irq;
+	mas->dev = &pdev->dev;
+	mas->se.dev = &pdev->dev;
+	mas->se.wrapper = dev_get_drvdata(pdev->dev.parent);
+	mas->se.base = base;
+	mas->se.clk = clk;
+
+	spi->bus_num = -1;
+	spi->dev.of_node = pdev->dev.of_node;
+	spi->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LOOP | SPI_CS_HIGH;
+	spi->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32);
+	spi->num_chipselect = 4;
+	spi->max_speed_hz = 50000000;
+	spi->prepare_message = spi_geni_prepare_message;
+	spi->transfer_one = spi_geni_transfer_one;
+	spi->auto_runtime_pm = true;
+	spi->handle_err = handle_fifo_timeout;
+	spi->set_cs = spi_geni_set_cs;
+
+	init_completion(&mas->xfer_done);
+	spin_lock_init(&mas->lock);
+	pm_runtime_enable(&pdev->dev);
+
+	ret = spi_geni_init(mas);
+	if (ret)
+		goto spi_geni_probe_runtime_disable;
+
+	ret = request_irq(mas->irq, geni_spi_isr,
+			IRQF_TRIGGER_HIGH, "spi_geni", spi);
+	if (ret)
+		goto spi_geni_probe_runtime_disable;
+
+	ret = spi_register_master(spi);
+	if (ret)
+		goto spi_geni_probe_free_irq;
+
+	return 0;
+spi_geni_probe_free_irq:
+	free_irq(mas->irq, spi);
+spi_geni_probe_runtime_disable:
+	pm_runtime_disable(&pdev->dev);
+	spi_master_put(spi);
+	return ret;
+}
+
+static int spi_geni_remove(struct platform_device *pdev)
+{
+	struct spi_master *spi = platform_get_drvdata(pdev);
+	struct spi_geni_master *mas = spi_master_get_devdata(spi);
+
+	/* Unregister _before_ disabling pm_runtime() so we stop transfers */
+	spi_unregister_master(spi);
+
+	free_irq(mas->irq, spi);
+	pm_runtime_disable(&pdev->dev);
+	return 0;
+}
+
+static int __maybe_unused spi_geni_runtime_suspend(struct device *dev)
+{
+	struct spi_master *spi = dev_get_drvdata(dev);
+	struct spi_geni_master *mas = spi_master_get_devdata(spi);
+
+	return geni_se_resources_off(&mas->se);
+}
+
+static int __maybe_unused spi_geni_runtime_resume(struct device *dev)
+{
+	struct spi_master *spi = dev_get_drvdata(dev);
+	struct spi_geni_master *mas = spi_master_get_devdata(spi);
+
+	return geni_se_resources_on(&mas->se);
+}
+
+static int __maybe_unused spi_geni_suspend(struct device *dev)
+{
+	struct spi_master *spi = dev_get_drvdata(dev);
+	int ret;
+
+	ret = spi_master_suspend(spi);
+	if (ret)
+		return ret;
+
+	ret = pm_runtime_force_suspend(dev);
+	if (ret)
+		spi_master_resume(spi);
+
+	return ret;
+}
+
+static int __maybe_unused spi_geni_resume(struct device *dev)
+{
+	struct spi_master *spi = dev_get_drvdata(dev);
+	int ret;
+
+	ret = pm_runtime_force_resume(dev);
+	if (ret)
+		return ret;
+
+	ret = spi_master_resume(spi);
+	if (ret)
+		pm_runtime_force_suspend(dev);
+
+	return ret;
+}
+
+static const struct dev_pm_ops spi_geni_pm_ops = {
+	SET_RUNTIME_PM_OPS(spi_geni_runtime_suspend,
+					spi_geni_runtime_resume, NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(spi_geni_suspend, spi_geni_resume)
+};
+
+static const struct of_device_id spi_geni_dt_match[] = {
+	{ .compatible = "qcom,geni-spi" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, spi_geni_dt_match);
+
+static struct platform_driver spi_geni_driver = {
+	.probe  = spi_geni_probe,
+	.remove = spi_geni_remove,
+	.driver = {
+		.name = "geni_spi",
+		.pm = &spi_geni_pm_ops,
+		.of_match_table = spi_geni_dt_match,
+	},
+};
+module_platform_driver(spi_geni_driver);
+
+MODULE_DESCRIPTION("SPI driver for GENI based QUP cores");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/spi/spi-gpio.c b/drivers/spi/spi-gpio.c
index 088772e..1d3e23e 100644
--- a/drivers/spi/spi-gpio.c
+++ b/drivers/spi/spi-gpio.c
@@ -1,18 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * SPI master driver using generic bitbanged GPIO
  *
  * Copyright (C) 2006,2008 David Brownell
  * Copyright (C) 2017 Linus Walleij
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -35,20 +26,16 @@
  * platform_device->driver_data ... points to spi_gpio
  *
  * spi->controller_state ... reserved for bitbang framework code
- * spi->controller_data ... holds chipselect GPIO
  *
  * spi->master->dev.driver_data ... points to spi_gpio->bitbang
  */
 
 struct spi_gpio {
 	struct spi_bitbang		bitbang;
-	struct spi_gpio_platform_data	pdata;
-	struct platform_device		*pdev;
 	struct gpio_desc		*sck;
 	struct gpio_desc		*miso;
 	struct gpio_desc		*mosi;
 	struct gpio_desc		**cs_gpios;
-	bool				has_cs;
 };
 
 /*----------------------------------------------------------------------*/
@@ -96,12 +83,6 @@
 	return spi_gpio;
 }
 
-static inline struct spi_gpio_platform_data *__pure
-spi_to_pdata(const struct spi_device *spi)
-{
-	return &spi_to_spi_gpio(spi)->pdata;
-}
-
 /* These helpers are in turn called by the bitbang inlines */
 static inline void setsck(const struct spi_device *spi, int is_on)
 {
@@ -224,7 +205,7 @@
 		gpiod_set_value_cansleep(spi_gpio->sck, spi->mode & SPI_CPOL);
 
 	/* Drive chip select line, if we have one */
-	if (spi_gpio->has_cs) {
+	if (spi_gpio->cs_gpios) {
 		struct gpio_desc *cs = spi_gpio->cs_gpios[spi->chip_select];
 
 		/* SPI chip selects are normally active-low */
@@ -242,10 +223,12 @@
 	 * The CS GPIOs have already been
 	 * initialized from the descriptor lookup.
 	 */
-	cs = spi_gpio->cs_gpios[spi->chip_select];
-	if (!spi->controller_state && cs)
-		status = gpiod_direction_output(cs,
-						!(spi->mode & SPI_CS_HIGH));
+	if (spi_gpio->cs_gpios) {
+		cs = spi_gpio->cs_gpios[spi->chip_select];
+		if (!spi->controller_state && cs)
+			status = gpiod_direction_output(cs,
+						  !(spi->mode & SPI_CS_HIGH));
+	}
 
 	if (!status)
 		status = spi_bitbang_setup(spi);
@@ -256,11 +239,29 @@
 static int spi_gpio_set_direction(struct spi_device *spi, bool output)
 {
 	struct spi_gpio *spi_gpio = spi_to_spi_gpio(spi);
+	int ret;
 
 	if (output)
 		return gpiod_direction_output(spi_gpio->mosi, 1);
-	else
-		return gpiod_direction_input(spi_gpio->mosi);
+
+	ret = gpiod_direction_input(spi_gpio->mosi);
+	if (ret)
+		return ret;
+	/*
+	 * Send a turnaround high impedance cycle when switching
+	 * from output to input. Theoretically there should be
+	 * a clock delay here, but as has been noted above, the
+	 * nsec delay function for bit-banged GPIO is simply
+	 * {} because bit-banging just doesn't get fast enough
+	 * anyway.
+	 */
+	if (spi->mode & SPI_3WIRE_HIZ) {
+		gpiod_set_value_cansleep(spi_gpio->sck,
+					 !(spi->mode & SPI_CPOL));
+		gpiod_set_value_cansleep(spi_gpio->sck,
+					 !!(spi->mode & SPI_CPOL));
+	}
+	return 0;
 }
 
 static void spi_gpio_cleanup(struct spi_device *spi)
@@ -278,41 +279,18 @@
  * floating signals.  (A weak pulldown would save power too, but many
  * drivers expect to see all-ones data as the no slave "response".)
  */
-static int spi_gpio_request(struct device *dev,
-			    struct spi_gpio *spi_gpio,
-			    unsigned int num_chipselects,
-			    u16 *mflags)
+static int spi_gpio_request(struct device *dev, struct spi_gpio *spi_gpio)
 {
-	int i;
-
 	spi_gpio->mosi = devm_gpiod_get_optional(dev, "mosi", GPIOD_OUT_LOW);
 	if (IS_ERR(spi_gpio->mosi))
 		return PTR_ERR(spi_gpio->mosi);
-	if (!spi_gpio->mosi)
-		/* HW configuration without MOSI pin */
-		*mflags |= SPI_MASTER_NO_TX;
 
 	spi_gpio->miso = devm_gpiod_get_optional(dev, "miso", GPIOD_IN);
 	if (IS_ERR(spi_gpio->miso))
 		return PTR_ERR(spi_gpio->miso);
-	/*
-	 * No setting SPI_MASTER_NO_RX here - if there is only a MOSI
-	 * pin connected the host can still do RX by changing the
-	 * direction of the line.
-	 */
 
 	spi_gpio->sck = devm_gpiod_get(dev, "sck", GPIOD_OUT_LOW);
-	if (IS_ERR(spi_gpio->sck))
-		return PTR_ERR(spi_gpio->sck);
-
-	for (i = 0; i < num_chipselects; i++) {
-		spi_gpio->cs_gpios[i] = devm_gpiod_get_index(dev, "cs",
-							     i, GPIOD_OUT_HIGH);
-		if (IS_ERR(spi_gpio->cs_gpios[i]))
-			return PTR_ERR(spi_gpio->cs_gpios[i]);
-	}
-
-	return 0;
+	return PTR_ERR_OR_ZERO(spi_gpio->sck);
 }
 
 #ifdef CONFIG_OF
@@ -322,144 +300,140 @@
 };
 MODULE_DEVICE_TABLE(of, spi_gpio_dt_ids);
 
-static int spi_gpio_probe_dt(struct platform_device *pdev)
+static int spi_gpio_probe_dt(struct platform_device *pdev,
+			     struct spi_master *master)
 {
-	int ret;
-	u32 tmp;
-	struct spi_gpio_platform_data	*pdata;
-	struct device_node *np = pdev->dev.of_node;
-	const struct of_device_id *of_id =
-			of_match_device(spi_gpio_dt_ids, &pdev->dev);
+	master->dev.of_node = pdev->dev.of_node;
+	master->use_gpio_descriptors = true;
 
-	if (!of_id)
-		return 0;
-
-	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
-	if (!pdata)
-		return -ENOMEM;
-
-
-	ret = of_property_read_u32(np, "num-chipselects", &tmp);
-	if (ret < 0) {
-		dev_err(&pdev->dev, "num-chipselects property not found\n");
-		goto error_free;
-	}
-
-	pdata->num_chipselect = tmp;
-	pdev->dev.platform_data = pdata;
-
-	return 1;
-
-error_free:
-	devm_kfree(&pdev->dev, pdata);
-	return ret;
+	return 0;
 }
 #else
-static inline int spi_gpio_probe_dt(struct platform_device *pdev)
+static inline int spi_gpio_probe_dt(struct platform_device *pdev,
+				    struct spi_master *master)
 {
 	return 0;
 }
 #endif
 
+static int spi_gpio_probe_pdata(struct platform_device *pdev,
+				struct spi_master *master)
+{
+	struct device *dev = &pdev->dev;
+	struct spi_gpio_platform_data *pdata = dev_get_platdata(dev);
+	struct spi_gpio *spi_gpio = spi_master_get_devdata(master);
+	int i;
+
+#ifdef GENERIC_BITBANG
+	if (!pdata || !pdata->num_chipselect)
+		return -ENODEV;
+#endif
+	/*
+	 * The master needs to think there is a chipselect even if not
+	 * connected
+	 */
+	master->num_chipselect = pdata->num_chipselect ?: 1;
+
+	spi_gpio->cs_gpios = devm_kcalloc(dev, master->num_chipselect,
+					  sizeof(*spi_gpio->cs_gpios),
+					  GFP_KERNEL);
+	if (!spi_gpio->cs_gpios)
+		return -ENOMEM;
+
+	for (i = 0; i < master->num_chipselect; i++) {
+		spi_gpio->cs_gpios[i] = devm_gpiod_get_index(dev, "cs", i,
+							     GPIOD_OUT_HIGH);
+		if (IS_ERR(spi_gpio->cs_gpios[i]))
+			return PTR_ERR(spi_gpio->cs_gpios[i]);
+	}
+
+	return 0;
+}
+
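For platform-data users, a hedged board-file sketch of what the pdata path above consumes. The lookup-table con_ids ("sck", "mosi", "miso", "cs") match the devm_gpiod_get*() calls in this driver; the GPIO chip label, pin numbers and device id are made-up example values.

#include <linux/gpio/machine.h>
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/spi/spi_gpio.h>

static struct spi_gpio_platform_data example_spi_gpio_pdata = {
	.num_chipselect = 1,
};

/* dev_id "spi_gpio.0" assumes the platform device below gets id 0 */
static struct gpiod_lookup_table example_spi_gpio_gpios = {
	.dev_id = "spi_gpio.0",
	.table = {
		GPIO_LOOKUP("gpio-bank0", 10, "sck",  GPIO_ACTIVE_HIGH),
		GPIO_LOOKUP("gpio-bank0", 11, "mosi", GPIO_ACTIVE_HIGH),
		GPIO_LOOKUP("gpio-bank0", 12, "miso", GPIO_ACTIVE_HIGH),
		GPIO_LOOKUP_IDX("gpio-bank0", 13, "cs", 0, GPIO_ACTIVE_HIGH),
		{ },
	},
};

static struct platform_device example_spi_gpio_device = {
	.name = "spi_gpio",
	.id   = 0,
	.dev  = {
		.platform_data = &example_spi_gpio_pdata,
	},
};

static void __init example_board_add_spi_gpio(void)
{
	/* Register the GPIO mapping before the device so probe can find it */
	gpiod_add_lookup_table(&example_spi_gpio_gpios);
	platform_device_register(&example_spi_gpio_device);
}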
+static void spi_gpio_put(void *data)
+{
+	spi_master_put(data);
+}
+
 static int spi_gpio_probe(struct platform_device *pdev)
 {
 	int				status;
 	struct spi_master		*master;
 	struct spi_gpio			*spi_gpio;
-	struct spi_gpio_platform_data	*pdata;
-	u16 master_flags = 0;
-	bool use_of = 0;
+	struct device			*dev = &pdev->dev;
+	struct spi_bitbang		*bb;
+	const struct of_device_id	*of_id;
 
-	status = spi_gpio_probe_dt(pdev);
-	if (status < 0)
-		return status;
-	if (status > 0)
-		use_of = 1;
+	of_id = of_match_device(spi_gpio_dt_ids, &pdev->dev);
 
-	pdata = dev_get_platdata(&pdev->dev);
-#ifdef GENERIC_BITBANG
-	if (!pdata || (!use_of && !pdata->num_chipselect))
-		return -ENODEV;
-#endif
-
-	master = spi_alloc_master(&pdev->dev, sizeof(*spi_gpio));
+	master = spi_alloc_master(dev, sizeof(*spi_gpio));
 	if (!master)
 		return -ENOMEM;
 
+	status = devm_add_action_or_reset(&pdev->dev, spi_gpio_put, master);
+	if (status)
+		return status;
+
+	if (of_id)
+		status = spi_gpio_probe_dt(pdev, master);
+	else
+		status = spi_gpio_probe_pdata(pdev, master);
+
+	if (status)
+		return status;
+
 	spi_gpio = spi_master_get_devdata(master);
 
-	spi_gpio->cs_gpios = devm_kcalloc(&pdev->dev,
-				pdata->num_chipselect,
-				sizeof(*spi_gpio->cs_gpios),
-				GFP_KERNEL);
-	if (!spi_gpio->cs_gpios)
-		return -ENOMEM;
-
-	platform_set_drvdata(pdev, spi_gpio);
-
-	/* Determine if we have chip selects connected */
-	spi_gpio->has_cs = !!pdata->num_chipselect;
-
-	spi_gpio->pdev = pdev;
-	if (pdata)
-		spi_gpio->pdata = *pdata;
-
-	status = spi_gpio_request(&pdev->dev, spi_gpio,
-				  pdata->num_chipselect, &master_flags);
+	status = spi_gpio_request(dev, spi_gpio);
 	if (status)
 		return status;
 
 	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 32);
-	master->mode_bits = SPI_3WIRE | SPI_CPHA | SPI_CPOL;
-	master->flags = master_flags;
+	master->mode_bits = SPI_3WIRE | SPI_3WIRE_HIZ | SPI_CPHA | SPI_CPOL |
+			    SPI_CS_HIGH;
+	if (!spi_gpio->mosi) {
+		/* HW configuration without MOSI pin
+		 *
+		 * No setting SPI_MASTER_NO_RX here - if there is only
+		 * a MOSI pin connected the host can still do RX by
+		 * changing the direction of the line.
+		 */
+		master->flags = SPI_MASTER_NO_TX;
+	}
+
 	master->bus_num = pdev->id;
-	/* The master needs to think there is a chipselect even if not connected */
-	master->num_chipselect = spi_gpio->has_cs ? pdata->num_chipselect : 1;
 	master->setup = spi_gpio_setup;
 	master->cleanup = spi_gpio_cleanup;
-#ifdef CONFIG_OF
-	master->dev.of_node = pdev->dev.of_node;
-#endif
 
-	spi_gpio->bitbang.master = master;
-	spi_gpio->bitbang.chipselect = spi_gpio_chipselect;
-	spi_gpio->bitbang.set_line_direction = spi_gpio_set_direction;
+	bb = &spi_gpio->bitbang;
+	bb->master = master;
+	/*
+	 * There is some additional business, apart from driving the CS GPIO
+	 * line, that we need to do on selection. This makes the local
+	 * callback for chipselect always get called.
+	 */
+	master->flags |= SPI_MASTER_GPIO_SS;
+	bb->chipselect = spi_gpio_chipselect;
+	bb->set_line_direction = spi_gpio_set_direction;
 
-	if ((master_flags & SPI_MASTER_NO_TX) == 0) {
-		spi_gpio->bitbang.txrx_word[SPI_MODE_0] = spi_gpio_txrx_word_mode0;
-		spi_gpio->bitbang.txrx_word[SPI_MODE_1] = spi_gpio_txrx_word_mode1;
-		spi_gpio->bitbang.txrx_word[SPI_MODE_2] = spi_gpio_txrx_word_mode2;
-		spi_gpio->bitbang.txrx_word[SPI_MODE_3] = spi_gpio_txrx_word_mode3;
+	if (master->flags & SPI_MASTER_NO_TX) {
+		bb->txrx_word[SPI_MODE_0] = spi_gpio_spec_txrx_word_mode0;
+		bb->txrx_word[SPI_MODE_1] = spi_gpio_spec_txrx_word_mode1;
+		bb->txrx_word[SPI_MODE_2] = spi_gpio_spec_txrx_word_mode2;
+		bb->txrx_word[SPI_MODE_3] = spi_gpio_spec_txrx_word_mode3;
 	} else {
-		spi_gpio->bitbang.txrx_word[SPI_MODE_0] = spi_gpio_spec_txrx_word_mode0;
-		spi_gpio->bitbang.txrx_word[SPI_MODE_1] = spi_gpio_spec_txrx_word_mode1;
-		spi_gpio->bitbang.txrx_word[SPI_MODE_2] = spi_gpio_spec_txrx_word_mode2;
-		spi_gpio->bitbang.txrx_word[SPI_MODE_3] = spi_gpio_spec_txrx_word_mode3;
+		bb->txrx_word[SPI_MODE_0] = spi_gpio_txrx_word_mode0;
+		bb->txrx_word[SPI_MODE_1] = spi_gpio_txrx_word_mode1;
+		bb->txrx_word[SPI_MODE_2] = spi_gpio_txrx_word_mode2;
+		bb->txrx_word[SPI_MODE_3] = spi_gpio_txrx_word_mode3;
 	}
-	spi_gpio->bitbang.setup_transfer = spi_bitbang_setup_transfer;
-	spi_gpio->bitbang.flags = SPI_CS_HIGH;
+	bb->setup_transfer = spi_bitbang_setup_transfer;
 
-	status = spi_bitbang_start(&spi_gpio->bitbang);
+	status = spi_bitbang_init(&spi_gpio->bitbang);
 	if (status)
-		spi_master_put(master);
+		return status;
 
-	return status;
-}
-
-static int spi_gpio_remove(struct platform_device *pdev)
-{
-	struct spi_gpio			*spi_gpio;
-	struct spi_gpio_platform_data	*pdata;
-
-	spi_gpio = platform_get_drvdata(pdev);
-	pdata = dev_get_platdata(&pdev->dev);
-
-	/* stop() unregisters child devices too */
-	spi_bitbang_stop(&spi_gpio->bitbang);
-
-	spi_master_put(spi_gpio->bitbang.master);
-
-	return 0;
+	return devm_spi_register_master(&pdev->dev, spi_master_get(master));
 }
 
 MODULE_ALIAS("platform:" DRIVER_NAME);
@@ -470,7 +444,6 @@
 		.of_match_table = of_match_ptr(spi_gpio_dt_ids),
 	},
 	.probe		= spi_gpio_probe,
-	.remove		= spi_gpio_remove,
 };
 module_platform_driver(spi_gpio_driver);
 
diff --git a/drivers/spi/spi-img-spfi.c b/drivers/spi/spi-img-spfi.c
index e6eb979..439b01e 100644
--- a/drivers/spi/spi-img-spfi.c
+++ b/drivers/spi/spi-img-spfi.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * IMG SPFI controller driver
  *
  * Copyright (C) 2007,2008,2013 Imagination Technologies Ltd.
  * Copyright (C) 2014 Google, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 #include <linux/clk.h>
diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
index 08dd3a3..09c9a1e 100644
--- a/drivers/spi/spi-imx.c
+++ b/drivers/spi/spi-imx.c
@@ -28,6 +28,10 @@
 
 #define DRIVER_NAME "spi_imx"
 
+static bool use_dma = true;
+module_param(use_dma, bool, 0644);
+MODULE_PARM_DESC(use_dma, "Enable usage of DMA when available (default)");
+
 #define MXC_CSPIRXDATA		0x00
 #define MXC_CSPITXDATA		0x04
 #define MXC_CSPICTRL		0x08
@@ -39,8 +43,8 @@
 #define MXC_INT_TE	(1 << 1) /* Transmit FIFO empty interrupt */
 #define MXC_INT_RDR	BIT(4) /* Receive date threshold interrupt */
 
-/* The maximum  bytes that a sdma BD can transfer.*/
-#define MAX_SDMA_BD_BYTES  (1 << 15)
+/* The maximum bytes that an SDMA BD can transfer. */
+#define MAX_SDMA_BD_BYTES (1 << 15)
 #define MX51_ECSPI_CTRL_MAX_BURST	512
 /* The maximum bytes that IMX53_ECSPI can transfer in slave mode.*/
 #define MX53_MAX_TRANSFER_BYTES		512
@@ -59,10 +63,13 @@
 
 struct spi_imx_devtype_data {
 	void (*intctrl)(struct spi_imx_data *, int);
-	int (*config)(struct spi_device *);
+	int (*prepare_message)(struct spi_imx_data *, struct spi_message *);
+	int (*prepare_transfer)(struct spi_imx_data *, struct spi_device *,
+				struct spi_transfer *);
 	void (*trigger)(struct spi_imx_data *);
 	int (*rx_available)(struct spi_imx_data *);
 	void (*reset)(struct spi_imx_data *);
+	void (*setup_wml)(struct spi_imx_data *);
 	void (*disable)(struct spi_imx_data *);
 	bool has_dmamode;
 	bool has_slavemode;
@@ -84,7 +91,6 @@
 	unsigned long spi_clk;
 	unsigned int spi_bus_clk;
 
-	unsigned int speed_hz;
 	unsigned int bits_per_word;
 	unsigned int spi_drctl;
 
@@ -216,7 +222,9 @@
 			 struct spi_transfer *transfer)
 {
 	struct spi_imx_data *spi_imx = spi_master_get_devdata(master);
-	unsigned int bytes_per_word, i;
+
+	if (!use_dma)
+		return false;
 
 	if (!master->dma_rx)
 		return false;
@@ -224,14 +232,9 @@
 	if (spi_imx->slave_mode)
 		return false;
 
-	bytes_per_word = spi_imx_bytes_per_word(transfer->bits_per_word);
+	if (transfer->len < spi_imx->devtype_data->fifo_size)
+		return false;
 
-	for (i = spi_imx->devtype_data->fifo_size / 2; i > 0; i--) {
-		if (!(transfer->len % (i * bytes_per_word)))
-			break;
-	}
-
-	spi_imx->wml = i;
 	spi_imx->dynamic_burst = 0;
 
 	return true;
@@ -261,7 +264,7 @@
 #define MX51_ECSPI_INT_RREN		(1 <<  3)
 #define MX51_ECSPI_INT_RDREN		(1 <<  4)
 
-#define MX51_ECSPI_DMA      0x14
+#define MX51_ECSPI_DMA		0x14
 #define MX51_ECSPI_DMA_TX_WML(wml)	((wml) & 0x3f)
 #define MX51_ECSPI_DMA_RX_WML(wml)	(((wml) & 0x3f) << 16)
 #define MX51_ECSPI_DMA_RXT_WML(wml)	(((wml) & 0x3f) << 24)
@@ -491,11 +494,12 @@
 	writel(ctrl, spi_imx->base + MX51_ECSPI_CTRL);
 }
 
-static int mx51_ecspi_config(struct spi_device *spi)
+static int mx51_ecspi_prepare_message(struct spi_imx_data *spi_imx,
+				      struct spi_message *msg)
 {
-	struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master);
+	struct spi_device *spi = msg->spi;
 	u32 ctrl = MX51_ECSPI_CTRL_ENABLE;
-	u32 clk = spi_imx->speed_hz, delay, reg;
+	u32 testreg;
 	u32 cfg = readl(spi_imx->base + MX51_ECSPI_CONFIG);
 
 	/* set Master or Slave mode */
@@ -510,19 +514,21 @@
 	if (spi->mode & SPI_READY)
 		ctrl |= MX51_ECSPI_CTRL_DRCTL(spi_imx->spi_drctl);
 
-	/* set clock speed */
-	ctrl |= mx51_ecspi_clkdiv(spi_imx, spi_imx->speed_hz, &clk);
-	spi_imx->spi_bus_clk = clk;
-
 	/* set chip select to use */
 	ctrl |= MX51_ECSPI_CTRL_CS(spi->chip_select);
 
-	if (spi_imx->slave_mode && is_imx53_ecspi(spi_imx))
-		ctrl |= (spi_imx->slave_burst * 8 - 1)
-			<< MX51_ECSPI_CTRL_BL_OFFSET;
+	/*
+	 * The ctrl register must be written first, with the EN bit set;
+	 * other registers must not be written to until that has been done.
+	 */
+	writel(ctrl, spi_imx->base + MX51_ECSPI_CTRL);
+
+	testreg = readl(spi_imx->base + MX51_ECSPI_TESTREG);
+	if (spi->mode & SPI_LOOP)
+		testreg |= MX51_ECSPI_TESTREG_LBC;
 	else
-		ctrl |= (spi_imx->bits_per_word - 1)
-			<< MX51_ECSPI_CTRL_BL_OFFSET;
+		testreg &= ~MX51_ECSPI_TESTREG_LBC;
+	writel(testreg, spi_imx->base + MX51_ECSPI_TESTREG);
 
 	/*
 	 * eCSPI burst completion by Chip Select signal in Slave mode
@@ -546,26 +552,44 @@
 		cfg &= ~MX51_ECSPI_CONFIG_SCLKPOL(spi->chip_select);
 		cfg &= ~MX51_ECSPI_CONFIG_SCLKCTL(spi->chip_select);
 	}
+
 	if (spi->mode & SPI_CS_HIGH)
 		cfg |= MX51_ECSPI_CONFIG_SSBPOL(spi->chip_select);
 	else
 		cfg &= ~MX51_ECSPI_CONFIG_SSBPOL(spi->chip_select);
 
+	writel(cfg, spi_imx->base + MX51_ECSPI_CONFIG);
+
+	return 0;
+}
+
+static int mx51_ecspi_prepare_transfer(struct spi_imx_data *spi_imx,
+				       struct spi_device *spi,
+				       struct spi_transfer *t)
+{
+	u32 ctrl = readl(spi_imx->base + MX51_ECSPI_CTRL);
+	u32 clk = t->speed_hz, delay;
+
+	/* Clear BL field and set the right value */
+	ctrl &= ~MX51_ECSPI_CTRL_BL_MASK;
+	if (spi_imx->slave_mode && is_imx53_ecspi(spi_imx))
+		ctrl |= (spi_imx->slave_burst * 8 - 1)
+			<< MX51_ECSPI_CTRL_BL_OFFSET;
+	else
+		ctrl |= (spi_imx->bits_per_word - 1)
+			<< MX51_ECSPI_CTRL_BL_OFFSET;
+
+	/* set clock speed */
+	ctrl &= ~(0xf << MX51_ECSPI_CTRL_POSTDIV_OFFSET |
+		  0xf << MX51_ECSPI_CTRL_PREDIV_OFFSET);
+	ctrl |= mx51_ecspi_clkdiv(spi_imx, t->speed_hz, &clk);
+	spi_imx->spi_bus_clk = clk;
+
 	if (spi_imx->usedma)
 		ctrl |= MX51_ECSPI_CTRL_SMC;
 
-	/* CTRL register always go first to bring out controller from reset */
 	writel(ctrl, spi_imx->base + MX51_ECSPI_CTRL);
 
-	reg = readl(spi_imx->base + MX51_ECSPI_TESTREG);
-	if (spi->mode & SPI_LOOP)
-		reg |= MX51_ECSPI_TESTREG_LBC;
-	else
-		reg &= ~MX51_ECSPI_TESTREG_LBC;
-	writel(reg, spi_imx->base + MX51_ECSPI_TESTREG);
-
-	writel(cfg, spi_imx->base + MX51_ECSPI_CONFIG);
-
 	/*
 	 * Wait until the changes in the configuration register CONFIGREG
 	 * propagate into the hardware. It takes exactly one tick of the
@@ -583,18 +607,20 @@
 	else			/* SCLK is _very_ slow */
 		usleep_range(delay, delay + 10);
 
+	return 0;
+}
+
+static void mx51_setup_wml(struct spi_imx_data *spi_imx)
+{
 	/*
 	 * Configure the DMA register: setup the watermark
 	 * and enable DMA request.
 	 */
-
-	writel(MX51_ECSPI_DMA_RX_WML(spi_imx->wml) |
+	writel(MX51_ECSPI_DMA_RX_WML(spi_imx->wml - 1) |
 		MX51_ECSPI_DMA_TX_WML(spi_imx->wml) |
 		MX51_ECSPI_DMA_RXT_WML(spi_imx->wml) |
 		MX51_ECSPI_DMA_TEDEN | MX51_ECSPI_DMA_RXDEN |
 		MX51_ECSPI_DMA_RXTDEN, spi_imx->base + MX51_ECSPI_DMA);
-
-	return 0;
 }
 
 static int mx51_ecspi_rx_available(struct spi_imx_data *spi_imx)
@@ -661,13 +687,20 @@
 	writel(reg, spi_imx->base + MXC_CSPICTRL);
 }
 
-static int mx31_config(struct spi_device *spi)
+static int mx31_prepare_message(struct spi_imx_data *spi_imx,
+				struct spi_message *msg)
 {
-	struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master);
+	return 0;
+}
+
+static int mx31_prepare_transfer(struct spi_imx_data *spi_imx,
+				 struct spi_device *spi,
+				 struct spi_transfer *t)
+{
 	unsigned int reg = MX31_CSPICTRL_ENABLE | MX31_CSPICTRL_MASTER;
 	unsigned int clk;
 
-	reg |= spi_imx_clkdiv_2(spi_imx->spi_clk, spi_imx->speed_hz, &clk) <<
+	reg |= spi_imx_clkdiv_2(spi_imx->spi_clk, t->speed_hz, &clk) <<
 		MX31_CSPICTRL_DR_SHIFT;
 	spi_imx->spi_bus_clk = clk;
 
@@ -702,8 +735,10 @@
 	writel(reg, spi_imx->base + MX31_CSPI_TESTREG);
 
 	if (spi_imx->usedma) {
-		/* configure DMA requests when RXFIFO is half full and
-		   when TXFIFO is half empty */
+		/*
+		 * configure DMA requests when RXFIFO is half full and
+		 * when TXFIFO is half empty
+		 */
 		writel(MX31_DMAREG_RH_DEN | MX31_DMAREG_TH_DEN,
 			spi_imx->base + MX31_CSPI_DMAREG);
 	}
@@ -757,14 +792,21 @@
 	writel(reg, spi_imx->base + MXC_CSPICTRL);
 }
 
-static int mx21_config(struct spi_device *spi)
+static int mx21_prepare_message(struct spi_imx_data *spi_imx,
+				struct spi_message *msg)
 {
-	struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master);
+	return 0;
+}
+
+static int mx21_prepare_transfer(struct spi_imx_data *spi_imx,
+				 struct spi_device *spi,
+				 struct spi_transfer *t)
+{
 	unsigned int reg = MX21_CSPICTRL_ENABLE | MX21_CSPICTRL_MASTER;
 	unsigned int max = is_imx27_cspi(spi_imx) ? 16 : 18;
 	unsigned int clk;
 
-	reg |= spi_imx_clkdiv_1(spi_imx->spi_clk, spi_imx->speed_hz, max, &clk)
+	reg |= spi_imx_clkdiv_1(spi_imx->spi_clk, t->speed_hz, max, &clk)
 		<< MX21_CSPICTRL_DR_SHIFT;
 	spi_imx->spi_bus_clk = clk;
 
@@ -826,13 +868,20 @@
 	writel(reg, spi_imx->base + MXC_CSPICTRL);
 }
 
-static int mx1_config(struct spi_device *spi)
+static int mx1_prepare_message(struct spi_imx_data *spi_imx,
+			       struct spi_message *msg)
 {
-	struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master);
+	return 0;
+}
+
+static int mx1_prepare_transfer(struct spi_imx_data *spi_imx,
+				struct spi_device *spi,
+				struct spi_transfer *t)
+{
 	unsigned int reg = MX1_CSPICTRL_ENABLE | MX1_CSPICTRL_MASTER;
 	unsigned int clk;
 
-	reg |= spi_imx_clkdiv_2(spi_imx->spi_clk, spi_imx->speed_hz, &clk) <<
+	reg |= spi_imx_clkdiv_2(spi_imx->spi_clk, t->speed_hz, &clk) <<
 		MX1_CSPICTRL_DR_SHIFT;
 	spi_imx->spi_bus_clk = clk;
 
@@ -860,7 +909,8 @@
 
 static struct spi_imx_devtype_data imx1_cspi_devtype_data = {
 	.intctrl = mx1_intctrl,
-	.config = mx1_config,
+	.prepare_message = mx1_prepare_message,
+	.prepare_transfer = mx1_prepare_transfer,
 	.trigger = mx1_trigger,
 	.rx_available = mx1_rx_available,
 	.reset = mx1_reset,
@@ -873,7 +923,8 @@
 
 static struct spi_imx_devtype_data imx21_cspi_devtype_data = {
 	.intctrl = mx21_intctrl,
-	.config = mx21_config,
+	.prepare_message = mx21_prepare_message,
+	.prepare_transfer = mx21_prepare_transfer,
 	.trigger = mx21_trigger,
 	.rx_available = mx21_rx_available,
 	.reset = mx21_reset,
@@ -887,7 +938,8 @@
 static struct spi_imx_devtype_data imx27_cspi_devtype_data = {
 	/* i.mx27 cspi shares the functions with i.mx21 one */
 	.intctrl = mx21_intctrl,
-	.config = mx21_config,
+	.prepare_message = mx21_prepare_message,
+	.prepare_transfer = mx21_prepare_transfer,
 	.trigger = mx21_trigger,
 	.rx_available = mx21_rx_available,
 	.reset = mx21_reset,
@@ -900,7 +952,8 @@
 
 static struct spi_imx_devtype_data imx31_cspi_devtype_data = {
 	.intctrl = mx31_intctrl,
-	.config = mx31_config,
+	.prepare_message = mx31_prepare_message,
+	.prepare_transfer = mx31_prepare_transfer,
 	.trigger = mx31_trigger,
 	.rx_available = mx31_rx_available,
 	.reset = mx31_reset,
@@ -914,7 +967,8 @@
 static struct spi_imx_devtype_data imx35_cspi_devtype_data = {
 	/* i.mx35 and later cspi shares the functions with i.mx31 one */
 	.intctrl = mx31_intctrl,
-	.config = mx31_config,
+	.prepare_message = mx31_prepare_message,
+	.prepare_transfer = mx31_prepare_transfer,
 	.trigger = mx31_trigger,
 	.rx_available = mx31_rx_available,
 	.reset = mx31_reset,
@@ -927,10 +981,12 @@
 
 static struct spi_imx_devtype_data imx51_ecspi_devtype_data = {
 	.intctrl = mx51_ecspi_intctrl,
-	.config = mx51_ecspi_config,
+	.prepare_message = mx51_ecspi_prepare_message,
+	.prepare_transfer = mx51_ecspi_prepare_transfer,
 	.trigger = mx51_ecspi_trigger,
 	.rx_available = mx51_ecspi_rx_available,
 	.reset = mx51_ecspi_reset,
+	.setup_wml = mx51_setup_wml,
 	.fifo_size = 64,
 	.has_dmamode = true,
 	.dynamic_burst = true,
@@ -941,7 +997,8 @@
 
 static struct spi_imx_devtype_data imx53_ecspi_devtype_data = {
 	.intctrl = mx51_ecspi_intctrl,
-	.config = mx51_ecspi_config,
+	.prepare_message = mx51_ecspi_prepare_message,
+	.prepare_transfer = mx51_ecspi_prepare_transfer,
 	.trigger = mx51_ecspi_trigger,
 	.rx_available = mx51_ecspi_rx_available,
 	.reset = mx51_ecspi_reset,
@@ -1049,7 +1106,7 @@
 		if (!spi_imx->count)
 			break;
 		if (spi_imx->dynamic_burst &&
-		    spi_imx->txfifo >=  DIV_ROUND_UP(spi_imx->remainder,
+		    spi_imx->txfifo >= DIV_ROUND_UP(spi_imx->remainder,
 						     fifo_words))
 			break;
 		spi_imx->tx(spi_imx);
@@ -1138,13 +1195,11 @@
 				 struct spi_transfer *t)
 {
 	struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master);
-	int ret;
 
 	if (!t)
 		return 0;
 
 	spi_imx->bits_per_word = t->bits_per_word;
-	spi_imx->speed_hz  = t->speed_hz;
 
 	/*
 	 * Initialize the functions for transfer. To transfer non byte-aligned
@@ -1179,19 +1234,13 @@
 	else
 		spi_imx->usedma = 0;
 
-	if (spi_imx->usedma) {
-		ret = spi_imx_dma_configure(spi->master);
-		if (ret)
-			return ret;
-	}
-
 	if (is_imx53_ecspi(spi_imx) && spi_imx->slave_mode) {
 		spi_imx->rx = mx53_ecspi_rx_slave;
 		spi_imx->tx = mx53_ecspi_tx_slave;
 		spi_imx->slave_burst = t->len;
 	}
 
-	spi_imx->devtype_data->config(spi);
+	spi_imx->devtype_data->prepare_transfer(spi_imx, spi, t);
 
 	return 0;
 }
@@ -1289,6 +1338,31 @@
 	unsigned long timeout;
 	struct spi_master *master = spi_imx->bitbang.master;
 	struct sg_table *tx = &transfer->tx_sg, *rx = &transfer->rx_sg;
+	struct scatterlist *last_sg = sg_last(rx->sgl, rx->nents);
+	unsigned int bytes_per_word, i;
+	int ret;
+
+	/* Get the right burst length from the last sg to ensure no tail data */
+	bytes_per_word = spi_imx_bytes_per_word(transfer->bits_per_word);
+	for (i = spi_imx->devtype_data->fifo_size / 2; i > 0; i--) {
+		if (!(sg_dma_len(last_sg) % (i * bytes_per_word)))
+			break;
+	}
+	/* Use 1 as the wml if no suitable burst length was found */
+	if (i == 0)
+		i = 1;
+
+	spi_imx->wml = i;
+
+	ret = spi_imx_dma_configure(master);
+	if (ret)
+		return ret;
+
+	if (!spi_imx->devtype_data->setup_wml) {
+		dev_err(spi_imx->dev, "No setup_wml()?\n");
+		return -EINVAL;
+	}
+	spi_imx->devtype_data->setup_wml(spi_imx);
 
 	/*
 	 * The TX DMA setup starts the transfer, so make sure RX is configured
@@ -1427,7 +1501,7 @@
 
 	/* flush rxfifo before transfer */
 	while (spi_imx->devtype_data->rx_available(spi_imx))
-		spi_imx->rx(spi_imx);
+		readl(spi_imx->base + MXC_CSPIRXDATA);
 
 	if (spi_imx->slave_mode)
 		return spi_imx_pio_transfer_slave(spi, transfer);
@@ -1475,7 +1549,13 @@
 		return ret;
 	}
 
-	return 0;
+	ret = spi_imx->devtype_data->prepare_message(spi_imx, msg);
+	if (ret) {
+		clk_disable(spi_imx->clk_ipg);
+		clk_disable(spi_imx->clk_per);
+	}
+
+	return ret;
 }
 
 static int
diff --git a/drivers/spi/spi-iproc-qspi.c b/drivers/spi/spi-iproc-qspi.c
index be6ccb2..de297da 100644
--- a/drivers/spi/spi-iproc-qspi.c
+++ b/drivers/spi/spi-iproc-qspi.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright 2016 Broadcom Limited
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
  */
 
 #include <linux/device.h>
diff --git a/drivers/spi/spi-jcore.c b/drivers/spi/spi-jcore.c
index 702fe57..cc49fa4 100644
--- a/drivers/spi/spi-jcore.c
+++ b/drivers/spi/spi-jcore.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * J-Core SPI controller driver
  *
diff --git a/drivers/spi/spi-lantiq-ssc.c b/drivers/spi/spi-lantiq-ssc.c
index d597661..9dfe8b0 100644
--- a/drivers/spi/spi-lantiq-ssc.c
+++ b/drivers/spi/spi-lantiq-ssc.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2011-2015 Daniel Schwierzeck <daniel.schwierzeck@gmail.com>
  * Copyright (C) 2016 Hauke Mehrtens <hauke@hauke-m.de>
- *
- * This program is free software; you can distribute it and/or modify it
- * under the terms of the GNU General Public License (Version 2) as
- * published by the Free Software Foundation.
  */
 
 #include <linux/kernel.h>
@@ -822,22 +819,16 @@
 	}
 
 	rx_irq = platform_get_irq_byname(pdev, LTQ_SPI_RX_IRQ_NAME);
-	if (rx_irq < 0) {
-		dev_err(dev, "failed to get %s\n", LTQ_SPI_RX_IRQ_NAME);
+	if (rx_irq < 0)
 		return -ENXIO;
-	}
 
 	tx_irq = platform_get_irq_byname(pdev, LTQ_SPI_TX_IRQ_NAME);
-	if (tx_irq < 0) {
-		dev_err(dev, "failed to get %s\n", LTQ_SPI_TX_IRQ_NAME);
+	if (tx_irq < 0)
 		return -ENXIO;
-	}
 
 	err_irq = platform_get_irq_byname(pdev, LTQ_SPI_ERR_IRQ_NAME);
-	if (err_irq < 0) {
-		dev_err(dev, "failed to get %s\n", LTQ_SPI_ERR_IRQ_NAME);
+	if (err_irq < 0)
 		return -ENXIO;
-	}
 
 	master = spi_alloc_master(dev, sizeof(struct lantiq_ssc_spi));
 	if (!master)
diff --git a/drivers/spi/spi-lm70llp.c b/drivers/spi/spi-lm70llp.c
index 4549efd..174dba2 100644
--- a/drivers/spi/spi-lm70llp.c
+++ b/drivers/spi/spi-lm70llp.c
@@ -1,17 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Driver for LM70EVAL-LLP board for the LM70 sensor
  *
  * Copyright (C) 2006 Kaiwan N Billimoria <kaiwan@designergraphix.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -43,7 +34,7 @@
  * available (on page 4) here:
  *  http://www.national.com/appinfo/tempsensors/files/LM70LLPEVALmanual.pdf
  *
- * Also see Documentation/spi/spi-lm70llp.  The SPI<->parport code here is
+ * Also see Documentation/spi/spi-lm70llp.rst.  The SPI<->parport code here is
  * (heavily) based on spi-butterfly by David Brownell.
  *
  * The LM70 LLP connects to the PC parallel port in the following manner:
diff --git a/drivers/spi/spi-loopback-test.c b/drivers/spi/spi-loopback-test.c
index bed7403..6f18d49 100644
--- a/drivers/spi/spi-loopback-test.c
+++ b/drivers/spi/spi-loopback-test.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  linux/drivers/spi/spi-loopback-test.c
  *
@@ -6,16 +7,6 @@
  *  Loopback test driver to test several typical spi_message conditions
  *  that a spi_master driver may encounter
  *  this can also get used for regression testing
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
  */
 
 #include <linux/delay.h>
diff --git a/drivers/spi/spi-lp8841-rtc.c b/drivers/spi/spi-lp8841-rtc.c
index faa577d..2d43654 100644
--- a/drivers/spi/spi-lp8841-rtc.c
+++ b/drivers/spi/spi-lp8841-rtc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * SPI master driver for ICP DAS LP-8841 RTC
  *
@@ -8,16 +9,6 @@
  * Dallas DS1302 RTC Support
  * Copyright (C) 2002 David McCullough
  * Copyright (C) 2003 - 2007 Paul Mundt
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 #include <linux/delay.h>
 #include <linux/kernel.h>
@@ -194,7 +185,6 @@
 	int				ret;
 	struct spi_master		*master;
 	struct spi_lp8841_rtc		*data;
-	void				*iomem;
 
 	master = spi_alloc_master(&pdev->dev, sizeof(*data));
 	if (!master)
@@ -216,8 +206,7 @@
 
 	data = spi_master_get_devdata(master);
 
-	iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	data->iomem = devm_ioremap_resource(&pdev->dev, iomem);
+	data->iomem = devm_platform_ioremap_resource(pdev, 0);
 	ret = PTR_ERR_OR_ZERO(data->iomem);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to get IO address\n");
diff --git a/drivers/spi/spi-mem.c b/drivers/spi/spi-mem.c
index eb72dba..9f0fa9f 100644
--- a/drivers/spi/spi-mem.c
+++ b/drivers/spi/spi-mem.c
@@ -12,6 +12,8 @@
 
 #include "internals.h"
 
+#define SPI_MEM_MAX_BUSWIDTH		8
+
 /**
  * spi_controller_dma_map_mem_op_data() - DMA-map the buffer attached to a
  *					  memory operation
@@ -119,6 +121,13 @@
 
 		break;
 
+	case 8:
+		if ((tx && (mode & SPI_TX_OCTAL)) ||
+		    (!tx && (mode & SPI_RX_OCTAL)))
+			return 0;
+
+		break;
+
 	default:
 		break;
 	}
@@ -126,8 +135,8 @@
 	return -ENOTSUPP;
 }
 
-static bool spi_mem_default_supports_op(struct spi_mem *mem,
-					const struct spi_mem_op *op)
+bool spi_mem_default_supports_op(struct spi_mem *mem,
+				 const struct spi_mem_op *op)
 {
 	if (spi_check_buswidth_req(mem, op->cmd.buswidth, true))
 		return false;
@@ -140,7 +149,7 @@
 	    spi_check_buswidth_req(mem, op->dummy.buswidth, true))
 		return false;
 
-	if (op->data.nbytes &&
+	if (op->data.dir != SPI_MEM_NO_DATA &&
 	    spi_check_buswidth_req(mem, op->data.buswidth,
 				   op->data.dir == SPI_MEM_DATA_OUT))
 		return false;
@@ -149,6 +158,44 @@
 }
 EXPORT_SYMBOL_GPL(spi_mem_default_supports_op);
 
+static bool spi_mem_buswidth_is_valid(u8 buswidth)
+{
+	if (hweight8(buswidth) > 1 || buswidth > SPI_MEM_MAX_BUSWIDTH)
+		return false;
+
+	return true;
+}
+
+static int spi_mem_check_op(const struct spi_mem_op *op)
+{
+	if (!op->cmd.buswidth)
+		return -EINVAL;
+
+	if ((op->addr.nbytes && !op->addr.buswidth) ||
+	    (op->dummy.nbytes && !op->dummy.buswidth) ||
+	    (op->data.nbytes && !op->data.buswidth))
+		return -EINVAL;
+
+	if (!spi_mem_buswidth_is_valid(op->cmd.buswidth) ||
+	    !spi_mem_buswidth_is_valid(op->addr.buswidth) ||
+	    !spi_mem_buswidth_is_valid(op->dummy.buswidth) ||
+	    !spi_mem_buswidth_is_valid(op->data.buswidth))
+		return -EINVAL;
+
+	return 0;
+}
+
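A compact illustration of the buswidth rule enforced above: a width passes only if it has a single bit set (a power of two) and does not exceed SPI_MEM_MAX_BUSWIDTH, so 1, 2, 4 and 8 are accepted while 3 and 16 are rejected; zero widths are caught separately by the nbytes checks in spi_mem_check_op(). This is a standalone sketch, not driver code.

#include <stdio.h>

#define EXAMPLE_MAX_BUSWIDTH	8	/* mirrors SPI_MEM_MAX_BUSWIDTH */

/* Same test as spi_mem_buswidth_is_valid(): one bit set, <= the maximum */
static int example_buswidth_is_valid(unsigned int w)
{
	unsigned int bits = 0, tmp = w;

	while (tmp) {			/* open-coded hweight8() */
		bits += tmp & 1;
		tmp >>= 1;
	}
	return !(bits > 1 || w > EXAMPLE_MAX_BUSWIDTH);
}

int main(void)
{
	unsigned int widths[] = { 1, 2, 3, 4, 8, 16 };
	unsigned int i;

	for (i = 0; i < sizeof(widths) / sizeof(widths[0]); i++)
		printf("buswidth %2u -> %s\n", widths[i],
		       example_buswidth_is_valid(widths[i]) ? "ok" : "rejected");
	return 0;
}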
+static bool spi_mem_internal_supports_op(struct spi_mem *mem,
+					 const struct spi_mem_op *op)
+{
+	struct spi_controller *ctlr = mem->spi->controller;
+
+	if (ctlr->mem_ops && ctlr->mem_ops->supports_op)
+		return ctlr->mem_ops->supports_op(mem, op);
+
+	return spi_mem_default_supports_op(mem, op);
+}
+
 /**
  * spi_mem_supports_op() - Check if a memory device and the controller it is
  *			   connected to support a specific memory operation
@@ -166,15 +213,51 @@
  */
 bool spi_mem_supports_op(struct spi_mem *mem, const struct spi_mem_op *op)
 {
-	struct spi_controller *ctlr = mem->spi->controller;
+	if (spi_mem_check_op(op))
+		return false;
 
-	if (ctlr->mem_ops && ctlr->mem_ops->supports_op)
-		return ctlr->mem_ops->supports_op(mem, op);
-
-	return spi_mem_default_supports_op(mem, op);
+	return spi_mem_internal_supports_op(mem, op);
 }
 EXPORT_SYMBOL_GPL(spi_mem_supports_op);
 
+static int spi_mem_access_start(struct spi_mem *mem)
+{
+	struct spi_controller *ctlr = mem->spi->controller;
+
+	/*
+	 * Flush the message queue before executing our SPI memory
+	 * operation to prevent preemption of regular SPI transfers.
+	 */
+	spi_flush_queue(ctlr);
+
+	if (ctlr->auto_runtime_pm) {
+		int ret;
+
+		ret = pm_runtime_get_sync(ctlr->dev.parent);
+		if (ret < 0) {
+			dev_err(&ctlr->dev, "Failed to power device: %d\n",
+				ret);
+			return ret;
+		}
+	}
+
+	mutex_lock(&ctlr->bus_lock_mutex);
+	mutex_lock(&ctlr->io_mutex);
+
+	return 0;
+}
+
+static void spi_mem_access_end(struct spi_mem *mem)
+{
+	struct spi_controller *ctlr = mem->spi->controller;
+
+	mutex_unlock(&ctlr->io_mutex);
+	mutex_unlock(&ctlr->bus_lock_mutex);
+
+	if (ctlr->auto_runtime_pm)
+		pm_runtime_put(ctlr->dev.parent);
+}
+
 /**
  * spi_mem_exec_op() - Execute a memory operation
  * @mem: the SPI memory
@@ -196,34 +279,21 @@
 	u8 *tmpbuf;
 	int ret;
 
-	if (!spi_mem_supports_op(mem, op))
+	ret = spi_mem_check_op(op);
+	if (ret)
+		return ret;
+
+	if (!spi_mem_internal_supports_op(mem, op))
 		return -ENOTSUPP;
 
 	if (ctlr->mem_ops) {
-		/*
-		 * Flush the message queue before executing our SPI memory
-		 * operation to prevent preemption of regular SPI transfers.
-		 */
-		spi_flush_queue(ctlr);
+		ret = spi_mem_access_start(mem);
+		if (ret)
+			return ret;
 
-		if (ctlr->auto_runtime_pm) {
-			ret = pm_runtime_get_sync(ctlr->dev.parent);
-			if (ret < 0) {
-				dev_err(&ctlr->dev,
-					"Failed to power device: %d\n",
-					ret);
-				return ret;
-			}
-		}
-
-		mutex_lock(&ctlr->bus_lock_mutex);
-		mutex_lock(&ctlr->io_mutex);
 		ret = ctlr->mem_ops->exec_op(mem, op);
-		mutex_unlock(&ctlr->io_mutex);
-		mutex_unlock(&ctlr->bus_lock_mutex);
 
-		if (ctlr->auto_runtime_pm)
-			pm_runtime_put(ctlr->dev.parent);
+		spi_mem_access_end(mem);
 
 		/*
 		 * Some controllers only optimize specific paths (typically the
@@ -369,6 +439,280 @@
 }
 EXPORT_SYMBOL_GPL(spi_mem_adjust_op_size);
 
+static ssize_t spi_mem_no_dirmap_read(struct spi_mem_dirmap_desc *desc,
+				      u64 offs, size_t len, void *buf)
+{
+	struct spi_mem_op op = desc->info.op_tmpl;
+	int ret;
+
+	op.addr.val = desc->info.offset + offs;
+	op.data.buf.in = buf;
+	op.data.nbytes = len;
+	ret = spi_mem_adjust_op_size(desc->mem, &op);
+	if (ret)
+		return ret;
+
+	ret = spi_mem_exec_op(desc->mem, &op);
+	if (ret)
+		return ret;
+
+	return op.data.nbytes;
+}
+
+static ssize_t spi_mem_no_dirmap_write(struct spi_mem_dirmap_desc *desc,
+				       u64 offs, size_t len, const void *buf)
+{
+	struct spi_mem_op op = desc->info.op_tmpl;
+	int ret;
+
+	op.addr.val = desc->info.offset + offs;
+	op.data.buf.out = buf;
+	op.data.nbytes = len;
+	ret = spi_mem_adjust_op_size(desc->mem, &op);
+	if (ret)
+		return ret;
+
+	ret = spi_mem_exec_op(desc->mem, &op);
+	if (ret)
+		return ret;
+
+	return op.data.nbytes;
+}
+
+/**
+ * spi_mem_dirmap_create() - Create a direct mapping descriptor
+ * @mem: SPI mem device this direct mapping should be created for
+ * @info: direct mapping information
+ *
+ * This function creates a direct mapping descriptor which can then be used
+ * to access the memory using spi_mem_dirmap_read() or spi_mem_dirmap_write().
+ * If the SPI controller driver does not support direct mapping, this function
+ * falls back to an implementation using spi_mem_exec_op(), so that the caller
+ * doesn't have to bother implementing a fallback on their own.
+ *
+ * Return: a valid pointer in case of success, and ERR_PTR() otherwise.
+ */
+struct spi_mem_dirmap_desc *
+spi_mem_dirmap_create(struct spi_mem *mem,
+		      const struct spi_mem_dirmap_info *info)
+{
+	struct spi_controller *ctlr = mem->spi->controller;
+	struct spi_mem_dirmap_desc *desc;
+	int ret = -ENOTSUPP;
+
+	/* Make sure the number of address bytes is between 1 and 8. */
+	if (!info->op_tmpl.addr.nbytes || info->op_tmpl.addr.nbytes > 8)
+		return ERR_PTR(-EINVAL);
+
+	/* data.dir should either be SPI_MEM_DATA_IN or SPI_MEM_DATA_OUT. */
+	if (info->op_tmpl.data.dir == SPI_MEM_NO_DATA)
+		return ERR_PTR(-EINVAL);
+
+	desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+	if (!desc)
+		return ERR_PTR(-ENOMEM);
+
+	desc->mem = mem;
+	desc->info = *info;
+	if (ctlr->mem_ops && ctlr->mem_ops->dirmap_create)
+		ret = ctlr->mem_ops->dirmap_create(desc);
+
+	if (ret) {
+		desc->nodirmap = true;
+		if (!spi_mem_supports_op(desc->mem, &desc->info.op_tmpl))
+			ret = -ENOTSUPP;
+		else
+			ret = 0;
+	}
+
+	if (ret) {
+		kfree(desc);
+		return ERR_PTR(ret);
+	}
+
+	return desc;
+}
+EXPORT_SYMBOL_GPL(spi_mem_dirmap_create);
+
+/**
+ * spi_mem_dirmap_destroy() - Destroy a direct mapping descriptor
+ * @desc: the direct mapping descriptor to destroy
+ *
+ * This function destroys a direct mapping descriptor previously created by
+ * spi_mem_dirmap_create().
+ */
+void spi_mem_dirmap_destroy(struct spi_mem_dirmap_desc *desc)
+{
+	struct spi_controller *ctlr = desc->mem->spi->controller;
+
+	if (!desc->nodirmap && ctlr->mem_ops && ctlr->mem_ops->dirmap_destroy)
+		ctlr->mem_ops->dirmap_destroy(desc);
+
+	kfree(desc);
+}
+EXPORT_SYMBOL_GPL(spi_mem_dirmap_destroy);
+
+static void devm_spi_mem_dirmap_release(struct device *dev, void *res)
+{
+	struct spi_mem_dirmap_desc *desc = *(struct spi_mem_dirmap_desc **)res;
+
+	spi_mem_dirmap_destroy(desc);
+}
+
+/**
+ * devm_spi_mem_dirmap_create() - Create a direct mapping descriptor and attach
+ *				  it to a device
+ * @dev: device the dirmap desc will be attached to
+ * @mem: SPI mem device this direct mapping should be created for
+ * @info: direct mapping information
+ *
+ * devm_ variant of the spi_mem_dirmap_create() function. See
+ * spi_mem_dirmap_create() for more details.
+ *
+ * Return: a valid pointer in case of success, and ERR_PTR() otherwise.
+ */
+struct spi_mem_dirmap_desc *
+devm_spi_mem_dirmap_create(struct device *dev, struct spi_mem *mem,
+			   const struct spi_mem_dirmap_info *info)
+{
+	struct spi_mem_dirmap_desc **ptr, *desc;
+
+	ptr = devres_alloc(devm_spi_mem_dirmap_release, sizeof(*ptr),
+			   GFP_KERNEL);
+	if (!ptr)
+		return ERR_PTR(-ENOMEM);
+
+	desc = spi_mem_dirmap_create(mem, info);
+	if (IS_ERR(desc)) {
+		devres_free(ptr);
+	} else {
+		*ptr = desc;
+		devres_add(dev, ptr);
+	}
+
+	return desc;
+}
+EXPORT_SYMBOL_GPL(devm_spi_mem_dirmap_create);
+
+static int devm_spi_mem_dirmap_match(struct device *dev, void *res, void *data)
+{
+	struct spi_mem_dirmap_desc **ptr = res;
+
+	if (WARN_ON(!ptr || !*ptr))
+		return 0;
+
+	return *ptr == data;
+}
+
+/**
+ * devm_spi_mem_dirmap_destroy() - Destroy a direct mapping descriptor attached
+ *				   to a device
+ * @dev: device the dirmap desc is attached to
+ * @desc: the direct mapping descriptor to destroy
+ *
+ * devm_ variant of the spi_mem_dirmap_destroy() function. See
+ * spi_mem_dirmap_destroy() for more details.
+ */
+void devm_spi_mem_dirmap_destroy(struct device *dev,
+				 struct spi_mem_dirmap_desc *desc)
+{
+	devres_release(dev, devm_spi_mem_dirmap_release,
+		       devm_spi_mem_dirmap_match, desc);
+}
+EXPORT_SYMBOL_GPL(devm_spi_mem_dirmap_destroy);
+
+/**
+ * spi_mem_dirmap_read() - Read data through a direct mapping
+ * @desc: direct mapping descriptor
+ * @offs: offset to start reading from. Note that this is not an absolute
+ *	  offset, but an offset relative to the direct mapping, which itself
+ *	  starts at its own offset within the memory device
+ * @len: length in bytes
+ * @buf: destination buffer. This buffer must be DMA-able
+ *
+ * This function reads data from a memory device using a direct mapping
+ * previously instantiated with spi_mem_dirmap_create().
+ *
+ * Return: the amount of data read from the memory device or a negative error
+ * code. Note that the returned size might be smaller than @len, and the caller
+ * is responsible for calling spi_mem_dirmap_read() again when that happens.
+ */
+ssize_t spi_mem_dirmap_read(struct spi_mem_dirmap_desc *desc,
+			    u64 offs, size_t len, void *buf)
+{
+	struct spi_controller *ctlr = desc->mem->spi->controller;
+	ssize_t ret;
+
+	if (desc->info.op_tmpl.data.dir != SPI_MEM_DATA_IN)
+		return -EINVAL;
+
+	if (!len)
+		return 0;
+
+	if (desc->nodirmap) {
+		ret = spi_mem_no_dirmap_read(desc, offs, len, buf);
+	} else if (ctlr->mem_ops && ctlr->mem_ops->dirmap_read) {
+		ret = spi_mem_access_start(desc->mem);
+		if (ret)
+			return ret;
+
+		ret = ctlr->mem_ops->dirmap_read(desc, offs, len, buf);
+
+		spi_mem_access_end(desc->mem);
+	} else {
+		ret = -ENOTSUPP;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(spi_mem_dirmap_read);
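/*
 * Illustrative sketch, not part of this patch: spi_mem_dirmap_read() may
 * return fewer bytes than requested, so callers are expected to loop until
 * the whole range has been read. "mydrv_dirmap_read_all" is a hypothetical
 * helper.
 */
static int mydrv_dirmap_read_all(struct spi_mem_dirmap_desc *desc, u64 offs,
				 size_t len, void *buf)
{
	while (len) {
		ssize_t ret = spi_mem_dirmap_read(desc, offs, len, buf);

		if (ret < 0)
			return ret;
		if (!ret)
			return -EIO;	/* no forward progress */

		offs += ret;
		buf += ret;
		len -= ret;
	}

	return 0;
}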
+
+/**
+ * spi_mem_dirmap_write() - Write data through a direct mapping
+ * @desc: direct mapping descriptor
+ * @offs: offset to start writing from. Note that this is not an absolute
+ *	  offset, but an offset relative to the direct mapping, which itself
+ *	  starts at its own offset within the memory device
+ * @len: length in bytes
+ * @buf: source buffer. This buffer must be DMA-able
+ *
+ * This function writes data to a memory device using a direct mapping
+ * previously instantiated with spi_mem_dirmap_create().
+ *
+ * Return: the amount of data written to the memory device or a negative error
+ * code. Note that the returned size might be smaller than @len, and the caller
+ * is responsible for calling spi_mem_dirmap_write() again when that happens.
+ */
+ssize_t spi_mem_dirmap_write(struct spi_mem_dirmap_desc *desc,
+			     u64 offs, size_t len, const void *buf)
+{
+	struct spi_controller *ctlr = desc->mem->spi->controller;
+	ssize_t ret;
+
+	if (desc->info.op_tmpl.data.dir != SPI_MEM_DATA_OUT)
+		return -EINVAL;
+
+	if (!len)
+		return 0;
+
+	if (desc->nodirmap) {
+		ret = spi_mem_no_dirmap_write(desc, offs, len, buf);
+	} else if (ctlr->mem_ops && ctlr->mem_ops->dirmap_write) {
+		ret = spi_mem_access_start(desc->mem);
+		if (ret)
+			return ret;
+
+		ret = ctlr->mem_ops->dirmap_write(desc, offs, len, buf);
+
+		spi_mem_access_end(desc->mem);
+	} else {
+		ret = -ENOTSUPP;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(spi_mem_dirmap_write);
+
 static inline struct spi_mem_driver *to_spi_mem_drv(struct device_driver *drv)
 {
 	return container_of(drv, struct spi_mem_driver, spidrv.driver);
diff --git a/drivers/spi/spi-meson-spicc.c b/drivers/spi/spi-meson-spicc.c
index 7fe4488..f3f1044 100644
--- a/drivers/spi/spi-meson-spicc.c
+++ b/drivers/spi/spi-meson-spicc.c
@@ -503,7 +503,6 @@
 {
 	struct spi_master *master;
 	struct meson_spicc_device *spicc;
-	struct resource *res;
 	int ret, irq, rate;
 
 	master = spi_alloc_master(&pdev->dev, sizeof(*spicc));
@@ -517,8 +516,7 @@
 	spicc->pdev = pdev;
 	platform_set_drvdata(pdev, spicc);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	spicc->base = devm_ioremap_resource(&pdev->dev, res);
+	spicc->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(spicc->base)) {
 		dev_err(&pdev->dev, "io resource mapping failed\n");
 		ret = PTR_ERR(spicc->base);
diff --git a/drivers/spi/spi-meson-spifc.c b/drivers/spi/spi-meson-spifc.c
index 616566e..c7b0399 100644
--- a/drivers/spi/spi-meson-spifc.c
+++ b/drivers/spi/spi-meson-spifc.c
@@ -1,15 +1,9 @@
-/*
- * Driver for Amlogic Meson SPI flash controller (SPIFC)
- *
- * Copyright (C) 2014 Beniamino Galvani <b.galvani@gmail.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Driver for Amlogic Meson SPI flash controller (SPIFC)
+//
+// Copyright (C) 2014 Beniamino Galvani <b.galvani@gmail.com>
+//
 
 #include <linux/clk.h>
 #include <linux/delay.h>
@@ -292,7 +286,6 @@
 {
 	struct spi_master *master;
 	struct meson_spifc *spifc;
-	struct resource *res;
 	void __iomem *base;
 	unsigned int rate;
 	int ret = 0;
@@ -306,8 +299,7 @@
 	spifc = spi_master_get_devdata(master);
 	spifc->dev = &pdev->dev;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	base = devm_ioremap_resource(spifc->dev, res);
+	base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(base)) {
 		ret = PTR_ERR(base);
 		goto out_err;
diff --git a/drivers/spi/spi-mpc512x-psc.c b/drivers/spi/spi-mpc512x-psc.c
index c3ec46c..a337b84 100644
--- a/drivers/spi/spi-mpc512x-psc.c
+++ b/drivers/spi/spi-mpc512x-psc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * MPC512x PSC in SPI mode driver.
  *
@@ -7,11 +8,6 @@
  *
  * Fork of mpc52xx_psc_spi.c:
  *	Copyright (C) 2006 TOPTICA Photonics AG., Dragos Carp
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
  */
 
 #include <linux/module.h>
diff --git a/drivers/spi/spi-mpc52xx-psc.c b/drivers/spi/spi-mpc52xx-psc.c
index 42a8b85..c7e478b 100644
--- a/drivers/spi/spi-mpc52xx-psc.c
+++ b/drivers/spi/spi-mpc52xx-psc.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * MPC52xx PSC in SPI mode driver.
  *
  * Maintainer: Dragos Carp
  *
  * Copyright (C) 2006 TOPTICA Photonics AG.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
  */
 
 #include <linux/module.h>
diff --git a/drivers/spi/spi-mpc52xx.c b/drivers/spi/spi-mpc52xx.c
index 0e55784..ef2f244 100644
--- a/drivers/spi/spi-mpc52xx.c
+++ b/drivers/spi/spi-mpc52xx.c
@@ -1,10 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * MPC52xx SPI bus driver.
  *
  * Copyright (C) 2008 Secret Lab Technologies Ltd.
  *
- * This file is released under the GPLv2
- *
  * This is the driver for the MPC5200's dedicated SPI controller.
  *
  * Note: this driver does not support the MPC5200 PSC in SPI mode.  For
diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c
index 86bf456..6888a4d 100644
--- a/drivers/spi/spi-mt65xx.c
+++ b/drivers/spi/spi-mt65xx.c
@@ -1,15 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2015 MediaTek Inc.
  * Author: Leilk Liu <leilk.liu@mediatek.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/clk.h>
@@ -25,6 +17,7 @@
 #include <linux/platform_data/spi-mt65xx.h>
 #include <linux/pm_runtime.h>
 #include <linux/spi/spi.h>
+#include <linux/dma-mapping.h>
 
 #define SPI_CFG0_REG                      0x0000
 #define SPI_CFG1_REG                      0x0004
@@ -36,6 +29,8 @@
 #define SPI_STATUS0_REG                   0x001c
 #define SPI_PAD_SEL_REG                   0x0024
 #define SPI_CFG2_REG                      0x0028
+#define SPI_TX_SRC_REG_64                 0x002c
+#define SPI_RX_DST_REG_64                 0x0030
 
 #define SPI_CFG0_SCK_HIGH_OFFSET          0
 #define SPI_CFG0_SCK_LOW_OFFSET           8
@@ -81,6 +76,10 @@
 
 #define MTK_SPI_MAX_FIFO_SIZE 32U
 #define MTK_SPI_PACKET_SIZE 1024
+#define MTK_SPI_32BITS_MASK  (0xffffffff)
+
+#define DMA_ADDR_EXT_BITS (36)
+#define DMA_ADDR_DEF_BITS (32)
 
 struct mtk_spi_compatible {
 	bool need_pad_sel;
@@ -88,6 +87,8 @@
 	bool must_tx;
 	/* some IC design adjust cfg register to enhance time accuracy */
 	bool enhance_timing;
+	/* some IC support DMA addr extension */
+	bool dma_ext;
 };
 
 struct mtk_spi {
@@ -98,6 +99,7 @@
 	struct clk *parent_clk, *sel_clk, *spi_clk;
 	struct spi_transfer *cur_transfer;
 	u32 xfer_len;
+	u32 num_xfered;
 	struct scatterlist *tx_sgl, *rx_sgl;
 	u32 tx_sgl_len, rx_sgl_len;
 	const struct mtk_spi_compatible *dev_comp;
@@ -109,6 +111,13 @@
 	.must_tx = true,
 };
 
+static const struct mtk_spi_compatible mt6765_compat = {
+	.need_pad_sel = true,
+	.must_tx = true,
+	.enhance_timing = true,
+	.dma_ext = true,
+};
+
 static const struct mtk_spi_compatible mt7622_compat = {
 	.must_tx = true,
 	.enhance_timing = true,
@@ -119,13 +128,17 @@
 	.must_tx = true,
 };
 
+static const struct mtk_spi_compatible mt8183_compat = {
+	.need_pad_sel = true,
+	.must_tx = true,
+	.enhance_timing = true,
+};
+
 /*
  * A piece of default chip info unless the platform
  * supplies it.
  */
 static const struct mtk_chip_config mtk_default_chip_info = {
-	.rx_mlsb = 1,
-	.tx_mlsb = 1,
 	.cs_pol = 0,
 	.sample_sel = 0,
 };
@@ -140,15 +153,24 @@
 	{ .compatible = "mediatek,mt6589-spi",
 		.data = (void *)&mtk_common_compat,
 	},
+	{ .compatible = "mediatek,mt6765-spi",
+		.data = (void *)&mt6765_compat,
+	},
 	{ .compatible = "mediatek,mt7622-spi",
 		.data = (void *)&mt7622_compat,
 	},
+	{ .compatible = "mediatek,mt7629-spi",
+		.data = (void *)&mt7622_compat,
+	},
 	{ .compatible = "mediatek,mt8135-spi",
 		.data = (void *)&mtk_common_compat,
 	},
 	{ .compatible = "mediatek,mt8173-spi",
 		.data = (void *)&mt8173_compat,
 	},
+	{ .compatible = "mediatek,mt8183-spi",
+		.data = (void *)&mt8183_compat,
+	},
 	{}
 };
 MODULE_DEVICE_TABLE(of, mtk_spi_of_match);
@@ -190,14 +212,13 @@
 		reg_val &= ~SPI_CMD_CPOL;
 
 	/* set the mlsbx and mlsbtx */
-	if (chip_config->tx_mlsb)
-		reg_val |= SPI_CMD_TXMSBF;
-	else
+	if (spi->mode & SPI_LSB_FIRST) {
 		reg_val &= ~SPI_CMD_TXMSBF;
-	if (chip_config->rx_mlsb)
-		reg_val |= SPI_CMD_RXMSBF;
-	else
 		reg_val &= ~SPI_CMD_RXMSBF;
+	} else {
+		reg_val |= SPI_CMD_TXMSBF;
+		reg_val |= SPI_CMD_RXMSBF;
+	}
 
 	/* set the tx/rx endian */
 #ifdef __LITTLE_ENDIAN
@@ -369,10 +390,25 @@
 {
 	struct mtk_spi *mdata = spi_master_get_devdata(master);
 
-	if (mdata->tx_sgl)
-		writel(xfer->tx_dma, mdata->base + SPI_TX_SRC_REG);
-	if (mdata->rx_sgl)
-		writel(xfer->rx_dma, mdata->base + SPI_RX_DST_REG);
+	if (mdata->tx_sgl) {
+		writel((u32)(xfer->tx_dma & MTK_SPI_32BITS_MASK),
+		       mdata->base + SPI_TX_SRC_REG);
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+		if (mdata->dev_comp->dma_ext)
+			writel((u32)(xfer->tx_dma >> 32),
+			       mdata->base + SPI_TX_SRC_REG_64);
+#endif
+	}
+
+	if (mdata->rx_sgl) {
+		writel((u32)(xfer->rx_dma & MTK_SPI_32BITS_MASK),
+		       mdata->base + SPI_RX_DST_REG);
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+		if (mdata->dev_comp->dma_ext)
+			writel((u32)(xfer->rx_dma >> 32),
+			       mdata->base + SPI_RX_DST_REG_64);
+#endif
+	}
 }
 
 static int mtk_spi_fifo_transfer(struct spi_master *master,
@@ -385,6 +421,7 @@
 
 	mdata->cur_transfer = xfer;
 	mdata->xfer_len = min(MTK_SPI_MAX_FIFO_SIZE, xfer->len);
+	mdata->num_xfered = 0;
 	mtk_spi_prepare_transfer(master, xfer);
 	mtk_spi_setup_packet(master);
 
@@ -415,6 +452,7 @@
 	mdata->tx_sgl_len = 0;
 	mdata->rx_sgl_len = 0;
 	mdata->cur_transfer = xfer;
+	mdata->num_xfered = 0;
 
 	mtk_spi_prepare_transfer(master, xfer);
 
@@ -482,7 +520,7 @@
 
 static irqreturn_t mtk_spi_interrupt(int irq, void *dev_id)
 {
-	u32 cmd, reg_val, cnt, remainder;
+	u32 cmd, reg_val, cnt, remainder, len;
 	struct spi_master *master = dev_id;
 	struct mtk_spi *mdata = spi_master_get_devdata(master);
 	struct spi_transfer *trans = mdata->cur_transfer;
@@ -497,36 +535,38 @@
 		if (trans->rx_buf) {
 			cnt = mdata->xfer_len / 4;
 			ioread32_rep(mdata->base + SPI_RX_DATA_REG,
-				     trans->rx_buf, cnt);
+				     trans->rx_buf + mdata->num_xfered, cnt);
 			remainder = mdata->xfer_len % 4;
 			if (remainder > 0) {
 				reg_val = readl(mdata->base + SPI_RX_DATA_REG);
-				memcpy(trans->rx_buf + (cnt * 4),
-					&reg_val, remainder);
+				memcpy(trans->rx_buf +
+					mdata->num_xfered +
+					(cnt * 4),
+					&reg_val,
+					remainder);
 			}
 		}
 
-		trans->len -= mdata->xfer_len;
-		if (!trans->len) {
+		mdata->num_xfered += mdata->xfer_len;
+		if (mdata->num_xfered == trans->len) {
 			spi_finalize_current_transfer(master);
 			return IRQ_HANDLED;
 		}
 
-		if (trans->tx_buf)
-			trans->tx_buf += mdata->xfer_len;
-		if (trans->rx_buf)
-			trans->rx_buf += mdata->xfer_len;
-
-		mdata->xfer_len = min(MTK_SPI_MAX_FIFO_SIZE, trans->len);
+		len = trans->len - mdata->num_xfered;
+		mdata->xfer_len = min(MTK_SPI_MAX_FIFO_SIZE, len);
 		mtk_spi_setup_packet(master);
 
-		cnt = trans->len / 4;
-		iowrite32_rep(mdata->base + SPI_TX_DATA_REG, trans->tx_buf, cnt);
+		cnt = mdata->xfer_len / 4;
+		iowrite32_rep(mdata->base + SPI_TX_DATA_REG,
+				trans->tx_buf + mdata->num_xfered, cnt);
 
-		remainder = trans->len % 4;
+		remainder = mdata->xfer_len % 4;
 		if (remainder > 0) {
 			reg_val = 0;
-			memcpy(&reg_val, trans->tx_buf + (cnt * 4), remainder);
+			memcpy(&reg_val,
+				trans->tx_buf + (cnt * 4) + mdata->num_xfered,
+				remainder);
 			writel(reg_val, mdata->base + SPI_TX_DATA_REG);
 		}
 
@@ -580,7 +620,7 @@
 	struct mtk_spi *mdata;
 	const struct of_device_id *of_id;
 	struct resource *res;
-	int i, irq, ret;
+	int i, irq, ret, addr_bits;
 
 	master = spi_alloc_master(&pdev->dev, sizeof(*mdata));
 	if (!master) {
@@ -590,7 +630,7 @@
 
 	master->auto_runtime_pm = true;
 	master->dev.of_node = pdev->dev.of_node;
-	master->mode_bits = SPI_CPOL | SPI_CPHA;
+	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LSB_FIRST;
 
 	master->set_cs = mtk_spi_set_cs;
 	master->prepare_message = mtk_spi_prepare_message;
@@ -658,7 +698,6 @@
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0) {
-		dev_err(&pdev->dev, "failed to get irq (%d)\n", irq);
 		ret = irq;
 		goto err_put_master;
 	}
@@ -747,6 +786,15 @@
 		}
 	}
 
+	if (mdata->dev_comp->dma_ext)
+		addr_bits = DMA_ADDR_EXT_BITS;
+	else
+		addr_bits = DMA_ADDR_DEF_BITS;
+	ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(addr_bits));
+	if (ret)
+		dev_notice(&pdev->dev, "SPI dma_set_mask(%d) failed, ret:%d\n",
+			   addr_bits, ret);
+
 	return 0;
 
 err_disable_runtime_pm:
diff --git a/drivers/spi/spi-mt7621.c b/drivers/spi/spi-mt7621.c
new file mode 100644
index 0000000..2c3b7a2
--- /dev/null
+++ b/drivers/spi/spi-mt7621.c
@@ -0,0 +1,414 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// spi-mt7621.c -- MediaTek MT7621 SPI controller driver
+//
+// Copyright (C) 2011 Sergiy <piratfm@gmail.com>
+// Copyright (C) 2011-2013 Gabor Juhos <juhosg@openwrt.org>
+// Copyright (C) 2014-2015 Felix Fietkau <nbd@nbd.name>
+//
+// Some parts are based on spi-orion.c:
+//   Author: Shadi Ammouri <shadi@marvell.com>
+//   Copyright (C) 2007-2008 Marvell Ltd.
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/reset.h>
+#include <linux/spi/spi.h>
+
+#define DRIVER_NAME		"spi-mt7621"
+
+/* in usec */
+#define RALINK_SPI_WAIT_MAX_LOOP 2000
+
+/* SPISTAT register bit field */
+#define SPISTAT_BUSY		BIT(0)
+
+#define MT7621_SPI_TRANS	0x00
+#define SPITRANS_BUSY		BIT(16)
+
+#define MT7621_SPI_OPCODE	0x04
+#define MT7621_SPI_DATA0	0x08
+#define MT7621_SPI_DATA4	0x18
+#define SPI_CTL_TX_RX_CNT_MASK	0xff
+#define SPI_CTL_START		BIT(8)
+
+#define MT7621_SPI_MASTER	0x28
+#define MASTER_MORE_BUFMODE	BIT(2)
+#define MASTER_FULL_DUPLEX	BIT(10)
+#define MASTER_RS_CLK_SEL	GENMASK(27, 16)
+#define MASTER_RS_CLK_SEL_SHIFT	16
+#define MASTER_RS_SLAVE_SEL	GENMASK(31, 29)
+
+#define MT7621_SPI_MOREBUF	0x2c
+#define MT7621_SPI_POLAR	0x38
+#define MT7621_SPI_SPACE	0x3c
+
+#define MT7621_CPHA		BIT(5)
+#define MT7621_CPOL		BIT(4)
+#define MT7621_LSB_FIRST	BIT(3)
+
+struct mt7621_spi {
+	struct spi_controller	*master;
+	void __iomem		*base;
+	unsigned int		sys_freq;
+	unsigned int		speed;
+	struct clk		*clk;
+	int			pending_write;
+};
+
+static inline struct mt7621_spi *spidev_to_mt7621_spi(struct spi_device *spi)
+{
+	return spi_controller_get_devdata(spi->master);
+}
+
+static inline u32 mt7621_spi_read(struct mt7621_spi *rs, u32 reg)
+{
+	return ioread32(rs->base + reg);
+}
+
+static inline void mt7621_spi_write(struct mt7621_spi *rs, u32 reg, u32 val)
+{
+	iowrite32(val, rs->base + reg);
+}
+
+static void mt7621_spi_set_cs(struct spi_device *spi, int enable)
+{
+	struct mt7621_spi *rs = spidev_to_mt7621_spi(spi);
+	int cs = spi->chip_select;
+	u32 polar = 0;
+	u32 master;
+
+	/*
+	 * Select SPI device 7, enable "more buffer mode" and disable
+	 * full-duplex (only half-duplex really works on this chip
+	 * reliably)
+	 */
+	master = mt7621_spi_read(rs, MT7621_SPI_MASTER);
+	master |= MASTER_RS_SLAVE_SEL | MASTER_MORE_BUFMODE;
+	master &= ~MASTER_FULL_DUPLEX;
+	mt7621_spi_write(rs, MT7621_SPI_MASTER, master);
+
+	rs->pending_write = 0;
+
+	if (enable)
+		polar = BIT(cs);
+	mt7621_spi_write(rs, MT7621_SPI_POLAR, polar);
+}
+
+static int mt7621_spi_prepare(struct spi_device *spi, unsigned int speed)
+{
+	struct mt7621_spi *rs = spidev_to_mt7621_spi(spi);
+	u32 rate;
+	u32 reg;
+
+	dev_dbg(&spi->dev, "speed:%u\n", speed);
+
+	rate = DIV_ROUND_UP(rs->sys_freq, speed);
+	dev_dbg(&spi->dev, "rate-1:%u\n", rate);
+
+	if (rate > 4097)
+		return -EINVAL;
+
+	if (rate < 2)
+		rate = 2;
+
+	reg = mt7621_spi_read(rs, MT7621_SPI_MASTER);
+	reg &= ~MASTER_RS_CLK_SEL;
+	reg |= (rate - 2) << MASTER_RS_CLK_SEL_SHIFT;
+	rs->speed = speed;
+
+	reg &= ~MT7621_LSB_FIRST;
+	if (spi->mode & SPI_LSB_FIRST)
+		reg |= MT7621_LSB_FIRST;
+
+	/*
+	 * This SPI controller seems to be tested on SPI flash only and some
+	 * bits are swizzled under other SPI modes probably due to incorrect
+	 * wiring inside the silicon. Only mode 0 works correctly.
+	 */
+	reg &= ~(MT7621_CPHA | MT7621_CPOL);
+
+	mt7621_spi_write(rs, MT7621_SPI_MASTER, reg);
+
+	return 0;
+}
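/*
 * Worked example for the divider math above (illustrative numbers, not taken
 * from this patch): with sys_freq = 200 MHz and a requested speed of 12 MHz,
 * rate = DIV_ROUND_UP(200000000, 12000000) = 17, so MASTER_RS_CLK_SEL is
 * programmed with 17 - 2 = 15 and SCK presumably ends up at 200 MHz / 17,
 * roughly 11.76 MHz, i.e. at or below the requested speed. Requests slower
 * than sys_freq / 4097 are rejected with -EINVAL.
 */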
+
+static inline int mt7621_spi_wait_till_ready(struct mt7621_spi *rs)
+{
+	int i;
+
+	for (i = 0; i < RALINK_SPI_WAIT_MAX_LOOP; i++) {
+		u32 status;
+
+		status = mt7621_spi_read(rs, MT7621_SPI_TRANS);
+		if ((status & SPITRANS_BUSY) == 0)
+			return 0;
+		cpu_relax();
+		udelay(1);
+	}
+
+	return -ETIMEDOUT;
+}
+
+static void mt7621_spi_read_half_duplex(struct mt7621_spi *rs,
+					int rx_len, u8 *buf)
+{
+	int tx_len;
+
+	/*
+	 * Combine with any pending write, and perform one or more half-duplex
+	 * transactions reading 'len' bytes. Data to be written is already in
+	 * MT7621_SPI_DATA.
+	 */
+	tx_len = rs->pending_write;
+	rs->pending_write = 0;
+
+	while (rx_len || tx_len) {
+		int i;
+		u32 val = (min(tx_len, 4) * 8) << 24;
+		int rx = min(rx_len, 32);
+
+		if (tx_len > 4)
+			val |= (tx_len - 4) * 8;
+		val |= (rx * 8) << 12;
+		mt7621_spi_write(rs, MT7621_SPI_MOREBUF, val);
+
+		tx_len = 0;
+
+		val = mt7621_spi_read(rs, MT7621_SPI_TRANS);
+		val |= SPI_CTL_START;
+		mt7621_spi_write(rs, MT7621_SPI_TRANS, val);
+
+		mt7621_spi_wait_till_ready(rs);
+
+		for (i = 0; i < rx; i++) {
+			if ((i % 4) == 0)
+				val = mt7621_spi_read(rs, MT7621_SPI_DATA0 + i);
+			*buf++ = val & 0xff;
+			val >>= 8;
+		}
+
+		rx_len -= i;
+	}
+}
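/*
 * Illustrative decode of the MOREBUF value built above, inferred purely from
 * the shifts in this function (not from a datasheet): the field written at
 * bit 24 holds the command bit count (min(tx_len, 4) * 8, i.e. up to four
 * opcode bytes), the field at bit 12 the MISO bit count (rx * 8, up to
 * 32 bytes = 256 bits), and the low field the remaining MOSI bit count
 * ((tx_len - 4) * 8). For example, tx_len = 6 with rx_len = 32 programs
 * 32 command bits, 16 MOSI bits and 256 MISO bits for one transaction.
 */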
+
+static inline void mt7621_spi_flush(struct mt7621_spi *rs)
+{
+	mt7621_spi_read_half_duplex(rs, 0, NULL);
+}
+
+static void mt7621_spi_write_half_duplex(struct mt7621_spi *rs,
+					 int tx_len, const u8 *buf)
+{
+	int len = rs->pending_write;
+	int val = 0;
+
+	if (len & 3) {
+		val = mt7621_spi_read(rs, MT7621_SPI_OPCODE + (len & ~3));
+		if (len < 4) {
+			val <<= (4 - len) * 8;
+			val = swab32(val);
+		}
+	}
+
+	while (tx_len > 0) {
+		if (len >= 36) {
+			rs->pending_write = len;
+			mt7621_spi_flush(rs);
+			len = 0;
+		}
+
+		val |= *buf++ << (8 * (len & 3));
+		len++;
+		if ((len & 3) == 0) {
+			if (len == 4)
+				/* The byte-order of the opcode is weird! */
+				val = swab32(val);
+			mt7621_spi_write(rs, MT7621_SPI_OPCODE + len - 4, val);
+			val = 0;
+		}
+		tx_len -= 1;
+	}
+
+	if (len & 3) {
+		if (len < 4) {
+			val = swab32(val);
+			val >>= (4 - len) * 8;
+		}
+		mt7621_spi_write(rs, MT7621_SPI_OPCODE + (len & ~3), val);
+	}
+
+	rs->pending_write = len;
+}
+
+static int mt7621_spi_transfer_one_message(struct spi_controller *master,
+					   struct spi_message *m)
+{
+	struct mt7621_spi *rs = spi_controller_get_devdata(master);
+	struct spi_device *spi = m->spi;
+	unsigned int speed = spi->max_speed_hz;
+	struct spi_transfer *t = NULL;
+	int status = 0;
+
+	mt7621_spi_wait_till_ready(rs);
+
+	list_for_each_entry(t, &m->transfers, transfer_list)
+		if (t->speed_hz < speed)
+			speed = t->speed_hz;
+
+	if (mt7621_spi_prepare(spi, speed)) {
+		status = -EIO;
+		goto msg_done;
+	}
+
+	/* Assert CS */
+	mt7621_spi_set_cs(spi, 1);
+
+	m->actual_length = 0;
+	list_for_each_entry(t, &m->transfers, transfer_list) {
+		if ((t->rx_buf) && (t->tx_buf)) {
+			/*
+			 * This controller will shift some extra data out
+			 * of spi_opcode if (mosi_bit_cnt > 0) &&
+			 * (cmd_bit_cnt == 0). So the claimed full-duplex
+			 * support is broken since we have no way to read
+			 * the MISO value during that bit.
+			 */
+			status = -EIO;
+			goto msg_done;
+		} else if (t->rx_buf) {
+			mt7621_spi_read_half_duplex(rs, t->len, t->rx_buf);
+		} else if (t->tx_buf) {
+			mt7621_spi_write_half_duplex(rs, t->len, t->tx_buf);
+		}
+		m->actual_length += t->len;
+	}
+
+	/* Flush data and deassert CS */
+	mt7621_spi_flush(rs);
+	mt7621_spi_set_cs(spi, 0);
+
+msg_done:
+	m->status = status;
+	spi_finalize_current_message(master);
+
+	return 0;
+}
+
+static int mt7621_spi_setup(struct spi_device *spi)
+{
+	struct mt7621_spi *rs = spidev_to_mt7621_spi(spi);
+
+	if ((spi->max_speed_hz == 0) ||
+	    (spi->max_speed_hz > (rs->sys_freq / 2)))
+		spi->max_speed_hz = rs->sys_freq / 2;
+
+	if (spi->max_speed_hz < (rs->sys_freq / 4097)) {
+		dev_err(&spi->dev, "setup: requested speed is too low %d Hz\n",
+			spi->max_speed_hz);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static const struct of_device_id mt7621_spi_match[] = {
+	{ .compatible = "ralink,mt7621-spi" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, mt7621_spi_match);
+
+static int mt7621_spi_probe(struct platform_device *pdev)
+{
+	const struct of_device_id *match;
+	struct spi_controller *master;
+	struct mt7621_spi *rs;
+	void __iomem *base;
+	int status = 0;
+	struct clk *clk;
+	int ret;
+
+	match = of_match_device(mt7621_spi_match, &pdev->dev);
+	if (!match)
+		return -EINVAL;
+
+	base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(clk)) {
+		dev_err(&pdev->dev, "unable to get SYS clock, err=%d\n",
+			status);
+		return PTR_ERR(clk);
+	}
+
+	status = clk_prepare_enable(clk);
+	if (status)
+		return status;
+
+	master = spi_alloc_master(&pdev->dev, sizeof(*rs));
+	if (!master) {
+		dev_info(&pdev->dev, "master allocation failed\n");
+		return -ENOMEM;
+	}
+
+	master->mode_bits = SPI_LSB_FIRST;
+	master->flags = SPI_CONTROLLER_HALF_DUPLEX;
+	master->setup = mt7621_spi_setup;
+	master->transfer_one_message = mt7621_spi_transfer_one_message;
+	master->bits_per_word_mask = SPI_BPW_MASK(8);
+	master->dev.of_node = pdev->dev.of_node;
+	master->num_chipselect = 2;
+
+	dev_set_drvdata(&pdev->dev, master);
+
+	rs = spi_controller_get_devdata(master);
+	rs->base = base;
+	rs->clk = clk;
+	rs->master = master;
+	rs->sys_freq = clk_get_rate(rs->clk);
+	rs->pending_write = 0;
+	dev_info(&pdev->dev, "sys_freq: %u\n", rs->sys_freq);
+
+	ret = device_reset(&pdev->dev);
+	if (ret) {
+		dev_err(&pdev->dev, "SPI reset failed!\n");
+		return ret;
+	}
+
+	return devm_spi_register_controller(&pdev->dev, master);
+}
+
+static int mt7621_spi_remove(struct platform_device *pdev)
+{
+	struct spi_controller *master;
+	struct mt7621_spi *rs;
+
+	master = dev_get_drvdata(&pdev->dev);
+	rs = spi_controller_get_devdata(master);
+
+	clk_disable_unprepare(rs->clk);
+
+	return 0;
+}
+
+MODULE_ALIAS("platform:" DRIVER_NAME);
+
+static struct platform_driver mt7621_spi_driver = {
+	.driver = {
+		.name = DRIVER_NAME,
+		.of_match_table = mt7621_spi_match,
+	},
+	.probe = mt7621_spi_probe,
+	.remove = mt7621_spi_remove,
+};
+
+module_platform_driver(mt7621_spi_driver);
+
+MODULE_DESCRIPTION("MT7621 SPI driver");
+MODULE_AUTHOR("Felix Fietkau <nbd@nbd.name>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/spi/spi-mxic.c b/drivers/spi/spi-mxic.c
new file mode 100644
index 0000000..f48563c
--- /dev/null
+++ b/drivers/spi/spi-mxic.c
@@ -0,0 +1,617 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Copyright (C) 2018 Macronix International Co., Ltd.
+//
+// Authors:
+//	Mason Yang <masonccyang@mxic.com.tw>
+//	zhengxunli <zhengxunli@mxic.com.tw>
+//	Boris Brezillon <boris.brezillon@bootlin.com>
+//
+
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
+
+#define HC_CFG			0x0
+#define HC_CFG_IF_CFG(x)	((x) << 27)
+#define HC_CFG_DUAL_SLAVE	BIT(31)
+#define HC_CFG_INDIVIDUAL	BIT(30)
+#define HC_CFG_NIO(x)		(((x) / 4) << 27)
+#define HC_CFG_TYPE(s, t)	((t) << (23 + ((s) * 2)))
+#define HC_CFG_TYPE_SPI_NOR	0
+#define HC_CFG_TYPE_SPI_NAND	1
+#define HC_CFG_TYPE_SPI_RAM	2
+#define HC_CFG_TYPE_RAW_NAND	3
+#define HC_CFG_SLV_ACT(x)	((x) << 21)
+#define HC_CFG_CLK_PH_EN	BIT(20)
+#define HC_CFG_CLK_POL_INV	BIT(19)
+#define HC_CFG_BIG_ENDIAN	BIT(18)
+#define HC_CFG_DATA_PASS	BIT(17)
+#define HC_CFG_IDLE_SIO_LVL(x)	((x) << 16)
+#define HC_CFG_MAN_START_EN	BIT(3)
+#define HC_CFG_MAN_START	BIT(2)
+#define HC_CFG_MAN_CS_EN	BIT(1)
+#define HC_CFG_MAN_CS_ASSERT	BIT(0)
+
+#define INT_STS			0x4
+#define INT_STS_EN		0x8
+#define INT_SIG_EN		0xc
+#define INT_STS_ALL		GENMASK(31, 0)
+#define INT_RDY_PIN		BIT(26)
+#define INT_RDY_SR		BIT(25)
+#define INT_LNR_SUSP		BIT(24)
+#define INT_ECC_ERR		BIT(17)
+#define INT_CRC_ERR		BIT(16)
+#define INT_LWR_DIS		BIT(12)
+#define INT_LRD_DIS		BIT(11)
+#define INT_SDMA_INT		BIT(10)
+#define INT_DMA_FINISH		BIT(9)
+#define INT_RX_NOT_FULL		BIT(3)
+#define INT_RX_NOT_EMPTY	BIT(2)
+#define INT_TX_NOT_FULL		BIT(1)
+#define INT_TX_EMPTY		BIT(0)
+
+#define HC_EN			0x10
+#define HC_EN_BIT		BIT(0)
+
+#define TXD(x)			(0x14 + ((x) * 4))
+#define RXD			0x24
+
+#define SS_CTRL(s)		(0x30 + ((s) * 4))
+#define LRD_CFG			0x44
+#define LWR_CFG			0x80
+#define RWW_CFG			0x70
+#define OP_READ			BIT(23)
+#define OP_DUMMY_CYC(x)		((x) << 17)
+#define OP_ADDR_BYTES(x)	((x) << 14)
+#define OP_CMD_BYTES(x)		(((x) - 1) << 13)
+#define OP_OCTA_CRC_EN		BIT(12)
+#define OP_DQS_EN		BIT(11)
+#define OP_ENHC_EN		BIT(10)
+#define OP_PREAMBLE_EN		BIT(9)
+#define OP_DATA_DDR		BIT(8)
+#define OP_DATA_BUSW(x)		((x) << 6)
+#define OP_ADDR_DDR		BIT(5)
+#define OP_ADDR_BUSW(x)		((x) << 3)
+#define OP_CMD_DDR		BIT(2)
+#define OP_CMD_BUSW(x)		(x)
+#define OP_BUSW_1		0
+#define OP_BUSW_2		1
+#define OP_BUSW_4		2
+#define OP_BUSW_8		3
+
+#define OCTA_CRC		0x38
+#define OCTA_CRC_IN_EN(s)	BIT(3 + ((s) * 16))
+#define OCTA_CRC_CHUNK(s, x)	((fls((x) / 32)) << (1 + ((s) * 16)))
+#define OCTA_CRC_OUT_EN(s)	BIT(0 + ((s) * 16))
+
+#define ONFI_DIN_CNT(s)		(0x3c + (s))
+
+#define LRD_CTRL		0x48
+#define RWW_CTRL		0x74
+#define LWR_CTRL		0x84
+#define LMODE_EN		BIT(31)
+#define LMODE_SLV_ACT(x)	((x) << 21)
+#define LMODE_CMD1(x)		((x) << 8)
+#define LMODE_CMD0(x)		(x)
+
+#define LRD_ADDR		0x4c
+#define LWR_ADDR		0x88
+#define LRD_RANGE		0x50
+#define LWR_RANGE		0x8c
+
+#define AXI_SLV_ADDR		0x54
+
+#define DMAC_RD_CFG		0x58
+#define DMAC_WR_CFG		0x94
+#define DMAC_CFG_PERIPH_EN	BIT(31)
+#define DMAC_CFG_ALLFLUSH_EN	BIT(30)
+#define DMAC_CFG_LASTFLUSH_EN	BIT(29)
+#define DMAC_CFG_QE(x)		(((x) + 1) << 16)
+#define DMAC_CFG_BURST_LEN(x)	(((x) + 1) << 12)
+#define DMAC_CFG_BURST_SZ(x)	((x) << 8)
+#define DMAC_CFG_DIR_READ	BIT(1)
+#define DMAC_CFG_START		BIT(0)
+
+#define DMAC_RD_CNT		0x5c
+#define DMAC_WR_CNT		0x98
+
+#define SDMA_ADDR		0x60
+
+#define DMAM_CFG		0x64
+#define DMAM_CFG_START		BIT(31)
+#define DMAM_CFG_CONT		BIT(30)
+#define DMAM_CFG_SDMA_GAP(x)	(fls((x) / 8192) << 2)
+#define DMAM_CFG_DIR_READ	BIT(1)
+#define DMAM_CFG_EN		BIT(0)
+
+#define DMAM_CNT		0x68
+
+#define LNR_TIMER_TH		0x6c
+
+#define RDM_CFG0		0x78
+#define RDM_CFG0_POLY(x)	(x)
+
+#define RDM_CFG1		0x7c
+#define RDM_CFG1_RDM_EN		BIT(31)
+#define RDM_CFG1_SEED(x)	(x)
+
+#define LWR_SUSP_CTRL		0x90
+#define LWR_SUSP_CTRL_EN	BIT(31)
+
+#define DMAS_CTRL		0x9c
+#define DMAS_CTRL_DIR_READ	BIT(31)
+#define DMAS_CTRL_EN		BIT(30)
+
+#define DATA_STROB		0xa0
+#define DATA_STROB_EDO_EN	BIT(2)
+#define DATA_STROB_INV_POL	BIT(1)
+#define DATA_STROB_DELAY_2CYC	BIT(0)
+
+#define IDLY_CODE(x)		(0xa4 + ((x) * 4))
+#define IDLY_CODE_VAL(x, v)	((v) << (((x) % 4) * 8))
+
+#define GPIO			0xc4
+#define GPIO_PT(x)		BIT(3 + ((x) * 16))
+#define GPIO_RESET(x)		BIT(2 + ((x) * 16))
+#define GPIO_HOLDB(x)		BIT(1 + ((x) * 16))
+#define GPIO_WPB(x)		BIT((x) * 16)
+
+#define HC_VER			0xd0
+
+#define HW_TEST(x)		(0xe0 + ((x) * 4))
+
+struct mxic_spi {
+	struct clk *ps_clk;
+	struct clk *send_clk;
+	struct clk *send_dly_clk;
+	void __iomem *regs;
+	u32 cur_speed_hz;
+};
+
+static int mxic_spi_clk_enable(struct mxic_spi *mxic)
+{
+	int ret;
+
+	ret = clk_prepare_enable(mxic->send_clk);
+	if (ret)
+		return ret;
+
+	ret = clk_prepare_enable(mxic->send_dly_clk);
+	if (ret)
+		goto err_send_dly_clk;
+
+	return ret;
+
+err_send_dly_clk:
+	clk_disable_unprepare(mxic->send_clk);
+
+	return ret;
+}
+
+static void mxic_spi_clk_disable(struct mxic_spi *mxic)
+{
+	clk_disable_unprepare(mxic->send_clk);
+	clk_disable_unprepare(mxic->send_dly_clk);
+}
+
+static void mxic_spi_set_input_delay_dqs(struct mxic_spi *mxic, u8 idly_code)
+{
+	writel(IDLY_CODE_VAL(0, idly_code) |
+	       IDLY_CODE_VAL(1, idly_code) |
+	       IDLY_CODE_VAL(2, idly_code) |
+	       IDLY_CODE_VAL(3, idly_code),
+	       mxic->regs + IDLY_CODE(0));
+	writel(IDLY_CODE_VAL(4, idly_code) |
+	       IDLY_CODE_VAL(5, idly_code) |
+	       IDLY_CODE_VAL(6, idly_code) |
+	       IDLY_CODE_VAL(7, idly_code),
+	       mxic->regs + IDLY_CODE(1));
+}
+
+static int mxic_spi_clk_setup(struct mxic_spi *mxic, unsigned long freq)
+{
+	int ret;
+
+	ret = clk_set_rate(mxic->send_clk, freq);
+	if (ret)
+		return ret;
+
+	ret = clk_set_rate(mxic->send_dly_clk, freq);
+	if (ret)
+		return ret;
+
+	/*
+	 * A constant delay range from 0x0 ~ 0x1F for input delay,
+	 * the unit is 78 ps, the max input delay is 2.418 ns.
+	 */
+	mxic_spi_set_input_delay_dqs(mxic, 0xf);
+
+	/*
+	 * Phase degree = 360 * freq * output-delay
+	 * where output-delay is a constant value 1 ns in FPGA.
+	 *
+	 * Get Phase degree = 360 * freq * 1 ns
+	 *                  = 360 * freq * 1 sec / 1000000000
+	 *                  = 9 * freq / 25000000
+	 */
+	ret = clk_set_phase(mxic->send_dly_clk, 9 * freq / 25000000);
+	if (ret)
+		return ret;
+
+	return 0;
+}
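/*
 * Worked example for the phase formula above (illustrative, assuming the
 * constant 1 ns output delay mentioned in the comment): at freq = 25 MHz,
 * 9 * 25000000 / 25000000 = 9, so send_dly_clk is shifted by 9 degrees;
 * at 50 MHz the phase would be 18 degrees.
 */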
+
+static int mxic_spi_set_freq(struct mxic_spi *mxic, unsigned long freq)
+{
+	int ret;
+
+	if (mxic->cur_speed_hz == freq)
+		return 0;
+
+	mxic_spi_clk_disable(mxic);
+	ret = mxic_spi_clk_setup(mxic, freq);
+	if (ret)
+		return ret;
+
+	ret = mxic_spi_clk_enable(mxic);
+	if (ret)
+		return ret;
+
+	mxic->cur_speed_hz = freq;
+
+	return 0;
+}
+
+static void mxic_spi_hw_init(struct mxic_spi *mxic)
+{
+	writel(0, mxic->regs + DATA_STROB);
+	writel(INT_STS_ALL, mxic->regs + INT_STS_EN);
+	writel(0, mxic->regs + HC_EN);
+	writel(0, mxic->regs + LRD_CFG);
+	writel(0, mxic->regs + LRD_CTRL);
+	writel(HC_CFG_NIO(1) | HC_CFG_TYPE(0, HC_CFG_TYPE_SPI_NAND) |
+	       HC_CFG_SLV_ACT(0) | HC_CFG_MAN_CS_EN | HC_CFG_IDLE_SIO_LVL(1),
+	       mxic->regs + HC_CFG);
+}
+
+static int mxic_spi_data_xfer(struct mxic_spi *mxic, const void *txbuf,
+			      void *rxbuf, unsigned int len)
+{
+	unsigned int pos = 0;
+
+	while (pos < len) {
+		unsigned int nbytes = len - pos;
+		u32 data = 0xffffffff;
+		u32 sts;
+		int ret;
+
+		if (nbytes > 4)
+			nbytes = 4;
+
+		if (txbuf)
+			memcpy(&data, txbuf + pos, nbytes);
+
+		ret = readl_poll_timeout(mxic->regs + INT_STS, sts,
+					 sts & INT_TX_EMPTY, 0, USEC_PER_SEC);
+		if (ret)
+			return ret;
+
+		writel(data, mxic->regs + TXD(nbytes % 4));
+
+		if (rxbuf) {
+			ret = readl_poll_timeout(mxic->regs + INT_STS, sts,
+						 sts & INT_TX_EMPTY, 0,
+						 USEC_PER_SEC);
+			if (ret)
+				return ret;
+
+			ret = readl_poll_timeout(mxic->regs + INT_STS, sts,
+						 sts & INT_RX_NOT_EMPTY, 0,
+						 USEC_PER_SEC);
+			if (ret)
+				return ret;
+
+			data = readl(mxic->regs + RXD);
+			data >>= (8 * (4 - nbytes));
+			memcpy(rxbuf + pos, &data, nbytes);
+			WARN_ON(readl(mxic->regs + INT_STS) & INT_RX_NOT_EMPTY);
+		} else {
+			readl(mxic->regs + RXD);
+		}
+		WARN_ON(readl(mxic->regs + INT_STS) & INT_RX_NOT_EMPTY);
+
+		pos += nbytes;
+	}
+
+	return 0;
+}
+
+static bool mxic_spi_mem_supports_op(struct spi_mem *mem,
+				     const struct spi_mem_op *op)
+{
+	if (op->data.buswidth > 4 || op->addr.buswidth > 4 ||
+	    op->dummy.buswidth > 4 || op->cmd.buswidth > 4)
+		return false;
+
+	if (op->data.nbytes && op->dummy.nbytes &&
+	    op->data.buswidth != op->dummy.buswidth)
+		return false;
+
+	if (op->addr.nbytes > 7)
+		return false;
+
+	return true;
+}
+
+static int mxic_spi_mem_exec_op(struct spi_mem *mem,
+				const struct spi_mem_op *op)
+{
+	struct mxic_spi *mxic = spi_master_get_devdata(mem->spi->master);
+	int nio = 1, i, ret;
+	u32 ss_ctrl;
+	u8 addr[8];
+
+	ret = mxic_spi_set_freq(mxic, mem->spi->max_speed_hz);
+	if (ret)
+		return ret;
+
+	if (mem->spi->mode & (SPI_TX_QUAD | SPI_RX_QUAD))
+		nio = 4;
+	else if (mem->spi->mode & (SPI_TX_DUAL | SPI_RX_DUAL))
+		nio = 2;
+
+	writel(HC_CFG_NIO(nio) |
+	       HC_CFG_TYPE(mem->spi->chip_select, HC_CFG_TYPE_SPI_NOR) |
+	       HC_CFG_SLV_ACT(mem->spi->chip_select) | HC_CFG_IDLE_SIO_LVL(1) |
+	       HC_CFG_MAN_CS_EN,
+	       mxic->regs + HC_CFG);
+	writel(HC_EN_BIT, mxic->regs + HC_EN);
+
+	ss_ctrl = OP_CMD_BYTES(1) | OP_CMD_BUSW(fls(op->cmd.buswidth) - 1);
+
+	if (op->addr.nbytes)
+		ss_ctrl |= OP_ADDR_BYTES(op->addr.nbytes) |
+			   OP_ADDR_BUSW(fls(op->addr.buswidth) - 1);
+
+	if (op->dummy.nbytes)
+		ss_ctrl |= OP_DUMMY_CYC(op->dummy.nbytes);
+
+	if (op->data.nbytes) {
+		ss_ctrl |= OP_DATA_BUSW(fls(op->data.buswidth) - 1);
+		if (op->data.dir == SPI_MEM_DATA_IN)
+			ss_ctrl |= OP_READ;
+	}
+
+	writel(ss_ctrl, mxic->regs + SS_CTRL(mem->spi->chip_select));
+
+	writel(readl(mxic->regs + HC_CFG) | HC_CFG_MAN_CS_ASSERT,
+	       mxic->regs + HC_CFG);
+
+	ret = mxic_spi_data_xfer(mxic, &op->cmd.opcode, NULL, 1);
+	if (ret)
+		goto out;
+
+	for (i = 0; i < op->addr.nbytes; i++)
+		addr[i] = op->addr.val >> (8 * (op->addr.nbytes - i - 1));
+
+	ret = mxic_spi_data_xfer(mxic, addr, NULL, op->addr.nbytes);
+	if (ret)
+		goto out;
+
+	ret = mxic_spi_data_xfer(mxic, NULL, NULL, op->dummy.nbytes);
+	if (ret)
+		goto out;
+
+	ret = mxic_spi_data_xfer(mxic,
+				 op->data.dir == SPI_MEM_DATA_OUT ?
+				 op->data.buf.out : NULL,
+				 op->data.dir == SPI_MEM_DATA_IN ?
+				 op->data.buf.in : NULL,
+				 op->data.nbytes);
+
+out:
+	writel(readl(mxic->regs + HC_CFG) & ~HC_CFG_MAN_CS_ASSERT,
+	       mxic->regs + HC_CFG);
+	writel(0, mxic->regs + HC_EN);
+
+	return ret;
+}
+
+static const struct spi_controller_mem_ops mxic_spi_mem_ops = {
+	.supports_op = mxic_spi_mem_supports_op,
+	.exec_op = mxic_spi_mem_exec_op,
+};
+
+static void mxic_spi_set_cs(struct spi_device *spi, bool lvl)
+{
+	struct mxic_spi *mxic = spi_master_get_devdata(spi->master);
+
+	if (!lvl) {
+		writel(readl(mxic->regs + HC_CFG) | HC_CFG_MAN_CS_EN,
+		       mxic->regs + HC_CFG);
+		writel(HC_EN_BIT, mxic->regs + HC_EN);
+		writel(readl(mxic->regs + HC_CFG) | HC_CFG_MAN_CS_ASSERT,
+		       mxic->regs + HC_CFG);
+	} else {
+		writel(readl(mxic->regs + HC_CFG) & ~HC_CFG_MAN_CS_ASSERT,
+		       mxic->regs + HC_CFG);
+		writel(0, mxic->regs + HC_EN);
+	}
+}
+
+static int mxic_spi_transfer_one(struct spi_master *master,
+				 struct spi_device *spi,
+				 struct spi_transfer *t)
+{
+	struct mxic_spi *mxic = spi_master_get_devdata(master);
+	unsigned int busw = OP_BUSW_1;
+	int ret;
+
+	if (t->rx_buf && t->tx_buf) {
+		if (((spi->mode & SPI_TX_QUAD) &&
+		     !(spi->mode & SPI_RX_QUAD)) ||
+		    ((spi->mode & SPI_TX_DUAL) &&
+		     !(spi->mode & SPI_RX_DUAL)))
+			return -ENOTSUPP;
+	}
+
+	ret = mxic_spi_set_freq(mxic, t->speed_hz);
+	if (ret)
+		return ret;
+
+	if (t->tx_buf) {
+		if (spi->mode & SPI_TX_QUAD)
+			busw = OP_BUSW_4;
+		else if (spi->mode & SPI_TX_DUAL)
+			busw = OP_BUSW_2;
+	} else if (t->rx_buf) {
+		if (spi->mode & SPI_RX_QUAD)
+			busw = OP_BUSW_4;
+		else if (spi->mode & SPI_RX_DUAL)
+			busw = OP_BUSW_2;
+	}
+
+	writel(OP_CMD_BYTES(1) | OP_CMD_BUSW(busw) |
+	       OP_DATA_BUSW(busw) | (t->rx_buf ? OP_READ : 0),
+	       mxic->regs + SS_CTRL(0));
+
+	ret = mxic_spi_data_xfer(mxic, t->tx_buf, t->rx_buf, t->len);
+	if (ret)
+		return ret;
+
+	spi_finalize_current_transfer(master);
+
+	return 0;
+}
+
+static int __maybe_unused mxic_spi_runtime_suspend(struct device *dev)
+{
+	struct spi_master *master = dev_get_drvdata(dev);
+	struct mxic_spi *mxic = spi_master_get_devdata(master);
+
+	mxic_spi_clk_disable(mxic);
+	clk_disable_unprepare(mxic->ps_clk);
+
+	return 0;
+}
+
+static int __maybe_unused mxic_spi_runtime_resume(struct device *dev)
+{
+	struct spi_master *master = dev_get_drvdata(dev);
+	struct mxic_spi *mxic = spi_master_get_devdata(master);
+	int ret;
+
+	ret = clk_prepare_enable(mxic->ps_clk);
+	if (ret) {
+		dev_err(dev, "Cannot enable ps_clock.\n");
+		return ret;
+	}
+
+	return mxic_spi_clk_enable(mxic);
+}
+
+static const struct dev_pm_ops mxic_spi_dev_pm_ops = {
+	SET_RUNTIME_PM_OPS(mxic_spi_runtime_suspend,
+			   mxic_spi_runtime_resume, NULL)
+};
+
+static int mxic_spi_probe(struct platform_device *pdev)
+{
+	struct spi_master *master;
+	struct resource *res;
+	struct mxic_spi *mxic;
+	int ret;
+
+	master = spi_alloc_master(&pdev->dev, sizeof(struct mxic_spi));
+	if (!master)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, master);
+
+	mxic = spi_master_get_devdata(master);
+
+	master->dev.of_node = pdev->dev.of_node;
+
+	mxic->ps_clk = devm_clk_get(&pdev->dev, "ps_clk");
+	if (IS_ERR(mxic->ps_clk))
+		return PTR_ERR(mxic->ps_clk);
+
+	mxic->send_clk = devm_clk_get(&pdev->dev, "send_clk");
+	if (IS_ERR(mxic->send_clk))
+		return PTR_ERR(mxic->send_clk);
+
+	mxic->send_dly_clk = devm_clk_get(&pdev->dev, "send_dly_clk");
+	if (IS_ERR(mxic->send_dly_clk))
+		return PTR_ERR(mxic->send_dly_clk);
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs");
+	mxic->regs = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(mxic->regs))
+		return PTR_ERR(mxic->regs);
+
+	pm_runtime_enable(&pdev->dev);
+	master->auto_runtime_pm = true;
+
+	master->num_chipselect = 1;
+	master->mem_ops = &mxic_spi_mem_ops;
+
+	master->set_cs = mxic_spi_set_cs;
+	master->transfer_one = mxic_spi_transfer_one;
+	master->bits_per_word_mask = SPI_BPW_MASK(8);
+	master->mode_bits = SPI_CPOL | SPI_CPHA |
+			SPI_RX_DUAL | SPI_TX_DUAL |
+			SPI_RX_QUAD | SPI_TX_QUAD;
+
+	mxic_spi_hw_init(mxic);
+
+	ret = spi_register_master(master);
+	if (ret) {
+		dev_err(&pdev->dev, "spi_register_master failed\n");
+		goto err_put_master;
+	}
+
+	return 0;
+
+err_put_master:
+	spi_master_put(master);
+	pm_runtime_disable(&pdev->dev);
+
+	return ret;
+}
+
+static int mxic_spi_remove(struct platform_device *pdev)
+{
+	struct spi_master *master = platform_get_drvdata(pdev);
+
+	pm_runtime_disable(&pdev->dev);
+	spi_unregister_master(master);
+
+	return 0;
+}
+
+static const struct of_device_id mxic_spi_of_ids[] = {
+	{ .compatible = "mxicy,mx25f0a-spi", },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, mxic_spi_of_ids);
+
+static struct platform_driver mxic_spi_driver = {
+	.probe = mxic_spi_probe,
+	.remove = mxic_spi_remove,
+	.driver = {
+		.name = "mxic-spi",
+		.of_match_table = mxic_spi_of_ids,
+		.pm = &mxic_spi_dev_pm_ops,
+	},
+};
+module_platform_driver(mxic_spi_driver);
+
+MODULE_AUTHOR("Mason Yang <masonccyang@mxic.com.tw>");
+MODULE_DESCRIPTION("MX25F0A SPI controller driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/spi/spi-mxs.c b/drivers/spi/spi-mxs.c
index 6ac95a2..996c1c8 100644
--- a/drivers/spi/spi-mxs.c
+++ b/drivers/spi/spi-mxs.c
@@ -39,6 +39,7 @@
 #include <linux/stmp_device.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/mxs-spi.h>
+#include <trace/events/spi.h>
 
 #define DRIVER_NAME		"mxs-spi"
 
@@ -374,6 +375,8 @@
 
 	list_for_each_entry(t, &m->transfers, transfer_list) {
 
+		trace_spi_transfer_start(m, t);
+
 		status = mxs_spi_setup_transfer(m->spi, t);
 		if (status)
 			break;
@@ -419,6 +422,8 @@
 						flag);
 		}
 
+		trace_spi_transfer_stop(m, t);
+
 		if (status) {
 			stmp_reset_block(ssp->base);
 			break;
@@ -527,7 +532,6 @@
 	struct spi_master *master;
 	struct mxs_spi *spi;
 	struct mxs_ssp *ssp;
-	struct resource *iores;
 	struct clk *clk;
 	void __iomem *base;
 	int devid, clk_freq;
@@ -540,12 +544,11 @@
 	 */
 	const int clk_freq_default = 160000000;
 
-	iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	irq_err = platform_get_irq(pdev, 0);
 	if (irq_err < 0)
 		return irq_err;
 
-	base = devm_ioremap_resource(&pdev->dev, iores);
+	base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(base))
 		return PTR_ERR(base);
 
diff --git a/drivers/spi/spi-npcm-fiu.c b/drivers/spi/spi-npcm-fiu.c
new file mode 100644
index 0000000..cb52fd8
--- /dev/null
+++ b/drivers/spi/spi-npcm-fiu.c
@@ -0,0 +1,769 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Nuvoton Technology corporation.
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/ioport.h>
+#include <linux/clk.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/vmalloc.h>
+#include <linux/regmap.h>
+#include <linux/of_device.h>
+#include <linux/spi/spi-mem.h>
+#include <linux/mfd/syscon.h>
+
+/* NPCM7xx GCR module */
+#define NPCM7XX_INTCR3_OFFSET		0x9C
+#define NPCM7XX_INTCR3_FIU_FIX		BIT(6)
+
+/* Flash Interface Unit (FIU) Registers */
+#define NPCM_FIU_DRD_CFG		0x00
+#define NPCM_FIU_DWR_CFG		0x04
+#define NPCM_FIU_UMA_CFG		0x08
+#define NPCM_FIU_UMA_CTS		0x0C
+#define NPCM_FIU_UMA_CMD		0x10
+#define NPCM_FIU_UMA_ADDR		0x14
+#define NPCM_FIU_PRT_CFG		0x18
+#define NPCM_FIU_UMA_DW0		0x20
+#define NPCM_FIU_UMA_DW1		0x24
+#define NPCM_FIU_UMA_DW2		0x28
+#define NPCM_FIU_UMA_DW3		0x2C
+#define NPCM_FIU_UMA_DR0		0x30
+#define NPCM_FIU_UMA_DR1		0x34
+#define NPCM_FIU_UMA_DR2		0x38
+#define NPCM_FIU_UMA_DR3		0x3C
+#define NPCM_FIU_MAX_REG_LIMIT		0x80
+
+/* FIU Direct Read Configuration Register */
+#define NPCM_FIU_DRD_CFG_LCK		BIT(31)
+#define NPCM_FIU_DRD_CFG_R_BURST	GENMASK(25, 24)
+#define NPCM_FIU_DRD_CFG_ADDSIZ		GENMASK(17, 16)
+#define NPCM_FIU_DRD_CFG_DBW		GENMASK(13, 12)
+#define NPCM_FIU_DRD_CFG_ACCTYPE	GENMASK(9, 8)
+#define NPCM_FIU_DRD_CFG_RDCMD		GENMASK(7, 0)
+#define NPCM_FIU_DRD_ADDSIZ_SHIFT	16
+#define NPCM_FIU_DRD_DBW_SHIFT		12
+#define NPCM_FIU_DRD_ACCTYPE_SHIFT	8
+
+/* FIU Direct Write Configuration Register */
+#define NPCM_FIU_DWR_CFG_LCK		BIT(31)
+#define NPCM_FIU_DWR_CFG_W_BURST	GENMASK(25, 24)
+#define NPCM_FIU_DWR_CFG_ADDSIZ		GENMASK(17, 16)
+#define NPCM_FIU_DWR_CFG_ABPCK		GENMASK(11, 10)
+#define NPCM_FIU_DWR_CFG_DBPCK		GENMASK(9, 8)
+#define NPCM_FIU_DWR_CFG_WRCMD		GENMASK(7, 0)
+#define NPCM_FIU_DWR_ADDSIZ_SHIFT	16
+#define NPCM_FIU_DWR_ABPCK_SHIFT	10
+#define NPCM_FIU_DWR_DBPCK_SHIFT	8
+
+/* FIU UMA Configuration Register */
+#define NPCM_FIU_UMA_CFG_LCK		BIT(31)
+#define NPCM_FIU_UMA_CFG_CMMLCK		BIT(30)
+#define NPCM_FIU_UMA_CFG_RDATSIZ	GENMASK(28, 24)
+#define NPCM_FIU_UMA_CFG_DBSIZ		GENMASK(23, 21)
+#define NPCM_FIU_UMA_CFG_WDATSIZ	GENMASK(20, 16)
+#define NPCM_FIU_UMA_CFG_ADDSIZ		GENMASK(13, 11)
+#define NPCM_FIU_UMA_CFG_CMDSIZ		BIT(10)
+#define NPCM_FIU_UMA_CFG_RDBPCK		GENMASK(9, 8)
+#define NPCM_FIU_UMA_CFG_DBPCK		GENMASK(7, 6)
+#define NPCM_FIU_UMA_CFG_WDBPCK		GENMASK(5, 4)
+#define NPCM_FIU_UMA_CFG_ADBPCK		GENMASK(3, 2)
+#define NPCM_FIU_UMA_CFG_CMBPCK		GENMASK(1, 0)
+#define NPCM_FIU_UMA_CFG_ADBPCK_SHIFT	2
+#define NPCM_FIU_UMA_CFG_WDBPCK_SHIFT	4
+#define NPCM_FIU_UMA_CFG_DBPCK_SHIFT	6
+#define NPCM_FIU_UMA_CFG_RDBPCK_SHIFT	8
+#define NPCM_FIU_UMA_CFG_ADDSIZ_SHIFT	11
+#define NPCM_FIU_UMA_CFG_WDATSIZ_SHIFT	16
+#define NPCM_FIU_UMA_CFG_DBSIZ_SHIFT	21
+#define NPCM_FIU_UMA_CFG_RDATSIZ_SHIFT	24
+
+/* FIU UMA Control and Status Register */
+#define NPCM_FIU_UMA_CTS_RDYIE		BIT(25)
+#define NPCM_FIU_UMA_CTS_RDYST		BIT(24)
+#define NPCM_FIU_UMA_CTS_SW_CS		BIT(16)
+#define NPCM_FIU_UMA_CTS_DEV_NUM	GENMASK(9, 8)
+#define NPCM_FIU_UMA_CTS_EXEC_DONE	BIT(0)
+#define NPCM_FIU_UMA_CTS_DEV_NUM_SHIFT	8
+
+/* FIU UMA Command Register */
+#define NPCM_FIU_UMA_CMD_DUM3		GENMASK(31, 24)
+#define NPCM_FIU_UMA_CMD_DUM2		GENMASK(23, 16)
+#define NPCM_FIU_UMA_CMD_DUM1		GENMASK(15, 8)
+#define NPCM_FIU_UMA_CMD_CMD		GENMASK(7, 0)
+
+/* FIU UMA Address Register */
+#define NPCM_FIU_UMA_ADDR_UMA_ADDR	GENMASK(31, 0)
+#define NPCM_FIU_UMA_ADDR_AB3		GENMASK(31, 24)
+#define NPCM_FIU_UMA_ADDR_AB2		GENMASK(23, 16)
+#define NPCM_FIU_UMA_ADDR_AB1		GENMASK(15, 8)
+#define NPCM_FIU_UMA_ADDR_AB0		GENMASK(7, 0)
+
+/* FIU UMA Write Data Bytes 0-3 Register */
+#define NPCM_FIU_UMA_DW0_WB3		GENMASK(31, 24)
+#define NPCM_FIU_UMA_DW0_WB2		GENMASK(23, 16)
+#define NPCM_FIU_UMA_DW0_WB1		GENMASK(15, 8)
+#define NPCM_FIU_UMA_DW0_WB0		GENMASK(7, 0)
+
+/* FIU UMA Write Data Bytes 4-7 Register */
+#define NPCM_FIU_UMA_DW1_WB7		GENMASK(31, 24)
+#define NPCM_FIU_UMA_DW1_WB6		GENMASK(23, 16)
+#define NPCM_FIU_UMA_DW1_WB5		GENMASK(15, 8)
+#define NPCM_FIU_UMA_DW1_WB4		GENMASK(7, 0)
+
+/* FIU UMA Write Data Bytes 8-11 Register */
+#define NPCM_FIU_UMA_DW2_WB11		GENMASK(31, 24)
+#define NPCM_FIU_UMA_DW2_WB10		GENMASK(23, 16)
+#define NPCM_FIU_UMA_DW2_WB9		GENMASK(15, 8)
+#define NPCM_FIU_UMA_DW2_WB8		GENMASK(7, 0)
+
+/* FIU UMA Write Data Bytes 12-15 Register */
+#define NPCM_FIU_UMA_DW3_WB15		GENMASK(31, 24)
+#define NPCM_FIU_UMA_DW3_WB14		GENMASK(23, 16)
+#define NPCM_FIU_UMA_DW3_WB13		GENMASK(15, 8)
+#define NPCM_FIU_UMA_DW3_WB12		GENMASK(7, 0)
+
+/* FIU UMA Read Data Bytes 0-3 Register */
+#define NPCM_FIU_UMA_DR0_RB3		GENMASK(31, 24)
+#define NPCM_FIU_UMA_DR0_RB2		GENMASK(23, 16)
+#define NPCM_FIU_UMA_DR0_RB1		GENMASK(15, 8)
+#define NPCM_FIU_UMA_DR0_RB0		GENMASK(7, 0)
+
+/* FIU UMA Read Data Bytes 4-7 Register */
+#define NPCM_FIU_UMA_DR1_RB7		GENMASK(31, 24)
+#define NPCM_FIU_UMA_DR1_RB6		GENMASK(23, 16)
+#define NPCM_FIU_UMA_DR1_RB5		GENMASK(15, 8)
+#define NPCM_FIU_UMA_DR1_RB4		GENMASK(7, 0)
+
+/* FIU UMA Read Data Bytes 8-11 Register */
+#define NPCM_FIU_UMA_DR2_RB11		GENMASK(31, 24)
+#define NPCM_FIU_UMA_DR2_RB10		GENMASK(23, 16)
+#define NPCM_FIU_UMA_DR2_RB9		GENMASK(15, 8)
+#define NPCM_FIU_UMA_DR2_RB8		GENMASK(7, 0)
+
+/* FIU UMA Read Data Bytes 12-15 Register */
+#define NPCM_FIU_UMA_DR3_RB15		GENMASK(31, 24)
+#define NPCM_FIU_UMA_DR3_RB14		GENMASK(23, 16)
+#define NPCM_FIU_UMA_DR3_RB13		GENMASK(15, 8)
+#define NPCM_FIU_UMA_DR3_RB12		GENMASK(7, 0)
+
+/* FIU Read Mode */
+enum {
+	DRD_SINGLE_WIRE_MODE	= 0,
+	DRD_DUAL_IO_MODE	= 1,
+	DRD_QUAD_IO_MODE	= 2,
+	DRD_SPI_X_MODE		= 3,
+};
+
+enum {
+	DWR_ABPCK_BIT_PER_CLK	= 0,
+	DWR_ABPCK_2_BIT_PER_CLK	= 1,
+	DWR_ABPCK_4_BIT_PER_CLK	= 2,
+};
+
+enum {
+	DWR_DBPCK_BIT_PER_CLK	= 0,
+	DWR_DBPCK_2_BIT_PER_CLK	= 1,
+	DWR_DBPCK_4_BIT_PER_CLK	= 2,
+};
+
+#define NPCM_FIU_DRD_16_BYTE_BURST	0x3000000
+#define NPCM_FIU_DWR_16_BYTE_BURST	0x3000000
+
+#define MAP_SIZE_128MB			0x8000000
+#define MAP_SIZE_16MB			0x1000000
+#define MAP_SIZE_8MB			0x800000
+
+#define NUM_BITS_IN_BYTE		8
+#define FIU_DRD_MAX_DUMMY_NUMBER	3
+#define NPCM_MAX_CHIP_NUM		4
+#define CHUNK_SIZE			16
+#define UMA_MICRO_SEC_TIMEOUT		150
+
+enum {
+	FIU0 = 0,
+	FIU3,
+	FIUX,
+};
+
+struct npcm_fiu_info {
+	char *name;
+	u32 fiu_id;
+	u32 max_map_size;
+	u32 max_cs;
+};
+
+struct fiu_data {
+	const struct npcm_fiu_info *npcm_fiu_data_info;
+	int fiu_max;
+};
+
+static const struct npcm_fiu_info npxm7xx_fiu_info[] = {
+	{.name = "FIU0", .fiu_id = FIU0,
+		.max_map_size = MAP_SIZE_128MB, .max_cs = 2},
+	{.name = "FIU3", .fiu_id = FIU3,
+		.max_map_size = MAP_SIZE_128MB, .max_cs = 4},
+	{.name = "FIUX", .fiu_id = FIUX,
+		.max_map_size = MAP_SIZE_16MB, .max_cs = 2} };
+
+static const struct fiu_data npxm7xx_fiu_data = {
+	.npcm_fiu_data_info = npxm7xx_fiu_info,
+	.fiu_max = 3,
+};
+
+struct npcm_fiu_spi;
+
+struct npcm_fiu_chip {
+	void __iomem *flash_region_mapped_ptr;
+	struct npcm_fiu_spi *fiu;
+	unsigned long clkrate;
+	u32 chipselect;
+};
+
+struct npcm_fiu_spi {
+	struct npcm_fiu_chip chip[NPCM_MAX_CHIP_NUM];
+	const struct npcm_fiu_info *info;
+	struct spi_mem_op drd_op;
+	struct resource *res_mem;
+	struct regmap *regmap;
+	unsigned long clkrate;
+	struct device *dev;
+	struct clk *clk;
+	bool spix_mode;
+};
+
+static const struct regmap_config npcm_mtd_regmap_config = {
+	.reg_bits = 32,
+	.val_bits = 32,
+	.reg_stride = 4,
+	.max_register = NPCM_FIU_MAX_REG_LIMIT,
+};
+
+static void npcm_fiu_set_drd(struct npcm_fiu_spi *fiu,
+			     const struct spi_mem_op *op)
+{
+	regmap_update_bits(fiu->regmap, NPCM_FIU_DRD_CFG,
+			   NPCM_FIU_DRD_CFG_ACCTYPE,
+			   ilog2(op->addr.buswidth) <<
+			   NPCM_FIU_DRD_ACCTYPE_SHIFT);
+	fiu->drd_op.addr.buswidth = op->addr.buswidth;
+	regmap_update_bits(fiu->regmap, NPCM_FIU_DRD_CFG,
+			   NPCM_FIU_DRD_CFG_DBW,
+			   ((op->dummy.nbytes * ilog2(op->addr.buswidth))
+			    / NUM_BITS_IN_BYTE) << NPCM_FIU_DRD_DBW_SHIFT);
+	fiu->drd_op.dummy.nbytes = op->dummy.nbytes;
+	regmap_update_bits(fiu->regmap, NPCM_FIU_DRD_CFG,
+			   NPCM_FIU_DRD_CFG_RDCMD, op->cmd.opcode);
+	fiu->drd_op.cmd.opcode = op->cmd.opcode;
+	regmap_update_bits(fiu->regmap, NPCM_FIU_DRD_CFG,
+			   NPCM_FIU_DRD_CFG_ADDSIZ,
+			   (op->addr.nbytes - 3) << NPCM_FIU_DRD_ADDSIZ_SHIFT);
+	fiu->drd_op.addr.nbytes = op->addr.nbytes;
+}
+
+static ssize_t npcm_fiu_direct_read(struct spi_mem_dirmap_desc *desc,
+				    u64 offs, size_t len, void *buf)
+{
+	struct npcm_fiu_spi *fiu =
+		spi_controller_get_devdata(desc->mem->spi->master);
+	struct npcm_fiu_chip *chip = &fiu->chip[desc->mem->spi->chip_select];
+	void __iomem *src = (void __iomem *)(chip->flash_region_mapped_ptr +
+					     offs);
+	u8 *buf_rx = buf;
+	u32 i;
+
+	if (fiu->spix_mode) {
+		for (i = 0 ; i < len ; i++)
+			*(buf_rx + i) = ioread8(src + i);
+	} else {
+		if (desc->info.op_tmpl.addr.buswidth != fiu->drd_op.addr.buswidth ||
+		    desc->info.op_tmpl.dummy.nbytes != fiu->drd_op.dummy.nbytes ||
+		    desc->info.op_tmpl.cmd.opcode != fiu->drd_op.cmd.opcode ||
+		    desc->info.op_tmpl.addr.nbytes != fiu->drd_op.addr.nbytes)
+			npcm_fiu_set_drd(fiu, &desc->info.op_tmpl);
+
+		memcpy_fromio(buf_rx, src, len);
+	}
+
+	return len;
+}
+
+static ssize_t npcm_fiu_direct_write(struct spi_mem_dirmap_desc *desc,
+				     u64 offs, size_t len, const void *buf)
+{
+	struct npcm_fiu_spi *fiu =
+		spi_controller_get_devdata(desc->mem->spi->master);
+	struct npcm_fiu_chip *chip = &fiu->chip[desc->mem->spi->chip_select];
+	void __iomem *dst = (void __iomem *)(chip->flash_region_mapped_ptr +
+					     offs);
+	const u8 *buf_tx = buf;
+	u32 i;
+
+	if (fiu->spix_mode)
+		for (i = 0 ; i < len ; i++)
+			iowrite8(*(buf_tx + i), dst + i);
+	else
+		memcpy_toio(dst, buf_tx, len);
+
+	return len;
+}
+
+static int npcm_fiu_uma_read(struct spi_mem *mem,
+			     const struct spi_mem_op *op, u32 addr,
+			      bool is_address_size, u8 *data, u32 data_size)
+{
+	struct npcm_fiu_spi *fiu =
+		spi_controller_get_devdata(mem->spi->master);
+	u32 uma_cfg = BIT(10);
+	u32 data_reg[4];
+	int ret;
+	u32 val;
+	u32 i;
+
+	regmap_update_bits(fiu->regmap, NPCM_FIU_UMA_CTS,
+			   NPCM_FIU_UMA_CTS_DEV_NUM,
+			   (mem->spi->chip_select <<
+			    NPCM_FIU_UMA_CTS_DEV_NUM_SHIFT));
+	regmap_update_bits(fiu->regmap, NPCM_FIU_UMA_CMD,
+			   NPCM_FIU_UMA_CMD_CMD, op->cmd.opcode);
+
+	if (is_address_size) {
+		uma_cfg |= ilog2(op->cmd.buswidth);
+		uma_cfg |= ilog2(op->addr.buswidth)
+			<< NPCM_FIU_UMA_CFG_ADBPCK_SHIFT;
+		uma_cfg |= ilog2(op->dummy.buswidth)
+			<< NPCM_FIU_UMA_CFG_DBPCK_SHIFT;
+		uma_cfg |= ilog2(op->data.buswidth)
+			<< NPCM_FIU_UMA_CFG_RDBPCK_SHIFT;
+		uma_cfg |= op->dummy.nbytes << NPCM_FIU_UMA_CFG_DBSIZ_SHIFT;
+		uma_cfg |= op->addr.nbytes << NPCM_FIU_UMA_CFG_ADDSIZ_SHIFT;
+		regmap_write(fiu->regmap, NPCM_FIU_UMA_ADDR, addr);
+	} else {
+		regmap_write(fiu->regmap, NPCM_FIU_UMA_ADDR, 0x0);
+	}
+
+	uma_cfg |= data_size << NPCM_FIU_UMA_CFG_RDATSIZ_SHIFT;
+	regmap_write(fiu->regmap, NPCM_FIU_UMA_CFG, uma_cfg);
+	regmap_write_bits(fiu->regmap, NPCM_FIU_UMA_CTS,
+			  NPCM_FIU_UMA_CTS_EXEC_DONE,
+			  NPCM_FIU_UMA_CTS_EXEC_DONE);
+	ret = regmap_read_poll_timeout(fiu->regmap, NPCM_FIU_UMA_CTS, val,
+				       (!(val & NPCM_FIU_UMA_CTS_EXEC_DONE)), 0,
+				       UMA_MICRO_SEC_TIMEOUT);
+	if (ret)
+		return ret;
+
+	if (data_size) {
+		for (i = 0; i < DIV_ROUND_UP(data_size, 4); i++)
+			regmap_read(fiu->regmap, NPCM_FIU_UMA_DR0 + (i * 4),
+				    &data_reg[i]);
+		memcpy(data, data_reg, data_size);
+	}
+
+	return 0;
+}
+
+static int npcm_fiu_uma_write(struct spi_mem *mem,
+			      const struct spi_mem_op *op, u8 cmd,
+			      bool is_address_size, u8 *data, u32 data_size)
+{
+	struct npcm_fiu_spi *fiu =
+		spi_controller_get_devdata(mem->spi->master);
+	u32 uma_cfg = BIT(10);
+	u32 data_reg[4] = {0};
+	u32 val;
+	u32 i;
+
+	regmap_update_bits(fiu->regmap, NPCM_FIU_UMA_CTS,
+			   NPCM_FIU_UMA_CTS_DEV_NUM,
+			   (mem->spi->chip_select <<
+			    NPCM_FIU_UMA_CTS_DEV_NUM_SHIFT));
+
+	regmap_update_bits(fiu->regmap, NPCM_FIU_UMA_CMD,
+			   NPCM_FIU_UMA_CMD_CMD, cmd);
+
+	if (data_size) {
+		memcpy(data_reg, data, data_size);
+		for (i = 0; i < DIV_ROUND_UP(data_size, 4); i++)
+			regmap_write(fiu->regmap, NPCM_FIU_UMA_DW0 + (i * 4),
+				     data_reg[i]);
+	}
+
+	if (is_address_size) {
+		uma_cfg |= ilog2(op->cmd.buswidth);
+		uma_cfg |= ilog2(op->addr.buswidth) <<
+			NPCM_FIU_UMA_CFG_ADBPCK_SHIFT;
+		uma_cfg |= ilog2(op->data.buswidth) <<
+			NPCM_FIU_UMA_CFG_WDBPCK_SHIFT;
+		uma_cfg |= op->addr.nbytes << NPCM_FIU_UMA_CFG_ADDSIZ_SHIFT;
+		regmap_write(fiu->regmap, NPCM_FIU_UMA_ADDR, op->addr.val);
+	} else {
+		regmap_write(fiu->regmap, NPCM_FIU_UMA_ADDR, 0x0);
+	}
+
+	uma_cfg |= (data_size << NPCM_FIU_UMA_CFG_WDATSIZ_SHIFT);
+	regmap_write(fiu->regmap, NPCM_FIU_UMA_CFG, uma_cfg);
+
+	regmap_write_bits(fiu->regmap, NPCM_FIU_UMA_CTS,
+			  NPCM_FIU_UMA_CTS_EXEC_DONE,
+			  NPCM_FIU_UMA_CTS_EXEC_DONE);
+
+	return regmap_read_poll_timeout(fiu->regmap, NPCM_FIU_UMA_CTS, val,
+				       (!(val & NPCM_FIU_UMA_CTS_EXEC_DONE)), 0,
+					UMA_MICRO_SEC_TIMEOUT);
+}
+
+static int npcm_fiu_manualwrite(struct spi_mem *mem,
+				const struct spi_mem_op *op)
+{
+	struct npcm_fiu_spi *fiu =
+		spi_controller_get_devdata(mem->spi->master);
+	u8 *data = (u8 *)op->data.buf.out;
+	u32 num_data_chunks;
+	u32 remain_data;
+	u32 idx = 0;
+	int ret;
+
+	num_data_chunks  = op->data.nbytes / CHUNK_SIZE;
+	remain_data  = op->data.nbytes % CHUNK_SIZE;
+
+	regmap_update_bits(fiu->regmap, NPCM_FIU_UMA_CTS,
+			   NPCM_FIU_UMA_CTS_DEV_NUM,
+			   (mem->spi->chip_select <<
+			    NPCM_FIU_UMA_CTS_DEV_NUM_SHIFT));
+	regmap_update_bits(fiu->regmap, NPCM_FIU_UMA_CTS,
+			   NPCM_FIU_UMA_CTS_SW_CS, 0);
+
+	ret = npcm_fiu_uma_write(mem, op, op->cmd.opcode, true, NULL, 0);
+	if (ret)
+		return ret;
+
+	/* Write the bulk of the data, one CHUNK_SIZE-byte chunk at a time */
+	for (idx = 0; idx < num_data_chunks; ++idx) {
+		ret = npcm_fiu_uma_write(mem, op, data[0], false,
+					 &data[1], CHUNK_SIZE - 1);
+		if (ret)
+			return ret;
+
+		data += CHUNK_SIZE;
+	}
+
+	/* Handle the remaining partial chunk */
+	if (remain_data > 0) {
+		ret = npcm_fiu_uma_write(mem, op, data[0], false,
+					 &data[1], remain_data - 1);
+		if (ret)
+			return ret;
+	}
+
+	regmap_update_bits(fiu->regmap, NPCM_FIU_UMA_CTS,
+			   NPCM_FIU_UMA_CTS_SW_CS, NPCM_FIU_UMA_CTS_SW_CS);
+
+	return 0;
+}
+
+static int npcm_fiu_read(struct spi_mem *mem, const struct spi_mem_op *op)
+{
+	u8 *data = op->data.buf.in;
+	int i, readlen, currlen;
+	u8 *buf_ptr;
+	u32 addr;
+	int ret;
+
+	i = 0;
+	currlen = op->data.nbytes;
+
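+	/*
+	 * The UMA data path goes through four 32-bit data registers, so read
+	 * in chunks of at most 16 bytes.
+	 */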
+	do {
+		addr = ((u32)op->addr.val + i);
+		if (currlen < 16)
+			readlen = currlen;
+		else
+			readlen = 16;
+
+		buf_ptr = data + i;
+		ret = npcm_fiu_uma_read(mem, op, addr, true, buf_ptr,
+					readlen);
+		if (ret)
+			return ret;
+
+		i += readlen;
+		currlen -= 16;
+	} while (currlen > 0);
+
+	return 0;
+}
+
+static void npcm_fiux_set_direct_wr(struct npcm_fiu_spi *fiu)
+{
+	regmap_write(fiu->regmap, NPCM_FIU_DWR_CFG,
+		     NPCM_FIU_DWR_16_BYTE_BURST);
+	regmap_update_bits(fiu->regmap, NPCM_FIU_DWR_CFG,
+			   NPCM_FIU_DWR_CFG_ABPCK,
+			   DWR_ABPCK_4_BIT_PER_CLK << NPCM_FIU_DWR_ABPCK_SHIFT);
+	regmap_update_bits(fiu->regmap, NPCM_FIU_DWR_CFG,
+			   NPCM_FIU_DWR_CFG_DBPCK,
+			   DWR_DBPCK_4_BIT_PER_CLK << NPCM_FIU_DWR_DBPCK_SHIFT);
+}
+
+static void npcm_fiux_set_direct_rd(struct npcm_fiu_spi *fiu)
+{
+	u32 rx_dummy = 0;
+
+	regmap_write(fiu->regmap, NPCM_FIU_DRD_CFG,
+		     NPCM_FIU_DRD_16_BYTE_BURST);
+	regmap_update_bits(fiu->regmap, NPCM_FIU_DRD_CFG,
+			   NPCM_FIU_DRD_CFG_ACCTYPE,
+			   DRD_SPI_X_MODE << NPCM_FIU_DRD_ACCTYPE_SHIFT);
+	regmap_update_bits(fiu->regmap, NPCM_FIU_DRD_CFG,
+			   NPCM_FIU_DRD_CFG_DBW,
+			   rx_dummy << NPCM_FIU_DRD_DBW_SHIFT);
+}
+
+static int npcm_fiu_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
+{
+	struct npcm_fiu_spi *fiu =
+		spi_controller_get_devdata(mem->spi->master);
+	struct npcm_fiu_chip *chip = &fiu->chip[mem->spi->chip_select];
+	int ret = 0;
+	u8 *buf;
+
+	dev_dbg(fiu->dev, "cmd:%#x mode:%d.%d.%d.%d addr:%#llx len:%#x\n",
+		op->cmd.opcode, op->cmd.buswidth, op->addr.buswidth,
+		op->dummy.buswidth, op->data.buswidth, op->addr.val,
+		op->data.nbytes);
+
+	if (fiu->spix_mode || op->addr.nbytes > 4)
+		return -ENOTSUPP;
+
+	if (fiu->clkrate != chip->clkrate) {
+		ret = clk_set_rate(fiu->clk, chip->clkrate);
+		if (ret < 0)
+			dev_warn(fiu->dev, "Failed setting %lu frequency, stay at %lu frequency\n",
+				 chip->clkrate, fiu->clkrate);
+		else
+			fiu->clkrate = chip->clkrate;
+	}
+
+	if (op->data.dir == SPI_MEM_DATA_IN) {
+		if (!op->addr.nbytes) {
+			buf = op->data.buf.in;
+			ret = npcm_fiu_uma_read(mem, op, op->addr.val, false,
+						buf, op->data.nbytes);
+		} else {
+			ret = npcm_fiu_read(mem, op);
+		}
+	} else  {
+		if (!op->addr.nbytes && !op->data.nbytes)
+			ret = npcm_fiu_uma_write(mem, op, op->cmd.opcode, false,
+						 NULL, 0);
+		if (op->addr.nbytes && !op->data.nbytes) {
+			int i;
+			u8 buf_addr[4];
+			u32 addr = op->addr.val;
+
+			for (i = op->addr.nbytes - 1; i >= 0; i--) {
+				buf_addr[i] = addr & 0xff;
+				addr >>= 8;
+			}
+			ret = npcm_fiu_uma_write(mem, op, op->cmd.opcode, false,
+						 buf_addr, op->addr.nbytes);
+		}
+		if (!op->addr.nbytes && op->data.nbytes)
+			ret = npcm_fiu_uma_write(mem, op, op->cmd.opcode, false,
+						 (u8 *)op->data.buf.out,
+						 op->data.nbytes);
+		if (op->addr.nbytes && op->data.nbytes)
+			ret = npcm_fiu_manualwrite(mem, op);
+	}
+
+	return ret;
+}
+
+static int npcm_fiu_dirmap_create(struct spi_mem_dirmap_desc *desc)
+{
+	struct npcm_fiu_spi *fiu =
+		spi_controller_get_devdata(desc->mem->spi->master);
+	struct npcm_fiu_chip *chip = &fiu->chip[desc->mem->spi->chip_select];
+	struct regmap *gcr_regmap;
+
+	if (!fiu->res_mem) {
+		dev_warn(fiu->dev, "Reserved memory not defined, direct read disabled\n");
+		desc->nodirmap = true;
+		return 0;
+	}
+
+	if (!fiu->spix_mode &&
+	    desc->info.op_tmpl.data.dir == SPI_MEM_DATA_OUT) {
+		desc->nodirmap = true;
+		return 0;
+	}
+
+	if (!chip->flash_region_mapped_ptr) {
+		chip->flash_region_mapped_ptr =
+			devm_ioremap_nocache(fiu->dev, (fiu->res_mem->start +
+							(fiu->info->max_map_size *
+						    desc->mem->spi->chip_select)),
+					     (u32)desc->info.length);
+		if (!chip->flash_region_mapped_ptr) {
+			dev_warn(fiu->dev, "Error mapping memory region, direct read disabled\n");
+			desc->nodirmap = true;
+			return 0;
+		}
+	}
+
+	if (of_device_is_compatible(fiu->dev->of_node, "nuvoton,npcm750-fiu")) {
+		gcr_regmap =
+			syscon_regmap_lookup_by_compatible("nuvoton,npcm750-gcr");
+		if (IS_ERR(gcr_regmap)) {
+			dev_warn(fiu->dev, "Didn't find nuvoton,npcm750-gcr, direct read disabled\n");
+			desc->nodirmap = true;
+			return 0;
+		}
+		regmap_update_bits(gcr_regmap, NPCM7XX_INTCR3_OFFSET,
+				   NPCM7XX_INTCR3_FIU_FIX,
+				   NPCM7XX_INTCR3_FIU_FIX);
+	}
+
+	if (desc->info.op_tmpl.data.dir == SPI_MEM_DATA_IN) {
+		if (!fiu->spix_mode)
+			npcm_fiu_set_drd(fiu, &desc->info.op_tmpl);
+		else
+			npcm_fiux_set_direct_rd(fiu);
+
+	} else {
+		npcm_fiux_set_direct_wr(fiu);
+	}
+
+	return 0;
+}
+
+static int npcm_fiu_setup(struct spi_device *spi)
+{
+	struct spi_controller *ctrl = spi->master;
+	struct npcm_fiu_spi *fiu = spi_controller_get_devdata(ctrl);
+	struct npcm_fiu_chip *chip;
+
+	chip = &fiu->chip[spi->chip_select];
+	chip->fiu = fiu;
+	chip->chipselect = spi->chip_select;
+	chip->clkrate = spi->max_speed_hz;
+
+	fiu->clkrate = clk_get_rate(fiu->clk);
+
+	return 0;
+}
+
+static const struct spi_controller_mem_ops npcm_fiu_mem_ops = {
+	.exec_op = npcm_fiu_exec_op,
+	.dirmap_create = npcm_fiu_dirmap_create,
+	.dirmap_read = npcm_fiu_direct_read,
+	.dirmap_write = npcm_fiu_direct_write,
+};
+
+static const struct of_device_id npcm_fiu_dt_ids[] = {
+	{ .compatible = "nuvoton,npcm750-fiu", .data = &npxm7xx_fiu_data  },
+	{ /* sentinel */ }
+};
+
+static int npcm_fiu_probe(struct platform_device *pdev)
+{
+	const struct fiu_data *fiu_data_match;
+	const struct of_device_id *match;
+	struct device *dev = &pdev->dev;
+	struct spi_controller *ctrl;
+	struct npcm_fiu_spi *fiu;
+	void __iomem *regbase;
+	struct resource *res;
+	int ret;
+	int id;
+
+	ctrl = spi_alloc_master(dev, sizeof(*fiu));
+	if (!ctrl)
+		return -ENOMEM;
+
+	fiu = spi_controller_get_devdata(ctrl);
+
+	match = of_match_device(npcm_fiu_dt_ids, dev);
+	if (!match || !match->data) {
+		dev_err(dev, "No compatible OF match\n");
+		return -ENODEV;
+	}
+
+	fiu_data_match = match->data;
+	id = of_alias_get_id(dev->of_node, "fiu");
+	if (id < 0 || id >= fiu_data_match->fiu_max) {
+		dev_err(dev, "Invalid platform device id: %d\n", id);
+		return -EINVAL;
+	}
+
+	fiu->info = &fiu_data_match->npcm_fiu_data_info[id];
+
+	platform_set_drvdata(pdev, fiu);
+	fiu->dev = dev;
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "control");
+	regbase = devm_ioremap_resource(dev, res);
+	if (IS_ERR(regbase))
+		return PTR_ERR(regbase);
+
+	fiu->regmap = devm_regmap_init_mmio(dev, regbase,
+					    &npcm_mtd_regmap_config);
+	if (IS_ERR(fiu->regmap)) {
+		dev_err(dev, "Failed to create regmap\n");
+		return PTR_ERR(fiu->regmap);
+	}
+
+	fiu->res_mem = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+						    "memory");
+	fiu->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(fiu->clk))
+		return PTR_ERR(fiu->clk);
+
+	fiu->spix_mode = of_property_read_bool(dev->of_node,
+					       "nuvoton,spix-mode");
+
+	clk_prepare_enable(fiu->clk);
+
+	ctrl->mode_bits = SPI_RX_DUAL | SPI_RX_QUAD
+		| SPI_TX_DUAL | SPI_TX_QUAD;
+	ctrl->setup = npcm_fiu_setup;
+	ctrl->bus_num = -1;
+	ctrl->mem_ops = &npcm_fiu_mem_ops;
+	ctrl->num_chipselect = fiu->info->max_cs;
+	ctrl->dev.of_node = dev->of_node;
+
+	ret = devm_spi_register_master(dev, ctrl);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int npcm_fiu_remove(struct platform_device *pdev)
+{
+	struct npcm_fiu_spi *fiu = platform_get_drvdata(pdev);
+
+	clk_disable_unprepare(fiu->clk);
+	return 0;
+}
+
+MODULE_DEVICE_TABLE(of, npcm_fiu_dt_ids);
+
+static struct platform_driver npcm_fiu_driver = {
+	.driver = {
+		.name	= "NPCM-FIU",
+		.bus	= &platform_bus_type,
+		.of_match_table = npcm_fiu_dt_ids,
+	},
+	.probe      = npcm_fiu_probe,
+	.remove	    = npcm_fiu_remove,
+};
+module_platform_driver(npcm_fiu_driver);
+
+MODULE_DESCRIPTION("Nuvoton FLASH Interface Unit SPI Controller Driver");
+MODULE_AUTHOR("Tomer Maimon <tomer.maimon@nuvoton.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/spi/spi-npcm-pspi.c b/drivers/spi/spi-npcm-pspi.c
new file mode 100644
index 0000000..b191d57
--- /dev/null
+++ b/drivers/spi/spi-npcm-pspi.c
@@ -0,0 +1,493 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Nuvoton Technology corporation.
+
+#include <linux/kernel.h>
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/spi/spi.h>
+#include <linux/gpio.h>
+#include <linux/of_gpio.h>
+
+#include <asm/unaligned.h>
+
+#include <linux/regmap.h>
+#include <linux/mfd/syscon.h>
+
+struct npcm_pspi {
+	struct completion xfer_done;
+	struct regmap *rst_regmap;
+	struct spi_master *master;
+	unsigned int tx_bytes;
+	unsigned int rx_bytes;
+	void __iomem *base;
+	bool is_save_param;
+	u8 bits_per_word;
+	const u8 *tx_buf;
+	struct clk *clk;
+	u32 speed_hz;
+	u8 *rx_buf;
+	u16 mode;
+	u32 id;
+};
+
+#define DRIVER_NAME "npcm-pspi"
+
+#define NPCM_PSPI_DATA		0x00
+#define NPCM_PSPI_CTL1		0x02
+#define NPCM_PSPI_STAT		0x04
+
+/* definitions for control and status register */
+#define NPCM_PSPI_CTL1_SPIEN	BIT(0)
+#define NPCM_PSPI_CTL1_MOD	BIT(2)
+#define NPCM_PSPI_CTL1_EIR	BIT(5)
+#define NPCM_PSPI_CTL1_EIW	BIT(6)
+#define NPCM_PSPI_CTL1_SCM	BIT(7)
+#define NPCM_PSPI_CTL1_SCIDL	BIT(8)
+#define NPCM_PSPI_CTL1_SCDV6_0	GENMASK(15, 9)
+
+#define NPCM_PSPI_STAT_BSY	BIT(0)
+#define NPCM_PSPI_STAT_RBF	BIT(1)
+
+/* general definitions */
+#define NPCM_PSPI_TIMEOUT_MS		2000
+#define NPCM_PSPI_MAX_CLK_DIVIDER	256
+#define NPCM_PSPI_MIN_CLK_DIVIDER	4
+#define NPCM_PSPI_DEFAULT_CLK		25000000
+
+/* reset register */
+#define NPCM7XX_IPSRST2_OFFSET	0x24
+
+#define NPCM7XX_PSPI1_RESET	BIT(22)
+#define NPCM7XX_PSPI2_RESET	BIT(23)
+
+static inline unsigned int bytes_per_word(unsigned int bits)
+{
+	return bits <= 8 ? 1 : 2;
+}
+
+static inline void npcm_pspi_irq_enable(struct npcm_pspi *priv, u16 mask)
+{
+	u16 val;
+
+	val = ioread16(priv->base + NPCM_PSPI_CTL1);
+	val |= mask;
+	iowrite16(val, priv->base + NPCM_PSPI_CTL1);
+}
+
+static inline void npcm_pspi_irq_disable(struct npcm_pspi *priv, u16 mask)
+{
+	u16 val;
+
+	val = ioread16(priv->base + NPCM_PSPI_CTL1);
+	val &= ~mask;
+	iowrite16(val, priv->base + NPCM_PSPI_CTL1);
+}
+
+static inline void npcm_pspi_enable(struct npcm_pspi *priv)
+{
+	u16 val;
+
+	val = ioread16(priv->base + NPCM_PSPI_CTL1);
+	val |= NPCM_PSPI_CTL1_SPIEN;
+	iowrite16(val, priv->base + NPCM_PSPI_CTL1);
+}
+
+static inline void npcm_pspi_disable(struct npcm_pspi *priv)
+{
+	u16 val;
+
+	val = ioread16(priv->base + NPCM_PSPI_CTL1);
+	val &= ~NPCM_PSPI_CTL1_SPIEN;
+	iowrite16(val, priv->base + NPCM_PSPI_CTL1);
+}
+
+static void npcm_pspi_set_mode(struct spi_device *spi)
+{
+	struct npcm_pspi *priv = spi_master_get_devdata(spi->master);
+	u16 regtemp;
+	u16 mode_val;
+
+	switch (spi->mode & (SPI_CPOL | SPI_CPHA)) {
+	case SPI_MODE_0:
+		mode_val = 0;
+		break;
+	case SPI_MODE_1:
+		mode_val = NPCM_PSPI_CTL1_SCIDL;
+		break;
+	case SPI_MODE_2:
+		mode_val = NPCM_PSPI_CTL1_SCM;
+		break;
+	case SPI_MODE_3:
+		mode_val = NPCM_PSPI_CTL1_SCIDL | NPCM_PSPI_CTL1_SCM;
+		break;
+	}
+
+	regtemp = ioread16(priv->base + NPCM_PSPI_CTL1);
+	regtemp &= ~(NPCM_PSPI_CTL1_SCM | NPCM_PSPI_CTL1_SCIDL);
+	iowrite16(regtemp | mode_val, priv->base + NPCM_PSPI_CTL1);
+}
+
+static void npcm_pspi_set_transfer_size(struct npcm_pspi *priv, int size)
+{
+	u16 regtemp;
+
+	regtemp = ioread16(NPCM_PSPI_CTL1 + priv->base);
+
+	switch (size) {
+	case 8:
+		regtemp &= ~NPCM_PSPI_CTL1_MOD;
+		break;
+	case 16:
+		regtemp |= NPCM_PSPI_CTL1_MOD;
+		break;
+	}
+
+	iowrite16(regtemp, NPCM_PSPI_CTL1 + priv->base);
+}
+
+static void npcm_pspi_set_baudrate(struct npcm_pspi *priv, unsigned int speed)
+{
+	u32 ckdiv;
+	u16 regtemp;
+
+	/* The supported clock dividers range from 4 to 256. */
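+	/*
+	 * For example (illustrative numbers): with a 50 MHz source clock and
+	 * a requested 6.25 MHz SCK, ckdiv = 50000000 / (2 * 6250000) - 1 = 3,
+	 * i.e. an effective divider of 8.
+	 */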
+	ckdiv = DIV_ROUND_CLOSEST(clk_get_rate(priv->clk), (2 * speed)) - 1;
+
+	regtemp = ioread16(NPCM_PSPI_CTL1 + priv->base);
+	regtemp &= ~NPCM_PSPI_CTL1_SCDV6_0;
+	iowrite16(regtemp | (ckdiv << 9), NPCM_PSPI_CTL1 + priv->base);
+}
+
+static void npcm_pspi_setup_transfer(struct spi_device *spi,
+				     struct spi_transfer *t)
+{
+	struct npcm_pspi *priv = spi_master_get_devdata(spi->master);
+
+	priv->tx_buf = t->tx_buf;
+	priv->rx_buf = t->rx_buf;
+	priv->tx_bytes = t->len;
+	priv->rx_bytes = t->len;
+
+	if (!priv->is_save_param || priv->mode != spi->mode) {
+		npcm_pspi_set_mode(spi);
+		priv->mode = spi->mode;
+	}
+
+	if (!priv->is_save_param || priv->bits_per_word != t->bits_per_word) {
+		npcm_pspi_set_transfer_size(priv, t->bits_per_word);
+		priv->bits_per_word = t->bits_per_word;
+	}
+
+	if (!priv->is_save_param || priv->speed_hz != t->speed_hz) {
+		npcm_pspi_set_baudrate(priv, t->speed_hz);
+		priv->speed_hz = t->speed_hz;
+	}
+
+	if (!priv->is_save_param)
+		priv->is_save_param = true;
+}
+
+static void npcm_pspi_send(struct npcm_pspi *priv)
+{
+	int wsize;
+
+	wsize = min(bytes_per_word(priv->bits_per_word), priv->tx_bytes);
+	priv->tx_bytes -= wsize;
+
+	if (!priv->tx_buf)
+		return;
+
+	switch (wsize) {
+	case 1:
+		iowrite8(*priv->tx_buf, NPCM_PSPI_DATA + priv->base);
+		break;
+	case 2:
+		iowrite16(*priv->tx_buf, NPCM_PSPI_DATA + priv->base);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		return;
+	}
+
+	priv->tx_buf += wsize;
+}
+
+static void npcm_pspi_recv(struct npcm_pspi *priv)
+{
+	int rsize;
+	u16 val;
+
+	rsize = min(bytes_per_word(priv->bits_per_word), priv->rx_bytes);
+	priv->rx_bytes -= rsize;
+
+	if (!priv->rx_buf)
+		return;
+
+	switch (rsize) {
+	case 1:
+		val = ioread8(priv->base + NPCM_PSPI_DATA);
+		break;
+	case 2:
+		val = ioread16(priv->base + NPCM_PSPI_DATA);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		return;
+	}
+
+	*priv->rx_buf = val;
+	priv->rx_buf += rsize;
+}
+
+static int npcm_pspi_transfer_one(struct spi_master *master,
+				  struct spi_device *spi,
+				  struct spi_transfer *t)
+{
+	struct npcm_pspi *priv = spi_master_get_devdata(master);
+	int status;
+
+	npcm_pspi_setup_transfer(spi, t);
+	reinit_completion(&priv->xfer_done);
+	npcm_pspi_enable(priv);
+	status = wait_for_completion_timeout(&priv->xfer_done,
+					     msecs_to_jiffies(NPCM_PSPI_TIMEOUT_MS));
+	if (status == 0) {
+		npcm_pspi_disable(priv);
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+static int npcm_pspi_prepare_transfer_hardware(struct spi_master *master)
+{
+	struct npcm_pspi *priv = spi_master_get_devdata(master);
+
+	npcm_pspi_irq_enable(priv, NPCM_PSPI_CTL1_EIR | NPCM_PSPI_CTL1_EIW);
+
+	return 0;
+}
+
+static int npcm_pspi_unprepare_transfer_hardware(struct spi_master *master)
+{
+	struct npcm_pspi *priv = spi_master_get_devdata(master);
+
+	npcm_pspi_irq_disable(priv, NPCM_PSPI_CTL1_EIR | NPCM_PSPI_CTL1_EIW);
+
+	return 0;
+}
+
+static void npcm_pspi_reset_hw(struct npcm_pspi *priv)
+{
+	regmap_write(priv->rst_regmap, NPCM7XX_IPSRST2_OFFSET,
+		     NPCM7XX_PSPI1_RESET << priv->id);
+	regmap_write(priv->rst_regmap, NPCM7XX_IPSRST2_OFFSET, 0x0);
+}
+
+static irqreturn_t npcm_pspi_handler(int irq, void *dev_id)
+{
+	struct npcm_pspi *priv = dev_id;
+	u16 val;
+	u8 stat;
+
+	stat = ioread8(priv->base + NPCM_PSPI_STAT);
+
+	if (!priv->tx_buf && !priv->rx_buf)
+		return IRQ_NONE;
+
+	if (priv->tx_buf) {
+		if (stat & NPCM_PSPI_STAT_RBF) {
+			val = ioread8(NPCM_PSPI_DATA + priv->base);
+			if (priv->tx_bytes == 0) {
+				npcm_pspi_disable(priv);
+				complete(&priv->xfer_done);
+				return IRQ_HANDLED;
+			}
+		}
+
+		if ((stat & NPCM_PSPI_STAT_BSY) == 0)
+			if (priv->tx_bytes)
+				npcm_pspi_send(priv);
+	}
+
+	if (priv->rx_buf) {
+		if (stat & NPCM_PSPI_STAT_RBF) {
+			if (!priv->rx_bytes)
+				return IRQ_NONE;
+
+			npcm_pspi_recv(priv);
+
+			if (!priv->rx_bytes) {
+				npcm_pspi_disable(priv);
+				complete(&priv->xfer_done);
+				return IRQ_HANDLED;
+			}
+		}
+
+		if (((stat & NPCM_PSPI_STAT_BSY) == 0) && !priv->tx_buf)
+			iowrite8(0x0, NPCM_PSPI_DATA + priv->base);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int npcm_pspi_probe(struct platform_device *pdev)
+{
+	struct npcm_pspi *priv;
+	struct spi_master *master;
+	unsigned long clk_hz;
+	struct device_node *np = pdev->dev.of_node;
+	int num_cs, i;
+	int csgpio;
+	int irq;
+	int ret;
+
+	num_cs = of_gpio_named_count(np, "cs-gpios");
+	if (num_cs < 0)
+		return num_cs;
+
+	pdev->id = of_alias_get_id(np, "spi");
+	if (pdev->id < 0)
+		pdev->id = 0;
+
+	master = spi_alloc_master(&pdev->dev, sizeof(*priv));
+	if (!master)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, master);
+
+	priv = spi_master_get_devdata(master);
+	priv->master = master;
+	priv->is_save_param = false;
+	priv->id = pdev->id;
+
+	priv->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(priv->base)) {
+		ret = PTR_ERR(priv->base);
+		goto out_master_put;
+	}
+
+	priv->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(priv->clk)) {
+		dev_err(&pdev->dev, "failed to get clock\n");
+		ret = PTR_ERR(priv->clk);
+		goto out_master_put;
+	}
+
+	ret = clk_prepare_enable(priv->clk);
+	if (ret)
+		goto out_master_put;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		ret = irq;
+		goto out_disable_clk;
+	}
+
+	priv->rst_regmap =
+		syscon_regmap_lookup_by_compatible("nuvoton,npcm750-rst");
+	if (IS_ERR(priv->rst_regmap)) {
+		dev_err(&pdev->dev, "failed to find nuvoton,npcm750-rst\n");
+		ret = PTR_ERR(priv->rst_regmap);
+		goto out_disable_clk;
+	}
+
+	/* reset SPI-HW block */
+	npcm_pspi_reset_hw(priv);
+
+	ret = devm_request_irq(&pdev->dev, irq, npcm_pspi_handler, 0,
+			       "npcm-pspi", priv);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to request IRQ\n");
+		goto out_disable_clk;
+	}
+
+	init_completion(&priv->xfer_done);
+
+	clk_hz = clk_get_rate(priv->clk);
+
+	master->max_speed_hz = DIV_ROUND_UP(clk_hz, NPCM_PSPI_MIN_CLK_DIVIDER);
+	master->min_speed_hz = DIV_ROUND_UP(clk_hz, NPCM_PSPI_MAX_CLK_DIVIDER);
+	master->mode_bits = SPI_CPHA | SPI_CPOL;
+	master->dev.of_node = pdev->dev.of_node;
+	master->bus_num = pdev->id;
+	master->bits_per_word_mask = SPI_BPW_MASK(8) | SPI_BPW_MASK(16);
+	master->transfer_one = npcm_pspi_transfer_one;
+	master->prepare_transfer_hardware =
+		npcm_pspi_prepare_transfer_hardware;
+	master->unprepare_transfer_hardware =
+		npcm_pspi_unprepare_transfer_hardware;
+	master->num_chipselect = num_cs;
+
+	for (i = 0; i < num_cs; i++) {
+		csgpio = of_get_named_gpio(np, "cs-gpios", i);
+		if (csgpio < 0) {
+			dev_err(&pdev->dev, "failed to get csgpio#%u\n", i);
+			ret = csgpio;
+			goto out_disable_clk;
+		}
+		dev_dbg(&pdev->dev, "csgpio#%u = %d\n", i, csgpio);
+		ret = devm_gpio_request_one(&pdev->dev, csgpio,
+					    GPIOF_OUT_INIT_HIGH, DRIVER_NAME);
+		if (ret < 0) {
+			dev_err(&pdev->dev,
+				"failed to configure csgpio#%u %d\n"
+				, i, csgpio);
+			goto out_disable_clk;
+		}
+	}
+
+	/* set to default clock rate */
+	npcm_pspi_set_baudrate(priv, NPCM_PSPI_DEFAULT_CLK);
+
+	ret = devm_spi_register_master(&pdev->dev, master);
+	if (ret)
+		goto out_disable_clk;
+
+	pr_info("NPCM Peripheral SPI %d probed\n", pdev->id);
+
+	return 0;
+
+out_disable_clk:
+	clk_disable_unprepare(priv->clk);
+
+out_master_put:
+	spi_master_put(master);
+	return ret;
+}
+
+static int npcm_pspi_remove(struct platform_device *pdev)
+{
+	struct spi_master *master = platform_get_drvdata(pdev);
+	struct npcm_pspi *priv = spi_master_get_devdata(master);
+
+	npcm_pspi_reset_hw(priv);
+	clk_disable_unprepare(priv->clk);
+
+	return 0;
+}
+
+static const struct of_device_id npcm_pspi_match[] = {
+	{ .compatible = "nuvoton,npcm750-pspi", .data = NULL },
+	{}
+};
+MODULE_DEVICE_TABLE(of, npcm_pspi_match);
+
+static struct platform_driver npcm_pspi_driver = {
+	.driver		= {
+		.name		= DRIVER_NAME,
+		.of_match_table	= npcm_pspi_match,
+	},
+	.probe		= npcm_pspi_probe,
+	.remove		= npcm_pspi_remove,
+};
+module_platform_driver(npcm_pspi_driver);
+
+MODULE_DESCRIPTION("NPCM peripheral SPI Controller driver");
+MODULE_AUTHOR("Tomer Maimon <tomer.maimon@nuvoton.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/spi/spi-nuc900.c b/drivers/spi/spi-nuc900.c
deleted file mode 100644
index f51a058..0000000
--- a/drivers/spi/spi-nuc900.c
+++ /dev/null
@@ -1,433 +0,0 @@
-/*
- * Copyright (c) 2009 Nuvoton technology.
- * Wan ZongShun <mcuos.com@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/interrupt.h>
-#include <linux/delay.h>
-#include <linux/errno.h>
-#include <linux/err.h>
-#include <linux/clk.h>
-#include <linux/device.h>
-#include <linux/platform_device.h>
-#include <linux/gpio.h>
-#include <linux/io.h>
-#include <linux/slab.h>
-
-#include <linux/spi/spi.h>
-#include <linux/spi/spi_bitbang.h>
-
-#include <linux/platform_data/spi-nuc900.h>
-
-/* usi registers offset */
-#define USI_CNT		0x00
-#define USI_DIV		0x04
-#define USI_SSR		0x08
-#define USI_RX0		0x10
-#define USI_TX0		0x10
-
-/* usi register bit */
-#define ENINT		(0x01 << 17)
-#define ENFLG		(0x01 << 16)
-#define SLEEP		(0x0f << 12)
-#define TXNUM		(0x03 << 8)
-#define TXBITLEN	(0x1f << 3)
-#define TXNEG		(0x01 << 2)
-#define RXNEG		(0x01 << 1)
-#define LSB		(0x01 << 10)
-#define SELECTLEV	(0x01 << 2)
-#define SELECTPOL	(0x01 << 31)
-#define SELECTSLAVE	0x01
-#define GOBUSY		0x01
-
-struct nuc900_spi {
-	struct spi_bitbang	 bitbang;
-	struct completion	 done;
-	void __iomem		*regs;
-	int			 irq;
-	int			 len;
-	int			 count;
-	const unsigned char	*tx;
-	unsigned char		*rx;
-	struct clk		*clk;
-	struct spi_master	*master;
-	struct nuc900_spi_info *pdata;
-	spinlock_t		lock;
-};
-
-static inline struct nuc900_spi *to_hw(struct spi_device *sdev)
-{
-	return spi_master_get_devdata(sdev->master);
-}
-
-static void nuc900_slave_select(struct spi_device *spi, unsigned int ssr)
-{
-	struct nuc900_spi *hw = to_hw(spi);
-	unsigned int val;
-	unsigned int cs = spi->mode & SPI_CS_HIGH ? 1 : 0;
-	unsigned int cpol = spi->mode & SPI_CPOL ? 1 : 0;
-	unsigned long flags;
-
-	spin_lock_irqsave(&hw->lock, flags);
-
-	val = __raw_readl(hw->regs + USI_SSR);
-
-	if (!cs)
-		val &= ~SELECTLEV;
-	else
-		val |= SELECTLEV;
-
-	if (!ssr)
-		val &= ~SELECTSLAVE;
-	else
-		val |= SELECTSLAVE;
-
-	__raw_writel(val, hw->regs + USI_SSR);
-
-	val = __raw_readl(hw->regs + USI_CNT);
-
-	if (!cpol)
-		val &= ~SELECTPOL;
-	else
-		val |= SELECTPOL;
-
-	__raw_writel(val, hw->regs + USI_CNT);
-
-	spin_unlock_irqrestore(&hw->lock, flags);
-}
-
-static void nuc900_spi_chipsel(struct spi_device *spi, int value)
-{
-	switch (value) {
-	case BITBANG_CS_INACTIVE:
-		nuc900_slave_select(spi, 0);
-		break;
-
-	case BITBANG_CS_ACTIVE:
-		nuc900_slave_select(spi, 1);
-		break;
-	}
-}
-
-static void nuc900_spi_setup_txnum(struct nuc900_spi *hw, unsigned int txnum)
-{
-	unsigned int val;
-	unsigned long flags;
-
-	spin_lock_irqsave(&hw->lock, flags);
-
-	val = __raw_readl(hw->regs + USI_CNT) & ~TXNUM;
-
-	if (txnum)
-		val |= txnum << 0x08;
-
-	__raw_writel(val, hw->regs + USI_CNT);
-
-	spin_unlock_irqrestore(&hw->lock, flags);
-
-}
-
-static void nuc900_spi_setup_txbitlen(struct nuc900_spi *hw,
-							unsigned int txbitlen)
-{
-	unsigned int val;
-	unsigned long flags;
-
-	spin_lock_irqsave(&hw->lock, flags);
-
-	val = __raw_readl(hw->regs + USI_CNT) & ~TXBITLEN;
-
-	val |= (txbitlen << 0x03);
-
-	__raw_writel(val, hw->regs + USI_CNT);
-
-	spin_unlock_irqrestore(&hw->lock, flags);
-}
-
-static void nuc900_spi_gobusy(struct nuc900_spi *hw)
-{
-	unsigned int val;
-	unsigned long flags;
-
-	spin_lock_irqsave(&hw->lock, flags);
-
-	val = __raw_readl(hw->regs + USI_CNT);
-
-	val |= GOBUSY;
-
-	__raw_writel(val, hw->regs + USI_CNT);
-
-	spin_unlock_irqrestore(&hw->lock, flags);
-}
-
-static inline unsigned int hw_txbyte(struct nuc900_spi *hw, int count)
-{
-	return hw->tx ? hw->tx[count] : 0;
-}
-
-static int nuc900_spi_txrx(struct spi_device *spi, struct spi_transfer *t)
-{
-	struct nuc900_spi *hw = to_hw(spi);
-
-	hw->tx = t->tx_buf;
-	hw->rx = t->rx_buf;
-	hw->len = t->len;
-	hw->count = 0;
-
-	__raw_writel(hw_txbyte(hw, 0x0), hw->regs + USI_TX0);
-
-	nuc900_spi_gobusy(hw);
-
-	wait_for_completion(&hw->done);
-
-	return hw->count;
-}
-
-static irqreturn_t nuc900_spi_irq(int irq, void *dev)
-{
-	struct nuc900_spi *hw = dev;
-	unsigned int status;
-	unsigned int count = hw->count;
-
-	status = __raw_readl(hw->regs + USI_CNT);
-	__raw_writel(status, hw->regs + USI_CNT);
-
-	if (status & ENFLG) {
-		hw->count++;
-
-		if (hw->rx)
-			hw->rx[count] = __raw_readl(hw->regs + USI_RX0);
-		count++;
-
-		if (count < hw->len) {
-			__raw_writel(hw_txbyte(hw, count), hw->regs + USI_TX0);
-			nuc900_spi_gobusy(hw);
-		} else {
-			complete(&hw->done);
-		}
-
-		return IRQ_HANDLED;
-	}
-
-	complete(&hw->done);
-	return IRQ_HANDLED;
-}
-
-static void nuc900_tx_edge(struct nuc900_spi *hw, unsigned int edge)
-{
-	unsigned int val;
-	unsigned long flags;
-
-	spin_lock_irqsave(&hw->lock, flags);
-
-	val = __raw_readl(hw->regs + USI_CNT);
-
-	if (edge)
-		val |= TXNEG;
-	else
-		val &= ~TXNEG;
-	__raw_writel(val, hw->regs + USI_CNT);
-
-	spin_unlock_irqrestore(&hw->lock, flags);
-}
-
-static void nuc900_rx_edge(struct nuc900_spi *hw, unsigned int edge)
-{
-	unsigned int val;
-	unsigned long flags;
-
-	spin_lock_irqsave(&hw->lock, flags);
-
-	val = __raw_readl(hw->regs + USI_CNT);
-
-	if (edge)
-		val |= RXNEG;
-	else
-		val &= ~RXNEG;
-	__raw_writel(val, hw->regs + USI_CNT);
-
-	spin_unlock_irqrestore(&hw->lock, flags);
-}
-
-static void nuc900_send_first(struct nuc900_spi *hw, unsigned int lsb)
-{
-	unsigned int val;
-	unsigned long flags;
-
-	spin_lock_irqsave(&hw->lock, flags);
-
-	val = __raw_readl(hw->regs + USI_CNT);
-
-	if (lsb)
-		val |= LSB;
-	else
-		val &= ~LSB;
-	__raw_writel(val, hw->regs + USI_CNT);
-
-	spin_unlock_irqrestore(&hw->lock, flags);
-}
-
-static void nuc900_set_sleep(struct nuc900_spi *hw, unsigned int sleep)
-{
-	unsigned int val;
-	unsigned long flags;
-
-	spin_lock_irqsave(&hw->lock, flags);
-
-	val = __raw_readl(hw->regs + USI_CNT) & ~SLEEP;
-
-	if (sleep)
-		val |= (sleep << 12);
-
-	__raw_writel(val, hw->regs + USI_CNT);
-
-	spin_unlock_irqrestore(&hw->lock, flags);
-}
-
-static void nuc900_enable_int(struct nuc900_spi *hw)
-{
-	unsigned int val;
-	unsigned long flags;
-
-	spin_lock_irqsave(&hw->lock, flags);
-
-	val = __raw_readl(hw->regs + USI_CNT);
-
-	val |= ENINT;
-
-	__raw_writel(val, hw->regs + USI_CNT);
-
-	spin_unlock_irqrestore(&hw->lock, flags);
-}
-
-static void nuc900_set_divider(struct nuc900_spi *hw)
-{
-	__raw_writel(hw->pdata->divider, hw->regs + USI_DIV);
-}
-
-static void nuc900_init_spi(struct nuc900_spi *hw)
-{
-	clk_enable(hw->clk);
-	spin_lock_init(&hw->lock);
-
-	nuc900_tx_edge(hw, hw->pdata->txneg);
-	nuc900_rx_edge(hw, hw->pdata->rxneg);
-	nuc900_send_first(hw, hw->pdata->lsb);
-	nuc900_set_sleep(hw, hw->pdata->sleep);
-	nuc900_spi_setup_txbitlen(hw, hw->pdata->txbitlen);
-	nuc900_spi_setup_txnum(hw, hw->pdata->txnum);
-	nuc900_set_divider(hw);
-	nuc900_enable_int(hw);
-}
-
-static int nuc900_spi_probe(struct platform_device *pdev)
-{
-	struct nuc900_spi *hw;
-	struct spi_master *master;
-	struct resource *res;
-	int err = 0;
-
-	master = spi_alloc_master(&pdev->dev, sizeof(struct nuc900_spi));
-	if (master == NULL) {
-		dev_err(&pdev->dev, "No memory for spi_master\n");
-		return -ENOMEM;
-	}
-
-	hw = spi_master_get_devdata(master);
-	hw->master = master;
-	hw->pdata  = dev_get_platdata(&pdev->dev);
-
-	if (hw->pdata == NULL) {
-		dev_err(&pdev->dev, "No platform data supplied\n");
-		err = -ENOENT;
-		goto err_pdata;
-	}
-
-	platform_set_drvdata(pdev, hw);
-	init_completion(&hw->done);
-
-	master->mode_bits          = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
-	if (hw->pdata->lsb)
-		master->mode_bits |= SPI_LSB_FIRST;
-	master->num_chipselect     = hw->pdata->num_cs;
-	master->bus_num            = hw->pdata->bus_num;
-	hw->bitbang.master         = hw->master;
-	hw->bitbang.chipselect     = nuc900_spi_chipsel;
-	hw->bitbang.txrx_bufs      = nuc900_spi_txrx;
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	hw->regs = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(hw->regs)) {
-		err = PTR_ERR(hw->regs);
-		goto err_pdata;
-	}
-
-	hw->irq = platform_get_irq(pdev, 0);
-	if (hw->irq < 0) {
-		dev_err(&pdev->dev, "No IRQ specified\n");
-		err = -ENOENT;
-		goto err_pdata;
-	}
-
-	err = devm_request_irq(&pdev->dev, hw->irq, nuc900_spi_irq, 0,
-				pdev->name, hw);
-	if (err) {
-		dev_err(&pdev->dev, "Cannot claim IRQ\n");
-		goto err_pdata;
-	}
-
-	hw->clk = devm_clk_get(&pdev->dev, "spi");
-	if (IS_ERR(hw->clk)) {
-		dev_err(&pdev->dev, "No clock for device\n");
-		err = PTR_ERR(hw->clk);
-		goto err_pdata;
-	}
-
-	mfp_set_groupg(&pdev->dev, NULL);
-	nuc900_init_spi(hw);
-
-	err = spi_bitbang_start(&hw->bitbang);
-	if (err) {
-		dev_err(&pdev->dev, "Failed to register SPI master\n");
-		goto err_register;
-	}
-
-	return 0;
-
-err_register:
-	clk_disable(hw->clk);
-err_pdata:
-	spi_master_put(hw->master);
-	return err;
-}
-
-static int nuc900_spi_remove(struct platform_device *dev)
-{
-	struct nuc900_spi *hw = platform_get_drvdata(dev);
-
-	spi_bitbang_stop(&hw->bitbang);
-	clk_disable(hw->clk);
-	spi_master_put(hw->master);
-	return 0;
-}
-
-static struct platform_driver nuc900_spi_driver = {
-	.probe		= nuc900_spi_probe,
-	.remove		= nuc900_spi_remove,
-	.driver		= {
-		.name	= "nuc900-spi",
-	},
-};
-module_platform_driver(nuc900_spi_driver);
-
-MODULE_AUTHOR("Wan ZongShun <mcuos.com@gmail.com>");
-MODULE_DESCRIPTION("nuc900 spi driver!");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:nuc900-spi");
diff --git a/drivers/spi/spi-nxp-fspi.c b/drivers/spi/spi-nxp-fspi.c
new file mode 100644
index 0000000..501b923
--- /dev/null
+++ b/drivers/spi/spi-nxp-fspi.c
@@ -0,0 +1,1104 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * NXP FlexSPI(FSPI) controller driver.
+ *
+ * Copyright 2019 NXP.
+ *
+ * FlexSPI is a flexible SPI host controller which supports two SPI
+ * channels and up to 4 external devices. Each channel supports
+ * Single/Dual/Quad/Octal mode data transfer (1/2/4/8 bidirectional
+ * data lines).
+ *
+ * The FlexSPI controller is driven by the LUT (Look-up Table) registers.
+ * LUT registers are a look-up table for sequences of instructions.
+ * A valid sequence consists of four LUT registers.
+ * Maximum 32 LUT sequences can be programmed simultaneously.
+ *
+ * LUTs are created at run-time based on the commands passed from the
+ * spi-mem framework, so only a single LUT index is used.
+ *
+ * Software triggered Flash read/write access by IP Bus.
+ *
+ * Memory mapped read access by AHB Bus.
+ *
+ * Based on SPI MEM interface and spi-fsl-qspi.c driver.
+ *
+ * Author:
+ *     Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com>
+ *     Boris Brezillon <bbrezillon@kernel.org>
+ *     Frieder Schrempf <frieder.schrempf@kontron.de>
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pm_qos.h>
+#include <linux/sizes.h>
+
+#include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
+
+/*
+ * The driver only uses one single LUT entry, that is updated on
+ * each call of exec_op(). Index 0 is preset at boot with a basic
+ * read operation, so let's use the last entry (31).
+ */
+#define	SEQID_LUT			31
+
+/* Registers used by the driver */
+#define FSPI_MCR0			0x00
+#define FSPI_MCR0_AHB_TIMEOUT(x)	((x) << 24)
+#define FSPI_MCR0_IP_TIMEOUT(x)		((x) << 16)
+#define FSPI_MCR0_LEARN_EN		BIT(15)
+#define FSPI_MCR0_SCRFRUN_EN		BIT(14)
+#define FSPI_MCR0_OCTCOMB_EN		BIT(13)
+#define FSPI_MCR0_DOZE_EN		BIT(12)
+#define FSPI_MCR0_HSEN			BIT(11)
+#define FSPI_MCR0_SERCLKDIV		BIT(8)
+#define FSPI_MCR0_ATDF_EN		BIT(7)
+#define FSPI_MCR0_ARDF_EN		BIT(6)
+#define FSPI_MCR0_RXCLKSRC(x)		((x) << 4)
+#define FSPI_MCR0_END_CFG(x)		((x) << 2)
+#define FSPI_MCR0_MDIS			BIT(1)
+#define FSPI_MCR0_SWRST			BIT(0)
+
+#define FSPI_MCR1			0x04
+#define FSPI_MCR1_SEQ_TIMEOUT(x)	((x) << 16)
+#define FSPI_MCR1_AHB_TIMEOUT(x)	(x)
+
+#define FSPI_MCR2			0x08
+#define FSPI_MCR2_IDLE_WAIT(x)		((x) << 24)
+#define FSPI_MCR2_SAMEDEVICEEN		BIT(15)
+#define FSPI_MCR2_CLRLRPHS		BIT(14)
+#define FSPI_MCR2_ABRDATSZ		BIT(8)
+#define FSPI_MCR2_ABRLEARN		BIT(7)
+#define FSPI_MCR2_ABR_READ		BIT(6)
+#define FSPI_MCR2_ABRWRITE		BIT(5)
+#define FSPI_MCR2_ABRDUMMY		BIT(4)
+#define FSPI_MCR2_ABR_MODE		BIT(3)
+#define FSPI_MCR2_ABRCADDR		BIT(2)
+#define FSPI_MCR2_ABRRADDR		BIT(1)
+#define FSPI_MCR2_ABR_CMD		BIT(0)
+
+#define FSPI_AHBCR			0x0c
+#define FSPI_AHBCR_RDADDROPT		BIT(6)
+#define FSPI_AHBCR_PREF_EN		BIT(5)
+#define FSPI_AHBCR_BUFF_EN		BIT(4)
+#define FSPI_AHBCR_CACH_EN		BIT(3)
+#define FSPI_AHBCR_CLRTXBUF		BIT(2)
+#define FSPI_AHBCR_CLRRXBUF		BIT(1)
+#define FSPI_AHBCR_PAR_EN		BIT(0)
+
+#define FSPI_INTEN			0x10
+#define FSPI_INTEN_SCLKSBWR		BIT(9)
+#define FSPI_INTEN_SCLKSBRD		BIT(8)
+#define FSPI_INTEN_DATALRNFL		BIT(7)
+#define FSPI_INTEN_IPTXWE		BIT(6)
+#define FSPI_INTEN_IPRXWA		BIT(5)
+#define FSPI_INTEN_AHBCMDERR		BIT(4)
+#define FSPI_INTEN_IPCMDERR		BIT(3)
+#define FSPI_INTEN_AHBCMDGE		BIT(2)
+#define FSPI_INTEN_IPCMDGE		BIT(1)
+#define FSPI_INTEN_IPCMDDONE		BIT(0)
+
+#define FSPI_INTR			0x14
+#define FSPI_INTR_SCLKSBWR		BIT(9)
+#define FSPI_INTR_SCLKSBRD		BIT(8)
+#define FSPI_INTR_DATALRNFL		BIT(7)
+#define FSPI_INTR_IPTXWE		BIT(6)
+#define FSPI_INTR_IPRXWA		BIT(5)
+#define FSPI_INTR_AHBCMDERR		BIT(4)
+#define FSPI_INTR_IPCMDERR		BIT(3)
+#define FSPI_INTR_AHBCMDGE		BIT(2)
+#define FSPI_INTR_IPCMDGE		BIT(1)
+#define FSPI_INTR_IPCMDDONE		BIT(0)
+
+#define FSPI_LUTKEY			0x18
+#define FSPI_LUTKEY_VALUE		0x5AF05AF0
+
+#define FSPI_LCKCR			0x1C
+
+#define FSPI_LCKER_LOCK			0x1
+#define FSPI_LCKER_UNLOCK		0x2
+
+#define FSPI_BUFXCR_INVALID_MSTRID	0xE
+#define FSPI_AHBRX_BUF0CR0		0x20
+#define FSPI_AHBRX_BUF1CR0		0x24
+#define FSPI_AHBRX_BUF2CR0		0x28
+#define FSPI_AHBRX_BUF3CR0		0x2C
+#define FSPI_AHBRX_BUF4CR0		0x30
+#define FSPI_AHBRX_BUF5CR0		0x34
+#define FSPI_AHBRX_BUF6CR0		0x38
+#define FSPI_AHBRX_BUF7CR0		0x3C
+#define FSPI_AHBRXBUF0CR7_PREF		BIT(31)
+
+#define FSPI_AHBRX_BUF0CR1		0x40
+#define FSPI_AHBRX_BUF1CR1		0x44
+#define FSPI_AHBRX_BUF2CR1		0x48
+#define FSPI_AHBRX_BUF3CR1		0x4C
+#define FSPI_AHBRX_BUF4CR1		0x50
+#define FSPI_AHBRX_BUF5CR1		0x54
+#define FSPI_AHBRX_BUF6CR1		0x58
+#define FSPI_AHBRX_BUF7CR1		0x5C
+
+#define FSPI_FLSHA1CR0			0x60
+#define FSPI_FLSHA2CR0			0x64
+#define FSPI_FLSHB1CR0			0x68
+#define FSPI_FLSHB2CR0			0x6C
+#define FSPI_FLSHXCR0_SZ_KB		10
+#define FSPI_FLSHXCR0_SZ(x)		((x) >> FSPI_FLSHXCR0_SZ_KB)
+
+#define FSPI_FLSHA1CR1			0x70
+#define FSPI_FLSHA2CR1			0x74
+#define FSPI_FLSHB1CR1			0x78
+#define FSPI_FLSHB2CR1			0x7C
+#define FSPI_FLSHXCR1_CSINTR(x)		((x) << 16)
+#define FSPI_FLSHXCR1_CAS(x)		((x) << 11)
+#define FSPI_FLSHXCR1_WA		BIT(10)
+#define FSPI_FLSHXCR1_TCSH(x)		((x) << 5)
+#define FSPI_FLSHXCR1_TCSS(x)		(x)
+
+#define FSPI_FLSHA1CR2			0x80
+#define FSPI_FLSHA2CR2			0x84
+#define FSPI_FLSHB1CR2			0x88
+#define FSPI_FLSHB2CR2			0x8C
+#define FSPI_FLSHXCR2_CLRINSP		BIT(24)
+#define FSPI_FLSHXCR2_AWRWAIT		BIT(16)
+#define FSPI_FLSHXCR2_AWRSEQN_SHIFT	13
+#define FSPI_FLSHXCR2_AWRSEQI_SHIFT	8
+#define FSPI_FLSHXCR2_ARDSEQN_SHIFT	5
+#define FSPI_FLSHXCR2_ARDSEQI_SHIFT	0
+
+#define FSPI_IPCR0			0xA0
+
+#define FSPI_IPCR1			0xA4
+#define FSPI_IPCR1_IPAREN		BIT(31)
+#define FSPI_IPCR1_SEQNUM_SHIFT		24
+#define FSPI_IPCR1_SEQID_SHIFT		16
+#define FSPI_IPCR1_IDATSZ(x)		(x)
+
+#define FSPI_IPCMD			0xB0
+#define FSPI_IPCMD_TRG			BIT(0)
+
+#define FSPI_DLPR			0xB4
+
+#define FSPI_IPRXFCR			0xB8
+#define FSPI_IPRXFCR_CLR		BIT(0)
+#define FSPI_IPRXFCR_DMA_EN		BIT(1)
+#define FSPI_IPRXFCR_WMRK(x)		((x) << 2)
+
+#define FSPI_IPTXFCR			0xBC
+#define FSPI_IPTXFCR_CLR		BIT(0)
+#define FSPI_IPTXFCR_DMA_EN		BIT(1)
+#define FSPI_IPTXFCR_WMRK(x)		((x) << 2)
+
+#define FSPI_DLLACR			0xC0
+#define FSPI_DLLACR_OVRDEN		BIT(8)
+
+#define FSPI_DLLBCR			0xC4
+#define FSPI_DLLBCR_OVRDEN		BIT(8)
+
+#define FSPI_STS0			0xE0
+#define FSPI_STS0_DLPHB(x)		((x) << 8)
+#define FSPI_STS0_DLPHA(x)		((x) << 4)
+#define FSPI_STS0_CMD_SRC(x)		((x) << 2)
+#define FSPI_STS0_ARB_IDLE		BIT(1)
+#define FSPI_STS0_SEQ_IDLE		BIT(0)
+
+#define FSPI_STS1			0xE4
+#define FSPI_STS1_IP_ERRCD(x)		((x) << 24)
+#define FSPI_STS1_IP_ERRID(x)		((x) << 16)
+#define FSPI_STS1_AHB_ERRCD(x)		((x) << 8)
+#define FSPI_STS1_AHB_ERRID(x)		(x)
+
+#define FSPI_AHBSPNST			0xEC
+#define FSPI_AHBSPNST_DATLFT(x)		((x) << 16)
+#define FSPI_AHBSPNST_BUFID(x)		((x) << 1)
+#define FSPI_AHBSPNST_ACTIVE		BIT(0)
+
+#define FSPI_IPRXFSTS			0xF0
+#define FSPI_IPRXFSTS_RDCNTR(x)		((x) << 16)
+#define FSPI_IPRXFSTS_FILL(x)		(x)
+
+#define FSPI_IPTXFSTS			0xF4
+#define FSPI_IPTXFSTS_WRCNTR(x)		((x) << 16)
+#define FSPI_IPTXFSTS_FILL(x)		(x)
+
+#define FSPI_RFDR			0x100
+#define FSPI_TFDR			0x180
+
+#define FSPI_LUT_BASE			0x200
+#define FSPI_LUT_OFFSET			(SEQID_LUT * 4 * 4)
+#define FSPI_LUT_REG(idx) \
+	(FSPI_LUT_BASE + FSPI_LUT_OFFSET + (idx) * 4)
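+/*
+ * With SEQID_LUT = 31, FSPI_LUT_OFFSET = 31 * 16 = 0x1F0, so the first LUT
+ * register used by this driver is FSPI_LUT_REG(0) = 0x3F0.
+ */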
+
+/* register map end */
+
+/* Instruction set for the LUT register. */
+#define LUT_STOP			0x00
+#define LUT_CMD				0x01
+#define LUT_ADDR			0x02
+#define LUT_CADDR_SDR			0x03
+#define LUT_MODE			0x04
+#define LUT_MODE2			0x05
+#define LUT_MODE4			0x06
+#define LUT_MODE8			0x07
+#define LUT_NXP_WRITE			0x08
+#define LUT_NXP_READ			0x09
+#define LUT_LEARN_SDR			0x0A
+#define LUT_DATSZ_SDR			0x0B
+#define LUT_DUMMY			0x0C
+#define LUT_DUMMY_RWDS_SDR		0x0D
+#define LUT_JMP_ON_CS			0x1F
+#define LUT_CMD_DDR			0x21
+#define LUT_ADDR_DDR			0x22
+#define LUT_CADDR_DDR			0x23
+#define LUT_MODE_DDR			0x24
+#define LUT_MODE2_DDR			0x25
+#define LUT_MODE4_DDR			0x26
+#define LUT_MODE8_DDR			0x27
+#define LUT_WRITE_DDR			0x28
+#define LUT_READ_DDR			0x29
+#define LUT_LEARN_DDR			0x2A
+#define LUT_DATSZ_DDR			0x2B
+#define LUT_DUMMY_DDR			0x2C
+#define LUT_DUMMY_RWDS_DDR		0x2D
+
+/*
+ * Calculate number of required PAD bits for LUT register.
+ *
+ * The pad stands for the number of IO lines [0:7].
+ * For example, the octal read needs eight IO lines,
+ * so you should use LUT_PAD(8). This macro
+ * returns 3, i.e. use eight (2^3) IO lines for the read.
+ */
+#define LUT_PAD(x) (fls(x) - 1)
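+/*
+ * For example, LUT_PAD(1) = 0, LUT_PAD(2) = 1, LUT_PAD(4) = 2 and
+ * LUT_PAD(8) = 3.
+ */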
+
+/*
+ * Macro for constructing the LUT entries with the following
+ * register layout:
+ *
+ *  ---------------------------------------------------
+ *  | INSTR1 | PAD1 | OPRND1 | INSTR0 | PAD0 | OPRND0 |
+ *  ---------------------------------------------------
+ */
+#define PAD_SHIFT		8
+#define INSTR_SHIFT		10
+#define OPRND_SHIFT		16
+
+/* Macros for constructing the LUT register. */
+#define LUT_DEF(idx, ins, pad, opr)			  \
+	((((ins) << INSTR_SHIFT) | ((pad) << PAD_SHIFT) | \
+	(opr)) << (((idx) % 2) * OPRND_SHIFT))
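+/*
+ * For example (illustrative), LUT_DEF(0, LUT_CMD, LUT_PAD(1), 0x9F) packs
+ * opcode 0x9F into OPRND0, a single-line pad into PAD0 and the CMD
+ * instruction into INSTR0; an odd idx shifts the same fields into the
+ * upper half-word (INSTR1/PAD1/OPRND1).
+ */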
+
+#define POLL_TOUT		5000
+#define NXP_FSPI_MAX_CHIPSELECT		4
+
+struct nxp_fspi_devtype_data {
+	unsigned int rxfifo;
+	unsigned int txfifo;
+	unsigned int ahb_buf_size;
+	unsigned int quirks;
+	bool little_endian;
+};
+
+static const struct nxp_fspi_devtype_data lx2160a_data = {
+	.rxfifo = SZ_512,       /* (64  * 64 bits)  */
+	.txfifo = SZ_1K,        /* (128 * 64 bits)  */
+	.ahb_buf_size = SZ_2K,  /* (256 * 64 bits)  */
+	.quirks = 0,
+	.little_endian = true,  /* little-endian    */
+};
+
+struct nxp_fspi {
+	void __iomem *iobase;
+	void __iomem *ahb_addr;
+	u32 memmap_phy;
+	u32 memmap_phy_size;
+	struct clk *clk, *clk_en;
+	struct device *dev;
+	struct completion c;
+	const struct nxp_fspi_devtype_data *devtype_data;
+	struct mutex lock;
+	struct pm_qos_request pm_qos_req;
+	int selected;
+};
+
+/*
+ * R/W functions for big- or little-endian registers:
+ * The FSPI controller's endianness is independent of
+ * the CPU core's endianness. Even when the CPU core is
+ * little-endian, the FSPI controller can be wired as
+ * big-endian or little-endian.
+ */
+static void fspi_writel(struct nxp_fspi *f, u32 val, void __iomem *addr)
+{
+	if (f->devtype_data->little_endian)
+		iowrite32(val, addr);
+	else
+		iowrite32be(val, addr);
+}
+
+static u32 fspi_readl(struct nxp_fspi *f, void __iomem *addr)
+{
+	if (f->devtype_data->little_endian)
+		return ioread32(addr);
+	else
+		return ioread32be(addr);
+}
+
+static irqreturn_t nxp_fspi_irq_handler(int irq, void *dev_id)
+{
+	struct nxp_fspi *f = dev_id;
+	u32 reg;
+
+	/* clear interrupt */
+	reg = fspi_readl(f, f->iobase + FSPI_INTR);
+	fspi_writel(f, FSPI_INTR_IPCMDDONE, f->iobase + FSPI_INTR);
+
+	if (reg & FSPI_INTR_IPCMDDONE)
+		complete(&f->c);
+
+	return IRQ_HANDLED;
+}
+
+static int nxp_fspi_check_buswidth(struct nxp_fspi *f, u8 width)
+{
+	switch (width) {
+	case 1:
+	case 2:
+	case 4:
+	case 8:
+		return 0;
+	}
+
+	return -ENOTSUPP;
+}
+
+static bool nxp_fspi_supports_op(struct spi_mem *mem,
+				 const struct spi_mem_op *op)
+{
+	struct nxp_fspi *f = spi_controller_get_devdata(mem->spi->master);
+	int ret;
+
+	ret = nxp_fspi_check_buswidth(f, op->cmd.buswidth);
+
+	if (op->addr.nbytes)
+		ret |= nxp_fspi_check_buswidth(f, op->addr.buswidth);
+
+	if (op->dummy.nbytes)
+		ret |= nxp_fspi_check_buswidth(f, op->dummy.buswidth);
+
+	if (op->data.nbytes)
+		ret |= nxp_fspi_check_buswidth(f, op->data.buswidth);
+
+	if (ret)
+		return false;
+
+	/*
+	 * The number of address bytes should be equal to or less than 4 bytes.
+	 */
+	if (op->addr.nbytes > 4)
+		return false;
+
+	/*
+	 * If the requested address is beyond the controller's assigned
+	 * memory mapped space, reject the operation as it does not fit in
+	 * the assigned address range.
+	 */
+	if (op->addr.val >= f->memmap_phy_size)
+		return false;
+
+	/* Max 64 dummy clock cycles supported */
+	if (op->dummy.buswidth &&
+	    (op->dummy.nbytes * 8 / op->dummy.buswidth > 64))
+		return false;
+
+	/* Max data length, check controller limits and alignment */
+	if (op->data.dir == SPI_MEM_DATA_IN &&
+	    (op->data.nbytes > f->devtype_data->ahb_buf_size ||
+	     (op->data.nbytes > f->devtype_data->rxfifo - 4 &&
+	      !IS_ALIGNED(op->data.nbytes, 8))))
+		return false;
+
+	if (op->data.dir == SPI_MEM_DATA_OUT &&
+	    op->data.nbytes > f->devtype_data->txfifo)
+		return false;
+
+	return true;
+}
+
+/* Poll a register with readl_poll_timeout() instead of busy looping. */
+static int fspi_readl_poll_tout(struct nxp_fspi *f, void __iomem *base,
+				u32 mask, u32 delay_us,
+				u32 timeout_us, bool c)
+{
+	u32 reg;
+
+	if (!f->devtype_data->little_endian)
+		mask = (u32)cpu_to_be32(mask);
+
+	if (c)
+		return readl_poll_timeout(base, reg, (reg & mask),
+					  delay_us, timeout_us);
+	else
+		return readl_poll_timeout(base, reg, !(reg & mask),
+					  delay_us, timeout_us);
+}
+
+/*
+ * If the slave device content is changed by a Write/Erase operation, the
+ * AHB buffer must be invalidated. This is achieved by resetting the
+ * controller via the MCR0[SWRESET] bit.
+ */
+static inline void nxp_fspi_invalid(struct nxp_fspi *f)
+{
+	u32 reg;
+	int ret;
+
+	reg = fspi_readl(f, f->iobase + FSPI_MCR0);
+	fspi_writel(f, reg | FSPI_MCR0_SWRST, f->iobase + FSPI_MCR0);
+
+	/* w1c register, wait until it clears */
+	ret = fspi_readl_poll_tout(f, f->iobase + FSPI_MCR0,
+				   FSPI_MCR0_SWRST, 0, POLL_TOUT, false);
+	WARN_ON(ret);
+}
+
+static void nxp_fspi_prepare_lut(struct nxp_fspi *f,
+				 const struct spi_mem_op *op)
+{
+	void __iomem *base = f->iobase;
+	u32 lutval[4] = {};
+	int lutidx = 1, i;
+
+	/* cmd */
+	lutval[0] |= LUT_DEF(0, LUT_CMD, LUT_PAD(op->cmd.buswidth),
+			     op->cmd.opcode);
+
+	/* addr bytes */
+	if (op->addr.nbytes) {
+		lutval[lutidx / 2] |= LUT_DEF(lutidx, LUT_ADDR,
+					      LUT_PAD(op->addr.buswidth),
+					      op->addr.nbytes * 8);
+		lutidx++;
+	}
+
+	/* dummy bytes, if needed */
+	if (op->dummy.nbytes) {
+		lutval[lutidx / 2] |= LUT_DEF(lutidx, LUT_DUMMY,
+		/*
+		 * Due to a FlexSPI controller limitation, the number of PAD
+		 * bits for the dummy cycles must equal the data buswidth.
+		 */
+					      LUT_PAD(op->data.buswidth),
+					      op->dummy.nbytes * 8 /
+					      op->dummy.buswidth);
+		lutidx++;
+	}
+
+	/* read/write data bytes */
+	if (op->data.nbytes) {
+		lutval[lutidx / 2] |= LUT_DEF(lutidx,
+					      op->data.dir == SPI_MEM_DATA_IN ?
+					      LUT_NXP_READ : LUT_NXP_WRITE,
+					      LUT_PAD(op->data.buswidth),
+					      0);
+		lutidx++;
+	}
+
+	/* stop condition. */
+	lutval[lutidx / 2] |= LUT_DEF(lutidx, LUT_STOP, 0, 0);
+
+	/* unlock LUT */
+	fspi_writel(f, FSPI_LUTKEY_VALUE, f->iobase + FSPI_LUTKEY);
+	fspi_writel(f, FSPI_LCKER_UNLOCK, f->iobase + FSPI_LCKCR);
+
+	/* fill LUT */
+	for (i = 0; i < ARRAY_SIZE(lutval); i++)
+		fspi_writel(f, lutval[i], base + FSPI_LUT_REG(i));
+
+	dev_dbg(f->dev, "CMD[%x] lutval[0:%x \t 1:%x \t 2:%x \t 3:%x]\n",
+		op->cmd.opcode, lutval[0], lutval[1], lutval[2], lutval[3]);
+
+	/* lock LUT */
+	fspi_writel(f, FSPI_LUTKEY_VALUE, f->iobase + FSPI_LUTKEY);
+	fspi_writel(f, FSPI_LCKER_LOCK, f->iobase + FSPI_LCKCR);
+}
+
+static int nxp_fspi_clk_prep_enable(struct nxp_fspi *f)
+{
+	int ret;
+
+	ret = clk_prepare_enable(f->clk_en);
+	if (ret)
+		return ret;
+
+	ret = clk_prepare_enable(f->clk);
+	if (ret) {
+		clk_disable_unprepare(f->clk_en);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void nxp_fspi_clk_disable_unprep(struct nxp_fspi *f)
+{
+	clk_disable_unprepare(f->clk);
+	clk_disable_unprepare(f->clk_en);
+}
+
+/*
+ * In FlexSPI controller, flash access is based on value of FSPI_FLSHXXCR0
+ * register and start base address of the slave device.
+ *
+ *							    (Higher address)
+ *				--------    <-- FLSHB2CR0
+ *				|  B2  |
+ *				|      |
+ *	B2 start address -->	--------    <-- FLSHB1CR0
+ *				|  B1  |
+ *				|      |
+ *	B1 start address -->	--------    <-- FLSHA2CR0
+ *				|  A2  |
+ *				|      |
+ *	A2 start address -->	--------    <-- FLSHA1CR0
+ *				|  A1  |
+ *				|      |
+ *	A1 start address -->	--------		    (Lower address)
+ *
+ *
+ * Start base address defines the starting address range for given CS and
+ * FSPI_FLSHXXCR0 defines the size of the slave device connected at given CS.
+ *
+ * Different targets, however, have different numbers of chip selects: some
+ * have only a single CS, or two CS lines covering the controller's full
+ * memory mapped space area.
+ * The implementation is therefore kept independent of the size and number
+ * of the connected slave devices.
+ * The controller's memory mapped space size is assigned as the size of the
+ * connected slave device.
+ * FLSHxxCR0 is cleared for all chip selects first, and a value is then
+ * assigned only to the Flash configuration register of the selected
+ * chip select.
+ *
+ * For example, to access CS2 (B1), the FLSHB1CR0 register is set to the
+ * memory mapped size of the controller, while the FLSHxxCR0 registers of
+ * the remaining chip selects stay zero.
+ *
+ */
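+/*
+ * For example (illustrative), with a 256 MB memory mapped window the
+ * selected FLSHxxCR0 register is programmed with FSPI_FLSHXCR0_SZ(256 MB),
+ * i.e. 256 * 1024 KB, while the other three FLSHxxCR0 registers stay zero.
+ */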
+static void nxp_fspi_select_mem(struct nxp_fspi *f, struct spi_device *spi)
+{
+	unsigned long rate = spi->max_speed_hz;
+	int ret;
+	uint64_t size_kb;
+
+	/*
+	 * Return if the previously selected slave device is the same as
+	 * the currently requested slave device.
+	 */
+	if (f->selected == spi->chip_select)
+		return;
+
+	/* Reset FLSHxxCR0 registers */
+	fspi_writel(f, 0, f->iobase + FSPI_FLSHA1CR0);
+	fspi_writel(f, 0, f->iobase + FSPI_FLSHA2CR0);
+	fspi_writel(f, 0, f->iobase + FSPI_FLSHB1CR0);
+	fspi_writel(f, 0, f->iobase + FSPI_FLSHB2CR0);
+
+	/* Use the controller's mapped space size, in KiB, as the flash size. */
+	size_kb = FSPI_FLSHXCR0_SZ(f->memmap_phy_size);
+
+	fspi_writel(f, size_kb, f->iobase + FSPI_FLSHA1CR0 +
+		    4 * spi->chip_select);
+
+	dev_dbg(f->dev, "Slave device [CS:%x] selected\n", spi->chip_select);
+
+	nxp_fspi_clk_disable_unprep(f);
+
+	ret = clk_set_rate(f->clk, rate);
+	if (ret)
+		return;
+
+	ret = nxp_fspi_clk_prep_enable(f);
+	if (ret)
+		return;
+
+	f->selected = spi->chip_select;
+}
+
+static void nxp_fspi_read_ahb(struct nxp_fspi *f, const struct spi_mem_op *op)
+{
+	u32 len = op->data.nbytes;
+
+	/* Read out the data directly from the AHB buffer. */
+	memcpy_fromio(op->data.buf.in, (f->ahb_addr + op->addr.val), len);
+}
+
+static void nxp_fspi_fill_txfifo(struct nxp_fspi *f,
+				 const struct spi_mem_op *op)
+{
+	void __iomem *base = f->iobase;
+	int i, ret;
+	u8 *buf = (u8 *) op->data.buf.out;
+
+	/* clear the TX FIFO. */
+	fspi_writel(f, FSPI_IPTXFCR_CLR, base + FSPI_IPTXFCR);
+
+	/*
+	 * The default watermark level is 8 bytes, so the controller can
+	 * accept at most 8 bytes of data per write request.
+	 */
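+	/*
+	 * For example (illustrative), a 30-byte write is pushed as three
+	 * 8-byte bursts followed by a 6-byte tail padded out to two 32-bit
+	 * TFDR words.
+	 */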
+
+	for (i = 0; i < ALIGN_DOWN(op->data.nbytes, 8); i += 8) {
+		/* Wait for TXFIFO empty */
+		ret = fspi_readl_poll_tout(f, f->iobase + FSPI_INTR,
+					   FSPI_INTR_IPTXWE, 0,
+					   POLL_TOUT, true);
+		WARN_ON(ret);
+
+		fspi_writel(f, *(u32 *) (buf + i), base + FSPI_TFDR);
+		fspi_writel(f, *(u32 *) (buf + i + 4), base + FSPI_TFDR + 4);
+		fspi_writel(f, FSPI_INTR_IPTXWE, base + FSPI_INTR);
+	}
+
+	if (i < op->data.nbytes) {
+		u32 data = 0;
+		int j;
+		/* Wait for TXFIFO empty */
+		ret = fspi_readl_poll_tout(f, f->iobase + FSPI_INTR,
+					   FSPI_INTR_IPTXWE, 0,
+					   POLL_TOUT, true);
+		WARN_ON(ret);
+
+		for (j = 0; j < ALIGN(op->data.nbytes - i, 4); j += 4) {
+			memcpy(&data, buf + i + j, 4);
+			fspi_writel(f, data, base + FSPI_TFDR + j);
+		}
+		fspi_writel(f, FSPI_INTR_IPTXWE, base + FSPI_INTR);
+	}
+}
+
+static void nxp_fspi_read_rxfifo(struct nxp_fspi *f,
+			  const struct spi_mem_op *op)
+{
+	void __iomem *base = f->iobase;
+	int i, ret;
+	int len = op->data.nbytes;
+	u8 *buf = (u8 *) op->data.buf.in;
+
+	/*
+	 * The default watermark level is 8 bytes, so the controller can
+	 * return at most 8 bytes of data per read request.
+	 */
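+	/*
+	 * For example (illustrative), a 13-byte read drains one 8-byte burst
+	 * and then copies the remaining 5 bytes out of two consecutive RFDR
+	 * words.
+	 */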
+	for (i = 0; i < ALIGN_DOWN(len, 8); i += 8) {
+		/* Wait for RXFIFO available */
+		ret = fspi_readl_poll_tout(f, f->iobase + FSPI_INTR,
+					   FSPI_INTR_IPRXWA, 0,
+					   POLL_TOUT, true);
+		WARN_ON(ret);
+
+		*(u32 *)(buf + i) = fspi_readl(f, base + FSPI_RFDR);
+		*(u32 *)(buf + i + 4) = fspi_readl(f, base + FSPI_RFDR + 4);
+		/* move the FIFO pointer */
+		fspi_writel(f, FSPI_INTR_IPRXWA, base + FSPI_INTR);
+	}
+
+	if (i < len) {
+		u32 tmp;
+		int size, j;
+
+		buf = op->data.buf.in + i;
+		/* Wait for RXFIFO available */
+		ret = fspi_readl_poll_tout(f, f->iobase + FSPI_INTR,
+					   FSPI_INTR_IPRXWA, 0,
+					   POLL_TOUT, true);
+		WARN_ON(ret);
+
+		len = op->data.nbytes - i;
+		for (j = 0; j < op->data.nbytes - i; j += 4) {
+			tmp = fspi_readl(f, base + FSPI_RFDR + j);
+			size = min(len, 4);
+			memcpy(buf + j, &tmp, size);
+			len -= size;
+		}
+	}
+
+	/* invalidate the RXFIFO */
+	fspi_writel(f, FSPI_IPRXFCR_CLR, base + FSPI_IPRXFCR);
+	/* move the FIFO pointer */
+	fspi_writel(f, FSPI_INTR_IPRXWA, base + FSPI_INTR);
+}
+
+static int nxp_fspi_do_op(struct nxp_fspi *f, const struct spi_mem_op *op)
+{
+	void __iomem *base = f->iobase;
+	int seqnum = 0;
+	int err = 0;
+	u32 reg;
+
+	reg = fspi_readl(f, base + FSPI_IPRXFCR);
+	/* invalidate the RXFIFO first */
+	reg &= ~FSPI_IPRXFCR_DMA_EN;
+	reg = reg | FSPI_IPRXFCR_CLR;
+	fspi_writel(f, reg, base + FSPI_IPRXFCR);
+
+	init_completion(&f->c);
+
+	fspi_writel(f, op->addr.val, base + FSPI_IPCR0);
+	/*
+	 * Always start the sequence at the same index since we update
+	 * the LUT at each exec_op() call. Also specify the DATA
+	 * length, since it has not been specified in the LUT.
+	 */
+	fspi_writel(f, op->data.nbytes |
+		 (SEQID_LUT << FSPI_IPCR1_SEQID_SHIFT) |
+		 (seqnum << FSPI_IPCR1_SEQNUM_SHIFT),
+		 base + FSPI_IPCR1);
+
+	/* Trigger the LUT now. */
+	fspi_writel(f, FSPI_IPCMD_TRG, base + FSPI_IPCMD);
+
+	/* Wait for the interrupt. */
+	if (!wait_for_completion_timeout(&f->c, msecs_to_jiffies(1000)))
+		err = -ETIMEDOUT;
+
+	/* Invoke IP data read, if request is of data read. */
+	if (!err && op->data.nbytes && op->data.dir == SPI_MEM_DATA_IN)
+		nxp_fspi_read_rxfifo(f, op);
+
+	return err;
+}
+
+static int nxp_fspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
+{
+	struct nxp_fspi *f = spi_controller_get_devdata(mem->spi->master);
+	int err = 0;
+
+	mutex_lock(&f->lock);
+
+	/* Wait for controller being ready. */
+	err = fspi_readl_poll_tout(f, f->iobase + FSPI_STS0,
+				   FSPI_STS0_ARB_IDLE, 1, POLL_TOUT, true);
+	WARN_ON(err);
+
+	nxp_fspi_select_mem(f, mem->spi);
+
+	nxp_fspi_prepare_lut(f, op);
+	/*
+	 * If we have large chunks of data, we read them through the AHB bus
+	 * by accessing the mapped memory. In all other cases we use
+	 * IP commands to access the flash.
+	 */
+	if (op->data.nbytes > (f->devtype_data->rxfifo - 4) &&
+	    op->data.dir == SPI_MEM_DATA_IN) {
+		nxp_fspi_read_ahb(f, op);
+	} else {
+		if (op->data.nbytes && op->data.dir == SPI_MEM_DATA_OUT)
+			nxp_fspi_fill_txfifo(f, op);
+
+		err = nxp_fspi_do_op(f, op);
+	}
+
+	/* Invalidate the data in the AHB buffer. */
+	nxp_fspi_invalid(f);
+
+	mutex_unlock(&f->lock);
+
+	return err;
+}
+
+static int nxp_fspi_adjust_op_size(struct spi_mem *mem, struct spi_mem_op *op)
+{
+	struct nxp_fspi *f = spi_controller_get_devdata(mem->spi->master);
+
+	if (op->data.dir == SPI_MEM_DATA_OUT) {
+		if (op->data.nbytes > f->devtype_data->txfifo)
+			op->data.nbytes = f->devtype_data->txfifo;
+	} else {
+		if (op->data.nbytes > f->devtype_data->ahb_buf_size)
+			op->data.nbytes = f->devtype_data->ahb_buf_size;
+		else if (op->data.nbytes > (f->devtype_data->rxfifo - 4))
+			op->data.nbytes = ALIGN_DOWN(op->data.nbytes, 8);
+	}
+
+	return 0;
+}
+
+static int nxp_fspi_default_setup(struct nxp_fspi *f)
+{
+	void __iomem *base = f->iobase;
+	int ret, i;
+	u32 reg;
+
+	/* Disable and unprepare the clock so no glitches reach the controller. */
+	nxp_fspi_clk_disable_unprep(f);
+
+	/* Set the default frequency; it will be changed later if necessary. */
+	ret = clk_set_rate(f->clk, 20000000);
+	if (ret)
+		return ret;
+
+	ret = nxp_fspi_clk_prep_enable(f);
+	if (ret)
+		return ret;
+
+	/* Reset the module */
+	/* w1c register, wait until it clears */
+	ret = fspi_readl_poll_tout(f, f->iobase + FSPI_MCR0,
+				   FSPI_MCR0_SWRST, 0, POLL_TOUT, false);
+	WARN_ON(ret);
+
+	/* Disable the module */
+	fspi_writel(f, FSPI_MCR0_MDIS, base + FSPI_MCR0);
+
+	/* Reset the DLL register to default value */
+	fspi_writel(f, FSPI_DLLACR_OVRDEN, base + FSPI_DLLACR);
+	fspi_writel(f, FSPI_DLLBCR_OVRDEN, base + FSPI_DLLBCR);
+
+	/* enable module */
+	fspi_writel(f, FSPI_MCR0_AHB_TIMEOUT(0xFF) | FSPI_MCR0_IP_TIMEOUT(0xFF),
+		 base + FSPI_MCR0);
+
+	/*
+	 * Clear the "same device enable" bit so that all slave devices are
+	 * configured independently.
+	 */
+	reg = fspi_readl(f, f->iobase + FSPI_MCR2);
+	reg = reg & ~(FSPI_MCR2_SAMEDEVICEEN);
+	fspi_writel(f, reg, base + FSPI_MCR2);
+
+	/* AHB configuration for access buffer 0~7. */
+	for (i = 0; i < 7; i++)
+		fspi_writel(f, 0, base + FSPI_AHBRX_BUF0CR0 + 4 * i);
+
+	/*
+	 * Set ADATSZ with the maximum AHB buffer size to improve the read
+	 * performance.
+	 */
+	fspi_writel(f, (f->devtype_data->ahb_buf_size / 8 |
+		  FSPI_AHBRXBUF0CR7_PREF), base + FSPI_AHBRX_BUF7CR0);
+
+	/* prefetch and no start address alignment limitation */
+	fspi_writel(f, FSPI_AHBCR_PREF_EN | FSPI_AHBCR_RDADDROPT,
+		 base + FSPI_AHBCR);
+
+	/* AHB Read - Set lut sequence ID for all CS. */
+	fspi_writel(f, SEQID_LUT, base + FSPI_FLSHA1CR2);
+	fspi_writel(f, SEQID_LUT, base + FSPI_FLSHA2CR2);
+	fspi_writel(f, SEQID_LUT, base + FSPI_FLSHB1CR2);
+	fspi_writel(f, SEQID_LUT, base + FSPI_FLSHB2CR2);
+
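+	/* No flash selected yet; nxp_fspi_select_mem() configures it on first use. */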
+	f->selected = -1;
+
+	/* enable the interrupt */
+	fspi_writel(f, FSPI_INTEN_IPCMDDONE, base + FSPI_INTEN);
+
+	return 0;
+}
+
+static const char *nxp_fspi_get_name(struct spi_mem *mem)
+{
+	struct nxp_fspi *f = spi_controller_get_devdata(mem->spi->master);
+	struct device *dev = &mem->spi->dev;
+	const char *name;
+
+	/* Set a custom name derived from the controller's platform_device. */
+	if (of_get_available_child_count(f->dev->of_node) == 1)
+		return dev_name(f->dev);
+
+	name = devm_kasprintf(dev, GFP_KERNEL,
+			      "%s-%d", dev_name(f->dev),
+			      mem->spi->chip_select);
+
+	if (!name) {
+		dev_err(dev, "failed to get memory for custom flash name\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	return name;
+}
+
+static const struct spi_controller_mem_ops nxp_fspi_mem_ops = {
+	.adjust_op_size = nxp_fspi_adjust_op_size,
+	.supports_op = nxp_fspi_supports_op,
+	.exec_op = nxp_fspi_exec_op,
+	.get_name = nxp_fspi_get_name,
+};
+
+static int nxp_fspi_probe(struct platform_device *pdev)
+{
+	struct spi_controller *ctlr;
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	struct resource *res;
+	struct nxp_fspi *f;
+	int ret;
+
+	ctlr = spi_alloc_master(&pdev->dev, sizeof(*f));
+	if (!ctlr)
+		return -ENOMEM;
+
+	ctlr->mode_bits = SPI_RX_DUAL | SPI_RX_QUAD | SPI_RX_OCTAL |
+			  SPI_TX_DUAL | SPI_TX_QUAD | SPI_TX_OCTAL;
+
+	f = spi_controller_get_devdata(ctlr);
+	f->dev = dev;
+	f->devtype_data = of_device_get_match_data(dev);
+	if (!f->devtype_data) {
+		ret = -ENODEV;
+		goto err_put_ctrl;
+	}
+
+	platform_set_drvdata(pdev, f);
+
+	/* find the resources - configuration register address space */
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "fspi_base");
+	f->iobase = devm_ioremap_resource(dev, res);
+	if (IS_ERR(f->iobase)) {
+		ret = PTR_ERR(f->iobase);
+		goto err_put_ctrl;
+	}
+
+	/* find the resources - controller memory mapped space */
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "fspi_mmap");
+	f->ahb_addr = devm_ioremap_resource(dev, res);
+	if (IS_ERR(f->ahb_addr)) {
+		ret = PTR_ERR(f->ahb_addr);
+		goto err_put_ctrl;
+	}
+
+	/* assign memory mapped starting address and mapped size. */
+	f->memmap_phy = res->start;
+	f->memmap_phy_size = resource_size(res);
+
+	/* find the clocks */
+	f->clk_en = devm_clk_get(dev, "fspi_en");
+	if (IS_ERR(f->clk_en)) {
+		ret = PTR_ERR(f->clk_en);
+		goto err_put_ctrl;
+	}
+
+	f->clk = devm_clk_get(dev, "fspi");
+	if (IS_ERR(f->clk)) {
+		ret = PTR_ERR(f->clk);
+		goto err_put_ctrl;
+	}
+
+	ret = nxp_fspi_clk_prep_enable(f);
+	if (ret) {
+		dev_err(dev, "can not enable the clock\n");
+		goto err_put_ctrl;
+	}
+
+	/* find the irq */
+	ret = platform_get_irq(pdev, 0);
+	if (ret < 0)
+		goto err_disable_clk;
+
+	ret = devm_request_irq(dev, ret,
+			nxp_fspi_irq_handler, 0, pdev->name, f);
+	if (ret) {
+		dev_err(dev, "failed to request irq: %d\n", ret);
+		goto err_disable_clk;
+	}
+
+	mutex_init(&f->lock);
+
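+	/* Let the SPI core assign a dynamic bus number. */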
+	ctlr->bus_num = -1;
+	ctlr->num_chipselect = NXP_FSPI_MAX_CHIPSELECT;
+	ctlr->mem_ops = &nxp_fspi_mem_ops;
+
+	nxp_fspi_default_setup(f);
+
+	ctlr->dev.of_node = np;
+
+	ret = spi_register_controller(ctlr);
+	if (ret)
+		goto err_destroy_mutex;
+
+	return 0;
+
+err_destroy_mutex:
+	mutex_destroy(&f->lock);
+
+err_disable_clk:
+	nxp_fspi_clk_disable_unprep(f);
+
+err_put_ctrl:
+	spi_controller_put(ctlr);
+
+	dev_err(dev, "NXP FSPI probe failed\n");
+	return ret;
+}
+
+static int nxp_fspi_remove(struct platform_device *pdev)
+{
+	struct nxp_fspi *f = platform_get_drvdata(pdev);
+
+	/* disable the hardware */
+	fspi_writel(f, FSPI_MCR0_MDIS, f->iobase + FSPI_MCR0);
+
+	nxp_fspi_clk_disable_unprep(f);
+
+	mutex_destroy(&f->lock);
+
+	return 0;
+}
+
+static int nxp_fspi_suspend(struct device *dev)
+{
+	return 0;
+}
+
+static int nxp_fspi_resume(struct device *dev)
+{
+	struct nxp_fspi *f = dev_get_drvdata(dev);
+
+	nxp_fspi_default_setup(f);
+
+	return 0;
+}
+
+static const struct of_device_id nxp_fspi_dt_ids[] = {
+	{ .compatible = "nxp,lx2160a-fspi", .data = (void *)&lx2160a_data, },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, nxp_fspi_dt_ids);
+
+static const struct dev_pm_ops nxp_fspi_pm_ops = {
+	.suspend	= nxp_fspi_suspend,
+	.resume		= nxp_fspi_resume,
+};
+
+static struct platform_driver nxp_fspi_driver = {
+	.driver = {
+		.name	= "nxp-fspi",
+		.of_match_table = nxp_fspi_dt_ids,
+		.pm =   &nxp_fspi_pm_ops,
+	},
+	.probe          = nxp_fspi_probe,
+	.remove		= nxp_fspi_remove,
+};
+module_platform_driver(nxp_fspi_driver);
+
+MODULE_DESCRIPTION("NXP FSPI Controller Driver");
+MODULE_AUTHOR("NXP Semiconductor");
+MODULE_AUTHOR("Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com>");
+MODULE_AUTHOR("Boris Brezillon <bbrezillon@kernel.org>");
+MODULE_AUTHOR("Frieder Schrempf <frieder.schrempf@kontron.de>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/spi/spi-oc-tiny.c b/drivers/spi/spi-oc-tiny.c
index 085f580..e2331eb 100644
--- a/drivers/spi/spi-oc-tiny.c
+++ b/drivers/spi/spi-oc-tiny.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * OpenCores tiny SPI master driver
  *
@@ -9,10 +10,6 @@
  * Copyright (c) 2006 Ben Dooks
  * Copyright (c) 2006 Simtec Electronics
  *	Ben Dooks <ben@simtec.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/interrupt.h>
@@ -243,7 +240,6 @@
 	struct tiny_spi_platform_data *platp = dev_get_platdata(&pdev->dev);
 	struct tiny_spi *hw;
 	struct spi_master *master;
-	struct resource *res;
 	unsigned int i;
 	int err = -ENODEV;
 
@@ -267,8 +263,7 @@
 	hw->bitbang.txrx_bufs = tiny_spi_txrx_bufs;
 
 	/* find and map our resources */
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	hw->base = devm_ioremap_resource(&pdev->dev, res);
+	hw->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(hw->base)) {
 		err = PTR_ERR(hw->base);
 		goto exit;
diff --git a/drivers/spi/spi-omap-100k.c b/drivers/spi/spi-omap-100k.c
index 76a8425..b955ca8 100644
--- a/drivers/spi/spi-omap-100k.c
+++ b/drivers/spi/spi-omap-100k.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * OMAP7xx SPI 100k controller driver
  * Author: Fabrice Crohas <fcrohas@gmail.com>
@@ -6,16 +7,6 @@
  * Copyright (C) 2005, 2006 Nokia Corporation
  * Author:      Samuel Ortiz <samuel.ortiz@nokia.com> and
 *              Juha Yrjölä <juha.yrjola@nokia.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
  */
 #include <linux/kernel.h>
 #include <linux/init.h>
diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c
index e2be7da..848e03e 100644
--- a/drivers/spi/spi-omap2-mcspi.c
+++ b/drivers/spi/spi-omap2-mcspi.c
@@ -1,19 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * OMAP2 McSPI controller driver
  *
  * Copyright (C) 2005, 2006 Nokia Corporation
  * Author:	Samuel Ortiz <samuel.ortiz@nokia.com> and
 *		Juha Yrjölä <juha.yrjola@nokia.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
  */
 
 #include <linux/kernel.h>
@@ -33,6 +24,7 @@
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/gcd.h>
+#include <linux/iopoll.h>
 
 #include <linux/spi/spi.h>
 #include <linux/gpio.h>
@@ -126,6 +118,7 @@
 };
 
 struct omap2_mcspi {
+	struct completion	txdone;
 	struct spi_master	*master;
 	/* Virtual base address of the controller */
 	void __iomem		*base;
@@ -135,6 +128,7 @@
 	struct device		*dev;
 	struct omap2_mcspi_regs ctx;
 	int			fifo_depth;
+	bool			slave_aborted;
 	unsigned int		pin_dir:1;
 };
 
@@ -274,19 +268,23 @@
 	}
 }
 
-static void omap2_mcspi_set_master_mode(struct spi_master *master)
+static void omap2_mcspi_set_mode(struct spi_master *master)
 {
 	struct omap2_mcspi	*mcspi = spi_master_get_devdata(master);
 	struct omap2_mcspi_regs	*ctx = &mcspi->ctx;
 	u32 l;
 
 	/*
-	 * Setup when switching from (reset default) slave mode
-	 * to single-channel master mode
+	 * Choose master or slave mode
 	 */
 	l = mcspi_read_reg(master, OMAP2_MCSPI_MODULCTRL);
-	l &= ~(OMAP2_MCSPI_MODULCTRL_STEST | OMAP2_MCSPI_MODULCTRL_MS);
-	l |= OMAP2_MCSPI_MODULCTRL_SINGLE;
+	l &= ~(OMAP2_MCSPI_MODULCTRL_STEST);
+	if (spi_controller_is_slave(master)) {
+		l |= (OMAP2_MCSPI_MODULCTRL_MS);
+	} else {
+		l &= ~(OMAP2_MCSPI_MODULCTRL_MS);
+		l |= OMAP2_MCSPI_MODULCTRL_SINGLE;
+	}
 	mcspi_write_reg(master, OMAP2_MCSPI_MODULCTRL, l);
 
 	ctx->modulctrl = l;
@@ -299,7 +297,7 @@
 	struct omap2_mcspi_cs *cs = spi->controller_state;
 	struct omap2_mcspi *mcspi;
 	unsigned int wcnt;
-	int max_fifo_depth, fifo_depth, bytes_per_word;
+	int max_fifo_depth, bytes_per_word;
 	u32 chconf, xferlevel;
 
 	mcspi = spi_master_get_devdata(master);
@@ -315,10 +313,6 @@
 		else
 			max_fifo_depth = OMAP2_MCSPI_MAX_FIFODEPTH;
 
-		fifo_depth = gcd(t->len, max_fifo_depth);
-		if (fifo_depth < 2 || fifo_depth % bytes_per_word != 0)
-			goto disable_fifo;
-
 		wcnt = t->len / bytes_per_word;
 		if (wcnt > OMAP2_MCSPI_MAX_FIFOWCNT)
 			goto disable_fifo;
@@ -326,16 +320,17 @@
 		xferlevel = wcnt << 16;
 		if (t->rx_buf != NULL) {
 			chconf |= OMAP2_MCSPI_CHCONF_FFER;
-			xferlevel |= (fifo_depth - 1) << 8;
+			xferlevel |= (bytes_per_word - 1) << 8;
 		}
+
 		if (t->tx_buf != NULL) {
 			chconf |= OMAP2_MCSPI_CHCONF_FFET;
-			xferlevel |= fifo_depth - 1;
+			xferlevel |= bytes_per_word - 1;
 		}
 
 		mcspi_write_reg(master, OMAP2_MCSPI_XFERLEVEL, xferlevel);
 		mcspi_write_chconf0(spi, chconf);
-		mcspi->fifo_depth = fifo_depth;
+		mcspi->fifo_depth = max_fifo_depth;
 
 		return;
 	}
@@ -353,18 +348,22 @@
 
 static int mcspi_wait_for_reg_bit(void __iomem *reg, unsigned long bit)
 {
-	unsigned long timeout;
+	u32 val;
 
-	timeout = jiffies + msecs_to_jiffies(1000);
-	while (!(readl_relaxed(reg) & bit)) {
-		if (time_after(jiffies, timeout)) {
-			if (!(readl_relaxed(reg) & bit))
-				return -ETIMEDOUT;
-			else
-				return 0;
-		}
-		cpu_relax();
+	return readl_poll_timeout(reg, val, val & bit, 1, USEC_PER_SEC);
+}
+
+static int mcspi_wait_for_completion(struct  omap2_mcspi *mcspi,
+				     struct completion *x)
+{
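+	/*
+	 * In slave mode, wait interruptibly so that a signal or slave_abort()
+	 * can end the wait; in master mode, block until the transfer completes.
+	 */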
+	if (spi_controller_is_slave(mcspi->master)) {
+		if (wait_for_completion_interruptible(x) ||
+		    mcspi->slave_aborted)
+			return -EINTR;
+	} else {
+		wait_for_completion(x);
 	}
+
 	return 0;
 }
 
@@ -517,7 +516,12 @@
 	dma_async_issue_pending(mcspi_dma->dma_rx);
 	omap2_mcspi_set_dma_req(spi, 1, 1);
 
-	wait_for_completion(&mcspi_dma->dma_rx_completion);
+	ret = mcspi_wait_for_completion(mcspi, &mcspi_dma->dma_rx_completion);
+	if (ret || mcspi->slave_aborted) {
+		dmaengine_terminate_sync(mcspi_dma->dma_rx);
+		omap2_mcspi_set_dma_req(spi, 1, 0);
+		return 0;
+	}
 
 	for (x = 0; x < nb_sizes; x++)
 		kfree(sg_out[x]);
@@ -585,7 +589,6 @@
 	struct dma_slave_config	cfg;
 	enum dma_slave_buswidth width;
 	unsigned es;
-	u32			burst;
 	void __iomem		*chstat_reg;
 	void __iomem            *irqstat_reg;
 	int			wait_res;
@@ -605,34 +608,49 @@
 	}
 
 	count = xfer->len;
-	burst = 1;
-
-	if (mcspi->fifo_depth > 0) {
-		if (count > mcspi->fifo_depth)
-			burst = mcspi->fifo_depth / es;
-		else
-			burst = count / es;
-	}
 
 	memset(&cfg, 0, sizeof(cfg));
 	cfg.src_addr = cs->phys + OMAP2_MCSPI_RX0;
 	cfg.dst_addr = cs->phys + OMAP2_MCSPI_TX0;
 	cfg.src_addr_width = width;
 	cfg.dst_addr_width = width;
-	cfg.src_maxburst = burst;
-	cfg.dst_maxburst = burst;
+	cfg.src_maxburst = 1;
+	cfg.dst_maxburst = 1;
 
 	rx = xfer->rx_buf;
 	tx = xfer->tx_buf;
 
-	if (tx != NULL)
+	mcspi->slave_aborted = false;
+	reinit_completion(&mcspi_dma->dma_tx_completion);
+	reinit_completion(&mcspi_dma->dma_rx_completion);
+	reinit_completion(&mcspi->txdone);
+	if (tx) {
+		/* Enable the EOW IRQ to detect the end of TX in slave mode */
+		if (spi_controller_is_slave(spi->master))
+			mcspi_write_reg(spi->master,
+					OMAP2_MCSPI_IRQENABLE,
+					OMAP2_MCSPI_IRQSTATUS_EOW);
 		omap2_mcspi_tx_dma(spi, xfer, cfg);
+	}
 
 	if (rx != NULL)
 		count = omap2_mcspi_rx_dma(spi, xfer, cfg, es);
 
 	if (tx != NULL) {
-		wait_for_completion(&mcspi_dma->dma_tx_completion);
+		int ret;
+
+		ret = mcspi_wait_for_completion(mcspi, &mcspi_dma->dma_tx_completion);
+		if (ret || mcspi->slave_aborted) {
+			dmaengine_terminate_sync(mcspi_dma->dma_tx);
+			omap2_mcspi_set_dma_req(spi, 0, 0);
+			return 0;
+		}
+
+		if (spi_controller_is_slave(mcspi->master)) {
+			ret = mcspi_wait_for_completion(mcspi, &mcspi->txdone);
+			if (ret || mcspi->slave_aborted)
+				return 0;
+		}
 
 		if (mcspi->fifo_depth > 0) {
 			irqstat_reg = mcspi->base + OMAP2_MCSPI_IRQSTATUS;
@@ -1089,6 +1107,36 @@
 		gpio_free(spi->cs_gpio);
 }
 
+static irqreturn_t omap2_mcspi_irq_handler(int irq, void *data)
+{
+	struct omap2_mcspi *mcspi = data;
+	u32 irqstat;
+
+	irqstat	= mcspi_read_reg(mcspi->master, OMAP2_MCSPI_IRQSTATUS);
+	if (!irqstat)
+		return IRQ_NONE;
+
+	/* Disable the IRQ and wake up the slave transfer task */
+	mcspi_write_reg(mcspi->master, OMAP2_MCSPI_IRQENABLE, 0);
+	if (irqstat & OMAP2_MCSPI_IRQSTATUS_EOW)
+		complete(&mcspi->txdone);
+
+	return IRQ_HANDLED;
+}
+
+static int omap2_mcspi_slave_abort(struct spi_master *master)
+{
+	struct omap2_mcspi *mcspi = spi_master_get_devdata(master);
+	struct omap2_mcspi_dma *mcspi_dma = mcspi->dma_channels;
+
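+	/* Flag the abort and wake up anyone waiting on the DMA or EOW completions. */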
+	mcspi->slave_aborted = true;
+	complete(&mcspi_dma->dma_rx_completion);
+	complete(&mcspi_dma->dma_tx_completion);
+	complete(&mcspi->txdone);
+
+	return 0;
+}
+
 static int omap2_mcspi_transfer_one(struct spi_master *master,
 				    struct spi_device *spi,
 				    struct spi_transfer *t)
@@ -1255,10 +1303,20 @@
 				struct spi_device *spi,
 				struct spi_transfer *xfer)
 {
+	struct omap2_mcspi *mcspi = spi_master_get_devdata(spi->master);
+	struct omap2_mcspi_dma *mcspi_dma =
+		&mcspi->dma_channels[spi->chip_select];
+
+	if (!mcspi_dma->dma_rx || !mcspi_dma->dma_tx)
+		return false;
+
+	if (spi_controller_is_slave(master))
+		return true;
+
 	return (xfer->len >= DMA_MIN_BYTES);
 }
 
-static int omap2_mcspi_master_setup(struct omap2_mcspi *mcspi)
+static int omap2_mcspi_controller_setup(struct omap2_mcspi *mcspi)
 {
 	struct spi_master	*master = mcspi->master;
 	struct omap2_mcspi_regs	*ctx = &mcspi->ctx;
@@ -1275,7 +1333,7 @@
 			OMAP2_MCSPI_WAKEUPENABLE_WKEN);
 	ctx->wakeupenable = OMAP2_MCSPI_WAKEUPENABLE_WKEN;
 
-	omap2_mcspi_set_master_mode(master);
+	omap2_mcspi_set_mode(master);
 	pm_runtime_mark_last_busy(mcspi->dev);
 	pm_runtime_put_autosuspend(mcspi->dev);
 	return 0;
@@ -1350,11 +1408,12 @@
 	struct device_node	*node = pdev->dev.of_node;
 	const struct of_device_id *match;
 
-	master = spi_alloc_master(&pdev->dev, sizeof *mcspi);
-	if (master == NULL) {
-		dev_dbg(&pdev->dev, "master allocation failed\n");
+	if (of_property_read_bool(node, "spi-slave"))
+		master = spi_alloc_slave(&pdev->dev, sizeof(*mcspi));
+	else
+		master = spi_alloc_master(&pdev->dev, sizeof(*mcspi));
+	if (!master)
 		return -ENOMEM;
-	}
 
 	/* the spi->mode bits understood by this driver: */
 	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
@@ -1366,6 +1425,7 @@
 	master->transfer_one = omap2_mcspi_transfer_one;
 	master->set_cs = omap2_mcspi_set_cs;
 	master->cleanup = omap2_mcspi_cleanup;
+	master->slave_abort = omap2_mcspi_slave_abort;
 	master->dev.of_node = node;
 	master->max_speed_hz = OMAP2_MCSPI_MAX_FREQ;
 	master->min_speed_hz = OMAP2_MCSPI_MAX_FREQ >> 15;
@@ -1417,15 +1477,31 @@
 		sprintf(mcspi->dma_channels[i].dma_tx_ch_name, "tx%d", i);
 	}
 
+	status = platform_get_irq(pdev, 0);
+	if (status == -EPROBE_DEFER)
+		goto free_master;
+	if (status < 0) {
+		dev_err(&pdev->dev, "no irq resource found\n");
+		goto free_master;
+	}
+	init_completion(&mcspi->txdone);
+	status = devm_request_irq(&pdev->dev, status,
+				  omap2_mcspi_irq_handler, 0, pdev->name,
+				  mcspi);
+	if (status) {
+		dev_err(&pdev->dev, "Cannot request IRQ");
+		goto free_master;
+	}
+
 	pm_runtime_use_autosuspend(&pdev->dev);
 	pm_runtime_set_autosuspend_delay(&pdev->dev, SPI_AUTOSUSPEND_TIMEOUT);
 	pm_runtime_enable(&pdev->dev);
 
-	status = omap2_mcspi_master_setup(mcspi);
+	status = omap2_mcspi_controller_setup(mcspi);
 	if (status < 0)
 		goto disable_pm;
 
-	status = devm_spi_register_master(&pdev->dev, master);
+	status = devm_spi_register_controller(&pdev->dev, master);
 	if (status < 0)
 		goto disable_pm;
 
diff --git a/drivers/spi/spi-orion.c b/drivers/spi/spi-orion.c
index 47ef6b1..6643ccd 100644
--- a/drivers/spi/spi-orion.c
+++ b/drivers/spi/spi-orion.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Marvell Orion SPI controller driver
  *
  * Author: Shadi Ammouri <shadi@marvell.com>
  * Copyright (C) 2007-2008 Marvell Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/interrupt.h>
@@ -431,6 +428,7 @@
 	int word_len;
 	struct orion_spi *orion_spi;
 	int cs = spi->chip_select;
+	void __iomem *vaddr;
 
 	word_len = spi->bits_per_word;
 	count = xfer->len;
@@ -441,8 +439,9 @@
 	 * Use SPI direct write mode if base address is available. Otherwise
 	 * fall back to PIO mode for this transfer.
 	 */
-	if ((orion_spi->child[cs].direct_access.vaddr) && (xfer->tx_buf) &&
-	    (word_len == 8)) {
+	vaddr = orion_spi->child[cs].direct_access.vaddr;
+
+	if (vaddr && xfer->tx_buf && word_len == 8) {
 		unsigned int cnt = count / 4;
 		unsigned int rem = count % 4;
 
@@ -450,13 +449,11 @@
 		 * Send the TX-data to the SPI device via the direct
 		 * mapped address window
 		 */
-		iowrite32_rep(orion_spi->child[cs].direct_access.vaddr,
-			      xfer->tx_buf, cnt);
+		iowrite32_rep(vaddr, xfer->tx_buf, cnt);
 		if (rem) {
 			u32 *buf = (u32 *)xfer->tx_buf;
 
-			iowrite8_rep(orion_spi->child[cs].direct_access.vaddr,
-				     &buf[cnt], rem);
+			iowrite8_rep(vaddr, &buf[cnt], rem);
 		}
 
 		return count;
@@ -470,6 +467,8 @@
 			if (orion_spi_write_read_8bit(spi, &tx, &rx) < 0)
 				goto out;
 			count--;
+			if (xfer->word_delay_usecs)
+				udelay(xfer->word_delay_usecs);
 		} while (count);
 	} else if (word_len == 16) {
 		const u16 *tx = xfer->tx_buf;
@@ -479,6 +478,8 @@
 			if (orion_spi_write_read_16bit(spi, &tx, &rx) < 0)
 				goto out;
 			count -= 2;
+			if (xfer->word_delay_usecs)
+				udelay(xfer->word_delay_usecs);
 		} while (count);
 	}
 
@@ -683,6 +684,7 @@
 	}
 
 	for_each_available_child_of_node(pdev->dev.of_node, np) {
+		struct orion_direct_acc *dir_acc;
 		u32 cs;
 		int cs_gpio;
 
@@ -750,14 +752,13 @@
 		 * This needs to get extended for the direct SPI-NOR / SPI-NAND
 		 * support, once this gets implemented.
 		 */
-		spi->child[cs].direct_access.vaddr = devm_ioremap(&pdev->dev,
-							    r->start,
-							    PAGE_SIZE);
-		if (!spi->child[cs].direct_access.vaddr) {
+		dir_acc = &spi->child[cs].direct_access;
+		dir_acc->vaddr = devm_ioremap(&pdev->dev, r->start, PAGE_SIZE);
+		if (!dir_acc->vaddr) {
 			status = -ENOMEM;
 			goto out_rel_axi_clk;
 		}
-		spi->child[cs].direct_access.size = PAGE_SIZE;
+		dir_acc->size = PAGE_SIZE;
 
 		dev_info(&pdev->dev, "CS%d configured for direct access\n", cs);
 	}
diff --git a/drivers/spi/spi-pic32-sqi.c b/drivers/spi/spi-pic32-sqi.c
index bd1c6b5..86ad175 100644
--- a/drivers/spi/spi-pic32-sqi.c
+++ b/drivers/spi/spi-pic32-sqi.c
@@ -1,17 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * PIC32 Quad SPI controller driver.
  *
  * Purna Chandra Mandal <purna.mandal@microchip.com>
  * Copyright (c) 2016, Microchip Technology Inc.
- *
- * This program is free software; you can distribute it and/or modify it
- * under the terms of the GNU General Public License (Version 2) as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
  */
 
 #include <linux/clk.h>
@@ -466,9 +458,9 @@
 	int i;
 
 	/* allocate coherent DMAable memory for hardware buffer descriptors. */
-	sqi->bd = dma_zalloc_coherent(&sqi->master->dev,
-				      sizeof(*bd) * PESQI_BD_COUNT,
-				      &sqi->bd_dma, GFP_DMA32);
+	sqi->bd = dma_alloc_coherent(&sqi->master->dev,
+				     sizeof(*bd) * PESQI_BD_COUNT,
+				     &sqi->bd_dma, GFP_KERNEL);
 	if (!sqi->bd) {
 		dev_err(&sqi->master->dev, "failed allocating dma buffer\n");
 		return -ENOMEM;
@@ -578,7 +570,6 @@
 {
 	struct spi_master *master;
 	struct pic32_sqi *sqi;
-	struct resource *reg;
 	int ret;
 
 	master = spi_alloc_master(&pdev->dev, sizeof(*sqi));
@@ -588,8 +579,7 @@
 	sqi = spi_master_get_devdata(master);
 	sqi->master = master;
 
-	reg = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	sqi->regs = devm_ioremap_resource(&pdev->dev, reg);
+	sqi->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(sqi->regs)) {
 		ret = PTR_ERR(sqi->regs);
 		goto err_free_master;
@@ -598,7 +588,6 @@
 	/* irq */
 	sqi->irq = platform_get_irq(pdev, 0);
 	if (sqi->irq < 0) {
-		dev_err(&pdev->dev, "no irq found\n");
 		ret = sqi->irq;
 		goto err_free_master;
 	}
@@ -656,7 +645,7 @@
 	master->max_speed_hz	= clk_get_rate(sqi->base_clk);
 	master->dma_alignment	= 32;
 	master->max_dma_len	= PESQI_BD_BUF_LEN_MAX;
-	master->dev.of_node	= of_node_get(pdev->dev.of_node);
+	master->dev.of_node	= pdev->dev.of_node;
 	master->mode_bits	= SPI_MODE_3 | SPI_MODE_0 | SPI_TX_DUAL |
 				  SPI_RX_DUAL | SPI_TX_QUAD | SPI_RX_QUAD;
 	master->flags		= SPI_MASTER_HALF_DUPLEX;
diff --git a/drivers/spi/spi-pic32.c b/drivers/spi/spi-pic32.c
index f8a45af..69f517e 100644
--- a/drivers/spi/spi-pic32.c
+++ b/drivers/spi/spi-pic32.c
@@ -1,17 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Microchip PIC32 SPI controller driver.
  *
  * Purna Chandra Mandal <purna.mandal@microchip.com>
  * Copyright (c) 2016, Microchip Technology Inc.
- *
- * This program is free software; you can distribute it and/or modify it
- * under the terms of the GNU General Public License (Version 2) as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
  */
 
 #include <linux/clk.h>
@@ -320,7 +312,7 @@
 	desc_rx = dmaengine_prep_slave_sg(master->dma_rx,
 					  xfer->rx_sg.sgl,
 					  xfer->rx_sg.nents,
-					  DMA_FROM_DEVICE,
+					  DMA_DEV_TO_MEM,
 					  DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 	if (!desc_rx) {
 		ret = -EINVAL;
@@ -330,7 +322,7 @@
 	desc_tx = dmaengine_prep_slave_sg(master->dma_tx,
 					  xfer->tx_sg.sgl,
 					  xfer->tx_sg.nents,
-					  DMA_TO_DEVICE,
+					  DMA_MEM_TO_DEV,
 					  DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 	if (!desc_tx) {
 		ret = -EINVAL;
@@ -559,7 +551,7 @@
 		dev_err(&spi->dev, "wait error/timedout\n");
 		if (dma_issued) {
 			dmaengine_terminate_all(master->dma_rx);
-			dmaengine_terminate_all(master->dma_rx);
+			dmaengine_terminate_all(master->dma_tx);
 		}
 		ret = -ETIMEDOUT;
 	} else {
@@ -719,22 +711,16 @@
 
 	/* get irq resources: err-irq, rx-irq, tx-irq */
 	pic32s->fault_irq = platform_get_irq_byname(pdev, "fault");
-	if (pic32s->fault_irq < 0) {
-		dev_err(&pdev->dev, "fault-irq not found\n");
+	if (pic32s->fault_irq < 0)
 		return pic32s->fault_irq;
-	}
 
 	pic32s->rx_irq = platform_get_irq_byname(pdev, "rx");
-	if (pic32s->rx_irq < 0) {
-		dev_err(&pdev->dev, "rx-irq not found\n");
+	if (pic32s->rx_irq < 0)
 		return pic32s->rx_irq;
-	}
 
 	pic32s->tx_irq = platform_get_irq_byname(pdev, "tx");
-	if (pic32s->tx_irq < 0) {
-		dev_err(&pdev->dev, "tx-irq not found\n");
+	if (pic32s->tx_irq < 0)
 		return pic32s->tx_irq;
-	}
 
 	/* get clock */
 	pic32s->clk = devm_clk_get(&pdev->dev, "mck0");
@@ -774,7 +760,7 @@
 	if (ret)
 		goto err_master;
 
-	master->dev.of_node	= of_node_get(pdev->dev.of_node);
+	master->dev.of_node	= pdev->dev.of_node;
 	master->mode_bits	= SPI_MODE_3 | SPI_MODE_0 | SPI_CS_HIGH;
 	master->num_chipselect	= 1; /* single chip-select */
 	master->max_speed_hz	= clk_get_rate(pic32s->clk);
diff --git a/drivers/spi/spi-pl022.c b/drivers/spi/spi-pl022.c
index 1af8c96..7fedea6 100644
--- a/drivers/spi/spi-pl022.c
+++ b/drivers/spi/spi-pl022.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * A driver for the ARM PL022 PrimeCell SSP/SPI bus master.
  *
@@ -10,16 +11,6 @@
  *	linux-2.6.17-rc3-mm1/drivers/spi/pxa2xx_spi.c
  * Initial adoption to PL022 by:
  *      Sachin Verma <sachin.verma@st.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/init.h>
@@ -253,6 +244,7 @@
 #define STATE_RUNNING			((void *) 1)
 #define STATE_DONE			((void *) 2)
 #define STATE_ERROR			((void *) -1)
+#define STATE_TIMEOUT			((void *) -2)
 
 /*
  * SSP State - Whether Enabled or Disabled
@@ -861,11 +853,10 @@
 
 	/* Update total bytes transferred */
 	msg->actual_length += pl022->cur_transfer->len;
-	if (pl022->cur_transfer->cs_change)
-		pl022_cs_control(pl022, SSP_CHIP_DESELECT);
-
 	/* Move to next transfer */
 	msg->state = next_transfer(pl022);
+	if (msg->state != STATE_DONE && pl022->cur_transfer->cs_change)
+		pl022_cs_control(pl022, SSP_CHIP_DESELECT);
 	tasklet_schedule(&pl022->pump_transfers);
 }
 
@@ -1333,10 +1324,10 @@
 		}
 		/* Update total bytes transferred */
 		msg->actual_length += pl022->cur_transfer->len;
-		if (pl022->cur_transfer->cs_change)
-			pl022_cs_control(pl022, SSP_CHIP_DESELECT);
 		/* Move to next transfer */
 		msg->state = next_transfer(pl022);
+		if (msg->state != STATE_DONE && pl022->cur_transfer->cs_change)
+			pl022_cs_control(pl022, SSP_CHIP_DESELECT);
 		tasklet_schedule(&pl022->pump_transfers);
 		return IRQ_HANDLED;
 	}
@@ -1485,15 +1476,37 @@
 	writew(irqflags, SSP_IMSC(pl022->virtbase));
 }
 
+static void print_current_status(struct pl022 *pl022)
+{
+	u32 read_cr0;
+	u16 read_cr1, read_dmacr, read_sr;
+
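+
+	/* Read back the SSP registers so they can be dumped below. */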
+	if (pl022->vendor->extended_cr)
+		read_cr0 = readl(SSP_CR0(pl022->virtbase));
+	else
+		read_cr0 = readw(SSP_CR0(pl022->virtbase));
+	read_cr1 = readw(SSP_CR1(pl022->virtbase));
+	read_dmacr = readw(SSP_DMACR(pl022->virtbase));
+	read_sr = readw(SSP_SR(pl022->virtbase));
+
+	dev_warn(&pl022->adev->dev, "spi-pl022 CR0: %x\n", read_cr0);
+	dev_warn(&pl022->adev->dev, "spi-pl022 CR1: %x\n", read_cr1);
+	dev_warn(&pl022->adev->dev, "spi-pl022 DMACR: %x\n", read_dmacr);
+	dev_warn(&pl022->adev->dev, "spi-pl022 SR: %x\n", read_sr);
+	dev_warn(&pl022->adev->dev,
+			"spi-pl022 exp_fifo_level/fifodepth: %u/%d\n",
+			pl022->exp_fifo_level,
+			pl022->vendor->fifodepth);
+}
+
 static void do_polling_transfer(struct pl022 *pl022)
 {
 	struct spi_message *message = NULL;
 	struct spi_transfer *transfer = NULL;
 	struct spi_transfer *previous = NULL;
-	struct chip_data *chip;
 	unsigned long time, timeout;
 
-	chip = pl022->cur_chip;
 	message = pl022->cur_msg;
 
 	while (message->state != STATE_DONE) {
@@ -1538,7 +1551,8 @@
 			if (time_after(time, timeout)) {
 				dev_warn(&pl022->adev->dev,
 				"%s: timeout!\n", __func__);
-				message->state = STATE_ERROR;
+				message->state = STATE_TIMEOUT;
+				print_current_status(pl022);
 				goto out;
 			}
 			cpu_relax();
@@ -1546,15 +1560,18 @@
 
 		/* Update total byte transferred */
 		message->actual_length += pl022->cur_transfer->len;
-		if (pl022->cur_transfer->cs_change)
-			pl022_cs_control(pl022, SSP_CHIP_DESELECT);
 		/* Move to next transfer */
 		message->state = next_transfer(pl022);
+		if (message->state != STATE_DONE
+		    && pl022->cur_transfer->cs_change)
+			pl022_cs_control(pl022, SSP_CHIP_DESELECT);
 	}
 out:
 	/* Handle end of message */
 	if (message->state == STATE_DONE)
 		message->status = 0;
+	else if (message->state == STATE_TIMEOUT)
+		message->status = -EAGAIN;
 	else
 		message->status = -EIO;
 
@@ -2325,10 +2342,8 @@
 	int ret;
 
 	ret = spi_master_suspend(pl022->master);
-	if (ret) {
-		dev_warn(dev, "cannot suspend master\n");
+	if (ret)
 		return ret;
-	}
 
 	ret = pm_runtime_force_suspend(dev);
 	if (ret) {
@@ -2353,9 +2368,7 @@
 
 	/* Start the queue running */
 	ret = spi_master_resume(pl022->master);
-	if (ret)
-		dev_err(dev, "problem starting queue (%d)\n", ret);
-	else
+	if (!ret)
 		dev_dbg(dev, "resumed\n");
 
 	return ret;
diff --git a/drivers/spi/spi-ppc4xx.c b/drivers/spi/spi-ppc4xx.c
index 967d948..0ea2d9a 100644
--- a/drivers/spi/spi-ppc4xx.c
+++ b/drivers/spi/spi-ppc4xx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * SPI_PPC4XX SPI controller driver.
  *
@@ -10,10 +11,6 @@
  * Copyright (c) 2006 Ben Dooks
  * Copyright (c) 2006 Simtec Electronics
  *	Ben Dooks <ben@simtec.co.uk>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
  */
 
 /*
diff --git a/drivers/spi/spi-pxa2xx-dma.c b/drivers/spi/spi-pxa2xx-dma.c
index 2fa7f4b..37567bc 100644
--- a/drivers/spi/spi-pxa2xx-dma.c
+++ b/drivers/spi/spi-pxa2xx-dma.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * PXA2xx SPI DMA engine support.
  *
  * Copyright (C) 2013, Intel Corporation
  * Author: Mika Westerberg <mika.westerberg@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/device.h>
@@ -23,7 +20,7 @@
 static void pxa2xx_spi_dma_transfer_complete(struct driver_data *drv_data,
 					     bool error)
 {
-	struct spi_message *msg = drv_data->master->cur_msg;
+	struct spi_message *msg = drv_data->controller->cur_msg;
 
 	/*
 	 * It is possible that one CPU is handling ROR interrupt and other
@@ -59,7 +56,7 @@
 			msg->status = -EIO;
 		}
 
-		spi_finalize_current_transfer(drv_data->master);
+		spi_finalize_current_transfer(drv_data->controller);
 	}
 }
 
@@ -74,7 +71,7 @@
 			   struct spi_transfer *xfer)
 {
 	struct chip_data *chip =
-		spi_get_ctldata(drv_data->master->cur_msg->spi);
+		spi_get_ctldata(drv_data->controller->cur_msg->spi);
 	enum dma_slave_buswidth width;
 	struct dma_slave_config cfg;
 	struct dma_chan *chan;
@@ -102,14 +99,14 @@
 		cfg.dst_maxburst = chip->dma_burst_size;
 
 		sgt = &xfer->tx_sg;
-		chan = drv_data->master->dma_tx;
+		chan = drv_data->controller->dma_tx;
 	} else {
 		cfg.src_addr = drv_data->ssdr_physical;
 		cfg.src_addr_width = width;
 		cfg.src_maxburst = chip->dma_burst_size;
 
 		sgt = &xfer->rx_sg;
-		chan = drv_data->master->dma_rx;
+		chan = drv_data->controller->dma_rx;
 	}
 
 	ret = dmaengine_slave_config(chan, &cfg);
@@ -130,8 +127,8 @@
 	if (status & SSSR_ROR) {
 		dev_err(&drv_data->pdev->dev, "FIFO overrun\n");
 
-		dmaengine_terminate_async(drv_data->master->dma_rx);
-		dmaengine_terminate_async(drv_data->master->dma_tx);
+		dmaengine_terminate_async(drv_data->controller->dma_rx);
+		dmaengine_terminate_async(drv_data->controller->dma_tx);
 
 		pxa2xx_spi_dma_transfer_complete(drv_data, true);
 		return IRQ_HANDLED;
@@ -171,15 +168,15 @@
 	return 0;
 
 err_rx:
-	dmaengine_terminate_async(drv_data->master->dma_tx);
+	dmaengine_terminate_async(drv_data->controller->dma_tx);
 err_tx:
 	return err;
 }
 
 void pxa2xx_spi_dma_start(struct driver_data *drv_data)
 {
-	dma_async_issue_pending(drv_data->master->dma_rx);
-	dma_async_issue_pending(drv_data->master->dma_tx);
+	dma_async_issue_pending(drv_data->controller->dma_rx);
+	dma_async_issue_pending(drv_data->controller->dma_tx);
 
 	atomic_set(&drv_data->dma_running, 1);
 }
@@ -187,30 +184,30 @@
 void pxa2xx_spi_dma_stop(struct driver_data *drv_data)
 {
 	atomic_set(&drv_data->dma_running, 0);
-	dmaengine_terminate_sync(drv_data->master->dma_rx);
-	dmaengine_terminate_sync(drv_data->master->dma_tx);
+	dmaengine_terminate_sync(drv_data->controller->dma_rx);
+	dmaengine_terminate_sync(drv_data->controller->dma_tx);
 }
 
 int pxa2xx_spi_dma_setup(struct driver_data *drv_data)
 {
-	struct pxa2xx_spi_master *pdata = drv_data->master_info;
+	struct pxa2xx_spi_controller *pdata = drv_data->controller_info;
 	struct device *dev = &drv_data->pdev->dev;
-	struct spi_controller *master = drv_data->master;
+	struct spi_controller *controller = drv_data->controller;
 	dma_cap_mask_t mask;
 
 	dma_cap_zero(mask);
 	dma_cap_set(DMA_SLAVE, mask);
 
-	master->dma_tx = dma_request_slave_channel_compat(mask,
+	controller->dma_tx = dma_request_slave_channel_compat(mask,
 				pdata->dma_filter, pdata->tx_param, dev, "tx");
-	if (!master->dma_tx)
+	if (!controller->dma_tx)
 		return -ENODEV;
 
-	master->dma_rx = dma_request_slave_channel_compat(mask,
+	controller->dma_rx = dma_request_slave_channel_compat(mask,
 				pdata->dma_filter, pdata->rx_param, dev, "rx");
-	if (!master->dma_rx) {
-		dma_release_channel(master->dma_tx);
-		master->dma_tx = NULL;
+	if (!controller->dma_rx) {
+		dma_release_channel(controller->dma_tx);
+		controller->dma_tx = NULL;
 		return -ENODEV;
 	}
 
@@ -219,17 +216,17 @@
 
 void pxa2xx_spi_dma_release(struct driver_data *drv_data)
 {
-	struct spi_controller *master = drv_data->master;
+	struct spi_controller *controller = drv_data->controller;
 
-	if (master->dma_rx) {
-		dmaengine_terminate_sync(master->dma_rx);
-		dma_release_channel(master->dma_rx);
-		master->dma_rx = NULL;
+	if (controller->dma_rx) {
+		dmaengine_terminate_sync(controller->dma_rx);
+		dma_release_channel(controller->dma_rx);
+		controller->dma_rx = NULL;
 	}
-	if (master->dma_tx) {
-		dmaengine_terminate_sync(master->dma_tx);
-		dma_release_channel(master->dma_tx);
-		master->dma_tx = NULL;
+	if (controller->dma_tx) {
+		dmaengine_terminate_sync(controller->dma_tx);
+		dma_release_channel(controller->dma_tx);
+		controller->dma_tx = NULL;
 	}
 }
 
@@ -239,13 +236,15 @@
 					   u32 *threshold)
 {
 	struct pxa2xx_spi_chip *chip_info = spi->controller_data;
+	struct driver_data *drv_data = spi_controller_get_devdata(spi->controller);
+	u32 dma_burst_size = drv_data->controller_info->dma_burst_size;
 
 	/*
 	 * If the DMA burst size is given in chip_info we use that,
 	 * otherwise we use the default. Also we use the default FIFO
 	 * thresholds for now.
 	 */
-	*burst_code = chip_info ? chip_info->dma_burst_size : 1;
+	*burst_code = chip_info ? chip_info->dma_burst_size : dma_burst_size;
 	*threshold = SSCR1_RxTresh(RX_THRESH_DFLT)
 		   | SSCR1_TxTresh(TX_THRESH_DFLT);
 
diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c
index 869f188..f236e30 100644
--- a/drivers/spi/spi-pxa2xx-pci.c
+++ b/drivers/spi/spi-pxa2xx-pci.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * CE4100's SPI device is more or less the same one as found on PXA
  *
@@ -5,7 +6,6 @@
  */
 #include <linux/clk-provider.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/pci.h>
 #include <linux/platform_device.h>
 #include <linux/spi/pxa2xx_spi.h>
@@ -35,6 +35,8 @@
 	void *tx_param;
 	void *rx_param;
 
+	int dma_burst_size;
+
 	int (*setup)(struct pci_dev *pdev, struct pxa_spi_info *c);
 };
 
@@ -133,6 +135,7 @@
 	rx->dma_dev = &dma_dev->dev;
 
 	c->dma_filter = lpss_dma_filter;
+	c->dma_burst_size = 8;
 	return 0;
 }
 
@@ -197,7 +200,7 @@
 	struct platform_device_info pi;
 	int ret;
 	struct platform_device *pdev;
-	struct pxa2xx_spi_master spi_pdata;
+	struct pxa2xx_spi_controller spi_pdata;
 	struct ssp_device *ssp;
 	struct pxa_spi_info *c;
 	char buf[40];
@@ -223,6 +226,7 @@
 	spi_pdata.tx_param = c->tx_param;
 	spi_pdata.rx_param = c->rx_param;
 	spi_pdata.enable_dma = c->rx_param && c->tx_param;
+	spi_pdata.dma_burst_size = c->dma_burst_size ? c->dma_burst_size : 1;
 
 	ssp = &spi_pdata.ssp;
 	ssp->phys_base = pci_resource_start(dev, 0);
@@ -265,7 +269,7 @@
 static void pxa2xx_spi_pci_remove(struct pci_dev *dev)
 {
 	struct platform_device *pdev = pci_get_drvdata(dev);
-	struct pxa2xx_spi_master *spi_pdata;
+	struct pxa2xx_spi_controller *spi_pdata;
 
 	spi_pdata = dev_get_platdata(&pdev->dev);
 
diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 14f4ea5..bb6a14d 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -1,16 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C) 2005 Stephen Street / StreetFire Sound Labs
  * Copyright (C) 2013, Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/bitops.h>
@@ -33,6 +24,7 @@
 #include <linux/clk.h>
 #include <linux/pm_runtime.h>
 #include <linux/acpi.h>
+#include <linux/of_device.h>
 
 #include "spi-pxa2xx.h"
 
@@ -327,7 +319,7 @@
 	__lpss_ssp_write_priv(drv_data, config->reg_cs_ctrl, value);
 
 	/* Enable multiblock DMA transfers */
-	if (drv_data->master_info->enable_dma) {
+	if (drv_data->controller_info->enable_dma) {
 		__lpss_ssp_write_priv(drv_data, config->reg_ssp, 1);
 
 		if (config->reg_general >= 0) {
@@ -367,7 +359,7 @@
 		__lpss_ssp_write_priv(drv_data,
 				      config->reg_cs_ctrl, value);
 		ndelay(1000000000 /
-		       (drv_data->master->max_speed_hz / 2));
+		       (drv_data->controller->max_speed_hz / 2));
 	}
 }
 
@@ -566,7 +558,7 @@
 static void reset_sccr1(struct driver_data *drv_data)
 {
 	struct chip_data *chip =
-		spi_get_ctldata(drv_data->master->cur_msg->spi);
+		spi_get_ctldata(drv_data->controller->cur_msg->spi);
 	u32 sccr1_reg;
 
 	sccr1_reg = pxa2xx_spi_read(drv_data, SSCR1) & ~drv_data->int_cr1;
@@ -598,8 +590,8 @@
 
 	dev_err(&drv_data->pdev->dev, "%s\n", msg);
 
-	drv_data->master->cur_msg->status = -EIO;
-	spi_finalize_current_transfer(drv_data->master);
+	drv_data->controller->cur_msg->status = -EIO;
+	spi_finalize_current_transfer(drv_data->controller);
 }
 
 static void int_transfer_complete(struct driver_data *drv_data)
@@ -610,7 +602,7 @@
 	if (!pxa25x_ssp_comp(drv_data))
 		pxa2xx_spi_write(drv_data, SSTO, 0);
 
-	spi_finalize_current_transfer(drv_data->master);
+	spi_finalize_current_transfer(drv_data->controller);
 }
 
 static irqreturn_t interrupt_transfer(struct driver_data *drv_data)
@@ -625,6 +617,11 @@
 		return IRQ_HANDLED;
 	}
 
+	if (irq_status & SSSR_TUR) {
+		int_error_stop(drv_data, "interrupt_transfer: fifo underrun");
+		return IRQ_HANDLED;
+	}
+
 	if (irq_status & SSSR_TINT) {
 		pxa2xx_spi_write(drv_data, SSSR, SSSR_TINT);
 		if (drv_data->read(drv_data)) {
@@ -665,9 +662,11 @@
 			bytes_left = drv_data->rx_end - drv_data->rx;
 			switch (drv_data->n_bytes) {
 			case 4:
-				bytes_left >>= 1;
+				bytes_left >>= 2;
+				break;
 			case 2:
 				bytes_left >>= 1;
+				break;
 			}
 
 			rx_thre = pxa2xx_spi_get_rx_default_thre(drv_data);
@@ -739,7 +738,7 @@
 	pxa2xx_spi_write(drv_data, SSCR1, sccr1_reg & ~drv_data->int_cr1);
 	pxa2xx_spi_write(drv_data, SSCR1, sccr1_reg);
 
-	if (!drv_data->master->cur_msg) {
+	if (!drv_data->controller->cur_msg) {
 		handle_bad_msg(drv_data);
 		/* Never fail */
 		return IRQ_HANDLED;
@@ -871,22 +870,26 @@
 
 static unsigned int ssp_get_clk_div(struct driver_data *drv_data, int rate)
 {
-	unsigned long ssp_clk = drv_data->master->max_speed_hz;
+	unsigned long ssp_clk = drv_data->controller->max_speed_hz;
 	const struct ssp_device *ssp = drv_data->ssp;
 
 	rate = min_t(int, ssp_clk, rate);
 
+	/*
+	 * Calculate the divisor for the SCR (Serial Clock Rate), ensuring
+	 * that the SSP transmission rate does not exceed the device rate.
+	 */
 	if (ssp->type == PXA25x_SSP || ssp->type == CE4100_SSP)
-		return (ssp_clk / (2 * rate) - 1) & 0xff;
+		return (DIV_ROUND_UP(ssp_clk, 2 * rate) - 1) & 0xff;
 	else
-		return (ssp_clk / rate - 1) & 0xfff;
+		return (DIV_ROUND_UP(ssp_clk, rate) - 1) & 0xfff;
 }
 
 static unsigned int pxa2xx_ssp_get_clk_div(struct driver_data *drv_data,
 					   int rate)
 {
 	struct chip_data *chip =
-		spi_get_ctldata(drv_data->master->cur_msg->spi);
+		spi_get_ctldata(drv_data->controller->cur_msg->spi);
 	unsigned int clk_div;
 
 	switch (drv_data->ssp_type) {
@@ -900,7 +903,7 @@
 	return clk_div << 8;
 }
 
-static bool pxa2xx_spi_can_dma(struct spi_controller *master,
+static bool pxa2xx_spi_can_dma(struct spi_controller *controller,
 			       struct spi_device *spi,
 			       struct spi_transfer *xfer)
 {
@@ -911,13 +914,13 @@
 	       xfer->len >= chip->dma_burst_size;
 }
 
-static int pxa2xx_spi_transfer_one(struct spi_controller *master,
+static int pxa2xx_spi_transfer_one(struct spi_controller *controller,
 				   struct spi_device *spi,
 				   struct spi_transfer *transfer)
 {
-	struct driver_data *drv_data = spi_controller_get_devdata(master);
-	struct spi_message *message = master->cur_msg;
-	struct chip_data *chip = spi_get_ctldata(message->spi);
+	struct driver_data *drv_data = spi_controller_get_devdata(controller);
+	struct spi_message *message = controller->cur_msg;
+	struct chip_data *chip = spi_get_ctldata(spi);
 	u32 dma_thresh = chip->dma_threshold;
 	u32 dma_burst = chip->dma_burst_size;
 	u32 change_mask = pxa2xx_spi_get_ssrc1_change_mask(drv_data);
@@ -935,21 +938,21 @@
 		/* reject already-mapped transfers; PIO won't always work */
 		if (message->is_dma_mapped
 				|| transfer->rx_dma || transfer->tx_dma) {
-			dev_err(&drv_data->pdev->dev,
+			dev_err(&spi->dev,
 				"Mapped transfer length of %u is greater than %d\n",
 				transfer->len, MAX_DMA_LEN);
 			return -EINVAL;
 		}
 
 		/* warn ... we force this to PIO mode */
-		dev_warn_ratelimited(&message->spi->dev,
+		dev_warn_ratelimited(&spi->dev,
 				     "DMA disabled for transfer length %ld greater than %d\n",
 				     (long)transfer->len, MAX_DMA_LEN);
 	}
 
 	/* Setup the transfer state based on the type of transfer */
 	if (pxa2xx_spi_flush(drv_data) == 0) {
-		dev_err(&drv_data->pdev->dev, "Flush failed\n");
+		dev_err(&spi->dev, "Flush failed\n");
 		return -EIO;
 	}
 	drv_data->n_bytes = chip->n_bytes;
@@ -991,16 +994,16 @@
 	 */
 	if (chip->enable_dma) {
 		if (pxa2xx_spi_set_dma_burst_and_threshold(chip,
-						message->spi,
+						spi,
 						bits, &dma_burst,
 						&dma_thresh))
-			dev_warn_ratelimited(&message->spi->dev,
+			dev_warn_ratelimited(&spi->dev,
 					     "DMA burst size reduced to match bits_per_word\n");
 	}
 
-	dma_mapped = master->can_dma &&
-		     master->can_dma(master, message->spi, transfer) &&
-		     master->cur_msg_mapped;
+	dma_mapped = controller->can_dma &&
+		     controller->can_dma(controller, spi, transfer) &&
+		     controller->cur_msg_mapped;
 	if (dma_mapped) {
 
 		/* Ensure we have the correct interrupt handler */
@@ -1027,13 +1030,13 @@
 	/* NOTE:  PXA25x_SSP _could_ use external clocking ... */
 	cr0 = pxa2xx_configure_sscr0(drv_data, clk_div, bits);
 	if (!pxa25x_ssp_comp(drv_data))
-		dev_dbg(&message->spi->dev, "%u Hz actual, %s\n",
-			master->max_speed_hz
+		dev_dbg(&spi->dev, "%u Hz actual, %s\n",
+			controller->max_speed_hz
 				/ (1 + ((cr0 & SSCR0_SCR(0xfff)) >> 8)),
 			dma_mapped ? "DMA" : "PIO");
 	else
-		dev_dbg(&message->spi->dev, "%u Hz actual, %s\n",
-			master->max_speed_hz / 2
+		dev_dbg(&spi->dev, "%u Hz actual, %s\n",
+			controller->max_speed_hz / 2
 				/ (1 + ((cr0 & SSCR0_SCR(0x0ff)) >> 8)),
 			dma_mapped ? "DMA" : "PIO");
 
@@ -1070,6 +1073,30 @@
 			pxa2xx_spi_write(drv_data, SSTO, chip->timeout);
 	}
 
+	if (drv_data->ssp_type == MMP2_SSP) {
+		u8 tx_level = (pxa2xx_spi_read(drv_data, SSSR)
+					& SSSR_TFL_MASK) >> 8;
+
+		if (tx_level) {
+			/* On MMP2, flipping SSE doesn't empty the TXFIFO. */
+			dev_warn(&spi->dev, "%d bytes of garbage in TXFIFO!\n",
+								tx_level);
+			if (tx_level > transfer->len)
+				tx_level = transfer->len;
+			drv_data->tx += tx_level;
+		}
+	}
+
+	if (spi_controller_is_slave(controller)) {
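+		/* Pre-fill the TX FIFO; the master provides the clock to drain it. */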
+		while (drv_data->write(drv_data))
+			;
+		if (drv_data->gpiod_ready) {
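+			/* Pulse the optional ready GPIO to tell the master the slave is set up. */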
+			gpiod_set_value(drv_data->gpiod_ready, 1);
+			udelay(1);
+			gpiod_set_value(drv_data->gpiod_ready, 0);
+		}
+	}
+
 	/*
 	 * Release the data by enabling service requests and interrupts,
 	 * without changing any mode bits
@@ -1079,10 +1106,31 @@
 	return 1;
 }
 
-static void pxa2xx_spi_handle_err(struct spi_controller *master,
+static int pxa2xx_spi_slave_abort(struct spi_controller *controller)
+{
+	struct driver_data *drv_data = spi_controller_get_devdata(controller);
+
+	/* Stop and reset SSP */
+	write_SSSR_CS(drv_data, drv_data->clear_sr);
+	reset_sccr1(drv_data);
+	if (!pxa25x_ssp_comp(drv_data))
+		pxa2xx_spi_write(drv_data, SSTO, 0);
+	pxa2xx_spi_flush(drv_data);
+	pxa2xx_spi_write(drv_data, SSCR0,
+			 pxa2xx_spi_read(drv_data, SSCR0) & ~SSCR0_SSE);
+
+	dev_dbg(&drv_data->pdev->dev, "transfer aborted\n");
+
+	drv_data->controller->cur_msg->status = -EINTR;
+	spi_finalize_current_transfer(drv_data->controller);
+
+	return 0;
+}
+
+static void pxa2xx_spi_handle_err(struct spi_controller *controller,
 				 struct spi_message *msg)
 {
-	struct driver_data *drv_data = spi_controller_get_devdata(master);
+	struct driver_data *drv_data = spi_controller_get_devdata(controller);
 
 	/* Disable the SSP */
 	pxa2xx_spi_write(drv_data, SSCR0,
@@ -1106,9 +1154,9 @@
 		pxa2xx_spi_dma_stop(drv_data);
 }
 
-static int pxa2xx_spi_unprepare_transfer(struct spi_controller *master)
+static int pxa2xx_spi_unprepare_transfer(struct spi_controller *controller)
 {
-	struct driver_data *drv_data = spi_controller_get_devdata(master);
+	struct driver_data *drv_data = spi_controller_get_devdata(controller);
 
 	/* Disable the SSP now */
 	pxa2xx_spi_write(drv_data, SSCR0,
@@ -1206,9 +1254,14 @@
 		rx_thres = config->rx_threshold;
 		break;
 	default:
-		tx_thres = TX_THRESH_DFLT;
 		tx_hi_thres = 0;
-		rx_thres = RX_THRESH_DFLT;
+		if (spi_controller_is_slave(drv_data->controller)) {
+			tx_thres = 1;
+			rx_thres = 2;
+		} else {
+			tx_thres = TX_THRESH_DFLT;
+			rx_thres = RX_THRESH_DFLT;
+		}
 		break;
 	}
 
@@ -1229,7 +1282,7 @@
 
 			chip->frm = spi->chip_select;
 		}
-		chip->enable_dma = drv_data->master_info->enable_dma;
+		chip->enable_dma = drv_data->controller_info->enable_dma;
 		chip->timeout = TIMOUT_DFLT;
 	}
 
@@ -1252,6 +1305,12 @@
 		if (chip_info->enable_loopback)
 			chip->cr1 = SSCR1_LBM;
 	}
+	if (spi_controller_is_slave(drv_data->controller)) {
+		chip->cr1 |= SSCR1_SCFR;
+		chip->cr1 |= SSCR1_SCLKDIR;
+		chip->cr1 |= SSCR1_SFRMDIR;
+		chip->cr1 |= SSCR1_SPH;
+	}
 
 	chip->lpss_rx_threshold = SSIRF_RxThresh(rx_thres);
 	chip->lpss_tx_threshold = SSITF_TxLoThresh(tx_thres)
@@ -1269,6 +1328,9 @@
 			dev_warn(&spi->dev,
 				 "in setup: DMA burst size reduced to match bits_per_word\n");
 		}
+		dev_dbg(&spi->dev,
+			"in setup: DMA burst size set to %u\n",
+			chip->dma_burst_size);
 	}
 
 	switch (drv_data->ssp_type) {
@@ -1333,9 +1395,6 @@
 	kfree(chip);
 }
 
-#ifdef CONFIG_PCI
-#ifdef CONFIG_ACPI
-
 static const struct acpi_device_id pxa2xx_spi_acpi_match[] = {
 	{ "INT33C0", LPSS_LPT_SSP },
 	{ "INT33C1", LPSS_LPT_SSP },
@@ -1347,23 +1406,6 @@
 };
 MODULE_DEVICE_TABLE(acpi, pxa2xx_spi_acpi_match);
 
-static int pxa2xx_spi_get_port_id(struct acpi_device *adev)
-{
-	unsigned int devid;
-	int port_id = -1;
-
-	if (adev && adev->pnp.unique_id &&
-	    !kstrtouint(adev->pnp.unique_id, 0, &devid))
-		port_id = devid;
-	return port_id;
-}
-#else /* !CONFIG_ACPI */
-static int pxa2xx_spi_get_port_id(struct acpi_device *adev)
-{
-	return -1;
-}
-#endif
-
 /*
  * PCI IDs of compound devices that integrate both host controller and private
  * integrated DMA engine. Please note these are not used in module
@@ -1395,6 +1437,10 @@
 	{ PCI_VDEVICE(INTEL, 0x34aa), LPSS_CNL_SSP },
 	{ PCI_VDEVICE(INTEL, 0x34ab), LPSS_CNL_SSP },
 	{ PCI_VDEVICE(INTEL, 0x34fb), LPSS_CNL_SSP },
+	/* EHL */
+	{ PCI_VDEVICE(INTEL, 0x4b2a), LPSS_BXT_SSP },
+	{ PCI_VDEVICE(INTEL, 0x4b2b), LPSS_BXT_SSP },
+	{ PCI_VDEVICE(INTEL, 0x4b37), LPSS_BXT_SSP },
 	/* APL */
 	{ PCI_VDEVICE(INTEL, 0x5ac2), LPSS_BXT_SSP },
 	{ PCI_VDEVICE(INTEL, 0x5ac4), LPSS_BXT_SSP },
@@ -1407,33 +1453,77 @@
 	{ PCI_VDEVICE(INTEL, 0xa32a), LPSS_CNL_SSP },
 	{ PCI_VDEVICE(INTEL, 0xa32b), LPSS_CNL_SSP },
 	{ PCI_VDEVICE(INTEL, 0xa37b), LPSS_CNL_SSP },
+	/* CML-LP */
+	{ PCI_VDEVICE(INTEL, 0x02aa), LPSS_CNL_SSP },
+	{ PCI_VDEVICE(INTEL, 0x02ab), LPSS_CNL_SSP },
+	{ PCI_VDEVICE(INTEL, 0x02fb), LPSS_CNL_SSP },
+	/* TGL-LP */
+	{ PCI_VDEVICE(INTEL, 0xa0aa), LPSS_CNL_SSP },
+	{ PCI_VDEVICE(INTEL, 0xa0ab), LPSS_CNL_SSP },
+	{ PCI_VDEVICE(INTEL, 0xa0de), LPSS_CNL_SSP },
+	{ PCI_VDEVICE(INTEL, 0xa0df), LPSS_CNL_SSP },
+	{ PCI_VDEVICE(INTEL, 0xa0fb), LPSS_CNL_SSP },
+	{ PCI_VDEVICE(INTEL, 0xa0fd), LPSS_CNL_SSP },
+	{ PCI_VDEVICE(INTEL, 0xa0fe), LPSS_CNL_SSP },
 	{ },
 };
 
-static bool pxa2xx_spi_idma_filter(struct dma_chan *chan, void *param)
+static const struct of_device_id pxa2xx_spi_of_match[] = {
+	{ .compatible = "marvell,mmp2-ssp", .data = (void *)MMP2_SSP },
+	{},
+};
+MODULE_DEVICE_TABLE(of, pxa2xx_spi_of_match);
+
+#ifdef CONFIG_ACPI
+
+static int pxa2xx_spi_get_port_id(struct acpi_device *adev)
 {
-	struct device *dev = param;
+	unsigned int devid;
+	int port_id = -1;
 
-	if (dev != chan->device->dev->parent)
-		return false;
-
-	return true;
+	if (adev && adev->pnp.unique_id &&
+	    !kstrtouint(adev->pnp.unique_id, 0, &devid))
+		port_id = devid;
+	return port_id;
 }
 
-static struct pxa2xx_spi_master *
+#else /* !CONFIG_ACPI */
+
+static int pxa2xx_spi_get_port_id(struct acpi_device *adev)
+{
+	return -1;
+}
+
+#endif /* CONFIG_ACPI */
+
+
+#ifdef CONFIG_PCI
+
+static bool pxa2xx_spi_idma_filter(struct dma_chan *chan, void *param)
+{
+	return param == chan->device->dev;
+}
+
+#endif /* CONFIG_PCI */
+
+static struct pxa2xx_spi_controller *
 pxa2xx_spi_init_pdata(struct platform_device *pdev)
 {
-	struct pxa2xx_spi_master *pdata;
+	struct pxa2xx_spi_controller *pdata;
 	struct acpi_device *adev;
 	struct ssp_device *ssp;
 	struct resource *res;
 	const struct acpi_device_id *adev_id = NULL;
 	const struct pci_device_id *pcidev_id = NULL;
-	int type;
+	const struct of_device_id *of_id = NULL;
+	enum pxa_ssp_type type;
 
 	adev = ACPI_COMPANION(&pdev->dev);
 
-	if (dev_is_pci(pdev->dev.parent))
+	if (pdev->dev.of_node)
+		of_id = of_match_device(pdev->dev.driver->of_match_table,
+					&pdev->dev);
+	else if (dev_is_pci(pdev->dev.parent))
 		pcidev_id = pci_match_id(pxa2xx_spi_pci_compound_match,
 					 to_pci_dev(pdev->dev.parent));
 	else if (adev)
@@ -1443,9 +1533,11 @@
 		return NULL;
 
 	if (adev_id)
-		type = (int)adev_id->driver_data;
+		type = (enum pxa_ssp_type)adev_id->driver_data;
 	else if (pcidev_id)
-		type = (int)pcidev_id->driver_data;
+		type = (enum pxa_ssp_type)pcidev_id->driver_data;
+	else if (of_id)
+		type = (enum pxa_ssp_type)of_id->data;
 	else
 		return NULL;
 
@@ -1464,11 +1556,13 @@
 	if (IS_ERR(ssp->mmio_base))
 		return NULL;
 
+#ifdef CONFIG_PCI
 	if (pcidev_id) {
 		pdata->tx_param = pdev->dev.parent;
 		pdata->rx_param = pdev->dev.parent;
 		pdata->dma_filter = pxa2xx_spi_idma_filter;
 	}
+#endif
 
 	ssp->clk = devm_clk_get(&pdev->dev, NULL);
 	ssp->irq = platform_get_irq(pdev, 0);
@@ -1476,24 +1570,18 @@
 	ssp->pdev = pdev;
 	ssp->port_id = pxa2xx_spi_get_port_id(adev);
 
+	pdata->is_slave = of_property_read_bool(pdev->dev.of_node, "spi-slave");
 	pdata->num_chipselect = 1;
 	pdata->enable_dma = true;
+	pdata->dma_burst_size = 1;
 
 	return pdata;
 }
 
-#else /* !CONFIG_PCI */
-static inline struct pxa2xx_spi_master *
-pxa2xx_spi_init_pdata(struct platform_device *pdev)
-{
-	return NULL;
-}
-#endif
-
-static int pxa2xx_spi_fw_translate_cs(struct spi_controller *master,
+static int pxa2xx_spi_fw_translate_cs(struct spi_controller *controller,
 				      unsigned int cs)
 {
-	struct driver_data *drv_data = spi_controller_get_devdata(master);
+	struct driver_data *drv_data = spi_controller_get_devdata(controller);
 
 	if (has_acpi_companion(&drv_data->pdev->dev)) {
 		switch (drv_data->ssp_type) {
@@ -1517,8 +1605,8 @@
 static int pxa2xx_spi_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct pxa2xx_spi_master *platform_info;
-	struct spi_controller *master;
+	struct pxa2xx_spi_controller *platform_info;
+	struct spi_controller *controller;
 	struct driver_data *drv_data;
 	struct ssp_device *ssp;
 	const struct lpss_config *config;
@@ -1543,33 +1631,38 @@
 		return -ENODEV;
 	}
 
-	master = spi_alloc_master(dev, sizeof(struct driver_data));
-	if (!master) {
-		dev_err(&pdev->dev, "cannot alloc spi_master\n");
+	if (platform_info->is_slave)
+		controller = spi_alloc_slave(dev, sizeof(struct driver_data));
+	else
+		controller = spi_alloc_master(dev, sizeof(struct driver_data));
+
+	if (!controller) {
+		dev_err(&pdev->dev, "cannot alloc spi_controller\n");
 		pxa_ssp_free(ssp);
 		return -ENOMEM;
 	}
-	drv_data = spi_controller_get_devdata(master);
-	drv_data->master = master;
-	drv_data->master_info = platform_info;
+	drv_data = spi_controller_get_devdata(controller);
+	drv_data->controller = controller;
+	drv_data->controller_info = platform_info;
 	drv_data->pdev = pdev;
 	drv_data->ssp = ssp;
 
-	master->dev.of_node = pdev->dev.of_node;
+	controller->dev.of_node = pdev->dev.of_node;
 	/* the spi->mode bits understood by this driver: */
-	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LOOP;
+	controller->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LOOP;
 
-	master->bus_num = ssp->port_id;
-	master->dma_alignment = DMA_ALIGNMENT;
-	master->cleanup = cleanup;
-	master->setup = setup;
-	master->set_cs = pxa2xx_spi_set_cs;
-	master->transfer_one = pxa2xx_spi_transfer_one;
-	master->handle_err = pxa2xx_spi_handle_err;
-	master->unprepare_transfer_hardware = pxa2xx_spi_unprepare_transfer;
-	master->fw_translate_cs = pxa2xx_spi_fw_translate_cs;
-	master->auto_runtime_pm = true;
-	master->flags = SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_MUST_TX;
+	controller->bus_num = ssp->port_id;
+	controller->dma_alignment = DMA_ALIGNMENT;
+	controller->cleanup = cleanup;
+	controller->setup = setup;
+	controller->set_cs = pxa2xx_spi_set_cs;
+	controller->transfer_one = pxa2xx_spi_transfer_one;
+	controller->slave_abort = pxa2xx_spi_slave_abort;
+	controller->handle_err = pxa2xx_spi_handle_err;
+	controller->unprepare_transfer_hardware = pxa2xx_spi_unprepare_transfer;
+	controller->fw_translate_cs = pxa2xx_spi_fw_translate_cs;
+	controller->auto_runtime_pm = true;
+	controller->flags = SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_MUST_TX;
 
 	drv_data->ssp_type = ssp->type;
 
@@ -1578,10 +1671,10 @@
 	if (pxa25x_ssp_comp(drv_data)) {
 		switch (drv_data->ssp_type) {
 		case QUARK_X1000_SSP:
-			master->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32);
+			controller->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32);
 			break;
 		default:
-			master->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 16);
+			controller->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 16);
 			break;
 		}
 
@@ -1590,28 +1683,30 @@
 		drv_data->clear_sr = SSSR_ROR;
 		drv_data->mask_sr = SSSR_RFS | SSSR_TFS | SSSR_ROR;
 	} else {
-		master->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32);
+		controller->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32);
 		drv_data->int_cr1 = SSCR1_TIE | SSCR1_RIE | SSCR1_TINTE;
 		drv_data->dma_cr1 = DEFAULT_DMA_CR1;
 		drv_data->clear_sr = SSSR_ROR | SSSR_TINT;
-		drv_data->mask_sr = SSSR_TINT | SSSR_RFS | SSSR_TFS | SSSR_ROR;
+		drv_data->mask_sr = SSSR_TINT | SSSR_RFS | SSSR_TFS
+						| SSSR_ROR | SSSR_TUR;
 	}
 
 	status = request_irq(ssp->irq, ssp_int, IRQF_SHARED, dev_name(dev),
 			drv_data);
 	if (status < 0) {
 		dev_err(&pdev->dev, "cannot get IRQ %d\n", ssp->irq);
-		goto out_error_master_alloc;
+		goto out_error_controller_alloc;
 	}
 
 	/* Setup DMA if requested */
 	if (platform_info->enable_dma) {
 		status = pxa2xx_spi_dma_setup(drv_data);
 		if (status) {
-			dev_dbg(dev, "no DMA channels available, using PIO\n");
+			dev_warn(dev, "no DMA channels available, using PIO\n");
 			platform_info->enable_dma = false;
 		} else {
-			master->can_dma = pxa2xx_spi_can_dma;
+			controller->can_dma = pxa2xx_spi_can_dma;
+			controller->max_dma_len = MAX_DMA_LEN;
 		}
 	}
 
@@ -1620,7 +1715,17 @@
 	if (status)
 		goto out_error_dma_irq_alloc;
 
-	master->max_speed_hz = clk_get_rate(ssp->clk);
+	controller->max_speed_hz = clk_get_rate(ssp->clk);
+	/*
+	 * Set a minimum speed for all platforms other than Intel Quark,
+	 * which is able to do transfers below 1 Hz.
+	 */
+	if (!pxa25x_ssp_comp(drv_data))
+		controller->min_speed_hz =
+			DIV_ROUND_UP(controller->max_speed_hz, 4096);
+	else if (!is_quark_x1000_ssp(drv_data))
+		controller->min_speed_hz =
+			DIV_ROUND_UP(controller->max_speed_hz, 512);
 
 	/* Load default SSP configuration */
 	pxa2xx_spi_write(drv_data, SSCR0, 0);
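The minimum-speed bounds added in the hunk above follow directly from the largest clock divisor each SSP variant can program (4096, or 512 for PXA25x-type ports other than Quark X1000). A minimal sketch, assuming a hypothetical 200 MHz SSP functional clock (the rate is illustrative, not taken from this patch), shows the resulting floors:

    #include <stdio.h>

    #define DIV_ROUND_UP(n, d)  (((n) + (d) - 1) / (d))

    int main(void)
    {
            unsigned long max_speed_hz = 200000000UL; /* assumed clk_get_rate(ssp->clk) */

            /* generic SSP: largest divisor is 4096 */
            printf("min_speed_hz = %lu\n", DIV_ROUND_UP(max_speed_hz, 4096)); /* 48829 */

            /* PXA25x-compatible (non-Quark): largest divisor is 512 */
            printf("min_speed_hz = %lu\n", DIV_ROUND_UP(max_speed_hz, 512));  /* 390625 */

            /* Quark X1000 keeps min_speed_hz == 0: it can divide below 1 Hz */
            return 0;
    }

Quark X1000 is left without a lower bound because its clock unit can generate output rates below 1 Hz, so no divisor-derived floor applies.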
@@ -1642,10 +1747,22 @@
 		pxa2xx_spi_write(drv_data, SSCR0, tmp);
 		break;
 	default:
-		tmp = SSCR1_RxTresh(RX_THRESH_DFLT) |
-		      SSCR1_TxTresh(TX_THRESH_DFLT);
+
+		if (spi_controller_is_slave(controller)) {
+			tmp = SSCR1_SCFR |
+			      SSCR1_SCLKDIR |
+			      SSCR1_SFRMDIR |
+			      SSCR1_RxTresh(2) |
+			      SSCR1_TxTresh(1) |
+			      SSCR1_SPH;
+		} else {
+			tmp = SSCR1_RxTresh(RX_THRESH_DFLT) |
+			      SSCR1_TxTresh(TX_THRESH_DFLT);
+		}
 		pxa2xx_spi_write(drv_data, SSCR1, tmp);
-		tmp = SSCR0_SCR(2) | SSCR0_Motorola | SSCR0_DataSize(8);
+		tmp = SSCR0_Motorola | SSCR0_DataSize(8);
+		if (!spi_controller_is_slave(controller))
+			tmp |= SSCR0_SCR(2);
 		pxa2xx_spi_write(drv_data, SSCR0, tmp);
 		break;
 	}
@@ -1669,24 +1786,24 @@
 			platform_info->num_chipselect = config->cs_num;
 		}
 	}
-	master->num_chipselect = platform_info->num_chipselect;
+	controller->num_chipselect = platform_info->num_chipselect;
 
 	count = gpiod_count(&pdev->dev, "cs");
 	if (count > 0) {
 		int i;
 
-		master->num_chipselect = max_t(int, count,
-			master->num_chipselect);
+		controller->num_chipselect = max_t(int, count,
+			controller->num_chipselect);
 
 		drv_data->cs_gpiods = devm_kcalloc(&pdev->dev,
-			master->num_chipselect, sizeof(struct gpio_desc *),
+			controller->num_chipselect, sizeof(struct gpio_desc *),
 			GFP_KERNEL);
 		if (!drv_data->cs_gpiods) {
 			status = -ENOMEM;
 			goto out_error_clock_enabled;
 		}
 
-		for (i = 0; i < master->num_chipselect; i++) {
+		for (i = 0; i < controller->num_chipselect; i++) {
 			struct gpio_desc *gpiod;
 
 			gpiod = devm_gpiod_get_index(dev, "cs", i, GPIOD_ASIS);
@@ -1695,7 +1812,7 @@
 				if (PTR_ERR(gpiod) == -ENOENT)
 					continue;
 
-				status = (int)PTR_ERR(gpiod);
+				status = PTR_ERR(gpiod);
 				goto out_error_clock_enabled;
 			} else {
 				drv_data->cs_gpiods[i] = gpiod;
@@ -1703,6 +1820,15 @@
 		}
 	}
 
+	if (platform_info->is_slave) {
+		drv_data->gpiod_ready = devm_gpiod_get_optional(dev,
+						"ready", GPIOD_OUT_LOW);
+		if (IS_ERR(drv_data->gpiod_ready)) {
+			status = PTR_ERR(drv_data->gpiod_ready);
+			goto out_error_clock_enabled;
+		}
+	}
+
 	pm_runtime_set_autosuspend_delay(&pdev->dev, 50);
 	pm_runtime_use_autosuspend(&pdev->dev);
 	pm_runtime_set_active(&pdev->dev);
@@ -1710,25 +1836,27 @@
 
 	/* Register with the SPI framework */
 	platform_set_drvdata(pdev, drv_data);
-	status = devm_spi_register_controller(&pdev->dev, master);
+	status = devm_spi_register_controller(&pdev->dev, controller);
 	if (status != 0) {
-		dev_err(&pdev->dev, "problem registering spi master\n");
-		goto out_error_clock_enabled;
+		dev_err(&pdev->dev, "problem registering spi controller\n");
+		goto out_error_pm_runtime_enabled;
 	}
 
 	return status;
 
-out_error_clock_enabled:
+out_error_pm_runtime_enabled:
 	pm_runtime_put_noidle(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
+
+out_error_clock_enabled:
 	clk_disable_unprepare(ssp->clk);
 
 out_error_dma_irq_alloc:
 	pxa2xx_spi_dma_release(drv_data);
 	free_irq(ssp->irq, drv_data);
 
-out_error_master_alloc:
-	spi_controller_put(master);
+out_error_controller_alloc:
+	spi_controller_put(controller);
 	pxa_ssp_free(ssp);
 	return status;
 }
@@ -1749,7 +1877,7 @@
 	clk_disable_unprepare(ssp->clk);
 
 	/* Release DMA */
-	if (drv_data->master_info->enable_dma)
+	if (drv_data->controller_info->enable_dma)
 		pxa2xx_spi_dma_release(drv_data);
 
 	pm_runtime_put_noidle(&pdev->dev);
@@ -1764,14 +1892,6 @@
 	return 0;
 }
 
-static void pxa2xx_spi_shutdown(struct platform_device *pdev)
-{
-	int status = 0;
-
-	if ((status = pxa2xx_spi_remove(pdev)) != 0)
-		dev_err(&pdev->dev, "shutdown failed with %d\n", status);
-}
-
 #ifdef CONFIG_PM_SLEEP
 static int pxa2xx_spi_suspend(struct device *dev)
 {
@@ -1779,7 +1899,7 @@
 	struct ssp_device *ssp = drv_data->ssp;
 	int status;
 
-	status = spi_controller_suspend(drv_data->master);
+	status = spi_controller_suspend(drv_data->controller);
 	if (status != 0)
 		return status;
 	pxa2xx_spi_write(drv_data, SSCR0, 0);
@@ -1803,18 +1923,8 @@
 			return status;
 	}
 
-	/* Restore LPSS private register bits */
-	if (is_lpss_ssp(drv_data))
-		lpss_ssp_setup(drv_data);
-
 	/* Start the queue running */
-	status = spi_controller_resume(drv_data->master);
-	if (status != 0) {
-		dev_err(dev, "problem starting queue (%d)\n", status);
-		return status;
-	}
-
-	return 0;
+	return spi_controller_resume(drv_data->controller);
 }
 #endif
 
@@ -1848,10 +1958,10 @@
 		.name	= "pxa2xx-spi",
 		.pm	= &pxa2xx_spi_pm_ops,
 		.acpi_match_table = ACPI_PTR(pxa2xx_spi_acpi_match),
+		.of_match_table = of_match_ptr(pxa2xx_spi_of_match),
 	},
 	.probe = pxa2xx_spi_probe,
 	.remove = pxa2xx_spi_remove,
-	.shutdown = pxa2xx_spi_shutdown,
 };
 
 static int __init pxa2xx_spi_init(void)
@@ -1865,3 +1975,5 @@
 	platform_driver_unregister(&driver);
 }
 module_exit(pxa2xx_spi_exit);
+
+MODULE_SOFTDEP("pre: dw_dmac");
diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h
index 513c53a..1400472 100644
--- a/drivers/spi/spi-pxa2xx.h
+++ b/drivers/spi/spi-pxa2xx.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (C) 2005 Stephen Street / StreetFire Sound Labs
  * Copyright (C) 2013, Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef SPI_PXA2XX_H
@@ -31,10 +28,10 @@
 
 	/* SPI framework hookup */
 	enum pxa_ssp_type ssp_type;
-	struct spi_controller *master;
+	struct spi_controller *controller;
 
 	/* PXA hookup */
-	struct pxa2xx_spi_master *master_info;
+	struct pxa2xx_spi_controller *controller_info;
 
 	/* SSP register addresses */
 	void __iomem *ioaddr;
@@ -64,6 +61,9 @@
 
 	/* GPIOs for chip selects */
 	struct gpio_desc **cs_gpiods;
+
+	/* Optional slave FIFO ready signal */
+	struct gpio_desc *gpiod_ready;
 };
 
 struct chip_data {
diff --git a/drivers/spi/spi-qcom-qspi.c b/drivers/spi/spi-qcom-qspi.c
new file mode 100644
index 0000000..250fd60
--- /dev/null
+++ b/drivers/spi/spi-qcom-qspi.c
@@ -0,0 +1,577 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017-2018, The Linux foundation. All rights reserved.
+
+#include <linux/clk.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/pm_runtime.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
+
+
+#define QSPI_NUM_CS		2
+#define QSPI_BYTES_PER_WORD	4
+
+#define MSTR_CONFIG		0x0000
+#define FULL_CYCLE_MODE		BIT(3)
+#define FB_CLK_EN		BIT(4)
+#define PIN_HOLDN		BIT(6)
+#define PIN_WPN			BIT(7)
+#define DMA_ENABLE		BIT(8)
+#define BIG_ENDIAN_MODE		BIT(9)
+#define SPI_MODE_MSK		0xc00
+#define SPI_MODE_SHFT		10
+#define CHIP_SELECT_NUM		BIT(12)
+#define SBL_EN			BIT(13)
+#define LPA_BASE_MSK		0x3c000
+#define LPA_BASE_SHFT		14
+#define TX_DATA_DELAY_MSK	0xc0000
+#define TX_DATA_DELAY_SHFT	18
+#define TX_CLK_DELAY_MSK	0x300000
+#define TX_CLK_DELAY_SHFT	20
+#define TX_CS_N_DELAY_MSK	0xc00000
+#define TX_CS_N_DELAY_SHFT	22
+#define TX_DATA_OE_DELAY_MSK	0x3000000
+#define TX_DATA_OE_DELAY_SHFT	24
+
+#define AHB_MASTER_CFG				0x0004
+#define HMEM_TYPE_START_MID_TRANS_MSK		0x7
+#define HMEM_TYPE_START_MID_TRANS_SHFT		0
+#define HMEM_TYPE_LAST_TRANS_MSK		0x38
+#define HMEM_TYPE_LAST_TRANS_SHFT		3
+#define USE_HMEMTYPE_LAST_ON_DESC_OR_CHAIN_MSK	0xc0
+#define USE_HMEMTYPE_LAST_ON_DESC_OR_CHAIN_SHFT	6
+#define HMEMTYPE_READ_TRANS_MSK			0x700
+#define HMEMTYPE_READ_TRANS_SHFT		8
+#define HSHARED					BIT(11)
+#define HINNERSHARED				BIT(12)
+
+#define MSTR_INT_EN		0x000C
+#define MSTR_INT_STATUS		0x0010
+#define RESP_FIFO_UNDERRUN	BIT(0)
+#define RESP_FIFO_NOT_EMPTY	BIT(1)
+#define RESP_FIFO_RDY		BIT(2)
+#define HRESP_FROM_NOC_ERR	BIT(3)
+#define WR_FIFO_EMPTY		BIT(9)
+#define WR_FIFO_FULL		BIT(10)
+#define WR_FIFO_OVERRUN		BIT(11)
+#define TRANSACTION_DONE	BIT(16)
+#define QSPI_ERR_IRQS		(RESP_FIFO_UNDERRUN | HRESP_FROM_NOC_ERR | \
+				 WR_FIFO_OVERRUN)
+#define QSPI_ALL_IRQS		(QSPI_ERR_IRQS | RESP_FIFO_RDY | \
+				 WR_FIFO_EMPTY | WR_FIFO_FULL | \
+				 TRANSACTION_DONE)
+
+#define PIO_XFER_CTRL		0x0014
+#define REQUEST_COUNT_MSK	0xffff
+
+#define PIO_XFER_CFG		0x0018
+#define TRANSFER_DIRECTION	BIT(0)
+#define MULTI_IO_MODE_MSK	0xe
+#define MULTI_IO_MODE_SHFT	1
+#define TRANSFER_FRAGMENT	BIT(8)
+#define SDR_1BIT		1
+#define SDR_2BIT		2
+#define SDR_4BIT		3
+#define DDR_1BIT		5
+#define DDR_2BIT		6
+#define DDR_4BIT		7
+#define DMA_DESC_SINGLE_SPI	1
+#define DMA_DESC_DUAL_SPI	2
+#define DMA_DESC_QUAD_SPI	3
+
+#define PIO_XFER_STATUS		0x001c
+#define WR_FIFO_BYTES_MSK	0xffff0000
+#define WR_FIFO_BYTES_SHFT	16
+
+#define PIO_DATAOUT_1B		0x0020
+#define PIO_DATAOUT_4B		0x0024
+
+#define RD_FIFO_CFG		0x0028
+#define CONTINUOUS_MODE		BIT(0)
+
+#define RD_FIFO_STATUS	0x002c
+#define FIFO_EMPTY	BIT(11)
+#define WR_CNTS_MSK	0x7f0
+#define WR_CNTS_SHFT	4
+#define RDY_64BYTE	BIT(3)
+#define RDY_32BYTE	BIT(2)
+#define RDY_16BYTE	BIT(1)
+#define FIFO_RDY	BIT(0)
+
+#define RD_FIFO_RESET		0x0030
+#define RESET_FIFO		BIT(0)
+
+#define CUR_MEM_ADDR		0x0048
+#define HW_VERSION		0x004c
+#define RD_FIFO			0x0050
+#define SAMPLING_CLK_CFG	0x0090
+#define SAMPLING_CLK_STATUS	0x0094
+
+
+enum qspi_dir {
+	QSPI_READ,
+	QSPI_WRITE,
+};
+
+struct qspi_xfer {
+	union {
+		const void *tx_buf;
+		void *rx_buf;
+	};
+	unsigned int rem_bytes;
+	unsigned int buswidth;
+	enum qspi_dir dir;
+	bool is_last;
+};
+
+enum qspi_clocks {
+	QSPI_CLK_CORE,
+	QSPI_CLK_IFACE,
+	QSPI_NUM_CLKS
+};
+
+struct qcom_qspi {
+	void __iomem *base;
+	struct device *dev;
+	struct clk_bulk_data clks[QSPI_NUM_CLKS];
+	struct qspi_xfer xfer;
+	/* Lock to protect xfer and IRQ accessed registers */
+	spinlock_t lock;
+};
+
+static u32 qspi_buswidth_to_iomode(struct qcom_qspi *ctrl,
+				   unsigned int buswidth)
+{
+	switch (buswidth) {
+	case 1:
+		return SDR_1BIT << MULTI_IO_MODE_SHFT;
+	case 2:
+		return SDR_2BIT << MULTI_IO_MODE_SHFT;
+	case 4:
+		return SDR_4BIT << MULTI_IO_MODE_SHFT;
+	default:
+		dev_warn_once(ctrl->dev,
+				"Unexpected bus width: %u\n", buswidth);
+		return SDR_1BIT << MULTI_IO_MODE_SHFT;
+	}
+}
+
+static void qcom_qspi_pio_xfer_cfg(struct qcom_qspi *ctrl)
+{
+	u32 pio_xfer_cfg;
+	const struct qspi_xfer *xfer;
+
+	xfer = &ctrl->xfer;
+	pio_xfer_cfg = readl(ctrl->base + PIO_XFER_CFG);
+	pio_xfer_cfg &= ~TRANSFER_DIRECTION;
+	pio_xfer_cfg |= xfer->dir;
+	if (xfer->is_last)
+		pio_xfer_cfg &= ~TRANSFER_FRAGMENT;
+	else
+		pio_xfer_cfg |= TRANSFER_FRAGMENT;
+	pio_xfer_cfg &= ~MULTI_IO_MODE_MSK;
+	pio_xfer_cfg |= qspi_buswidth_to_iomode(ctrl, xfer->buswidth);
+
+	writel(pio_xfer_cfg, ctrl->base + PIO_XFER_CFG);
+}
+
+static void qcom_qspi_pio_xfer_ctrl(struct qcom_qspi *ctrl)
+{
+	u32 pio_xfer_ctrl;
+
+	pio_xfer_ctrl = readl(ctrl->base + PIO_XFER_CTRL);
+	pio_xfer_ctrl &= ~REQUEST_COUNT_MSK;
+	pio_xfer_ctrl |= ctrl->xfer.rem_bytes;
+	writel(pio_xfer_ctrl, ctrl->base + PIO_XFER_CTRL);
+}
+
+static void qcom_qspi_pio_xfer(struct qcom_qspi *ctrl)
+{
+	u32 ints;
+
+	qcom_qspi_pio_xfer_cfg(ctrl);
+
+	/* Ack any previous interrupts that might be hanging around */
+	writel(QSPI_ALL_IRQS, ctrl->base + MSTR_INT_STATUS);
+
+	/* Setup new interrupts */
+	if (ctrl->xfer.dir == QSPI_WRITE)
+		ints = QSPI_ERR_IRQS | WR_FIFO_EMPTY;
+	else
+		ints = QSPI_ERR_IRQS | RESP_FIFO_RDY;
+	writel(ints, ctrl->base + MSTR_INT_EN);
+
+	/* Kick off the transfer */
+	qcom_qspi_pio_xfer_ctrl(ctrl);
+}
+
+static void qcom_qspi_handle_err(struct spi_master *master,
+				 struct spi_message *msg)
+{
+	struct qcom_qspi *ctrl = spi_master_get_devdata(master);
+	unsigned long flags;
+
+	spin_lock_irqsave(&ctrl->lock, flags);
+	writel(0, ctrl->base + MSTR_INT_EN);
+	ctrl->xfer.rem_bytes = 0;
+	spin_unlock_irqrestore(&ctrl->lock, flags);
+}
+
+static int qcom_qspi_transfer_one(struct spi_master *master,
+				  struct spi_device *slv,
+				  struct spi_transfer *xfer)
+{
+	struct qcom_qspi *ctrl = spi_master_get_devdata(master);
+	int ret;
+	unsigned long speed_hz;
+	unsigned long flags;
+
+	speed_hz = slv->max_speed_hz;
+	if (xfer->speed_hz)
+		speed_hz = xfer->speed_hz;
+
+	/* In regular operation (SBL_EN=1) core must be 4x transfer clock */
+	ret = clk_set_rate(ctrl->clks[QSPI_CLK_CORE].clk, speed_hz * 4);
+	if (ret) {
+		dev_err(ctrl->dev, "Failed to set core clk %d\n", ret);
+		return ret;
+	}
+
+	spin_lock_irqsave(&ctrl->lock, flags);
+
+	/* We are half duplex, so either rx or tx will be set */
+	if (xfer->rx_buf) {
+		ctrl->xfer.dir = QSPI_READ;
+		ctrl->xfer.buswidth = xfer->rx_nbits;
+		ctrl->xfer.rx_buf = xfer->rx_buf;
+	} else {
+		ctrl->xfer.dir = QSPI_WRITE;
+		ctrl->xfer.buswidth = xfer->tx_nbits;
+		ctrl->xfer.tx_buf = xfer->tx_buf;
+	}
+	ctrl->xfer.is_last = list_is_last(&xfer->transfer_list,
+					  &master->cur_msg->transfers);
+	ctrl->xfer.rem_bytes = xfer->len;
+	qcom_qspi_pio_xfer(ctrl);
+
+	spin_unlock_irqrestore(&ctrl->lock, flags);
+
+	/* We'll call spi_finalize_current_transfer() when done */
+	return 1;
+}
+
+static int qcom_qspi_prepare_message(struct spi_master *master,
+				     struct spi_message *message)
+{
+	u32 mstr_cfg;
+	struct qcom_qspi *ctrl;
+	int tx_data_oe_delay = 1;
+	int tx_data_delay = 1;
+	unsigned long flags;
+
+	ctrl = spi_master_get_devdata(master);
+	spin_lock_irqsave(&ctrl->lock, flags);
+
+	mstr_cfg = readl(ctrl->base + MSTR_CONFIG);
+	mstr_cfg &= ~CHIP_SELECT_NUM;
+	if (message->spi->chip_select)
+		mstr_cfg |= CHIP_SELECT_NUM;
+
+	mstr_cfg |= FB_CLK_EN | PIN_WPN | PIN_HOLDN | SBL_EN | FULL_CYCLE_MODE;
+	mstr_cfg &= ~(SPI_MODE_MSK | TX_DATA_OE_DELAY_MSK | TX_DATA_DELAY_MSK);
+	mstr_cfg |= message->spi->mode << SPI_MODE_SHFT;
+	mstr_cfg |= tx_data_oe_delay << TX_DATA_OE_DELAY_SHFT;
+	mstr_cfg |= tx_data_delay << TX_DATA_DELAY_SHFT;
+	mstr_cfg &= ~DMA_ENABLE;
+
+	writel(mstr_cfg, ctrl->base + MSTR_CONFIG);
+	spin_unlock_irqrestore(&ctrl->lock, flags);
+
+	return 0;
+}
+
+static irqreturn_t pio_read(struct qcom_qspi *ctrl)
+{
+	u32 rd_fifo_status;
+	u32 rd_fifo;
+	unsigned int wr_cnts;
+	unsigned int bytes_to_read;
+	unsigned int words_to_read;
+	u32 *word_buf;
+	u8 *byte_buf;
+	int i;
+
+	rd_fifo_status = readl(ctrl->base + RD_FIFO_STATUS);
+
+	if (!(rd_fifo_status & FIFO_RDY)) {
+		dev_dbg(ctrl->dev, "Spurious IRQ %#x\n", rd_fifo_status);
+		return IRQ_NONE;
+	}
+
+	wr_cnts = (rd_fifo_status & WR_CNTS_MSK) >> WR_CNTS_SHFT;
+	wr_cnts = min(wr_cnts, ctrl->xfer.rem_bytes);
+
+	words_to_read = wr_cnts / QSPI_BYTES_PER_WORD;
+	bytes_to_read = wr_cnts % QSPI_BYTES_PER_WORD;
+
+	if (words_to_read) {
+		word_buf = ctrl->xfer.rx_buf;
+		ctrl->xfer.rem_bytes -= words_to_read * QSPI_BYTES_PER_WORD;
+		ioread32_rep(ctrl->base + RD_FIFO, word_buf, words_to_read);
+		ctrl->xfer.rx_buf = word_buf + words_to_read;
+	}
+
+	if (bytes_to_read) {
+		byte_buf = ctrl->xfer.rx_buf;
+		rd_fifo = readl(ctrl->base + RD_FIFO);
+		ctrl->xfer.rem_bytes -= bytes_to_read;
+		for (i = 0; i < bytes_to_read; i++)
+			*byte_buf++ = rd_fifo >> (i * BITS_PER_BYTE);
+		ctrl->xfer.rx_buf = byte_buf;
+	}
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t pio_write(struct qcom_qspi *ctrl)
+{
+	const void *xfer_buf = ctrl->xfer.tx_buf;
+	const int *word_buf;
+	const char *byte_buf;
+	unsigned int wr_fifo_bytes;
+	unsigned int wr_fifo_words;
+	unsigned int wr_size;
+	unsigned int rem_words;
+
+	wr_fifo_bytes = readl(ctrl->base + PIO_XFER_STATUS);
+	wr_fifo_bytes >>= WR_FIFO_BYTES_SHFT;
+
+	if (ctrl->xfer.rem_bytes < QSPI_BYTES_PER_WORD) {
+		/* Process the last 1-3 bytes */
+		wr_size = min(wr_fifo_bytes, ctrl->xfer.rem_bytes);
+		ctrl->xfer.rem_bytes -= wr_size;
+
+		byte_buf = xfer_buf;
+		while (wr_size--)
+			writel(*byte_buf++,
+			       ctrl->base + PIO_DATAOUT_1B);
+		ctrl->xfer.tx_buf = byte_buf;
+	} else {
+		/*
+		 * Process all the whole words; to keep things simple we'll
+		 * just wait for the next interrupt to handle the last 1-3
+		 * bytes if we don't have an even number of words.
+		 */
+		rem_words = ctrl->xfer.rem_bytes / QSPI_BYTES_PER_WORD;
+		wr_fifo_words = wr_fifo_bytes / QSPI_BYTES_PER_WORD;
+
+		wr_size = min(rem_words, wr_fifo_words);
+		ctrl->xfer.rem_bytes -= wr_size * QSPI_BYTES_PER_WORD;
+
+		word_buf = xfer_buf;
+		iowrite32_rep(ctrl->base + PIO_DATAOUT_4B, word_buf, wr_size);
+		ctrl->xfer.tx_buf = word_buf + wr_size;
+
+	}
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t qcom_qspi_irq(int irq, void *dev_id)
+{
+	u32 int_status;
+	struct qcom_qspi *ctrl = dev_id;
+	irqreturn_t ret = IRQ_NONE;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ctrl->lock, flags);
+
+	int_status = readl(ctrl->base + MSTR_INT_STATUS);
+	writel(int_status, ctrl->base + MSTR_INT_STATUS);
+
+	if (ctrl->xfer.dir == QSPI_WRITE) {
+		if (int_status & WR_FIFO_EMPTY)
+			ret = pio_write(ctrl);
+	} else {
+		if (int_status & RESP_FIFO_RDY)
+			ret = pio_read(ctrl);
+	}
+
+	if (int_status & QSPI_ERR_IRQS) {
+		if (int_status & RESP_FIFO_UNDERRUN)
+			dev_err(ctrl->dev, "IRQ error: FIFO underrun\n");
+		if (int_status & WR_FIFO_OVERRUN)
+			dev_err(ctrl->dev, "IRQ error: FIFO overrun\n");
+		if (int_status & HRESP_FROM_NOC_ERR)
+			dev_err(ctrl->dev, "IRQ error: NOC response error\n");
+		ret = IRQ_HANDLED;
+	}
+
+	if (!ctrl->xfer.rem_bytes) {
+		writel(0, ctrl->base + MSTR_INT_EN);
+		spi_finalize_current_transfer(dev_get_drvdata(ctrl->dev));
+	}
+
+	spin_unlock_irqrestore(&ctrl->lock, flags);
+	return ret;
+}
+
+static int qcom_qspi_probe(struct platform_device *pdev)
+{
+	int ret;
+	struct device *dev;
+	struct spi_master *master;
+	struct qcom_qspi *ctrl;
+
+	dev = &pdev->dev;
+
+	master = spi_alloc_master(dev, sizeof(*ctrl));
+	if (!master)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, master);
+
+	ctrl = spi_master_get_devdata(master);
+
+	spin_lock_init(&ctrl->lock);
+	ctrl->dev = dev;
+	ctrl->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(ctrl->base)) {
+		ret = PTR_ERR(ctrl->base);
+		goto exit_probe_master_put;
+	}
+
+	ctrl->clks[QSPI_CLK_CORE].id = "core";
+	ctrl->clks[QSPI_CLK_IFACE].id = "iface";
+	ret = devm_clk_bulk_get(dev, QSPI_NUM_CLKS, ctrl->clks);
+	if (ret)
+		goto exit_probe_master_put;
+
+	ret = platform_get_irq(pdev, 0);
+	if (ret < 0)
+		goto exit_probe_master_put;
+	ret = devm_request_irq(dev, ret, qcom_qspi_irq,
+			IRQF_TRIGGER_HIGH, dev_name(dev), ctrl);
+	if (ret) {
+		dev_err(dev, "Failed to request irq %d\n", ret);
+		goto exit_probe_master_put;
+	}
+
+	master->max_speed_hz = 300000000;
+	master->num_chipselect = QSPI_NUM_CS;
+	master->bus_num = -1;
+	master->dev.of_node = pdev->dev.of_node;
+	master->mode_bits = SPI_MODE_0 |
+			    SPI_TX_DUAL | SPI_RX_DUAL |
+			    SPI_TX_QUAD | SPI_RX_QUAD;
+	master->flags = SPI_MASTER_HALF_DUPLEX;
+	master->prepare_message = qcom_qspi_prepare_message;
+	master->transfer_one = qcom_qspi_transfer_one;
+	master->handle_err = qcom_qspi_handle_err;
+	master->auto_runtime_pm = true;
+
+	pm_runtime_enable(dev);
+
+	ret = spi_register_master(master);
+	if (!ret)
+		return 0;
+
+	pm_runtime_disable(dev);
+
+exit_probe_master_put:
+	spi_master_put(master);
+
+	return ret;
+}
+
+static int qcom_qspi_remove(struct platform_device *pdev)
+{
+	struct spi_master *master = platform_get_drvdata(pdev);
+
+	/* Unregister _before_ disabling pm_runtime() so we stop transfers */
+	spi_unregister_master(master);
+
+	pm_runtime_disable(&pdev->dev);
+
+	return 0;
+}
+
+static int __maybe_unused qcom_qspi_runtime_suspend(struct device *dev)
+{
+	struct spi_master *master = dev_get_drvdata(dev);
+	struct qcom_qspi *ctrl = spi_master_get_devdata(master);
+
+	clk_bulk_disable_unprepare(QSPI_NUM_CLKS, ctrl->clks);
+
+	return 0;
+}
+
+static int __maybe_unused qcom_qspi_runtime_resume(struct device *dev)
+{
+	struct spi_master *master = dev_get_drvdata(dev);
+	struct qcom_qspi *ctrl = spi_master_get_devdata(master);
+
+	return clk_bulk_prepare_enable(QSPI_NUM_CLKS, ctrl->clks);
+}
+
+static int __maybe_unused qcom_qspi_suspend(struct device *dev)
+{
+	struct spi_master *master = dev_get_drvdata(dev);
+	int ret;
+
+	ret = spi_master_suspend(master);
+	if (ret)
+		return ret;
+
+	ret = pm_runtime_force_suspend(dev);
+	if (ret)
+		spi_master_resume(master);
+
+	return ret;
+}
+
+static int __maybe_unused qcom_qspi_resume(struct device *dev)
+{
+	struct spi_master *master = dev_get_drvdata(dev);
+	int ret;
+
+	ret = pm_runtime_force_resume(dev);
+	if (ret)
+		return ret;
+
+	ret = spi_master_resume(master);
+	if (ret)
+		pm_runtime_force_suspend(dev);
+
+	return ret;
+}
+
+static const struct dev_pm_ops qcom_qspi_dev_pm_ops = {
+	SET_RUNTIME_PM_OPS(qcom_qspi_runtime_suspend,
+			   qcom_qspi_runtime_resume, NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(qcom_qspi_suspend, qcom_qspi_resume)
+};
+
+static const struct of_device_id qcom_qspi_dt_match[] = {
+	{ .compatible = "qcom,qspi-v1", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, qcom_qspi_dt_match);
+
+static struct platform_driver qcom_qspi_driver = {
+	.driver = {
+		.name		= "qcom_qspi",
+		.pm		= &qcom_qspi_dev_pm_ops,
+		.of_match_table = qcom_qspi_dt_match,
+	},
+	.probe = qcom_qspi_probe,
+	.remove = qcom_qspi_remove,
+};
+module_platform_driver(qcom_qspi_driver);
+
+MODULE_DESCRIPTION("SPI driver for QSPI cores");
+MODULE_LICENSE("GPL v2");
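One detail of the new driver worth keeping in mind is the clock relationship in qcom_qspi_transfer_one(): with SBL_EN set, the controller's core clock has to run at four times the SPI transfer clock, so the driver programs speed_hz * 4. A minimal sketch of that selection, using an assumed 25 MHz flash transfer speed and 50 MHz device default (both values are illustrative, not from this patch):

    /* per-transfer speed overrides the device default, as in the driver */
    unsigned long qspi_core_rate(unsigned long xfer_speed_hz,
                                 unsigned long dev_max_speed_hz)
    {
            unsigned long speed_hz = xfer_speed_hz ? xfer_speed_hz
                                                   : dev_max_speed_hz;

            /* SBL_EN == 1: core clock must be 4x the transfer clock */
            return speed_hz * 4;
    }

    /* qspi_core_rate(25000000, 50000000) == 100000000 (100 MHz core clock) */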
diff --git a/drivers/spi/spi-qup.c b/drivers/spi/spi-qup.c
index 974a8ce..2f559e5 100644
--- a/drivers/spi/spi-qup.c
+++ b/drivers/spi/spi-qup.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2008-2014, The Linux foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License rev 2 and
- * only rev 2 as published by the free Software foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or fITNESS fOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/clk.h>
@@ -281,6 +273,9 @@
 		writel_relaxed(QUP_OP_IN_SERVICE_FLAG,
 			       controller->base + QUP_OPERATIONAL);
 
+		if (!remainder)
+			goto exit;
+
 		if (is_block_mode) {
 			num_words = (remainder > words_per_block) ?
 					words_per_block : remainder;
@@ -310,11 +305,13 @@
 	 * to refresh opflags value because MAX_INPUT_DONE_FLAG may now be
 	 * present and this is used to determine if transaction is complete
 	 */
-	*opflags = readl_relaxed(controller->base + QUP_OPERATIONAL);
-	if (is_block_mode && *opflags & QUP_OP_MAX_INPUT_DONE_FLAG)
-		writel_relaxed(QUP_OP_IN_SERVICE_FLAG,
-			       controller->base + QUP_OPERATIONAL);
-
+exit:
+	if (!remainder) {
+		*opflags = readl_relaxed(controller->base + QUP_OPERATIONAL);
+		if (is_block_mode && *opflags & QUP_OP_MAX_INPUT_DONE_FLAG)
+			writel_relaxed(QUP_OP_IN_SERVICE_FLAG,
+				       controller->base + QUP_OPERATIONAL);
+	}
 }
 
 static void spi_qup_write_to_fifo(struct spi_qup *controller, u32 num_words)
@@ -362,6 +359,10 @@
 		writel_relaxed(QUP_OP_OUT_SERVICE_FLAG,
 			       controller->base + QUP_OPERATIONAL);
 
+		/* make sure the interrupt is valid */
+		if (!remainder)
+			return;
+
 		if (is_block_mode) {
 			num_words = (remainder > words_per_block) ?
 				words_per_block : remainder;
@@ -575,10 +576,24 @@
 	return 0;
 }
 
+static bool spi_qup_data_pending(struct spi_qup *controller)
+{
+	unsigned int remainder_tx, remainder_rx;
+
+	remainder_tx = DIV_ROUND_UP(spi_qup_len(controller) -
+				    controller->tx_bytes, controller->w_size);
+
+	remainder_rx = DIV_ROUND_UP(spi_qup_len(controller) -
+				    controller->rx_bytes, controller->w_size);
+
+	return remainder_tx || remainder_rx;
+}
+
 static irqreturn_t spi_qup_qup_irq(int irq, void *dev_id)
 {
 	struct spi_qup *controller = dev_id;
 	u32 opflags, qup_err, spi_err;
+	unsigned long flags;
 	int error = 0;
 
 	qup_err = readl_relaxed(controller->base + QUP_ERROR_FLAGS);
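The completion rework above relies on spi_qup_data_pending() to tell whether any TX or RX words are still outstanding; the IRQ handler only completes the transfer once no words remain (or an error has been latched). A minimal worked sketch of the remainder arithmetic, with assumed example values (26-byte transfer, 4-byte words, 24 bytes queued for TX and 16 bytes received so far):

    #define DIV_ROUND_UP(n, d)  (((n) + (d) - 1) / (d))

    unsigned int words_left(unsigned int total_len, unsigned int done_bytes,
                            unsigned int w_size)
    {
            return DIV_ROUND_UP(total_len - done_bytes, w_size);
    }

    /*
     * words_left(26, 24, 4) == 1   TX still has one partial word to push
     * words_left(26, 16, 4) == 3   RX still expects three words
     * -> spi_qup_data_pending() returns true, so the IRQ handler does not
     *    complete the transfer yet.
     */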
@@ -610,6 +625,11 @@
 		error = -EIO;
 	}
 
+	spin_lock_irqsave(&controller->lock, flags);
+	if (!controller->error)
+		controller->error = error;
+	spin_unlock_irqrestore(&controller->lock, flags);
+
 	if (spi_qup_is_dma_xfer(controller->mode)) {
 		writel_relaxed(opflags, controller->base + QUP_OPERATIONAL);
 	} else {
@@ -618,11 +638,22 @@
 
 		if (opflags & QUP_OP_OUT_SERVICE_FLAG)
 			spi_qup_write(controller);
+
+		if (!spi_qup_data_pending(controller))
+			complete(&controller->done);
 	}
 
-	if ((opflags & QUP_OP_MAX_INPUT_DONE_FLAG) || error)
+	if (error)
 		complete(&controller->done);
 
+	if (opflags & QUP_OP_MAX_INPUT_DONE_FLAG) {
+		if (!spi_qup_is_dma_xfer(controller->mode)) {
+			if (spi_qup_data_pending(controller))
+				return IRQ_HANDLED;
+		}
+		complete(&controller->done);
+	}
+
 	return IRQ_HANDLED;
 }
 
@@ -842,10 +873,6 @@
 	else
 		ret = spi_qup_do_pio(spi, xfer, timeout);
 
-	if (ret)
-		goto exit;
-
-exit:
 	spi_qup_set_state(controller, QUP_STATE_RESET);
 	spin_lock_irqsave(&controller->lock, flags);
 	if (!ret)
diff --git a/drivers/spi/spi-rb4xx.c b/drivers/spi/spi-rb4xx.c
index 3641d0e..4c9620e 100644
--- a/drivers/spi/spi-rb4xx.c
+++ b/drivers/spi/spi-rb4xx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * SPI controller driver for the Mikrotik RB4xx boards
  *
@@ -6,11 +7,6 @@
  *
  * This file was based on the patches for Linux 2.6.27.39 published by
  * MikroTik for their RouterBoard 4xx series devices.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  */
 
 #include <linux/kernel.h>
@@ -139,12 +135,10 @@
 	struct spi_master *master;
 	struct clk *ahb_clk;
 	struct rb4xx_spi *rbspi;
-	struct resource *r;
 	int err;
 	void __iomem *spi_base;
 
-	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	spi_base = devm_ioremap_resource(&pdev->dev, r);
+	spi_base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(spi_base))
 		return PTR_ERR(spi_base);
 
@@ -159,7 +153,7 @@
 	master->bus_num = 0;
 	master->num_chipselect = 3;
 	master->mode_bits = SPI_TX_DUAL;
-	master->bits_per_word_mask = BIT(7);
+	master->bits_per_word_mask = SPI_BPW_MASK(8);
 	master->flags = SPI_MASTER_MUST_TX;
 	master->transfer_one = rb4xx_transfer_one;
 	master->set_cs = rb4xx_set_cs;
diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index fdcf307..2cc6d99 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c
@@ -1,20 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2014, Fuzhou Rockchip Electronics Co., Ltd
  * Author: Addy Ke <addy.ke@rock-chips.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
  */
 
 #include <linux/clk.h>
 #include <linux/dmaengine.h>
+#include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/pinctrl/consumer.h>
@@ -54,6 +46,9 @@
 
 /* Bit fields in CTRLR0 */
 #define CR0_DFS_OFFSET				0
+#define CR0_DFS_4BIT				0x0
+#define CR0_DFS_8BIT				0x1
+#define CR0_DFS_16BIT				0x2
 
 #define CR0_CFS_OFFSET				2
 
@@ -94,6 +89,7 @@
 #define CR0_BHT_8BIT				0x1
 
 #define CR0_RSD_OFFSET				14
+#define CR0_RSD_MAX				0x3
 
 #define CR0_FRF_OFFSET				16
 #define CR0_FRF_SPI					0x0
@@ -115,6 +111,10 @@
 /* Bit fields in SER, 2bit */
 #define SER_MASK					0x3
 
+/* Bit fields in BAUDR */
+#define BAUDR_SCKDV_MIN				2
+#define BAUDR_SCKDV_MAX				65534
+
 /* Bit fields in SR, 5bit */
 #define SR_MASK						0x1f
 #define SR_BUSY						(1 << 0)
@@ -142,11 +142,12 @@
 #define RF_DMA_EN					(1 << 0)
 #define TF_DMA_EN					(1 << 1)
 
-#define RXBUSY						(1 << 0)
-#define TXBUSY						(1 << 1)
+/* Driver state flags */
+#define RXDMA					(1 << 0)
+#define TXDMA					(1 << 1)
 
 /* sclk_out: spi master internal logic in rk3x can support 50Mhz */
-#define MAX_SCLK_OUT		50000000
+#define MAX_SCLK_OUT				50000000U
 
 /*
  * SPI_CTRLR1 is 16-bits, so we should support lengths of 0xffff + 1. However,
@@ -156,74 +157,37 @@
 
 #define ROCKCHIP_SPI_MAX_CS_NUM			2
 
-enum rockchip_ssi_type {
-	SSI_MOTO_SPI = 0,
-	SSI_TI_SSP,
-	SSI_NS_MICROWIRE,
-};
-
-struct rockchip_spi_dma_data {
-	struct dma_chan *ch;
-	enum dma_transfer_direction direction;
-	dma_addr_t addr;
-};
-
 struct rockchip_spi {
 	struct device *dev;
-	struct spi_master *master;
 
 	struct clk *spiclk;
 	struct clk *apb_pclk;
 
 	void __iomem *regs;
-	/*depth of the FIFO buffer */
-	u32 fifo_len;
-	/* max bus freq supported */
-	u32 max_freq;
-	/* supported slave numbers */
-	enum rockchip_ssi_type type;
-
-	u16 mode;
-	u8 tmode;
-	u8 bpw;
-	u8 n_bytes;
-	u32 rsd_nsecs;
-	unsigned len;
-	u32 speed;
+	dma_addr_t dma_addr_rx;
+	dma_addr_t dma_addr_tx;
 
 	const void *tx;
-	const void *tx_end;
 	void *rx;
-	void *rx_end;
+	unsigned int tx_left;
+	unsigned int rx_left;
 
-	u32 state;
-	/* protect state */
-	spinlock_t lock;
+	atomic_t state;
+
+	/* depth of the FIFO buffer */
+	u32 fifo_len;
+	/* frequency of spiclk */
+	u32 freq;
+
+	u8 n_bytes;
+	u8 rsd;
 
 	bool cs_asserted[ROCKCHIP_SPI_MAX_CS_NUM];
-
-	u32 use_dma;
-	struct sg_table tx_sg;
-	struct sg_table rx_sg;
-	struct rockchip_spi_dma_data dma_rx;
-	struct rockchip_spi_dma_data dma_tx;
-	struct dma_slave_caps dma_caps;
 };
 
-static inline void spi_enable_chip(struct rockchip_spi *rs, int enable)
+static inline void spi_enable_chip(struct rockchip_spi *rs, bool enable)
 {
-	writel_relaxed((enable ? 1 : 0), rs->regs + ROCKCHIP_SPI_SSIENR);
-}
-
-static inline void spi_set_clk(struct rockchip_spi *rs, u16 div)
-{
-	writel_relaxed(div, rs->regs + ROCKCHIP_SPI_BAUDR);
-}
-
-static inline void flush_fifo(struct rockchip_spi *rs)
-{
-	while (readl_relaxed(rs->regs + ROCKCHIP_SPI_RXFLR))
-		readl_relaxed(rs->regs + ROCKCHIP_SPI_RXDR);
+	writel_relaxed((enable ? 1U : 0U), rs->regs + ROCKCHIP_SPI_SSIENR);
 }
 
 static inline void wait_for_idle(struct rockchip_spi *rs)
@@ -253,24 +217,6 @@
 	return (fifo == 31) ? 0 : fifo;
 }
 
-static inline u32 tx_max(struct rockchip_spi *rs)
-{
-	u32 tx_left, tx_room;
-
-	tx_left = (rs->tx_end - rs->tx) / rs->n_bytes;
-	tx_room = rs->fifo_len - readl_relaxed(rs->regs + ROCKCHIP_SPI_TXFLR);
-
-	return min(tx_left, tx_room);
-}
-
-static inline u32 rx_max(struct rockchip_spi *rs)
-{
-	u32 rx_left = (rs->rx_end - rs->rx) / rs->n_bytes;
-	u32 rx_room = (u32)readl_relaxed(rs->regs + ROCKCHIP_SPI_RXFLR);
-
-	return min(rx_left, rx_room);
-}
-
 static void rockchip_spi_set_cs(struct spi_device *spi, bool enable)
 {
 	struct spi_master *master = spi->master;
@@ -298,64 +244,39 @@
 	rs->cs_asserted[spi->chip_select] = cs_asserted;
 }
 
-static int rockchip_spi_prepare_message(struct spi_master *master,
-					struct spi_message *msg)
-{
-	struct rockchip_spi *rs = spi_master_get_devdata(master);
-	struct spi_device *spi = msg->spi;
-
-	rs->mode = spi->mode;
-
-	return 0;
-}
-
 static void rockchip_spi_handle_err(struct spi_master *master,
 				    struct spi_message *msg)
 {
-	unsigned long flags;
 	struct rockchip_spi *rs = spi_master_get_devdata(master);
 
-	spin_lock_irqsave(&rs->lock, flags);
-
-	/*
-	 * For DMA mode, we need terminate DMA channel and flush
-	 * fifo for the next transfer if DMA thansfer timeout.
-	 * handle_err() was called by core if transfer failed.
-	 * Maybe it is reasonable for error handling here.
+	/* stop running spi transfer
+	 * this also flushes both rx and tx fifos
 	 */
-	if (rs->use_dma) {
-		if (rs->state & RXBUSY) {
-			dmaengine_terminate_async(rs->dma_rx.ch);
-			flush_fifo(rs);
-		}
+	spi_enable_chip(rs, false);
 
-		if (rs->state & TXBUSY)
-			dmaengine_terminate_async(rs->dma_tx.ch);
-	}
+	/* make sure all interrupts are masked */
+	writel_relaxed(0, rs->regs + ROCKCHIP_SPI_IMR);
 
-	spin_unlock_irqrestore(&rs->lock, flags);
-}
+	if (atomic_read(&rs->state) & TXDMA)
+		dmaengine_terminate_async(master->dma_tx);
 
-static int rockchip_spi_unprepare_message(struct spi_master *master,
-					  struct spi_message *msg)
-{
-	struct rockchip_spi *rs = spi_master_get_devdata(master);
-
-	spi_enable_chip(rs, 0);
-
-	return 0;
+	if (atomic_read(&rs->state) & RXDMA)
+		dmaengine_terminate_async(master->dma_rx);
 }
 
 static void rockchip_spi_pio_writer(struct rockchip_spi *rs)
 {
-	u32 max = tx_max(rs);
-	u32 txw = 0;
+	u32 tx_free = rs->fifo_len - readl_relaxed(rs->regs + ROCKCHIP_SPI_TXFLR);
+	u32 words = min(rs->tx_left, tx_free);
 
-	while (max--) {
+	rs->tx_left -= words;
+	for (; words; words--) {
+		u32 txw;
+
 		if (rs->n_bytes == 1)
-			txw = *(u8 *)(rs->tx);
+			txw = *(u8 *)rs->tx;
 		else
-			txw = *(u16 *)(rs->tx);
+			txw = *(u16 *)rs->tx;
 
 		writel_relaxed(txw, rs->regs + ROCKCHIP_SPI_TXDR);
 		rs->tx += rs->n_bytes;
@@ -364,227 +285,249 @@
 
 static void rockchip_spi_pio_reader(struct rockchip_spi *rs)
 {
-	u32 max = rx_max(rs);
-	u32 rxw;
+	u32 words = readl_relaxed(rs->regs + ROCKCHIP_SPI_RXFLR);
+	u32 rx_left = rs->rx_left - words;
 
-	while (max--) {
-		rxw = readl_relaxed(rs->regs + ROCKCHIP_SPI_RXDR);
+	/* the hardware doesn't allow us to change fifo threshold
+	 * level while spi is enabled, so instead make sure to leave
+	 * enough words in the rx fifo to get the last interrupt
+	 * exactly when all words have been received
+	 */
+	if (rx_left) {
+		u32 ftl = readl_relaxed(rs->regs + ROCKCHIP_SPI_RXFTLR) + 1;
+
+		if (rx_left < ftl) {
+			rx_left = ftl;
+			words = rs->rx_left - rx_left;
+		}
+	}
+
+	rs->rx_left = rx_left;
+	for (; words; words--) {
+		u32 rxw = readl_relaxed(rs->regs + ROCKCHIP_SPI_RXDR);
+
+		if (!rs->rx)
+			continue;
+
 		if (rs->n_bytes == 1)
-			*(u8 *)(rs->rx) = (u8)rxw;
+			*(u8 *)rs->rx = (u8)rxw;
 		else
-			*(u16 *)(rs->rx) = (u16)rxw;
+			*(u16 *)rs->rx = (u16)rxw;
 		rs->rx += rs->n_bytes;
 	}
 }
 
-static int rockchip_spi_pio_transfer(struct rockchip_spi *rs)
+static irqreturn_t rockchip_spi_isr(int irq, void *dev_id)
 {
-	int remain = 0;
+	struct spi_master *master = dev_id;
+	struct rockchip_spi *rs = spi_master_get_devdata(master);
 
-	do {
-		if (rs->tx) {
-			remain = rs->tx_end - rs->tx;
-			rockchip_spi_pio_writer(rs);
-		}
+	if (rs->tx_left)
+		rockchip_spi_pio_writer(rs);
 
-		if (rs->rx) {
-			remain = rs->rx_end - rs->rx;
-			rockchip_spi_pio_reader(rs);
-		}
+	rockchip_spi_pio_reader(rs);
+	if (!rs->rx_left) {
+		spi_enable_chip(rs, false);
+		writel_relaxed(0, rs->regs + ROCKCHIP_SPI_IMR);
+		spi_finalize_current_transfer(master);
+	}
 
-		cpu_relax();
-	} while (remain);
+	return IRQ_HANDLED;
+}
 
-	/* If tx, wait until the FIFO data completely. */
-	if (rs->tx)
-		wait_for_idle(rs);
+static int rockchip_spi_prepare_irq(struct rockchip_spi *rs,
+		struct spi_transfer *xfer)
+{
+	rs->tx = xfer->tx_buf;
+	rs->rx = xfer->rx_buf;
+	rs->tx_left = rs->tx ? xfer->len / rs->n_bytes : 0;
+	rs->rx_left = xfer->len / rs->n_bytes;
 
-	spi_enable_chip(rs, 0);
+	writel_relaxed(INT_RF_FULL, rs->regs + ROCKCHIP_SPI_IMR);
+	spi_enable_chip(rs, true);
 
-	return 0;
+	if (rs->tx_left)
+		rockchip_spi_pio_writer(rs);
+
+	/* 1 means the transfer is in progress */
+	return 1;
 }
 
 static void rockchip_spi_dma_rxcb(void *data)
 {
-	unsigned long flags;
-	struct rockchip_spi *rs = data;
+	struct spi_master *master = data;
+	struct rockchip_spi *rs = spi_master_get_devdata(master);
+	int state = atomic_fetch_andnot(RXDMA, &rs->state);
 
-	spin_lock_irqsave(&rs->lock, flags);
+	if (state & TXDMA)
+		return;
 
-	rs->state &= ~RXBUSY;
-	if (!(rs->state & TXBUSY)) {
-		spi_enable_chip(rs, 0);
-		spi_finalize_current_transfer(rs->master);
-	}
-
-	spin_unlock_irqrestore(&rs->lock, flags);
+	spi_enable_chip(rs, false);
+	spi_finalize_current_transfer(master);
 }
 
 static void rockchip_spi_dma_txcb(void *data)
 {
-	unsigned long flags;
-	struct rockchip_spi *rs = data;
+	struct spi_master *master = data;
+	struct rockchip_spi *rs = spi_master_get_devdata(master);
+	int state = atomic_fetch_andnot(TXDMA, &rs->state);
+
+	if (state & RXDMA)
+		return;
 
 	/* Wait until the FIFO data completely. */
 	wait_for_idle(rs);
 
-	spin_lock_irqsave(&rs->lock, flags);
-
-	rs->state &= ~TXBUSY;
-	if (!(rs->state & RXBUSY)) {
-		spi_enable_chip(rs, 0);
-		spi_finalize_current_transfer(rs->master);
-	}
-
-	spin_unlock_irqrestore(&rs->lock, flags);
+	spi_enable_chip(rs, false);
+	spi_finalize_current_transfer(master);
 }
 
-static int rockchip_spi_prepare_dma(struct rockchip_spi *rs)
+static int rockchip_spi_prepare_dma(struct rockchip_spi *rs,
+		struct spi_master *master, struct spi_transfer *xfer)
 {
-	unsigned long flags;
-	struct dma_slave_config rxconf, txconf;
 	struct dma_async_tx_descriptor *rxdesc, *txdesc;
 
-	spin_lock_irqsave(&rs->lock, flags);
-	rs->state &= ~RXBUSY;
-	rs->state &= ~TXBUSY;
-	spin_unlock_irqrestore(&rs->lock, flags);
+	atomic_set(&rs->state, 0);
 
 	rxdesc = NULL;
-	if (rs->rx) {
-		rxconf.direction = rs->dma_rx.direction;
-		rxconf.src_addr = rs->dma_rx.addr;
-		rxconf.src_addr_width = rs->n_bytes;
-		if (rs->dma_caps.max_burst > 4)
-			rxconf.src_maxburst = 4;
-		else
-			rxconf.src_maxburst = 1;
-		dmaengine_slave_config(rs->dma_rx.ch, &rxconf);
+	if (xfer->rx_buf) {
+		struct dma_slave_config rxconf = {
+			.direction = DMA_DEV_TO_MEM,
+			.src_addr = rs->dma_addr_rx,
+			.src_addr_width = rs->n_bytes,
+			.src_maxburst = 1,
+		};
+
+		dmaengine_slave_config(master->dma_rx, &rxconf);
 
 		rxdesc = dmaengine_prep_slave_sg(
-				rs->dma_rx.ch,
-				rs->rx_sg.sgl, rs->rx_sg.nents,
-				rs->dma_rx.direction, DMA_PREP_INTERRUPT);
+				master->dma_rx,
+				xfer->rx_sg.sgl, xfer->rx_sg.nents,
+				DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT);
 		if (!rxdesc)
 			return -EINVAL;
 
 		rxdesc->callback = rockchip_spi_dma_rxcb;
-		rxdesc->callback_param = rs;
+		rxdesc->callback_param = master;
 	}
 
 	txdesc = NULL;
-	if (rs->tx) {
-		txconf.direction = rs->dma_tx.direction;
-		txconf.dst_addr = rs->dma_tx.addr;
-		txconf.dst_addr_width = rs->n_bytes;
-		if (rs->dma_caps.max_burst > 4)
-			txconf.dst_maxburst = 4;
-		else
-			txconf.dst_maxburst = 1;
-		dmaengine_slave_config(rs->dma_tx.ch, &txconf);
+	if (xfer->tx_buf) {
+		struct dma_slave_config txconf = {
+			.direction = DMA_MEM_TO_DEV,
+			.dst_addr = rs->dma_addr_tx,
+			.dst_addr_width = rs->n_bytes,
+			.dst_maxburst = rs->fifo_len / 4,
+		};
+
+		dmaengine_slave_config(master->dma_tx, &txconf);
 
 		txdesc = dmaengine_prep_slave_sg(
-				rs->dma_tx.ch,
-				rs->tx_sg.sgl, rs->tx_sg.nents,
-				rs->dma_tx.direction, DMA_PREP_INTERRUPT);
+				master->dma_tx,
+				xfer->tx_sg.sgl, xfer->tx_sg.nents,
+				DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT);
 		if (!txdesc) {
 			if (rxdesc)
-				dmaengine_terminate_sync(rs->dma_rx.ch);
+				dmaengine_terminate_sync(master->dma_rx);
 			return -EINVAL;
 		}
 
 		txdesc->callback = rockchip_spi_dma_txcb;
-		txdesc->callback_param = rs;
+		txdesc->callback_param = master;
 	}
 
 	/* rx must be started before tx due to spi instinct */
 	if (rxdesc) {
-		spin_lock_irqsave(&rs->lock, flags);
-		rs->state |= RXBUSY;
-		spin_unlock_irqrestore(&rs->lock, flags);
+		atomic_or(RXDMA, &rs->state);
 		dmaengine_submit(rxdesc);
-		dma_async_issue_pending(rs->dma_rx.ch);
+		dma_async_issue_pending(master->dma_rx);
 	}
 
+	spi_enable_chip(rs, true);
+
 	if (txdesc) {
-		spin_lock_irqsave(&rs->lock, flags);
-		rs->state |= TXBUSY;
-		spin_unlock_irqrestore(&rs->lock, flags);
+		atomic_or(TXDMA, &rs->state);
 		dmaengine_submit(txdesc);
-		dma_async_issue_pending(rs->dma_tx.ch);
+		dma_async_issue_pending(master->dma_tx);
 	}
 
-	return 0;
+	/* 1 means the transfer is in progress */
+	return 1;
 }
 
-static void rockchip_spi_config(struct rockchip_spi *rs)
+static void rockchip_spi_config(struct rockchip_spi *rs,
+		struct spi_device *spi, struct spi_transfer *xfer,
+		bool use_dma)
 {
-	u32 div = 0;
+	u32 cr0 = CR0_FRF_SPI  << CR0_FRF_OFFSET
+	        | CR0_BHT_8BIT << CR0_BHT_OFFSET
+	        | CR0_SSD_ONE  << CR0_SSD_OFFSET
+	        | CR0_EM_BIG   << CR0_EM_OFFSET;
+	u32 cr1;
 	u32 dmacr = 0;
-	int rsd = 0;
 
-	u32 cr0 = (CR0_BHT_8BIT << CR0_BHT_OFFSET)
-		| (CR0_SSD_ONE << CR0_SSD_OFFSET)
-		| (CR0_EM_BIG << CR0_EM_OFFSET);
+	cr0 |= rs->rsd << CR0_RSD_OFFSET;
+	cr0 |= (spi->mode & 0x3U) << CR0_SCPH_OFFSET;
+	if (spi->mode & SPI_LSB_FIRST)
+		cr0 |= CR0_FBM_LSB << CR0_FBM_OFFSET;
 
-	cr0 |= (rs->n_bytes << CR0_DFS_OFFSET);
-	cr0 |= ((rs->mode & 0x3) << CR0_SCPH_OFFSET);
-	cr0 |= (rs->tmode << CR0_XFM_OFFSET);
-	cr0 |= (rs->type << CR0_FRF_OFFSET);
+	if (xfer->rx_buf && xfer->tx_buf)
+		cr0 |= CR0_XFM_TR << CR0_XFM_OFFSET;
+	else if (xfer->rx_buf)
+		cr0 |= CR0_XFM_RO << CR0_XFM_OFFSET;
+	else if (use_dma)
+		cr0 |= CR0_XFM_TO << CR0_XFM_OFFSET;
 
-	if (rs->use_dma) {
-		if (rs->tx)
+	switch (xfer->bits_per_word) {
+	case 4:
+		cr0 |= CR0_DFS_4BIT << CR0_DFS_OFFSET;
+		cr1 = xfer->len - 1;
+		break;
+	case 8:
+		cr0 |= CR0_DFS_8BIT << CR0_DFS_OFFSET;
+		cr1 = xfer->len - 1;
+		break;
+	case 16:
+		cr0 |= CR0_DFS_16BIT << CR0_DFS_OFFSET;
+		cr1 = xfer->len / 2 - 1;
+		break;
+	default:
+		/* we only whitelist 4, 8 and 16 bit words in
+		 * master->bits_per_word_mask, so this shouldn't
+		 * happen
+		 */
+		unreachable();
+	}
+
+	if (use_dma) {
+		if (xfer->tx_buf)
 			dmacr |= TF_DMA_EN;
-		if (rs->rx)
+		if (xfer->rx_buf)
 			dmacr |= RF_DMA_EN;
 	}
 
-	if (WARN_ON(rs->speed > MAX_SCLK_OUT))
-		rs->speed = MAX_SCLK_OUT;
-
-	/* the minimum divisor is 2 */
-	if (rs->max_freq < 2 * rs->speed) {
-		clk_set_rate(rs->spiclk, 2 * rs->speed);
-		rs->max_freq = clk_get_rate(rs->spiclk);
-	}
-
-	/* div doesn't support odd number */
-	div = DIV_ROUND_UP(rs->max_freq, rs->speed);
-	div = (div + 1) & 0xfffe;
-
-	/* Rx sample delay is expressed in parent clock cycles (max 3) */
-	rsd = DIV_ROUND_CLOSEST(rs->rsd_nsecs * (rs->max_freq >> 8),
-				1000000000 >> 8);
-	if (!rsd && rs->rsd_nsecs) {
-		pr_warn_once("rockchip-spi: %u Hz are too slow to express %u ns delay\n",
-			     rs->max_freq, rs->rsd_nsecs);
-	} else if (rsd > 3) {
-		rsd = 3;
-		pr_warn_once("rockchip-spi: %u Hz are too fast to express %u ns delay, clamping at %u ns\n",
-			     rs->max_freq, rs->rsd_nsecs,
-			     rsd * 1000000000U / rs->max_freq);
-	}
-	cr0 |= rsd << CR0_RSD_OFFSET;
-
 	writel_relaxed(cr0, rs->regs + ROCKCHIP_SPI_CTRLR0);
+	writel_relaxed(cr1, rs->regs + ROCKCHIP_SPI_CTRLR1);
 
-	if (rs->n_bytes == 1)
-		writel_relaxed(rs->len - 1, rs->regs + ROCKCHIP_SPI_CTRLR1);
-	else if (rs->n_bytes == 2)
-		writel_relaxed((rs->len / 2) - 1, rs->regs + ROCKCHIP_SPI_CTRLR1);
+	/* unfortunately setting the fifo threshold level to generate an
+	 * interrupt exactly when the fifo is full doesn't seem to work,
+	 * so we need the strict inequality here
+	 */
+	if (xfer->len < rs->fifo_len)
+		writel_relaxed(xfer->len - 1, rs->regs + ROCKCHIP_SPI_RXFTLR);
 	else
-		writel_relaxed((rs->len * 2) - 1, rs->regs + ROCKCHIP_SPI_CTRLR1);
+		writel_relaxed(rs->fifo_len / 2 - 1, rs->regs + ROCKCHIP_SPI_RXFTLR);
 
-	writel_relaxed(rs->fifo_len / 2 - 1, rs->regs + ROCKCHIP_SPI_TXFTLR);
-	writel_relaxed(rs->fifo_len / 2 - 1, rs->regs + ROCKCHIP_SPI_RXFTLR);
-
-	writel_relaxed(0, rs->regs + ROCKCHIP_SPI_DMATDLR);
+	writel_relaxed(rs->fifo_len / 2, rs->regs + ROCKCHIP_SPI_DMATDLR);
 	writel_relaxed(0, rs->regs + ROCKCHIP_SPI_DMARDLR);
 	writel_relaxed(dmacr, rs->regs + ROCKCHIP_SPI_DMACR);
 
-	spi_set_clk(rs, div);
-
-	dev_dbg(rs->dev, "cr0 0x%x, div %d\n", cr0, div);
+	/* the hardware only supports an even clock divisor, so
+	 * round divisor = spiclk / speed up to nearest even number
+	 * so that the resulting speed is <= the requested speed
+	 */
+	writel_relaxed(2 * DIV_ROUND_UP(rs->freq, 2 * xfer->speed_hz),
+			rs->regs + ROCKCHIP_SPI_BAUDR);
 }
 
 static size_t rockchip_spi_max_transfer_size(struct spi_device *spi)
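The new BAUDR programming above rounds the divisor up to the next even value, so the effective rate never exceeds the requested one. A minimal worked sketch, assuming a 198 MHz spiclk and a 20 MHz request (both values are illustrative, not from this patch):

    #define DIV_ROUND_UP(n, d)  (((n) + (d) - 1) / (d))

    /* even divisor written to ROCKCHIP_SPI_BAUDR */
    unsigned int rk_spi_baudr(unsigned int spiclk_hz, unsigned int speed_hz)
    {
            return 2 * DIV_ROUND_UP(spiclk_hz, 2 * speed_hz);
    }

    /*
     * rk_spi_baudr(198000000, 20000000) == 10,
     * i.e. 198 MHz / 10 = 19.8 MHz, just under the requested 20 MHz.
     */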
@@ -597,8 +540,8 @@
 		struct spi_device *spi,
 		struct spi_transfer *xfer)
 {
-	int ret = 0;
 	struct rockchip_spi *rs = spi_master_get_devdata(master);
+	bool use_dma;
 
 	WARN_ON(readl_relaxed(rs->regs + ROCKCHIP_SPI_SSIENR) &&
 		(readl_relaxed(rs->regs + ROCKCHIP_SPI_SR) & SR_BUSY));
@@ -613,52 +556,16 @@
 		return -EINVAL;
 	}
 
-	rs->speed = xfer->speed_hz;
-	rs->bpw = xfer->bits_per_word;
-	rs->n_bytes = rs->bpw >> 3;
+	rs->n_bytes = xfer->bits_per_word <= 8 ? 1 : 2;
 
-	rs->tx = xfer->tx_buf;
-	rs->tx_end = rs->tx + xfer->len;
-	rs->rx = xfer->rx_buf;
-	rs->rx_end = rs->rx + xfer->len;
-	rs->len = xfer->len;
+	use_dma = master->can_dma ? master->can_dma(master, spi, xfer) : false;
 
-	rs->tx_sg = xfer->tx_sg;
-	rs->rx_sg = xfer->rx_sg;
+	rockchip_spi_config(rs, spi, xfer, use_dma);
 
-	if (rs->tx && rs->rx)
-		rs->tmode = CR0_XFM_TR;
-	else if (rs->tx)
-		rs->tmode = CR0_XFM_TO;
-	else if (rs->rx)
-		rs->tmode = CR0_XFM_RO;
+	if (use_dma)
+		return rockchip_spi_prepare_dma(rs, master, xfer);
 
-	/* we need prepare dma before spi was enabled */
-	if (master->can_dma && master->can_dma(master, spi, xfer))
-		rs->use_dma = 1;
-	else
-		rs->use_dma = 0;
-
-	rockchip_spi_config(rs);
-
-	if (rs->use_dma) {
-		if (rs->tmode == CR0_XFM_RO) {
-			/* rx: dma must be prepared first */
-			ret = rockchip_spi_prepare_dma(rs);
-			spi_enable_chip(rs, 1);
-		} else {
-			/* tx or tr: spi must be enabled first */
-			spi_enable_chip(rs, 1);
-			ret = rockchip_spi_prepare_dma(rs);
-		}
-		/* successful DMA prepare means the transfer is in progress */
-		ret = ret ? ret : 1;
-	} else {
-		spi_enable_chip(rs, 1);
-		ret = rockchip_spi_pio_transfer(rs);
-	}
-
-	return ret;
+	return rockchip_spi_prepare_irq(rs, xfer);
 }
 
 static bool rockchip_spi_can_dma(struct spi_master *master,
@@ -666,8 +573,13 @@
 				 struct spi_transfer *xfer)
 {
 	struct rockchip_spi *rs = spi_master_get_devdata(master);
+	unsigned int bytes_per_word = xfer->bits_per_word <= 8 ? 1 : 2;
 
-	return (xfer->len > rs->fifo_len);
+	/* if the number of spi words to transfer is less than the fifo
+	 * length we can just fill the fifo and wait for a single irq,
+	 * so don't bother setting up dma
+	 */
+	return xfer->len / bytes_per_word >= rs->fifo_len;
 }
 
 static int rockchip_spi_probe(struct platform_device *pdev)
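rockchip_spi_can_dma() now compares the number of SPI words, not bytes, against the FIFO depth: a transfer that fits entirely in the FIFO finishes on a single interrupt, so DMA setup isn't worth it. A minimal sketch of that decision, assuming a 32-entry FIFO (the real depth is probed at runtime; 32 is only an example):

    #include <stdbool.h>

    bool rk_spi_use_dma(unsigned int len, unsigned int bits_per_word,
                        unsigned int fifo_len)
    {
            unsigned int bytes_per_word = bits_per_word <= 8 ? 1 : 2;

            /* transfers that fit in the FIFO complete on one interrupt */
            return len / bytes_per_word >= fifo_len;
    }

    /*
     * rk_spi_use_dma(24, 8, 32)  -> false: 24 words, IRQ/PIO path
     * rk_spi_use_dma(64, 8, 32)  -> true:  64 words, DMA path
     * rk_spi_use_dma(48, 16, 32) -> false: 24 16-bit words, still IRQ/PIO
     */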
@@ -720,16 +632,36 @@
 		goto err_disable_apbclk;
 	}
 
-	spi_enable_chip(rs, 0);
+	spi_enable_chip(rs, false);
 
-	rs->type = SSI_MOTO_SPI;
-	rs->master = master;
+	ret = platform_get_irq(pdev, 0);
+	if (ret < 0)
+		goto err_disable_spiclk;
+
+	ret = devm_request_threaded_irq(&pdev->dev, ret, rockchip_spi_isr, NULL,
+			IRQF_ONESHOT, dev_name(&pdev->dev), master);
+	if (ret)
+		goto err_disable_spiclk;
+
 	rs->dev = &pdev->dev;
-	rs->max_freq = clk_get_rate(rs->spiclk);
+	rs->freq = clk_get_rate(rs->spiclk);
 
 	if (!of_property_read_u32(pdev->dev.of_node, "rx-sample-delay-ns",
-				  &rsd_nsecs))
-		rs->rsd_nsecs = rsd_nsecs;
+				  &rsd_nsecs)) {
+		/* rx sample delay is expressed in parent clock cycles (max 3) */
+		u32 rsd = DIV_ROUND_CLOSEST(rsd_nsecs * (rs->freq >> 8),
+				1000000000 >> 8);
+		if (!rsd) {
+			dev_warn(rs->dev, "%u Hz are too slow to express %u ns delay\n",
+					rs->freq, rsd_nsecs);
+		} else if (rsd > CR0_RSD_MAX) {
+			rsd = CR0_RSD_MAX;
+			dev_warn(rs->dev, "%u Hz are too fast to express %u ns delay, clamping at %u ns\n",
+					rs->freq, rsd_nsecs,
+					CR0_RSD_MAX * 1000000000U / rs->freq);
+		}
+		rs->rsd = rsd;
+	}
 
 	rs->fifo_len = get_fifo_len(rs);
 	if (!rs->fifo_len) {
@@ -738,57 +670,49 @@
 		goto err_disable_spiclk;
 	}
 
-	spin_lock_init(&rs->lock);
-
 	pm_runtime_set_active(&pdev->dev);
 	pm_runtime_enable(&pdev->dev);
 
 	master->auto_runtime_pm = true;
 	master->bus_num = pdev->id;
-	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LOOP;
+	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LOOP | SPI_LSB_FIRST;
 	master->num_chipselect = ROCKCHIP_SPI_MAX_CS_NUM;
 	master->dev.of_node = pdev->dev.of_node;
-	master->bits_per_word_mask = SPI_BPW_MASK(16) | SPI_BPW_MASK(8);
+	master->bits_per_word_mask = SPI_BPW_MASK(16) | SPI_BPW_MASK(8) | SPI_BPW_MASK(4);
+	master->min_speed_hz = rs->freq / BAUDR_SCKDV_MAX;
+	master->max_speed_hz = min(rs->freq / BAUDR_SCKDV_MIN, MAX_SCLK_OUT);
 
 	master->set_cs = rockchip_spi_set_cs;
-	master->prepare_message = rockchip_spi_prepare_message;
-	master->unprepare_message = rockchip_spi_unprepare_message;
 	master->transfer_one = rockchip_spi_transfer_one;
 	master->max_transfer_size = rockchip_spi_max_transfer_size;
 	master->handle_err = rockchip_spi_handle_err;
 	master->flags = SPI_MASTER_GPIO_SS;
 
-	rs->dma_tx.ch = dma_request_chan(rs->dev, "tx");
-	if (IS_ERR(rs->dma_tx.ch)) {
+	master->dma_tx = dma_request_chan(rs->dev, "tx");
+	if (IS_ERR(master->dma_tx)) {
 		/* Check tx to see if we need defer probing driver */
-		if (PTR_ERR(rs->dma_tx.ch) == -EPROBE_DEFER) {
+		if (PTR_ERR(master->dma_tx) == -EPROBE_DEFER) {
 			ret = -EPROBE_DEFER;
 			goto err_disable_pm_runtime;
 		}
 		dev_warn(rs->dev, "Failed to request TX DMA channel\n");
-		rs->dma_tx.ch = NULL;
+		master->dma_tx = NULL;
 	}
 
-	rs->dma_rx.ch = dma_request_chan(rs->dev, "rx");
-	if (IS_ERR(rs->dma_rx.ch)) {
-		if (PTR_ERR(rs->dma_rx.ch) == -EPROBE_DEFER) {
+	master->dma_rx = dma_request_chan(rs->dev, "rx");
+	if (IS_ERR(master->dma_rx)) {
+		if (PTR_ERR(master->dma_rx) == -EPROBE_DEFER) {
 			ret = -EPROBE_DEFER;
 			goto err_free_dma_tx;
 		}
 		dev_warn(rs->dev, "Failed to request RX DMA channel\n");
-		rs->dma_rx.ch = NULL;
+		master->dma_rx = NULL;
 	}
 
-	if (rs->dma_tx.ch && rs->dma_rx.ch) {
-		dma_get_slave_caps(rs->dma_rx.ch, &(rs->dma_caps));
-		rs->dma_tx.addr = (dma_addr_t)(mem->start + ROCKCHIP_SPI_TXDR);
-		rs->dma_rx.addr = (dma_addr_t)(mem->start + ROCKCHIP_SPI_RXDR);
-		rs->dma_tx.direction = DMA_MEM_TO_DEV;
-		rs->dma_rx.direction = DMA_DEV_TO_MEM;
-
+	if (master->dma_tx && master->dma_rx) {
+		rs->dma_addr_tx = mem->start + ROCKCHIP_SPI_TXDR;
+		rs->dma_addr_rx = mem->start + ROCKCHIP_SPI_RXDR;
 		master->can_dma = rockchip_spi_can_dma;
-		master->dma_tx = rs->dma_tx.ch;
-		master->dma_rx = rs->dma_rx.ch;
 	}
 
 	ret = devm_spi_register_master(&pdev->dev, master);
@@ -800,11 +724,11 @@
 	return 0;
 
 err_free_dma_rx:
-	if (rs->dma_rx.ch)
-		dma_release_channel(rs->dma_rx.ch);
+	if (master->dma_rx)
+		dma_release_channel(master->dma_rx);
 err_free_dma_tx:
-	if (rs->dma_tx.ch)
-		dma_release_channel(rs->dma_tx.ch);
+	if (master->dma_tx)
+		dma_release_channel(master->dma_tx);
 err_disable_pm_runtime:
 	pm_runtime_disable(&pdev->dev);
 err_disable_spiclk:
@@ -831,10 +755,10 @@
 	pm_runtime_disable(&pdev->dev);
 	pm_runtime_set_suspended(&pdev->dev);
 
-	if (rs->dma_tx.ch)
-		dma_release_channel(rs->dma_tx.ch);
-	if (rs->dma_rx.ch)
-		dma_release_channel(rs->dma_rx.ch);
+	if (master->dma_tx)
+		dma_release_channel(master->dma_tx);
+	if (master->dma_rx)
+		dma_release_channel(master->dma_rx);
 
 	spi_master_put(master);
 
@@ -846,9 +770,8 @@
 {
 	int ret;
 	struct spi_master *master = dev_get_drvdata(dev);
-	struct rockchip_spi *rs = spi_master_get_devdata(master);
 
-	ret = spi_master_suspend(rs->master);
+	ret = spi_master_suspend(master);
 	if (ret < 0)
 		return ret;
 
@@ -873,7 +796,7 @@
 	if (ret < 0)
 		return ret;
 
-	ret = spi_master_resume(rs->master);
+	ret = spi_master_resume(master);
 	if (ret < 0) {
 		clk_disable_unprepare(rs->spiclk);
 		clk_disable_unprepare(rs->apb_pclk);
diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c
index b37de1d..15f5723 100644
--- a/drivers/spi/spi-rspi.c
+++ b/drivers/spi/spi-rspi.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * SH RSPI driver
  *
@@ -6,15 +7,6 @@
  *
  * Based on spi-sh.c:
  * Copyright (C) 2011 Renesas Solutions Corp.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/module.h>
@@ -188,7 +180,7 @@
 struct rspi_data {
 	void __iomem *addr;
 	u32 max_speed_hz;
-	struct spi_master *master;
+	struct spi_controller *ctlr;
 	wait_queue_head_t wait;
 	struct clk *clk;
 	u16 spcmd;
@@ -245,8 +237,8 @@
 /* optional functions */
 struct spi_ops {
 	int (*set_config_register)(struct rspi_data *rspi, int access_size);
-	int (*transfer_one)(struct spi_master *master, struct spi_device *spi,
-			    struct spi_transfer *xfer);
+	int (*transfer_one)(struct spi_controller *ctlr,
+			    struct spi_device *spi, struct spi_transfer *xfer);
 	u16 mode_bits;
 	u16 flags;
 	u16 fifo_size;
@@ -279,7 +271,8 @@
 	/* Sets parity, interrupt mask */
 	rspi_write8(rspi, 0x00, RSPI_SPCR2);
 
-	/* Sets SPCMD */
+	/* Resets sequencer */
+	rspi_write8(rspi, 0, RSPI_SPSCR);
 	rspi->spcmd |= SPCMD_SPB_8_TO_16(access_size);
 	rspi_write16(rspi, rspi->spcmd, RSPI_SPCMD0);
 
@@ -323,7 +316,8 @@
 	rspi_write8(rspi, 0x00, RSPI_SSLND);
 	rspi_write8(rspi, 0x00, RSPI_SPND);
 
-	/* Sets SPCMD */
+	/* Resets sequencer */
+	rspi_write8(rspi, 0, RSPI_SPSCR);
 	rspi->spcmd |= SPCMD_SPB_8_TO_16(access_size);
 	rspi_write16(rspi, rspi->spcmd, RSPI_SPCMD0);
 
@@ -374,7 +368,8 @@
 	/* Sets buffer to allow normal operation */
 	rspi_write8(rspi, 0x00, QSPI_SPBFCR);
 
-	/* Sets SPCMD */
+	/* Resets sequencer */
+	rspi_write8(rspi, 0, RSPI_SPSCR);
 	rspi_write16(rspi, rspi->spcmd, RSPI_SPCMD0);
 
 	/* Sets RSPI mode */
@@ -474,7 +469,7 @@
 {
 	int error = rspi_wait_for_tx_empty(rspi);
 	if (error < 0) {
-		dev_err(&rspi->master->dev, "transmit timeout\n");
+		dev_err(&rspi->ctlr->dev, "transmit timeout\n");
 		return error;
 	}
 	rspi_write_data(rspi, data);
@@ -488,7 +483,7 @@
 
 	error = rspi_wait_for_rx_full(rspi);
 	if (error < 0) {
-		dev_err(&rspi->master->dev, "receive timeout\n");
+		dev_err(&rspi->ctlr->dev, "receive timeout\n");
 		return error;
 	}
 	data = rspi_read_data(rspi);
@@ -534,8 +529,8 @@
 
 	/* First prepare and submit the DMA request(s), as this may fail */
 	if (rx) {
-		desc_rx = dmaengine_prep_slave_sg(rspi->master->dma_rx,
-					rx->sgl, rx->nents, DMA_DEV_TO_MEM,
+		desc_rx = dmaengine_prep_slave_sg(rspi->ctlr->dma_rx, rx->sgl,
+					rx->nents, DMA_DEV_TO_MEM,
 					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 		if (!desc_rx) {
 			ret = -EAGAIN;
@@ -554,8 +549,8 @@
 	}
 
 	if (tx) {
-		desc_tx = dmaengine_prep_slave_sg(rspi->master->dma_tx,
-					tx->sgl, tx->nents, DMA_MEM_TO_DEV,
+		desc_tx = dmaengine_prep_slave_sg(rspi->ctlr->dma_tx, tx->sgl,
+					tx->nents, DMA_MEM_TO_DEV,
 					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 		if (!desc_tx) {
 			ret = -EAGAIN;
@@ -592,9 +587,9 @@
 
 	/* Now start DMA */
 	if (rx)
-		dma_async_issue_pending(rspi->master->dma_rx);
+		dma_async_issue_pending(rspi->ctlr->dma_rx);
 	if (tx)
-		dma_async_issue_pending(rspi->master->dma_tx);
+		dma_async_issue_pending(rspi->ctlr->dma_tx);
 
 	ret = wait_event_interruptible_timeout(rspi->wait,
 					       rspi->dma_callbacked, HZ);
@@ -602,13 +597,13 @@
 		ret = 0;
 	} else {
 		if (!ret) {
-			dev_err(&rspi->master->dev, "DMA timeout\n");
+			dev_err(&rspi->ctlr->dev, "DMA timeout\n");
 			ret = -ETIMEDOUT;
 		}
 		if (tx)
-			dmaengine_terminate_all(rspi->master->dma_tx);
+			dmaengine_terminate_all(rspi->ctlr->dma_tx);
 		if (rx)
-			dmaengine_terminate_all(rspi->master->dma_rx);
+			dmaengine_terminate_all(rspi->ctlr->dma_rx);
 	}
 
 	rspi_disable_irq(rspi, irq_mask);
@@ -622,12 +617,12 @@
 
 no_dma_tx:
 	if (rx)
-		dmaengine_terminate_all(rspi->master->dma_rx);
+		dmaengine_terminate_all(rspi->ctlr->dma_rx);
 no_dma_rx:
 	if (ret == -EAGAIN) {
 		pr_warn_once("%s %s: DMA not available, falling back to PIO\n",
-			     dev_driver_string(&rspi->master->dev),
-			     dev_name(&rspi->master->dev));
+			     dev_driver_string(&rspi->ctlr->dev),
+			     dev_name(&rspi->ctlr->dev));
 	}
 	return ret;
 }
@@ -668,10 +663,10 @@
 	return xfer->len > rspi->ops->fifo_size;
 }
 
-static bool rspi_can_dma(struct spi_master *master, struct spi_device *spi,
+static bool rspi_can_dma(struct spi_controller *ctlr, struct spi_device *spi,
 			 struct spi_transfer *xfer)
 {
-	struct rspi_data *rspi = spi_master_get_devdata(master);
+	struct rspi_data *rspi = spi_controller_get_devdata(ctlr);
 
 	return __rspi_can_dma(rspi, xfer);
 }
@@ -679,7 +674,7 @@
 static int rspi_dma_check_then_transfer(struct rspi_data *rspi,
 					 struct spi_transfer *xfer)
 {
-	if (!rspi->master->can_dma || !__rspi_can_dma(rspi, xfer))
+	if (!rspi->ctlr->can_dma || !__rspi_can_dma(rspi, xfer))
 		return -EAGAIN;
 
 	/* rx_buf can be NULL on RSPI on SH in TX-only Mode */
@@ -706,10 +701,10 @@
 	return 0;
 }
 
-static int rspi_transfer_one(struct spi_master *master, struct spi_device *spi,
-			     struct spi_transfer *xfer)
+static int rspi_transfer_one(struct spi_controller *ctlr,
+			     struct spi_device *spi, struct spi_transfer *xfer)
 {
-	struct rspi_data *rspi = spi_master_get_devdata(master);
+	struct rspi_data *rspi = spi_controller_get_devdata(ctlr);
 	u8 spcr;
 
 	spcr = rspi_read8(rspi, RSPI_SPCR);
@@ -724,11 +719,11 @@
 	return rspi_common_transfer(rspi, xfer);
 }
 
-static int rspi_rz_transfer_one(struct spi_master *master,
+static int rspi_rz_transfer_one(struct spi_controller *ctlr,
 				struct spi_device *spi,
 				struct spi_transfer *xfer)
 {
-	struct rspi_data *rspi = spi_master_get_devdata(master);
+	struct rspi_data *rspi = spi_controller_get_devdata(ctlr);
 
 	rspi_rz_receive_init(rspi);
 
@@ -744,27 +739,22 @@
 	while (len > 0) {
 		n = qspi_set_send_trigger(rspi, len);
 		qspi_set_receive_trigger(rspi, len);
-		if (n == QSPI_BUFFER_SIZE) {
-			ret = rspi_wait_for_tx_empty(rspi);
-			if (ret < 0) {
-				dev_err(&rspi->master->dev, "transmit timeout\n");
-				return ret;
-			}
-			for (i = 0; i < n; i++)
-				rspi_write_data(rspi, *tx++);
-
-			ret = rspi_wait_for_rx_full(rspi);
-			if (ret < 0) {
-				dev_err(&rspi->master->dev, "receive timeout\n");
-				return ret;
-			}
-			for (i = 0; i < n; i++)
-				*rx++ = rspi_read_data(rspi);
-		} else {
-			ret = rspi_pio_transfer(rspi, tx, rx, n);
-			if (ret < 0)
-				return ret;
+		ret = rspi_wait_for_tx_empty(rspi);
+		if (ret < 0) {
+			dev_err(&rspi->ctlr->dev, "transmit timeout\n");
+			return ret;
 		}
+		for (i = 0; i < n; i++)
+			rspi_write_data(rspi, *tx++);
+
+		ret = rspi_wait_for_rx_full(rspi);
+		if (ret < 0) {
+			dev_err(&rspi->ctlr->dev, "receive timeout\n");
+			return ret;
+		}
+		for (i = 0; i < n; i++)
+			*rx++ = rspi_read_data(rspi);
+
 		len -= n;
 	}
 
@@ -793,7 +783,7 @@
 	unsigned int i, len;
 	int ret;
 
-	if (rspi->master->can_dma && __rspi_can_dma(rspi, xfer)) {
+	if (rspi->ctlr->can_dma && __rspi_can_dma(rspi, xfer)) {
 		ret = rspi_dma_transfer(rspi, &xfer->tx_sg, NULL);
 		if (ret != -EAGAIN)
 			return ret;
@@ -801,19 +791,14 @@
 
 	while (n > 0) {
 		len = qspi_set_send_trigger(rspi, n);
-		if (len == QSPI_BUFFER_SIZE) {
-			ret = rspi_wait_for_tx_empty(rspi);
-			if (ret < 0) {
-				dev_err(&rspi->master->dev, "transmit timeout\n");
-				return ret;
-			}
-			for (i = 0; i < len; i++)
-				rspi_write_data(rspi, *tx++);
-		} else {
-			ret = rspi_pio_transfer(rspi, tx, NULL, len);
-			if (ret < 0)
-				return ret;
+		ret = rspi_wait_for_tx_empty(rspi);
+		if (ret < 0) {
+			dev_err(&rspi->ctlr->dev, "transmit timeout\n");
+			return ret;
 		}
+		for (i = 0; i < len; i++)
+			rspi_write_data(rspi, *tx++);
+
 		n -= len;
 	}
 
@@ -830,7 +815,7 @@
 	unsigned int i, len;
 	int ret;
 
-	if (rspi->master->can_dma && __rspi_can_dma(rspi, xfer)) {
+	if (rspi->ctlr->can_dma && __rspi_can_dma(rspi, xfer)) {
 		int ret = rspi_dma_transfer(rspi, NULL, &xfer->rx_sg);
 		if (ret != -EAGAIN)
 			return ret;
@@ -838,29 +823,24 @@
 
 	while (n > 0) {
 		len = qspi_set_receive_trigger(rspi, n);
-		if (len == QSPI_BUFFER_SIZE) {
-			ret = rspi_wait_for_rx_full(rspi);
-			if (ret < 0) {
-				dev_err(&rspi->master->dev, "receive timeout\n");
-				return ret;
-			}
-			for (i = 0; i < len; i++)
-				*rx++ = rspi_read_data(rspi);
-		} else {
-			ret = rspi_pio_transfer(rspi, NULL, rx, len);
-			if (ret < 0)
-				return ret;
+		ret = rspi_wait_for_rx_full(rspi);
+		if (ret < 0) {
+			dev_err(&rspi->ctlr->dev, "receive timeout\n");
+			return ret;
 		}
+		for (i = 0; i < len; i++)
+			*rx++ = rspi_read_data(rspi);
+
 		n -= len;
 	}
 
 	return 0;
 }
 
-static int qspi_transfer_one(struct spi_master *master, struct spi_device *spi,
-			     struct spi_transfer *xfer)
+static int qspi_transfer_one(struct spi_controller *ctlr,
+			     struct spi_device *spi, struct spi_transfer *xfer)
 {
-	struct rspi_data *rspi = spi_master_get_devdata(master);
+	struct rspi_data *rspi = spi_controller_get_devdata(ctlr);
 
 	if (spi->mode & SPI_LOOP) {
 		return qspi_transfer_out_in(rspi, xfer);
@@ -876,28 +856,6 @@
 	}
 }
 
-static int rspi_setup(struct spi_device *spi)
-{
-	struct rspi_data *rspi = spi_master_get_devdata(spi->master);
-
-	rspi->max_speed_hz = spi->max_speed_hz;
-
-	rspi->spcmd = SPCMD_SSLKP;
-	if (spi->mode & SPI_CPOL)
-		rspi->spcmd |= SPCMD_CPOL;
-	if (spi->mode & SPI_CPHA)
-		rspi->spcmd |= SPCMD_CPHA;
-
-	/* CMOS output mode and MOSI signal from previous transfer */
-	rspi->sppcr = 0;
-	if (spi->mode & SPI_LOOP)
-		rspi->sppcr |= SPPCR_SPLP;
-
-	set_config_register(rspi, 8);
-
-	return 0;
-}
-
 static u16 qspi_transfer_mode(const struct spi_transfer *xfer)
 {
 	if (xfer->tx_buf)
@@ -963,12 +921,28 @@
 	return 0;
 }
 
-static int rspi_prepare_message(struct spi_master *master,
+static int rspi_prepare_message(struct spi_controller *ctlr,
 				struct spi_message *msg)
 {
-	struct rspi_data *rspi = spi_master_get_devdata(master);
+	struct rspi_data *rspi = spi_controller_get_devdata(ctlr);
+	struct spi_device *spi = msg->spi;
 	int ret;
 
+	rspi->max_speed_hz = spi->max_speed_hz;
+
+	rspi->spcmd = SPCMD_SSLKP;
+	if (spi->mode & SPI_CPOL)
+		rspi->spcmd |= SPCMD_CPOL;
+	if (spi->mode & SPI_CPHA)
+		rspi->spcmd |= SPCMD_CPHA;
+
+	/* CMOS output mode and MOSI signal from previous transfer */
+	rspi->sppcr = 0;
+	if (spi->mode & SPI_LOOP)
+		rspi->sppcr |= SPPCR_SPLP;
+
+	set_config_register(rspi, 8);
+
 	if (msg->spi->mode &
 	    (SPI_TX_DUAL | SPI_TX_QUAD | SPI_RX_DUAL | SPI_RX_QUAD)) {
 		/* Setup sequencer for messages with multiple transfer modes */
@@ -982,10 +956,10 @@
 	return 0;
 }
 
-static int rspi_unprepare_message(struct spi_master *master,
+static int rspi_unprepare_message(struct spi_controller *ctlr,
 				  struct spi_message *msg)
 {
-	struct rspi_data *rspi = spi_master_get_devdata(master);
+	struct rspi_data *rspi = spi_controller_get_devdata(ctlr);
 
 	/* Disable SPI function */
 	rspi_write8(rspi, rspi_read8(rspi, RSPI_SPCR) & ~SPCR_SPE, RSPI_SPCR);
@@ -1089,7 +1063,7 @@
 	return chan;
 }
 
-static int rspi_request_dma(struct device *dev, struct spi_master *master,
+static int rspi_request_dma(struct device *dev, struct spi_controller *ctlr,
 			    const struct resource *res)
 {
 	const struct rspi_plat_data *rspi_pd = dev_get_platdata(dev);
@@ -1107,37 +1081,37 @@
 		return 0;
 	}
 
-	master->dma_tx = rspi_request_dma_chan(dev, DMA_MEM_TO_DEV, dma_tx_id,
-					       res->start + RSPI_SPDR);
-	if (!master->dma_tx)
+	ctlr->dma_tx = rspi_request_dma_chan(dev, DMA_MEM_TO_DEV, dma_tx_id,
+					     res->start + RSPI_SPDR);
+	if (!ctlr->dma_tx)
 		return -ENODEV;
 
-	master->dma_rx = rspi_request_dma_chan(dev, DMA_DEV_TO_MEM, dma_rx_id,
-					       res->start + RSPI_SPDR);
-	if (!master->dma_rx) {
-		dma_release_channel(master->dma_tx);
-		master->dma_tx = NULL;
+	ctlr->dma_rx = rspi_request_dma_chan(dev, DMA_DEV_TO_MEM, dma_rx_id,
+					     res->start + RSPI_SPDR);
+	if (!ctlr->dma_rx) {
+		dma_release_channel(ctlr->dma_tx);
+		ctlr->dma_tx = NULL;
 		return -ENODEV;
 	}
 
-	master->can_dma = rspi_can_dma;
+	ctlr->can_dma = rspi_can_dma;
 	dev_info(dev, "DMA available");
 	return 0;
 }
 
-static void rspi_release_dma(struct spi_master *master)
+static void rspi_release_dma(struct spi_controller *ctlr)
 {
-	if (master->dma_tx)
-		dma_release_channel(master->dma_tx);
-	if (master->dma_rx)
-		dma_release_channel(master->dma_rx);
+	if (ctlr->dma_tx)
+		dma_release_channel(ctlr->dma_tx);
+	if (ctlr->dma_rx)
+		dma_release_channel(ctlr->dma_rx);
 }
 
 static int rspi_remove(struct platform_device *pdev)
 {
 	struct rspi_data *rspi = platform_get_drvdata(pdev);
 
-	rspi_release_dma(rspi->master);
+	rspi_release_dma(rspi->ctlr);
 	pm_runtime_disable(&pdev->dev);
 
 	return 0;
@@ -1147,7 +1121,7 @@
 	.set_config_register =	rspi_set_config_register,
 	.transfer_one =		rspi_transfer_one,
 	.mode_bits =		SPI_CPHA | SPI_CPOL | SPI_LOOP,
-	.flags =		SPI_MASTER_MUST_TX,
+	.flags =		SPI_CONTROLLER_MUST_TX,
 	.fifo_size =		8,
 };
 
@@ -1155,7 +1129,7 @@
 	.set_config_register =	rspi_rz_set_config_register,
 	.transfer_one =		rspi_rz_transfer_one,
 	.mode_bits =		SPI_CPHA | SPI_CPOL | SPI_LOOP,
-	.flags =		SPI_MASTER_MUST_RX | SPI_MASTER_MUST_TX,
+	.flags =		SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_MUST_TX,
 	.fifo_size =		8,	/* 8 for TX, 32 for RX */
 };
 
@@ -1165,7 +1139,7 @@
 	.mode_bits =		SPI_CPHA | SPI_CPOL | SPI_LOOP |
 				SPI_TX_DUAL | SPI_TX_QUAD |
 				SPI_RX_DUAL | SPI_RX_QUAD,
-	.flags =		SPI_MASTER_MUST_RX | SPI_MASTER_MUST_TX,
+	.flags =		SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_MUST_TX,
 	.fifo_size =		32,
 };
 
@@ -1182,7 +1156,7 @@
 
 MODULE_DEVICE_TABLE(of, rspi_of_match);
 
-static int rspi_parse_dt(struct device *dev, struct spi_master *master)
+static int rspi_parse_dt(struct device *dev, struct spi_controller *ctlr)
 {
 	u32 num_cs;
 	int error;
@@ -1194,12 +1168,12 @@
 		return error;
 	}
 
-	master->num_chipselect = num_cs;
+	ctlr->num_chipselect = num_cs;
 	return 0;
 }
 #else
 #define rspi_of_match	NULL
-static inline int rspi_parse_dt(struct device *dev, struct spi_master *master)
+static inline int rspi_parse_dt(struct device *dev, struct spi_controller *ctlr)
 {
 	return -EINVAL;
 }
@@ -1220,28 +1194,28 @@
 static int rspi_probe(struct platform_device *pdev)
 {
 	struct resource *res;
-	struct spi_master *master;
+	struct spi_controller *ctlr;
 	struct rspi_data *rspi;
 	int ret;
 	const struct rspi_plat_data *rspi_pd;
 	const struct spi_ops *ops;
 
-	master = spi_alloc_master(&pdev->dev, sizeof(struct rspi_data));
-	if (master == NULL)
+	ctlr = spi_alloc_master(&pdev->dev, sizeof(struct rspi_data));
+	if (ctlr == NULL)
 		return -ENOMEM;
 
 	ops = of_device_get_match_data(&pdev->dev);
 	if (ops) {
-		ret = rspi_parse_dt(&pdev->dev, master);
+		ret = rspi_parse_dt(&pdev->dev, ctlr);
 		if (ret)
 			goto error1;
 	} else {
 		ops = (struct spi_ops *)pdev->id_entry->driver_data;
 		rspi_pd = dev_get_platdata(&pdev->dev);
 		if (rspi_pd && rspi_pd->num_chipselect)
-			master->num_chipselect = rspi_pd->num_chipselect;
+			ctlr->num_chipselect = rspi_pd->num_chipselect;
 		else
-			master->num_chipselect = 2; /* default */
+			ctlr->num_chipselect = 2; /* default */
 	}
 
 	/* ops parameter check */
@@ -1251,10 +1225,10 @@
 		goto error1;
 	}
 
-	rspi = spi_master_get_devdata(master);
+	rspi = spi_controller_get_devdata(ctlr);
 	platform_set_drvdata(pdev, rspi);
 	rspi->ops = ops;
-	rspi->master = master;
+	rspi->ctlr = ctlr;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	rspi->addr = devm_ioremap_resource(&pdev->dev, res);
@@ -1274,15 +1248,14 @@
 
 	init_waitqueue_head(&rspi->wait);
 
-	master->bus_num = pdev->id;
-	master->setup = rspi_setup;
-	master->auto_runtime_pm = true;
-	master->transfer_one = ops->transfer_one;
-	master->prepare_message = rspi_prepare_message;
-	master->unprepare_message = rspi_unprepare_message;
-	master->mode_bits = ops->mode_bits;
-	master->flags = ops->flags;
-	master->dev.of_node = pdev->dev.of_node;
+	ctlr->bus_num = pdev->id;
+	ctlr->auto_runtime_pm = true;
+	ctlr->transfer_one = ops->transfer_one;
+	ctlr->prepare_message = rspi_prepare_message;
+	ctlr->unprepare_message = rspi_unprepare_message;
+	ctlr->mode_bits = ops->mode_bits;
+	ctlr->flags = ops->flags;
+	ctlr->dev.of_node = pdev->dev.of_node;
 
 	ret = platform_get_irq_byname(pdev, "rx");
 	if (ret < 0) {
@@ -1319,13 +1292,13 @@
 		goto error2;
 	}
 
-	ret = rspi_request_dma(&pdev->dev, master, res);
+	ret = rspi_request_dma(&pdev->dev, ctlr, res);
 	if (ret < 0)
 		dev_warn(&pdev->dev, "DMA not available, using PIO\n");
 
-	ret = devm_spi_register_master(&pdev->dev, master);
+	ret = devm_spi_register_controller(&pdev->dev, ctlr);
 	if (ret < 0) {
-		dev_err(&pdev->dev, "spi_register_master error.\n");
+		dev_err(&pdev->dev, "devm_spi_register_controller error.\n");
 		goto error3;
 	}
 
@@ -1334,11 +1307,11 @@
 	return 0;
 
 error3:
-	rspi_release_dma(master);
+	rspi_release_dma(ctlr);
 error2:
 	pm_runtime_disable(&pdev->dev);
 error1:
-	spi_master_put(master);
+	spi_controller_put(ctlr);
 
 	return ret;
 }
@@ -1355,18 +1328,16 @@
 #ifdef CONFIG_PM_SLEEP
 static int rspi_suspend(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct rspi_data *rspi = platform_get_drvdata(pdev);
+	struct rspi_data *rspi = dev_get_drvdata(dev);
 
-	return spi_master_suspend(rspi->master);
+	return spi_controller_suspend(rspi->ctlr);
 }
 
 static int rspi_resume(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct rspi_data *rspi = platform_get_drvdata(pdev);
+	struct rspi_data *rspi = dev_get_drvdata(dev);
 
-	return spi_master_resume(rspi->master);
+	return spi_controller_resume(rspi->ctlr);
 }
 
 static SIMPLE_DEV_PM_OPS(rspi_pm_ops, rspi_suspend, rspi_resume);
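
Most of the spi-rspi.c churn above is the mechanical spi_master to spi_controller rename (every spi_master_* helper has a spi_controller_* equivalent, and SPI_MASTER_MUST_TX/RX become SPI_CONTROLLER_MUST_TX/RX), plus folding the removed rspi_setup() settings into rspi_prepare_message() and letting the PM callbacks use dev_get_drvdata() directly. A minimal probe skeleton using only the renamed API; the foo_* names are placeholders, not taken from this patch:

#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/spi/spi.h>

struct foo_priv {
	struct spi_controller *ctlr;
};

static int foo_transfer_one(struct spi_controller *ctlr,
			    struct spi_device *spi, struct spi_transfer *xfer)
{
	/* A real driver would push xfer->tx_buf / fill xfer->rx_buf here. */
	return 0;	/* 0 = transfer finished */
}

static int foo_probe(struct platform_device *pdev)
{
	struct spi_controller *ctlr;
	struct foo_priv *priv;
	int ret;

	/* spi_alloc_master() is still the allocator; it returns a spi_controller */
	ctlr = spi_alloc_master(&pdev->dev, sizeof(*priv));
	if (!ctlr)
		return -ENOMEM;

	priv = spi_controller_get_devdata(ctlr);	/* was spi_master_get_devdata() */
	priv->ctlr = ctlr;
	platform_set_drvdata(pdev, priv);	/* lets PM ops use dev_get_drvdata() */

	ctlr->transfer_one = foo_transfer_one;
	ctlr->flags = SPI_CONTROLLER_MUST_TX;	/* was SPI_MASTER_MUST_TX */
	ctlr->dev.of_node = pdev->dev.of_node;

	ret = devm_spi_register_controller(&pdev->dev, ctlr);	/* was devm_spi_register_master() */
	if (ret)
		spi_controller_put(ctlr);	/* was spi_master_put() */
	return ret;
}
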
diff --git a/drivers/spi/spi-s3c24xx-fiq.S b/drivers/spi/spi-s3c24xx-fiq.S
index 059f2dc..e95d628 100644
--- a/drivers/spi/spi-s3c24xx-fiq.S
+++ b/drivers/spi/spi-s3c24xx-fiq.S
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /* linux/drivers/spi/spi_s3c24xx_fiq.S
  *
  * Copyright 2009 Simtec Electronics
  *	Ben Dooks <ben@simtec.co.uk>
  *
  * S3C24XX SPI - FIQ pseudo-DMA transfer code
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
 */
 
 #include <linux/linkage.h>
diff --git a/drivers/spi/spi-s3c24xx-fiq.h b/drivers/spi/spi-s3c24xx-fiq.h
index a5950bb..7786b0e 100644
--- a/drivers/spi/spi-s3c24xx-fiq.h
+++ b/drivers/spi/spi-s3c24xx-fiq.h
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /* linux/drivers/spi/spi_s3c24xx_fiq.h
  *
  * Copyright 2009 Simtec Electronics
  *	Ben Dooks <ben@simtec.co.uk>
  *
  * S3C24XX SPI - FIQ pseudo-DMA transfer support
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
 */
 
 /* We have R8 through R13 to play with */
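
The two FIQ files above take their SPDX tag in a block comment while the .c files in this patch use the C99 // form; that split follows Documentation/process/license-rules.rst, which prescribes // for C sources and /* */ for headers and assembly. Schematically (the GPL-2.0-only expression is just the one used here):

/* .c files: the tag is the very first line, C99 comment style */
// SPDX-License-Identifier: GPL-2.0-only

/* .h and .S files: the tag is the very first line, block-comment style */
/* SPDX-License-Identifier: GPL-2.0-only */
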
diff --git a/drivers/spi/spi-s3c24xx.c b/drivers/spi/spi-s3c24xx.c
index 4e7d1bf..2d6e37f 100644
--- a/drivers/spi/spi-s3c24xx.c
+++ b/drivers/spi/spi-s3c24xx.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2006 Ben Dooks
  * Copyright 2006-2009 Simtec Electronics
  *	Ben Dooks <ben@simtec.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
 */
 
 #include <linux/spinlock.h>
@@ -491,7 +487,6 @@
 	struct s3c2410_spi_info *pdata;
 	struct s3c24xx_spi *hw;
 	struct spi_master *master;
-	struct resource *res;
 	int err = 0;
 
 	master = spi_alloc_master(&pdev->dev, sizeof(struct s3c24xx_spi));
@@ -540,8 +535,7 @@
 	dev_dbg(hw->dev, "bitbang at %p\n", &hw->bitbang);
 
 	/* find and map our resources */
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	hw->regs = devm_ioremap_resource(&pdev->dev, res);
+	hw->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(hw->regs)) {
 		err = PTR_ERR(hw->regs);
 		goto err_no_pdata;
@@ -549,7 +543,6 @@
 
 	hw->irq = platform_get_irq(pdev, 0);
 	if (hw->irq < 0) {
-		dev_err(&pdev->dev, "No IRQ specified\n");
 		err = -ENOENT;
 		goto err_no_pdata;
 	}
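
Two small modernizations recur in the spi-s3c24xx.c hunks: the platform_get_resource() plus devm_ioremap_resource() pair collapses into devm_platform_ioremap_resource(), and the dev_err() after platform_get_irq() is dropped because that helper prints its own error message in this kernel. A before/after sketch with assumed wrapper names, not code from the driver:

#include <linux/err.h>
#include <linux/io.h>
#include <linux/ioport.h>
#include <linux/platform_device.h>

/* Before: fetch the resource, then map it (two calls, one local). */
static void __iomem *foo_map_old(struct platform_device *pdev)
{
	struct resource *res;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	return devm_ioremap_resource(&pdev->dev, res);
}

/* After: one helper with the same ERR_PTR() error semantics. */
static void __iomem *foo_map_new(struct platform_device *pdev)
{
	return devm_platform_ioremap_resource(pdev, 0);
}
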
diff --git a/drivers/spi/spi-sc18is602.c b/drivers/spi/spi-sc18is602.c
index 52cf0e9..11acddc 100644
--- a/drivers/spi/spi-sc18is602.c
+++ b/drivers/spi/spi-sc18is602.c
@@ -1,17 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * NXP SC18IS602/603 SPI driver
  *
  * Copyright (C) Guenter Roeck <linux@roeck-us.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/spi/spi-sh-hspi.c b/drivers/spi/spi-sh-hspi.c
index 20e800e..7f73f91 100644
--- a/drivers/spi/spi-sh-hspi.c
+++ b/drivers/spi/spi-sh-hspi.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * SuperH HSPI bus driver
  *
@@ -7,15 +8,6 @@
  * Based on pxa2xx_spi.c:
  * Copyright (C) 2011 Renesas Solutions Corp.
  * Copyright (C) 2005 Stephen Street / StreetFire Sound Labs
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/clk.h>
@@ -43,7 +35,7 @@
 
 struct hspi_priv {
 	void __iomem *addr;
-	struct spi_master *master;
+	struct spi_controller *ctlr;
 	struct device *dev;
 	struct clk *clk;
 };
@@ -148,10 +140,10 @@
 	hspi_write(hspi, SPSCR, 0x21);	/* master mode / CS control */
 }
 
-static int hspi_transfer_one_message(struct spi_master *master,
+static int hspi_transfer_one_message(struct spi_controller *ctlr,
 				     struct spi_message *msg)
 {
-	struct hspi_priv *hspi = spi_master_get_devdata(master);
+	struct hspi_priv *hspi = spi_controller_get_devdata(ctlr);
 	struct spi_transfer *t;
 	u32 tx;
 	u32 rx;
@@ -213,7 +205,7 @@
 		ndelay(nsecs);
 		hspi_hw_cs_disable(hspi);
 	}
-	spi_finalize_current_message(master);
+	spi_finalize_current_message(ctlr);
 
 	return ret;
 }
@@ -221,7 +213,7 @@
 static int hspi_probe(struct platform_device *pdev)
 {
 	struct resource *res;
-	struct spi_master *master;
+	struct spi_controller *ctlr;
 	struct hspi_priv *hspi;
 	struct clk *clk;
 	int ret;
@@ -233,11 +225,9 @@
 		return -EINVAL;
 	}
 
-	master = spi_alloc_master(&pdev->dev, sizeof(*hspi));
-	if (!master) {
-		dev_err(&pdev->dev, "spi_alloc_master error.\n");
+	ctlr = spi_alloc_master(&pdev->dev, sizeof(*hspi));
+	if (!ctlr)
 		return -ENOMEM;
-	}
 
 	clk = clk_get(&pdev->dev, NULL);
 	if (IS_ERR(clk)) {
@@ -246,33 +236,32 @@
 		goto error0;
 	}
 
-	hspi = spi_master_get_devdata(master);
+	hspi = spi_controller_get_devdata(ctlr);
 	platform_set_drvdata(pdev, hspi);
 
 	/* init hspi */
-	hspi->master	= master;
+	hspi->ctlr	= ctlr;
 	hspi->dev	= &pdev->dev;
 	hspi->clk	= clk;
 	hspi->addr	= devm_ioremap(hspi->dev,
 				       res->start, resource_size(res));
 	if (!hspi->addr) {
-		dev_err(&pdev->dev, "ioremap error.\n");
 		ret = -ENOMEM;
 		goto error1;
 	}
 
 	pm_runtime_enable(&pdev->dev);
 
-	master->bus_num		= pdev->id;
-	master->mode_bits	= SPI_CPOL | SPI_CPHA;
-	master->dev.of_node	= pdev->dev.of_node;
-	master->auto_runtime_pm = true;
-	master->transfer_one_message		= hspi_transfer_one_message;
-	master->bits_per_word_mask = SPI_BPW_MASK(8);
+	ctlr->bus_num = pdev->id;
+	ctlr->mode_bits	= SPI_CPOL | SPI_CPHA;
+	ctlr->dev.of_node = pdev->dev.of_node;
+	ctlr->auto_runtime_pm = true;
+	ctlr->transfer_one_message = hspi_transfer_one_message;
+	ctlr->bits_per_word_mask = SPI_BPW_MASK(8);
 
-	ret = devm_spi_register_master(&pdev->dev, master);
+	ret = devm_spi_register_controller(&pdev->dev, ctlr);
 	if (ret < 0) {
-		dev_err(&pdev->dev, "spi_register_master error.\n");
+		dev_err(&pdev->dev, "devm_spi_register_controller error.\n");
 		goto error2;
 	}
 
@@ -283,7 +272,7 @@
  error1:
 	clk_put(clk);
  error0:
-	spi_master_put(master);
+	spi_controller_put(ctlr);
 
 	return ret;
 }
@@ -316,6 +305,6 @@
 module_platform_driver(hspi_driver);
 
 MODULE_DESCRIPTION("SuperH HSPI bus driver");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>");
 MODULE_ALIAS("platform:sh-hspi");
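
sh-hspi gets the same controller rename, but unlike rspi it implements transfer_one_message() rather than transfer_one(), so it drives the whole struct spi_message itself. The contract, unchanged by this patch, is that the handler walks msg->transfers and must call spi_finalize_current_message() before returning, even on error. A condensed sketch under that assumption, with foo_ as a placeholder name:

#include <linux/spi/spi.h>

static int foo_transfer_one_message(struct spi_controller *ctlr,
				    struct spi_message *msg)
{
	struct spi_transfer *t;
	int ret = 0;

	list_for_each_entry(t, &msg->transfers, transfer_list) {
		/* push t->tx_buf / fill t->rx_buf, t->len bytes each */
		msg->actual_length += t->len;
	}

	msg->status = ret;
	spi_finalize_current_message(ctlr);	/* required in every exit path */
	return ret;
}
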
diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c
index 101cd6a..8f13473 100644
--- a/drivers/spi/spi-sh-msiof.c
+++ b/drivers/spi/spi-sh-msiof.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * SuperH MSIOF SPI Master Interface
+ * SuperH MSIOF SPI Controller Interface
  *
  * Copyright (c) 2009 Magnus Damm
  * Copyright (C) 2014 Renesas Electronics Corporation
  * Copyright (C) 2014-2017 Glider bvba
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  */
 
 #include <linux/bitmap.h>
@@ -22,6 +18,7 @@
 #include <linux/gpio/consumer.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
+#include <linux/iopoll.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
@@ -36,14 +33,15 @@
 #include <asm/unaligned.h>
 
 struct sh_msiof_chipdata {
+	u32 bits_per_word_mask;
 	u16 tx_fifo_size;
 	u16 rx_fifo_size;
-	u16 master_flags;
+	u16 ctlr_flags;
 	u16 min_div_pow;
 };
 
 struct sh_msiof_spi_priv {
-	struct spi_master *master;
+	struct spi_controller *ctlr;
 	void __iomem *mapbase;
 	struct clk *clk;
 	struct platform_device *pdev;
@@ -85,111 +83,113 @@
 #define RFDR	0x60	/* Receive FIFO Data Register */
 
 /* TMDR1 and RMDR1 */
-#define MDR1_TRMD	 0x80000000 /* Transfer Mode (1 = Master mode) */
-#define MDR1_SYNCMD_MASK 0x30000000 /* SYNC Mode */
-#define MDR1_SYNCMD_SPI	 0x20000000 /*   Level mode/SPI */
-#define MDR1_SYNCMD_LR	 0x30000000 /*   L/R mode */
-#define MDR1_SYNCAC_SHIFT	 25 /* Sync Polarity (1 = Active-low) */
-#define MDR1_BITLSB_SHIFT	 24 /* MSB/LSB First (1 = LSB first) */
-#define MDR1_DTDL_SHIFT		 20 /* Data Pin Bit Delay for MSIOF_SYNC */
-#define MDR1_SYNCDL_SHIFT	 16 /* Frame Sync Signal Timing Delay */
-#define MDR1_FLD_MASK	 0x0000000c /* Frame Sync Signal Interval (0-3) */
-#define MDR1_FLD_SHIFT		  2
-#define MDR1_XXSTP	 0x00000001 /* Transmission/Reception Stop on FIFO */
+#define MDR1_TRMD	   BIT(31)  /* Transfer Mode (1 = Master mode) */
+#define MDR1_SYNCMD_MASK   GENMASK(29, 28) /* SYNC Mode */
+#define MDR1_SYNCMD_SPI	   (2 << 28)/*   Level mode/SPI */
+#define MDR1_SYNCMD_LR	   (3 << 28)/*   L/R mode */
+#define MDR1_SYNCAC_SHIFT  25       /* Sync Polarity (1 = Active-low) */
+#define MDR1_BITLSB_SHIFT  24       /* MSB/LSB First (1 = LSB first) */
+#define MDR1_DTDL_SHIFT	   20       /* Data Pin Bit Delay for MSIOF_SYNC */
+#define MDR1_SYNCDL_SHIFT  16       /* Frame Sync Signal Timing Delay */
+#define MDR1_FLD_MASK	   GENMASK(3, 2) /* Frame Sync Signal Interval (0-3) */
+#define MDR1_FLD_SHIFT	   2
+#define MDR1_XXSTP	   BIT(0)   /* Transmission/Reception Stop on FIFO */
 /* TMDR1 */
-#define TMDR1_PCON	 0x40000000 /* Transfer Signal Connection */
-#define TMDR1_SYNCCH_MASK 0xc000000 /* Synchronization Signal Channel Select */
-#define TMDR1_SYNCCH_SHIFT	 26 /* 0=MSIOF_SYNC, 1=MSIOF_SS1, 2=MSIOF_SS2 */
+#define TMDR1_PCON	   BIT(30)  /* Transfer Signal Connection */
+#define TMDR1_SYNCCH_MASK  GENMASK(27, 26) /* Sync Signal Channel Select */
+#define TMDR1_SYNCCH_SHIFT 26       /* 0=MSIOF_SYNC, 1=MSIOF_SS1, 2=MSIOF_SS2 */
 
 /* TMDR2 and RMDR2 */
 #define MDR2_BITLEN1(i)	(((i) - 1) << 24) /* Data Size (8-32 bits) */
 #define MDR2_WDLEN1(i)	(((i) - 1) << 16) /* Word Count (1-64/256 (SH, A1))) */
-#define MDR2_GRPMASK1	0x00000001 /* Group Output Mask 1 (SH, A1) */
+#define MDR2_GRPMASK1	BIT(0)      /* Group Output Mask 1 (SH, A1) */
 
 /* TSCR and RSCR */
-#define SCR_BRPS_MASK	    0x1f00 /* Prescaler Setting (1-32) */
+#define SCR_BRPS_MASK	GENMASK(12, 8) /* Prescaler Setting (1-32) */
 #define SCR_BRPS(i)	(((i) - 1) << 8)
-#define SCR_BRDV_MASK	    0x0007 /* Baud Rate Generator's Division Ratio */
-#define SCR_BRDV_DIV_2	    0x0000
-#define SCR_BRDV_DIV_4	    0x0001
-#define SCR_BRDV_DIV_8	    0x0002
-#define SCR_BRDV_DIV_16	    0x0003
-#define SCR_BRDV_DIV_32	    0x0004
-#define SCR_BRDV_DIV_1	    0x0007
+#define SCR_BRDV_MASK	GENMASK(2, 0) /* Baud Rate Generator's Division Ratio */
+#define SCR_BRDV_DIV_2	0
+#define SCR_BRDV_DIV_4	1
+#define SCR_BRDV_DIV_8	2
+#define SCR_BRDV_DIV_16	3
+#define SCR_BRDV_DIV_32	4
+#define SCR_BRDV_DIV_1	7
 
 /* CTR */
-#define CTR_TSCKIZ_MASK	0xc0000000 /* Transmit Clock I/O Polarity Select */
-#define CTR_TSCKIZ_SCK	0x80000000 /*   Disable SCK when TX disabled */
-#define CTR_TSCKIZ_POL_SHIFT	30 /*   Transmit Clock Polarity */
-#define CTR_RSCKIZ_MASK	0x30000000 /* Receive Clock Polarity Select */
-#define CTR_RSCKIZ_SCK	0x20000000 /*   Must match CTR_TSCKIZ_SCK */
-#define CTR_RSCKIZ_POL_SHIFT	28 /*   Receive Clock Polarity */
-#define CTR_TEDG_SHIFT		27 /* Transmit Timing (1 = falling edge) */
-#define CTR_REDG_SHIFT		26 /* Receive Timing (1 = falling edge) */
-#define CTR_TXDIZ_MASK	0x00c00000 /* Pin Output When TX is Disabled */
-#define CTR_TXDIZ_LOW	0x00000000 /*   0 */
-#define CTR_TXDIZ_HIGH	0x00400000 /*   1 */
-#define CTR_TXDIZ_HIZ	0x00800000 /*   High-impedance */
-#define CTR_TSCKE	0x00008000 /* Transmit Serial Clock Output Enable */
-#define CTR_TFSE	0x00004000 /* Transmit Frame Sync Signal Output Enable */
-#define CTR_TXE		0x00000200 /* Transmit Enable */
-#define CTR_RXE		0x00000100 /* Receive Enable */
+#define CTR_TSCKIZ_MASK	GENMASK(31, 30) /* Transmit Clock I/O Polarity Select */
+#define CTR_TSCKIZ_SCK	BIT(31)   /*   Disable SCK when TX disabled */
+#define CTR_TSCKIZ_POL_SHIFT 30   /*   Transmit Clock Polarity */
+#define CTR_RSCKIZ_MASK	GENMASK(29, 28) /* Receive Clock Polarity Select */
+#define CTR_RSCKIZ_SCK	BIT(29)   /*   Must match CTR_TSCKIZ_SCK */
+#define CTR_RSCKIZ_POL_SHIFT 28   /*   Receive Clock Polarity */
+#define CTR_TEDG_SHIFT	     27   /* Transmit Timing (1 = falling edge) */
+#define CTR_REDG_SHIFT	     26   /* Receive Timing (1 = falling edge) */
+#define CTR_TXDIZ_MASK	GENMASK(23, 22) /* Pin Output When TX is Disabled */
+#define CTR_TXDIZ_LOW	(0 << 22) /*   0 */
+#define CTR_TXDIZ_HIGH	(1 << 22) /*   1 */
+#define CTR_TXDIZ_HIZ	(2 << 22) /*   High-impedance */
+#define CTR_TSCKE	BIT(15)   /* Transmit Serial Clock Output Enable */
+#define CTR_TFSE	BIT(14)   /* Transmit Frame Sync Signal Output Enable */
+#define CTR_TXE		BIT(9)    /* Transmit Enable */
+#define CTR_RXE		BIT(8)    /* Receive Enable */
+#define CTR_TXRST	BIT(1)    /* Transmit Reset */
+#define CTR_RXRST	BIT(0)    /* Receive Reset */
 
 /* FCTR */
-#define FCTR_TFWM_MASK	0xe0000000 /* Transmit FIFO Watermark */
-#define FCTR_TFWM_64	0x00000000 /*  Transfer Request when 64 empty stages */
-#define FCTR_TFWM_32	0x20000000 /*  Transfer Request when 32 empty stages */
-#define FCTR_TFWM_24	0x40000000 /*  Transfer Request when 24 empty stages */
-#define FCTR_TFWM_16	0x60000000 /*  Transfer Request when 16 empty stages */
-#define FCTR_TFWM_12	0x80000000 /*  Transfer Request when 12 empty stages */
-#define FCTR_TFWM_8	0xa0000000 /*  Transfer Request when 8 empty stages */
-#define FCTR_TFWM_4	0xc0000000 /*  Transfer Request when 4 empty stages */
-#define FCTR_TFWM_1	0xe0000000 /*  Transfer Request when 1 empty stage */
-#define FCTR_TFUA_MASK	0x07f00000 /* Transmit FIFO Usable Area */
-#define FCTR_TFUA_SHIFT		20
+#define FCTR_TFWM_MASK	GENMASK(31, 29) /* Transmit FIFO Watermark */
+#define FCTR_TFWM_64	(0 << 29) /*  Transfer Request when 64 empty stages */
+#define FCTR_TFWM_32	(1 << 29) /*  Transfer Request when 32 empty stages */
+#define FCTR_TFWM_24	(2 << 29) /*  Transfer Request when 24 empty stages */
+#define FCTR_TFWM_16	(3 << 29) /*  Transfer Request when 16 empty stages */
+#define FCTR_TFWM_12	(4 << 29) /*  Transfer Request when 12 empty stages */
+#define FCTR_TFWM_8	(5 << 29) /*  Transfer Request when 8 empty stages */
+#define FCTR_TFWM_4	(6 << 29) /*  Transfer Request when 4 empty stages */
+#define FCTR_TFWM_1	(7 << 29) /*  Transfer Request when 1 empty stage */
+#define FCTR_TFUA_MASK	GENMASK(26, 20) /* Transmit FIFO Usable Area */
+#define FCTR_TFUA_SHIFT	20
 #define FCTR_TFUA(i)	((i) << FCTR_TFUA_SHIFT)
-#define FCTR_RFWM_MASK	0x0000e000 /* Receive FIFO Watermark */
-#define FCTR_RFWM_1	0x00000000 /*  Transfer Request when 1 valid stages */
-#define FCTR_RFWM_4	0x00002000 /*  Transfer Request when 4 valid stages */
-#define FCTR_RFWM_8	0x00004000 /*  Transfer Request when 8 valid stages */
-#define FCTR_RFWM_16	0x00006000 /*  Transfer Request when 16 valid stages */
-#define FCTR_RFWM_32	0x00008000 /*  Transfer Request when 32 valid stages */
-#define FCTR_RFWM_64	0x0000a000 /*  Transfer Request when 64 valid stages */
-#define FCTR_RFWM_128	0x0000c000 /*  Transfer Request when 128 valid stages */
-#define FCTR_RFWM_256	0x0000e000 /*  Transfer Request when 256 valid stages */
-#define FCTR_RFUA_MASK	0x00001ff0 /* Receive FIFO Usable Area (0x40 = full) */
-#define FCTR_RFUA_SHIFT		 4
+#define FCTR_RFWM_MASK	GENMASK(15, 13) /* Receive FIFO Watermark */
+#define FCTR_RFWM_1	(0 << 13) /*  Transfer Request when 1 valid stages */
+#define FCTR_RFWM_4	(1 << 13) /*  Transfer Request when 4 valid stages */
+#define FCTR_RFWM_8	(2 << 13) /*  Transfer Request when 8 valid stages */
+#define FCTR_RFWM_16	(3 << 13) /*  Transfer Request when 16 valid stages */
+#define FCTR_RFWM_32	(4 << 13) /*  Transfer Request when 32 valid stages */
+#define FCTR_RFWM_64	(5 << 13) /*  Transfer Request when 64 valid stages */
+#define FCTR_RFWM_128	(6 << 13) /*  Transfer Request when 128 valid stages */
+#define FCTR_RFWM_256	(7 << 13) /*  Transfer Request when 256 valid stages */
+#define FCTR_RFUA_MASK	GENMASK(12, 4) /* Receive FIFO Usable Area (0x40 = full) */
+#define FCTR_RFUA_SHIFT	4
 #define FCTR_RFUA(i)	((i) << FCTR_RFUA_SHIFT)
 
 /* STR */
-#define STR_TFEMP	0x20000000 /* Transmit FIFO Empty */
-#define STR_TDREQ	0x10000000 /* Transmit Data Transfer Request */
-#define STR_TEOF	0x00800000 /* Frame Transmission End */
-#define STR_TFSERR	0x00200000 /* Transmit Frame Synchronization Error */
-#define STR_TFOVF	0x00100000 /* Transmit FIFO Overflow */
-#define STR_TFUDF	0x00080000 /* Transmit FIFO Underflow */
-#define STR_RFFUL	0x00002000 /* Receive FIFO Full */
-#define STR_RDREQ	0x00001000 /* Receive Data Transfer Request */
-#define STR_REOF	0x00000080 /* Frame Reception End */
-#define STR_RFSERR	0x00000020 /* Receive Frame Synchronization Error */
-#define STR_RFUDF	0x00000010 /* Receive FIFO Underflow */
-#define STR_RFOVF	0x00000008 /* Receive FIFO Overflow */
+#define STR_TFEMP	BIT(29) /* Transmit FIFO Empty */
+#define STR_TDREQ	BIT(28) /* Transmit Data Transfer Request */
+#define STR_TEOF	BIT(23) /* Frame Transmission End */
+#define STR_TFSERR	BIT(21) /* Transmit Frame Synchronization Error */
+#define STR_TFOVF	BIT(20) /* Transmit FIFO Overflow */
+#define STR_TFUDF	BIT(19) /* Transmit FIFO Underflow */
+#define STR_RFFUL	BIT(13) /* Receive FIFO Full */
+#define STR_RDREQ	BIT(12) /* Receive Data Transfer Request */
+#define STR_REOF	BIT(7)  /* Frame Reception End */
+#define STR_RFSERR	BIT(5)  /* Receive Frame Synchronization Error */
+#define STR_RFUDF	BIT(4)  /* Receive FIFO Underflow */
+#define STR_RFOVF	BIT(3)  /* Receive FIFO Overflow */
 
 /* IER */
-#define IER_TDMAE	0x80000000 /* Transmit Data DMA Transfer Req. Enable */
-#define IER_TFEMPE	0x20000000 /* Transmit FIFO Empty Enable */
-#define IER_TDREQE	0x10000000 /* Transmit Data Transfer Request Enable */
-#define IER_TEOFE	0x00800000 /* Frame Transmission End Enable */
-#define IER_TFSERRE	0x00200000 /* Transmit Frame Sync Error Enable */
-#define IER_TFOVFE	0x00100000 /* Transmit FIFO Overflow Enable */
-#define IER_TFUDFE	0x00080000 /* Transmit FIFO Underflow Enable */
-#define IER_RDMAE	0x00008000 /* Receive Data DMA Transfer Req. Enable */
-#define IER_RFFULE	0x00002000 /* Receive FIFO Full Enable */
-#define IER_RDREQE	0x00001000 /* Receive Data Transfer Request Enable */
-#define IER_REOFE	0x00000080 /* Frame Reception End Enable */
-#define IER_RFSERRE	0x00000020 /* Receive Frame Sync Error Enable */
-#define IER_RFUDFE	0x00000010 /* Receive FIFO Underflow Enable */
-#define IER_RFOVFE	0x00000008 /* Receive FIFO Overflow Enable */
+#define IER_TDMAE	BIT(31) /* Transmit Data DMA Transfer Req. Enable */
+#define IER_TFEMPE	BIT(29) /* Transmit FIFO Empty Enable */
+#define IER_TDREQE	BIT(28) /* Transmit Data Transfer Request Enable */
+#define IER_TEOFE	BIT(23) /* Frame Transmission End Enable */
+#define IER_TFSERRE	BIT(21) /* Transmit Frame Sync Error Enable */
+#define IER_TFOVFE	BIT(20) /* Transmit FIFO Overflow Enable */
+#define IER_TFUDFE	BIT(19) /* Transmit FIFO Underflow Enable */
+#define IER_RDMAE	BIT(15) /* Receive Data DMA Transfer Req. Enable */
+#define IER_RFFULE	BIT(13) /* Receive FIFO Full Enable */
+#define IER_RDREQE	BIT(12) /* Receive Data Transfer Request Enable */
+#define IER_REOFE	BIT(7)  /* Frame Reception End Enable */
+#define IER_RFSERRE	BIT(5)  /* Receive Frame Sync Error Enable */
+#define IER_RFUDFE	BIT(4)  /* Receive FIFO Underflow Enable */
+#define IER_RFOVFE	BIT(3)  /* Receive FIFO Overflow Enable */
 
 
 static u32 sh_msiof_read(struct sh_msiof_spi_priv *p, int reg_offs)
@@ -222,21 +222,14 @@
 {
 	u32 mask = clr | set;
 	u32 data;
-	int k;
 
 	data = sh_msiof_read(p, CTR);
 	data &= ~clr;
 	data |= set;
 	sh_msiof_write(p, CTR, data);
 
-	for (k = 100; k > 0; k--) {
-		if ((sh_msiof_read(p, CTR) & mask) == set)
-			break;
-
-		udelay(10);
-	}
-
-	return k > 0 ? 0 : -ETIMEDOUT;
+	return readl_poll_timeout_atomic(p->mapbase + CTR, data,
+					 (data & mask) == set, 1, 100);
 }
 
 static irqreturn_t sh_msiof_spi_irq(int irq, void *data)
@@ -250,6 +243,19 @@
 	return IRQ_HANDLED;
 }
 
+static void sh_msiof_spi_reset_regs(struct sh_msiof_spi_priv *p)
+{
+	u32 mask = CTR_TXRST | CTR_RXRST;
+	u32 data;
+
+	data = sh_msiof_read(p, CTR);
+	data |= mask;
+	sh_msiof_write(p, CTR, data);
+
+	readl_poll_timeout_atomic(p->mapbase + CTR, data, !(data & mask), 1,
+				  100);
+}
+
 static const u32 sh_msiof_spi_div_array[] = {
 	SCR_BRDV_DIV_1, SCR_BRDV_DIV_2,	 SCR_BRDV_DIV_4,
 	SCR_BRDV_DIV_8,	SCR_BRDV_DIV_16, SCR_BRDV_DIV_32,
@@ -291,7 +297,7 @@
 
 	scr = sh_msiof_spi_div_array[div_pow] | SCR_BRPS(brps);
 	sh_msiof_write(p, TSCR, scr);
-	if (!(p->master->flags & SPI_MASTER_MUST_TX))
+	if (!(p->ctlr->flags & SPI_CONTROLLER_MUST_TX))
 		sh_msiof_write(p, RSCR, scr);
 }
 
@@ -355,14 +361,14 @@
 	tmp |= !cs_high << MDR1_SYNCAC_SHIFT;
 	tmp |= lsb_first << MDR1_BITLSB_SHIFT;
 	tmp |= sh_msiof_spi_get_dtdl_and_syncdl(p);
-	if (spi_controller_is_slave(p->master)) {
+	if (spi_controller_is_slave(p->ctlr)) {
 		sh_msiof_write(p, TMDR1, tmp | TMDR1_PCON);
 	} else {
 		sh_msiof_write(p, TMDR1,
 			       tmp | MDR1_TRMD | TMDR1_PCON |
 			       (ss < MAX_SS ? ss : 0) << TMDR1_SYNCCH_SHIFT);
 	}
-	if (p->master->flags & SPI_MASTER_MUST_TX) {
+	if (p->ctlr->flags & SPI_CONTROLLER_MUST_TX) {
 		/* These bits are reserved if RX needs TX */
 		tmp &= ~0x0000ffff;
 	}
@@ -386,7 +392,7 @@
 {
 	u32 dr2 = MDR2_BITLEN1(bits) | MDR2_WDLEN1(words);
 
-	if (tx_buf || (p->master->flags & SPI_MASTER_MUST_TX))
+	if (tx_buf || (p->ctlr->flags & SPI_CONTROLLER_MUST_TX))
 		sh_msiof_write(p, TMDR2, dr2);
 	else
 		sh_msiof_write(p, TMDR2, dr2 | MDR2_GRPMASK1);
@@ -543,24 +549,11 @@
 
 static int sh_msiof_spi_setup(struct spi_device *spi)
 {
-	struct device_node	*np = spi->master->dev.of_node;
-	struct sh_msiof_spi_priv *p = spi_master_get_devdata(spi->master);
+	struct sh_msiof_spi_priv *p =
+		spi_controller_get_devdata(spi->controller);
 	u32 clr, set, tmp;
 
-	if (!np) {
-		/*
-		 * Use spi->controller_data for CS (same strategy as spi_gpio),
-		 * if any. otherwise let HW control CS
-		 */
-		spi->cs_gpio = (uintptr_t)spi->controller_data;
-	}
-
-	if (gpio_is_valid(spi->cs_gpio)) {
-		gpio_direction_output(spi->cs_gpio, !(spi->mode & SPI_CS_HIGH));
-		return 0;
-	}
-
-	if (spi_controller_is_slave(p->master))
+	if (spi->cs_gpiod || spi_controller_is_slave(p->ctlr))
 		return 0;
 
 	if (p->native_cs_inited &&
@@ -585,15 +578,15 @@
 	return 0;
 }
 
-static int sh_msiof_prepare_message(struct spi_master *master,
+static int sh_msiof_prepare_message(struct spi_controller *ctlr,
 				    struct spi_message *msg)
 {
-	struct sh_msiof_spi_priv *p = spi_master_get_devdata(master);
+	struct sh_msiof_spi_priv *p = spi_controller_get_devdata(ctlr);
 	const struct spi_device *spi = msg->spi;
 	u32 ss, cs_high;
 
 	/* Configure pins before asserting CS */
-	if (gpio_is_valid(spi->cs_gpio)) {
+	if (spi->cs_gpiod) {
 		ss = p->unused_ss;
 		cs_high = p->native_cs_high;
 	} else {
@@ -609,7 +602,7 @@
 
 static int sh_msiof_spi_start(struct sh_msiof_spi_priv *p, void *rx_buf)
 {
-	bool slave = spi_controller_is_slave(p->master);
+	bool slave = spi_controller_is_slave(p->ctlr);
 	int ret = 0;
 
 	/* setup clock and rx/tx signals */
@@ -629,7 +622,7 @@
 
 static int sh_msiof_spi_stop(struct sh_msiof_spi_priv *p, void *rx_buf)
 {
-	bool slave = spi_controller_is_slave(p->master);
+	bool slave = spi_controller_is_slave(p->ctlr);
 	int ret = 0;
 
 	/* shut down frame, rx/tx and clock signals */
@@ -645,9 +638,9 @@
 	return ret;
 }
 
-static int sh_msiof_slave_abort(struct spi_master *master)
+static int sh_msiof_slave_abort(struct spi_controller *ctlr)
 {
-	struct sh_msiof_spi_priv *p = spi_master_get_devdata(master);
+	struct sh_msiof_spi_priv *p = spi_controller_get_devdata(ctlr);
 
 	p->slave_aborted = true;
 	complete(&p->done);
@@ -658,7 +651,7 @@
 static int sh_msiof_wait_for_completion(struct sh_msiof_spi_priv *p,
 					struct completion *x)
 {
-	if (spi_controller_is_slave(p->master)) {
+	if (spi_controller_is_slave(p->ctlr)) {
 		if (wait_for_completion_interruptible(x) ||
 		    p->slave_aborted) {
 			dev_dbg(&p->pdev->dev, "interrupted\n");
@@ -758,7 +751,7 @@
 	/* First prepare and submit the DMA request(s), as this may fail */
 	if (rx) {
 		ier_bits |= IER_RDREQE | IER_RDMAE;
-		desc_rx = dmaengine_prep_slave_single(p->master->dma_rx,
+		desc_rx = dmaengine_prep_slave_single(p->ctlr->dma_rx,
 					p->rx_dma_addr, len, DMA_DEV_TO_MEM,
 					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 		if (!desc_rx)
@@ -773,9 +766,9 @@
 
 	if (tx) {
 		ier_bits |= IER_TDREQE | IER_TDMAE;
-		dma_sync_single_for_device(p->master->dma_tx->device->dev,
+		dma_sync_single_for_device(p->ctlr->dma_tx->device->dev,
 					   p->tx_dma_addr, len, DMA_TO_DEVICE);
-		desc_tx = dmaengine_prep_slave_single(p->master->dma_tx,
+		desc_tx = dmaengine_prep_slave_single(p->ctlr->dma_tx,
 					p->tx_dma_addr, len, DMA_MEM_TO_DEV,
 					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 		if (!desc_tx) {
@@ -807,9 +800,9 @@
 
 	/* Now start DMA */
 	if (rx)
-		dma_async_issue_pending(p->master->dma_rx);
+		dma_async_issue_pending(p->ctlr->dma_rx);
 	if (tx)
-		dma_async_issue_pending(p->master->dma_tx);
+		dma_async_issue_pending(p->ctlr->dma_tx);
 
 	ret = sh_msiof_spi_start(p, rx);
 	if (ret) {
@@ -849,9 +842,8 @@
 	}
 
 	if (rx)
-		dma_sync_single_for_cpu(p->master->dma_rx->device->dev,
-					p->rx_dma_addr, len,
-					DMA_FROM_DEVICE);
+		dma_sync_single_for_cpu(p->ctlr->dma_rx->device->dev,
+					p->rx_dma_addr, len, DMA_FROM_DEVICE);
 
 	return 0;
 
@@ -860,10 +852,10 @@
 	sh_msiof_spi_stop(p, rx);
 stop_dma:
 	if (tx)
-		dmaengine_terminate_all(p->master->dma_tx);
+		dmaengine_terminate_all(p->ctlr->dma_tx);
 no_dma_tx:
 	if (rx)
-		dmaengine_terminate_all(p->master->dma_rx);
+		dmaengine_terminate_all(p->ctlr->dma_rx);
 	sh_msiof_write(p, IER, 0);
 	return ret;
 }
@@ -911,11 +903,11 @@
 	memcpy(dst, src, words * 4);
 }
 
-static int sh_msiof_transfer_one(struct spi_master *master,
+static int sh_msiof_transfer_one(struct spi_controller *ctlr,
 				 struct spi_device *spi,
 				 struct spi_transfer *t)
 {
-	struct sh_msiof_spi_priv *p = spi_master_get_devdata(master);
+	struct sh_msiof_spi_priv *p = spi_controller_get_devdata(ctlr);
 	void (*copy32)(u32 *, const u32 *, unsigned int);
 	void (*tx_fifo)(struct sh_msiof_spi_priv *, const void *, int, int);
 	void (*rx_fifo)(struct sh_msiof_spi_priv *, void *, int, int);
@@ -929,11 +921,14 @@
 	bool swab;
 	int ret;
 
+	/* reset registers */
+	sh_msiof_spi_reset_regs(p);
+
 	/* setup clocks (clock already enabled in chipselect()) */
-	if (!spi_controller_is_slave(p->master))
+	if (!spi_controller_is_slave(p->ctlr))
 		sh_msiof_spi_set_clk_regs(p, clk_get_rate(p->clk), t->speed_hz);
 
-	while (master->dma_tx && len > 15) {
+	while (ctlr->dma_tx && len > 15) {
 		/*
 		 *  DMA supports 32-bit words only, hence pack 8-bit and 16-bit
 		 *  words, with byte resp. word swapping.
@@ -941,17 +936,13 @@
 		unsigned int l = 0;
 
 		if (tx_buf)
-			l = min(len, p->tx_fifo_size * 4);
+			l = min(round_down(len, 4), p->tx_fifo_size * 4);
 		if (rx_buf)
-			l = min(len, p->rx_fifo_size * 4);
+			l = min(round_down(len, 4), p->rx_fifo_size * 4);
 
 		if (bits <= 8) {
-			if (l & 3)
-				break;
 			copy32 = copy_bswap32;
 		} else if (bits <= 16) {
-			if (l & 3)
-				break;
 			copy32 = copy_wswap32;
 		} else {
 			copy32 = copy_plain32;
@@ -981,7 +972,7 @@
 			return 0;
 	}
 
-	if (bits <= 8 && len > 15 && !(len & 3)) {
+	if (bits <= 8 && len > 15) {
 		bits = 32;
 		swab = true;
 	} else {
@@ -1042,29 +1033,42 @@
 		if (rx_buf)
 			rx_buf += n * bytes_per_word;
 		words -= n;
+
+		if (words == 0 && (len % bytes_per_word)) {
+			words = len % bytes_per_word;
+			bits = t->bits_per_word;
+			bytes_per_word = 1;
+			tx_fifo = sh_msiof_spi_write_fifo_8;
+			rx_fifo = sh_msiof_spi_read_fifo_8;
+		}
 	}
 
 	return 0;
 }
 
 static const struct sh_msiof_chipdata sh_data = {
+	.bits_per_word_mask = SPI_BPW_RANGE_MASK(8, 32),
 	.tx_fifo_size = 64,
 	.rx_fifo_size = 64,
-	.master_flags = 0,
+	.ctlr_flags = 0,
 	.min_div_pow = 0,
 };
 
 static const struct sh_msiof_chipdata rcar_gen2_data = {
+	.bits_per_word_mask = SPI_BPW_MASK(8) | SPI_BPW_MASK(16) |
+			      SPI_BPW_MASK(24) | SPI_BPW_MASK(32),
 	.tx_fifo_size = 64,
 	.rx_fifo_size = 64,
-	.master_flags = SPI_MASTER_MUST_TX,
+	.ctlr_flags = SPI_CONTROLLER_MUST_TX,
 	.min_div_pow = 0,
 };
 
 static const struct sh_msiof_chipdata rcar_gen3_data = {
+	.bits_per_word_mask = SPI_BPW_MASK(8) | SPI_BPW_MASK(16) |
+			      SPI_BPW_MASK(24) | SPI_BPW_MASK(32),
 	.tx_fifo_size = 64,
 	.rx_fifo_size = 64,
-	.master_flags = SPI_MASTER_MUST_TX,
+	.ctlr_flags = SPI_CONTROLLER_MUST_TX,
 	.min_div_pow = 1,
 };
 
@@ -1132,12 +1136,13 @@
 	if (ret <= 0)
 		return 0;
 
-	num_cs = max_t(unsigned int, ret, p->master->num_chipselect);
+	num_cs = max_t(unsigned int, ret, p->ctlr->num_chipselect);
 	for (i = 0; i < num_cs; i++) {
 		struct gpio_desc *gpiod;
 
 		gpiod = devm_gpiod_get_index(dev, "cs", i, GPIOD_ASIS);
 		if (!IS_ERR(gpiod)) {
+			devm_gpiod_put(dev, gpiod);
 			cs_gpios++;
 			continue;
 		}
@@ -1202,10 +1207,10 @@
 {
 	struct platform_device *pdev = p->pdev;
 	struct device *dev = &pdev->dev;
-	const struct sh_msiof_spi_info *info = dev_get_platdata(dev);
+	const struct sh_msiof_spi_info *info = p->info;
 	unsigned int dma_tx_id, dma_rx_id;
 	const struct resource *res;
-	struct spi_master *master;
+	struct spi_controller *ctlr;
 	struct device *tx_dev, *rx_dev;
 
 	if (dev->of_node) {
@@ -1225,17 +1230,15 @@
 	if (!res)
 		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 
-	master = p->master;
-	master->dma_tx = sh_msiof_request_dma_chan(dev, DMA_MEM_TO_DEV,
-						   dma_tx_id,
-						   res->start + TFDR);
-	if (!master->dma_tx)
+	ctlr = p->ctlr;
+	ctlr->dma_tx = sh_msiof_request_dma_chan(dev, DMA_MEM_TO_DEV,
+						 dma_tx_id, res->start + TFDR);
+	if (!ctlr->dma_tx)
 		return -ENODEV;
 
-	master->dma_rx = sh_msiof_request_dma_chan(dev, DMA_DEV_TO_MEM,
-						   dma_rx_id,
-						   res->start + RFDR);
-	if (!master->dma_rx)
+	ctlr->dma_rx = sh_msiof_request_dma_chan(dev, DMA_DEV_TO_MEM,
+						 dma_rx_id, res->start + RFDR);
+	if (!ctlr->dma_rx)
 		goto free_tx_chan;
 
 	p->tx_dma_page = (void *)__get_free_page(GFP_KERNEL | GFP_DMA);
@@ -1246,13 +1249,13 @@
 	if (!p->rx_dma_page)
 		goto free_tx_page;
 
-	tx_dev = master->dma_tx->device->dev;
+	tx_dev = ctlr->dma_tx->device->dev;
 	p->tx_dma_addr = dma_map_single(tx_dev, p->tx_dma_page, PAGE_SIZE,
 					DMA_TO_DEVICE);
 	if (dma_mapping_error(tx_dev, p->tx_dma_addr))
 		goto free_rx_page;
 
-	rx_dev = master->dma_rx->device->dev;
+	rx_dev = ctlr->dma_rx->device->dev;
 	p->rx_dma_addr = dma_map_single(rx_dev, p->rx_dma_page, PAGE_SIZE,
 					DMA_FROM_DEVICE);
 	if (dma_mapping_error(rx_dev, p->rx_dma_addr))
@@ -1268,34 +1271,33 @@
 free_tx_page:
 	free_page((unsigned long)p->tx_dma_page);
 free_rx_chan:
-	dma_release_channel(master->dma_rx);
+	dma_release_channel(ctlr->dma_rx);
 free_tx_chan:
-	dma_release_channel(master->dma_tx);
-	master->dma_tx = NULL;
+	dma_release_channel(ctlr->dma_tx);
+	ctlr->dma_tx = NULL;
 	return -ENODEV;
 }
 
 static void sh_msiof_release_dma(struct sh_msiof_spi_priv *p)
 {
-	struct spi_master *master = p->master;
+	struct spi_controller *ctlr = p->ctlr;
 
-	if (!master->dma_tx)
+	if (!ctlr->dma_tx)
 		return;
 
-	dma_unmap_single(master->dma_rx->device->dev, p->rx_dma_addr,
-			 PAGE_SIZE, DMA_FROM_DEVICE);
-	dma_unmap_single(master->dma_tx->device->dev, p->tx_dma_addr,
-			 PAGE_SIZE, DMA_TO_DEVICE);
+	dma_unmap_single(ctlr->dma_rx->device->dev, p->rx_dma_addr, PAGE_SIZE,
+			 DMA_FROM_DEVICE);
+	dma_unmap_single(ctlr->dma_tx->device->dev, p->tx_dma_addr, PAGE_SIZE,
+			 DMA_TO_DEVICE);
 	free_page((unsigned long)p->rx_dma_page);
 	free_page((unsigned long)p->tx_dma_page);
-	dma_release_channel(master->dma_rx);
-	dma_release_channel(master->dma_tx);
+	dma_release_channel(ctlr->dma_rx);
+	dma_release_channel(ctlr->dma_tx);
 }
 
 static int sh_msiof_spi_probe(struct platform_device *pdev)
 {
-	struct resource	*r;
-	struct spi_master *master;
+	struct spi_controller *ctlr;
 	const struct sh_msiof_chipdata *chipdata;
 	struct sh_msiof_spi_info *info;
 	struct sh_msiof_spi_priv *p;
@@ -1316,18 +1318,18 @@
 	}
 
 	if (info->mode == MSIOF_SPI_SLAVE)
-		master = spi_alloc_slave(&pdev->dev,
-					 sizeof(struct sh_msiof_spi_priv));
+		ctlr = spi_alloc_slave(&pdev->dev,
+				       sizeof(struct sh_msiof_spi_priv));
 	else
-		master = spi_alloc_master(&pdev->dev,
-					  sizeof(struct sh_msiof_spi_priv));
-	if (master == NULL)
+		ctlr = spi_alloc_master(&pdev->dev,
+					sizeof(struct sh_msiof_spi_priv));
+	if (ctlr == NULL)
 		return -ENOMEM;
 
-	p = spi_master_get_devdata(master);
+	p = spi_controller_get_devdata(ctlr);
 
 	platform_set_drvdata(pdev, p);
-	p->master = master;
+	p->ctlr = ctlr;
 	p->info = info;
 	p->min_div_pow = chipdata->min_div_pow;
 
@@ -1343,13 +1345,11 @@
 
 	i = platform_get_irq(pdev, 0);
 	if (i < 0) {
-		dev_err(&pdev->dev, "cannot get platform IRQ\n");
-		ret = -ENOENT;
+		ret = i;
 		goto err1;
 	}
 
-	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	p->mapbase = devm_ioremap_resource(&pdev->dev, r);
+	p->mapbase = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(p->mapbase)) {
 		ret = PTR_ERR(p->mapbase);
 		goto err1;
@@ -1374,31 +1374,32 @@
 		p->rx_fifo_size = p->info->rx_fifo_override;
 
 	/* Setup GPIO chip selects */
-	master->num_chipselect = p->info->num_chipselect;
+	ctlr->num_chipselect = p->info->num_chipselect;
 	ret = sh_msiof_get_cs_gpios(p);
 	if (ret)
 		goto err1;
 
-	/* init master code */
-	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
-	master->mode_bits |= SPI_LSB_FIRST | SPI_3WIRE;
-	master->flags = chipdata->master_flags;
-	master->bus_num = pdev->id;
-	master->dev.of_node = pdev->dev.of_node;
-	master->setup = sh_msiof_spi_setup;
-	master->prepare_message = sh_msiof_prepare_message;
-	master->slave_abort = sh_msiof_slave_abort;
-	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(8, 32);
-	master->auto_runtime_pm = true;
-	master->transfer_one = sh_msiof_transfer_one;
+	/* init controller code */
+	ctlr->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
+	ctlr->mode_bits |= SPI_LSB_FIRST | SPI_3WIRE;
+	ctlr->flags = chipdata->ctlr_flags;
+	ctlr->bus_num = pdev->id;
+	ctlr->dev.of_node = pdev->dev.of_node;
+	ctlr->setup = sh_msiof_spi_setup;
+	ctlr->prepare_message = sh_msiof_prepare_message;
+	ctlr->slave_abort = sh_msiof_slave_abort;
+	ctlr->bits_per_word_mask = chipdata->bits_per_word_mask;
+	ctlr->auto_runtime_pm = true;
+	ctlr->transfer_one = sh_msiof_transfer_one;
+	ctlr->use_gpio_descriptors = true;
 
 	ret = sh_msiof_request_dma(p);
 	if (ret < 0)
 		dev_warn(&pdev->dev, "DMA not available, using PIO\n");
 
-	ret = devm_spi_register_master(&pdev->dev, master);
+	ret = devm_spi_register_controller(&pdev->dev, ctlr);
 	if (ret < 0) {
-		dev_err(&pdev->dev, "spi_register_master error.\n");
+		dev_err(&pdev->dev, "devm_spi_register_controller error.\n");
 		goto err2;
 	}
 
@@ -1408,7 +1409,7 @@
 	sh_msiof_release_dma(p);
 	pm_runtime_disable(&pdev->dev);
  err1:
-	spi_master_put(master);
+	spi_controller_put(ctlr);
 	return ret;
 }
 
@@ -1430,18 +1431,16 @@
 #ifdef CONFIG_PM_SLEEP
 static int sh_msiof_spi_suspend(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct sh_msiof_spi_priv *p = platform_get_drvdata(pdev);
+	struct sh_msiof_spi_priv *p = dev_get_drvdata(dev);
 
-	return spi_master_suspend(p->master);
+	return spi_controller_suspend(p->ctlr);
 }
 
 static int sh_msiof_spi_resume(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct sh_msiof_spi_priv *p = platform_get_drvdata(pdev);
+	struct sh_msiof_spi_priv *p = dev_get_drvdata(dev);
 
-	return spi_master_resume(p->master);
+	return spi_controller_resume(p->ctlr);
 }
 
 static SIMPLE_DEV_PM_OPS(sh_msiof_spi_pm_ops, sh_msiof_spi_suspend,
@@ -1463,7 +1462,7 @@
 };
 module_platform_driver(sh_msiof_spi_drv);
 
-MODULE_DESCRIPTION("SuperH MSIOF SPI Master Interface Driver");
+MODULE_DESCRIPTION("SuperH MSIOF SPI Controller Interface Driver");
 MODULE_AUTHOR("Magnus Damm");
 MODULE_LICENSE("GPL v2");
 MODULE_ALIAS("platform:spi_sh_msiof");
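
Beyond the controller rename, the sh-msiof diff rewrites the register bit definitions with BIT()/GENMASK() and replaces the hand-rolled poll loop in sh_msiof_modify_ctr_wait() with readl_poll_timeout_atomic() from <linux/iopoll.h>, which takes the address, a scratch variable, the exit condition, the poll interval and the total timeout (both in microseconds) and returns 0 or -ETIMEDOUT. A generic before/after sketch of just that conversion, with foo_ as placeholder names:

#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/io.h>
#include <linux/iopoll.h>

/* Open-coded polling, the pattern being removed. */
static int foo_wait_old(void __iomem *reg, u32 mask, u32 set)
{
	int k;

	for (k = 100; k > 0; k--) {
		if ((readl(reg) & mask) == set)
			return 0;
		udelay(10);
	}
	return -ETIMEDOUT;
}

/* Same check via the iopoll helper: poll every 1 us, give up after 100 us. */
static int foo_wait_new(void __iomem *reg, u32 mask, u32 set)
{
	u32 val;

	return readl_poll_timeout_atomic(reg, val, (val & mask) == set, 1, 100);
}

Note that the patch also tightens the budget: the old loop allowed up to 100 iterations of udelay(10), while the replacement polls every microsecond for at most 100 microseconds total.
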
diff --git a/drivers/spi/spi-sh-sci.c b/drivers/spi/spi-sh-sci.c
index 393701c..8f30531 100644
--- a/drivers/spi/spi-sh-sci.c
+++ b/drivers/spi/spi-sh-sci.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * SH SCI SPI interface
  *
@@ -6,11 +7,6 @@
  * Based on S3C24XX GPIO based SPI driver, which is:
  *   Copyright (c) 2006 Ben Dooks
  *   Copyright (c) 2006 Simtec Electronics
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/spi/spi-sh.c b/drivers/spi/spi-sh.c
index 50e0ea9..20bdae5 100644
--- a/drivers/spi/spi-sh.c
+++ b/drivers/spi/spi-sh.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * SH SPI bus driver
  *
@@ -5,15 +6,6 @@
  *
  * Based on pxa2xx_spi.c:
  * Copyright (C) 2005 Stephen Street / StreetFire Sound Labs
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/module.h>
@@ -445,10 +437,8 @@
 	}
 
 	irq = platform_get_irq(pdev, 0);
-	if (irq < 0) {
-		dev_err(&pdev->dev, "platform_get_irq error: %d\n", irq);
+	if (irq < 0)
 		return irq;
-	}
 
 	master = spi_alloc_master(&pdev->dev, sizeof(struct spi_sh_data));
 	if (master == NULL) {
@@ -522,6 +512,6 @@
 module_platform_driver(spi_sh_driver);
 
 MODULE_DESCRIPTION("SH SPI bus driver");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Yoshihiro Shimoda");
 MODULE_ALIAS("platform:sh_spi");
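
Both spi-sh.c and spi-sh-hspi.c flip MODULE_LICENSE() from "GPL" to "GPL v2" after gaining a GPL-2.0 SPDX tag; as documented in include/linux/module.h, "GPL" declares GPL v2 or later while "GPL v2" declares v2 only, so the string is adjusted to match the tag. The pairing, schematically:

// SPDX-License-Identifier: GPL-2.0
/* ... driver body ... */
MODULE_LICENSE("GPL v2");	/* v2 only, matches GPL-2.0 / GPL-2.0-only */

// SPDX-License-Identifier: GPL-2.0-or-later
/* ... driver body ... */
MODULE_LICENSE("GPL");		/* v2 or later, matches GPL-2.0-or-later */
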
diff --git a/drivers/spi/spi-sifive.c b/drivers/spi/spi-sifive.c
new file mode 100644
index 0000000..35254bd
--- /dev/null
+++ b/drivers/spi/spi-sifive.c
@@ -0,0 +1,445 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Copyright 2018 SiFive, Inc.
+//
+// SiFive SPI controller driver (master mode only)
+//
+// Author: SiFive, Inc.
+// sifive@sifive.com
+
+#include <linux/clk.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/spi/spi.h>
+#include <linux/io.h>
+#include <linux/log2.h>
+
+#define SIFIVE_SPI_DRIVER_NAME           "sifive_spi"
+
+#define SIFIVE_SPI_MAX_CS                32
+#define SIFIVE_SPI_DEFAULT_DEPTH         8
+#define SIFIVE_SPI_DEFAULT_MAX_BITS      8
+
+/* register offsets */
+#define SIFIVE_SPI_REG_SCKDIV            0x00 /* Serial clock divisor */
+#define SIFIVE_SPI_REG_SCKMODE           0x04 /* Serial clock mode */
+#define SIFIVE_SPI_REG_CSID              0x10 /* Chip select ID */
+#define SIFIVE_SPI_REG_CSDEF             0x14 /* Chip select default */
+#define SIFIVE_SPI_REG_CSMODE            0x18 /* Chip select mode */
+#define SIFIVE_SPI_REG_DELAY0            0x28 /* Delay control 0 */
+#define SIFIVE_SPI_REG_DELAY1            0x2c /* Delay control 1 */
+#define SIFIVE_SPI_REG_FMT               0x40 /* Frame format */
+#define SIFIVE_SPI_REG_TXDATA            0x48 /* Tx FIFO data */
+#define SIFIVE_SPI_REG_RXDATA            0x4c /* Rx FIFO data */
+#define SIFIVE_SPI_REG_TXMARK            0x50 /* Tx FIFO watermark */
+#define SIFIVE_SPI_REG_RXMARK            0x54 /* Rx FIFO watermark */
+#define SIFIVE_SPI_REG_FCTRL             0x60 /* SPI flash interface control */
+#define SIFIVE_SPI_REG_FFMT              0x64 /* SPI flash instruction format */
+#define SIFIVE_SPI_REG_IE                0x70 /* Interrupt Enable Register */
+#define SIFIVE_SPI_REG_IP                0x74 /* Interrupt Pendings Register */
+
+/* sckdiv bits */
+#define SIFIVE_SPI_SCKDIV_DIV_MASK       0xfffU
+
+/* sckmode bits */
+#define SIFIVE_SPI_SCKMODE_PHA           BIT(0)
+#define SIFIVE_SPI_SCKMODE_POL           BIT(1)
+#define SIFIVE_SPI_SCKMODE_MODE_MASK     (SIFIVE_SPI_SCKMODE_PHA | \
+					  SIFIVE_SPI_SCKMODE_POL)
+
+/* csmode bits */
+#define SIFIVE_SPI_CSMODE_MODE_AUTO      0U
+#define SIFIVE_SPI_CSMODE_MODE_HOLD      2U
+#define SIFIVE_SPI_CSMODE_MODE_OFF       3U
+
+/* delay0 bits */
+#define SIFIVE_SPI_DELAY0_CSSCK(x)       ((u32)(x))
+#define SIFIVE_SPI_DELAY0_CSSCK_MASK     0xffU
+#define SIFIVE_SPI_DELAY0_SCKCS(x)       ((u32)(x) << 16)
+#define SIFIVE_SPI_DELAY0_SCKCS_MASK     (0xffU << 16)
+
+/* delay1 bits */
+#define SIFIVE_SPI_DELAY1_INTERCS(x)     ((u32)(x))
+#define SIFIVE_SPI_DELAY1_INTERCS_MASK   0xffU
+#define SIFIVE_SPI_DELAY1_INTERXFR(x)    ((u32)(x) << 16)
+#define SIFIVE_SPI_DELAY1_INTERXFR_MASK  (0xffU << 16)
+
+/* fmt bits */
+#define SIFIVE_SPI_FMT_PROTO_SINGLE      0U
+#define SIFIVE_SPI_FMT_PROTO_DUAL        1U
+#define SIFIVE_SPI_FMT_PROTO_QUAD        2U
+#define SIFIVE_SPI_FMT_PROTO_MASK        3U
+#define SIFIVE_SPI_FMT_ENDIAN            BIT(2)
+#define SIFIVE_SPI_FMT_DIR               BIT(3)
+#define SIFIVE_SPI_FMT_LEN(x)            ((u32)(x) << 16)
+#define SIFIVE_SPI_FMT_LEN_MASK          (0xfU << 16)
+
+/* txdata bits */
+#define SIFIVE_SPI_TXDATA_DATA_MASK      0xffU
+#define SIFIVE_SPI_TXDATA_FULL           BIT(31)
+
+/* rxdata bits */
+#define SIFIVE_SPI_RXDATA_DATA_MASK      0xffU
+#define SIFIVE_SPI_RXDATA_EMPTY          BIT(31)
+
+/* ie and ip bits */
+#define SIFIVE_SPI_IP_TXWM               BIT(0)
+#define SIFIVE_SPI_IP_RXWM               BIT(1)
+
+struct sifive_spi {
+	void __iomem      *regs;        /* virt. address of control registers */
+	struct clk        *clk;         /* bus clock */
+	unsigned int      fifo_depth;   /* fifo depth in words */
+	u32               cs_inactive;  /* level of the CS pins when inactive */
+	struct completion done;         /* wake-up from interrupt */
+};
+
+static void sifive_spi_write(struct sifive_spi *spi, int offset, u32 value)
+{
+	iowrite32(value, spi->regs + offset);
+}
+
+static u32 sifive_spi_read(struct sifive_spi *spi, int offset)
+{
+	return ioread32(spi->regs + offset);
+}
+
+static void sifive_spi_init(struct sifive_spi *spi)
+{
+	/* Watermark interrupts are disabled by default */
+	sifive_spi_write(spi, SIFIVE_SPI_REG_IE, 0);
+
+	/* Default watermark FIFO threshold values */
+	sifive_spi_write(spi, SIFIVE_SPI_REG_TXMARK, 1);
+	sifive_spi_write(spi, SIFIVE_SPI_REG_RXMARK, 0);
+
+	/* Set CS/SCK Delays and Inactive Time to defaults */
+	sifive_spi_write(spi, SIFIVE_SPI_REG_DELAY0,
+			 SIFIVE_SPI_DELAY0_CSSCK(1) |
+			 SIFIVE_SPI_DELAY0_SCKCS(1));
+	sifive_spi_write(spi, SIFIVE_SPI_REG_DELAY1,
+			 SIFIVE_SPI_DELAY1_INTERCS(1) |
+			 SIFIVE_SPI_DELAY1_INTERXFR(0));
+
+	/* Exit specialized memory-mapped SPI flash mode */
+	sifive_spi_write(spi, SIFIVE_SPI_REG_FCTRL, 0);
+}
+
+static int
+sifive_spi_prepare_message(struct spi_master *master, struct spi_message *msg)
+{
+	struct sifive_spi *spi = spi_master_get_devdata(master);
+	struct spi_device *device = msg->spi;
+
+	/* Update the chip select polarity */
+	if (device->mode & SPI_CS_HIGH)
+		spi->cs_inactive &= ~BIT(device->chip_select);
+	else
+		spi->cs_inactive |= BIT(device->chip_select);
+	sifive_spi_write(spi, SIFIVE_SPI_REG_CSDEF, spi->cs_inactive);
+
+	/* Select the correct device */
+	sifive_spi_write(spi, SIFIVE_SPI_REG_CSID, device->chip_select);
+
+	/* Set clock mode */
+	sifive_spi_write(spi, SIFIVE_SPI_REG_SCKMODE,
+			 device->mode & SIFIVE_SPI_SCKMODE_MODE_MASK);
+
+	return 0;
+}
+
+static void sifive_spi_set_cs(struct spi_device *device, bool is_high)
+{
+	struct sifive_spi *spi = spi_master_get_devdata(device->master);
+
+	/* Reverse polarity is handled by SCMR/CPOL, not by inverting CS. */
+	if (device->mode & SPI_CS_HIGH)
+		is_high = !is_high;
+
+	sifive_spi_write(spi, SIFIVE_SPI_REG_CSMODE, is_high ?
+			 SIFIVE_SPI_CSMODE_MODE_AUTO :
+			 SIFIVE_SPI_CSMODE_MODE_HOLD);
+}
+
+static int
+sifive_spi_prep_transfer(struct sifive_spi *spi, struct spi_device *device,
+			 struct spi_transfer *t)
+{
+	u32 cr;
+	unsigned int mode;
+
+	/* Calculate and program the clock rate */
+	cr = DIV_ROUND_UP(clk_get_rate(spi->clk) >> 1, t->speed_hz) - 1;
+	cr &= SIFIVE_SPI_SCKDIV_DIV_MASK;
+	sifive_spi_write(spi, SIFIVE_SPI_REG_SCKDIV, cr);
+
+	mode = max_t(unsigned int, t->rx_nbits, t->tx_nbits);
+
+	/* Set frame format */
+	cr = SIFIVE_SPI_FMT_LEN(t->bits_per_word);
+	switch (mode) {
+	case SPI_NBITS_QUAD:
+		cr |= SIFIVE_SPI_FMT_PROTO_QUAD;
+		break;
+	case SPI_NBITS_DUAL:
+		cr |= SIFIVE_SPI_FMT_PROTO_DUAL;
+		break;
+	default:
+		cr |= SIFIVE_SPI_FMT_PROTO_SINGLE;
+		break;
+	}
+	if (device->mode & SPI_LSB_FIRST)
+		cr |= SIFIVE_SPI_FMT_ENDIAN;
+	if (!t->rx_buf)
+		cr |= SIFIVE_SPI_FMT_DIR;
+	sifive_spi_write(spi, SIFIVE_SPI_REG_FMT, cr);
+
+	/* We want to poll if the time we would have to wait is less
+	 * than the context-switching time. Let's call that threshold 5us.
+	 * Transferring one FIFO's worth of data takes
+	 * (8/mode) * fifo_depth / hz seconds, so poll when:
+	 *    (8/mode) * fifo_depth / hz <= 5 * 10^-6
+	 *    1600000 * fifo_depth <= hz * mode
+	 */
+	return 1600000 * spi->fifo_depth <= t->speed_hz * mode;
+}
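+
+/*
+ * Worked example of the polling threshold above, derived only from the
+ * inequality in sifive_spi_prep_transfer(): with the default fifo_depth of 8
+ * and single-bit transfers (mode = 1), polling is used once
+ * 1600000 * 8 <= hz, i.e. for SCK rates of 12.8 MHz and above; in quad mode
+ * (mode = 4) the threshold drops to 3.2 MHz.
+ */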
+
+static irqreturn_t sifive_spi_irq(int irq, void *dev_id)
+{
+	struct sifive_spi *spi = dev_id;
+	u32 ip = sifive_spi_read(spi, SIFIVE_SPI_REG_IP);
+
+	if (ip & (SIFIVE_SPI_IP_TXWM | SIFIVE_SPI_IP_RXWM)) {
+		/* Disable interrupts until next transfer */
+		sifive_spi_write(spi, SIFIVE_SPI_REG_IE, 0);
+		complete(&spi->done);
+		return IRQ_HANDLED;
+	}
+
+	return IRQ_NONE;
+}
+
+static void sifive_spi_wait(struct sifive_spi *spi, u32 bit, int poll)
+{
+	if (poll) {
+		u32 cr;
+
+		do {
+			cr = sifive_spi_read(spi, SIFIVE_SPI_REG_IP);
+		} while (!(cr & bit));
+	} else {
+		reinit_completion(&spi->done);
+		sifive_spi_write(spi, SIFIVE_SPI_REG_IE, bit);
+		wait_for_completion(&spi->done);
+	}
+}
+
+static void sifive_spi_tx(struct sifive_spi *spi, const u8 *tx_ptr)
+{
+	WARN_ON_ONCE((sifive_spi_read(spi, SIFIVE_SPI_REG_TXDATA)
+				& SIFIVE_SPI_TXDATA_FULL) != 0);
+	sifive_spi_write(spi, SIFIVE_SPI_REG_TXDATA,
+			 *tx_ptr & SIFIVE_SPI_TXDATA_DATA_MASK);
+}
+
+static void sifive_spi_rx(struct sifive_spi *spi, u8 *rx_ptr)
+{
+	u32 data = sifive_spi_read(spi, SIFIVE_SPI_REG_RXDATA);
+
+	WARN_ON_ONCE((data & SIFIVE_SPI_RXDATA_EMPTY) != 0);
+	*rx_ptr = data & SIFIVE_SPI_RXDATA_DATA_MASK;
+}
+
+static int
+sifive_spi_transfer_one(struct spi_master *master, struct spi_device *device,
+			struct spi_transfer *t)
+{
+	struct sifive_spi *spi = spi_master_get_devdata(master);
+	int poll = sifive_spi_prep_transfer(spi, device, t);
+	const u8 *tx_ptr = t->tx_buf;
+	u8 *rx_ptr = t->rx_buf;
+	unsigned int remaining_words = t->len;
+
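+	/*
+	 * Feed the FIFO in chunks of at most fifo_depth words.  For each
+	 * chunk, wait on the RX watermark when a receive buffer is present,
+	 * otherwise on the TX watermark, using the poll/interrupt choice
+	 * made in sifive_spi_prep_transfer().
+	 */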
+	while (remaining_words) {
+		unsigned int n_words = min(remaining_words, spi->fifo_depth);
+		unsigned int i;
+
+		/* Enqueue n_words for transmission */
+		for (i = 0; i < n_words; i++)
+			sifive_spi_tx(spi, tx_ptr++);
+
+		if (rx_ptr) {
+			/* Wait for transmission + reception to complete */
+			sifive_spi_write(spi, SIFIVE_SPI_REG_RXMARK,
+					 n_words - 1);
+			sifive_spi_wait(spi, SIFIVE_SPI_IP_RXWM, poll);
+
+			/* Read out all the data from the RX FIFO */
+			for (i = 0; i < n_words; i++)
+				sifive_spi_rx(spi, rx_ptr++);
+		} else {
+			/* Wait for transmission to complete */
+			sifive_spi_wait(spi, SIFIVE_SPI_IP_TXWM, poll);
+		}
+
+		remaining_words -= n_words;
+	}
+
+	return 0;
+}
+
+static int sifive_spi_probe(struct platform_device *pdev)
+{
+	struct sifive_spi *spi;
+	int ret, irq, num_cs;
+	u32 cs_bits, max_bits_per_word;
+	struct spi_master *master;
+
+	master = spi_alloc_master(&pdev->dev, sizeof(struct sifive_spi));
+	if (!master) {
+		dev_err(&pdev->dev, "out of memory\n");
+		return -ENOMEM;
+	}
+
+	spi = spi_master_get_devdata(master);
+	init_completion(&spi->done);
+	platform_set_drvdata(pdev, master);
+
+	spi->regs = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(spi->regs)) {
+		ret = PTR_ERR(spi->regs);
+		goto put_master;
+	}
+
+	spi->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(spi->clk)) {
+		dev_err(&pdev->dev, "Unable to find bus clock\n");
+		ret = PTR_ERR(spi->clk);
+		goto put_master;
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		ret = irq;
+		goto put_master;
+	}
+
+	/* Optional parameters */
+	ret =
+	  of_property_read_u32(pdev->dev.of_node, "sifive,fifo-depth",
+			       &spi->fifo_depth);
+	if (ret < 0)
+		spi->fifo_depth = SIFIVE_SPI_DEFAULT_DEPTH;
+
+	ret =
+	  of_property_read_u32(pdev->dev.of_node, "sifive,max-bits-per-word",
+			       &max_bits_per_word);
+
+	if (!ret && max_bits_per_word < 8) {
+		dev_err(&pdev->dev, "Only 8bit SPI words supported by the driver\n");
+		ret = -EINVAL;
+		goto put_master;
+	}
+
+	/* Spin up the bus clock before hitting registers */
+	ret = clk_prepare_enable(spi->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "Unable to enable bus clock\n");
+		goto put_master;
+	}
+
+	/* probe the number of CS lines */
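+	/*
+	 * Writing all-ones to CSDEF and reading the register back reveals
+	 * which chip-select bits are actually implemented; the original
+	 * CSDEF value is restored afterwards.
+	 */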
+	spi->cs_inactive = sifive_spi_read(spi, SIFIVE_SPI_REG_CSDEF);
+	sifive_spi_write(spi, SIFIVE_SPI_REG_CSDEF, 0xffffffffU);
+	cs_bits = sifive_spi_read(spi, SIFIVE_SPI_REG_CSDEF);
+	sifive_spi_write(spi, SIFIVE_SPI_REG_CSDEF, spi->cs_inactive);
+	if (!cs_bits) {
+		dev_err(&pdev->dev, "Could not auto probe CS lines\n");
+		ret = -EINVAL;
+		goto put_master;
+	}
+
+	num_cs = ilog2(cs_bits) + 1;
+	if (num_cs > SIFIVE_SPI_MAX_CS) {
+		dev_err(&pdev->dev, "Invalid number of spi slaves\n");
+		ret = -EINVAL;
+		goto put_master;
+	}
+
+	/* Define our master */
+	master->dev.of_node = pdev->dev.of_node;
+	master->bus_num = pdev->id;
+	master->num_chipselect = num_cs;
+	master->mode_bits = SPI_CPHA | SPI_CPOL
+			  | SPI_CS_HIGH | SPI_LSB_FIRST
+			  | SPI_TX_DUAL | SPI_TX_QUAD
+			  | SPI_RX_DUAL | SPI_RX_QUAD;
+	/* TODO: add driver support for bits_per_word < 8;
+	 * we need to "left-align" the bits (unless SPI_LSB_FIRST)
+	 */
+	master->bits_per_word_mask = SPI_BPW_MASK(8);
+	master->flags = SPI_CONTROLLER_MUST_TX | SPI_MASTER_GPIO_SS;
+	master->prepare_message = sifive_spi_prepare_message;
+	master->set_cs = sifive_spi_set_cs;
+	master->transfer_one = sifive_spi_transfer_one;
+
+	pdev->dev.dma_mask = NULL;
+	/* Configure the SPI master hardware */
+	sifive_spi_init(spi);
+
+	/* Register for SPI Interrupt */
+	ret = devm_request_irq(&pdev->dev, irq, sifive_spi_irq, 0,
+			       dev_name(&pdev->dev), spi);
+	if (ret) {
+		dev_err(&pdev->dev, "Unable to bind to interrupt\n");
+		goto put_master;
+	}
+
+	dev_info(&pdev->dev, "mapped; irq=%d, cs=%d\n",
+		 irq, master->num_chipselect);
+
+	ret = devm_spi_register_master(&pdev->dev, master);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "spi_register_master failed\n");
+		goto put_master;
+	}
+
+	return 0;
+
+put_master:
+	spi_master_put(master);
+
+	return ret;
+}
+
+static int sifive_spi_remove(struct platform_device *pdev)
+{
+	struct spi_master *master = platform_get_drvdata(pdev);
+	struct sifive_spi *spi = spi_master_get_devdata(master);
+
+	/* Disable all the interrupts just in case */
+	sifive_spi_write(spi, SIFIVE_SPI_REG_IE, 0);
+
+	return 0;
+}
+
+static const struct of_device_id sifive_spi_of_match[] = {
+	{ .compatible = "sifive,spi0", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, sifive_spi_of_match);
+
+static struct platform_driver sifive_spi_driver = {
+	.probe = sifive_spi_probe,
+	.remove = sifive_spi_remove,
+	.driver = {
+		.name = SIFIVE_SPI_DRIVER_NAME,
+		.of_match_table = sifive_spi_of_match,
+	},
+};
+module_platform_driver(sifive_spi_driver);
+
+MODULE_AUTHOR("SiFive, Inc. <sifive@sifive.com>");
+MODULE_DESCRIPTION("SiFive SPI driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/spi/spi-sirf.c b/drivers/spi/spi-sirf.c
index f009d76..e1e6391 100644
--- a/drivers/spi/spi-sirf.c
+++ b/drivers/spi/spi-sirf.c
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * SPI bus driver for CSR SiRFprimaII
  *
  * Copyright (c) 2011 Cambridge Silicon Radio Limited, a CSR plc group company.
- *
- * Licensed under GPLv2 or later.
  */
 
 #include <linux/module.h>
@@ -1071,7 +1070,6 @@
 {
 	struct sirfsoc_spi *sspi;
 	struct spi_master *master;
-	struct resource *mem_res;
 	const struct sirf_spi_comp_data *spi_comp_data;
 	int irq;
 	int ret;
@@ -1098,8 +1096,7 @@
 	sspi->fifo_level_chk_mask = (sspi->fifo_size / 4) - 1;
 	sspi->dat_max_frm_len = spi_comp_data->dat_max_frm_len;
 	sspi->fifo_size = spi_comp_data->fifo_size;
-	mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	sspi->base = devm_ioremap_resource(&pdev->dev, mem_res);
+	sspi->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(sspi->base)) {
 		ret = PTR_ERR(sspi->base);
 		goto free_master;
diff --git a/drivers/spi/spi-slave-mt27xx.c b/drivers/spi/spi-slave-mt27xx.c
new file mode 100644
index 0000000..61bc43b
--- /dev/null
+++ b/drivers/spi/spi-slave-mt27xx.c
@@ -0,0 +1,553 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (c) 2018 MediaTek Inc.
+
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/spi/spi.h>
+
+#define SPIS_IRQ_EN_REG		0x0
+#define SPIS_IRQ_CLR_REG	0x4
+#define SPIS_IRQ_ST_REG		0x8
+#define SPIS_IRQ_MASK_REG	0xc
+#define SPIS_CFG_REG		0x10
+#define SPIS_RX_DATA_REG	0x14
+#define SPIS_TX_DATA_REG	0x18
+#define SPIS_RX_DST_REG		0x1c
+#define SPIS_TX_SRC_REG		0x20
+#define SPIS_DMA_CFG_REG	0x30
+#define SPIS_SOFT_RST_REG	0x40
+
+/* SPIS_IRQ_EN_REG */
+#define DMA_DONE_EN		BIT(7)
+#define DATA_DONE_EN		BIT(2)
+#define RSTA_DONE_EN		BIT(1)
+#define CMD_INVALID_EN		BIT(0)
+
+/* SPIS_IRQ_ST_REG */
+#define DMA_DONE_ST		BIT(7)
+#define DATA_DONE_ST		BIT(2)
+#define RSTA_DONE_ST		BIT(1)
+#define CMD_INVALID_ST		BIT(0)
+
+/* SPIS_IRQ_MASK_REG */
+#define DMA_DONE_MASK		BIT(7)
+#define DATA_DONE_MASK		BIT(2)
+#define RSTA_DONE_MASK		BIT(1)
+#define CMD_INVALID_MASK	BIT(0)
+
+/* SPIS_CFG_REG */
+#define SPIS_TX_ENDIAN		BIT(7)
+#define SPIS_RX_ENDIAN		BIT(6)
+#define SPIS_TXMSBF		BIT(5)
+#define SPIS_RXMSBF		BIT(4)
+#define SPIS_CPHA		BIT(3)
+#define SPIS_CPOL		BIT(2)
+#define SPIS_TX_EN		BIT(1)
+#define SPIS_RX_EN		BIT(0)
+
+/* SPIS_DMA_CFG_REG */
+#define TX_DMA_TRIG_EN		BIT(31)
+#define TX_DMA_EN		BIT(30)
+#define RX_DMA_EN		BIT(29)
+#define TX_DMA_LEN		0xfffff
+
+/* SPIS_SOFT_RST_REG */
+#define SPIS_DMA_ADDR_EN	BIT(1)
+#define SPIS_SOFT_RST		BIT(0)
+
+#define MTK_SPI_SLAVE_MAX_FIFO_SIZE 512U
+
+struct mtk_spi_slave {
+	struct device *dev;
+	void __iomem *base;
+	struct clk *spi_clk;
+	struct completion xfer_done;
+	struct spi_transfer *cur_transfer;
+	bool slave_aborted;
+};
+
+static const struct of_device_id mtk_spi_slave_of_match[] = {
+	{ .compatible = "mediatek,mt2712-spi-slave", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, mtk_spi_slave_of_match);
+
+static void mtk_spi_slave_disable_dma(struct mtk_spi_slave *mdata)
+{
+	u32 reg_val;
+
+	reg_val = readl(mdata->base + SPIS_DMA_CFG_REG);
+	reg_val &= ~RX_DMA_EN;
+	reg_val &= ~TX_DMA_EN;
+	writel(reg_val, mdata->base + SPIS_DMA_CFG_REG);
+}
+
+static void mtk_spi_slave_disable_xfer(struct mtk_spi_slave *mdata)
+{
+	u32 reg_val;
+
+	reg_val = readl(mdata->base + SPIS_CFG_REG);
+	reg_val &= ~SPIS_TX_EN;
+	reg_val &= ~SPIS_RX_EN;
+	writel(reg_val, mdata->base + SPIS_CFG_REG);
+}
+
+static int mtk_spi_slave_wait_for_completion(struct mtk_spi_slave *mdata)
+{
+	if (wait_for_completion_interruptible(&mdata->xfer_done) ||
+	    mdata->slave_aborted) {
+		dev_err(mdata->dev, "interrupted\n");
+		return -EINTR;
+	}
+
+	return 0;
+}
+
+static int mtk_spi_slave_prepare_message(struct spi_controller *ctlr,
+					 struct spi_message *msg)
+{
+	struct mtk_spi_slave *mdata = spi_controller_get_devdata(ctlr);
+	struct spi_device *spi = msg->spi;
+	bool cpha, cpol;
+	u32 reg_val;
+
+	cpha = spi->mode & SPI_CPHA ? 1 : 0;
+	cpol = spi->mode & SPI_CPOL ? 1 : 0;
+
+	reg_val = readl(mdata->base + SPIS_CFG_REG);
+	if (cpha)
+		reg_val |= SPIS_CPHA;
+	else
+		reg_val &= ~SPIS_CPHA;
+	if (cpol)
+		reg_val |= SPIS_CPOL;
+	else
+		reg_val &= ~SPIS_CPOL;
+
+	if (spi->mode & SPI_LSB_FIRST)
+		reg_val &= ~(SPIS_TXMSBF | SPIS_RXMSBF);
+	else
+		reg_val |= SPIS_TXMSBF | SPIS_RXMSBF;
+
+	reg_val &= ~SPIS_TX_ENDIAN;
+	reg_val &= ~SPIS_RX_ENDIAN;
+	writel(reg_val, mdata->base + SPIS_CFG_REG);
+
+	return 0;
+}
+
+static int mtk_spi_slave_fifo_transfer(struct spi_controller *ctlr,
+				       struct spi_device *spi,
+				       struct spi_transfer *xfer)
+{
+	struct mtk_spi_slave *mdata = spi_controller_get_devdata(ctlr);
+	int reg_val, cnt, remainder, ret;
+
+	writel(SPIS_SOFT_RST, mdata->base + SPIS_SOFT_RST_REG);
+
+	reg_val = readl(mdata->base + SPIS_CFG_REG);
+	if (xfer->rx_buf)
+		reg_val |= SPIS_RX_EN;
+	if (xfer->tx_buf)
+		reg_val |= SPIS_TX_EN;
+	writel(reg_val, mdata->base + SPIS_CFG_REG);
+
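+	/*
+	 * Push whole 32-bit words into the TX FIFO first; any 1-3 trailing
+	 * bytes are packed into one final word below.
+	 */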
+	cnt = xfer->len / 4;
+	if (xfer->tx_buf)
+		iowrite32_rep(mdata->base + SPIS_TX_DATA_REG,
+			      xfer->tx_buf, cnt);
+
+	remainder = xfer->len % 4;
+	if (xfer->tx_buf && remainder > 0) {
+		reg_val = 0;
+		memcpy(&reg_val, xfer->tx_buf + cnt * 4, remainder);
+		writel(reg_val, mdata->base + SPIS_TX_DATA_REG);
+	}
+
+	ret = mtk_spi_slave_wait_for_completion(mdata);
+	if (ret) {
+		mtk_spi_slave_disable_xfer(mdata);
+		writel(SPIS_SOFT_RST, mdata->base + SPIS_SOFT_RST_REG);
+	}
+
+	return ret;
+}
+
+static int mtk_spi_slave_dma_transfer(struct spi_controller *ctlr,
+				      struct spi_device *spi,
+				      struct spi_transfer *xfer)
+{
+	struct mtk_spi_slave *mdata = spi_controller_get_devdata(ctlr);
+	struct device *dev = mdata->dev;
+	int reg_val, ret;
+
+	writel(SPIS_SOFT_RST, mdata->base + SPIS_SOFT_RST_REG);
+
+	if (xfer->tx_buf) {
+		/* tx_buf is a const void *, but the DMA mapping needs a
+		 * void *
+		 */
+		void *nonconst_tx = (void *)xfer->tx_buf;
+
+		xfer->tx_dma = dma_map_single(dev, nonconst_tx,
+					      xfer->len, DMA_TO_DEVICE);
+		if (dma_mapping_error(dev, xfer->tx_dma)) {
+			ret = -ENOMEM;
+			goto disable_transfer;
+		}
+	}
+
+	if (xfer->rx_buf) {
+		xfer->rx_dma = dma_map_single(dev, xfer->rx_buf,
+					      xfer->len, DMA_FROM_DEVICE);
+		if (dma_mapping_error(dev, xfer->rx_dma)) {
+			ret = -ENOMEM;
+			goto unmap_txdma;
+		}
+	}
+
+	writel(xfer->tx_dma, mdata->base + SPIS_TX_SRC_REG);
+	writel(xfer->rx_dma, mdata->base + SPIS_RX_DST_REG);
+
+	writel(SPIS_DMA_ADDR_EN, mdata->base + SPIS_SOFT_RST_REG);
+
+	/* Enable TX/RX in the config register */
+	reg_val = readl(mdata->base + SPIS_CFG_REG);
+	if (xfer->tx_buf)
+		reg_val |= SPIS_TX_EN;
+	if (xfer->rx_buf)
+		reg_val |= SPIS_RX_EN;
+	writel(reg_val, mdata->base + SPIS_CFG_REG);
+
+	/* Configure the DMA transfer length and enable DMA */
+	reg_val = 0;
+	reg_val |= (xfer->len - 1) & TX_DMA_LEN;
+	writel(reg_val, mdata->base + SPIS_DMA_CFG_REG);
+
+	reg_val = readl(mdata->base + SPIS_DMA_CFG_REG);
+	if (xfer->tx_buf)
+		reg_val |= TX_DMA_EN;
+	if (xfer->rx_buf)
+		reg_val |= RX_DMA_EN;
+	reg_val |= TX_DMA_TRIG_EN;
+	writel(reg_val, mdata->base + SPIS_DMA_CFG_REG);
+
+	ret = mtk_spi_slave_wait_for_completion(mdata);
+	if (ret)
+		goto unmap_rxdma;
+
+	return 0;
+
+unmap_rxdma:
+	if (xfer->rx_buf)
+		dma_unmap_single(dev, xfer->rx_dma,
+				 xfer->len, DMA_FROM_DEVICE);
+
+unmap_txdma:
+	if (xfer->tx_buf)
+		dma_unmap_single(dev, xfer->tx_dma,
+				 xfer->len, DMA_TO_DEVICE);
+
+disable_transfer:
+	mtk_spi_slave_disable_dma(mdata);
+	mtk_spi_slave_disable_xfer(mdata);
+	writel(SPIS_SOFT_RST, mdata->base + SPIS_SOFT_RST_REG);
+
+	return ret;
+}
+
+static int mtk_spi_slave_transfer_one(struct spi_controller *ctlr,
+				      struct spi_device *spi,
+				      struct spi_transfer *xfer)
+{
+	struct mtk_spi_slave *mdata = spi_controller_get_devdata(ctlr);
+
+	reinit_completion(&mdata->xfer_done);
+	mdata->slave_aborted = false;
+	mdata->cur_transfer = xfer;
+
+	if (xfer->len > MTK_SPI_SLAVE_MAX_FIFO_SIZE)
+		return mtk_spi_slave_dma_transfer(ctlr, spi, xfer);
+	else
+		return mtk_spi_slave_fifo_transfer(ctlr, spi, xfer);
+}
+
+static int mtk_spi_slave_setup(struct spi_device *spi)
+{
+	struct mtk_spi_slave *mdata = spi_controller_get_devdata(spi->master);
+	u32 reg_val;
+
+	reg_val = DMA_DONE_EN | DATA_DONE_EN |
+		  RSTA_DONE_EN | CMD_INVALID_EN;
+	writel(reg_val, mdata->base + SPIS_IRQ_EN_REG);
+
+	reg_val = DMA_DONE_MASK | DATA_DONE_MASK |
+		  RSTA_DONE_MASK | CMD_INVALID_MASK;
+	writel(reg_val, mdata->base + SPIS_IRQ_MASK_REG);
+
+	mtk_spi_slave_disable_dma(mdata);
+	mtk_spi_slave_disable_xfer(mdata);
+
+	return 0;
+}
+
+static int mtk_slave_abort(struct spi_controller *ctlr)
+{
+	struct mtk_spi_slave *mdata = spi_controller_get_devdata(ctlr);
+
+	mdata->slave_aborted = true;
+	complete(&mdata->xfer_done);
+
+	return 0;
+}
+
+static irqreturn_t mtk_spi_slave_interrupt(int irq, void *dev_id)
+{
+	struct spi_controller *ctlr = dev_id;
+	struct mtk_spi_slave *mdata = spi_controller_get_devdata(ctlr);
+	struct spi_transfer *trans = mdata->cur_transfer;
+	u32 int_status, reg_val, cnt, remainder;
+
+	int_status = readl(mdata->base + SPIS_IRQ_ST_REG);
+	writel(int_status, mdata->base + SPIS_IRQ_CLR_REG);
+
+	if (!trans)
+		return IRQ_NONE;
+
+	if ((int_status & DMA_DONE_ST) &&
+	    ((int_status & DATA_DONE_ST) ||
+	    (int_status & RSTA_DONE_ST))) {
+		writel(SPIS_SOFT_RST, mdata->base + SPIS_SOFT_RST_REG);
+
+		if (trans->tx_buf)
+			dma_unmap_single(mdata->dev, trans->tx_dma,
+					 trans->len, DMA_TO_DEVICE);
+		if (trans->rx_buf)
+			dma_unmap_single(mdata->dev, trans->rx_dma,
+					 trans->len, DMA_FROM_DEVICE);
+
+		mtk_spi_slave_disable_dma(mdata);
+		mtk_spi_slave_disable_xfer(mdata);
+	}
+
+	if ((!(int_status & DMA_DONE_ST)) &&
+	    ((int_status & DATA_DONE_ST) ||
+	    (int_status & RSTA_DONE_ST))) {
+		cnt = trans->len / 4;
+		if (trans->rx_buf)
+			ioread32_rep(mdata->base + SPIS_RX_DATA_REG,
+				     trans->rx_buf, cnt);
+		remainder = trans->len % 4;
+		if (trans->rx_buf && remainder > 0) {
+			reg_val = readl(mdata->base + SPIS_RX_DATA_REG);
+			memcpy(trans->rx_buf + (cnt * 4),
+			       &reg_val, remainder);
+		}
+
+		mtk_spi_slave_disable_xfer(mdata);
+	}
+
+	if (int_status & CMD_INVALID_ST) {
+		dev_warn(&ctlr->dev, "cmd invalid\n");
+		return IRQ_NONE;
+	}
+
+	mdata->cur_transfer = NULL;
+	complete(&mdata->xfer_done);
+
+	return IRQ_HANDLED;
+}
+
+static int mtk_spi_slave_probe(struct platform_device *pdev)
+{
+	struct spi_controller *ctlr;
+	struct mtk_spi_slave *mdata;
+	struct resource *res;
+	int irq, ret;
+
+	ctlr = spi_alloc_slave(&pdev->dev, sizeof(*mdata));
+	if (!ctlr) {
+		dev_err(&pdev->dev, "failed to alloc spi slave\n");
+		return -ENOMEM;
+	}
+
+	ctlr->auto_runtime_pm = true;
+	ctlr->dev.of_node = pdev->dev.of_node;
+	ctlr->mode_bits = SPI_CPOL | SPI_CPHA;
+	ctlr->mode_bits |= SPI_LSB_FIRST;
+
+	ctlr->prepare_message = mtk_spi_slave_prepare_message;
+	ctlr->transfer_one = mtk_spi_slave_transfer_one;
+	ctlr->setup = mtk_spi_slave_setup;
+	ctlr->slave_abort = mtk_slave_abort;
+
+	mdata = spi_controller_get_devdata(ctlr);
+
+	platform_set_drvdata(pdev, ctlr);
+
+	init_completion(&mdata->xfer_done);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		ret = -ENODEV;
+		dev_err(&pdev->dev, "failed to determine base address\n");
+		goto err_put_ctlr;
+	}
+
+	mdata->dev = &pdev->dev;
+
+	mdata->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(mdata->base)) {
+		ret = PTR_ERR(mdata->base);
+		goto err_put_ctlr;
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		ret = irq;
+		goto err_put_ctlr;
+	}
+
+	ret = devm_request_irq(&pdev->dev, irq, mtk_spi_slave_interrupt,
+			       IRQF_TRIGGER_NONE, dev_name(&pdev->dev), ctlr);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to register irq (%d)\n", ret);
+		goto err_put_ctlr;
+	}
+
+	mdata->spi_clk = devm_clk_get(&pdev->dev, "spi");
+	if (IS_ERR(mdata->spi_clk)) {
+		ret = PTR_ERR(mdata->spi_clk);
+		dev_err(&pdev->dev, "failed to get spi-clk: %d\n", ret);
+		goto err_put_ctlr;
+	}
+
+	ret = clk_prepare_enable(mdata->spi_clk);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "failed to enable spi_clk (%d)\n", ret);
+		goto err_put_ctlr;
+	}
+
+	pm_runtime_enable(&pdev->dev);
+
+	ret = devm_spi_register_controller(&pdev->dev, ctlr);
+	if (ret) {
+		dev_err(&pdev->dev,
+			"failed to register slave controller(%d)\n", ret);
+		clk_disable_unprepare(mdata->spi_clk);
+		goto err_disable_runtime_pm;
+	}
+
+	clk_disable_unprepare(mdata->spi_clk);
+
+	return 0;
+
+err_disable_runtime_pm:
+	pm_runtime_disable(&pdev->dev);
+err_put_ctlr:
+	spi_controller_put(ctlr);
+
+	return ret;
+}
+
+static int mtk_spi_slave_remove(struct platform_device *pdev)
+{
+	pm_runtime_disable(&pdev->dev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int mtk_spi_slave_suspend(struct device *dev)
+{
+	struct spi_controller *ctlr = dev_get_drvdata(dev);
+	struct mtk_spi_slave *mdata = spi_controller_get_devdata(ctlr);
+	int ret;
+
+	ret = spi_controller_suspend(ctlr);
+	if (ret)
+		return ret;
+
+	if (!pm_runtime_suspended(dev))
+		clk_disable_unprepare(mdata->spi_clk);
+
+	return ret;
+}
+
+static int mtk_spi_slave_resume(struct device *dev)
+{
+	struct spi_controller *ctlr = dev_get_drvdata(dev);
+	struct mtk_spi_slave *mdata = spi_controller_get_devdata(ctlr);
+	int ret;
+
+	if (!pm_runtime_suspended(dev)) {
+		ret = clk_prepare_enable(mdata->spi_clk);
+		if (ret < 0) {
+			dev_err(dev, "failed to enable spi_clk (%d)\n", ret);
+			return ret;
+		}
+	}
+
+	ret = spi_controller_resume(ctlr);
+	if (ret < 0)
+		clk_disable_unprepare(mdata->spi_clk);
+
+	return ret;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+#ifdef CONFIG_PM
+static int mtk_spi_slave_runtime_suspend(struct device *dev)
+{
+	struct spi_controller *ctlr = dev_get_drvdata(dev);
+	struct mtk_spi_slave *mdata = spi_controller_get_devdata(ctlr);
+
+	clk_disable_unprepare(mdata->spi_clk);
+
+	return 0;
+}
+
+static int mtk_spi_slave_runtime_resume(struct device *dev)
+{
+	struct spi_controller *ctlr = dev_get_drvdata(dev);
+	struct mtk_spi_slave *mdata = spi_controller_get_devdata(ctlr);
+	int ret;
+
+	ret = clk_prepare_enable(mdata->spi_clk);
+	if (ret < 0) {
+		dev_err(dev, "failed to enable spi_clk (%d)\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+#endif /* CONFIG_PM */
+
+static const struct dev_pm_ops mtk_spi_slave_pm = {
+	SET_SYSTEM_SLEEP_PM_OPS(mtk_spi_slave_suspend, mtk_spi_slave_resume)
+	SET_RUNTIME_PM_OPS(mtk_spi_slave_runtime_suspend,
+			   mtk_spi_slave_runtime_resume, NULL)
+};
+
+static struct platform_driver mtk_spi_slave_driver = {
+	.driver = {
+		.name = "mtk-spi-slave",
+		.pm	= &mtk_spi_slave_pm,
+		.of_match_table = mtk_spi_slave_of_match,
+	},
+	.probe = mtk_spi_slave_probe,
+	.remove = mtk_spi_slave_remove,
+};
+
+module_platform_driver(mtk_spi_slave_driver);
+
+MODULE_DESCRIPTION("MTK SPI Slave Controller driver");
+MODULE_AUTHOR("Leilk Liu <leilk.liu@mediatek.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:mtk-spi-slave");
diff --git a/drivers/spi/spi-slave-system-control.c b/drivers/spi/spi-slave-system-control.c
index c0257e9..169f3d5 100644
--- a/drivers/spi/spi-slave-system-control.c
+++ b/drivers/spi/spi-slave-system-control.c
@@ -60,6 +60,7 @@
 	case CMD_REBOOT:
 		dev_info(&priv->spi->dev, "Rebooting system...\n");
 		kernel_restart(NULL);
+		break;
 
 	case CMD_POWEROFF:
 		dev_info(&priv->spi->dev, "Powering off system...\n");
diff --git a/drivers/spi/spi-sprd-adi.c b/drivers/spi/spi-sprd-adi.c
index df5960b..9a05128 100644
--- a/drivers/spi/spi-sprd-adi.c
+++ b/drivers/spi/spi-sprd-adi.c
@@ -86,6 +86,7 @@
 #define BIT_WDG_EN			BIT(2)
 
 /* Definition of PMIC reset status register */
+#define HWRST_STATUS_SECURITY		0x02
 #define HWRST_STATUS_RECOVERY		0x20
 #define HWRST_STATUS_NORMAL		0x40
 #define HWRST_STATUS_ALARM		0x50
@@ -97,6 +98,8 @@
 #define HWRST_STATUS_AUTODLOADER	0xa0
 #define HWRST_STATUS_IQMODE		0xb0
 #define HWRST_STATUS_SPRDISK		0xc0
+#define HWRST_STATUS_FACTORYTEST	0xe0
+#define HWRST_STATUS_WATCHDOG		0xf0
 
 /* Use default timeout 50 ms that converts to watchdog values */
 #define WDG_LOAD_VAL			((50 * 1000) / 32768)
@@ -162,14 +165,16 @@
 	int read_timeout = ADI_READ_TIMEOUT;
 	unsigned long flags;
 	u32 val, rd_addr;
-	int ret;
+	int ret = 0;
 
-	ret = hwspin_lock_timeout_irqsave(sadi->hwlock,
-					  ADI_HWSPINLOCK_TIMEOUT,
-					  &flags);
-	if (ret) {
-		dev_err(sadi->dev, "get the hw lock failed\n");
-		return ret;
+	if (sadi->hwlock) {
+		ret = hwspin_lock_timeout_irqsave(sadi->hwlock,
+						  ADI_HWSPINLOCK_TIMEOUT,
+						  &flags);
+		if (ret) {
+			dev_err(sadi->dev, "get the hw lock failed\n");
+			return ret;
+		}
 	}
 
 	/*
@@ -216,7 +221,8 @@
 	*read_val = val & RD_VALUE_MASK;
 
 out:
-	hwspin_unlock_irqrestore(sadi->hwlock, &flags);
+	if (sadi->hwlock)
+		hwspin_unlock_irqrestore(sadi->hwlock, &flags);
 	return ret;
 }
 
@@ -227,12 +233,14 @@
 	unsigned long flags;
 	int ret;
 
-	ret = hwspin_lock_timeout_irqsave(sadi->hwlock,
-					  ADI_HWSPINLOCK_TIMEOUT,
-					  &flags);
-	if (ret) {
-		dev_err(sadi->dev, "get the hw lock failed\n");
-		return ret;
+	if (sadi->hwlock) {
+		ret = hwspin_lock_timeout_irqsave(sadi->hwlock,
+						  ADI_HWSPINLOCK_TIMEOUT,
+						  &flags);
+		if (ret) {
+			dev_err(sadi->dev, "get the hw lock failed\n");
+			return ret;
+		}
 	}
 
 	ret = sprd_adi_drain_fifo(sadi);
@@ -258,7 +266,8 @@
 	}
 
 out:
-	hwspin_unlock_irqrestore(sadi->hwlock, &flags);
+	if (sadi->hwlock)
+		hwspin_unlock_irqrestore(sadi->hwlock, &flags);
 	return ret;
 }
 
@@ -307,6 +316,18 @@
 	return 0;
 }
 
+static void sprd_adi_set_wdt_rst_mode(struct sprd_adi *sadi)
+{
+#ifdef CONFIG_SPRD_WATCHDOG
+	u32 val;
+
+	/* Set default watchdog reboot mode */
+	sprd_adi_read(sadi, sadi->slave_pbase + PMIC_RST_STATUS, &val);
+	val |= HWRST_STATUS_WATCHDOG;
+	sprd_adi_write(sadi, sadi->slave_pbase + PMIC_RST_STATUS, val);
+#endif
+}
+
 static int sprd_adi_restart_handler(struct notifier_block *this,
 				    unsigned long mode, void *cmd)
 {
@@ -336,11 +357,16 @@
 		reboot_mode = HWRST_STATUS_IQMODE;
 	else if (!strncmp(cmd, "sprdisk", 7))
 		reboot_mode = HWRST_STATUS_SPRDISK;
+	else if (!strncmp(cmd, "tospanic", 8))
+		reboot_mode = HWRST_STATUS_SECURITY;
+	else if (!strncmp(cmd, "factorytest", 11))
+		reboot_mode = HWRST_STATUS_FACTORYTEST;
 	else
 		reboot_mode = HWRST_STATUS_NORMAL;
 
 	/* Record the reboot mode */
 	sprd_adi_read(sadi, sadi->slave_pbase + PMIC_RST_STATUS, &val);
+	val &= ~HWRST_STATUS_WATCHDOG;
 	val |= reboot_mode;
 	sprd_adi_write(sadi, sadi->slave_pbase + PMIC_RST_STATUS, val);
 
@@ -380,9 +406,6 @@
 	const __be32 *list;
 	u32 tmp;
 
-	/* Address bits select default 12 bits */
-	writel_relaxed(0, sadi->base + REG_ADI_CTRL0);
-
 	/* Set all channels as default priority */
 	writel_relaxed(0, sadi->base + REG_ADI_CHN_PRIL);
 	writel_relaxed(0, sadi->base + REG_ADI_CHN_PRIH);
@@ -459,19 +482,30 @@
 	sadi->slave_pbase = res->start + ADI_SLAVE_OFFSET;
 	sadi->ctlr = ctlr;
 	sadi->dev = &pdev->dev;
-	ret = of_hwspin_lock_get_id_byname(np, "adi");
-	if (ret < 0) {
-		dev_err(&pdev->dev, "can not get the hardware spinlock\n");
-		goto put_ctlr;
-	}
-
-	sadi->hwlock = devm_hwspin_lock_request_specific(&pdev->dev, ret);
-	if (!sadi->hwlock) {
-		ret = -ENXIO;
-		goto put_ctlr;
+	ret = of_hwspin_lock_get_id(np, 0);
+	if (ret > 0 || (IS_ENABLED(CONFIG_HWSPINLOCK) && ret == 0)) {
+		sadi->hwlock =
+			devm_hwspin_lock_request_specific(&pdev->dev, ret);
+		if (!sadi->hwlock) {
+			ret = -ENXIO;
+			goto put_ctlr;
+		}
+	} else {
+		switch (ret) {
+		case -ENOENT:
+			dev_info(&pdev->dev, "no hardware spinlock supplied\n");
+			break;
+		default:
+			dev_err(&pdev->dev,
+				"failed to find hwlock id, %d\n", ret);
+			/* fall-through */
+		case -EPROBE_DEFER:
+			goto put_ctlr;
+		}
 	}
 
 	sprd_adi_hw_init(sadi);
+	sprd_adi_set_wdt_rst_mode(sadi);
 
 	ctlr->dev.of_node = pdev->dev.of_node;
 	ctlr->bus_num = pdev->id;
diff --git a/drivers/spi/spi-sprd.c b/drivers/spi/spi-sprd.c
new file mode 100644
index 0000000..8c9021b
--- /dev/null
+++ b/drivers/spi/spi-sprd.c
@@ -0,0 +1,1083 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2018 Spreadtrum Communications Inc.
+
+#include <linux/clk.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/dma/sprd-dma.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/spi/spi.h>
+
+#define SPRD_SPI_TXD			0x0
+#define SPRD_SPI_CLKD			0x4
+#define SPRD_SPI_CTL0			0x8
+#define SPRD_SPI_CTL1			0xc
+#define SPRD_SPI_CTL2			0x10
+#define SPRD_SPI_CTL3			0x14
+#define SPRD_SPI_CTL4			0x18
+#define SPRD_SPI_CTL5			0x1c
+#define SPRD_SPI_INT_EN			0x20
+#define SPRD_SPI_INT_CLR		0x24
+#define SPRD_SPI_INT_RAW_STS		0x28
+#define SPRD_SPI_INT_MASK_STS		0x2c
+#define SPRD_SPI_STS1			0x30
+#define SPRD_SPI_STS2			0x34
+#define SPRD_SPI_DSP_WAIT		0x38
+#define SPRD_SPI_STS3			0x3c
+#define SPRD_SPI_CTL6			0x40
+#define SPRD_SPI_STS4			0x44
+#define SPRD_SPI_FIFO_RST		0x48
+#define SPRD_SPI_CTL7			0x4c
+#define SPRD_SPI_STS5			0x50
+#define SPRD_SPI_CTL8			0x54
+#define SPRD_SPI_CTL9			0x58
+#define SPRD_SPI_CTL10			0x5c
+#define SPRD_SPI_CTL11			0x60
+#define SPRD_SPI_CTL12			0x64
+#define SPRD_SPI_STS6			0x68
+#define SPRD_SPI_STS7			0x6c
+#define SPRD_SPI_STS8			0x70
+#define SPRD_SPI_STS9			0x74
+
+/* Bits & mask definition for register CTL0 */
+#define SPRD_SPI_SCK_REV		BIT(13)
+#define SPRD_SPI_NG_TX			BIT(1)
+#define SPRD_SPI_NG_RX			BIT(0)
+#define SPRD_SPI_CHNL_LEN_MASK		GENMASK(4, 0)
+#define SPRD_SPI_CSN_MASK		GENMASK(11, 8)
+#define SPRD_SPI_CS0_VALID		BIT(8)
+
+/* Bits & mask definition for register SPI_INT_EN */
+#define SPRD_SPI_TX_END_INT_EN		BIT(8)
+#define SPRD_SPI_RX_END_INT_EN		BIT(9)
+
+/* Bits & mask definition for register SPI_INT_RAW_STS */
+#define SPRD_SPI_TX_END_RAW		BIT(8)
+#define SPRD_SPI_RX_END_RAW		BIT(9)
+
+/* Bits & mask definition for register SPI_INT_CLR */
+#define SPRD_SPI_TX_END_CLR		BIT(8)
+#define SPRD_SPI_RX_END_CLR		BIT(9)
+
+/* Bits & mask definition for register INT_MASK_STS */
+#define SPRD_SPI_MASK_RX_END		BIT(9)
+#define SPRD_SPI_MASK_TX_END		BIT(8)
+
+/* Bits & mask definition for register STS2 */
+#define SPRD_SPI_TX_BUSY		BIT(8)
+
+/* Bits & mask definition for register CTL1 */
+#define SPRD_SPI_RX_MODE		BIT(12)
+#define SPRD_SPI_TX_MODE		BIT(13)
+#define SPRD_SPI_RTX_MD_MASK		GENMASK(13, 12)
+
+/* Bits & mask definition for register CTL2 */
+#define SPRD_SPI_DMA_EN			BIT(6)
+
+/* Bits & mask definition for register CTL4 */
+#define SPRD_SPI_START_RX		BIT(9)
+#define SPRD_SPI_ONLY_RECV_MASK		GENMASK(8, 0)
+
+/* Bits & mask definition for register SPI_INT_CLR */
+#define SPRD_SPI_RX_END_INT_CLR		BIT(9)
+#define SPRD_SPI_TX_END_INT_CLR		BIT(8)
+
+/* Bits & mask definition for register SPI_INT_RAW */
+#define SPRD_SPI_RX_END_IRQ		BIT(9)
+#define SPRD_SPI_TX_END_IRQ		BIT(8)
+
+/* Bits & mask definition for register CTL12 */
+#define SPRD_SPI_SW_RX_REQ		BIT(0)
+#define SPRD_SPI_SW_TX_REQ		BIT(1)
+
+/* Bits & mask definition for register CTL7 */
+#define SPRD_SPI_DATA_LINE2_EN		BIT(15)
+#define SPRD_SPI_MODE_MASK		GENMASK(5, 3)
+#define SPRD_SPI_MODE_OFFSET		3
+#define SPRD_SPI_3WIRE_MODE		4
+#define SPRD_SPI_4WIRE_MODE		0
+
+/* Bits & mask definition for register CTL8 */
+#define SPRD_SPI_TX_MAX_LEN_MASK	GENMASK(19, 0)
+#define SPRD_SPI_TX_LEN_H_MASK		GENMASK(3, 0)
+#define SPRD_SPI_TX_LEN_H_OFFSET	16
+
+/* Bits & mask definition for register CTL9 */
+#define SPRD_SPI_TX_LEN_L_MASK		GENMASK(15, 0)
+
+/* Bits & mask definition for register CTL10 */
+#define SPRD_SPI_RX_MAX_LEN_MASK	GENMASK(19, 0)
+#define SPRD_SPI_RX_LEN_H_MASK		GENMASK(3, 0)
+#define SPRD_SPI_RX_LEN_H_OFFSET	16
+
+/* Bits & mask definition for register CTL11 */
+#define SPRD_SPI_RX_LEN_L_MASK		GENMASK(15, 0)
+
+/* Minimum & maximum word delay cycles */
+#define SPRD_SPI_MIN_DELAY_CYCLE	14
+#define SPRD_SPI_MAX_DELAY_CYCLE	130
+
+#define SPRD_SPI_FIFO_SIZE		32
+#define SPRD_SPI_CHIP_CS_NUM		0x4
+#define SPRD_SPI_CHNL_LEN		2
+#define SPRD_SPI_DEFAULT_SOURCE		26000000
+#define SPRD_SPI_MAX_SPEED_HZ		48000000
+#define SPRD_SPI_AUTOSUSPEND_DELAY	100
+#define SPRD_SPI_DMA_STEP		8
+
+enum sprd_spi_dma_channel {
+	SPRD_SPI_RX,
+	SPRD_SPI_TX,
+	SPRD_SPI_MAX,
+};
+
+struct sprd_spi_dma {
+	bool enable;
+	struct dma_chan *dma_chan[SPRD_SPI_MAX];
+	enum dma_slave_buswidth width;
+	u32 fragmens_len;
+	u32 rx_len;
+};
+
+struct sprd_spi {
+	void __iomem *base;
+	phys_addr_t phy_base;
+	struct device *dev;
+	struct clk *clk;
+	int irq;
+	u32 src_clk;
+	u32 hw_mode;
+	u32 trans_len;
+	u32 trans_mode;
+	u32 word_delay;
+	u32 hw_speed_hz;
+	u32 len;
+	int status;
+	struct sprd_spi_dma dma;
+	struct completion xfer_completion;
+	const void *tx_buf;
+	void *rx_buf;
+	int (*read_bufs)(struct sprd_spi *ss, u32 len);
+	int (*write_bufs)(struct sprd_spi *ss, u32 len);
+};
+
+static u32 sprd_spi_transfer_max_timeout(struct sprd_spi *ss,
+					 struct spi_transfer *t)
+{
+	/*
+	 * The time spent on transmission of the full FIFO data is the maximum
+	 * SPI transmission time.
+	 */
+	u32 size = t->bits_per_word * SPRD_SPI_FIFO_SIZE;
+	u32 bit_time_us = DIV_ROUND_UP(USEC_PER_SEC, ss->hw_speed_hz);
+	u32 total_time_us = size * bit_time_us;
+	/*
+	 * The hardware inserts an interval between consecutive data words,
+	 * so the total transmission time must also include that interval.
+	 */
+	u32 interval_cycle = SPRD_SPI_FIFO_SIZE * ss->word_delay;
+	u32 interval_time_us = DIV_ROUND_UP(interval_cycle * USEC_PER_SEC,
+					    ss->src_clk);
+
+	return total_time_us + interval_time_us;
+}
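+
+/*
+ * Worked example of the timeout above (the SCK rate is an assumed value for
+ * illustration): at 8 bits per word and a 1 MHz SCK, a full 32-word FIFO
+ * takes 8 * 32 * 1 us = 256 us; with the 26 MHz default source clock and the
+ * minimum word delay of 14 cycles, the interval adds
+ * DIV_ROUND_UP(32 * 14 * USEC_PER_SEC, 26000000) = 18 us, about 274 us total.
+ */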
+
+static int sprd_spi_wait_for_tx_end(struct sprd_spi *ss, struct spi_transfer *t)
+{
+	u32 val, us;
+	int ret;
+
+	us = sprd_spi_transfer_max_timeout(ss, t);
+	ret = readl_relaxed_poll_timeout(ss->base + SPRD_SPI_INT_RAW_STS, val,
+					 val & SPRD_SPI_TX_END_IRQ, 0, us);
+	if (ret) {
+		dev_err(ss->dev, "SPI error, spi send timeout!\n");
+		return ret;
+	}
+
+	ret = readl_relaxed_poll_timeout(ss->base + SPRD_SPI_STS2, val,
+					 !(val & SPRD_SPI_TX_BUSY), 0, us);
+	if (ret) {
+		dev_err(ss->dev, "SPI error, spi busy timeout!\n");
+		return ret;
+	}
+
+	writel_relaxed(SPRD_SPI_TX_END_INT_CLR, ss->base + SPRD_SPI_INT_CLR);
+
+	return 0;
+}
+
+static int sprd_spi_wait_for_rx_end(struct sprd_spi *ss, struct spi_transfer *t)
+{
+	u32 val, us;
+	int ret;
+
+	us = sprd_spi_transfer_max_timeout(ss, t);
+	ret = readl_relaxed_poll_timeout(ss->base + SPRD_SPI_INT_RAW_STS, val,
+					 val & SPRD_SPI_RX_END_IRQ, 0, us);
+	if (ret) {
+		dev_err(ss->dev, "SPI error, spi rx timeout!\n");
+		return ret;
+	}
+
+	writel_relaxed(SPRD_SPI_RX_END_INT_CLR, ss->base + SPRD_SPI_INT_CLR);
+
+	return 0;
+}
+
+static void sprd_spi_tx_req(struct sprd_spi *ss)
+{
+	writel_relaxed(SPRD_SPI_SW_TX_REQ, ss->base + SPRD_SPI_CTL12);
+}
+
+static void sprd_spi_rx_req(struct sprd_spi *ss)
+{
+	writel_relaxed(SPRD_SPI_SW_RX_REQ, ss->base + SPRD_SPI_CTL12);
+}
+
+static void sprd_spi_enter_idle(struct sprd_spi *ss)
+{
+	u32 val = readl_relaxed(ss->base + SPRD_SPI_CTL1);
+
+	val &= ~SPRD_SPI_RTX_MD_MASK;
+	writel_relaxed(val, ss->base + SPRD_SPI_CTL1);
+}
+
+static void sprd_spi_set_transfer_bits(struct sprd_spi *ss, u32 bits)
+{
+	u32 val = readl_relaxed(ss->base + SPRD_SPI_CTL0);
+
+	/* Set the valid bits for every transaction */
+	val &= ~(SPRD_SPI_CHNL_LEN_MASK << SPRD_SPI_CHNL_LEN);
+	val |= bits << SPRD_SPI_CHNL_LEN;
+	writel_relaxed(val, ss->base + SPRD_SPI_CTL0);
+}
+
+static void sprd_spi_set_tx_length(struct sprd_spi *ss, u32 length)
+{
+	u32 val = readl_relaxed(ss->base + SPRD_SPI_CTL8);
+
+	length &= SPRD_SPI_TX_MAX_LEN_MASK;
+	val &= ~SPRD_SPI_TX_LEN_H_MASK;
+	val |= length >> SPRD_SPI_TX_LEN_H_OFFSET;
+	writel_relaxed(val, ss->base + SPRD_SPI_CTL8);
+
+	val = length & SPRD_SPI_TX_LEN_L_MASK;
+	writel_relaxed(val, ss->base + SPRD_SPI_CTL9);
+}
+
+static void sprd_spi_set_rx_length(struct sprd_spi *ss, u32 length)
+{
+	u32 val = readl_relaxed(ss->base + SPRD_SPI_CTL10);
+
+	length &= SPRD_SPI_RX_MAX_LEN_MASK;
+	val &= ~SPRD_SPI_RX_LEN_H_MASK;
+	val |= length >> SPRD_SPI_RX_LEN_H_OFFSET;
+	writel_relaxed(val, ss->base + SPRD_SPI_CTL10);
+
+	val = length & SPRD_SPI_RX_LEN_L_MASK;
+	writel_relaxed(val, ss->base + SPRD_SPI_CTL11);
+}
+
+static void sprd_spi_chipselect(struct spi_device *sdev, bool cs)
+{
+	struct spi_controller *sctlr = sdev->controller;
+	struct sprd_spi *ss = spi_controller_get_devdata(sctlr);
+	u32 val;
+
+	val = readl_relaxed(ss->base + SPRD_SPI_CTL0);
+	/* The SPI controller will pull down the CS pin if cs is false */
+	if (!cs) {
+		val &= ~SPRD_SPI_CS0_VALID;
+		writel_relaxed(val, ss->base + SPRD_SPI_CTL0);
+	} else {
+		val |= SPRD_SPI_CSN_MASK;
+		writel_relaxed(val, ss->base + SPRD_SPI_CTL0);
+	}
+}
+
+static int sprd_spi_write_only_receive(struct sprd_spi *ss, u32 len)
+{
+	u32 val;
+
+	/* Clear the start receive bit and reset receive data number */
+	val = readl_relaxed(ss->base + SPRD_SPI_CTL4);
+	val &= ~(SPRD_SPI_START_RX | SPRD_SPI_ONLY_RECV_MASK);
+	writel_relaxed(val, ss->base + SPRD_SPI_CTL4);
+
+	/* Set the receive data length */
+	val = readl_relaxed(ss->base + SPRD_SPI_CTL4);
+	val |= len & SPRD_SPI_ONLY_RECV_MASK;
+	writel_relaxed(val, ss->base + SPRD_SPI_CTL4);
+
+	/* Trigger to receive data */
+	val = readl_relaxed(ss->base + SPRD_SPI_CTL4);
+	val |= SPRD_SPI_START_RX;
+	writel_relaxed(val, ss->base + SPRD_SPI_CTL4);
+
+	return len;
+}
+
+static int sprd_spi_write_bufs_u8(struct sprd_spi *ss, u32 len)
+{
+	u8 *tx_p = (u8 *)ss->tx_buf;
+	int i;
+
+	for (i = 0; i < len; i++)
+		writeb_relaxed(tx_p[i], ss->base + SPRD_SPI_TXD);
+
+	ss->tx_buf += i;
+	return i;
+}
+
+static int sprd_spi_write_bufs_u16(struct sprd_spi *ss, u32 len)
+{
+	u16 *tx_p = (u16 *)ss->tx_buf;
+	int i;
+
+	for (i = 0; i < len; i++)
+		writew_relaxed(tx_p[i], ss->base + SPRD_SPI_TXD);
+
+	ss->tx_buf += i << 1;
+	return i << 1;
+}
+
+static int sprd_spi_write_bufs_u32(struct sprd_spi *ss, u32 len)
+{
+	u32 *tx_p = (u32 *)ss->tx_buf;
+	int i;
+
+	for (i = 0; i < len; i++)
+		writel_relaxed(tx_p[i], ss->base + SPRD_SPI_TXD);
+
+	ss->tx_buf += i << 2;
+	return i << 2;
+}
+
+static int sprd_spi_read_bufs_u8(struct sprd_spi *ss, u32 len)
+{
+	u8 *rx_p = (u8 *)ss->rx_buf;
+	int i;
+
+	for (i = 0; i < len; i++)
+		rx_p[i] = readb_relaxed(ss->base + SPRD_SPI_TXD);
+
+	ss->rx_buf += i;
+	return i;
+}
+
+static int sprd_spi_read_bufs_u16(struct sprd_spi *ss, u32 len)
+{
+	u16 *rx_p = (u16 *)ss->rx_buf;
+	int i;
+
+	for (i = 0; i < len; i++)
+		rx_p[i] = readw_relaxed(ss->base + SPRD_SPI_TXD);
+
+	ss->rx_buf += i << 1;
+	return i << 1;
+}
+
+static int sprd_spi_read_bufs_u32(struct sprd_spi *ss, u32 len)
+{
+	u32 *rx_p = (u32 *)ss->rx_buf;
+	int i;
+
+	for (i = 0; i < len; i++)
+		rx_p[i] = readl_relaxed(ss->base + SPRD_SPI_TXD);
+
+	ss->rx_buf += i << 2;
+	return i << 2;
+}
+
+static int sprd_spi_txrx_bufs(struct spi_device *sdev, struct spi_transfer *t)
+{
+	struct sprd_spi *ss = spi_controller_get_devdata(sdev->controller);
+	u32 trans_len = ss->trans_len, len;
+	int ret, write_size = 0, read_size = 0;
+
+	while (trans_len) {
+		len = trans_len > SPRD_SPI_FIFO_SIZE ? SPRD_SPI_FIFO_SIZE :
+			trans_len;
+		if (ss->trans_mode & SPRD_SPI_TX_MODE) {
+			sprd_spi_set_tx_length(ss, len);
+			write_size += ss->write_bufs(ss, len);
+
+			/*
+			 * In 3-wire mode or dual TX line mode, we need to
+			 * request the controller to start the transfer.
+			 */
+			if (ss->hw_mode & SPI_3WIRE || ss->hw_mode & SPI_TX_DUAL)
+				sprd_spi_tx_req(ss);
+
+			ret = sprd_spi_wait_for_tx_end(ss, t);
+		} else {
+			sprd_spi_set_rx_length(ss, len);
+
+			/*
+			 * In 3-wire mode or dual TX line mode, we need to
+			 * request the controller to read.
+			 */
+			if (ss->hw_mode & SPI_3WIRE || ss->hw_mode & SPI_TX_DUAL)
+				sprd_spi_rx_req(ss);
+			else
+				write_size += ss->write_bufs(ss, len);
+
+			ret = sprd_spi_wait_for_rx_end(ss, t);
+		}
+
+		if (ret)
+			goto complete;
+
+		if (ss->trans_mode & SPRD_SPI_RX_MODE)
+			read_size += ss->read_bufs(ss, len);
+
+		trans_len -= len;
+	}
+
+	if (ss->trans_mode & SPRD_SPI_TX_MODE)
+		ret = write_size;
+	else
+		ret = read_size;
+complete:
+	sprd_spi_enter_idle(ss);
+
+	return ret;
+}
+
+static void sprd_spi_irq_enable(struct sprd_spi *ss)
+{
+	u32 val;
+
+	/* Clear interrupt status before enabling interrupts. */
+	writel_relaxed(SPRD_SPI_TX_END_CLR | SPRD_SPI_RX_END_CLR,
+		ss->base + SPRD_SPI_INT_CLR);
+	/* Enable SPI interrupt only in DMA mode. */
+	val = readl_relaxed(ss->base + SPRD_SPI_INT_EN);
+	writel_relaxed(val | SPRD_SPI_TX_END_INT_EN |
+		       SPRD_SPI_RX_END_INT_EN,
+		       ss->base + SPRD_SPI_INT_EN);
+}
+
+static void sprd_spi_irq_disable(struct sprd_spi *ss)
+{
+	writel_relaxed(0, ss->base + SPRD_SPI_INT_EN);
+}
+
+static void sprd_spi_dma_enable(struct sprd_spi *ss, bool enable)
+{
+	u32 val = readl_relaxed(ss->base + SPRD_SPI_CTL2);
+
+	if (enable)
+		val |= SPRD_SPI_DMA_EN;
+	else
+		val &= ~SPRD_SPI_DMA_EN;
+
+	writel_relaxed(val, ss->base + SPRD_SPI_CTL2);
+}
+
+static int sprd_spi_dma_submit(struct dma_chan *dma_chan,
+			       struct dma_slave_config *c,
+			       struct sg_table *sg,
+			       enum dma_transfer_direction dir)
+{
+	struct dma_async_tx_descriptor *desc;
+	dma_cookie_t cookie;
+	unsigned long flags;
+	int ret;
+
+	ret = dmaengine_slave_config(dma_chan, c);
+	if (ret < 0)
+		return ret;
+
+	flags = SPRD_DMA_FLAGS(SPRD_DMA_CHN_MODE_NONE, SPRD_DMA_NO_TRG,
+			       SPRD_DMA_FRAG_REQ, SPRD_DMA_TRANS_INT);
+	desc = dmaengine_prep_slave_sg(dma_chan, sg->sgl, sg->nents, dir, flags);
+	if (!desc)
+		return -ENODEV;
+
+	cookie = dmaengine_submit(desc);
+	if (dma_submit_error(cookie))
+		return dma_submit_error(cookie);
+
+	dma_async_issue_pending(dma_chan);
+
+	return 0;
+}
+
+static int sprd_spi_dma_rx_config(struct sprd_spi *ss, struct spi_transfer *t)
+{
+	struct dma_chan *dma_chan = ss->dma.dma_chan[SPRD_SPI_RX];
+	struct dma_slave_config config = {
+		.src_addr = ss->phy_base,
+		.src_addr_width = ss->dma.width,
+		.dst_addr_width = ss->dma.width,
+		.dst_maxburst = ss->dma.fragmens_len,
+	};
+	int ret;
+
+	ret = sprd_spi_dma_submit(dma_chan, &config, &t->rx_sg, DMA_DEV_TO_MEM);
+	if (ret)
+		return ret;
+
+	return ss->dma.rx_len;
+}
+
+static int sprd_spi_dma_tx_config(struct sprd_spi *ss, struct spi_transfer *t)
+{
+	struct dma_chan *dma_chan = ss->dma.dma_chan[SPRD_SPI_TX];
+	struct dma_slave_config config = {
+		.dst_addr = ss->phy_base,
+		.src_addr_width = ss->dma.width,
+		.dst_addr_width = ss->dma.width,
+		.src_maxburst = ss->dma.fragmens_len,
+	};
+	int ret;
+
+	ret = sprd_spi_dma_submit(dma_chan, &config, &t->tx_sg, DMA_MEM_TO_DEV);
+	if (ret)
+		return ret;
+
+	return t->len;
+}
+
+static int sprd_spi_dma_request(struct sprd_spi *ss)
+{
+	ss->dma.dma_chan[SPRD_SPI_RX] = dma_request_chan(ss->dev, "rx_chn");
+	if (IS_ERR_OR_NULL(ss->dma.dma_chan[SPRD_SPI_RX])) {
+		if (PTR_ERR(ss->dma.dma_chan[SPRD_SPI_RX]) == -EPROBE_DEFER)
+			return PTR_ERR(ss->dma.dma_chan[SPRD_SPI_RX]);
+
+		dev_err(ss->dev, "request RX DMA channel failed!\n");
+		return PTR_ERR(ss->dma.dma_chan[SPRD_SPI_RX]);
+	}
+
+	ss->dma.dma_chan[SPRD_SPI_TX]  = dma_request_chan(ss->dev, "tx_chn");
+	if (IS_ERR_OR_NULL(ss->dma.dma_chan[SPRD_SPI_TX])) {
+		if (PTR_ERR(ss->dma.dma_chan[SPRD_SPI_TX]) == -EPROBE_DEFER)
+			return PTR_ERR(ss->dma.dma_chan[SPRD_SPI_TX]);
+
+		dev_err(ss->dev, "request TX DMA channel failed!\n");
+		dma_release_channel(ss->dma.dma_chan[SPRD_SPI_RX]);
+		return PTR_ERR(ss->dma.dma_chan[SPRD_SPI_TX]);
+	}
+
+	return 0;
+}
+
+static void sprd_spi_dma_release(struct sprd_spi *ss)
+{
+	if (ss->dma.dma_chan[SPRD_SPI_RX])
+		dma_release_channel(ss->dma.dma_chan[SPRD_SPI_RX]);
+
+	if (ss->dma.dma_chan[SPRD_SPI_TX])
+		dma_release_channel(ss->dma.dma_chan[SPRD_SPI_TX]);
+}
+
+static int sprd_spi_dma_txrx_bufs(struct spi_device *sdev,
+				  struct spi_transfer *t)
+{
+	struct sprd_spi *ss = spi_master_get_devdata(sdev->master);
+	u32 trans_len = ss->trans_len;
+	int ret, write_size = 0;
+
+	reinit_completion(&ss->xfer_completion);
+	sprd_spi_irq_enable(ss);
+	if (ss->trans_mode & SPRD_SPI_TX_MODE) {
+		write_size = sprd_spi_dma_tx_config(ss, t);
+		sprd_spi_set_tx_length(ss, trans_len);
+
+		/*
+		 * In 3-wire mode or dual TX line mode, we need to request
+		 * the controller to start the transfer.
+		 */
+		if (ss->hw_mode & SPI_3WIRE || ss->hw_mode & SPI_TX_DUAL)
+			sprd_spi_tx_req(ss);
+	} else {
+		sprd_spi_set_rx_length(ss, trans_len);
+
+		/*
+		 * In 3-wire mode or dual TX line mode, we need to request
+		 * the controller to read.
+		 */
+		if (ss->hw_mode & SPI_3WIRE || ss->hw_mode & SPI_TX_DUAL)
+			sprd_spi_rx_req(ss);
+		else
+			write_size = ss->write_bufs(ss, trans_len);
+	}
+
+	if (write_size < 0) {
+		ret = write_size;
+		dev_err(ss->dev, "failed to write, ret = %d\n", ret);
+		goto trans_complete;
+	}
+
+	if (ss->trans_mode & SPRD_SPI_RX_MODE) {
+		/*
+		 * Set up the DMA receive data length, which must be an
+		 * integral multiple of the fragment length. When the total
+		 * length is shorter than one fragment, the DMA is configured
+		 * with the actual length instead.
+		 */
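+		/*
+		 * For example (illustrative length only): with 8 bits per
+		 * word the fragment length is 8 bytes, so for t->len == 30
+		 * the DMA receives 24 bytes and the remaining 6 are drained
+		 * by the PIO read in the RX interrupt handler.
+		 */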
+		ss->dma.rx_len = t->len > ss->dma.fragmens_len ?
+			(t->len - t->len % ss->dma.fragmens_len) :
+			 t->len;
+		ret = sprd_spi_dma_rx_config(ss, t);
+		if (ret < 0) {
+			dev_err(&sdev->dev,
+				"failed to configure rx DMA, ret = %d\n", ret);
+			goto trans_complete;
+		}
+	}
+
+	sprd_spi_dma_enable(ss, true);
+	wait_for_completion(&(ss->xfer_completion));
+
+	if (ss->trans_mode & SPRD_SPI_TX_MODE)
+		ret = write_size;
+	else
+		ret = ss->dma.rx_len;
+
+trans_complete:
+	sprd_spi_dma_enable(ss, false);
+	sprd_spi_enter_idle(ss);
+	sprd_spi_irq_disable(ss);
+
+	return ret;
+}
+
+static void sprd_spi_set_speed(struct sprd_spi *ss, u32 speed_hz)
+{
+	/*
+	 * Per the SPI datasheet, the prescaler is calculated as:
+	 * prescale = SPI source clock / (2 * SPI_freq) - 1;
+	 */
+	u32 clk_div = DIV_ROUND_UP(ss->src_clk, speed_hz << 1) - 1;
+
+	/* Save the real hardware speed */
+	ss->hw_speed_hz = (ss->src_clk >> 1) / (clk_div + 1);
+	writel_relaxed(clk_div, ss->base + SPRD_SPI_CLKD);
+}
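+
+/*
+ * Worked example of the prescaler above, assuming the 26 MHz default source
+ * clock: a requested 4 MHz SCK gives
+ * clk_div = DIV_ROUND_UP(26000000, 8000000) - 1 = 3, so the rate saved in
+ * hw_speed_hz is 13000000 / 4 = 3.25 MHz, never exceeding the request.
+ */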
+
+static void sprd_spi_init_hw(struct sprd_spi *ss, struct spi_transfer *t)
+{
+	u16 word_delay, interval;
+	u32 val;
+
+	val = readl_relaxed(ss->base + SPRD_SPI_CTL7);
+	val &= ~(SPRD_SPI_SCK_REV | SPRD_SPI_NG_TX | SPRD_SPI_NG_RX);
+	/* Set default chip selection, clock phase and clock polarity */
+	val |= ss->hw_mode & SPI_CPHA ? SPRD_SPI_NG_RX : SPRD_SPI_NG_TX;
+	val |= ss->hw_mode & SPI_CPOL ? SPRD_SPI_SCK_REV : 0;
+	writel_relaxed(val, ss->base + SPRD_SPI_CTL0);
+
+	/*
+	 * Set the interval between two SPI frames; per the datasheet the
+	 * interval is calculated as:
+	 * interval time (source clock cycles) = interval * 4 + 10.
+	 */
+	word_delay = clamp_t(u16, t->word_delay, SPRD_SPI_MIN_DELAY_CYCLE,
+			     SPRD_SPI_MAX_DELAY_CYCLE);
+	interval = DIV_ROUND_UP(word_delay - 10, 4);
+	ss->word_delay = interval * 4 + 10;
+	writel_relaxed(interval, ss->base + SPRD_SPI_CTL5);
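+	/*
+	 * Worked example of the formula above: a requested word_delay of 20
+	 * source-clock cycles gives interval = DIV_ROUND_UP(20 - 10, 4) = 3,
+	 * so the hardware actually inserts 3 * 4 + 10 = 22 cycles.
+	 */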
+
+	/* Reset SPI fifo */
+	writel_relaxed(1, ss->base + SPRD_SPI_FIFO_RST);
+	writel_relaxed(0, ss->base + SPRD_SPI_FIFO_RST);
+
+	/* Set SPI work mode */
+	val = readl_relaxed(ss->base + SPRD_SPI_CTL7);
+	val &= ~SPRD_SPI_MODE_MASK;
+
+	if (ss->hw_mode & SPI_3WIRE)
+		val |= SPRD_SPI_3WIRE_MODE << SPRD_SPI_MODE_OFFSET;
+	else
+		val |= SPRD_SPI_4WIRE_MODE << SPRD_SPI_MODE_OFFSET;
+
+	if (ss->hw_mode & SPI_TX_DUAL)
+		val |= SPRD_SPI_DATA_LINE2_EN;
+	else
+		val &= ~SPRD_SPI_DATA_LINE2_EN;
+
+	writel_relaxed(val, ss->base + SPRD_SPI_CTL7);
+}
+
+static int sprd_spi_setup_transfer(struct spi_device *sdev,
+				   struct spi_transfer *t)
+{
+	struct sprd_spi *ss = spi_controller_get_devdata(sdev->controller);
+	u8 bits_per_word = t->bits_per_word;
+	u32 val, mode = 0;
+
+	ss->len = t->len;
+	ss->tx_buf = t->tx_buf;
+	ss->rx_buf = t->rx_buf;
+
+	ss->hw_mode = sdev->mode;
+	sprd_spi_init_hw(ss, t);
+
+	/* Set transfer speed and valid bits */
+	sprd_spi_set_speed(ss, t->speed_hz);
+	sprd_spi_set_transfer_bits(ss, bits_per_word);
+
+	if (bits_per_word > 16)
+		bits_per_word = round_up(bits_per_word, 16);
+	else
+		bits_per_word = round_up(bits_per_word, 8);
+
+	switch (bits_per_word) {
+	case 8:
+		ss->trans_len = t->len;
+		ss->read_bufs = sprd_spi_read_bufs_u8;
+		ss->write_bufs = sprd_spi_write_bufs_u8;
+		ss->dma.width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+		ss->dma.fragmens_len = SPRD_SPI_DMA_STEP;
+		break;
+	case 16:
+		ss->trans_len = t->len >> 1;
+		ss->read_bufs = sprd_spi_read_bufs_u16;
+		ss->write_bufs = sprd_spi_write_bufs_u16;
+		ss->dma.width = DMA_SLAVE_BUSWIDTH_2_BYTES;
+		ss->dma.fragmens_len = SPRD_SPI_DMA_STEP << 1;
+		break;
+	case 32:
+		ss->trans_len = t->len >> 2;
+		ss->read_bufs = sprd_spi_read_bufs_u32;
+		ss->write_bufs = sprd_spi_write_bufs_u32;
+		ss->dma.width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+		ss->dma.fragmens_len = SPRD_SPI_DMA_STEP << 2;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Set transfer read or write mode */
+	val = readl_relaxed(ss->base + SPRD_SPI_CTL1);
+	val &= ~SPRD_SPI_RTX_MD_MASK;
+	if (t->tx_buf)
+		mode |= SPRD_SPI_TX_MODE;
+	if (t->rx_buf)
+		mode |= SPRD_SPI_RX_MODE;
+
+	writel_relaxed(val | mode, ss->base + SPRD_SPI_CTL1);
+
+	ss->trans_mode = mode;
+
+	/*
+	 * If in only receive mode, we need to trigger the SPI controller to
+	 * receive data automatically.
+	 */
+	if (ss->trans_mode == SPRD_SPI_RX_MODE)
+		ss->write_bufs = sprd_spi_write_only_receive;
+
+	return 0;
+}
+
+static int sprd_spi_transfer_one(struct spi_controller *sctlr,
+				 struct spi_device *sdev,
+				 struct spi_transfer *t)
+{
+	int ret;
+
+	ret = sprd_spi_setup_transfer(sdev, t);
+	if (ret)
+		goto setup_err;
+
+	if (sctlr->can_dma(sctlr, sdev, t))
+		ret = sprd_spi_dma_txrx_bufs(sdev, t);
+	else
+		ret = sprd_spi_txrx_bufs(sdev, t);
+
+	if (ret == t->len)
+		ret = 0;
+	else if (ret >= 0)
+		ret = -EREMOTEIO;
+
+setup_err:
+	spi_finalize_current_transfer(sctlr);
+
+	return ret;
+}
+
+static irqreturn_t sprd_spi_handle_irq(int irq, void *data)
+{
+	struct sprd_spi *ss = (struct sprd_spi *)data;
+	u32 val = readl_relaxed(ss->base + SPRD_SPI_INT_MASK_STS);
+
+	if (val & SPRD_SPI_MASK_TX_END) {
+		writel_relaxed(SPRD_SPI_TX_END_CLR, ss->base + SPRD_SPI_INT_CLR);
+		if (!(ss->trans_mode & SPRD_SPI_RX_MODE))
+			complete(&ss->xfer_completion);
+
+		return IRQ_HANDLED;
+	}
+
+	if (val & SPRD_SPI_MASK_RX_END) {
+		writel_relaxed(SPRD_SPI_RX_END_CLR, ss->base + SPRD_SPI_INT_CLR);
+		if (ss->dma.rx_len < ss->len) {
+			ss->rx_buf += ss->dma.rx_len;
+			ss->dma.rx_len +=
+				ss->read_bufs(ss, ss->len - ss->dma.rx_len);
+		}
+		complete(&ss->xfer_completion);
+
+		return IRQ_HANDLED;
+	}
+
+	return IRQ_NONE;
+}
+
+static int sprd_spi_irq_init(struct platform_device *pdev, struct sprd_spi *ss)
+{
+	int ret;
+
+	ss->irq = platform_get_irq(pdev, 0);
+	if (ss->irq < 0)
+		return ss->irq;
+
+	ret = devm_request_irq(&pdev->dev, ss->irq, sprd_spi_handle_irq,
+				0, pdev->name, ss);
+	if (ret)
+		dev_err(&pdev->dev, "failed to request spi irq %d, ret = %d\n",
+			ss->irq, ret);
+
+	return ret;
+}
+
+static int sprd_spi_clk_init(struct platform_device *pdev, struct sprd_spi *ss)
+{
+	struct clk *clk_spi, *clk_parent;
+
+	clk_spi = devm_clk_get(&pdev->dev, "spi");
+	if (IS_ERR(clk_spi)) {
+		dev_warn(&pdev->dev, "can't get the spi clock\n");
+		clk_spi = NULL;
+	}
+
+	clk_parent = devm_clk_get(&pdev->dev, "source");
+	if (IS_ERR(clk_parent)) {
+		dev_warn(&pdev->dev, "can't get the source clock\n");
+		clk_parent = NULL;
+	}
+
+	ss->clk = devm_clk_get(&pdev->dev, "enable");
+	if (IS_ERR(ss->clk)) {
+		dev_err(&pdev->dev, "can't get the enable clock\n");
+		return PTR_ERR(ss->clk);
+	}
+
+	if (!clk_set_parent(clk_spi, clk_parent))
+		ss->src_clk = clk_get_rate(clk_spi);
+	else
+		ss->src_clk = SPRD_SPI_DEFAULT_SOURCE;
+
+	return 0;
+}
+
+static bool sprd_spi_can_dma(struct spi_controller *sctlr,
+			     struct spi_device *spi, struct spi_transfer *t)
+{
+	struct sprd_spi *ss = spi_controller_get_devdata(sctlr);
+
+	return ss->dma.enable && (t->len > SPRD_SPI_FIFO_SIZE);
+}
+
+static int sprd_spi_dma_init(struct platform_device *pdev, struct sprd_spi *ss)
+{
+	int ret;
+
+	ret = sprd_spi_dma_request(ss);
+	if (ret) {
+		if (ret == -EPROBE_DEFER)
+			return ret;
+
+		dev_warn(&pdev->dev,
+			 "failed to request dma, enter no dma mode, ret = %d\n",
+			 ret);
+
+		return 0;
+	}
+
+	ss->dma.enable = true;
+
+	return 0;
+}
+
+static int sprd_spi_probe(struct platform_device *pdev)
+{
+	struct spi_controller *sctlr;
+	struct resource *res;
+	struct sprd_spi *ss;
+	int ret;
+
+	pdev->id = of_alias_get_id(pdev->dev.of_node, "spi");
+	sctlr = spi_alloc_master(&pdev->dev, sizeof(*ss));
+	if (!sctlr)
+		return -ENOMEM;
+
+	ss = spi_controller_get_devdata(sctlr);
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	ss->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(ss->base)) {
+		ret = PTR_ERR(ss->base);
+		goto free_controller;
+	}
+
+	ss->phy_base = res->start;
+	ss->dev = &pdev->dev;
+	sctlr->dev.of_node = pdev->dev.of_node;
+	sctlr->mode_bits = SPI_CPOL | SPI_CPHA | SPI_3WIRE | SPI_TX_DUAL;
+	sctlr->bus_num = pdev->id;
+	sctlr->set_cs = sprd_spi_chipselect;
+	sctlr->transfer_one = sprd_spi_transfer_one;
+	sctlr->can_dma = sprd_spi_can_dma;
+	sctlr->auto_runtime_pm = true;
+	sctlr->max_speed_hz = min_t(u32, ss->src_clk >> 1,
+				    SPRD_SPI_MAX_SPEED_HZ);
+
+	init_completion(&ss->xfer_completion);
+	platform_set_drvdata(pdev, sctlr);
+	ret = sprd_spi_clk_init(pdev, ss);
+	if (ret)
+		goto free_controller;
+
+	ret = sprd_spi_irq_init(pdev, ss);
+	if (ret)
+		goto free_controller;
+
+	ret = sprd_spi_dma_init(pdev, ss);
+	if (ret)
+		goto free_controller;
+
+	ret = clk_prepare_enable(ss->clk);
+	if (ret)
+		goto release_dma;
+
+	ret = pm_runtime_set_active(&pdev->dev);
+	if (ret < 0)
+		goto disable_clk;
+
+	pm_runtime_set_autosuspend_delay(&pdev->dev,
+					 SPRD_SPI_AUTOSUSPEND_DELAY);
+	pm_runtime_use_autosuspend(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
+	ret = pm_runtime_get_sync(&pdev->dev);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "failed to resume SPI controller\n");
+		goto err_rpm_put;
+	}
+
+	ret = devm_spi_register_controller(&pdev->dev, sctlr);
+	if (ret)
+		goto err_rpm_put;
+
+	pm_runtime_mark_last_busy(&pdev->dev);
+	pm_runtime_put_autosuspend(&pdev->dev);
+
+	return 0;
+
+err_rpm_put:
+	pm_runtime_put_noidle(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+disable_clk:
+	clk_disable_unprepare(ss->clk);
+release_dma:
+	sprd_spi_dma_release(ss);
+free_controller:
+	spi_controller_put(sctlr);
+
+	return ret;
+}
+
+static int sprd_spi_remove(struct platform_device *pdev)
+{
+	struct spi_controller *sctlr = platform_get_drvdata(pdev);
+	struct sprd_spi *ss = spi_controller_get_devdata(sctlr);
+	int ret;
+
+	ret = pm_runtime_get_sync(ss->dev);
+	if (ret < 0) {
+		dev_err(ss->dev, "failed to resume SPI controller\n");
+		return ret;
+	}
+
+	spi_controller_suspend(sctlr);
+
+	if (ss->dma.enable)
+		sprd_spi_dma_release(ss);
+	clk_disable_unprepare(ss->clk);
+	pm_runtime_put_noidle(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+
+	return 0;
+}
+
+static int __maybe_unused sprd_spi_runtime_suspend(struct device *dev)
+{
+	struct spi_controller *sctlr = dev_get_drvdata(dev);
+	struct sprd_spi *ss = spi_controller_get_devdata(sctlr);
+
+	if (ss->dma.enable)
+		sprd_spi_dma_release(ss);
+
+	clk_disable_unprepare(ss->clk);
+
+	return 0;
+}
+
+static int __maybe_unused sprd_spi_runtime_resume(struct device *dev)
+{
+	struct spi_controller *sctlr = dev_get_drvdata(dev);
+	struct sprd_spi *ss = spi_controller_get_devdata(sctlr);
+	int ret;
+
+	ret = clk_prepare_enable(ss->clk);
+	if (ret)
+		return ret;
+
+	if (!ss->dma.enable)
+		return 0;
+
+	ret = sprd_spi_dma_request(ss);
+	if (ret)
+		clk_disable_unprepare(ss->clk);
+
+	return ret;
+}
+
+static const struct dev_pm_ops sprd_spi_pm_ops = {
+	SET_RUNTIME_PM_OPS(sprd_spi_runtime_suspend,
+			   sprd_spi_runtime_resume, NULL)
+};
+
+static const struct of_device_id sprd_spi_of_match[] = {
+	{ .compatible = "sprd,sc9860-spi", },
+	{ /* sentinel */ }
+};
+
+static struct platform_driver sprd_spi_driver = {
+	.driver = {
+		.name = "sprd-spi",
+		.of_match_table = sprd_spi_of_match,
+		.pm = &sprd_spi_pm_ops,
+	},
+	.probe = sprd_spi_probe,
+	.remove  = sprd_spi_remove,
+};
+
+module_platform_driver(sprd_spi_driver);
+
+MODULE_DESCRIPTION("Spreadtrum SPI Controller driver");
+MODULE_AUTHOR("Lanqing Liu <lanqing.liu@spreadtrum.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/spi/spi-st-ssc4.c b/drivers/spi/spi-st-ssc4.c
index a4e43fc..0c24c49 100644
--- a/drivers/spi/spi-st-ssc4.c
+++ b/drivers/spi/spi-st-ssc4.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  Copyright (c) 2008-2014 STMicroelectronics Limited
  *
@@ -6,9 +7,6 @@
  *          Lee Jones <lee.jones@linaro.org>
  *
  *  SPI master mode controller driver, used in STMicroelectronics devices.
- *
- *  May be copied or modified under the terms of the GNU General Public
- *  License Version 2.0 only.  See linux/COPYING for more information.
  */
 
 #include <linux/clk.h>
@@ -300,7 +298,6 @@
 {
 	struct device_node *np = pdev->dev.of_node;
 	struct spi_master *master;
-	struct resource *res;
 	struct spi_st *spi_st;
 	int irq, ret = 0;
 	u32 var;
@@ -333,8 +330,7 @@
 	init_completion(&spi_st->done);
 
 	/* Get resources */
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	spi_st->base = devm_ioremap_resource(&pdev->dev, res);
+	spi_st->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(spi_st->base)) {
 		ret = PTR_ERR(spi_st->base);
 		goto clk_disable;
diff --git a/drivers/spi/spi-stm32-qspi.c b/drivers/spi/spi-stm32-qspi.c
new file mode 100644
index 0000000..9ac6f9f
--- /dev/null
+++ b/drivers/spi/spi-stm32-qspi.c
@@ -0,0 +1,684 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) STMicroelectronics 2018 - All Rights Reserved
+ * Author: Ludovic Barre <ludovic.barre@st.com> for STMicroelectronics.
+ */
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+#include <linux/sizes.h>
+#include <linux/spi/spi-mem.h>
+
+#define QSPI_CR			0x00
+#define CR_EN			BIT(0)
+#define CR_ABORT		BIT(1)
+#define CR_DMAEN		BIT(2)
+#define CR_TCEN			BIT(3)
+#define CR_SSHIFT		BIT(4)
+#define CR_DFM			BIT(6)
+#define CR_FSEL			BIT(7)
+#define CR_FTHRES_SHIFT		8
+#define CR_TEIE			BIT(16)
+#define CR_TCIE			BIT(17)
+#define CR_FTIE			BIT(18)
+#define CR_SMIE			BIT(19)
+#define CR_TOIE			BIT(20)
+#define CR_PRESC_MASK		GENMASK(31, 24)
+
+#define QSPI_DCR		0x04
+#define DCR_FSIZE_MASK		GENMASK(20, 16)
+
+#define QSPI_SR			0x08
+#define SR_TEF			BIT(0)
+#define SR_TCF			BIT(1)
+#define SR_FTF			BIT(2)
+#define SR_SMF			BIT(3)
+#define SR_TOF			BIT(4)
+#define SR_BUSY			BIT(5)
+#define SR_FLEVEL_MASK		GENMASK(13, 8)
+
+#define QSPI_FCR		0x0c
+#define FCR_CTEF		BIT(0)
+#define FCR_CTCF		BIT(1)
+
+#define QSPI_DLR		0x10
+
+#define QSPI_CCR		0x14
+#define CCR_INST_MASK		GENMASK(7, 0)
+#define CCR_IMODE_MASK		GENMASK(9, 8)
+#define CCR_ADMODE_MASK		GENMASK(11, 10)
+#define CCR_ADSIZE_MASK		GENMASK(13, 12)
+#define CCR_DCYC_MASK		GENMASK(22, 18)
+#define CCR_DMODE_MASK		GENMASK(25, 24)
+#define CCR_FMODE_MASK		GENMASK(27, 26)
+#define CCR_FMODE_INDW		(0U << 26)
+#define CCR_FMODE_INDR		(1U << 26)
+#define CCR_FMODE_APM		(2U << 26)
+#define CCR_FMODE_MM		(3U << 26)
+#define CCR_BUSWIDTH_0		0x0
+#define CCR_BUSWIDTH_1		0x1
+#define CCR_BUSWIDTH_2		0x2
+#define CCR_BUSWIDTH_4		0x3
+
+#define QSPI_AR			0x18
+#define QSPI_ABR		0x1c
+#define QSPI_DR			0x20
+#define QSPI_PSMKR		0x24
+#define QSPI_PSMAR		0x28
+#define QSPI_PIR		0x2c
+#define QSPI_LPTR		0x30
+
+#define STM32_QSPI_MAX_MMAP_SZ	SZ_256M
+#define STM32_QSPI_MAX_NORCHIP	2
+
+#define STM32_FIFO_TIMEOUT_US 30000
+#define STM32_BUSY_TIMEOUT_US 100000
+#define STM32_ABT_TIMEOUT_US 100000
+#define STM32_COMP_TIMEOUT_MS 1000
+
+struct stm32_qspi_flash {
+	struct stm32_qspi *qspi;
+	u32 cs;
+	u32 presc;
+};
+
+struct stm32_qspi {
+	struct device *dev;
+	struct spi_controller *ctrl;
+	phys_addr_t phys_base;
+	void __iomem *io_base;
+	void __iomem *mm_base;
+	resource_size_t mm_size;
+	struct clk *clk;
+	u32 clk_rate;
+	struct stm32_qspi_flash flash[STM32_QSPI_MAX_NORCHIP];
+	struct completion data_completion;
+	u32 fmode;
+
+	struct dma_chan *dma_chtx;
+	struct dma_chan *dma_chrx;
+	struct completion dma_completion;
+
+	u32 cr_reg;
+	u32 dcr_reg;
+
+	/*
+	 * protects the device configuration, which may differ between
+	 * the two flash accesses (bk1, bk2)
+	 */
+	struct mutex lock;
+};
+
+static irqreturn_t stm32_qspi_irq(int irq, void *dev_id)
+{
+	struct stm32_qspi *qspi = (struct stm32_qspi *)dev_id;
+	u32 cr, sr;
+
+	sr = readl_relaxed(qspi->io_base + QSPI_SR);
+
+	if (sr & (SR_TEF | SR_TCF)) {
+		/* disable irq */
+		cr = readl_relaxed(qspi->io_base + QSPI_CR);
+		cr &= ~CR_TCIE & ~CR_TEIE;
+		writel_relaxed(cr, qspi->io_base + QSPI_CR);
+		complete(&qspi->data_completion);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static void stm32_qspi_read_fifo(u8 *val, void __iomem *addr)
+{
+	*val = readb_relaxed(addr);
+}
+
+static void stm32_qspi_write_fifo(u8 *val, void __iomem *addr)
+{
+	writeb_relaxed(*val, addr);
+}
+
+static int stm32_qspi_tx_poll(struct stm32_qspi *qspi,
+			      const struct spi_mem_op *op)
+{
+	void (*tx_fifo)(u8 *val, void __iomem *addr);
+	u32 len = op->data.nbytes, sr;
+	u8 *buf;
+	int ret;
+
+	if (op->data.dir == SPI_MEM_DATA_IN) {
+		tx_fifo = stm32_qspi_read_fifo;
+		buf = op->data.buf.in;
+
+	} else {
+		tx_fifo = stm32_qspi_write_fifo;
+		buf = (u8 *)op->data.buf.out;
+	}
+
+	while (len--) {
+		ret = readl_relaxed_poll_timeout_atomic(qspi->io_base + QSPI_SR,
+							sr, (sr & SR_FTF), 1,
+							STM32_FIFO_TIMEOUT_US);
+		if (ret) {
+			dev_err(qspi->dev, "fifo timeout (len:%d stat:%#x)\n",
+				len, sr);
+			return ret;
+		}
+		tx_fifo(buf++, qspi->io_base + QSPI_DR);
+	}
+
+	return 0;
+}
+
+static int stm32_qspi_tx_mm(struct stm32_qspi *qspi,
+			    const struct spi_mem_op *op)
+{
+	memcpy_fromio(op->data.buf.in, qspi->mm_base + op->addr.val,
+		      op->data.nbytes);
+	return 0;
+}
+
+static void stm32_qspi_dma_callback(void *arg)
+{
+	struct completion *dma_completion = arg;
+
+	complete(dma_completion);
+}
+
+static int stm32_qspi_tx_dma(struct stm32_qspi *qspi,
+			     const struct spi_mem_op *op)
+{
+	struct dma_async_tx_descriptor *desc;
+	enum dma_transfer_direction dma_dir;
+	struct dma_chan *dma_ch;
+	struct sg_table sgt;
+	dma_cookie_t cookie;
+	u32 cr, t_out;
+	int err;
+
+	if (op->data.dir == SPI_MEM_DATA_IN) {
+		dma_dir = DMA_DEV_TO_MEM;
+		dma_ch = qspi->dma_chrx;
+	} else {
+		dma_dir = DMA_MEM_TO_DEV;
+		dma_ch = qspi->dma_chtx;
+	}
+
+	/*
+	 * spi_map_buf() returns -EINVAL if the buffer is not DMA-able
+	 * (DMA-able: in vmalloc | kmap | virt_addr_valid)
+	 */
+	err = spi_controller_dma_map_mem_op_data(qspi->ctrl, op, &sgt);
+	if (err)
+		return err;
+
+	desc = dmaengine_prep_slave_sg(dma_ch, sgt.sgl, sgt.nents,
+				       dma_dir, DMA_PREP_INTERRUPT);
+	if (!desc) {
+		err = -ENOMEM;
+		goto out_unmap;
+	}
+
+	cr = readl_relaxed(qspi->io_base + QSPI_CR);
+
+	reinit_completion(&qspi->dma_completion);
+	desc->callback = stm32_qspi_dma_callback;
+	desc->callback_param = &qspi->dma_completion;
+	cookie = dmaengine_submit(desc);
+	err = dma_submit_error(cookie);
+	if (err)
+		goto out;
+
+	dma_async_issue_pending(dma_ch);
+
+	writel_relaxed(cr | CR_DMAEN, qspi->io_base + QSPI_CR);
+
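+	/* Scale the completion timeout with the number of scatterlist entries */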
+	t_out = sgt.nents * STM32_COMP_TIMEOUT_MS;
+	if (!wait_for_completion_timeout(&qspi->dma_completion,
+					 msecs_to_jiffies(t_out)))
+		err = -ETIMEDOUT;
+
+	if (err)
+		dmaengine_terminate_all(dma_ch);
+
+out:
+	writel_relaxed(cr & ~CR_DMAEN, qspi->io_base + QSPI_CR);
+out_unmap:
+	spi_controller_dma_unmap_mem_op_data(qspi->ctrl, op, &sgt);
+
+	return err;
+}
+
+static int stm32_qspi_tx(struct stm32_qspi *qspi, const struct spi_mem_op *op)
+{
+	if (!op->data.nbytes)
+		return 0;
+
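+	/*
+	 * Prefer memory-mapped access, then DMA when a channel is available,
+	 * otherwise fall back to polling the FIFO.
+	 */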
+	if (qspi->fmode == CCR_FMODE_MM)
+		return stm32_qspi_tx_mm(qspi, op);
+	else if ((op->data.dir == SPI_MEM_DATA_IN && qspi->dma_chrx) ||
+		 (op->data.dir == SPI_MEM_DATA_OUT && qspi->dma_chtx))
+		if (!stm32_qspi_tx_dma(qspi, op))
+			return 0;
+
+	return stm32_qspi_tx_poll(qspi, op);
+}
+
+static int stm32_qspi_wait_nobusy(struct stm32_qspi *qspi)
+{
+	u32 sr;
+
+	return readl_relaxed_poll_timeout_atomic(qspi->io_base + QSPI_SR, sr,
+						 !(sr & SR_BUSY), 1,
+						 STM32_BUSY_TIMEOUT_US);
+}
+
+static int stm32_qspi_wait_cmd(struct stm32_qspi *qspi,
+			       const struct spi_mem_op *op)
+{
+	u32 cr, sr;
+	int err = 0;
+
+	if (!op->data.nbytes)
+		return stm32_qspi_wait_nobusy(qspi);
+
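+	/* If the transfer has already completed, just clear the status flags */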
+	if (readl_relaxed(qspi->io_base + QSPI_SR) & SR_TCF)
+		goto out;
+
+	reinit_completion(&qspi->data_completion);
+	cr = readl_relaxed(qspi->io_base + QSPI_CR);
+	writel_relaxed(cr | CR_TCIE | CR_TEIE, qspi->io_base + QSPI_CR);
+
+	if (!wait_for_completion_timeout(&qspi->data_completion,
+				msecs_to_jiffies(STM32_COMP_TIMEOUT_MS))) {
+		err = -ETIMEDOUT;
+	} else {
+		sr = readl_relaxed(qspi->io_base + QSPI_SR);
+		if (sr & SR_TEF)
+			err = -EIO;
+	}
+
+out:
+	/* clear flags */
+	writel_relaxed(FCR_CTCF | FCR_CTEF, qspi->io_base + QSPI_FCR);
+
+	return err;
+}
+
+static int stm32_qspi_get_mode(struct stm32_qspi *qspi, u8 buswidth)
+{
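+	/* Bus widths 0, 1 and 2 match the CCR encoding; only 4 needs translating */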
+	if (buswidth == 4)
+		return CCR_BUSWIDTH_4;
+
+	return buswidth;
+}
+
+static int stm32_qspi_send(struct spi_mem *mem, const struct spi_mem_op *op)
+{
+	struct stm32_qspi *qspi = spi_controller_get_devdata(mem->spi->master);
+	struct stm32_qspi_flash *flash = &qspi->flash[mem->spi->chip_select];
+	u32 ccr, cr, addr_max;
+	int timeout, err = 0;
+
+	dev_dbg(qspi->dev, "cmd:%#x mode:%d.%d.%d.%d addr:%#llx len:%#x\n",
+		op->cmd.opcode, op->cmd.buswidth, op->addr.buswidth,
+		op->dummy.buswidth, op->data.buswidth,
+		op->addr.val, op->data.nbytes);
+
+	err = stm32_qspi_wait_nobusy(qspi);
+	if (err)
+		goto abort;
+
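+	/*
+	 * Memory-mapped mode is used only for reads that fit entirely within
+	 * the mapped region and have an address phase.
+	 */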
+	addr_max = op->addr.val + op->data.nbytes + 1;
+
+	if (op->data.dir == SPI_MEM_DATA_IN) {
+		if (addr_max < qspi->mm_size &&
+		    op->addr.buswidth)
+			qspi->fmode = CCR_FMODE_MM;
+		else
+			qspi->fmode = CCR_FMODE_INDR;
+	} else {
+		qspi->fmode = CCR_FMODE_INDW;
+	}
+
+	cr = readl_relaxed(qspi->io_base + QSPI_CR);
+	cr &= ~CR_PRESC_MASK & ~CR_FSEL;
+	cr |= FIELD_PREP(CR_PRESC_MASK, flash->presc);
+	cr |= FIELD_PREP(CR_FSEL, flash->cs);
+	writel_relaxed(cr, qspi->io_base + QSPI_CR);
+
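+	/* Program the data length; data-less commands use indirect write mode */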
+	if (op->data.nbytes)
+		writel_relaxed(op->data.nbytes - 1,
+			       qspi->io_base + QSPI_DLR);
+	else
+		qspi->fmode = CCR_FMODE_INDW;
+
+	ccr = qspi->fmode;
+	ccr |= FIELD_PREP(CCR_INST_MASK, op->cmd.opcode);
+	ccr |= FIELD_PREP(CCR_IMODE_MASK,
+			  stm32_qspi_get_mode(qspi, op->cmd.buswidth));
+
+	if (op->addr.nbytes) {
+		ccr |= FIELD_PREP(CCR_ADMODE_MASK,
+				  stm32_qspi_get_mode(qspi, op->addr.buswidth));
+		ccr |= FIELD_PREP(CCR_ADSIZE_MASK, op->addr.nbytes - 1);
+	}
+
+	if (op->dummy.buswidth && op->dummy.nbytes)
+		ccr |= FIELD_PREP(CCR_DCYC_MASK,
+				  op->dummy.nbytes * 8 / op->dummy.buswidth);
+
+	if (op->data.nbytes) {
+		ccr |= FIELD_PREP(CCR_DMODE_MASK,
+				  stm32_qspi_get_mode(qspi, op->data.buswidth));
+	}
+
+	writel_relaxed(ccr, qspi->io_base + QSPI_CCR);
+
+	if (op->addr.nbytes && qspi->fmode != CCR_FMODE_MM)
+		writel_relaxed(op->addr.val, qspi->io_base + QSPI_AR);
+
+	err = stm32_qspi_tx(qspi, op);
+
+	/*
+	 * Abort in:
+	 * - the error case
+	 * - memory-mapped read: prefetching must be stopped if we read the
+	 *   last byte of the device (device size - fifo size). As the device
+	 *   size is not known, prefetching is always stopped.
+	 */
+	if (err || qspi->fmode == CCR_FMODE_MM)
+		goto abort;
+
+	/* wait for the end of the transfer in indirect mode */
+	err = stm32_qspi_wait_cmd(qspi, op);
+	if (err)
+		goto abort;
+
+	return 0;
+
+abort:
+	cr = readl_relaxed(qspi->io_base + QSPI_CR) | CR_ABORT;
+	writel_relaxed(cr, qspi->io_base + QSPI_CR);
+
+	/* wait for the abort bit to be cleared by hardware */
+	timeout = readl_relaxed_poll_timeout_atomic(qspi->io_base + QSPI_CR,
+						    cr, !(cr & CR_ABORT), 1,
+						    STM32_ABT_TIMEOUT_US);
+
+	writel_relaxed(FCR_CTCF, qspi->io_base + QSPI_FCR);
+
+	if (err || timeout)
+		dev_err(qspi->dev, "%s err:%d abort timeout:%d\n",
+			__func__, err, timeout);
+
+	return err;
+}
+
+static int stm32_qspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
+{
+	struct stm32_qspi *qspi = spi_controller_get_devdata(mem->spi->master);
+	int ret;
+
+	mutex_lock(&qspi->lock);
+	ret = stm32_qspi_send(mem, op);
+	mutex_unlock(&qspi->lock);
+
+	return ret;
+}
+
+static int stm32_qspi_setup(struct spi_device *spi)
+{
+	struct spi_controller *ctrl = spi->master;
+	struct stm32_qspi *qspi = spi_controller_get_devdata(ctrl);
+	struct stm32_qspi_flash *flash;
+	u32 presc;
+
+	if (ctrl->busy)
+		return -EBUSY;
+
+	if (!spi->max_speed_hz)
+		return -EINVAL;
+
+	presc = DIV_ROUND_UP(qspi->clk_rate, spi->max_speed_hz) - 1;
+
+	flash = &qspi->flash[spi->chip_select];
+	flash->qspi = qspi;
+	flash->cs = spi->chip_select;
+	flash->presc = presc;
+
+	mutex_lock(&qspi->lock);
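+	/* Set the FIFO threshold, enable sample shift and enable the controller */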
+	qspi->cr_reg = 3 << CR_FTHRES_SHIFT | CR_SSHIFT | CR_EN;
+	writel_relaxed(qspi->cr_reg, qspi->io_base + QSPI_CR);
+
+	/* set dcr fsize to max address */
+	qspi->dcr_reg = DCR_FSIZE_MASK;
+	writel_relaxed(qspi->dcr_reg, qspi->io_base + QSPI_DCR);
+	mutex_unlock(&qspi->lock);
+
+	return 0;
+}
+
+static void stm32_qspi_dma_setup(struct stm32_qspi *qspi)
+{
+	struct dma_slave_config dma_cfg;
+	struct device *dev = qspi->dev;
+
+	memset(&dma_cfg, 0, sizeof(dma_cfg));
+
+	dma_cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+	dma_cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+	dma_cfg.src_addr = qspi->phys_base + QSPI_DR;
+	dma_cfg.dst_addr = qspi->phys_base + QSPI_DR;
+	dma_cfg.src_maxburst = 4;
+	dma_cfg.dst_maxburst = 4;
+
+	qspi->dma_chrx = dma_request_slave_channel(dev, "rx");
+	if (qspi->dma_chrx) {
+		if (dmaengine_slave_config(qspi->dma_chrx, &dma_cfg)) {
+			dev_err(dev, "dma rx config failed\n");
+			dma_release_channel(qspi->dma_chrx);
+			qspi->dma_chrx = NULL;
+		}
+	}
+
+	qspi->dma_chtx = dma_request_slave_channel(dev, "tx");
+	if (qspi->dma_chtx) {
+		if (dmaengine_slave_config(qspi->dma_chtx, &dma_cfg)) {
+			dev_err(dev, "dma tx config failed\n");
+			dma_release_channel(qspi->dma_chtx);
+			qspi->dma_chtx = NULL;
+		}
+	}
+
+	init_completion(&qspi->dma_completion);
+}
+
+static void stm32_qspi_dma_free(struct stm32_qspi *qspi)
+{
+	if (qspi->dma_chtx)
+		dma_release_channel(qspi->dma_chtx);
+	if (qspi->dma_chrx)
+		dma_release_channel(qspi->dma_chrx);
+}
+
+/*
+ * No special host constraint, so use the default spi_mem_default_supports_op()
+ * to check the supported modes.
+ */
+static const struct spi_controller_mem_ops stm32_qspi_mem_ops = {
+	.exec_op = stm32_qspi_exec_op,
+};
+
+static void stm32_qspi_release(struct stm32_qspi *qspi)
+{
+	/* disable qspi */
+	writel_relaxed(0, qspi->io_base + QSPI_CR);
+	stm32_qspi_dma_free(qspi);
+	mutex_destroy(&qspi->lock);
+	clk_disable_unprepare(qspi->clk);
+	spi_master_put(qspi->ctrl);
+}
+
+static int stm32_qspi_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct spi_controller *ctrl;
+	struct reset_control *rstc;
+	struct stm32_qspi *qspi;
+	struct resource *res;
+	int ret, irq;
+
+	ctrl = spi_alloc_master(dev, sizeof(*qspi));
+	if (!ctrl)
+		return -ENOMEM;
+
+	qspi = spi_controller_get_devdata(ctrl);
+	qspi->ctrl = ctrl;
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "qspi");
+	qspi->io_base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(qspi->io_base)) {
+		ret = PTR_ERR(qspi->io_base);
+		goto err;
+	}
+
+	qspi->phys_base = res->start;
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "qspi_mm");
+	qspi->mm_base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(qspi->mm_base)) {
+		ret = PTR_ERR(qspi->mm_base);
+		goto err;
+	}
+
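+	/* The memory-mapped region must not exceed STM32_QSPI_MAX_MMAP_SZ */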
+	qspi->mm_size = resource_size(res);
+	if (qspi->mm_size > STM32_QSPI_MAX_MMAP_SZ) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	ret = devm_request_irq(dev, irq, stm32_qspi_irq, 0,
+			       dev_name(dev), qspi);
+	if (ret) {
+		dev_err(dev, "failed to request irq\n");
+		goto err;
+	}
+
+	init_completion(&qspi->data_completion);
+
+	qspi->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(qspi->clk)) {
+		ret = PTR_ERR(qspi->clk);
+		goto err;
+	}
+
+	qspi->clk_rate = clk_get_rate(qspi->clk);
+	if (!qspi->clk_rate) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	ret = clk_prepare_enable(qspi->clk);
+	if (ret) {
+		dev_err(dev, "can not enable the clock\n");
+		goto err;
+	}
+
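+	/* Reset the controller if an optional reset line is provided */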
+	rstc = devm_reset_control_get_exclusive(dev, NULL);
+	if (!IS_ERR(rstc)) {
+		reset_control_assert(rstc);
+		udelay(2);
+		reset_control_deassert(rstc);
+	}
+
+	qspi->dev = dev;
+	platform_set_drvdata(pdev, qspi);
+	stm32_qspi_dma_setup(qspi);
+	mutex_init(&qspi->lock);
+
+	ctrl->mode_bits = SPI_RX_DUAL | SPI_RX_QUAD
+		| SPI_TX_DUAL | SPI_TX_QUAD;
+	ctrl->setup = stm32_qspi_setup;
+	ctrl->bus_num = -1;
+	ctrl->mem_ops = &stm32_qspi_mem_ops;
+	ctrl->num_chipselect = STM32_QSPI_MAX_NORCHIP;
+	ctrl->dev.of_node = dev->of_node;
+
+	ret = devm_spi_register_master(dev, ctrl);
+	if (!ret)
+		return 0;
+
+err:
+	stm32_qspi_release(qspi);
+	return ret;
+}
+
+static int stm32_qspi_remove(struct platform_device *pdev)
+{
+	struct stm32_qspi *qspi = platform_get_drvdata(pdev);
+
+	stm32_qspi_release(qspi);
+	return 0;
+}
+
+static int __maybe_unused stm32_qspi_suspend(struct device *dev)
+{
+	struct stm32_qspi *qspi = dev_get_drvdata(dev);
+
+	clk_disable_unprepare(qspi->clk);
+	pinctrl_pm_select_sleep_state(dev);
+
+	return 0;
+}
+
+static int __maybe_unused stm32_qspi_resume(struct device *dev)
+{
+	struct stm32_qspi *qspi = dev_get_drvdata(dev);
+
+	pinctrl_pm_select_default_state(dev);
+	clk_prepare_enable(qspi->clk);
+
+	writel_relaxed(qspi->cr_reg, qspi->io_base + QSPI_CR);
+	writel_relaxed(qspi->dcr_reg, qspi->io_base + QSPI_DCR);
+
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(stm32_qspi_pm_ops, stm32_qspi_suspend, stm32_qspi_resume);
+
+static const struct of_device_id stm32_qspi_match[] = {
+	{.compatible = "st,stm32f469-qspi"},
+	{}
+};
+MODULE_DEVICE_TABLE(of, stm32_qspi_match);
+
+static struct platform_driver stm32_qspi_driver = {
+	.probe	= stm32_qspi_probe,
+	.remove	= stm32_qspi_remove,
+	.driver	= {
+		.name = "stm32-qspi",
+		.of_match_table = stm32_qspi_match,
+		.pm = &stm32_qspi_pm_ops,
+	},
+};
+module_platform_driver(stm32_qspi_driver);
+
+MODULE_AUTHOR("Ludovic Barre <ludovic.barre@st.com>");
+MODULE_DESCRIPTION("STMicroelectronics STM32 quad spi driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c
index ad1e55d..b222ce8 100644
--- a/drivers/spi/spi-stm32.c
+++ b/drivers/spi/spi-stm32.c
@@ -1,23 +1,10 @@
-/*
- * STMicroelectronics STM32 SPI Controller driver (master mode only)
- *
- * Copyright (C) 2017, STMicroelectronics - All Rights Reserved
- * Author(s): Amelie Delaunay <amelie.delaunay@st.com> for STMicroelectronics.
- *
- * License terms: GPL V2.0.
- *
- * spi_stm32 driver is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * spi_stm32 driver is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License along with
- * spi_stm32 driver. If not, see <http://www.gnu.org/licenses/>.
- */
+// SPDX-License-Identifier: GPL-2.0
+//
+// STMicroelectronics STM32 SPI Controller driver (master mode only)
+//
+// Copyright (C) 2017, STMicroelectronics - All Rights Reserved
+// Author(s): Amelie Delaunay <amelie.delaunay@st.com> for STMicroelectronics.
+
 #include <linux/debugfs.h>
 #include <linux/clk.h>
 #include <linux/delay.h>
@@ -33,99 +20,251 @@
 
 #define DRIVER_NAME "spi_stm32"
 
-/* STM32 SPI registers */
-#define STM32_SPI_CR1		0x00
-#define STM32_SPI_CR2		0x04
-#define STM32_SPI_CFG1		0x08
-#define STM32_SPI_CFG2		0x0C
-#define STM32_SPI_IER		0x10
-#define STM32_SPI_SR		0x14
-#define STM32_SPI_IFCR		0x18
-#define STM32_SPI_TXDR		0x20
-#define STM32_SPI_RXDR		0x30
-#define STM32_SPI_I2SCFGR	0x50
+/* STM32F4 SPI registers */
+#define STM32F4_SPI_CR1			0x00
+#define STM32F4_SPI_CR2			0x04
+#define STM32F4_SPI_SR			0x08
+#define STM32F4_SPI_DR			0x0C
+#define STM32F4_SPI_I2SCFGR		0x1C
 
-/* STM32_SPI_CR1 bit fields */
-#define SPI_CR1_SPE		BIT(0)
-#define SPI_CR1_MASRX		BIT(8)
-#define SPI_CR1_CSTART		BIT(9)
-#define SPI_CR1_CSUSP		BIT(10)
-#define SPI_CR1_HDDIR		BIT(11)
-#define SPI_CR1_SSI		BIT(12)
+/* STM32F4_SPI_CR1 bit fields */
+#define STM32F4_SPI_CR1_CPHA		BIT(0)
+#define STM32F4_SPI_CR1_CPOL		BIT(1)
+#define STM32F4_SPI_CR1_MSTR		BIT(2)
+#define STM32F4_SPI_CR1_BR_SHIFT	3
+#define STM32F4_SPI_CR1_BR		GENMASK(5, 3)
+#define STM32F4_SPI_CR1_SPE		BIT(6)
+#define STM32F4_SPI_CR1_LSBFRST		BIT(7)
+#define STM32F4_SPI_CR1_SSI		BIT(8)
+#define STM32F4_SPI_CR1_SSM		BIT(9)
+#define STM32F4_SPI_CR1_RXONLY		BIT(10)
+#define STM32F4_SPI_CR1_DFF		BIT(11)
+#define STM32F4_SPI_CR1_CRCNEXT		BIT(12)
+#define STM32F4_SPI_CR1_CRCEN		BIT(13)
+#define STM32F4_SPI_CR1_BIDIOE		BIT(14)
+#define STM32F4_SPI_CR1_BIDIMODE	BIT(15)
+#define STM32F4_SPI_CR1_BR_MIN		0
+#define STM32F4_SPI_CR1_BR_MAX		(GENMASK(5, 3) >> 3)
 
-/* STM32_SPI_CR2 bit fields */
-#define SPI_CR2_TSIZE_SHIFT	0
-#define SPI_CR2_TSIZE		GENMASK(15, 0)
+/* STM32F4_SPI_CR2 bit fields */
+#define STM32F4_SPI_CR2_RXDMAEN		BIT(0)
+#define STM32F4_SPI_CR2_TXDMAEN		BIT(1)
+#define STM32F4_SPI_CR2_SSOE		BIT(2)
+#define STM32F4_SPI_CR2_FRF		BIT(4)
+#define STM32F4_SPI_CR2_ERRIE		BIT(5)
+#define STM32F4_SPI_CR2_RXNEIE		BIT(6)
+#define STM32F4_SPI_CR2_TXEIE		BIT(7)
 
-/* STM32_SPI_CFG1 bit fields */
-#define SPI_CFG1_DSIZE_SHIFT	0
-#define SPI_CFG1_DSIZE		GENMASK(4, 0)
-#define SPI_CFG1_FTHLV_SHIFT	5
-#define SPI_CFG1_FTHLV		GENMASK(8, 5)
-#define SPI_CFG1_RXDMAEN	BIT(14)
-#define SPI_CFG1_TXDMAEN	BIT(15)
-#define SPI_CFG1_MBR_SHIFT	28
-#define SPI_CFG1_MBR		GENMASK(30, 28)
-#define SPI_CFG1_MBR_MIN	0
-#define SPI_CFG1_MBR_MAX	(GENMASK(30, 28) >> 28)
+/* STM32F4_SPI_SR bit fields */
+#define STM32F4_SPI_SR_RXNE		BIT(0)
+#define STM32F4_SPI_SR_TXE		BIT(1)
+#define STM32F4_SPI_SR_CHSIDE		BIT(2)
+#define STM32F4_SPI_SR_UDR		BIT(3)
+#define STM32F4_SPI_SR_CRCERR		BIT(4)
+#define STM32F4_SPI_SR_MODF		BIT(5)
+#define STM32F4_SPI_SR_OVR		BIT(6)
+#define STM32F4_SPI_SR_BSY		BIT(7)
+#define STM32F4_SPI_SR_FRE		BIT(8)
 
-/* STM32_SPI_CFG2 bit fields */
-#define SPI_CFG2_MIDI_SHIFT	4
-#define SPI_CFG2_MIDI		GENMASK(7, 4)
-#define SPI_CFG2_COMM_SHIFT	17
-#define SPI_CFG2_COMM		GENMASK(18, 17)
-#define SPI_CFG2_SP_SHIFT	19
-#define SPI_CFG2_SP		GENMASK(21, 19)
-#define SPI_CFG2_MASTER		BIT(22)
-#define SPI_CFG2_LSBFRST	BIT(23)
-#define SPI_CFG2_CPHA		BIT(24)
-#define SPI_CFG2_CPOL		BIT(25)
-#define SPI_CFG2_SSM		BIT(26)
-#define SPI_CFG2_AFCNTR		BIT(31)
+/* STM32F4_SPI_I2SCFGR bit fields */
+#define STM32F4_SPI_I2SCFGR_I2SMOD	BIT(11)
 
-/* STM32_SPI_IER bit fields */
-#define SPI_IER_RXPIE		BIT(0)
-#define SPI_IER_TXPIE		BIT(1)
-#define SPI_IER_DXPIE		BIT(2)
-#define SPI_IER_EOTIE		BIT(3)
-#define SPI_IER_TXTFIE		BIT(4)
-#define SPI_IER_OVRIE		BIT(6)
-#define SPI_IER_MODFIE		BIT(9)
-#define SPI_IER_ALL		GENMASK(10, 0)
+/* STM32F4 SPI Baud Rate min/max divisor */
+#define STM32F4_SPI_BR_DIV_MIN		(2 << STM32F4_SPI_CR1_BR_MIN)
+#define STM32F4_SPI_BR_DIV_MAX		(2 << STM32F4_SPI_CR1_BR_MAX)
 
-/* STM32_SPI_SR bit fields */
-#define SPI_SR_RXP		BIT(0)
-#define SPI_SR_TXP		BIT(1)
-#define SPI_SR_EOT		BIT(3)
-#define SPI_SR_OVR		BIT(6)
-#define SPI_SR_MODF		BIT(9)
-#define SPI_SR_SUSP		BIT(11)
-#define SPI_SR_RXPLVL_SHIFT	13
-#define SPI_SR_RXPLVL		GENMASK(14, 13)
-#define SPI_SR_RXWNE		BIT(15)
+/* STM32H7 SPI registers */
+#define STM32H7_SPI_CR1			0x00
+#define STM32H7_SPI_CR2			0x04
+#define STM32H7_SPI_CFG1		0x08
+#define STM32H7_SPI_CFG2		0x0C
+#define STM32H7_SPI_IER			0x10
+#define STM32H7_SPI_SR			0x14
+#define STM32H7_SPI_IFCR		0x18
+#define STM32H7_SPI_TXDR		0x20
+#define STM32H7_SPI_RXDR		0x30
+#define STM32H7_SPI_I2SCFGR		0x50
 
-/* STM32_SPI_IFCR bit fields */
-#define SPI_IFCR_ALL		GENMASK(11, 3)
+/* STM32H7_SPI_CR1 bit fields */
+#define STM32H7_SPI_CR1_SPE		BIT(0)
+#define STM32H7_SPI_CR1_MASRX		BIT(8)
+#define STM32H7_SPI_CR1_CSTART		BIT(9)
+#define STM32H7_SPI_CR1_CSUSP		BIT(10)
+#define STM32H7_SPI_CR1_HDDIR		BIT(11)
+#define STM32H7_SPI_CR1_SSI		BIT(12)
 
-/* STM32_SPI_I2SCFGR bit fields */
-#define SPI_I2SCFGR_I2SMOD	BIT(0)
+/* STM32H7_SPI_CR2 bit fields */
+#define STM32H7_SPI_CR2_TSIZE_SHIFT	0
+#define STM32H7_SPI_CR2_TSIZE		GENMASK(15, 0)
 
-/* SPI Master Baud Rate min/max divisor */
-#define SPI_MBR_DIV_MIN		(2 << SPI_CFG1_MBR_MIN)
-#define SPI_MBR_DIV_MAX		(2 << SPI_CFG1_MBR_MAX)
+/* STM32H7_SPI_CFG1 bit fields */
+#define STM32H7_SPI_CFG1_DSIZE_SHIFT	0
+#define STM32H7_SPI_CFG1_DSIZE		GENMASK(4, 0)
+#define STM32H7_SPI_CFG1_FTHLV_SHIFT	5
+#define STM32H7_SPI_CFG1_FTHLV		GENMASK(8, 5)
+#define STM32H7_SPI_CFG1_RXDMAEN	BIT(14)
+#define STM32H7_SPI_CFG1_TXDMAEN	BIT(15)
+#define STM32H7_SPI_CFG1_MBR_SHIFT	28
+#define STM32H7_SPI_CFG1_MBR		GENMASK(30, 28)
+#define STM32H7_SPI_CFG1_MBR_MIN	0
+#define STM32H7_SPI_CFG1_MBR_MAX	(GENMASK(30, 28) >> 28)
 
-/* SPI Communication mode */
+/* STM32H7_SPI_CFG2 bit fields */
+#define STM32H7_SPI_CFG2_MIDI_SHIFT	4
+#define STM32H7_SPI_CFG2_MIDI		GENMASK(7, 4)
+#define STM32H7_SPI_CFG2_COMM_SHIFT	17
+#define STM32H7_SPI_CFG2_COMM		GENMASK(18, 17)
+#define STM32H7_SPI_CFG2_SP_SHIFT	19
+#define STM32H7_SPI_CFG2_SP		GENMASK(21, 19)
+#define STM32H7_SPI_CFG2_MASTER		BIT(22)
+#define STM32H7_SPI_CFG2_LSBFRST	BIT(23)
+#define STM32H7_SPI_CFG2_CPHA		BIT(24)
+#define STM32H7_SPI_CFG2_CPOL		BIT(25)
+#define STM32H7_SPI_CFG2_SSM		BIT(26)
+#define STM32H7_SPI_CFG2_AFCNTR		BIT(31)
+
+/* STM32H7_SPI_IER bit fields */
+#define STM32H7_SPI_IER_RXPIE		BIT(0)
+#define STM32H7_SPI_IER_TXPIE		BIT(1)
+#define STM32H7_SPI_IER_DXPIE		BIT(2)
+#define STM32H7_SPI_IER_EOTIE		BIT(3)
+#define STM32H7_SPI_IER_TXTFIE		BIT(4)
+#define STM32H7_SPI_IER_OVRIE		BIT(6)
+#define STM32H7_SPI_IER_MODFIE		BIT(9)
+#define STM32H7_SPI_IER_ALL		GENMASK(10, 0)
+
+/* STM32H7_SPI_SR bit fields */
+#define STM32H7_SPI_SR_RXP		BIT(0)
+#define STM32H7_SPI_SR_TXP		BIT(1)
+#define STM32H7_SPI_SR_EOT		BIT(3)
+#define STM32H7_SPI_SR_OVR		BIT(6)
+#define STM32H7_SPI_SR_MODF		BIT(9)
+#define STM32H7_SPI_SR_SUSP		BIT(11)
+#define STM32H7_SPI_SR_RXPLVL_SHIFT	13
+#define STM32H7_SPI_SR_RXPLVL		GENMASK(14, 13)
+#define STM32H7_SPI_SR_RXWNE		BIT(15)
+
+/* STM32H7_SPI_IFCR bit fields */
+#define STM32H7_SPI_IFCR_ALL		GENMASK(11, 3)
+
+/* STM32H7_SPI_I2SCFGR bit fields */
+#define STM32H7_SPI_I2SCFGR_I2SMOD	BIT(0)
+
+/* STM32H7 SPI Master Baud Rate min/max divisor */
+#define STM32H7_SPI_MBR_DIV_MIN		(2 << STM32H7_SPI_CFG1_MBR_MIN)
+#define STM32H7_SPI_MBR_DIV_MAX		(2 << STM32H7_SPI_CFG1_MBR_MAX)
+
+/* STM32H7 SPI Communication mode */
+#define STM32H7_SPI_FULL_DUPLEX		0
+#define STM32H7_SPI_SIMPLEX_TX		1
+#define STM32H7_SPI_SIMPLEX_RX		2
+#define STM32H7_SPI_HALF_DUPLEX		3
+
+/* SPI Communication type */
 #define SPI_FULL_DUPLEX		0
 #define SPI_SIMPLEX_TX		1
 #define SPI_SIMPLEX_RX		2
-#define SPI_HALF_DUPLEX		3
+#define SPI_3WIRE_TX		3
+#define SPI_3WIRE_RX		4
 
 #define SPI_1HZ_NS		1000000000
 
+/*
+ * Use PIO for small transfers, avoiding the DMA setup/teardown overhead on
+ * controllers without a FIFO.
+ */
+#define SPI_DMA_MIN_BYTES	16
+
+/**
+ * stm32_spi_reg - stm32 SPI register & bitfield desc
+ * @reg:		register offset
+ * @mask:		bitfield mask
+ * @shift:		left shift
+ */
+struct stm32_spi_reg {
+	int reg;
+	int mask;
+	int shift;
+};
+
+/**
+ * stm32_spi_regspec - stm32 registers definition, compatible dependent data
+ * @en: enable register and SPI enable bit
+ * @dma_rx_en: SPI DMA RX enable register and SPI DMA RX enable bit
+ * @dma_tx_en: SPI DMA TX enable register and SPI DMA TX enable bit
+ * @cpol: clock polarity register and polarity bit
+ * @cpha: clock phase register and phase bit
+ * @lsb_first: LSB transmitted first register and bit
+ * @br: baud rate register and bitfields
+ * @rx: SPI RX data register
+ * @tx: SPI TX data register
+ */
+struct stm32_spi_regspec {
+	const struct stm32_spi_reg en;
+	const struct stm32_spi_reg dma_rx_en;
+	const struct stm32_spi_reg dma_tx_en;
+	const struct stm32_spi_reg cpol;
+	const struct stm32_spi_reg cpha;
+	const struct stm32_spi_reg lsb_first;
+	const struct stm32_spi_reg br;
+	const struct stm32_spi_reg rx;
+	const struct stm32_spi_reg tx;
+};
+
+struct stm32_spi;
+
+/**
+ * stm32_spi_cfg - stm32 compatible configuration data
+ * @regs: registers descriptions
+ * @get_fifo_size: routine to get fifo size
+ * @get_bpw_mask: routine to get bits per word mask
+ * @disable: routine to disable controller
+ * @config: routine to configure controller as SPI Master
+ * @set_bpw: routine to configure registers for bits per word
+ * @set_mode: routine to configure registers to desired mode
+ * @set_data_idleness: optional routine to configure registers to desired idle
+ * time between frames (if driver has this functionality)
+ * @set_number_of_data: optional routine to configure registers to desired
+ * number of data (if driver has this functionality)
+ * @can_dma: routine to determine if the transfer is eligible for DMA use
+ * @transfer_one_dma_start: routine to start a single spi_transfer using DMA
+ * @dma_rx_cb: routine to call after DMA RX channel operation is complete
+ * @dma_tx_cb: routine to call after DMA TX channel operation is complete
+ * @transfer_one_irq: routine to configure interrupts for driver
+ * @irq_handler_event: Interrupt handler for SPI controller events
+ * @irq_handler_thread: thread of interrupt handler for SPI controller
+ * @baud_rate_div_min: minimum baud rate divisor
+ * @baud_rate_div_max: maximum baud rate divisor
+ * @has_fifo: boolean to know if fifo is used for driver
+ * @has_startbit: boolean to know if start bit is used to start transfer
+ */
+struct stm32_spi_cfg {
+	const struct stm32_spi_regspec *regs;
+	int (*get_fifo_size)(struct stm32_spi *spi);
+	int (*get_bpw_mask)(struct stm32_spi *spi);
+	void (*disable)(struct stm32_spi *spi);
+	int (*config)(struct stm32_spi *spi);
+	void (*set_bpw)(struct stm32_spi *spi);
+	int (*set_mode)(struct stm32_spi *spi, unsigned int comm_type);
+	void (*set_data_idleness)(struct stm32_spi *spi, u32 length);
+	int (*set_number_of_data)(struct stm32_spi *spi, u32 length);
+	void (*transfer_one_dma_start)(struct stm32_spi *spi);
+	void (*dma_rx_cb)(void *data);
+	void (*dma_tx_cb)(void *data);
+	int (*transfer_one_irq)(struct stm32_spi *spi);
+	irqreturn_t (*irq_handler_event)(int irq, void *dev_id);
+	irqreturn_t (*irq_handler_thread)(int irq, void *dev_id);
+	unsigned int baud_rate_div_min;
+	unsigned int baud_rate_div_max;
+	bool has_fifo;
+};
+
 /**
  * struct stm32_spi - private data of the SPI controller
  * @dev: driver model representation of the controller
  * @master: controller master interface
+ * @cfg: compatible configuration data
  * @base: virtual memory area
  * @clk: hw kernel clock feeding the SPI clock generator
  * @clk_rate: rate of the hw kernel clock feeding the SPI clock generator
@@ -151,6 +290,7 @@
 struct stm32_spi {
 	struct device *dev;
 	struct spi_master *master;
+	const struct stm32_spi_cfg *cfg;
 	void __iomem *base;
 	struct clk *clk;
 	u32 clk_rate;
@@ -176,6 +316,40 @@
 	dma_addr_t phys_addr;
 };
 
+static const struct stm32_spi_regspec stm32f4_spi_regspec = {
+	.en = { STM32F4_SPI_CR1, STM32F4_SPI_CR1_SPE },
+
+	.dma_rx_en = { STM32F4_SPI_CR2, STM32F4_SPI_CR2_RXDMAEN },
+	.dma_tx_en = { STM32F4_SPI_CR2, STM32F4_SPI_CR2_TXDMAEN },
+
+	.cpol = { STM32F4_SPI_CR1, STM32F4_SPI_CR1_CPOL },
+	.cpha = { STM32F4_SPI_CR1, STM32F4_SPI_CR1_CPHA },
+	.lsb_first = { STM32F4_SPI_CR1, STM32F4_SPI_CR1_LSBFRST },
+	.br = { STM32F4_SPI_CR1, STM32F4_SPI_CR1_BR, STM32F4_SPI_CR1_BR_SHIFT },
+
+	.rx = { STM32F4_SPI_DR },
+	.tx = { STM32F4_SPI_DR },
+};
+
+static const struct stm32_spi_regspec stm32h7_spi_regspec = {
+	/*
+	 * SPI data transfer is enabled but spi_ker_ck is idle.
+	 * CFG1 and CFG2 registers are write protected when SPE is enabled.
+	 */
+	.en = { STM32H7_SPI_CR1, STM32H7_SPI_CR1_SPE },
+
+	.dma_rx_en = { STM32H7_SPI_CFG1, STM32H7_SPI_CFG1_RXDMAEN },
+	.dma_tx_en = { STM32H7_SPI_CFG1, STM32H7_SPI_CFG1_TXDMAEN },
+
+	.cpol = { STM32H7_SPI_CFG2, STM32H7_SPI_CFG2_CPOL },
+	.cpha = { STM32H7_SPI_CFG2, STM32H7_SPI_CFG2_CPHA },
+	.lsb_first = { STM32H7_SPI_CFG2, STM32H7_SPI_CFG2_LSBFRST },
+	.br = { STM32H7_SPI_CFG1, STM32H7_SPI_CFG1_MBR,
+		STM32H7_SPI_CFG1_MBR_SHIFT },
+
+	.rx = { STM32H7_SPI_RXDR },
+	.tx = { STM32H7_SPI_TXDR },
+};
+
 static inline void stm32_spi_set_bits(struct stm32_spi *spi,
 				      u32 offset, u32 bits)
 {
@@ -191,22 +365,22 @@
 }
 
 /**
- * stm32_spi_get_fifo_size - Return fifo size
+ * stm32h7_spi_get_fifo_size - Return fifo size
  * @spi: pointer to the spi controller data structure
  */
-static int stm32_spi_get_fifo_size(struct stm32_spi *spi)
+static int stm32h7_spi_get_fifo_size(struct stm32_spi *spi)
 {
 	unsigned long flags;
 	u32 count = 0;
 
 	spin_lock_irqsave(&spi->lock, flags);
 
-	stm32_spi_set_bits(spi, STM32_SPI_CR1, SPI_CR1_SPE);
+	stm32_spi_set_bits(spi, STM32H7_SPI_CR1, STM32H7_SPI_CR1_SPE);
 
-	while (readl_relaxed(spi->base + STM32_SPI_SR) & SPI_SR_TXP)
-		writeb_relaxed(++count, spi->base + STM32_SPI_TXDR);
+	while (readl_relaxed(spi->base + STM32H7_SPI_SR) & STM32H7_SPI_SR_TXP)
+		writeb_relaxed(++count, spi->base + STM32H7_SPI_TXDR);
 
-	stm32_spi_clr_bits(spi, STM32_SPI_CR1, SPI_CR1_SPE);
+	stm32_spi_clr_bits(spi, STM32H7_SPI_CR1, STM32H7_SPI_CR1_SPE);
 
 	spin_unlock_irqrestore(&spi->lock, flags);
 
@@ -216,10 +390,20 @@
 }
 
 /**
- * stm32_spi_get_bpw_mask - Return bits per word mask
+ * stm32f4_spi_get_bpw_mask - Return bits per word mask
  * @spi: pointer to the spi controller data structure
  */
-static int stm32_spi_get_bpw_mask(struct stm32_spi *spi)
+static int stm32f4_spi_get_bpw_mask(struct stm32_spi *spi)
+{
+	dev_dbg(spi->dev, "8-bit or 16-bit data frame supported\n");
+	return SPI_BPW_MASK(8) | SPI_BPW_MASK(16);
+}
+
+/**
+ * stm32h7_spi_get_bpw_mask - Return bits per word mask
+ * @spi: pointer to the spi controller data structure
+ */
+static int stm32h7_spi_get_bpw_mask(struct stm32_spi *spi)
 {
 	unsigned long flags;
 	u32 cfg1, max_bpw;
@@ -230,10 +414,11 @@
 	 * The most significant bit at DSIZE bit field is reserved when the
 	 * maximum data size of periperal instances is limited to 16-bit
 	 */
-	stm32_spi_set_bits(spi, STM32_SPI_CFG1, SPI_CFG1_DSIZE);
+	stm32_spi_set_bits(spi, STM32H7_SPI_CFG1, STM32H7_SPI_CFG1_DSIZE);
 
-	cfg1 = readl_relaxed(spi->base + STM32_SPI_CFG1);
-	max_bpw = (cfg1 & SPI_CFG1_DSIZE) >> SPI_CFG1_DSIZE_SHIFT;
+	cfg1 = readl_relaxed(spi->base + STM32H7_SPI_CFG1);
+	max_bpw = (cfg1 & STM32H7_SPI_CFG1_DSIZE) >>
+		  STM32H7_SPI_CFG1_DSIZE_SHIFT;
 	max_bpw += 1;
 
 	spin_unlock_irqrestore(&spi->lock, flags);
@@ -244,13 +429,16 @@
 }
 
 /**
- * stm32_spi_prepare_mbr - Determine SPI_CFG1.MBR value
+ * stm32_spi_prepare_mbr - Determine baud rate divisor value
  * @spi: pointer to the spi controller data structure
  * @speed_hz: requested speed
+ * @min_div: minimum baud rate divisor
+ * @max_div: maximum baud rate divisor
  *
- * Return SPI_CFG1.MBR value in case of success or -EINVAL
+ * Return baud rate divisor value in case of success or -EINVAL
  */
-static int stm32_spi_prepare_mbr(struct stm32_spi *spi, u32 speed_hz)
+static int stm32_spi_prepare_mbr(struct stm32_spi *spi, u32 speed_hz,
+				 u32 min_div, u32 max_div)
 {
 	u32 div, mbrdiv;
 
@@ -263,8 +451,7 @@
 	 * no need to check it there.
 	 * However, we need to ensure the following calculations.
 	 */
-	if (div < SPI_MBR_DIV_MIN ||
-	    div > SPI_MBR_DIV_MAX)
+	if ((div < min_div) || (div > max_div))
 		return -EINVAL;
 
 	/* Determine the first power of 2 greater than or equal to div */
@@ -279,10 +466,10 @@
 }
 
 /**
- * stm32_spi_prepare_fthlv - Determine FIFO threshold level
+ * stm32h7_spi_prepare_fthlv - Determine FIFO threshold level
  * @spi: pointer to the spi controller data structure
  */
-static u32 stm32_spi_prepare_fthlv(struct stm32_spi *spi)
+static u32 stm32h7_spi_prepare_fthlv(struct stm32_spi *spi)
 {
 	u32 fthlv, half_fifo;
 
@@ -306,32 +493,27 @@
 }
 
 /**
- * stm32_spi_write_txfifo - Write bytes in Transmit Data Register
+ * stm32f4_spi_write_tx - Write bytes to Transmit Data Register
  * @spi: pointer to the spi controller data structure
  *
  * Read from tx_buf depends on remaining bytes to avoid to read beyond
  * tx_buf end.
  */
-static void stm32_spi_write_txfifo(struct stm32_spi *spi)
+static void stm32f4_spi_write_tx(struct stm32_spi *spi)
 {
-	while ((spi->tx_len > 0) &&
-	       (readl_relaxed(spi->base + STM32_SPI_SR) & SPI_SR_TXP)) {
+	if ((spi->tx_len > 0) && (readl_relaxed(spi->base + STM32F4_SPI_SR) &
+				  STM32F4_SPI_SR_TXE)) {
 		u32 offs = spi->cur_xferlen - spi->tx_len;
 
-		if (spi->tx_len >= sizeof(u32)) {
-			const u32 *tx_buf32 = (const u32 *)(spi->tx_buf + offs);
-
-			writel_relaxed(*tx_buf32, spi->base + STM32_SPI_TXDR);
-			spi->tx_len -= sizeof(u32);
-		} else if (spi->tx_len >= sizeof(u16)) {
+		if (spi->cur_bpw == 16) {
 			const u16 *tx_buf16 = (const u16 *)(spi->tx_buf + offs);
 
-			writew_relaxed(*tx_buf16, spi->base + STM32_SPI_TXDR);
+			writew_relaxed(*tx_buf16, spi->base + STM32F4_SPI_DR);
 			spi->tx_len -= sizeof(u16);
 		} else {
 			const u8 *tx_buf8 = (const u8 *)(spi->tx_buf + offs);
 
-			writeb_relaxed(*tx_buf8, spi->base + STM32_SPI_TXDR);
+			writeb_relaxed(*tx_buf8, spi->base + STM32F4_SPI_DR);
 			spi->tx_len -= sizeof(u8);
 		}
 	}
@@ -340,43 +522,109 @@
 }
 
 /**
- * stm32_spi_read_rxfifo - Read bytes in Receive Data Register
+ * stm32h7_spi_write_txfifo - Write bytes in Transmit Data Register
+ * @spi: pointer to the spi controller data structure
+ *
+ * Reads from tx_buf depend on the number of remaining bytes, to avoid
+ * reading beyond the end of tx_buf.
+ */
+static void stm32h7_spi_write_txfifo(struct stm32_spi *spi)
+{
+	while ((spi->tx_len > 0) &&
+		       (readl_relaxed(spi->base + STM32H7_SPI_SR) &
+			STM32H7_SPI_SR_TXP)) {
+		u32 offs = spi->cur_xferlen - spi->tx_len;
+
+		if (spi->tx_len >= sizeof(u32)) {
+			const u32 *tx_buf32 = (const u32 *)(spi->tx_buf + offs);
+
+			writel_relaxed(*tx_buf32, spi->base + STM32H7_SPI_TXDR);
+			spi->tx_len -= sizeof(u32);
+		} else if (spi->tx_len >= sizeof(u16)) {
+			const u16 *tx_buf16 = (const u16 *)(spi->tx_buf + offs);
+
+			writew_relaxed(*tx_buf16, spi->base + STM32H7_SPI_TXDR);
+			spi->tx_len -= sizeof(u16);
+		} else {
+			const u8 *tx_buf8 = (const u8 *)(spi->tx_buf + offs);
+
+			writeb_relaxed(*tx_buf8, spi->base + STM32H7_SPI_TXDR);
+			spi->tx_len -= sizeof(u8);
+		}
+	}
+
+	dev_dbg(spi->dev, "%s: %d bytes left\n", __func__, spi->tx_len);
+}
+
+/**
+ * stm32f4_spi_read_rx - Read bytes from Receive Data Register
  * @spi: pointer to the spi controller data structure
  *
  * Write in rx_buf depends on remaining bytes to avoid to write beyond
  * rx_buf end.
  */
-static void stm32_spi_read_rxfifo(struct stm32_spi *spi, bool flush)
+static void stm32f4_spi_read_rx(struct stm32_spi *spi)
 {
-	u32 sr = readl_relaxed(spi->base + STM32_SPI_SR);
-	u32 rxplvl = (sr & SPI_SR_RXPLVL) >> SPI_SR_RXPLVL_SHIFT;
+	if ((spi->rx_len > 0) && (readl_relaxed(spi->base + STM32F4_SPI_SR) &
+				  STM32F4_SPI_SR_RXNE)) {
+		u32 offs = spi->cur_xferlen - spi->rx_len;
+
+		if (spi->cur_bpw == 16) {
+			u16 *rx_buf16 = (u16 *)(spi->rx_buf + offs);
+
+			*rx_buf16 = readw_relaxed(spi->base + STM32F4_SPI_DR);
+			spi->rx_len -= sizeof(u16);
+		} else {
+			u8 *rx_buf8 = (u8 *)(spi->rx_buf + offs);
+
+			*rx_buf8 = readb_relaxed(spi->base + STM32F4_SPI_DR);
+			spi->rx_len -= sizeof(u8);
+		}
+	}
+
+	dev_dbg(spi->dev, "%s: %d bytes left\n", __func__, spi->rx_len);
+}
+
+/**
+ * stm32h7_spi_read_rxfifo - Read bytes in Receive Data Register
+ * @spi: pointer to the spi controller data structure
+ *
+ * Writes to rx_buf depend on the number of remaining bytes, to avoid
+ * writing beyond the end of rx_buf.
+ */
+static void stm32h7_spi_read_rxfifo(struct stm32_spi *spi, bool flush)
+{
+	u32 sr = readl_relaxed(spi->base + STM32H7_SPI_SR);
+	u32 rxplvl = (sr & STM32H7_SPI_SR_RXPLVL) >>
+		     STM32H7_SPI_SR_RXPLVL_SHIFT;
 
 	while ((spi->rx_len > 0) &&
-	       ((sr & SPI_SR_RXP) ||
-		(flush && ((sr & SPI_SR_RXWNE) || (rxplvl > 0))))) {
+	       ((sr & STM32H7_SPI_SR_RXP) ||
+		(flush && ((sr & STM32H7_SPI_SR_RXWNE) || (rxplvl > 0))))) {
 		u32 offs = spi->cur_xferlen - spi->rx_len;
 
 		if ((spi->rx_len >= sizeof(u32)) ||
-		    (flush && (sr & SPI_SR_RXWNE))) {
+		    (flush && (sr & STM32H7_SPI_SR_RXWNE))) {
 			u32 *rx_buf32 = (u32 *)(spi->rx_buf + offs);
 
-			*rx_buf32 = readl_relaxed(spi->base + STM32_SPI_RXDR);
+			*rx_buf32 = readl_relaxed(spi->base + STM32H7_SPI_RXDR);
 			spi->rx_len -= sizeof(u32);
 		} else if ((spi->rx_len >= sizeof(u16)) ||
 			   (flush && (rxplvl >= 2 || spi->cur_bpw > 8))) {
 			u16 *rx_buf16 = (u16 *)(spi->rx_buf + offs);
 
-			*rx_buf16 = readw_relaxed(spi->base + STM32_SPI_RXDR);
+			*rx_buf16 = readw_relaxed(spi->base + STM32H7_SPI_RXDR);
 			spi->rx_len -= sizeof(u16);
 		} else {
 			u8 *rx_buf8 = (u8 *)(spi->rx_buf + offs);
 
-			*rx_buf8 = readb_relaxed(spi->base + STM32_SPI_RXDR);
+			*rx_buf8 = readb_relaxed(spi->base + STM32H7_SPI_RXDR);
 			spi->rx_len -= sizeof(u8);
 		}
 
-		sr = readl_relaxed(spi->base + STM32_SPI_SR);
-		rxplvl = (sr & SPI_SR_RXPLVL) >> SPI_SR_RXPLVL_SHIFT;
+		sr = readl_relaxed(spi->base + STM32H7_SPI_SR);
+		rxplvl = (sr & STM32H7_SPI_SR_RXPLVL) >>
+			 STM32H7_SPI_SR_RXPLVL_SHIFT;
 	}
 
 	dev_dbg(spi->dev, "%s%s: %d bytes left\n", __func__,
@@ -386,26 +634,76 @@
 /**
  * stm32_spi_enable - Enable SPI controller
  * @spi: pointer to the spi controller data structure
- *
- * SPI data transfer is enabled but spi_ker_ck is idle.
- * SPI_CFG1 and SPI_CFG2 are now write protected.
  */
 static void stm32_spi_enable(struct stm32_spi *spi)
 {
 	dev_dbg(spi->dev, "enable controller\n");
 
-	stm32_spi_set_bits(spi, STM32_SPI_CR1, SPI_CR1_SPE);
+	stm32_spi_set_bits(spi, spi->cfg->regs->en.reg,
+			   spi->cfg->regs->en.mask);
 }
 
 /**
- * stm32_spi_disable - Disable SPI controller
+ * stm32f4_spi_disable - Disable SPI controller
+ * @spi: pointer to the spi controller data structure
+ */
+static void stm32f4_spi_disable(struct stm32_spi *spi)
+{
+	unsigned long flags;
+	u32 sr;
+
+	dev_dbg(spi->dev, "disable controller\n");
+
+	spin_lock_irqsave(&spi->lock, flags);
+
+	if (!(readl_relaxed(spi->base + STM32F4_SPI_CR1) &
+	      STM32F4_SPI_CR1_SPE)) {
+		spin_unlock_irqrestore(&spi->lock, flags);
+		return;
+	}
+
+	/* Disable interrupts */
+	stm32_spi_clr_bits(spi, STM32F4_SPI_CR2, STM32F4_SPI_CR2_TXEIE |
+						 STM32F4_SPI_CR2_RXNEIE |
+						 STM32F4_SPI_CR2_ERRIE);
+
+	/* Wait until BSY = 0 */
+	if (readl_relaxed_poll_timeout_atomic(spi->base + STM32F4_SPI_SR,
+					      sr, !(sr & STM32F4_SPI_SR_BSY),
+					      10, 100000) < 0) {
+		dev_warn(spi->dev, "disabling condition timeout\n");
+	}
+
+	if (spi->cur_usedma && spi->dma_tx)
+		dmaengine_terminate_all(spi->dma_tx);
+	if (spi->cur_usedma && spi->dma_rx)
+		dmaengine_terminate_all(spi->dma_rx);
+
+	stm32_spi_clr_bits(spi, STM32F4_SPI_CR1, STM32F4_SPI_CR1_SPE);
+
+	stm32_spi_clr_bits(spi, STM32F4_SPI_CR2, STM32F4_SPI_CR2_TXDMAEN |
+						 STM32F4_SPI_CR2_RXDMAEN);
+
+	/* Sequence to clear OVR flag */
+	readl_relaxed(spi->base + STM32F4_SPI_DR);
+	readl_relaxed(spi->base + STM32F4_SPI_SR);
+
+	spin_unlock_irqrestore(&spi->lock, flags);
+}
+
+/**
+ * stm32h7_spi_disable - Disable SPI controller
  * @spi: pointer to the spi controller data structure
  *
  * RX-Fifo is flushed when SPI controller is disabled. To prevent any data
- * loss, use stm32_spi_read_rxfifo(flush) to read the remaining bytes in
+ * loss, use stm32h7_spi_read_rxfifo(flush) to read the remaining bytes in
  * RX-Fifo.
+ * Normally, if TSIZE has been configured, we should relax the hardware at the
+ * reception of the EOT interrupt. But in case of error, EOT will not be
+ * raised, so the subsystem's unprepare_message call allows us to properly
+ * complete the transfer from a hardware point of view.
  */
-static void stm32_spi_disable(struct stm32_spi *spi)
+static void stm32h7_spi_disable(struct stm32_spi *spi)
 {
 	unsigned long flags;
 	u32 cr1, sr;
@@ -414,23 +712,23 @@
 
 	spin_lock_irqsave(&spi->lock, flags);
 
-	cr1 = readl_relaxed(spi->base + STM32_SPI_CR1);
+	cr1 = readl_relaxed(spi->base + STM32H7_SPI_CR1);
 
-	if (!(cr1 & SPI_CR1_SPE)) {
+	if (!(cr1 & STM32H7_SPI_CR1_SPE)) {
 		spin_unlock_irqrestore(&spi->lock, flags);
 		return;
 	}
 
 	/* Wait on EOT or suspend the flow */
-	if (readl_relaxed_poll_timeout_atomic(spi->base + STM32_SPI_SR,
-					      sr, !(sr & SPI_SR_EOT),
+	if (readl_relaxed_poll_timeout_atomic(spi->base + STM32H7_SPI_SR,
+					      sr, !(sr & STM32H7_SPI_SR_EOT),
 					      10, 100000) < 0) {
-		if (cr1 & SPI_CR1_CSTART) {
-			writel_relaxed(cr1 | SPI_CR1_CSUSP,
-				       spi->base + STM32_SPI_CR1);
+		if (cr1 & STM32H7_SPI_CR1_CSTART) {
+			writel_relaxed(cr1 | STM32H7_SPI_CR1_CSUSP,
+				       spi->base + STM32H7_SPI_CR1);
 			if (readl_relaxed_poll_timeout_atomic(
-						spi->base + STM32_SPI_SR,
-						sr, !(sr & SPI_SR_SUSP),
+						spi->base + STM32H7_SPI_SR,
+						sr, !(sr & STM32H7_SPI_SR_SUSP),
 						10, 100000) < 0)
 				dev_warn(spi->dev,
 					 "Suspend request timeout\n");
@@ -438,21 +736,21 @@
 	}
 
 	if (!spi->cur_usedma && spi->rx_buf && (spi->rx_len > 0))
-		stm32_spi_read_rxfifo(spi, true);
+		stm32h7_spi_read_rxfifo(spi, true);
 
-	if (spi->cur_usedma && spi->tx_buf)
+	if (spi->cur_usedma && spi->dma_tx)
 		dmaengine_terminate_all(spi->dma_tx);
-	if (spi->cur_usedma && spi->rx_buf)
+	if (spi->cur_usedma && spi->dma_rx)
 		dmaengine_terminate_all(spi->dma_rx);
 
-	stm32_spi_clr_bits(spi, STM32_SPI_CR1, SPI_CR1_SPE);
+	stm32_spi_clr_bits(spi, STM32H7_SPI_CR1, STM32H7_SPI_CR1_SPE);
 
-	stm32_spi_clr_bits(spi, STM32_SPI_CFG1, SPI_CFG1_TXDMAEN |
-						SPI_CFG1_RXDMAEN);
+	stm32_spi_clr_bits(spi, STM32H7_SPI_CFG1, STM32H7_SPI_CFG1_TXDMAEN |
+						STM32H7_SPI_CFG1_RXDMAEN);
 
 	/* Disable interrupts and clear status flags */
-	writel_relaxed(0, spi->base + STM32_SPI_IER);
-	writel_relaxed(SPI_IFCR_ALL, spi->base + STM32_SPI_IFCR);
+	writel_relaxed(0, spi->base + STM32H7_SPI_IER);
+	writel_relaxed(STM32H7_SPI_IFCR_ALL, spi->base + STM32H7_SPI_IFCR);
 
 	spin_unlock_irqrestore(&spi->lock, flags);
 }
@@ -460,26 +758,136 @@
 /**
  * stm32_spi_can_dma - Determine if the transfer is eligible for DMA use
  *
- * If the current transfer size is greater than fifo size, use DMA.
+ * If the controller has a FIFO and the current transfer size is greater than
+ * the FIFO size, use DMA. Otherwise, use DMA for transfers longer than the
+ * defined DMA minimum byte count.
  */
 static bool stm32_spi_can_dma(struct spi_master *master,
 			      struct spi_device *spi_dev,
 			      struct spi_transfer *transfer)
 {
+	unsigned int dma_size;
 	struct stm32_spi *spi = spi_master_get_devdata(master);
 
-	dev_dbg(spi->dev, "%s: %s\n", __func__,
-		(transfer->len > spi->fifo_size) ? "true" : "false");
+	if (spi->cfg->has_fifo)
+		dma_size = spi->fifo_size;
+	else
+		dma_size = SPI_DMA_MIN_BYTES;
 
-	return (transfer->len > spi->fifo_size);
+	dev_dbg(spi->dev, "%s: %s\n", __func__,
+		(transfer->len > dma_size) ? "true" : "false");
+
+	return (transfer->len > dma_size);
 }
 
 /**
- * stm32_spi_irq - Interrupt handler for SPI controller events
+ * stm32f4_spi_irq_event - Interrupt handler for SPI controller events
  * @irq: interrupt line
  * @dev_id: SPI controller master interface
  */
-static irqreturn_t stm32_spi_irq(int irq, void *dev_id)
+static irqreturn_t stm32f4_spi_irq_event(int irq, void *dev_id)
+{
+	struct spi_master *master = dev_id;
+	struct stm32_spi *spi = spi_master_get_devdata(master);
+	u32 sr, mask = 0;
+	unsigned long flags;
+	bool end = false;
+
+	spin_lock_irqsave(&spi->lock, flags);
+
+	sr = readl_relaxed(spi->base + STM32F4_SPI_SR);
+	/*
+	 * BSY flag is not handled in interrupt but it is normal behavior when
+	 * The BSY flag is not handled in the interrupt, but it is normal
+	 * behavior for this flag to be set.
+	sr &= ~STM32F4_SPI_SR_BSY;
+
+	if (!spi->cur_usedma && (spi->cur_comm == SPI_SIMPLEX_TX ||
+				 spi->cur_comm == SPI_3WIRE_TX)) {
+		/* OVR and RXNE flags shouldn't be handled for TX only mode */
+		sr &= ~(STM32F4_SPI_SR_OVR | STM32F4_SPI_SR_RXNE);
+		mask |= STM32F4_SPI_SR_TXE;
+	}
+
+	if (!spi->cur_usedma && spi->cur_comm == SPI_FULL_DUPLEX) {
+		/* TXE flag is set and is handled when RXNE flag occurs */
+		sr &= ~STM32F4_SPI_SR_TXE;
+		mask |= STM32F4_SPI_SR_RXNE | STM32F4_SPI_SR_OVR;
+	}
+
+	if (!(sr & mask)) {
+		dev_dbg(spi->dev, "spurious IT (sr=0x%08x)\n", sr);
+		spin_unlock_irqrestore(&spi->lock, flags);
+		return IRQ_NONE;
+	}
+
+	if (sr & STM32F4_SPI_SR_OVR) {
+		dev_warn(spi->dev, "Overrun: received value discarded\n");
+
+		/* Sequence to clear OVR flag */
+		readl_relaxed(spi->base + STM32F4_SPI_DR);
+		readl_relaxed(spi->base + STM32F4_SPI_SR);
+
+		/*
+		 * If an overrun is detected, something went wrong, so stop the
+		 * current transfer. The transfer could otherwise keep waiting
+		 * for the next RXNE, but DR has already been read and the end
+		 * condition would never be reached.
+		 */
+		end = true;
+		goto end_irq;
+	}
+
+	if (sr & STM32F4_SPI_SR_TXE) {
+		if (spi->tx_buf)
+			stm32f4_spi_write_tx(spi);
+		if (spi->tx_len == 0)
+			end = true;
+	}
+
+	if (sr & STM32F4_SPI_SR_RXNE) {
+		stm32f4_spi_read_rx(spi);
+		if (spi->rx_len == 0)
+			end = true;
+		else /* Load data for discontinuous mode */
+			stm32f4_spi_write_tx(spi);
+	}
+
+end_irq:
+	if (end) {
+		/* Immediately disable interrupts so that no new ones are generated */
+		stm32_spi_clr_bits(spi, STM32F4_SPI_CR2,
+					STM32F4_SPI_CR2_TXEIE |
+					STM32F4_SPI_CR2_RXNEIE |
+					STM32F4_SPI_CR2_ERRIE);
+		spin_unlock_irqrestore(&spi->lock, flags);
+		return IRQ_WAKE_THREAD;
+	}
+
+	spin_unlock_irqrestore(&spi->lock, flags);
+	return IRQ_HANDLED;
+}
+
+/**
+ * stm32f4_spi_irq_thread - Thread of interrupt handler for SPI controller
+ * @irq: interrupt line
+ * @dev_id: SPI controller master interface
+ */
+static irqreturn_t stm32f4_spi_irq_thread(int irq, void *dev_id)
+{
+	struct spi_master *master = dev_id;
+	struct stm32_spi *spi = spi_master_get_devdata(master);
+
+	spi_finalize_current_transfer(master);
+	stm32f4_spi_disable(spi);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * stm32h7_spi_irq_thread - Thread of interrupt handler for SPI controller
+ * @irq: interrupt line
+ * @dev_id: SPI controller master interface
+ */
+static irqreturn_t stm32h7_spi_irq_thread(int irq, void *dev_id)
 {
 	struct spi_master *master = dev_id;
 	struct stm32_spi *spi = spi_master_get_devdata(master);
@@ -489,19 +897,19 @@
 
 	spin_lock_irqsave(&spi->lock, flags);
 
-	sr = readl_relaxed(spi->base + STM32_SPI_SR);
-	ier = readl_relaxed(spi->base + STM32_SPI_IER);
+	sr = readl_relaxed(spi->base + STM32H7_SPI_SR);
+	ier = readl_relaxed(spi->base + STM32H7_SPI_IER);
 
 	mask = ier;
 	/* EOTIE is triggered on EOT, SUSP and TXC events. */
-	mask |= SPI_SR_SUSP;
+	mask |= STM32H7_SPI_SR_SUSP;
 	/*
 	 * When TXTF is set, DXPIE and TXPIE are cleared. So in case of
 	 * Full-Duplex, need to poll RXP event to know if there are remaining
 	 * data, before disabling SPI.
 	 */
 	if (spi->rx_buf && !spi->cur_usedma)
-		mask |= SPI_SR_RXP;
+		mask |= STM32H7_SPI_SR_RXP;
 
 	if (!(sr & mask)) {
 		dev_dbg(spi->dev, "spurious IT (sr=0x%08x, ier=0x%08x)\n",
@@ -510,10 +918,10 @@
 		return IRQ_NONE;
 	}
 
-	if (sr & SPI_SR_SUSP) {
+	if (sr & STM32H7_SPI_SR_SUSP) {
 		dev_warn(spi->dev, "Communication suspended\n");
 		if (!spi->cur_usedma && (spi->rx_buf && (spi->rx_len > 0)))
-			stm32_spi_read_rxfifo(spi, false);
+			stm32h7_spi_read_rxfifo(spi, false);
 		/*
 		 * If communication is suspended while using DMA, it means
 		 * that something went wrong, so stop the current transfer
@@ -522,15 +930,15 @@
 			end = true;
 	}
 
-	if (sr & SPI_SR_MODF) {
+	if (sr & STM32H7_SPI_SR_MODF) {
 		dev_warn(spi->dev, "Mode fault: transfer aborted\n");
 		end = true;
 	}
 
-	if (sr & SPI_SR_OVR) {
+	if (sr & STM32H7_SPI_SR_OVR) {
 		dev_warn(spi->dev, "Overrun: received value discarded\n");
 		if (!spi->cur_usedma && (spi->rx_buf && (spi->rx_len > 0)))
-			stm32_spi_read_rxfifo(spi, false);
+			stm32h7_spi_read_rxfifo(spi, false);
 		/*
 		 * If overrun is detected while using DMA, it means that
 		 * something went wrong, so stop the current transfer
@@ -539,27 +947,27 @@
 			end = true;
 	}
 
-	if (sr & SPI_SR_EOT) {
+	if (sr & STM32H7_SPI_SR_EOT) {
 		if (!spi->cur_usedma && (spi->rx_buf && (spi->rx_len > 0)))
-			stm32_spi_read_rxfifo(spi, true);
+			stm32h7_spi_read_rxfifo(spi, true);
 		end = true;
 	}
 
-	if (sr & SPI_SR_TXP)
+	if (sr & STM32H7_SPI_SR_TXP)
 		if (!spi->cur_usedma && (spi->tx_buf && (spi->tx_len > 0)))
-			stm32_spi_write_txfifo(spi);
+			stm32h7_spi_write_txfifo(spi);
 
-	if (sr & SPI_SR_RXP)
+	if (sr & STM32H7_SPI_SR_RXP)
 		if (!spi->cur_usedma && (spi->rx_buf && (spi->rx_len > 0)))
-			stm32_spi_read_rxfifo(spi, false);
+			stm32h7_spi_read_rxfifo(spi, false);
 
-	writel_relaxed(mask, spi->base + STM32_SPI_IFCR);
+	writel_relaxed(mask, spi->base + STM32H7_SPI_IFCR);
 
 	spin_unlock_irqrestore(&spi->lock, flags);
 
 	if (end) {
 		spi_finalize_current_transfer(master);
-		stm32_spi_disable(spi);
+		stm32h7_spi_disable(spi);
 	}
 
 	return IRQ_HANDLED;
@@ -598,7 +1006,7 @@
 	struct spi_device *spi_dev = msg->spi;
 	struct device_node *np = spi_dev->dev.of_node;
 	unsigned long flags;
-	u32 cfg2_clrb = 0, cfg2_setb = 0;
+	u32 clrb = 0, setb = 0;
 
 	/* SPI slave device may need time between data frames */
 	spi->cur_midi = 0;
@@ -606,19 +1014,19 @@
 		dev_dbg(spi->dev, "%dns inter-data idleness\n", spi->cur_midi);
 
 	if (spi_dev->mode & SPI_CPOL)
-		cfg2_setb |= SPI_CFG2_CPOL;
+		setb |= spi->cfg->regs->cpol.mask;
 	else
-		cfg2_clrb |= SPI_CFG2_CPOL;
+		clrb |= spi->cfg->regs->cpol.mask;
 
 	if (spi_dev->mode & SPI_CPHA)
-		cfg2_setb |= SPI_CFG2_CPHA;
+		setb |= spi->cfg->regs->cpha.mask;
 	else
-		cfg2_clrb |= SPI_CFG2_CPHA;
+		clrb |= spi->cfg->regs->cpha.mask;
 
 	if (spi_dev->mode & SPI_LSB_FIRST)
-		cfg2_setb |= SPI_CFG2_LSBFRST;
+		setb |= spi->cfg->regs->lsb_first.mask;
 	else
-		cfg2_clrb |= SPI_CFG2_LSBFRST;
+		clrb |= spi->cfg->regs->lsb_first.mask;
 
 	dev_dbg(spi->dev, "cpol=%d cpha=%d lsb_first=%d cs_high=%d\n",
 		spi_dev->mode & SPI_CPOL,
@@ -628,11 +1036,12 @@
 
 	spin_lock_irqsave(&spi->lock, flags);
 
-	if (cfg2_clrb || cfg2_setb)
+	/* CPOL, CPHA and LSB FIRST bits share a common register */
+	if (clrb || setb)
 		writel_relaxed(
-			(readl_relaxed(spi->base + STM32_SPI_CFG2) &
-				~cfg2_clrb) | cfg2_setb,
-			       spi->base + STM32_SPI_CFG2);
+			(readl_relaxed(spi->base + spi->cfg->regs->cpol.reg) &
+			 ~clrb) | setb,
+			spi->base + spi->cfg->regs->cpol.reg);
 
 	spin_unlock_irqrestore(&spi->lock, flags);
 
@@ -640,12 +1049,40 @@
 }
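
The prepare_msg hunk above collects the bits to clear in clrb and the bits to set in setb, then applies both in a single read-modify-write of whichever register the per-variant regspec points at. A small stand-alone sketch of that pattern, using made-up masks rather than the driver's regspec (illustrative only):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical bit masks standing in for the per-variant regspec entries. */
#define MOCK_CPOL	(1u << 25)
#define MOCK_CPHA	(1u << 24)
#define MOCK_LSBFRST	(1u << 23)

/* Apply "clear these bits, then set those bits" in one read-modify-write. */
static uint32_t update_reg(uint32_t reg, uint32_t clrb, uint32_t setb)
{
	return (reg & ~clrb) | setb;
}

int main(void)
{
	uint32_t reg = MOCK_CPOL | MOCK_LSBFRST;	/* pretend current value */
	uint32_t clrb = 0, setb = 0;

	/* Request mode 0 (CPOL=0, CPHA=0), MSB first. */
	clrb |= MOCK_CPOL | MOCK_CPHA | MOCK_LSBFRST;

	reg = update_reg(reg, clrb, setb);
	printf("reg = 0x%08x\n", reg);	/* prints 0x00000000 */
	return 0;
}
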
 
 /**
- * stm32_spi_dma_cb - dma callback
+ * stm32f4_spi_dma_tx_cb - dma callback
+ * @data: pointer to the spi controller data structure
+ *
+ * DMA callback is called when the transfer completes on the DMA TX channel.
+ */
+static void stm32f4_spi_dma_tx_cb(void *data)
+{
+	struct stm32_spi *spi = data;
+
+	if (spi->cur_comm == SPI_SIMPLEX_TX || spi->cur_comm == SPI_3WIRE_TX) {
+		spi_finalize_current_transfer(spi->master);
+		stm32f4_spi_disable(spi);
+	}
+}
+
+/**
+ * stm32f4_spi_dma_rx_cb - dma callback
+ * @data: pointer to the spi controller data structure
+ *
+ * DMA callback is called when the transfer completes on the DMA RX channel.
+ */
+static void stm32f4_spi_dma_rx_cb(void *data)
+{
+	struct stm32_spi *spi = data;
+
+	spi_finalize_current_transfer(spi->master);
+	stm32f4_spi_disable(spi);
+}
+
+/**
+ * stm32h7_spi_dma_cb - dma callback
+ * @data: pointer to the spi controller data structure
  *
  * DMA callback is called when the transfer is complete or when an error
  * occurs. If the transfer is complete, EOT flag is raised.
  */
-static void stm32_spi_dma_cb(void *data)
+static void stm32h7_spi_dma_cb(void *data)
 {
 	struct stm32_spi *spi = data;
 	unsigned long flags;
@@ -653,11 +1090,11 @@
 
 	spin_lock_irqsave(&spi->lock, flags);
 
-	sr = readl_relaxed(spi->base + STM32_SPI_SR);
+	sr = readl_relaxed(spi->base + STM32H7_SPI_SR);
 
 	spin_unlock_irqrestore(&spi->lock, flags);
 
-	if (!(sr & SPI_SR_EOT))
+	if (!(sr & STM32H7_SPI_SR_EOT))
 		dev_warn(spi->dev, "DMA error (sr=0x%08x)\n", sr);
 
 	/* Now wait for EOT, or SUSP or OVR in case of error */
@@ -681,23 +1118,27 @@
 	else
 		buswidth = DMA_SLAVE_BUSWIDTH_4_BYTES;
 
-	/* Valid for DMA Half or Full Fifo threshold */
-	if (spi->cur_fthlv == 2)
+	if (spi->cfg->has_fifo) {
+		/* Valid for DMA Half or Full Fifo threshold */
+		if (spi->cur_fthlv == 2)
+			maxburst = 1;
+		else
+			maxburst = spi->cur_fthlv;
+	} else {
 		maxburst = 1;
-	else
-		maxburst = spi->cur_fthlv;
+	}
 
 	memset(dma_conf, 0, sizeof(struct dma_slave_config));
 	dma_conf->direction = dir;
 	if (dma_conf->direction == DMA_DEV_TO_MEM) { /* RX */
-		dma_conf->src_addr = spi->phys_addr + STM32_SPI_RXDR;
+		dma_conf->src_addr = spi->phys_addr + spi->cfg->regs->rx.reg;
 		dma_conf->src_addr_width = buswidth;
 		dma_conf->src_maxburst = maxburst;
 
 		dev_dbg(spi->dev, "Rx DMA config buswidth=%d, maxburst=%d\n",
 			buswidth, maxburst);
 	} else if (dma_conf->direction == DMA_MEM_TO_DEV) { /* TX */
-		dma_conf->dst_addr = spi->phys_addr + STM32_SPI_TXDR;
+		dma_conf->dst_addr = spi->phys_addr + spi->cfg->regs->tx.reg;
 		dma_conf->dst_addr_width = buswidth;
 		dma_conf->dst_maxburst = maxburst;
 
@@ -707,27 +1148,68 @@
 }
 
 /**
- * stm32_spi_transfer_one_irq - transfer a single spi_transfer using
- *				interrupts
+ * stm32f4_spi_transfer_one_irq - transfer a single spi_transfer using
+ *				  interrupts
  *
  * It must returns 0 if the transfer is finished or 1 if the transfer is still
  * in progress.
  */
-static int stm32_spi_transfer_one_irq(struct stm32_spi *spi)
+static int stm32f4_spi_transfer_one_irq(struct stm32_spi *spi)
+{
+	unsigned long flags;
+	u32 cr2 = 0;
+
+	/* Enable the interrupts relative to the current communication mode */
+	if (spi->cur_comm == SPI_SIMPLEX_TX || spi->cur_comm == SPI_3WIRE_TX) {
+		cr2 |= STM32F4_SPI_CR2_TXEIE;
+	} else if (spi->cur_comm == SPI_FULL_DUPLEX) {
+		/* In transmit-only mode, the OVR flag is set in the SR register
+		 * since the received data are never read. Therefore set OVR
+		 * interrupt only when rx buffer is available.
+		 */
+		cr2 |= STM32F4_SPI_CR2_RXNEIE | STM32F4_SPI_CR2_ERRIE;
+	} else {
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&spi->lock, flags);
+
+	stm32_spi_set_bits(spi, STM32F4_SPI_CR2, cr2);
+
+	stm32_spi_enable(spi);
+
+	/* starting data transfer when buffer is loaded */
+	if (spi->tx_buf)
+		stm32f4_spi_write_tx(spi);
+
+	spin_unlock_irqrestore(&spi->lock, flags);
+
+	return 1;
+}
+
+/**
+ * stm32h7_spi_transfer_one_irq - transfer a single spi_transfer using
+ *				  interrupts
+ *
+ * It must return 0 if the transfer is finished or 1 if the transfer is still
+ * in progress.
+ */
+static int stm32h7_spi_transfer_one_irq(struct stm32_spi *spi)
 {
 	unsigned long flags;
 	u32 ier = 0;
 
 	/* Enable the interrupts relative to the current communication mode */
 	if (spi->tx_buf && spi->rx_buf)	/* Full Duplex */
-		ier |= SPI_IER_DXPIE;
+		ier |= STM32H7_SPI_IER_DXPIE;
 	else if (spi->tx_buf)		/* Half-Duplex TX dir or Simplex TX */
-		ier |= SPI_IER_TXPIE;
+		ier |= STM32H7_SPI_IER_TXPIE;
 	else if (spi->rx_buf)		/* Half-Duplex RX dir or Simplex RX */
-		ier |= SPI_IER_RXPIE;
+		ier |= STM32H7_SPI_IER_RXPIE;
 
 	/* Enable the interrupts relative to the end of transfer */
-	ier |= SPI_IER_EOTIE | SPI_IER_TXTFIE |	SPI_IER_OVRIE |	SPI_IER_MODFIE;
+	ier |= STM32H7_SPI_IER_EOTIE | STM32H7_SPI_IER_TXTFIE |
+	       STM32H7_SPI_IER_OVRIE | STM32H7_SPI_IER_MODFIE;
 
 	spin_lock_irqsave(&spi->lock, flags);
 
@@ -735,11 +1217,11 @@
 
 	/* Be sure to have data in fifo before starting data transfer */
 	if (spi->tx_buf)
-		stm32_spi_write_txfifo(spi);
+		stm32h7_spi_write_txfifo(spi);
 
-	stm32_spi_set_bits(spi, STM32_SPI_CR1, SPI_CR1_CSTART);
+	stm32_spi_set_bits(spi, STM32H7_SPI_CR1, STM32H7_SPI_CR1_CSTART);
 
-	writel_relaxed(ier, spi->base + STM32_SPI_IER);
+	writel_relaxed(ier, spi->base + STM32H7_SPI_IER);
 
 	spin_unlock_irqrestore(&spi->lock, flags);
 
@@ -747,6 +1229,43 @@
 }
 
 /**
+ * stm32f4_spi_transfer_one_dma_start - Set SPI driver registers to start
+ *					transfer using DMA
+ */
+static void stm32f4_spi_transfer_one_dma_start(struct stm32_spi *spi)
+{
+	/* In DMA mode end of transfer is handled by DMA TX or RX callback. */
+	if (spi->cur_comm == SPI_SIMPLEX_RX || spi->cur_comm == SPI_3WIRE_RX ||
+	    spi->cur_comm == SPI_FULL_DUPLEX) {
+		/*
+		 * In transmit-only mode, the OVR flag is set in the SR register
+		 * since the received data are never read. Therefore set OVR
+		 * interrupt only when rx buffer is available.
+		 */
+		stm32_spi_set_bits(spi, STM32F4_SPI_CR2, STM32F4_SPI_CR2_ERRIE);
+	}
+
+	stm32_spi_enable(spi);
+}
+
+/**
+ * stm32h7_spi_transfer_one_dma_start - Set SPI driver registers to start
+ *					transfer using DMA
+ */
+static void stm32h7_spi_transfer_one_dma_start(struct stm32_spi *spi)
+{
+	/* Enable the interrupts relative to the end of transfer */
+	stm32_spi_set_bits(spi, STM32H7_SPI_IER, STM32H7_SPI_IER_EOTIE |
+						 STM32H7_SPI_IER_TXTFIE |
+						 STM32H7_SPI_IER_OVRIE |
+						 STM32H7_SPI_IER_MODFIE);
+
+	stm32_spi_enable(spi);
+
+	stm32_spi_set_bits(spi, STM32H7_SPI_CR1, STM32H7_SPI_CR1_CSTART);
+}
+
+/**
  * stm32_spi_transfer_one_dma - transfer a single spi_transfer using DMA
  *
  * It must returns 0 if the transfer is finished or 1 if the transfer is still
@@ -758,17 +1277,17 @@
 	struct dma_slave_config tx_dma_conf, rx_dma_conf;
 	struct dma_async_tx_descriptor *tx_dma_desc, *rx_dma_desc;
 	unsigned long flags;
-	u32 ier = 0;
 
 	spin_lock_irqsave(&spi->lock, flags);
 
 	rx_dma_desc = NULL;
-	if (spi->rx_buf) {
+	if (spi->rx_buf && spi->dma_rx) {
 		stm32_spi_dma_config(spi, &rx_dma_conf, DMA_DEV_TO_MEM);
 		dmaengine_slave_config(spi->dma_rx, &rx_dma_conf);
 
 		/* Enable Rx DMA request */
-		stm32_spi_set_bits(spi, STM32_SPI_CFG1, SPI_CFG1_RXDMAEN);
+		stm32_spi_set_bits(spi, spi->cfg->regs->dma_rx_en.reg,
+				   spi->cfg->regs->dma_rx_en.mask);
 
 		rx_dma_desc = dmaengine_prep_slave_sg(
 					spi->dma_rx, xfer->rx_sg.sgl,
@@ -778,7 +1297,7 @@
 	}
 
 	tx_dma_desc = NULL;
-	if (spi->tx_buf) {
+	if (spi->tx_buf && spi->dma_tx) {
 		stm32_spi_dma_config(spi, &tx_dma_conf, DMA_MEM_TO_DEV);
 		dmaengine_slave_config(spi->dma_tx, &tx_dma_conf);
 
@@ -789,12 +1308,15 @@
 					DMA_PREP_INTERRUPT);
 	}
 
-	if ((spi->tx_buf && !tx_dma_desc) ||
-	    (spi->rx_buf && !rx_dma_desc))
+	if ((spi->tx_buf && spi->dma_tx && !tx_dma_desc) ||
+	    (spi->rx_buf && spi->dma_rx && !rx_dma_desc))
+		goto dma_desc_error;
+
+	if (spi->cur_comm == SPI_FULL_DUPLEX && (!tx_dma_desc || !rx_dma_desc))
 		goto dma_desc_error;
 
 	if (rx_dma_desc) {
-		rx_dma_desc->callback = stm32_spi_dma_cb;
+		rx_dma_desc->callback = spi->cfg->dma_rx_cb;
 		rx_dma_desc->callback_param = spi;
 
 		if (dma_submit_error(dmaengine_submit(rx_dma_desc))) {
@@ -806,8 +1328,9 @@
 	}
 
 	if (tx_dma_desc) {
-		if (spi->cur_comm == SPI_SIMPLEX_TX) {
-			tx_dma_desc->callback = stm32_spi_dma_cb;
+		if (spi->cur_comm == SPI_SIMPLEX_TX ||
+		    spi->cur_comm == SPI_3WIRE_TX) {
+			tx_dma_desc->callback = spi->cfg->dma_tx_cb;
 			tx_dma_desc->callback_param = spi;
 		}
 
@@ -819,33 +1342,228 @@
 		dma_async_issue_pending(spi->dma_tx);
 
 		/* Enable Tx DMA request */
-		stm32_spi_set_bits(spi, STM32_SPI_CFG1, SPI_CFG1_TXDMAEN);
+		stm32_spi_set_bits(spi, spi->cfg->regs->dma_tx_en.reg,
+				   spi->cfg->regs->dma_tx_en.mask);
 	}
 
-	/* Enable the interrupts relative to the end of transfer */
-	ier |= SPI_IER_EOTIE | SPI_IER_TXTFIE |	SPI_IER_OVRIE |	SPI_IER_MODFIE;
-	writel_relaxed(ier, spi->base + STM32_SPI_IER);
-
-	stm32_spi_enable(spi);
-
-	stm32_spi_set_bits(spi, STM32_SPI_CR1, SPI_CR1_CSTART);
+	spi->cfg->transfer_one_dma_start(spi);
 
 	spin_unlock_irqrestore(&spi->lock, flags);
 
 	return 1;
 
 dma_submit_error:
-	if (spi->rx_buf)
+	if (spi->dma_rx)
 		dmaengine_terminate_all(spi->dma_rx);
 
 dma_desc_error:
-	stm32_spi_clr_bits(spi, STM32_SPI_CFG1, SPI_CFG1_RXDMAEN);
+	stm32_spi_clr_bits(spi, spi->cfg->regs->dma_rx_en.reg,
+			   spi->cfg->regs->dma_rx_en.mask);
 
 	spin_unlock_irqrestore(&spi->lock, flags);
 
 	dev_info(spi->dev, "DMA issue: fall back to irq transfer\n");
 
-	return stm32_spi_transfer_one_irq(spi);
+	spi->cur_usedma = false;
+	return spi->cfg->transfer_one_irq(spi);
+}
+
+/**
+ * stm32f4_spi_set_bpw - Configure bits per word
+ * @spi: pointer to the spi controller data structure
+ */
+static void stm32f4_spi_set_bpw(struct stm32_spi *spi)
+{
+	if (spi->cur_bpw == 16)
+		stm32_spi_set_bits(spi, STM32F4_SPI_CR1, STM32F4_SPI_CR1_DFF);
+	else
+		stm32_spi_clr_bits(spi, STM32F4_SPI_CR1, STM32F4_SPI_CR1_DFF);
+}
+
+/**
+ * stm32h7_spi_set_bpw - configure bits per word
+ * @spi: pointer to the spi controller data structure
+ */
+static void stm32h7_spi_set_bpw(struct stm32_spi *spi)
+{
+	u32 bpw, fthlv;
+	u32 cfg1_clrb = 0, cfg1_setb = 0;
+
+	bpw = spi->cur_bpw - 1;
+
+	cfg1_clrb |= STM32H7_SPI_CFG1_DSIZE;
+	cfg1_setb |= (bpw << STM32H7_SPI_CFG1_DSIZE_SHIFT) &
+		     STM32H7_SPI_CFG1_DSIZE;
+
+	spi->cur_fthlv = stm32h7_spi_prepare_fthlv(spi);
+	fthlv = spi->cur_fthlv - 1;
+
+	cfg1_clrb |= STM32H7_SPI_CFG1_FTHLV;
+	cfg1_setb |= (fthlv << STM32H7_SPI_CFG1_FTHLV_SHIFT) &
+		     STM32H7_SPI_CFG1_FTHLV;
+
+	writel_relaxed(
+		(readl_relaxed(spi->base + STM32H7_SPI_CFG1) &
+		 ~cfg1_clrb) | cfg1_setb,
+		spi->base + STM32H7_SPI_CFG1);
+}
+
+/**
+ * stm32_spi_set_mbr - Configure baud rate divisor in master mode
+ * @spi: pointer to the spi controller data structure
+ * @mbrdiv: baud rate divisor value
+ */
+static void stm32_spi_set_mbr(struct stm32_spi *spi, u32 mbrdiv)
+{
+	u32 clrb = 0, setb = 0;
+
+	clrb |= spi->cfg->regs->br.mask;
+	setb |= ((u32)mbrdiv << spi->cfg->regs->br.shift) &
+		spi->cfg->regs->br.mask;
+
+	writel_relaxed((readl_relaxed(spi->base + spi->cfg->regs->br.reg) &
+			~clrb) | setb,
+		       spi->base + spi->cfg->regs->br.reg);
+}
+
+/**
+ * stm32_spi_communication_type - return transfer communication type
+ * @spi_dev: pointer to the spi device
+ * @transfer: pointer to the spi transfer
+ */
+static unsigned int stm32_spi_communication_type(struct spi_device *spi_dev,
+						 struct spi_transfer *transfer)
+{
+	unsigned int type = SPI_FULL_DUPLEX;
+
+	if (spi_dev->mode & SPI_3WIRE) { /* MISO/MOSI signals shared */
+		/*
+		 * In SPI_3WIRE mode, having both xfer->tx_buf and
+		 * xfer->rx_buf non-NULL is forbidden and rejected by the
+		 * SPI subsystem, so the valid buffer determines the
+		 * direction of the transfer.
+		 */
+		if (!transfer->tx_buf)
+			type = SPI_3WIRE_RX;
+		else
+			type = SPI_3WIRE_TX;
+	} else {
+		if (!transfer->tx_buf)
+			type = SPI_SIMPLEX_RX;
+		else if (!transfer->rx_buf)
+			type = SPI_SIMPLEX_TX;
+	}
+
+	return type;
+}
+
+/**
+ * stm32f4_spi_set_mode - configure communication mode
+ * @spi: pointer to the spi controller data structure
+ * @comm_type: type of communication to configure
+ */
+static int stm32f4_spi_set_mode(struct stm32_spi *spi, unsigned int comm_type)
+{
+	if (comm_type == SPI_3WIRE_TX || comm_type == SPI_SIMPLEX_TX) {
+		stm32_spi_set_bits(spi, STM32F4_SPI_CR1,
+					STM32F4_SPI_CR1_BIDIMODE |
+					STM32F4_SPI_CR1_BIDIOE);
+	} else if (comm_type == SPI_FULL_DUPLEX) {
+		stm32_spi_clr_bits(spi, STM32F4_SPI_CR1,
+					STM32F4_SPI_CR1_BIDIMODE |
+					STM32F4_SPI_CR1_BIDIOE);
+	} else {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * stm32h7_spi_set_mode - configure communication mode
+ * @spi: pointer to the spi controller data structure
+ * @comm_type: type of communication to configure
+ */
+static int stm32h7_spi_set_mode(struct stm32_spi *spi, unsigned int comm_type)
+{
+	u32 mode;
+	u32 cfg2_clrb = 0, cfg2_setb = 0;
+
+	if (comm_type == SPI_3WIRE_RX) {
+		mode = STM32H7_SPI_HALF_DUPLEX;
+		stm32_spi_clr_bits(spi, STM32H7_SPI_CR1, STM32H7_SPI_CR1_HDDIR);
+	} else if (comm_type == SPI_3WIRE_TX) {
+		mode = STM32H7_SPI_HALF_DUPLEX;
+		stm32_spi_set_bits(spi, STM32H7_SPI_CR1, STM32H7_SPI_CR1_HDDIR);
+	} else if (comm_type == SPI_SIMPLEX_RX) {
+		mode = STM32H7_SPI_SIMPLEX_RX;
+	} else if (comm_type == SPI_SIMPLEX_TX) {
+		mode = STM32H7_SPI_SIMPLEX_TX;
+	} else {
+		mode = STM32H7_SPI_FULL_DUPLEX;
+	}
+
+	cfg2_clrb |= STM32H7_SPI_CFG2_COMM;
+	cfg2_setb |= (mode << STM32H7_SPI_CFG2_COMM_SHIFT) &
+		     STM32H7_SPI_CFG2_COMM;
+
+	writel_relaxed(
+		(readl_relaxed(spi->base + STM32H7_SPI_CFG2) &
+		 ~cfg2_clrb) | cfg2_setb,
+		spi->base + STM32H7_SPI_CFG2);
+
+	return 0;
+}
+
+/**
+ * stm32h7_spi_data_idleness - configure minimum time delay inserted between two
+ *			       consecutive data frames in master mode
+ * @spi: pointer to the spi controller data structure
+ * @len: transfer length
+ */
+static void stm32h7_spi_data_idleness(struct stm32_spi *spi, u32 len)
+{
+	u32 cfg2_clrb = 0, cfg2_setb = 0;
+
+	cfg2_clrb |= STM32H7_SPI_CFG2_MIDI;
+	if ((len > 1) && (spi->cur_midi > 0)) {
+		u32 sck_period_ns = DIV_ROUND_UP(SPI_1HZ_NS, spi->cur_speed);
+		u32 midi = min((u32)DIV_ROUND_UP(spi->cur_midi, sck_period_ns),
+			       (u32)STM32H7_SPI_CFG2_MIDI >>
+			       STM32H7_SPI_CFG2_MIDI_SHIFT);
+
+		dev_dbg(spi->dev, "period=%dns, midi=%d(=%dns)\n",
+			sck_period_ns, midi, midi * sck_period_ns);
+		cfg2_setb |= (midi << STM32H7_SPI_CFG2_MIDI_SHIFT) &
+			     STM32H7_SPI_CFG2_MIDI;
+	}
+
+	writel_relaxed((readl_relaxed(spi->base + STM32H7_SPI_CFG2) &
+			~cfg2_clrb) | cfg2_setb,
+		       spi->base + STM32H7_SPI_CFG2);
+}
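+
+The idleness helper above turns a delay requested in nanoseconds into SCK
+cycles: one SCK period is DIV_ROUND_UP(SPI_1HZ_NS, cur_speed) nanoseconds,
+the request is divided by that period (rounding up), and the result is
+clamped to what the MIDI field can hold. A stand-alone sketch of that
+arithmetic, assuming a 4-bit field purely for illustration:

#include <stdint.h>
#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
#define NSEC_PER_SEC		1000000000u

/* Convert a requested inter-frame delay (ns) into SCK cycles, clamped to
 * the capacity of the hardware field (4 bits assumed here for illustration).
 */
static uint32_t midi_cycles(uint32_t delay_ns, uint32_t sck_hz, uint32_t field_max)
{
	uint32_t sck_period_ns = DIV_ROUND_UP(NSEC_PER_SEC, sck_hz);
	uint32_t midi = DIV_ROUND_UP(delay_ns, sck_period_ns);

	return midi < field_max ? midi : field_max;
}

int main(void)
{
	/* 500 ns of idleness at 10 MHz -> 100 ns period -> 5 cycles. */
	printf("midi = %u\n", midi_cycles(500, 10000000, 15));
	/* A huge request is clamped to the field maximum. */
	printf("midi = %u\n", midi_cycles(1000000, 10000000, 15));
	return 0;
}
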
+
+/**
+ * stm32h7_spi_number_of_data - configure the number of data words for the
+ *				 current transfer
+ * @spi: pointer to the spi controller data structure
+ * @nb_words: transfer length in words
+ */
+static int stm32h7_spi_number_of_data(struct stm32_spi *spi, u32 nb_words)
+{
+	u32 cr2_clrb = 0, cr2_setb = 0;
+
+	if (nb_words <= (STM32H7_SPI_CR2_TSIZE >>
+			 STM32H7_SPI_CR2_TSIZE_SHIFT)) {
+		cr2_clrb |= STM32H7_SPI_CR2_TSIZE;
+		cr2_setb = nb_words << STM32H7_SPI_CR2_TSIZE_SHIFT;
+		writel_relaxed((readl_relaxed(spi->base + STM32H7_SPI_CR2) &
+				~cr2_clrb) | cr2_setb,
+			       spi->base + STM32H7_SPI_CR2);
+	} else {
+		return -EMSGSIZE;
+	}
+
+	return 0;
 }
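
set_number_of_data works in words, not bytes; the caller (see the transfer_one_setup hunk further below) rounds the byte length up to whole 8/16/32-bit words, and the helper rejects counts that overflow TSIZE. A quick stand-alone check of that conversion, with an arbitrary limit standing in for the real TSIZE field width:

#include <stdio.h>
#include <stdint.h>
#include <errno.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

/* Byte length -> word count for 8/16/32-bit word widths, mirroring the
 * rounding used by the driver. */
static uint32_t bytes_to_words(uint32_t len, uint32_t bpw)
{
	if (bpw <= 8)
		return len;
	else if (bpw <= 16)
		return DIV_ROUND_UP(len * 8, 16);
	return DIV_ROUND_UP(len * 8, 32);
}

/* Reject transfers whose word count overflows the (hypothetical) TSIZE field. */
static int check_tsize(uint32_t nb_words, uint32_t tsize_max)
{
	return nb_words <= tsize_max ? 0 : -EMSGSIZE;
}

int main(void)
{
	printf("7 bytes @16bpw -> %u words\n", bytes_to_words(7, 16));	/* 4 */
	printf("7 bytes @32bpw -> %u words\n", bytes_to_words(7, 32));	/* 2 */
	printf("check: %d\n", check_tsize(bytes_to_words(7, 16), 0xffff));
	return 0;
}
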
 
 /**
@@ -858,91 +1576,44 @@
 					struct spi_transfer *transfer)
 {
 	unsigned long flags;
-	u32 cfg1_clrb = 0, cfg1_setb = 0, cfg2_clrb = 0, cfg2_setb = 0;
-	u32 mode, nb_words;
-	int ret = 0;
+	unsigned int comm_type;
+	int nb_words, ret = 0;
 
 	spin_lock_irqsave(&spi->lock, flags);
 
 	if (spi->cur_bpw != transfer->bits_per_word) {
-		u32 bpw, fthlv;
-
 		spi->cur_bpw = transfer->bits_per_word;
-		bpw = spi->cur_bpw - 1;
-
-		cfg1_clrb |= SPI_CFG1_DSIZE;
-		cfg1_setb |= (bpw << SPI_CFG1_DSIZE_SHIFT) & SPI_CFG1_DSIZE;
-
-		spi->cur_fthlv = stm32_spi_prepare_fthlv(spi);
-		fthlv = spi->cur_fthlv - 1;
-
-		cfg1_clrb |= SPI_CFG1_FTHLV;
-		cfg1_setb |= (fthlv << SPI_CFG1_FTHLV_SHIFT) & SPI_CFG1_FTHLV;
+		spi->cfg->set_bpw(spi);
 	}
 
 	if (spi->cur_speed != transfer->speed_hz) {
 		int mbr;
 
 		/* Update spi->cur_speed with real clock speed */
-		mbr = stm32_spi_prepare_mbr(spi, transfer->speed_hz);
+		mbr = stm32_spi_prepare_mbr(spi, transfer->speed_hz,
+					    spi->cfg->baud_rate_div_min,
+					    spi->cfg->baud_rate_div_max);
 		if (mbr < 0) {
 			ret = mbr;
 			goto out;
 		}
 
 		transfer->speed_hz = spi->cur_speed;
-
-		cfg1_clrb |= SPI_CFG1_MBR;
-		cfg1_setb |= ((u32)mbr << SPI_CFG1_MBR_SHIFT) & SPI_CFG1_MBR;
+		stm32_spi_set_mbr(spi, mbr);
 	}
 
-	if (cfg1_clrb || cfg1_setb)
-		writel_relaxed((readl_relaxed(spi->base + STM32_SPI_CFG1) &
-				~cfg1_clrb) | cfg1_setb,
-			       spi->base + STM32_SPI_CFG1);
+	comm_type = stm32_spi_communication_type(spi_dev, transfer);
+	if (spi->cur_comm != comm_type) {
+		ret = spi->cfg->set_mode(spi, comm_type);
 
-	mode = SPI_FULL_DUPLEX;
-	if (spi_dev->mode & SPI_3WIRE) { /* MISO/MOSI signals shared */
-		/*
-		 * SPI_3WIRE and xfer->tx_buf != NULL and xfer->rx_buf != NULL
-		 * is forbidden und unvalidated by SPI subsystem so depending
-		 * on the valid buffer, we can determine the direction of the
-		 * transfer.
-		 */
-		mode = SPI_HALF_DUPLEX;
-		if (!transfer->tx_buf)
-			stm32_spi_clr_bits(spi, STM32_SPI_CR1, SPI_CR1_HDDIR);
-		else if (!transfer->rx_buf)
-			stm32_spi_set_bits(spi, STM32_SPI_CR1, SPI_CR1_HDDIR);
-	} else {
-		if (!transfer->tx_buf)
-			mode = SPI_SIMPLEX_RX;
-		else if (!transfer->rx_buf)
-			mode = SPI_SIMPLEX_TX;
-	}
-	if (spi->cur_comm != mode) {
-		spi->cur_comm = mode;
+		if (ret < 0)
+			goto out;
 
-		cfg2_clrb |= SPI_CFG2_COMM;
-		cfg2_setb |= (mode << SPI_CFG2_COMM_SHIFT) & SPI_CFG2_COMM;
+		spi->cur_comm = comm_type;
 	}
 
-	cfg2_clrb |= SPI_CFG2_MIDI;
-	if ((transfer->len > 1) && (spi->cur_midi > 0)) {
-		u32 sck_period_ns = DIV_ROUND_UP(SPI_1HZ_NS, spi->cur_speed);
-		u32 midi = min((u32)DIV_ROUND_UP(spi->cur_midi, sck_period_ns),
-			       (u32)SPI_CFG2_MIDI >> SPI_CFG2_MIDI_SHIFT);
-
-		dev_dbg(spi->dev, "period=%dns, midi=%d(=%dns)\n",
-			sck_period_ns, midi, midi * sck_period_ns);
-
-		cfg2_setb |= (midi << SPI_CFG2_MIDI_SHIFT) & SPI_CFG2_MIDI;
-	}
-
-	if (cfg2_clrb || cfg2_setb)
-		writel_relaxed((readl_relaxed(spi->base + STM32_SPI_CFG2) &
-				~cfg2_clrb) | cfg2_setb,
-			       spi->base + STM32_SPI_CFG2);
+	if (spi->cfg->set_data_idleness)
+		spi->cfg->set_data_idleness(spi, transfer->len);
 
 	if (spi->cur_bpw <= 8)
 		nb_words = transfer->len;
@@ -950,13 +1621,11 @@
 		nb_words = DIV_ROUND_UP(transfer->len * 8, 16);
 	else
 		nb_words = DIV_ROUND_UP(transfer->len * 8, 32);
-	nb_words <<= SPI_CR2_TSIZE_SHIFT;
 
-	if (nb_words <= SPI_CR2_TSIZE) {
-		writel_relaxed(nb_words, spi->base + STM32_SPI_CR2);
-	} else {
-		ret = -EMSGSIZE;
-		goto out;
+	if (spi->cfg->set_number_of_data) {
+		ret = spi->cfg->set_number_of_data(spi, nb_words);
+		if (ret < 0)
+			goto out;
 	}
 
 	spi->cur_xferlen = transfer->len;
@@ -997,7 +1666,7 @@
 	spi->rx_len = spi->rx_buf ? transfer->len : 0;
 
 	spi->cur_usedma = (master->can_dma &&
-			   stm32_spi_can_dma(master, spi_dev, transfer));
+			   master->can_dma(master, spi_dev, transfer));
 
 	ret = stm32_spi_transfer_one_setup(spi, spi_dev, transfer);
 	if (ret) {
@@ -1008,47 +1677,73 @@
 	if (spi->cur_usedma)
 		return stm32_spi_transfer_one_dma(spi, transfer);
 	else
-		return stm32_spi_transfer_one_irq(spi);
+		return spi->cfg->transfer_one_irq(spi);
 }
 
 /**
  * stm32_spi_unprepare_msg - relax the hardware
- *
- * Normally, if TSIZE has been configured, we should relax the hardware at the
- * reception of the EOT interrupt. But in case of error, EOT will not be
- * raised. So the subsystem unprepare_message call allows us to properly
- * complete the transfer from an hardware point of view.
  */
 static int stm32_spi_unprepare_msg(struct spi_master *master,
 				   struct spi_message *msg)
 {
 	struct stm32_spi *spi = spi_master_get_devdata(master);
 
-	stm32_spi_disable(spi);
+	spi->cfg->disable(spi);
 
 	return 0;
 }
 
 /**
- * stm32_spi_config - Configure SPI controller as SPI master
+ * stm32f4_spi_config - Configure SPI controller as SPI master
  */
-static int stm32_spi_config(struct stm32_spi *spi)
+static int stm32f4_spi_config(struct stm32_spi *spi)
 {
 	unsigned long flags;
 
 	spin_lock_irqsave(&spi->lock, flags);
 
 	/* Ensure I2SMOD bit is kept cleared */
-	stm32_spi_clr_bits(spi, STM32_SPI_I2SCFGR, SPI_I2SCFGR_I2SMOD);
+	stm32_spi_clr_bits(spi, STM32F4_SPI_I2SCFGR,
+			   STM32F4_SPI_I2SCFGR_I2SMOD);
+
+	/*
+	 * - SS input value high
+	 * - transmitter half duplex direction
+	 * - Set the master mode (default Motorola mode)
+	 * - Consider 1 master/n slaves configuration and
+	 *   SS input value is determined by the SSI bit
+	 */
+	stm32_spi_set_bits(spi, STM32F4_SPI_CR1, STM32F4_SPI_CR1_SSI |
+						 STM32F4_SPI_CR1_BIDIOE |
+						 STM32F4_SPI_CR1_MSTR |
+						 STM32F4_SPI_CR1_SSM);
+
+	spin_unlock_irqrestore(&spi->lock, flags);
+
+	return 0;
+}
+
+/**
+ * stm32h7_spi_config - Configure SPI controller as SPI master
+ */
+static int stm32h7_spi_config(struct stm32_spi *spi)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&spi->lock, flags);
+
+	/* Ensure I2SMOD bit is kept cleared */
+	stm32_spi_clr_bits(spi, STM32H7_SPI_I2SCFGR,
+			   STM32H7_SPI_I2SCFGR_I2SMOD);
 
 	/*
 	 * - SS input value high
 	 * - transmitter half duplex direction
 	 * - automatic communication suspend when RX-Fifo is full
 	 */
-	stm32_spi_set_bits(spi, STM32_SPI_CR1, SPI_CR1_SSI |
-					       SPI_CR1_HDDIR |
-					       SPI_CR1_MASRX);
+	stm32_spi_set_bits(spi, STM32H7_SPI_CR1, STM32H7_SPI_CR1_SSI |
+						 STM32H7_SPI_CR1_HDDIR |
+						 STM32H7_SPI_CR1_MASRX);
 
 	/*
 	 * - Set the master mode (default Motorola mode)
@@ -1056,17 +1751,56 @@
 	 *   SS input value is determined by the SSI bit
 	 * - keep control of all associated GPIOs
 	 */
-	stm32_spi_set_bits(spi, STM32_SPI_CFG2, SPI_CFG2_MASTER |
-						SPI_CFG2_SSM |
-						SPI_CFG2_AFCNTR);
+	stm32_spi_set_bits(spi, STM32H7_SPI_CFG2, STM32H7_SPI_CFG2_MASTER |
+						  STM32H7_SPI_CFG2_SSM |
+						  STM32H7_SPI_CFG2_AFCNTR);
 
 	spin_unlock_irqrestore(&spi->lock, flags);
 
 	return 0;
 }
 
+static const struct stm32_spi_cfg stm32f4_spi_cfg = {
+	.regs = &stm32f4_spi_regspec,
+	.get_bpw_mask = stm32f4_spi_get_bpw_mask,
+	.disable = stm32f4_spi_disable,
+	.config = stm32f4_spi_config,
+	.set_bpw = stm32f4_spi_set_bpw,
+	.set_mode = stm32f4_spi_set_mode,
+	.transfer_one_dma_start = stm32f4_spi_transfer_one_dma_start,
+	.dma_tx_cb = stm32f4_spi_dma_tx_cb,
+	.dma_rx_cb = stm32f4_spi_dma_rx_cb,
+	.transfer_one_irq = stm32f4_spi_transfer_one_irq,
+	.irq_handler_event = stm32f4_spi_irq_event,
+	.irq_handler_thread = stm32f4_spi_irq_thread,
+	.baud_rate_div_min = STM32F4_SPI_BR_DIV_MIN,
+	.baud_rate_div_max = STM32F4_SPI_BR_DIV_MAX,
+	.has_fifo = false,
+};
+
+static const struct stm32_spi_cfg stm32h7_spi_cfg = {
+	.regs = &stm32h7_spi_regspec,
+	.get_fifo_size = stm32h7_spi_get_fifo_size,
+	.get_bpw_mask = stm32h7_spi_get_bpw_mask,
+	.disable = stm32h7_spi_disable,
+	.config = stm32h7_spi_config,
+	.set_bpw = stm32h7_spi_set_bpw,
+	.set_mode = stm32h7_spi_set_mode,
+	.set_data_idleness = stm32h7_spi_data_idleness,
+	.set_number_of_data = stm32h7_spi_number_of_data,
+	.transfer_one_dma_start = stm32h7_spi_transfer_one_dma_start,
+	.dma_rx_cb = stm32h7_spi_dma_cb,
+	.dma_tx_cb = stm32h7_spi_dma_cb,
+	.transfer_one_irq = stm32h7_spi_transfer_one_irq,
+	.irq_handler_thread = stm32h7_spi_irq_thread,
+	.baud_rate_div_min = STM32H7_SPI_MBR_DIV_MIN,
+	.baud_rate_div_max = STM32H7_SPI_MBR_DIV_MAX,
+	.has_fifo = true,
+};
+
 static const struct of_device_id stm32_spi_of_match[] = {
-	{ .compatible = "st,stm32h7-spi", },
+	{ .compatible = "st,stm32h7-spi", .data = (void *)&stm32h7_spi_cfg },
+	{ .compatible = "st,stm32f4-spi", .data = (void *)&stm32f4_spi_cfg },
 	{},
 };
 MODULE_DEVICE_TABLE(of, stm32_spi_of_match);
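
With the two stm32_spi_cfg tables above, every chip-specific step is reached through function pointers selected from the matched compatible string, keeping probe and the transfer paths variant-agnostic. A reduced user-space sketch of that dispatch idea; the names below are illustrative and are not the driver's types:

#include <stdio.h>
#include <string.h>

/* Illustrative per-variant ops table, loosely mirroring struct stm32_spi_cfg. */
struct variant_cfg {
	const char *compatible;
	int (*config)(void);
	int has_fifo;
};

static int config_f4(void) { printf("configure F4-style controller\n"); return 0; }
static int config_h7(void) { printf("configure H7-style controller\n"); return 0; }

static const struct variant_cfg variants[] = {
	{ "st,stm32f4-spi", config_f4, 0 },
	{ "st,stm32h7-spi", config_h7, 1 },
};

/* Stand-in for the of_match_device() + ->data lookup done in probe. */
static const struct variant_cfg *match_variant(const char *compatible)
{
	for (size_t i = 0; i < sizeof(variants) / sizeof(variants[0]); i++)
		if (!strcmp(variants[i].compatible, compatible))
			return &variants[i];
	return NULL;
}

int main(void)
{
	const struct variant_cfg *cfg = match_variant("st,stm32h7-spi");

	if (cfg) {
		cfg->config();
		printf("has_fifo=%d\n", cfg->has_fifo);
	}
	return 0;
}
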
@@ -1090,30 +1824,37 @@
 	spi->master = master;
 	spin_lock_init(&spi->lock);
 
+	spi->cfg = (const struct stm32_spi_cfg *)
+		of_match_device(pdev->dev.driver->of_match_table,
+				&pdev->dev)->data;
+
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	spi->base = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(spi->base)) {
 		ret = PTR_ERR(spi->base);
 		goto err_master_put;
 	}
+
 	spi->phys_addr = (dma_addr_t)res->start;
 
 	spi->irq = platform_get_irq(pdev, 0);
 	if (spi->irq <= 0) {
-		dev_err(&pdev->dev, "no irq: %d\n", spi->irq);
-		ret = -ENOENT;
+		ret = spi->irq;
+		if (ret != -EPROBE_DEFER)
+			dev_err(&pdev->dev, "failed to get irq: %d\n", ret);
 		goto err_master_put;
 	}
-	ret = devm_request_threaded_irq(&pdev->dev, spi->irq, NULL,
-					stm32_spi_irq, IRQF_ONESHOT,
-					pdev->name, master);
+	ret = devm_request_threaded_irq(&pdev->dev, spi->irq,
+					spi->cfg->irq_handler_event,
+					spi->cfg->irq_handler_thread,
+					IRQF_ONESHOT, pdev->name, master);
 	if (ret) {
 		dev_err(&pdev->dev, "irq%d request failed: %d\n", spi->irq,
 			ret);
 		goto err_master_put;
 	}
 
-	spi->clk = devm_clk_get(&pdev->dev, 0);
+	spi->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(spi->clk)) {
 		ret = PTR_ERR(spi->clk);
 		dev_err(&pdev->dev, "clk get failed: %d\n", ret);
@@ -1139,9 +1880,10 @@
 		reset_control_deassert(spi->rst);
 	}
 
-	spi->fifo_size = stm32_spi_get_fifo_size(spi);
+	if (spi->cfg->has_fifo)
+		spi->fifo_size = spi->cfg->get_fifo_size(spi);
 
-	ret = stm32_spi_config(spi);
+	ret = spi->cfg->config(spi);
 	if (ret) {
 		dev_err(&pdev->dev, "controller configuration failed: %d\n",
 			ret);
@@ -1151,11 +1893,11 @@
 	master->dev.of_node = pdev->dev.of_node;
 	master->auto_runtime_pm = true;
 	master->bus_num = pdev->id;
-	master->mode_bits = SPI_MODE_3 | SPI_CS_HIGH | SPI_LSB_FIRST |
-			    SPI_3WIRE | SPI_LOOP;
-	master->bits_per_word_mask = stm32_spi_get_bpw_mask(spi);
-	master->max_speed_hz = spi->clk_rate / SPI_MBR_DIV_MIN;
-	master->min_speed_hz = spi->clk_rate / SPI_MBR_DIV_MAX;
+	master->mode_bits = SPI_CPHA | SPI_CPOL | SPI_CS_HIGH | SPI_LSB_FIRST |
+			    SPI_3WIRE;
+	master->bits_per_word_mask = spi->cfg->get_bpw_mask(spi);
+	master->max_speed_hz = spi->clk_rate / spi->cfg->baud_rate_div_min;
+	master->min_speed_hz = spi->clk_rate / spi->cfg->baud_rate_div_max;
 	master->setup = stm32_spi_setup;
 	master->prepare_message = stm32_spi_prepare_msg;
 	master->transfer_one = stm32_spi_transfer_one;
@@ -1233,7 +1975,7 @@
 	struct spi_master *master = platform_get_drvdata(pdev);
 	struct stm32_spi *spi = spi_master_get_devdata(master);
 
-	stm32_spi_disable(spi);
+	spi->cfg->disable(spi);
 
 	if (master->dma_tx)
 		dma_release_channel(master->dma_tx);
diff --git a/drivers/spi/spi-sun4i.c b/drivers/spi/spi-sun4i.c
index 4141003..cbfac65 100644
--- a/drivers/spi/spi-sun4i.c
+++ b/drivers/spi/spi-sun4i.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C) 2012 - 2014 Allwinner Tech
  * Pan Nan <pannan@allwinnertech.com>
  *
  * Copyright (C) 2014 Maxime Ripard
  * Maxime Ripard <maxime.ripard@free-electrons.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
  */
 
 #include <linux/clk.h>
@@ -432,7 +428,6 @@
 {
 	struct spi_master *master;
 	struct sun4i_spi *sspi;
-	struct resource	*res;
 	int ret = 0, irq;
 
 	master = spi_alloc_master(&pdev->dev, sizeof(struct sun4i_spi));
@@ -444,8 +439,7 @@
 	platform_set_drvdata(pdev, master);
 	sspi = spi_master_get_devdata(master);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	sspi->base_addr = devm_ioremap_resource(&pdev->dev, res);
+	sspi->base_addr = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(sspi->base_addr)) {
 		ret = PTR_ERR(sspi->base_addr);
 		goto err_free_master;
@@ -453,7 +447,6 @@
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0) {
-		dev_err(&pdev->dev, "No spi IRQ specified\n");
 		ret = -ENXIO;
 		goto err_free_master;
 	}
diff --git a/drivers/spi/spi-sun6i.c b/drivers/spi/spi-sun6i.c
index 8533f4e..ec7967b 100644
--- a/drivers/spi/spi-sun6i.c
+++ b/drivers/spi/spi-sun6i.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C) 2012 - 2014 Allwinner Tech
  * Pan Nan <pannan@allwinnertech.com>
  *
  * Copyright (C) 2014 Maxime Ripard
  * Maxime Ripard <maxime.ripard@free-electrons.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
  */
 
 #include <linux/clk.h>
@@ -439,7 +435,6 @@
 {
 	struct spi_master *master;
 	struct sun6i_spi *sspi;
-	struct resource	*res;
 	int ret = 0, irq;
 
 	master = spi_alloc_master(&pdev->dev, sizeof(struct sun6i_spi));
@@ -451,8 +446,7 @@
 	platform_set_drvdata(pdev, master);
 	sspi = spi_master_get_devdata(master);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	sspi->base_addr = devm_ioremap_resource(&pdev->dev, res);
+	sspi->base_addr = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(sspi->base_addr)) {
 		ret = PTR_ERR(sspi->base_addr);
 		goto err_free_master;
@@ -460,7 +454,6 @@
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0) {
-		dev_err(&pdev->dev, "No spi IRQ specified\n");
 		ret = -ENXIO;
 		goto err_free_master;
 	}
diff --git a/drivers/spi/spi-synquacer.c b/drivers/spi/spi-synquacer.c
new file mode 100644
index 0000000..ae17c99
--- /dev/null
+++ b/drivers/spi/spi-synquacer.c
@@ -0,0 +1,826 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Synquacer HSSPI controller driver
+//
+// Copyright (c) 2015-2018 Socionext Inc.
+// Copyright (c) 2018-2019 Linaro Ltd.
+//
+
+#include <linux/acpi.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/spi/spi.h>
+#include <linux/spinlock.h>
+#include <linux/clk.h>
+
+/* HSSPI register address definitions */
+#define SYNQUACER_HSSPI_REG_MCTRL	0x00
+#define SYNQUACER_HSSPI_REG_PCC0	0x04
+#define SYNQUACER_HSSPI_REG_PCC(n)	(SYNQUACER_HSSPI_REG_PCC0 + (n) * 4)
+#define SYNQUACER_HSSPI_REG_TXF		0x14
+#define SYNQUACER_HSSPI_REG_TXE		0x18
+#define SYNQUACER_HSSPI_REG_TXC		0x1C
+#define SYNQUACER_HSSPI_REG_RXF		0x20
+#define SYNQUACER_HSSPI_REG_RXE		0x24
+#define SYNQUACER_HSSPI_REG_RXC		0x28
+#define SYNQUACER_HSSPI_REG_FAULTF	0x2C
+#define SYNQUACER_HSSPI_REG_FAULTC	0x30
+#define SYNQUACER_HSSPI_REG_DMCFG	0x34
+#define SYNQUACER_HSSPI_REG_DMSTART	0x38
+#define SYNQUACER_HSSPI_REG_DMBCC	0x3C
+#define SYNQUACER_HSSPI_REG_DMSTATUS	0x40
+#define SYNQUACER_HSSPI_REG_FIFOCFG	0x4C
+#define SYNQUACER_HSSPI_REG_TX_FIFO	0x50
+#define SYNQUACER_HSSPI_REG_RX_FIFO	0x90
+#define SYNQUACER_HSSPI_REG_MID		0xFC
+
+/* HSSPI register bit definitions */
+#define SYNQUACER_HSSPI_MCTRL_MEN			BIT(0)
+#define SYNQUACER_HSSPI_MCTRL_COMMAND_SEQUENCE_EN	BIT(1)
+#define SYNQUACER_HSSPI_MCTRL_CDSS			BIT(3)
+#define SYNQUACER_HSSPI_MCTRL_MES			BIT(4)
+#define SYNQUACER_HSSPI_MCTRL_SYNCON			BIT(5)
+
+#define SYNQUACER_HSSPI_PCC_CPHA		BIT(0)
+#define SYNQUACER_HSSPI_PCC_CPOL		BIT(1)
+#define SYNQUACER_HSSPI_PCC_ACES		BIT(2)
+#define SYNQUACER_HSSPI_PCC_RTM			BIT(3)
+#define SYNQUACER_HSSPI_PCC_SSPOL		BIT(4)
+#define SYNQUACER_HSSPI_PCC_SDIR		BIT(7)
+#define SYNQUACER_HSSPI_PCC_SENDIAN		BIT(8)
+#define SYNQUACER_HSSPI_PCC_SAFESYNC		BIT(16)
+#define SYNQUACER_HSSPI_PCC_SS2CD_SHIFT		5U
+#define SYNQUACER_HSSPI_PCC_CDRS_MASK		0x7f
+#define SYNQUACER_HSSPI_PCC_CDRS_SHIFT		9U
+
+#define SYNQUACER_HSSPI_TXF_FIFO_FULL		BIT(0)
+#define SYNQUACER_HSSPI_TXF_FIFO_EMPTY		BIT(1)
+#define SYNQUACER_HSSPI_TXF_SLAVE_RELEASED	BIT(6)
+
+#define SYNQUACER_HSSPI_TXE_FIFO_FULL		BIT(0)
+#define SYNQUACER_HSSPI_TXE_FIFO_EMPTY		BIT(1)
+#define SYNQUACER_HSSPI_TXE_SLAVE_RELEASED	BIT(6)
+
+#define SYNQUACER_HSSPI_RXF_FIFO_MORE_THAN_THRESHOLD		BIT(5)
+#define SYNQUACER_HSSPI_RXF_SLAVE_RELEASED			BIT(6)
+
+#define SYNQUACER_HSSPI_RXE_FIFO_MORE_THAN_THRESHOLD		BIT(5)
+#define SYNQUACER_HSSPI_RXE_SLAVE_RELEASED			BIT(6)
+
+#define SYNQUACER_HSSPI_DMCFG_SSDC		BIT(1)
+#define SYNQUACER_HSSPI_DMCFG_MSTARTEN		BIT(2)
+
+#define SYNQUACER_HSSPI_DMSTART_START		BIT(0)
+#define SYNQUACER_HSSPI_DMSTOP_STOP		BIT(8)
+#define SYNQUACER_HSSPI_DMPSEL_CS_MASK		0x3
+#define SYNQUACER_HSSPI_DMPSEL_CS_SHIFT		16U
+#define SYNQUACER_HSSPI_DMTRP_BUS_WIDTH_SHIFT	24U
+#define SYNQUACER_HSSPI_DMTRP_DATA_MASK		0x3
+#define SYNQUACER_HSSPI_DMTRP_DATA_SHIFT	26U
+#define SYNQUACER_HSSPI_DMTRP_DATA_TXRX		0
+#define SYNQUACER_HSSPI_DMTRP_DATA_RX		1
+#define SYNQUACER_HSSPI_DMTRP_DATA_TX		2
+
+#define SYNQUACER_HSSPI_DMSTATUS_RX_DATA_MASK	0x1f
+#define SYNQUACER_HSSPI_DMSTATUS_RX_DATA_SHIFT	8U
+#define SYNQUACER_HSSPI_DMSTATUS_TX_DATA_MASK	0x1f
+#define SYNQUACER_HSSPI_DMSTATUS_TX_DATA_SHIFT	16U
+
+#define SYNQUACER_HSSPI_FIFOCFG_RX_THRESHOLD_MASK	0xf
+#define SYNQUACER_HSSPI_FIFOCFG_RX_THRESHOLD_SHIFT	0U
+#define SYNQUACER_HSSPI_FIFOCFG_TX_THRESHOLD_MASK	0xf
+#define SYNQUACER_HSSPI_FIFOCFG_TX_THRESHOLD_SHIFT	4U
+#define SYNQUACER_HSSPI_FIFOCFG_FIFO_WIDTH_MASK		0x3
+#define SYNQUACER_HSSPI_FIFOCFG_FIFO_WIDTH_SHIFT	8U
+#define SYNQUACER_HSSPI_FIFOCFG_RX_FLUSH		BIT(11)
+#define SYNQUACER_HSSPI_FIFOCFG_TX_FLUSH		BIT(12)
+
+#define SYNQUACER_HSSPI_FIFO_DEPTH		16U
+#define SYNQUACER_HSSPI_FIFO_TX_THRESHOLD	4U
+#define SYNQUACER_HSSPI_FIFO_RX_THRESHOLD \
+	(SYNQUACER_HSSPI_FIFO_DEPTH - SYNQUACER_HSSPI_FIFO_TX_THRESHOLD)
+
+#define SYNQUACER_HSSPI_TRANSFER_MODE_TX	BIT(1)
+#define SYNQUACER_HSSPI_TRANSFER_MODE_RX	BIT(2)
+#define SYNQUACER_HSSPI_TRANSFER_TMOUT_MSEC	2000U
+#define SYNQUACER_HSSPI_ENABLE_TMOUT_MSEC	1000U
+
+#define SYNQUACER_HSSPI_CLOCK_SRC_IHCLK		0
+#define SYNQUACER_HSSPI_CLOCK_SRC_IPCLK		1
+
+#define SYNQUACER_HSSPI_NUM_CHIP_SELECT		4U
+#define SYNQUACER_HSSPI_IRQ_NAME_MAX		32U
+
+struct synquacer_spi {
+	struct device *dev;
+	struct completion transfer_done;
+	unsigned int cs;
+	unsigned int bpw;
+	unsigned int mode;
+	unsigned int speed;
+	bool aces, rtm;
+	void *rx_buf;
+	const void *tx_buf;
+	struct clk *clk;
+	int clk_src_type;
+	void __iomem *regs;
+	u32 tx_words, rx_words;
+	unsigned int bus_width;
+	unsigned int transfer_mode;
+	char rx_irq_name[SYNQUACER_HSSPI_IRQ_NAME_MAX];
+	char tx_irq_name[SYNQUACER_HSSPI_IRQ_NAME_MAX];
+};
+
+static int read_fifo(struct synquacer_spi *sspi)
+{
+	u32 len = readl(sspi->regs + SYNQUACER_HSSPI_REG_DMSTATUS);
+
+	len = (len >> SYNQUACER_HSSPI_DMSTATUS_RX_DATA_SHIFT) &
+	       SYNQUACER_HSSPI_DMSTATUS_RX_DATA_MASK;
+	len = min(len, sspi->rx_words);
+
+	switch (sspi->bpw) {
+	case 8: {
+		u8 *buf = sspi->rx_buf;
+
+		ioread8_rep(sspi->regs + SYNQUACER_HSSPI_REG_RX_FIFO,
+			    buf, len);
+		sspi->rx_buf = buf + len;
+		break;
+	}
+	case 16: {
+		u16 *buf = sspi->rx_buf;
+
+		ioread16_rep(sspi->regs + SYNQUACER_HSSPI_REG_RX_FIFO,
+			     buf, len);
+		sspi->rx_buf = buf + len;
+		break;
+	}
+	case 24:
+		/* fallthrough, should use 32-bit access */
+	case 32: {
+		u32 *buf = sspi->rx_buf;
+
+		ioread32_rep(sspi->regs + SYNQUACER_HSSPI_REG_RX_FIFO,
+			     buf, len);
+		sspi->rx_buf = buf + len;
+		break;
+	}
+	default:
+		return -EINVAL;
+	}
+
+	sspi->rx_words -= len;
+	return 0;
+}
+
+static int write_fifo(struct synquacer_spi *sspi)
+{
+	u32 len = readl(sspi->regs + SYNQUACER_HSSPI_REG_DMSTATUS);
+
+	len = (len >> SYNQUACER_HSSPI_DMSTATUS_TX_DATA_SHIFT) &
+	       SYNQUACER_HSSPI_DMSTATUS_TX_DATA_MASK;
+	len = min(SYNQUACER_HSSPI_FIFO_DEPTH - len,
+		    sspi->tx_words);
+
+	switch (sspi->bpw) {
+	case 8: {
+		const u8 *buf = sspi->tx_buf;
+
+		iowrite8_rep(sspi->regs + SYNQUACER_HSSPI_REG_TX_FIFO,
+			     buf, len);
+		sspi->tx_buf = buf + len;
+		break;
+	}
+	case 16: {
+		const u16 *buf = sspi->tx_buf;
+
+		iowrite16_rep(sspi->regs + SYNQUACER_HSSPI_REG_TX_FIFO,
+			      buf, len);
+		sspi->tx_buf = buf + len;
+		break;
+	}
+	case 24:
+		/* fallthrough, should use 32-bit access */
+	case 32: {
+		const u32 *buf = sspi->tx_buf;
+
+		iowrite32_rep(sspi->regs + SYNQUACER_HSSPI_REG_TX_FIFO,
+			      buf, len);
+		sspi->tx_buf = buf + len;
+		break;
+	}
+	default:
+		return -EINVAL;
+	}
+
+	sspi->tx_words -= len;
+	return 0;
+}
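+
+Both FIFO helpers above read the occupancy field out of DMSTATUS, clamp it
+against the words still pending, move that many elements at the current word
+size and advance the buffer pointer accordingly. A compact user-space model
+of the TX-side bookkeeping, with a faked occupancy value instead of a
+register read:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define FIFO_DEPTH	16u

/* Model of the TX bookkeeping: the number of free slots comes from a (faked)
 * status value, and we never push more words than are left in the buffer. */
struct tx_state {
	const uint8_t *buf;	/* 8-bit words in this example */
	uint32_t words_left;
};

static uint32_t push_tx(struct tx_state *st, uint32_t fifo_used)
{
	uint32_t len = FIFO_DEPTH - fifo_used;

	if (len > st->words_left)
		len = st->words_left;

	/* A real driver would iowrite8_rep() here; we just consume. */
	st->buf += len;
	st->words_left -= len;
	return len;
}

int main(void)
{
	uint8_t data[40];
	struct tx_state st = { data, sizeof(data) };
	uint32_t n;

	memset(data, 0xa5, sizeof(data));
	n = push_tx(&st, 4);
	printf("pushed %u, left %u\n", n, st.words_left);	/* 12, 28 */
	n = push_tx(&st, 0);
	printf("pushed %u, left %u\n", n, st.words_left);	/* 16, 12 */
	return 0;
}
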
+
+static int synquacer_spi_config(struct spi_master *master,
+				struct spi_device *spi,
+				struct spi_transfer *xfer)
+{
+	struct synquacer_spi *sspi = spi_master_get_devdata(master);
+	unsigned int speed, mode, bpw, cs, bus_width, transfer_mode;
+	u32 rate, val, div;
+
+	/* Full Duplex only on 1-bit wide bus */
+	if (xfer->rx_buf && xfer->tx_buf &&
+	    (xfer->rx_nbits != 1 || xfer->tx_nbits != 1)) {
+		dev_err(sspi->dev,
+			"RX and TX bus widths must be 1-bit for Full-Duplex!\n");
+		return -EINVAL;
+	}
+
+	if (xfer->tx_buf) {
+		bus_width = xfer->tx_nbits;
+		transfer_mode = SYNQUACER_HSSPI_TRANSFER_MODE_TX;
+	} else {
+		bus_width = xfer->rx_nbits;
+		transfer_mode = SYNQUACER_HSSPI_TRANSFER_MODE_RX;
+	}
+
+	mode = spi->mode;
+	cs = spi->chip_select;
+	speed = xfer->speed_hz;
+	bpw = xfer->bits_per_word;
+
+	/* return if nothing to change */
+	if (speed == sspi->speed &&
+		bus_width == sspi->bus_width && bpw == sspi->bpw &&
+		mode == sspi->mode && cs == sspi->cs &&
+		transfer_mode == sspi->transfer_mode) {
+		return 0;
+	}
+
+	sspi->transfer_mode = transfer_mode;
+	rate = master->max_speed_hz;
+
+	div = DIV_ROUND_UP(rate, speed);
+	if (div > 254) {
+		dev_err(sspi->dev, "Requested rate too low (%u)\n",
+			speed);
+		return -EINVAL;
+	}
+
+	val = readl(sspi->regs + SYNQUACER_HSSPI_REG_PCC(cs));
+	val &= ~SYNQUACER_HSSPI_PCC_SAFESYNC;
+	if (bpw == 8 &&	(mode & (SPI_TX_DUAL | SPI_RX_DUAL)) && div < 3)
+		val |= SYNQUACER_HSSPI_PCC_SAFESYNC;
+	if (bpw == 8 &&	(mode & (SPI_TX_QUAD | SPI_RX_QUAD)) && div < 6)
+		val |= SYNQUACER_HSSPI_PCC_SAFESYNC;
+	if (bpw == 16 && (mode & (SPI_TX_QUAD | SPI_RX_QUAD)) && div < 3)
+		val |= SYNQUACER_HSSPI_PCC_SAFESYNC;
+
+	if (mode & SPI_CPHA)
+		val |= SYNQUACER_HSSPI_PCC_CPHA;
+	else
+		val &= ~SYNQUACER_HSSPI_PCC_CPHA;
+
+	if (mode & SPI_CPOL)
+		val |= SYNQUACER_HSSPI_PCC_CPOL;
+	else
+		val &= ~SYNQUACER_HSSPI_PCC_CPOL;
+
+	if (mode & SPI_CS_HIGH)
+		val |= SYNQUACER_HSSPI_PCC_SSPOL;
+	else
+		val &= ~SYNQUACER_HSSPI_PCC_SSPOL;
+
+	if (mode & SPI_LSB_FIRST)
+		val |= SYNQUACER_HSSPI_PCC_SDIR;
+	else
+		val &= ~SYNQUACER_HSSPI_PCC_SDIR;
+
+	if (sspi->aces)
+		val |= SYNQUACER_HSSPI_PCC_ACES;
+	else
+		val &= ~SYNQUACER_HSSPI_PCC_ACES;
+
+	if (sspi->rtm)
+		val |= SYNQUACER_HSSPI_PCC_RTM;
+	else
+		val &= ~SYNQUACER_HSSPI_PCC_RTM;
+
+	val |= (3 << SYNQUACER_HSSPI_PCC_SS2CD_SHIFT);
+	val |= SYNQUACER_HSSPI_PCC_SENDIAN;
+
+	val &= ~(SYNQUACER_HSSPI_PCC_CDRS_MASK <<
+		 SYNQUACER_HSSPI_PCC_CDRS_SHIFT);
+	val |= ((div >> 1) << SYNQUACER_HSSPI_PCC_CDRS_SHIFT);
+
+	writel(val, sspi->regs + SYNQUACER_HSSPI_REG_PCC(cs));
+
+	val = readl(sspi->regs + SYNQUACER_HSSPI_REG_FIFOCFG);
+	val &= ~(SYNQUACER_HSSPI_FIFOCFG_FIFO_WIDTH_MASK <<
+		 SYNQUACER_HSSPI_FIFOCFG_FIFO_WIDTH_SHIFT);
+	val |= ((bpw / 8 - 1) << SYNQUACER_HSSPI_FIFOCFG_FIFO_WIDTH_SHIFT);
+	writel(val, sspi->regs + SYNQUACER_HSSPI_REG_FIFOCFG);
+
+	val = readl(sspi->regs + SYNQUACER_HSSPI_REG_DMSTART);
+	val &= ~(SYNQUACER_HSSPI_DMTRP_DATA_MASK <<
+		 SYNQUACER_HSSPI_DMTRP_DATA_SHIFT);
+
+	if (xfer->rx_buf)
+		val |= (SYNQUACER_HSSPI_DMTRP_DATA_RX <<
+			SYNQUACER_HSSPI_DMTRP_DATA_SHIFT);
+	else
+		val |= (SYNQUACER_HSSPI_DMTRP_DATA_TX <<
+			SYNQUACER_HSSPI_DMTRP_DATA_SHIFT);
+
+	val &= ~(3 << SYNQUACER_HSSPI_DMTRP_BUS_WIDTH_SHIFT);
+	val |= ((bus_width >> 1) << SYNQUACER_HSSPI_DMTRP_BUS_WIDTH_SHIFT);
+	writel(val, sspi->regs + SYNQUACER_HSSPI_REG_DMSTART);
+
+	sspi->bpw = bpw;
+	sspi->mode = mode;
+	sspi->speed = speed;
+	sspi->cs = spi->chip_select;
+	sspi->bus_width = bus_width;
+
+	return 0;
+}
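+
+The rate setup above derives the divider as DIV_ROUND_UP(parent, speed),
+rejects anything above 254, and programs half of it into the CDRS field. A
+short worked example of that selection with arbitrary clock numbers:

#include <stdio.h>
#include <stdint.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

/* Mirror the divider selection: round the ratio up, reject > 254, and
 * program half of it into the 7-bit CDRS field as the driver does. */
static int pick_cdrs(uint32_t parent_hz, uint32_t speed_hz, uint32_t *cdrs)
{
	uint32_t div = DIV_ROUND_UP(parent_hz, speed_hz);

	if (div > 254)
		return -1;	/* requested rate too low for this parent clock */

	*cdrs = div >> 1;
	return 0;
}

int main(void)
{
	uint32_t cdrs;

	if (!pick_cdrs(200000000, 25000000, &cdrs))	/* div = 8 */
		printf("cdrs = %u\n", cdrs);		/* 4 */
	if (pick_cdrs(200000000, 500000, &cdrs))	/* div = 400 > 254 */
		printf("too slow for this parent clock\n");
	return 0;
}
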
+
+static int synquacer_spi_transfer_one(struct spi_master *master,
+				      struct spi_device *spi,
+				      struct spi_transfer *xfer)
+{
+	struct synquacer_spi *sspi = spi_master_get_devdata(master);
+	int ret;
+	int status = 0;
+	u32 words;
+	u8 bpw;
+	u32 val;
+
+	val = readl(sspi->regs + SYNQUACER_HSSPI_REG_DMSTART);
+	val &= ~SYNQUACER_HSSPI_DMSTOP_STOP;
+	writel(val, sspi->regs + SYNQUACER_HSSPI_REG_DMSTART);
+
+	val = readl(sspi->regs + SYNQUACER_HSSPI_REG_FIFOCFG);
+	val |= SYNQUACER_HSSPI_FIFOCFG_RX_FLUSH;
+	val |= SYNQUACER_HSSPI_FIFOCFG_TX_FLUSH;
+	writel(val, sspi->regs + SYNQUACER_HSSPI_REG_FIFOCFG);
+
+	/*
+	 * See if we can transfer 4 bytes as 1 word
+	 * to maximize the FIFO buffer efficiency.
+	 */
+	bpw = xfer->bits_per_word;
+	if (bpw == 8 && !(xfer->len % 4) && !(spi->mode & SPI_LSB_FIRST))
+		xfer->bits_per_word = 32;
+
+	ret = synquacer_spi_config(master, spi, xfer);
+
+	/* restore */
+	xfer->bits_per_word = bpw;
+
+	if (ret)
+		return ret;
+
+	reinit_completion(&sspi->transfer_done);
+
+	sspi->tx_buf = xfer->tx_buf;
+	sspi->rx_buf = xfer->rx_buf;
+
+	switch (sspi->bpw) {
+	case 8:
+		words = xfer->len;
+		break;
+	case 16:
+		words = xfer->len / 2;
+		break;
+	case 24:
+		/* fallthrough, should use 32-bit access */
+	case 32:
+		words = xfer->len / 4;
+		break;
+	default:
+		dev_err(sspi->dev, "unsupported bpw: %d\n", sspi->bpw);
+		return -EINVAL;
+	}
+
+	if (xfer->tx_buf)
+		sspi->tx_words = words;
+	else
+		sspi->tx_words = 0;
+
+	if (xfer->rx_buf)
+		sspi->rx_words = words;
+	else
+		sspi->rx_words = 0;
+
+	if (xfer->tx_buf) {
+		status = write_fifo(sspi);
+		if (status < 0) {
+			dev_err(sspi->dev, "failed write_fifo. status: 0x%x\n",
+				status);
+			return status;
+		}
+	}
+
+	if (xfer->rx_buf) {
+		val = readl(sspi->regs + SYNQUACER_HSSPI_REG_FIFOCFG);
+		val &= ~(SYNQUACER_HSSPI_FIFOCFG_RX_THRESHOLD_MASK <<
+			 SYNQUACER_HSSPI_FIFOCFG_RX_THRESHOLD_SHIFT);
+		val |= ((sspi->rx_words > SYNQUACER_HSSPI_FIFO_DEPTH ?
+			SYNQUACER_HSSPI_FIFO_RX_THRESHOLD : sspi->rx_words) <<
+			SYNQUACER_HSSPI_FIFOCFG_RX_THRESHOLD_SHIFT);
+		writel(val, sspi->regs + SYNQUACER_HSSPI_REG_FIFOCFG);
+	}
+
+	writel(~0, sspi->regs + SYNQUACER_HSSPI_REG_TXC);
+	writel(~0, sspi->regs + SYNQUACER_HSSPI_REG_RXC);
+
+	/* Trigger */
+	val = readl(sspi->regs + SYNQUACER_HSSPI_REG_DMSTART);
+	val |= SYNQUACER_HSSPI_DMSTART_START;
+	writel(val, sspi->regs + SYNQUACER_HSSPI_REG_DMSTART);
+
+	if (xfer->tx_buf) {
+		val = SYNQUACER_HSSPI_TXE_FIFO_EMPTY;
+		writel(val, sspi->regs + SYNQUACER_HSSPI_REG_TXE);
+		status = wait_for_completion_timeout(&sspi->transfer_done,
+			msecs_to_jiffies(SYNQUACER_HSSPI_TRANSFER_TMOUT_MSEC));
+		writel(0, sspi->regs + SYNQUACER_HSSPI_REG_TXE);
+	}
+
+	if (xfer->rx_buf) {
+		u32 buf[SYNQUACER_HSSPI_FIFO_DEPTH];
+
+		val = SYNQUACER_HSSPI_RXE_FIFO_MORE_THAN_THRESHOLD |
+		      SYNQUACER_HSSPI_RXE_SLAVE_RELEASED;
+		writel(val, sspi->regs + SYNQUACER_HSSPI_REG_RXE);
+		status = wait_for_completion_timeout(&sspi->transfer_done,
+			msecs_to_jiffies(SYNQUACER_HSSPI_TRANSFER_TMOUT_MSEC));
+		writel(0, sspi->regs + SYNQUACER_HSSPI_REG_RXE);
+
+		/* stop RX and clean RXFIFO */
+		val = readl(sspi->regs + SYNQUACER_HSSPI_REG_DMSTART);
+		val |= SYNQUACER_HSSPI_DMSTOP_STOP;
+		writel(val, sspi->regs + SYNQUACER_HSSPI_REG_DMSTART);
+		sspi->rx_buf = buf;
+		sspi->rx_words = SYNQUACER_HSSPI_FIFO_DEPTH;
+		read_fifo(sspi);
+	}
+
+	if (status < 0) {
+		dev_err(sspi->dev, "failed to transfer. status: 0x%x\n",
+			status);
+		return status;
+	}
+
+	return 0;
+}
+
+static void synquacer_spi_set_cs(struct spi_device *spi, bool enable)
+{
+	struct synquacer_spi *sspi = spi_master_get_devdata(spi->master);
+	u32 val;
+
+	val = readl(sspi->regs + SYNQUACER_HSSPI_REG_DMSTART);
+	val &= ~(SYNQUACER_HSSPI_DMPSEL_CS_MASK <<
+		 SYNQUACER_HSSPI_DMPSEL_CS_SHIFT);
+	val |= spi->chip_select << SYNQUACER_HSSPI_DMPSEL_CS_SHIFT;
+	writel(val, sspi->regs + SYNQUACER_HSSPI_REG_DMSTART);
+}
+
+static int synquacer_spi_wait_status_update(struct synquacer_spi *sspi,
+					    bool enable)
+{
+	u32 val;
+	unsigned long timeout = jiffies +
+		msecs_to_jiffies(SYNQUACER_HSSPI_ENABLE_TMOUT_MSEC);
+
+	/* wait until MES (Module Enable Status) is updated */
+	do {
+		val = readl(sspi->regs + SYNQUACER_HSSPI_REG_MCTRL) &
+		      SYNQUACER_HSSPI_MCTRL_MES;
+		if (enable && val)
+			return 0;
+		if (!enable && !val)
+			return 0;
+	} while (time_before(jiffies, timeout));
+
+	dev_err(sspi->dev, "timeout while updating Module Enable Status\n");
+	return -EBUSY;
+}
+
+static int synquacer_spi_enable(struct spi_master *master)
+{
+	u32 val;
+	int status;
+	struct synquacer_spi *sspi = spi_master_get_devdata(master);
+
+	/* Disable module */
+	writel(0, sspi->regs + SYNQUACER_HSSPI_REG_MCTRL);
+	status = synquacer_spi_wait_status_update(sspi, false);
+	if (status < 0)
+		return status;
+
+	writel(0, sspi->regs + SYNQUACER_HSSPI_REG_TXE);
+	writel(0, sspi->regs + SYNQUACER_HSSPI_REG_RXE);
+	writel(~0, sspi->regs + SYNQUACER_HSSPI_REG_TXC);
+	writel(~0, sspi->regs + SYNQUACER_HSSPI_REG_RXC);
+	writel(~0, sspi->regs + SYNQUACER_HSSPI_REG_FAULTC);
+
+	val = readl(sspi->regs + SYNQUACER_HSSPI_REG_DMCFG);
+	val &= ~SYNQUACER_HSSPI_DMCFG_SSDC;
+	val &= ~SYNQUACER_HSSPI_DMCFG_MSTARTEN;
+	writel(val, sspi->regs + SYNQUACER_HSSPI_REG_DMCFG);
+
+	val = readl(sspi->regs + SYNQUACER_HSSPI_REG_MCTRL);
+	if (sspi->clk_src_type == SYNQUACER_HSSPI_CLOCK_SRC_IPCLK)
+		val |= SYNQUACER_HSSPI_MCTRL_CDSS;
+	else
+		val &= ~SYNQUACER_HSSPI_MCTRL_CDSS;
+
+	val &= ~SYNQUACER_HSSPI_MCTRL_COMMAND_SEQUENCE_EN;
+	val |= SYNQUACER_HSSPI_MCTRL_MEN;
+	val |= SYNQUACER_HSSPI_MCTRL_SYNCON;
+
+	/* Enable module */
+	writel(val, sspi->regs + SYNQUACER_HSSPI_REG_MCTRL);
+	status = synquacer_spi_wait_status_update(sspi, true);
+	if (status < 0)
+		return status;
+
+	return 0;
+}
+
+static irqreturn_t sq_spi_rx_handler(int irq, void *priv)
+{
+	uint32_t val;
+	struct synquacer_spi *sspi = priv;
+
+	val = readl(sspi->regs + SYNQUACER_HSSPI_REG_RXF);
+	if ((val & SYNQUACER_HSSPI_RXF_SLAVE_RELEASED) ||
+	    (val & SYNQUACER_HSSPI_RXF_FIFO_MORE_THAN_THRESHOLD)) {
+		read_fifo(sspi);
+
+		if (sspi->rx_words == 0) {
+			writel(0, sspi->regs + SYNQUACER_HSSPI_REG_RXE);
+			complete(&sspi->transfer_done);
+		}
+		return IRQ_HANDLED;
+	}
+
+	return IRQ_NONE;
+}
+
+static irqreturn_t sq_spi_tx_handler(int irq, void *priv)
+{
+	uint32_t val;
+	struct synquacer_spi *sspi = priv;
+
+	val = readl(sspi->regs + SYNQUACER_HSSPI_REG_TXF);
+	if (val & SYNQUACER_HSSPI_TXF_FIFO_EMPTY) {
+		if (sspi->tx_words == 0) {
+			writel(0, sspi->regs + SYNQUACER_HSSPI_REG_TXE);
+			complete(&sspi->transfer_done);
+		} else {
+			write_fifo(sspi);
+		}
+		return IRQ_HANDLED;
+	}
+
+	return IRQ_NONE;
+}
+
+static int synquacer_spi_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct spi_master *master;
+	struct synquacer_spi *sspi;
+	int ret;
+	int rx_irq, tx_irq;
+
+	master = spi_alloc_master(&pdev->dev, sizeof(*sspi));
+	if (!master)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, master);
+
+	sspi = spi_master_get_devdata(master);
+	sspi->dev = &pdev->dev;
+
+	init_completion(&sspi->transfer_done);
+
+	sspi->regs = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(sspi->regs)) {
+		ret = PTR_ERR(sspi->regs);
+		goto put_spi;
+	}
+
+	sspi->clk_src_type = SYNQUACER_HSSPI_CLOCK_SRC_IHCLK; /* Default */
+	device_property_read_u32(&pdev->dev, "socionext,ihclk-rate",
+				 &master->max_speed_hz); /* for ACPI */
+
+	if (dev_of_node(&pdev->dev)) {
+		if (device_property_match_string(&pdev->dev,
+					 "clock-names", "iHCLK") >= 0) {
+			sspi->clk_src_type = SYNQUACER_HSSPI_CLOCK_SRC_IHCLK;
+			sspi->clk = devm_clk_get(sspi->dev, "iHCLK");
+		} else if (device_property_match_string(&pdev->dev,
+						"clock-names", "iPCLK") >= 0) {
+			sspi->clk_src_type = SYNQUACER_HSSPI_CLOCK_SRC_IPCLK;
+			sspi->clk = devm_clk_get(sspi->dev, "iPCLK");
+		} else {
+			dev_err(&pdev->dev, "specified wrong clock source\n");
+			ret = -EINVAL;
+			goto put_spi;
+		}
+
+		if (IS_ERR(sspi->clk)) {
+			if (!(PTR_ERR(sspi->clk) == -EPROBE_DEFER))
+				dev_err(&pdev->dev, "clock not found\n");
+			ret = PTR_ERR(sspi->clk);
+			goto put_spi;
+		}
+
+		ret = clk_prepare_enable(sspi->clk);
+		if (ret) {
+			dev_err(&pdev->dev, "failed to enable clock (%d)\n",
+				ret);
+			goto put_spi;
+		}
+
+		master->max_speed_hz = clk_get_rate(sspi->clk);
+	}
+
+	if (!master->max_speed_hz) {
+		dev_err(&pdev->dev, "missing clock source\n");
+		return -EINVAL;
+	}
+	master->min_speed_hz = master->max_speed_hz / 254;
+
+	sspi->aces = device_property_read_bool(&pdev->dev,
+					       "socionext,set-aces");
+	sspi->rtm = device_property_read_bool(&pdev->dev, "socionext,use-rtm");
+
+	master->num_chipselect = SYNQUACER_HSSPI_NUM_CHIP_SELECT;
+
+	rx_irq = platform_get_irq(pdev, 0);
+	if (rx_irq <= 0) {
+		ret = rx_irq;
+		goto put_spi;
+	}
+	snprintf(sspi->rx_irq_name, SYNQUACER_HSSPI_IRQ_NAME_MAX, "%s-rx",
+		 dev_name(&pdev->dev));
+	ret = devm_request_irq(&pdev->dev, rx_irq, sq_spi_rx_handler,
+				0, sspi->rx_irq_name, sspi);
+	if (ret) {
+		dev_err(&pdev->dev, "request rx_irq failed (%d)\n", ret);
+		goto put_spi;
+	}
+
+	tx_irq = platform_get_irq(pdev, 1);
+	if (tx_irq <= 0) {
+		ret = tx_irq;
+		goto put_spi;
+	}
+	snprintf(sspi->tx_irq_name, SYNQUACER_HSSPI_IRQ_NAME_MAX, "%s-tx",
+		 dev_name(&pdev->dev));
+	ret = devm_request_irq(&pdev->dev, tx_irq, sq_spi_tx_handler,
+				0, sspi->tx_irq_name, sspi);
+	if (ret) {
+		dev_err(&pdev->dev, "request tx_irq failed (%d)\n", ret);
+		goto put_spi;
+	}
+
+	master->dev.of_node = np;
+	master->dev.fwnode = pdev->dev.fwnode;
+	master->auto_runtime_pm = true;
+	master->bus_num = pdev->id;
+
+	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_TX_DUAL | SPI_RX_DUAL |
+			    SPI_TX_QUAD | SPI_RX_QUAD;
+	master->bits_per_word_mask = SPI_BPW_MASK(32) | SPI_BPW_MASK(24) |
+				     SPI_BPW_MASK(16) | SPI_BPW_MASK(8);
+
+	master->set_cs = synquacer_spi_set_cs;
+	master->transfer_one = synquacer_spi_transfer_one;
+
+	ret = synquacer_spi_enable(master);
+	if (ret)
+		goto fail_enable;
+
+	pm_runtime_set_active(sspi->dev);
+	pm_runtime_enable(sspi->dev);
+
+	ret = devm_spi_register_master(sspi->dev, master);
+	if (ret)
+		goto disable_pm;
+
+	return 0;
+
+disable_pm:
+	pm_runtime_disable(sspi->dev);
+fail_enable:
+	clk_disable_unprepare(sspi->clk);
+put_spi:
+	spi_master_put(master);
+
+	return ret;
+}
+
+static int synquacer_spi_remove(struct platform_device *pdev)
+{
+	struct spi_master *master = platform_get_drvdata(pdev);
+	struct synquacer_spi *sspi = spi_master_get_devdata(master);
+
+	pm_runtime_disable(sspi->dev);
+
+	clk_disable_unprepare(sspi->clk);
+
+	return 0;
+}
+
+static int __maybe_unused synquacer_spi_suspend(struct device *dev)
+{
+	struct spi_master *master = dev_get_drvdata(dev);
+	struct synquacer_spi *sspi = spi_master_get_devdata(master);
+	int ret;
+
+	ret = spi_master_suspend(master);
+	if (ret)
+		return ret;
+
+	if (!pm_runtime_suspended(dev))
+		clk_disable_unprepare(sspi->clk);
+
+	return ret;
+}
+
+static int __maybe_unused synquacer_spi_resume(struct device *dev)
+{
+	struct spi_master *master = dev_get_drvdata(dev);
+	struct synquacer_spi *sspi = spi_master_get_devdata(master);
+	int ret;
+
+	if (!pm_runtime_suspended(dev)) {
+		/* Ensure reconfigure during next xfer */
+		sspi->speed = 0;
+
+		ret = clk_prepare_enable(sspi->clk);
+		if (ret < 0) {
+			dev_err(dev, "failed to enable clk (%d)\n",
+				ret);
+			return ret;
+		}
+
+		ret = synquacer_spi_enable(master);
+		if (ret) {
+			dev_err(dev, "failed to enable spi (%d)\n", ret);
+			return ret;
+		}
+	}
+
+	ret = spi_master_resume(master);
+	if (ret < 0)
+		clk_disable_unprepare(sspi->clk);
+
+	return ret;
+}
+
+static SIMPLE_DEV_PM_OPS(synquacer_spi_pm_ops, synquacer_spi_suspend,
+			 synquacer_spi_resume);
+
+static const struct of_device_id synquacer_spi_of_match[] = {
+	{.compatible = "socionext,synquacer-spi"},
+	{}
+};
+MODULE_DEVICE_TABLE(of, synquacer_spi_of_match);
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id synquacer_hsspi_acpi_ids[] = {
+	{ "SCX0004" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(acpi, synquacer_hsspi_acpi_ids);
+#endif
+
+static struct platform_driver synquacer_spi_driver = {
+	.driver = {
+		.name = "synquacer-spi",
+		.pm = &synquacer_spi_pm_ops,
+		.of_match_table = synquacer_spi_of_match,
+		.acpi_match_table = ACPI_PTR(synquacer_hsspi_acpi_ids),
+	},
+	.probe = synquacer_spi_probe,
+	.remove = synquacer_spi_remove,
+};
+module_platform_driver(synquacer_spi_driver);
+
+MODULE_DESCRIPTION("Socionext Synquacer HS-SPI controller driver");
+MODULE_AUTHOR("Masahisa Kojima <masahisa.kojima@linaro.org>");
+MODULE_AUTHOR("Jassi Brar <jaswinder.singh@linaro.org>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c
index a76aced..39374c2 100644
--- a/drivers/spi/spi-tegra114.c
+++ b/drivers/spi/spi-tegra114.c
@@ -1,19 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * SPI driver for NVIDIA's Tegra114 SPI Controller.
  *
  * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/clk.h>
@@ -95,8 +84,10 @@
 		(reg = (((val) & 0x1) << ((cs) * 8 + 5)) |	\
 			((reg) & ~(1 << ((cs) * 8 + 5))))
 #define SPI_SET_CYCLES_BETWEEN_PACKETS(reg, cs, val)		\
-		(reg = (((val) & 0xF) << ((cs) * 8)) |		\
-			((reg) & ~(0xF << ((cs) * 8))))
+		(reg = (((val) & 0x1F) << ((cs) * 8)) |		\
+			((reg) & ~(0x1F << ((cs) * 8))))
+#define MAX_SETUP_HOLD_CYCLES			16
+#define MAX_INACTIVE_CYCLES			32
 
 #define SPI_TRANS_STATUS			0x010
 #define SPI_BLK_CNT(val)			(((val) >> 0) & 0xFFFF)
@@ -149,6 +140,8 @@
 
 #define SPI_TX_FIFO				0x108
 #define SPI_RX_FIFO				0x188
+#define SPI_INTR_MASK				0x18c
+#define SPI_INTR_ALL_MASK			(0x1fUL << 25)
 #define MAX_CHIP_SELECT				4
 #define SPI_FIFO_DEPTH				64
 #define DATA_DIR_TX				(1 << 0)
@@ -161,6 +154,15 @@
 #define MAX_HOLD_CYCLES				16
 #define SPI_DEFAULT_SPEED			25000000
 
+struct tegra_spi_soc_data {
+	bool has_intr_mask_reg;
+};
+
+struct tegra_spi_client_data {
+	int tx_clk_tap_delay;
+	int rx_clk_tap_delay;
+};
+
 struct tegra_spi_data {
 	struct device				*dev;
 	struct spi_master			*master;
@@ -187,6 +189,7 @@
 	unsigned				dma_buf_size;
 	unsigned				max_buf_size;
 	bool					is_curr_dma_xfer;
+	bool					use_hw_based_cs;
 
 	struct completion			rx_dma_complete;
 	struct completion			tx_dma_complete;
@@ -199,6 +202,10 @@
 	u32					command1_reg;
 	u32					dma_control_reg;
 	u32					def_command1_reg;
+	u32					def_command2_reg;
+	u32					spi_cs_timing1;
+	u32					spi_cs_timing2;
+	u8					last_used_cs;
 
 	struct completion			xfer_completion;
 	struct spi_transfer			*curr_xfer;
@@ -211,6 +218,7 @@
 	u32					*tx_dma_buf;
 	dma_addr_t				tx_dma_phys;
 	struct dma_async_tx_descriptor		*tx_dma_desc;
+	const struct tegra_spi_soc_data		*soc_data;
 };
 
 static int tegra_spi_runtime_suspend(struct device *dev);
@@ -259,7 +267,8 @@
 
 	tspi->bytes_per_word = DIV_ROUND_UP(bits_per_word, 8);
 
-	if (bits_per_word == 8 || bits_per_word == 16) {
+	if ((bits_per_word == 8 || bits_per_word == 16 ||
+	     bits_per_word == 32) && t->len > 3) {
 		tspi->is_packed = 1;
 		tspi->words_per_32bit = 32/bits_per_word;
 	} else {
@@ -307,10 +316,16 @@
 				x |= (u32)(*tx_buf++) << (i * 8);
 			tegra_spi_writel(tspi, x, SPI_TX_FIFO);
 		}
+
+		tspi->cur_tx_pos += written_words * tspi->bytes_per_word;
 	} else {
+		unsigned int write_bytes;
 		max_n_32bit = min(tspi->curr_dma_words,  tx_empty_count);
 		written_words = max_n_32bit;
 		nbytes = written_words * tspi->bytes_per_word;
+		if (nbytes > t->len - tspi->cur_pos)
+			nbytes = t->len - tspi->cur_pos;
+		write_bytes = nbytes;
 		for (count = 0; count < max_n_32bit; count++) {
 			u32 x = 0;
 
@@ -319,8 +334,10 @@
 				x |= (u32)(*tx_buf++) << (i * 8);
 			tegra_spi_writel(tspi, x, SPI_TX_FIFO);
 		}
+
+		tspi->cur_tx_pos += write_bytes;
 	}
-	tspi->cur_tx_pos += written_words * tspi->bytes_per_word;
+
 	return written_words;
 }
 
@@ -344,20 +361,27 @@
 			for (i = 0; len && (i < 4); i++, len--)
 				*rx_buf++ = (x >> i*8) & 0xFF;
 		}
-		tspi->cur_rx_pos += tspi->curr_dma_words * tspi->bytes_per_word;
 		read_words += tspi->curr_dma_words;
+		tspi->cur_rx_pos += tspi->curr_dma_words * tspi->bytes_per_word;
 	} else {
 		u32 rx_mask = ((u32)1 << t->bits_per_word) - 1;
+		u8 bytes_per_word = tspi->bytes_per_word;
+		unsigned int read_bytes;
 
+		len = rx_full_count * bytes_per_word;
+		if (len > t->len - tspi->cur_pos)
+			len = t->len - tspi->cur_pos;
+		read_bytes = len;
 		for (count = 0; count < rx_full_count; count++) {
 			u32 x = tegra_spi_readl(tspi, SPI_RX_FIFO) & rx_mask;
 
-			for (i = 0; (i < tspi->bytes_per_word); i++)
+			for (i = 0; len && (i < bytes_per_word); i++, len--)
 				*rx_buf++ = (x >> (i*8)) & 0xFF;
 		}
-		tspi->cur_rx_pos += rx_full_count * tspi->bytes_per_word;
 		read_words += rx_full_count;
+		tspi->cur_rx_pos += read_bytes;
 	}
+
 	return read_words;
 }
 
@@ -372,12 +396,17 @@
 		unsigned len = tspi->curr_dma_words * tspi->bytes_per_word;
 
 		memcpy(tspi->tx_dma_buf, t->tx_buf + tspi->cur_pos, len);
+		tspi->cur_tx_pos += tspi->curr_dma_words * tspi->bytes_per_word;
 	} else {
 		unsigned int i;
 		unsigned int count;
 		u8 *tx_buf = (u8 *)t->tx_buf + tspi->cur_tx_pos;
 		unsigned consume = tspi->curr_dma_words * tspi->bytes_per_word;
+		unsigned int write_bytes;
 
+		if (consume > t->len - tspi->cur_pos)
+			consume = t->len - tspi->cur_pos;
+		write_bytes = consume;
 		for (count = 0; count < tspi->curr_dma_words; count++) {
 			u32 x = 0;
 
@@ -386,8 +415,9 @@
 				x |= (u32)(*tx_buf++) << (i * 8);
 			tspi->tx_dma_buf[count] = x;
 		}
+
+		tspi->cur_tx_pos += write_bytes;
 	}
-	tspi->cur_tx_pos += tspi->curr_dma_words * tspi->bytes_per_word;
 
 	/* Make the dma buffer to read by dma */
 	dma_sync_single_for_device(tspi->dev, tspi->tx_dma_phys,
@@ -405,20 +435,28 @@
 		unsigned len = tspi->curr_dma_words * tspi->bytes_per_word;
 
 		memcpy(t->rx_buf + tspi->cur_rx_pos, tspi->rx_dma_buf, len);
+		tspi->cur_rx_pos += tspi->curr_dma_words * tspi->bytes_per_word;
 	} else {
 		unsigned int i;
 		unsigned int count;
 		unsigned char *rx_buf = t->rx_buf + tspi->cur_rx_pos;
 		u32 rx_mask = ((u32)1 << t->bits_per_word) - 1;
+		unsigned consume = tspi->curr_dma_words * tspi->bytes_per_word;
+		unsigned int read_bytes;
 
+		if (consume > t->len - tspi->cur_pos)
+			consume = t->len - tspi->cur_pos;
+		read_bytes = consume;
 		for (count = 0; count < tspi->curr_dma_words; count++) {
 			u32 x = tspi->rx_dma_buf[count] & rx_mask;
 
-			for (i = 0; (i < tspi->bytes_per_word); i++)
+			for (i = 0; consume && (i < tspi->bytes_per_word);
+							i++, consume--)
 				*rx_buf++ = (x >> (i*8)) & 0xFF;
 		}
+
+		tspi->cur_rx_pos += read_bytes;
 	}
-	tspi->cur_rx_pos += tspi->curr_dma_words * tspi->bytes_per_word;
 
 	/* Make the dma buffer to read by dma */
 	dma_sync_single_for_device(tspi->dev, tspi->rx_dma_phys,
@@ -470,21 +508,38 @@
 	return 0;
 }
 
+static int tegra_spi_flush_fifos(struct tegra_spi_data *tspi)
+{
+	unsigned long timeout = jiffies + HZ;
+	u32 status;
+
+	status = tegra_spi_readl(tspi, SPI_FIFO_STATUS);
+	if ((status & SPI_FIFO_EMPTY) != SPI_FIFO_EMPTY) {
+		status |= SPI_RX_FIFO_FLUSH | SPI_TX_FIFO_FLUSH;
+		tegra_spi_writel(tspi, status, SPI_FIFO_STATUS);
+		while ((status & SPI_FIFO_EMPTY) != SPI_FIFO_EMPTY) {
+			status = tegra_spi_readl(tspi, SPI_FIFO_STATUS);
+			if (time_after(jiffies, timeout)) {
+				dev_err(tspi->dev,
+					"timeout waiting for fifo flush\n");
+				return -EIO;
+			}
+
+			udelay(1);
+		}
+	}
+
+	return 0;
+}
+
 static int tegra_spi_start_dma_based_transfer(
 		struct tegra_spi_data *tspi, struct spi_transfer *t)
 {
 	u32 val;
 	unsigned int len;
 	int ret = 0;
-	u32 status;
-
-	/* Make sure that Rx and Tx fifo are empty */
-	status = tegra_spi_readl(tspi, SPI_FIFO_STATUS);
-	if ((status & SPI_FIFO_EMPTY) != SPI_FIFO_EMPTY) {
-		dev_err(tspi->dev, "Rx/Tx fifo are not empty status 0x%08x\n",
-			(unsigned)status);
-		return -EIO;
-	}
+	u8 dma_burst;
+	struct dma_slave_config dma_sconfig = {0};
 
 	val = SPI_DMA_BLK_SET(tspi->curr_dma_words - 1);
 	tegra_spi_writel(tspi, val, SPI_DMA_BLK);
@@ -496,23 +551,40 @@
 		len = tspi->curr_dma_words * 4;
 
 	/* Set attention level based on length of transfer */
-	if (len & 0xF)
+	if (len & 0xF) {
 		val |= SPI_TX_TRIG_1 | SPI_RX_TRIG_1;
-	else if (((len) >> 4) & 0x1)
+		dma_burst = 1;
+	} else if (((len) >> 4) & 0x1) {
 		val |= SPI_TX_TRIG_4 | SPI_RX_TRIG_4;
-	else
+		dma_burst = 4;
+	} else {
 		val |= SPI_TX_TRIG_8 | SPI_RX_TRIG_8;
+		dma_burst = 8;
+	}
 
-	if (tspi->cur_direction & DATA_DIR_TX)
-		val |= SPI_IE_TX;
+	if (!tspi->soc_data->has_intr_mask_reg) {
+		if (tspi->cur_direction & DATA_DIR_TX)
+			val |= SPI_IE_TX;
 
-	if (tspi->cur_direction & DATA_DIR_RX)
-		val |= SPI_IE_RX;
+		if (tspi->cur_direction & DATA_DIR_RX)
+			val |= SPI_IE_RX;
+	}
 
 	tegra_spi_writel(tspi, val, SPI_DMA_CTL);
 	tspi->dma_control_reg = val;
 
+	dma_sconfig.device_fc = true;
 	if (tspi->cur_direction & DATA_DIR_TX) {
+		dma_sconfig.dst_addr = tspi->phys + SPI_TX_FIFO;
+		dma_sconfig.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+		dma_sconfig.dst_maxburst = dma_burst;
+		ret = dmaengine_slave_config(tspi->tx_dma_chan, &dma_sconfig);
+		if (ret < 0) {
+			dev_err(tspi->dev,
+				"DMA slave config failed: %d\n", ret);
+			return ret;
+		}
+
 		tegra_spi_copy_client_txbuf_to_spi_txbuf(tspi, t);
 		ret = tegra_spi_start_tx_dma(tspi, len);
 		if (ret < 0) {
@@ -523,6 +595,16 @@
 	}
 
 	if (tspi->cur_direction & DATA_DIR_RX) {
+		dma_sconfig.src_addr = tspi->phys + SPI_RX_FIFO;
+		dma_sconfig.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+		dma_sconfig.src_maxburst = dma_burst;
+		ret = dmaengine_slave_config(tspi->rx_dma_chan, &dma_sconfig);
+		if (ret < 0) {
+			dev_err(tspi->dev,
+				"DMA slave config failed: %d\n", ret);
+			return ret;
+		}
+
 		/* Make the dma buffer to read by dma */
 		dma_sync_single_for_device(tspi->dev, tspi->rx_dma_phys,
 				tspi->dma_buf_size, DMA_FROM_DEVICE);
@@ -570,8 +652,9 @@
 
 	tspi->is_curr_dma_xfer = false;
 
-	val |= SPI_DMA_EN;
-	tegra_spi_writel(tspi, val, SPI_DMA_CTL);
+	val = tspi->command1_reg;
+	val |= SPI_PIO;
+	tegra_spi_writel(tspi, val, SPI_COMMAND1);
 	return 0;
 }
 
@@ -582,7 +665,6 @@
 	u32 *dma_buf;
 	dma_addr_t dma_phys;
 	int ret;
-	struct dma_slave_config dma_sconfig;
 
 	dma_chan = dma_request_slave_channel_reason(tspi->dev,
 					dma_to_memory ? "rx" : "tx");
@@ -603,19 +685,6 @@
 	}
 
 	if (dma_to_memory) {
-		dma_sconfig.src_addr = tspi->phys + SPI_RX_FIFO;
-		dma_sconfig.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
-		dma_sconfig.src_maxburst = 0;
-	} else {
-		dma_sconfig.dst_addr = tspi->phys + SPI_TX_FIFO;
-		dma_sconfig.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
-		dma_sconfig.dst_maxburst = 0;
-	}
-
-	ret = dmaengine_slave_config(dma_chan, &dma_sconfig);
-	if (ret)
-		goto scrub;
-	if (dma_to_memory) {
 		tspi->rx_dma_chan = dma_chan;
 		tspi->rx_dma_buf = dma_buf;
 		tspi->rx_dma_phys = dma_phys;
@@ -625,11 +694,6 @@
 		tspi->tx_dma_phys = dma_phys;
 	}
 	return 0;
-
-scrub:
-	dma_free_coherent(tspi->dev, tspi->dma_buf_size, dma_buf, dma_phys);
-	dma_release_channel(dma_chan);
-	return ret;
 }
 
 static void tegra_spi_deinit_dma_param(struct tegra_spi_data *tspi,
@@ -659,14 +723,55 @@
 	dma_release_channel(dma_chan);
 }
 
-static u32 tegra_spi_setup_transfer_one(struct spi_device *spi,
-		struct spi_transfer *t, bool is_first_of_msg)
+static void tegra_spi_set_hw_cs_timing(struct spi_device *spi, u8 setup_dly,
+				       u8 hold_dly, u8 inactive_dly)
 {
 	struct tegra_spi_data *tspi = spi_master_get_devdata(spi->master);
+	u32 setup_hold;
+	u32 spi_cs_timing;
+	u32 inactive_cycles;
+	u8 cs_state;
+
+	setup_dly = min_t(u8, setup_dly, MAX_SETUP_HOLD_CYCLES);
+	hold_dly = min_t(u8, hold_dly, MAX_SETUP_HOLD_CYCLES);
+	if (setup_dly && hold_dly) {
+		setup_hold = SPI_SETUP_HOLD(setup_dly - 1, hold_dly - 1);
+		spi_cs_timing = SPI_CS_SETUP_HOLD(tspi->spi_cs_timing1,
+						  spi->chip_select,
+						  setup_hold);
+		if (tspi->spi_cs_timing1 != spi_cs_timing) {
+			tspi->spi_cs_timing1 = spi_cs_timing;
+			tegra_spi_writel(tspi, spi_cs_timing, SPI_CS_TIMING1);
+		}
+	}
+
+	inactive_cycles = min_t(u8, inactive_dly, MAX_INACTIVE_CYCLES);
+	if (inactive_cycles)
+		inactive_cycles--;
+	cs_state = inactive_cycles ? 0 : 1;
+	spi_cs_timing = tspi->spi_cs_timing2;
+	SPI_SET_CS_ACTIVE_BETWEEN_PACKETS(spi_cs_timing, spi->chip_select,
+					  cs_state);
+	SPI_SET_CYCLES_BETWEEN_PACKETS(spi_cs_timing, spi->chip_select,
+				       inactive_cycles);
+	if (tspi->spi_cs_timing2 != spi_cs_timing) {
+		tspi->spi_cs_timing2 = spi_cs_timing;
+		tegra_spi_writel(tspi, spi_cs_timing, SPI_CS_TIMING2);
+	}
+}
+
+static u32 tegra_spi_setup_transfer_one(struct spi_device *spi,
+					struct spi_transfer *t,
+					bool is_first_of_msg,
+					bool is_single_xfer)
+{
+	struct tegra_spi_data *tspi = spi_master_get_devdata(spi->master);
+	struct tegra_spi_client_data *cdata = spi->controller_data;
 	u32 speed = t->speed_hz;
 	u8 bits_per_word = t->bits_per_word;
-	u32 command1;
+	u32 command1, command2;
 	int req_mode;
+	u32 tx_tap = 0, rx_tap = 0;
 
 	if (speed != tspi->cur_speed) {
 		clk_set_rate(tspi->clk, speed);
@@ -696,6 +801,16 @@
 		else if (req_mode == SPI_MODE_3)
 			command1 |= SPI_CONTROL_MODE_3;
 
+		if (spi->mode & SPI_LSB_FIRST)
+			command1 |= SPI_LSBIT_FE;
+		else
+			command1 &= ~SPI_LSBIT_FE;
+
+		if (spi->mode & SPI_3WIRE)
+			command1 |= SPI_BIDIROE;
+		else
+			command1 &= ~SPI_BIDIROE;
+
 		if (tspi->cs_control) {
 			if (tspi->cs_control != spi)
 				tegra_spi_writel(tspi, command1, SPI_COMMAND1);
@@ -703,13 +818,34 @@
 		} else
 			tegra_spi_writel(tspi, command1, SPI_COMMAND1);
 
-		command1 |= SPI_CS_SW_HW;
-		if (spi->mode & SPI_CS_HIGH)
-			command1 |= SPI_CS_SW_VAL;
-		else
-			command1 &= ~SPI_CS_SW_VAL;
+		/* GPIO based chip select control */
+		if (spi->cs_gpiod)
+			gpiod_set_value(spi->cs_gpiod, 1);
 
-		tegra_spi_writel(tspi, 0, SPI_COMMAND2);
+		if (is_single_xfer && !(t->cs_change)) {
+			tspi->use_hw_based_cs = true;
+			command1 &= ~(SPI_CS_SW_HW | SPI_CS_SW_VAL);
+		} else {
+			tspi->use_hw_based_cs = false;
+			command1 |= SPI_CS_SW_HW;
+			if (spi->mode & SPI_CS_HIGH)
+				command1 |= SPI_CS_SW_VAL;
+			else
+				command1 &= ~SPI_CS_SW_VAL;
+		}
+
+		if (tspi->last_used_cs != spi->chip_select) {
+			if (cdata && cdata->tx_clk_tap_delay)
+				tx_tap = cdata->tx_clk_tap_delay;
+			if (cdata && cdata->rx_clk_tap_delay)
+				rx_tap = cdata->rx_clk_tap_delay;
+			command2 = SPI_TX_TAP_DELAY(tx_tap) |
+				   SPI_RX_TAP_DELAY(rx_tap);
+			if (command2 != tspi->def_command2_reg)
+				tegra_spi_writel(tspi, command2, SPI_COMMAND2);
+			tspi->last_used_cs = spi->chip_select;
+		}
+
 	} else {
 		command1 = tspi->command1_reg;
 		command1 &= ~SPI_BIT_LENGTH(~0);
@@ -728,8 +864,15 @@
 
 	total_fifo_words = tegra_spi_calculate_curr_xfer_param(spi, tspi, t);
 
+	if (t->rx_nbits == SPI_NBITS_DUAL || t->tx_nbits == SPI_NBITS_DUAL)
+		command1 |= SPI_BOTH_EN_BIT;
+	else
+		command1 &= ~SPI_BOTH_EN_BIT;
+
 	if (tspi->is_packed)
 		command1 |= SPI_PACKED;
+	else
+		command1 &= ~SPI_PACKED;
 
 	command1 &= ~(SPI_CS_SEL_MASK | SPI_TX_EN | SPI_RX_EN);
 	tspi->cur_direction = 0;
@@ -748,6 +891,9 @@
 	dev_dbg(tspi->dev, "The def 0x%x and written 0x%x\n",
 		tspi->def_command1_reg, (unsigned)command1);
 
+	ret = tegra_spi_flush_fifos(tspi);
+	if (ret < 0)
+		return ret;
 	if (total_fifo_words > SPI_FIFO_DEPTH)
 		ret = tegra_spi_start_dma_based_transfer(tspi, t);
 	else
@@ -755,9 +901,42 @@
 	return ret;
 }
 
+static struct tegra_spi_client_data
+	*tegra_spi_parse_cdata_dt(struct spi_device *spi)
+{
+	struct tegra_spi_client_data *cdata;
+	struct device_node *slave_np;
+
+	slave_np = spi->dev.of_node;
+	if (!slave_np) {
+		dev_dbg(&spi->dev, "device node not found\n");
+		return NULL;
+	}
+
+	cdata = kzalloc(sizeof(*cdata), GFP_KERNEL);
+	if (!cdata)
+		return NULL;
+
+	of_property_read_u32(slave_np, "nvidia,tx-clk-tap-delay",
+			     &cdata->tx_clk_tap_delay);
+	of_property_read_u32(slave_np, "nvidia,rx-clk-tap-delay",
+			     &cdata->rx_clk_tap_delay);
+	return cdata;
+}
+
+static void tegra_spi_cleanup(struct spi_device *spi)
+{
+	struct tegra_spi_client_data *cdata = spi->controller_data;
+
+	spi->controller_data = NULL;
+	if (spi->dev.of_node)
+		kfree(cdata);
+}
+
 static int tegra_spi_setup(struct spi_device *spi)
 {
 	struct tegra_spi_data *tspi = spi_master_get_devdata(spi->master);
+	struct tegra_spi_client_data *cdata = spi->controller_data;
 	u32 val;
 	unsigned long flags;
 	int ret;
@@ -768,13 +947,30 @@
 		spi->mode & SPI_CPHA ? "" : "~",
 		spi->max_speed_hz);
 
+	if (!cdata) {
+		cdata = tegra_spi_parse_cdata_dt(spi);
+		spi->controller_data = cdata;
+	}
+
 	ret = pm_runtime_get_sync(tspi->dev);
 	if (ret < 0) {
 		dev_err(tspi->dev, "pm runtime failed, e = %d\n", ret);
+		if (cdata)
+			tegra_spi_cleanup(spi);
 		return ret;
 	}
 
+	if (tspi->soc_data->has_intr_mask_reg) {
+		val = tegra_spi_readl(tspi, SPI_INTR_MASK);
+		val &= ~SPI_INTR_ALL_MASK;
+		tegra_spi_writel(tspi, val, SPI_INTR_MASK);
+	}
+
 	spin_lock_irqsave(&tspi->lock, flags);
+	/* GPIO based chip select control */
+	if (spi->cs_gpiod)
+		gpiod_set_value(spi->cs_gpiod, 0);
+
 	val = tspi->def_command1_reg;
 	if (spi->mode & SPI_CS_HIGH)
 		val &= ~SPI_CS_POL_INACTIVE(spi->chip_select);
@@ -799,6 +995,40 @@
 	udelay(delay % 1000);
 }
 
+static void tegra_spi_transfer_end(struct spi_device *spi)
+{
+	struct tegra_spi_data *tspi = spi_master_get_devdata(spi->master);
+	int cs_val = (spi->mode & SPI_CS_HIGH) ? 0 : 1;
+
+	/* GPIO based chip select control */
+	if (spi->cs_gpiod)
+		gpiod_set_value(spi->cs_gpiod, 0);
+
+	if (!tspi->use_hw_based_cs) {
+		if (cs_val)
+			tspi->command1_reg |= SPI_CS_SW_VAL;
+		else
+			tspi->command1_reg &= ~SPI_CS_SW_VAL;
+		tegra_spi_writel(tspi, tspi->command1_reg, SPI_COMMAND1);
+	}
+
+	tegra_spi_writel(tspi, tspi->def_command1_reg, SPI_COMMAND1);
+}
+
+static void tegra_spi_dump_regs(struct tegra_spi_data *tspi)
+{
+	dev_dbg(tspi->dev, "============ SPI REGISTER DUMP ============\n");
+	dev_dbg(tspi->dev, "Command1:    0x%08x | Command2:    0x%08x\n",
+		tegra_spi_readl(tspi, SPI_COMMAND1),
+		tegra_spi_readl(tspi, SPI_COMMAND2));
+	dev_dbg(tspi->dev, "DMA_CTL:     0x%08x | DMA_BLK:     0x%08x\n",
+		tegra_spi_readl(tspi, SPI_DMA_CTL),
+		tegra_spi_readl(tspi, SPI_DMA_BLK));
+	dev_dbg(tspi->dev, "TRANS_STAT:  0x%08x | FIFO_STATUS: 0x%08x\n",
+		tegra_spi_readl(tspi, SPI_TRANS_STATUS),
+		tegra_spi_readl(tspi, SPI_FIFO_STATUS));
+}
+
 static int tegra_spi_transfer_one_message(struct spi_master *master,
 			struct spi_message *msg)
 {
@@ -808,16 +1038,19 @@
 	struct spi_device *spi = msg->spi;
 	int ret;
 	bool skip = false;
+	int single_xfer;
 
 	msg->status = 0;
 	msg->actual_length = 0;
 
+	single_xfer = list_is_singular(&msg->transfers);
 	list_for_each_entry(xfer, &msg->transfers, transfer_list) {
 		u32 cmd1;
 
 		reinit_completion(&tspi->xfer_completion);
 
-		cmd1 = tegra_spi_setup_transfer_one(spi, xfer, is_first_msg);
+		cmd1 = tegra_spi_setup_transfer_one(spi, xfer, is_first_msg,
+						    single_xfer);
 
 		if (!xfer->len) {
 			ret = 0;
@@ -838,21 +1071,33 @@
 		if (WARN_ON(ret == 0)) {
 			dev_err(tspi->dev,
 				"spi transfer timeout, err %d\n", ret);
+			if (tspi->is_curr_dma_xfer &&
+			    (tspi->cur_direction & DATA_DIR_TX))
+				dmaengine_terminate_all(tspi->tx_dma_chan);
+			if (tspi->is_curr_dma_xfer &&
+			    (tspi->cur_direction & DATA_DIR_RX))
+				dmaengine_terminate_all(tspi->rx_dma_chan);
 			ret = -EIO;
+			tegra_spi_dump_regs(tspi);
+			tegra_spi_flush_fifos(tspi);
+			reset_control_assert(tspi->rst);
+			udelay(2);
+			reset_control_deassert(tspi->rst);
+			tspi->last_used_cs = master->num_chipselect + 1;
 			goto complete_xfer;
 		}
 
 		if (tspi->tx_status ||  tspi->rx_status) {
 			dev_err(tspi->dev, "Error in Transfer\n");
 			ret = -EIO;
+			tegra_spi_dump_regs(tspi);
 			goto complete_xfer;
 		}
 		msg->actual_length += xfer->len;
 
 complete_xfer:
 		if (ret < 0 || skip) {
-			tegra_spi_writel(tspi, tspi->def_command1_reg,
-					SPI_COMMAND1);
+			tegra_spi_transfer_end(spi);
 			tegra_spi_transfer_delay(xfer->delay_usecs);
 			goto exit;
 		} else if (list_is_last(&xfer->transfer_list,
@@ -860,13 +1105,11 @@
 			if (xfer->cs_change)
 				tspi->cs_control = spi;
 			else {
-				tegra_spi_writel(tspi, tspi->def_command1_reg,
-						SPI_COMMAND1);
+				tegra_spi_transfer_end(spi);
 				tegra_spi_transfer_delay(xfer->delay_usecs);
 			}
 		} else if (xfer->cs_change) {
-			tegra_spi_writel(tspi, tspi->def_command1_reg,
-					SPI_COMMAND1);
+			tegra_spi_transfer_end(spi);
 			tegra_spi_transfer_delay(xfer->delay_usecs);
 		}
 
@@ -889,11 +1132,14 @@
 			tspi->status_reg);
 		dev_err(tspi->dev, "CpuXfer 0x%08x:0x%08x\n",
 			tspi->command1_reg, tspi->dma_control_reg);
+		tegra_spi_dump_regs(tspi);
+		tegra_spi_flush_fifos(tspi);
+		complete(&tspi->xfer_completion);
+		spin_unlock_irqrestore(&tspi->lock, flags);
 		reset_control_assert(tspi->rst);
 		udelay(2);
 		reset_control_deassert(tspi->rst);
-		complete(&tspi->xfer_completion);
-		goto exit;
+		return IRQ_HANDLED;
 	}
 
 	if (tspi->cur_direction & DATA_DIR_RX)
@@ -961,11 +1207,13 @@
 			tspi->status_reg);
 		dev_err(tspi->dev, "DmaXfer 0x%08x:0x%08x\n",
 			tspi->command1_reg, tspi->dma_control_reg);
+		tegra_spi_dump_regs(tspi);
+		tegra_spi_flush_fifos(tspi);
+		complete(&tspi->xfer_completion);
+		spin_unlock_irqrestore(&tspi->lock, flags);
 		reset_control_assert(tspi->rst);
 		udelay(2);
 		reset_control_deassert(tspi->rst);
-		complete(&tspi->xfer_completion);
-		spin_unlock_irqrestore(&tspi->lock, flags);
 		return IRQ_HANDLED;
 	}
 
@@ -1021,8 +1269,29 @@
 	return IRQ_WAKE_THREAD;
 }
 
+static struct tegra_spi_soc_data tegra114_spi_soc_data = {
+	.has_intr_mask_reg = false,
+};
+
+static struct tegra_spi_soc_data tegra124_spi_soc_data = {
+	.has_intr_mask_reg = false,
+};
+
+static struct tegra_spi_soc_data tegra210_spi_soc_data = {
+	.has_intr_mask_reg = true,
+};
+
 static const struct of_device_id tegra_spi_of_match[] = {
-	{ .compatible = "nvidia,tegra114-spi", },
+	{
+		.compatible = "nvidia,tegra114-spi",
+		.data	    = &tegra114_spi_soc_data,
+	}, {
+		.compatible = "nvidia,tegra124-spi",
+		.data	    = &tegra124_spi_soc_data,
+	}, {
+		.compatible = "nvidia,tegra210-spi",
+		.data	    = &tegra210_spi_soc_data,
+	},
 	{}
 };
 MODULE_DEVICE_TABLE(of, tegra_spi_of_match);
@@ -1033,6 +1302,7 @@
 	struct tegra_spi_data	*tspi;
 	struct resource		*r;
 	int ret, spi_irq;
+	int bus_num;
 
 	master = spi_alloc_master(&pdev->dev, sizeof(*tspi));
 	if (!master) {
@@ -1047,16 +1317,31 @@
 		master->max_speed_hz = 25000000; /* 25MHz */
 
 	/* the spi->mode bits understood by this driver: */
-	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
+	master->use_gpio_descriptors = true;
+	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LSB_FIRST |
+			    SPI_TX_DUAL | SPI_RX_DUAL | SPI_3WIRE;
+	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32);
 	master->setup = tegra_spi_setup;
+	master->cleanup = tegra_spi_cleanup;
 	master->transfer_one_message = tegra_spi_transfer_one_message;
+	master->set_cs_timing = tegra_spi_set_hw_cs_timing;
 	master->num_chipselect = MAX_CHIP_SELECT;
 	master->auto_runtime_pm = true;
+	bus_num = of_alias_get_id(pdev->dev.of_node, "spi");
+	if (bus_num >= 0)
+		master->bus_num = bus_num;
 
 	tspi->master = master;
 	tspi->dev = &pdev->dev;
 	spin_lock_init(&tspi->lock);
 
+	tspi->soc_data = of_device_get_match_data(&pdev->dev);
+	if (!tspi->soc_data) {
+		dev_err(&pdev->dev, "unsupported tegra\n");
+		ret = -ENODEV;
+		goto exit_free_master;
+	}
+
 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	tspi->base = devm_ioremap_resource(&pdev->dev, r);
 	if (IS_ERR(tspi->base)) {
@@ -1067,27 +1352,19 @@
 
 	spi_irq = platform_get_irq(pdev, 0);
 	tspi->irq = spi_irq;
-	ret = request_threaded_irq(tspi->irq, tegra_spi_isr,
-			tegra_spi_isr_thread, IRQF_ONESHOT,
-			dev_name(&pdev->dev), tspi);
-	if (ret < 0) {
-		dev_err(&pdev->dev, "Failed to register ISR for IRQ %d\n",
-					tspi->irq);
-		goto exit_free_master;
-	}
 
 	tspi->clk = devm_clk_get(&pdev->dev, "spi");
 	if (IS_ERR(tspi->clk)) {
 		dev_err(&pdev->dev, "can not get clock\n");
 		ret = PTR_ERR(tspi->clk);
-		goto exit_free_irq;
+		goto exit_free_master;
 	}
 
 	tspi->rst = devm_reset_control_get_exclusive(&pdev->dev, "spi");
 	if (IS_ERR(tspi->rst)) {
 		dev_err(&pdev->dev, "can not get reset\n");
 		ret = PTR_ERR(tspi->rst);
-		goto exit_free_irq;
+		goto exit_free_master;
 	}
 
 	tspi->max_buf_size = SPI_FIFO_DEPTH << 2;
@@ -1095,7 +1372,7 @@
 
 	ret = tegra_spi_init_dma_param(tspi, true);
 	if (ret < 0)
-		goto exit_free_irq;
+		goto exit_free_master;
 	ret = tegra_spi_init_dma_param(tspi, false);
 	if (ret < 0)
 		goto exit_rx_dma_free;
@@ -1117,18 +1394,36 @@
 		dev_err(&pdev->dev, "pm runtime get failed, e = %d\n", ret);
 		goto exit_pm_disable;
 	}
+
+	reset_control_assert(tspi->rst);
+	udelay(2);
+	reset_control_deassert(tspi->rst);
 	tspi->def_command1_reg  = SPI_M_S;
 	tegra_spi_writel(tspi, tspi->def_command1_reg, SPI_COMMAND1);
+	tspi->spi_cs_timing1 = tegra_spi_readl(tspi, SPI_CS_TIMING1);
+	tspi->spi_cs_timing2 = tegra_spi_readl(tspi, SPI_CS_TIMING2);
+	tspi->def_command2_reg = tegra_spi_readl(tspi, SPI_COMMAND2);
+	tspi->last_used_cs = master->num_chipselect + 1;
 	pm_runtime_put(&pdev->dev);
+	ret = request_threaded_irq(tspi->irq, tegra_spi_isr,
+				   tegra_spi_isr_thread, IRQF_ONESHOT,
+				   dev_name(&pdev->dev), tspi);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "Failed to register ISR for IRQ %d\n",
+			tspi->irq);
+		goto exit_pm_disable;
+	}
 
 	master->dev.of_node = pdev->dev.of_node;
 	ret = devm_spi_register_master(&pdev->dev, master);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "can not register to master err %d\n", ret);
-		goto exit_pm_disable;
+		goto exit_free_irq;
 	}
 	return ret;
 
+exit_free_irq:
+	free_irq(spi_irq, tspi);
 exit_pm_disable:
 	pm_runtime_disable(&pdev->dev);
 	if (!pm_runtime_status_suspended(&pdev->dev))
@@ -1136,8 +1431,6 @@
 	tegra_spi_deinit_dma_param(tspi, false);
 exit_rx_dma_free:
 	tegra_spi_deinit_dma_param(tspi, true);
-exit_free_irq:
-	free_irq(spi_irq, tspi);
 exit_free_master:
 	spi_master_put(master);
 	return ret;
@@ -1183,6 +1476,8 @@
 		return ret;
 	}
 	tegra_spi_writel(tspi, tspi->command1_reg, SPI_COMMAND1);
+	tegra_spi_writel(tspi, tspi->def_command2_reg, SPI_COMMAND2);
+	tspi->last_used_cs = master->num_chipselect + 1;
 	pm_runtime_put(dev);
 
 	return spi_master_resume(master);
diff --git a/drivers/spi/spi-tegra20-sflash.c b/drivers/spi/spi-tegra20-sflash.c
index 22893a7..a841a72 100644
--- a/drivers/spi/spi-tegra20-sflash.c
+++ b/drivers/spi/spi-tegra20-sflash.c
@@ -1,21 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * SPI driver for Nvidia's Tegra20 Serial Flash Controller.
  *
  * Copyright (c) 2012, NVIDIA CORPORATION.  All rights reserved.
  *
  * Author: Laxman Dewangan <ldewangan@nvidia.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/clk.h>
@@ -430,7 +419,6 @@
 {
 	struct spi_master	*master;
 	struct tegra_sflash_data	*tsd;
-	struct resource		*r;
 	int ret;
 	const struct of_device_id *match;
 
@@ -462,8 +450,7 @@
 				 &master->max_speed_hz))
 		master->max_speed_hz = 25000000; /* 25MHz */
 
-	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	tsd->base = devm_ioremap_resource(&pdev->dev, r);
+	tsd->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(tsd->base)) {
 		ret = PTR_ERR(tsd->base);
 		goto exit_free_master;
diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c
index 1427f34..111fffc 100644
--- a/drivers/spi/spi-tegra20-slink.c
+++ b/drivers/spi/spi-tegra20-slink.c
@@ -1,19 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * SPI driver for Nvidia's Tegra20/Tegra30 SLINK Controller.
  *
  * Copyright (c) 2012, NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/clk.h>
@@ -717,9 +706,6 @@
 	command2 = tspi->command2_reg;
 	command2 &= ~(SLINK_RXEN | SLINK_TXEN);
 
-	tegra_slink_writel(tspi, command, SLINK_COMMAND);
-	tspi->command_reg = command;
-
 	tspi->cur_direction = 0;
 	if (t->rx_buf) {
 		command2 |= SLINK_RXEN;
@@ -729,9 +715,18 @@
 		command2 |= SLINK_TXEN;
 		tspi->cur_direction |= DATA_DIR_TX;
 	}
+
+	/*
+	 * Writing to the command2 register before the command register prevents
+	 * a spike in chip_select line 0. This selects the chip_select line
+	 * before changing the chip_select value.
+	 */
 	tegra_slink_writel(tspi, command2, SLINK_COMMAND2);
 	tspi->command2_reg = command2;
 
+	tegra_slink_writel(tspi, command, SLINK_COMMAND);
+	tspi->command_reg = command;
+
 	if (total_fifo_words > SLINK_FIFO_DEPTH)
 		ret = tegra_slink_start_dma_based_transfer(tspi, t);
 	else
diff --git a/drivers/spi/spi-test.h b/drivers/spi/spi-test.h
index 6ed7b89..5ddecf0 100644
--- a/drivers/spi/spi-test.h
+++ b/drivers/spi/spi-test.h
@@ -1,19 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  *  linux/drivers/spi/spi-test.h
  *
  *  (c) Martin Sperl <kernel@martin.sperl.org>
  *
  *  spi_test definitions
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
  */
 
 #include <linux/spi/spi.h>
diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c
index 5f19016..3cb6537 100644
--- a/drivers/spi/spi-ti-qspi.c
+++ b/drivers/spi/spi-ti-qspi.c
@@ -1,16 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * TI QSPI driver
  *
  * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com
  * Author: Sourav Poddar <sourav.poddar@ti.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GPLv2.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR /PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/kernel.h>
@@ -490,8 +483,8 @@
 	ti_qspi_write(qspi, MM_SWITCH, QSPI_SPI_SWITCH_REG);
 	if (qspi->ctrl_base) {
 		regmap_update_bits(qspi->ctrl_base, qspi->ctrl_reg,
-				   MEM_CS_EN(spi->chip_select),
-				   MEM_CS_MASK);
+				   MEM_CS_MASK,
+				   MEM_CS_EN(spi->chip_select));
 	}
 	qspi->mmap_enabled = true;
 }
@@ -503,7 +496,7 @@
 	ti_qspi_write(qspi, 0, QSPI_SPI_SWITCH_REG);
 	if (qspi->ctrl_base)
 		regmap_update_bits(qspi->ctrl_base, qspi->ctrl_reg,
-				   0, MEM_CS_MASK);
+				   MEM_CS_MASK, 0);
 	qspi->mmap_enabled = false;
 }
 
@@ -724,7 +717,6 @@
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0) {
-		dev_err(&pdev->dev, "no irq resource?\n");
 		ret = irq;
 		goto free_master;
 	}
diff --git a/drivers/spi/spi-tle62x0.c b/drivers/spi/spi-tle62x0.c
index c6ae775..60dc69a 100644
--- a/drivers/spi/spi-tle62x0.c
+++ b/drivers/spi/spi-tle62x0.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Support Infineon TLE62x0 driver chips
  *
  * Copyright (c) 2007 Simtec Electronics
  *	Ben Dooks, <ben@simtec.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/device.h>
diff --git a/drivers/spi/spi-topcliff-pch.c b/drivers/spi/spi-topcliff-pch.c
index 97d1375..f88cbb9 100644
--- a/drivers/spi/spi-topcliff-pch.c
+++ b/drivers/spi/spi-topcliff-pch.c
@@ -1,16 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * SPI bus driver for the Topcliff PCH used by Intel SoCs
  *
  * Copyright (C) 2011 LAPIS Semiconductor Co., Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/delay.h>
@@ -92,7 +84,6 @@
 #define PCH_MAX_SPBR		1023
 
 /* Definition for ML7213/ML7223/ML7831 by LAPIS Semiconductor */
-#define PCI_VENDOR_ID_ROHM		0x10DB
 #define PCI_DEVICE_ID_ML7213_SPI	0x802c
 #define PCI_DEVICE_ID_ML7223_SPI	0x800F
 #define PCI_DEVICE_ID_ML7831_SPI	0x8816
@@ -1008,6 +999,9 @@
 
 	/* RX */
 	dma->sg_rx_p = kcalloc(num, sizeof(*dma->sg_rx_p), GFP_ATOMIC);
+	if (!dma->sg_rx_p)
+		return;
+
 	sg_init_table(dma->sg_rx_p, num); /* Initialize SG table */
 	/* offset, length setting */
 	sg = dma->sg_rx_p;
@@ -1068,6 +1062,9 @@
 	}
 
 	dma->sg_tx_p = kcalloc(num, sizeof(*dma->sg_tx_p), GFP_ATOMIC);
+	if (!dma->sg_tx_p)
+		return;
+
 	sg_init_table(dma->sg_tx_p, num); /* Initialize SG table */
 	/* offset, length setting */
 	sg = dma->sg_tx_p;
@@ -1294,18 +1291,27 @@
 				  dma->rx_buf_virt, dma->rx_buf_dma);
 }
 
-static void pch_alloc_dma_buf(struct pch_spi_board_data *board_dat,
+static int pch_alloc_dma_buf(struct pch_spi_board_data *board_dat,
 			      struct pch_spi_data *data)
 {
 	struct pch_spi_dma_ctrl *dma;
+	int ret;
 
 	dma = &data->dma;
+	ret = 0;
 	/* Get Consistent memory for Tx DMA */
 	dma->tx_buf_virt = dma_alloc_coherent(&board_dat->pdev->dev,
 				PCH_BUF_SIZE, &dma->tx_buf_dma, GFP_KERNEL);
+	if (!dma->tx_buf_virt)
+		ret = -ENOMEM;
+
 	/* Get Consistent memory for Rx DMA */
 	dma->rx_buf_virt = dma_alloc_coherent(&board_dat->pdev->dev,
 				PCH_BUF_SIZE, &dma->rx_buf_dma, GFP_KERNEL);
+	if (!dma->rx_buf_virt)
+		ret = -ENOMEM;
+
+	return ret;
 }
 
 static int pch_spi_pd_probe(struct platform_device *plat_dev)
@@ -1382,7 +1388,9 @@
 
 	if (use_dma) {
 		dev_info(&plat_dev->dev, "Use DMA for data transfers\n");
-		pch_alloc_dma_buf(board_dat, data);
+		ret = pch_alloc_dma_buf(board_dat, data);
+		if (ret)
+			goto err_spi_register_master;
 	}
 
 	ret = spi_register_master(master);
diff --git a/drivers/spi/spi-uniphier.c b/drivers/spi/spi-uniphier.c
index 5a6137f..47cde18 100644
--- a/drivers/spi/spi-uniphier.c
+++ b/drivers/spi/spi-uniphier.c
@@ -7,6 +7,7 @@
 #include <linux/bitfield.h>
 #include <linux/bitops.h>
 #include <linux/clk.h>
+#include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/module.h>
@@ -16,6 +17,7 @@
 #include <asm/unaligned.h>
 
 #define SSI_TIMEOUT_MS		2000
+#define SSI_POLL_TIMEOUT_US	200
 #define SSI_MAX_CLK_DIVIDER	254
 #define SSI_MIN_CLK_DIVIDER	4
 
@@ -214,6 +216,7 @@
 	if (!priv->is_save_param || priv->mode != spi->mode) {
 		uniphier_spi_set_mode(spi);
 		priv->mode = spi->mode;
+		priv->is_save_param = false;
 	}
 
 	if (!priv->is_save_param || priv->bits_per_word != t->bits_per_word) {
@@ -226,8 +229,7 @@
 		priv->speed_hz = t->speed_hz;
 	}
 
-	if (!priv->is_save_param)
-		priv->is_save_param = true;
+	priv->is_save_param = true;
 
 	/* reset FIFOs */
 	val = SSI_FC_TXFFL | SSI_FC_RXFFL;
@@ -290,21 +292,23 @@
 
 static void uniphier_spi_fill_tx_fifo(struct uniphier_spi_priv *priv)
 {
-	unsigned int tx_count;
+	unsigned int fifo_threshold, fill_bytes;
 	u32 val;
 
-	tx_count = DIV_ROUND_UP(priv->tx_bytes,
+	fifo_threshold = DIV_ROUND_UP(priv->rx_bytes,
 				bytes_per_word(priv->bits_per_word));
-	tx_count = min(tx_count, SSI_FIFO_DEPTH);
+	fifo_threshold = min(fifo_threshold, SSI_FIFO_DEPTH);
+
+	fill_bytes = fifo_threshold - (priv->rx_bytes - priv->tx_bytes);
 
 	/* set fifo threshold */
 	val = readl(priv->base + SSI_FC);
 	val &= ~(SSI_FC_TXFTH_MASK | SSI_FC_RXFTH_MASK);
-	val |= FIELD_PREP(SSI_FC_TXFTH_MASK, tx_count);
-	val |= FIELD_PREP(SSI_FC_RXFTH_MASK, tx_count);
+	val |= FIELD_PREP(SSI_FC_TXFTH_MASK, fifo_threshold);
+	val |= FIELD_PREP(SSI_FC_RXFTH_MASK, fifo_threshold);
 	writel(val, priv->base + SSI_FC);
 
-	while (tx_count--)
+	while (fill_bytes--)
 		uniphier_spi_send(priv);
 }
 
@@ -323,14 +327,13 @@
 	writel(val, priv->base + SSI_FPS);
 }
 
-static int uniphier_spi_transfer_one(struct spi_master *master,
-				     struct spi_device *spi,
-				     struct spi_transfer *t)
+static int uniphier_spi_transfer_one_irq(struct spi_master *master,
+					 struct spi_device *spi,
+					 struct spi_transfer *t)
 {
 	struct uniphier_spi_priv *priv = spi_master_get_devdata(master);
-	int status;
-
-	uniphier_spi_setup_transfer(spi, t);
+	struct device *dev = master->dev.parent;
+	unsigned long time_left;
 
 	reinit_completion(&priv->xfer_done);
 
@@ -338,17 +341,72 @@
 
 	uniphier_spi_irq_enable(spi, SSI_IE_RCIE | SSI_IE_RORIE);
 
-	status = wait_for_completion_timeout(&priv->xfer_done,
-					     msecs_to_jiffies(SSI_TIMEOUT_MS));
+	time_left = wait_for_completion_timeout(&priv->xfer_done,
+					msecs_to_jiffies(SSI_TIMEOUT_MS));
 
 	uniphier_spi_irq_disable(spi, SSI_IE_RCIE | SSI_IE_RORIE);
 
-	if (status < 0)
-		return status;
+	if (!time_left) {
+		dev_err(dev, "transfer timeout.\n");
+		return -ETIMEDOUT;
+	}
 
 	return priv->error;
 }
 
+static int uniphier_spi_transfer_one_poll(struct spi_master *master,
+					  struct spi_device *spi,
+					  struct spi_transfer *t)
+{
+	struct uniphier_spi_priv *priv = spi_master_get_devdata(master);
+	int loop = SSI_POLL_TIMEOUT_US * 10;
+
+	while (priv->tx_bytes) {
+		uniphier_spi_fill_tx_fifo(priv);
+
+		while ((priv->rx_bytes - priv->tx_bytes) > 0) {
+			while (!(readl(priv->base + SSI_SR) & SSI_SR_RNE)
+								&& loop--)
+				ndelay(100);
+
+			if (loop == -1)
+				goto irq_transfer;
+
+			uniphier_spi_recv(priv);
+		}
+	}
+
+	return 0;
+
+irq_transfer:
+	return uniphier_spi_transfer_one_irq(master, spi, t);
+}
+
+static int uniphier_spi_transfer_one(struct spi_master *master,
+				     struct spi_device *spi,
+				     struct spi_transfer *t)
+{
+	struct uniphier_spi_priv *priv = spi_master_get_devdata(master);
+	unsigned long threshold;
+
+	/* Terminate and return success for 0 byte length transfer */
+	if (!t->len)
+		return 0;
+
+	uniphier_spi_setup_transfer(spi, t);
+
+	/*
+	 * If the transfer operation will take longer than
+	 * SSI_POLL_TIMEOUT_US, it should use irq.
+	 */
+	threshold = DIV_ROUND_UP(SSI_POLL_TIMEOUT_US * priv->speed_hz,
+					USEC_PER_SEC * BITS_PER_BYTE);
+	if (t->len > threshold)
+		return uniphier_spi_transfer_one_irq(master, spi, t);
+	else
+		return uniphier_spi_transfer_one_poll(master, spi, t);
+}
+
 static int uniphier_spi_prepare_transfer_hardware(struct spi_master *master)
 {
 	struct uniphier_spi_priv *priv = spi_master_get_devdata(master);
@@ -412,7 +470,6 @@
 {
 	struct uniphier_spi_priv *priv;
 	struct spi_master *master;
-	struct resource *res;
 	unsigned long clk_rate;
 	int irq;
 	int ret;
@@ -427,8 +484,7 @@
 	priv->master = master;
 	priv->is_save_param = false;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	priv->base = devm_ioremap_resource(&pdev->dev, res);
+	priv->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(priv->base)) {
 		ret = PTR_ERR(priv->base);
 		goto out_master_put;
@@ -447,7 +503,6 @@
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0) {
-		dev_err(&pdev->dev, "failed to get IRQ\n");
 		ret = irq;
 		goto out_disable_clk;
 	}
diff --git a/drivers/spi/spi-xcomm.c b/drivers/spi/spi-xcomm.c
index 3c28e24..a3496c4 100644
--- a/drivers/spi/spi-xcomm.c
+++ b/drivers/spi/spi-xcomm.c
@@ -1,10 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Analog Devices AD-FMCOMMS1-EBZ board I2C-SPI bridge driver
  *
  * Copyright 2012 Analog Devices Inc.
  * Author: Lars-Peter Clausen <lars@metafoo.de>
- *
- * Licensed under the GPL-2 or later.
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/spi/spi-xilinx.c b/drivers/spi/spi-xilinx.c
index 63fedc4..d5f9d5f 100644
--- a/drivers/spi/spi-xilinx.c
+++ b/drivers/spi/spi-xilinx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Xilinx SPI controller driver (master mode only)
  *
@@ -8,9 +9,6 @@
  * Copyright (c) 2009 Intel Corporation
  * 2002-2007 (c) MontaVista Software, Inc.
 
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
diff --git a/drivers/spi/spi-xlp.c b/drivers/spi/spi-xlp.c
index 74a01b0..797ac0e 100644
--- a/drivers/spi/spi-xlp.c
+++ b/drivers/spi/spi-xlp.c
@@ -1,15 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2003-2015 Broadcom Corporation
  * All Rights Reserved
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 (GPL v2)
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 #include <linux/acpi.h>
 #include <linux/clk.h>
@@ -378,7 +370,6 @@
 {
 	struct spi_master *master;
 	struct xlp_spi_priv *xspi;
-	struct resource *res;
 	struct clk *clk;
 	int irq, err;
 
@@ -386,16 +377,13 @@
 	if (!xspi)
 		return -ENOMEM;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	xspi->base = devm_ioremap_resource(&pdev->dev, res);
+	xspi->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(xspi->base))
 		return PTR_ERR(xspi->base);
 
 	irq = platform_get_irq(pdev, 0);
-	if (irq < 0) {
-		dev_err(&pdev->dev, "no IRQ resource found: %d\n", irq);
+	if (irq < 0)
 		return irq;
-	}
 	err = devm_request_irq(&pdev->dev, irq, xlp_spi_interrupt, 0,
 			pdev->name, xspi);
 	if (err) {
diff --git a/drivers/spi/spi-xtensa-xtfpga.c b/drivers/spi/spi-xtensa-xtfpga.c
index 8ce04f8..86516eb 100644
--- a/drivers/spi/spi-xtensa-xtfpga.c
+++ b/drivers/spi/spi-xtensa-xtfpga.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Xtensa xtfpga SPI controller driver
  *
  * Copyright (c) 2014 Cadence Design Systems Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/delay.h>
diff --git a/drivers/spi/spi-zynq-qspi.c b/drivers/spi/spi-zynq-qspi.c
new file mode 100644
index 0000000..5cf6993
--- /dev/null
+++ b/drivers/spi/spi-zynq-qspi.c
@@ -0,0 +1,758 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019 Xilinx, Inc.
+ *
+ * Author: Naga Sureshkumar Relli <nagasure@xilinx.com>
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/gpio.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of_irq.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/spi/spi.h>
+#include <linux/workqueue.h>
+#include <linux/spi/spi-mem.h>
+
+/* Register offset definitions */
+#define ZYNQ_QSPI_CONFIG_OFFSET		0x00 /* Configuration  Register, RW */
+#define ZYNQ_QSPI_STATUS_OFFSET		0x04 /* Interrupt Status Register, RO */
+#define ZYNQ_QSPI_IEN_OFFSET		0x08 /* Interrupt Enable Register, WO */
+#define ZYNQ_QSPI_IDIS_OFFSET		0x0C /* Interrupt Disable Reg, WO */
+#define ZYNQ_QSPI_IMASK_OFFSET		0x10 /* Interrupt Enabled Mask Reg, RO */
+#define ZYNQ_QSPI_ENABLE_OFFSET		0x14 /* Enable/Disable Register, RW */
+#define ZYNQ_QSPI_DELAY_OFFSET		0x18 /* Delay Register, RW */
+#define ZYNQ_QSPI_TXD_00_00_OFFSET	0x1C /* Transmit 4-byte inst, WO */
+#define ZYNQ_QSPI_TXD_00_01_OFFSET	0x80 /* Transmit 1-byte inst, WO */
+#define ZYNQ_QSPI_TXD_00_10_OFFSET	0x84 /* Transmit 2-byte inst, WO */
+#define ZYNQ_QSPI_TXD_00_11_OFFSET	0x88 /* Transmit 3-byte inst, WO */
+#define ZYNQ_QSPI_RXD_OFFSET		0x20 /* Data Receive Register, RO */
+#define ZYNQ_QSPI_SIC_OFFSET		0x24 /* Slave Idle Count Register, RW */
+#define ZYNQ_QSPI_TX_THRESH_OFFSET	0x28 /* TX FIFO Watermark Reg, RW */
+#define ZYNQ_QSPI_RX_THRESH_OFFSET	0x2C /* RX FIFO Watermark Reg, RW */
+#define ZYNQ_QSPI_GPIO_OFFSET		0x30 /* GPIO Register, RW */
+#define ZYNQ_QSPI_LINEAR_CFG_OFFSET	0xA0 /* Linear Adapter Config Reg, RW */
+#define ZYNQ_QSPI_MOD_ID_OFFSET		0xFC /* Module ID Register, RO */
+
+/*
+ * QSPI Configuration Register bit Masks
+ *
+ * This register contains various control bits that affect the operation
+ * of the QSPI controller
+ */
+#define ZYNQ_QSPI_CONFIG_IFMODE_MASK	BIT(31) /* Flash Memory Interface */
+#define ZYNQ_QSPI_CONFIG_MANSRT_MASK	BIT(16) /* Manual TX Start */
+#define ZYNQ_QSPI_CONFIG_MANSRTEN_MASK	BIT(15) /* Enable Manual TX Mode */
+#define ZYNQ_QSPI_CONFIG_SSFORCE_MASK	BIT(14) /* Manual Chip Select */
+#define ZYNQ_QSPI_CONFIG_BDRATE_MASK	GENMASK(5, 3) /* Baud Rate Mask */
+#define ZYNQ_QSPI_CONFIG_CPHA_MASK	BIT(2) /* Clock Phase Control */
+#define ZYNQ_QSPI_CONFIG_CPOL_MASK	BIT(1) /* Clock Polarity Control */
+#define ZYNQ_QSPI_CONFIG_SSCTRL_MASK	BIT(10) /* Slave Select Mask */
+#define ZYNQ_QSPI_CONFIG_FWIDTH_MASK	GENMASK(7, 6) /* FIFO width */
+#define ZYNQ_QSPI_CONFIG_MSTREN_MASK	BIT(0) /* Master Mode */
+
+/*
+ * QSPI Configuration Register - Baud rate and slave select
+ *
+ * These are the values used in the calculation of baud rate divisor and
+ * setting the slave select.
+ */
+#define ZYNQ_QSPI_BAUD_DIV_MAX		GENMASK(2, 0) /* Baud rate maximum */
+#define ZYNQ_QSPI_BAUD_DIV_SHIFT	3 /* Baud rate divisor shift in CR */
+#define ZYNQ_QSPI_SS_SHIFT		10 /* Slave Select field shift in CR */
+
+/*
+ * QSPI Interrupt Registers bit Masks
+ *
+ * All the four interrupt registers (Status/Mask/Enable/Disable) have the same
+ * bit definitions.
+ */
+#define ZYNQ_QSPI_IXR_RX_OVERFLOW_MASK	BIT(0) /* QSPI RX FIFO Overflow */
+#define ZYNQ_QSPI_IXR_TXNFULL_MASK	BIT(2) /* QSPI TX FIFO Overflow */
+#define ZYNQ_QSPI_IXR_TXFULL_MASK	BIT(3) /* QSPI TX FIFO is full */
+#define ZYNQ_QSPI_IXR_RXNEMTY_MASK	BIT(4) /* QSPI RX FIFO Not Empty */
+#define ZYNQ_QSPI_IXR_RXF_FULL_MASK	BIT(5) /* QSPI RX FIFO is full */
+#define ZYNQ_QSPI_IXR_TXF_UNDRFLOW_MASK	BIT(6) /* QSPI TX FIFO Underflow */
+#define ZYNQ_QSPI_IXR_ALL_MASK		(ZYNQ_QSPI_IXR_RX_OVERFLOW_MASK | \
+					ZYNQ_QSPI_IXR_TXNFULL_MASK | \
+					ZYNQ_QSPI_IXR_TXFULL_MASK | \
+					ZYNQ_QSPI_IXR_RXNEMTY_MASK | \
+					ZYNQ_QSPI_IXR_RXF_FULL_MASK | \
+					ZYNQ_QSPI_IXR_TXF_UNDRFLOW_MASK)
+#define ZYNQ_QSPI_IXR_RXTX_MASK		(ZYNQ_QSPI_IXR_TXNFULL_MASK | \
+					ZYNQ_QSPI_IXR_RXNEMTY_MASK)
+
+/*
+ * QSPI Enable Register bit Masks
+ *
+ * This register is used to enable or disable the QSPI controller
+ */
+#define ZYNQ_QSPI_ENABLE_ENABLE_MASK	BIT(0) /* QSPI Enable Bit Mask */
+
+/*
+ * QSPI Linear Configuration Register
+ *
+ * It is named Linear Configuration, but it also controls the controller's
+ * behavior when it is not in linear mode.
+ */
+#define ZYNQ_QSPI_LCFG_TWO_MEM_MASK	BIT(30) /* LQSPI Two memories Mask */
+#define ZYNQ_QSPI_LCFG_SEP_BUS_MASK	BIT(29) /* LQSPI Separate bus Mask */
+#define ZYNQ_QSPI_LCFG_U_PAGE_MASK	BIT(28) /* LQSPI Upper Page Mask */
+
+#define ZYNQ_QSPI_LCFG_DUMMY_SHIFT	8
+
+#define ZYNQ_QSPI_FAST_READ_QOUT_CODE	0x6B /* read instruction code */
+#define ZYNQ_QSPI_FIFO_DEPTH		63 /* FIFO depth in words */
+#define ZYNQ_QSPI_RX_THRESHOLD		32 /* Rx FIFO threshold level */
+#define ZYNQ_QSPI_TX_THRESHOLD		1 /* Tx FIFO threshold level */
+
+/*
+ * The modebits configurable by the driver to make the SPI support different
+ * data formats
+ */
+#define ZYNQ_QSPI_MODEBITS			(SPI_CPOL | SPI_CPHA)
+
+/* Default number of chip selects */
+#define ZYNQ_QSPI_DEFAULT_NUM_CS	1
+
+/**
+ * struct zynq_qspi - Defines qspi driver instance
+ * @regs:		Virtual address of the QSPI controller registers
+ * @refclk:		Pointer to the peripheral clock
+ * @pclk:		Pointer to the APB clock
+ * @irq:		IRQ number
+ * @txbuf:		Pointer to the TX buffer
+ * @rxbuf:		Pointer to the RX buffer
+ * @tx_bytes:		Number of bytes left to transfer
+ * @rx_bytes:		Number of bytes left to receive
+ * @data_completion:	completion structure
+ */
+struct zynq_qspi {
+	struct device *dev;
+	void __iomem *regs;
+	struct clk *refclk;
+	struct clk *pclk;
+	int irq;
+	u8 *txbuf;
+	u8 *rxbuf;
+	int tx_bytes;
+	int rx_bytes;
+	struct completion data_completion;
+};
+
+/*
+ * Inline functions for the QSPI controller read/write
+ */
+static inline u32 zynq_qspi_read(struct zynq_qspi *xqspi, u32 offset)
+{
+	return readl_relaxed(xqspi->regs + offset);
+}
+
+static inline void zynq_qspi_write(struct zynq_qspi *xqspi, u32 offset,
+				   u32 val)
+{
+	writel_relaxed(val, xqspi->regs + offset);
+}
+
+/**
+ * zynq_qspi_init_hw - Initialize the hardware
+ * @xqspi:	Pointer to the zynq_qspi structure
+ *
+ * The default settings of the QSPI controller's configurable parameters on
+ * reset are
+ *	- Master mode
+ *	- Baud rate divisor is set to 2
+ *	- Tx threshold set to 1, Rx threshold set to 32
+ *	- Flash memory interface mode enabled
+ *	- Size of the word to be transferred as 8 bit
+ * This function performs the following actions
+ *	- Disable and clear all the interrupts
+ *	- Enable manual slave select
+ *	- Enable manual start
+ *	- Deselect all the chip select lines
+ *	- Set the size of the word to be transferred as 32 bit
+ *	- Set the little-endian mode of the TX FIFO
+ *	- Enable the QSPI controller
+ */
+static void zynq_qspi_init_hw(struct zynq_qspi *xqspi)
+{
+	u32 config_reg;
+
+	zynq_qspi_write(xqspi, ZYNQ_QSPI_ENABLE_OFFSET, 0);
+	zynq_qspi_write(xqspi, ZYNQ_QSPI_IDIS_OFFSET, ZYNQ_QSPI_IXR_ALL_MASK);
+
+	/* Disable linear mode as the boot loader may have used it */
+	zynq_qspi_write(xqspi, ZYNQ_QSPI_LINEAR_CFG_OFFSET, 0);
+
+	/* Clear the RX FIFO */
+	while (zynq_qspi_read(xqspi, ZYNQ_QSPI_STATUS_OFFSET) &
+			      ZYNQ_QSPI_IXR_RXNEMTY_MASK)
+		zynq_qspi_read(xqspi, ZYNQ_QSPI_RXD_OFFSET);
+
+	zynq_qspi_write(xqspi, ZYNQ_QSPI_STATUS_OFFSET, ZYNQ_QSPI_IXR_ALL_MASK);
+	config_reg = zynq_qspi_read(xqspi, ZYNQ_QSPI_CONFIG_OFFSET);
+	config_reg &= ~(ZYNQ_QSPI_CONFIG_MSTREN_MASK |
+			ZYNQ_QSPI_CONFIG_CPOL_MASK |
+			ZYNQ_QSPI_CONFIG_CPHA_MASK |
+			ZYNQ_QSPI_CONFIG_BDRATE_MASK |
+			ZYNQ_QSPI_CONFIG_SSFORCE_MASK |
+			ZYNQ_QSPI_CONFIG_MANSRTEN_MASK |
+			ZYNQ_QSPI_CONFIG_MANSRT_MASK);
+	config_reg |= (ZYNQ_QSPI_CONFIG_MSTREN_MASK |
+		       ZYNQ_QSPI_CONFIG_SSFORCE_MASK |
+		       ZYNQ_QSPI_CONFIG_FWIDTH_MASK |
+		       ZYNQ_QSPI_CONFIG_IFMODE_MASK);
+	zynq_qspi_write(xqspi, ZYNQ_QSPI_CONFIG_OFFSET, config_reg);
+
+	zynq_qspi_write(xqspi, ZYNQ_QSPI_RX_THRESH_OFFSET,
+			ZYNQ_QSPI_RX_THRESHOLD);
+	zynq_qspi_write(xqspi, ZYNQ_QSPI_TX_THRESH_OFFSET,
+			ZYNQ_QSPI_TX_THRESHOLD);
+
+	zynq_qspi_write(xqspi, ZYNQ_QSPI_ENABLE_OFFSET,
+			ZYNQ_QSPI_ENABLE_ENABLE_MASK);
+}
+
+static bool zynq_qspi_supports_op(struct spi_mem *mem,
+				  const struct spi_mem_op *op)
+{
+	if (!spi_mem_default_supports_op(mem, op))
+		return false;
+
+	/*
+	 * The controller supports at most three address bytes.
+	 */
+	if (op->addr.nbytes > 3)
+		return false;
+
+	return true;
+}
+
+/**
+ * zynq_qspi_rxfifo_op - Read 1..4 bytes from RxFIFO to RX buffer
+ * @xqspi:	Pointer to the zynq_qspi structure
+ * @size:	Number of bytes to be read (1..4)
+ */
+static void zynq_qspi_rxfifo_op(struct zynq_qspi *xqspi, unsigned int size)
+{
+	u32 data;
+
+	data = zynq_qspi_read(xqspi, ZYNQ_QSPI_RXD_OFFSET);
+
+	if (xqspi->rxbuf) {
+		memcpy(xqspi->rxbuf, ((u8 *)&data) + 4 - size, size);
+		xqspi->rxbuf += size;
+	}
+
+	xqspi->rx_bytes -= size;
+	if (xqspi->rx_bytes < 0)
+		xqspi->rx_bytes = 0;
+}
+
+/**
+ * zynq_qspi_txfifo_op - Write 1..4 bytes from TX buffer to TxFIFO
+ * @xqspi:	Pointer to the zynq_qspi structure
+ * @size:	Number of bytes to be written (1..4)
+ */
+static void zynq_qspi_txfifo_op(struct zynq_qspi *xqspi, unsigned int size)
+{
+	static const unsigned int offset[4] = {
+		ZYNQ_QSPI_TXD_00_01_OFFSET, ZYNQ_QSPI_TXD_00_10_OFFSET,
+		ZYNQ_QSPI_TXD_00_11_OFFSET, ZYNQ_QSPI_TXD_00_00_OFFSET };
+	u32 data;
+
+	if (xqspi->txbuf) {
+		data = 0xffffffff;
+		memcpy(&data, xqspi->txbuf, size);
+		xqspi->txbuf += size;
+	} else {
+		data = 0;
+	}
+
+	xqspi->tx_bytes -= size;
+	zynq_qspi_write(xqspi, offset[size - 1], data);
+}
+
+/**
+ * zynq_qspi_chipselect - Select or deselect the chip select line
+ * @spi:	Pointer to the spi_device structure
+ * @assert:	1 for select or 0 for deselect the chip select line
+ */
+static void zynq_qspi_chipselect(struct spi_device *spi, bool assert)
+{
+	struct spi_controller *ctrl = spi->master;
+	struct zynq_qspi *xqspi = spi_controller_get_devdata(ctrl);
+	u32 config_reg;
+
+	config_reg = zynq_qspi_read(xqspi, ZYNQ_QSPI_CONFIG_OFFSET);
+	if (assert) {
+		/* Select the slave */
+		config_reg &= ~ZYNQ_QSPI_CONFIG_SSCTRL_MASK;
+		config_reg |= (((~(BIT(spi->chip_select))) <<
+				ZYNQ_QSPI_SS_SHIFT) &
+				ZYNQ_QSPI_CONFIG_SSCTRL_MASK);
+	} else {
+		config_reg |= ZYNQ_QSPI_CONFIG_SSCTRL_MASK;
+	}
+
+	zynq_qspi_write(xqspi, ZYNQ_QSPI_CONFIG_OFFSET, config_reg);
+}
+
+/**
+ * zynq_qspi_config_op - Configure QSPI controller for specified transfer
+ * @xqspi:	Pointer to the zynq_qspi structure
+ * @spi:	Pointer to the spi_device structure
+ *
+ * Sets the operational mode of QSPI controller for the next QSPI transfer and
+ * sets the requested clock frequency.
+ *
+ * Return:	0 on success and -EINVAL on invalid input parameter
+ *
+ * Note: If the requested frequency cannot be matched exactly with a prescaler
+ * value, the driver uses the highest clock frequency that does not exceed the
+ * requested one for the transfer. If the requested frequency lies outside the
+ * range supported by the QSPI controller, the driver uses the highest or
+ * lowest frequency the controller supports.
+ */
+static int zynq_qspi_config_op(struct zynq_qspi *xqspi, struct spi_device *spi)
+{
+	u32 config_reg, baud_rate_val = 0;
+
+	/*
+	 * Set the clock frequency
+	 * The baud rate divisor is not a direct mapping to the value written
+	 * into the configuration register (config_reg[5:3])
+	 * i.e. 000 - divide by 2
+	 *      001 - divide by 4
+	 *      ----------------
+	 *      111 - divide by 256
+	 */
+	while ((baud_rate_val < ZYNQ_QSPI_BAUD_DIV_MAX)  &&
+	       (clk_get_rate(xqspi->refclk) / (2 << baud_rate_val)) >
+		spi->max_speed_hz)
+		baud_rate_val++;
+
+	config_reg = zynq_qspi_read(xqspi, ZYNQ_QSPI_CONFIG_OFFSET);
+
+	/* Set the QSPI clock phase and clock polarity */
+	config_reg &= (~ZYNQ_QSPI_CONFIG_CPHA_MASK) &
+		      (~ZYNQ_QSPI_CONFIG_CPOL_MASK);
+	if (spi->mode & SPI_CPHA)
+		config_reg |= ZYNQ_QSPI_CONFIG_CPHA_MASK;
+	if (spi->mode & SPI_CPOL)
+		config_reg |= ZYNQ_QSPI_CONFIG_CPOL_MASK;
+
+	config_reg &= ~ZYNQ_QSPI_CONFIG_BDRATE_MASK;
+	config_reg |= (baud_rate_val << ZYNQ_QSPI_BAUD_DIV_SHIFT);
+	zynq_qspi_write(xqspi, ZYNQ_QSPI_CONFIG_OFFSET, config_reg);
+
+	return 0;
+}
+
+/**
+ * zynq_qspi_setup_op - Configure the QSPI controller
+ * @spi:	Pointer to the spi_device structure
+ *
+ * Sets the operational mode of the QSPI controller for the next QSPI transfer
+ * and the baud rate divisor needed to generate the requested QSPI clock.
+ *
+ * Return:	0 on success and error value on failure
+ */
+static int zynq_qspi_setup_op(struct spi_device *spi)
+{
+	struct spi_controller *ctrl = spi->master;
+	struct zynq_qspi *qspi = spi_controller_get_devdata(ctrl);
+
+	if (ctrl->busy)
+		return -EBUSY;
+
+	clk_enable(qspi->refclk);
+	clk_enable(qspi->pclk);
+	zynq_qspi_write(qspi, ZYNQ_QSPI_ENABLE_OFFSET,
+			ZYNQ_QSPI_ENABLE_ENABLE_MASK);
+
+	return 0;
+}
+
+/**
+ * zynq_qspi_write_op - Fills the TX FIFO with as many bytes as possible
+ * @xqspi:	Pointer to the zynq_qspi structure
+ * @txcount:	Maximum number of words to write
+ * @txempty:	Indicates that TxFIFO is empty
+ */
+static void zynq_qspi_write_op(struct zynq_qspi *xqspi, int txcount,
+			       bool txempty)
+{
+	int count, len, k;
+
+	len = xqspi->tx_bytes;
+	if (len && len < 4) {
+		/*
+		 * We must empty the TxFIFO between accesses to TXD0,
+		 * TXD1, TXD2, TXD3.
+		 */
+		if (txempty)
+			zynq_qspi_txfifo_op(xqspi, len);
+
+		return;
+	}
+
+	count = len / 4;
+	if (count > txcount)
+		count = txcount;
+
+	if (xqspi->txbuf) {
+		iowrite32_rep(xqspi->regs + ZYNQ_QSPI_TXD_00_00_OFFSET,
+			      xqspi->txbuf, count);
+		xqspi->txbuf += count * 4;
+	} else {
+		for (k = 0; k < count; k++)
+			writel_relaxed(0, xqspi->regs +
+					  ZYNQ_QSPI_TXD_00_00_OFFSET);
+	}
+
+	xqspi->tx_bytes -= count * 4;
+}
+
+/**
+ * zynq_qspi_read_op - Drains the RX FIFO by as many bytes as possible
+ * @xqspi:	Pointer to the zynq_qspi structure
+ * @rxcount:	Maximum number of words to read
+ */
+static void zynq_qspi_read_op(struct zynq_qspi *xqspi, int rxcount)
+{
+	int count, len, k;
+
+	len = xqspi->rx_bytes - xqspi->tx_bytes;
+	count = len / 4;
+	if (count > rxcount)
+		count = rxcount;
+	if (xqspi->rxbuf) {
+		ioread32_rep(xqspi->regs + ZYNQ_QSPI_RXD_OFFSET,
+			     xqspi->rxbuf, count);
+		xqspi->rxbuf += count * 4;
+	} else {
+		for (k = 0; k < count; k++)
+			readl_relaxed(xqspi->regs + ZYNQ_QSPI_RXD_OFFSET);
+	}
+	xqspi->rx_bytes -= count * 4;
+	len -= count * 4;
+
+	if (len && len < 4 && count < rxcount)
+		zynq_qspi_rxfifo_op(xqspi, len);
+}
+
+/**
+ * zynq_qspi_irq - Interrupt service routine of the QSPI controller
+ * @irq:	IRQ number
+ * @dev_id:	Pointer to the xqspi structure
+ *
+ * This function handles TX empty only.
+ * On TX empty interrupt this function reads the received data from RX FIFO and
+ * fills the TX FIFO if there is any data remaining to be transferred.
+ *
+ * Return:	IRQ_HANDLED when interrupt is handled; IRQ_NONE otherwise.
+ */
+static irqreturn_t zynq_qspi_irq(int irq, void *dev_id)
+{
+	u32 intr_status;
+	bool txempty;
+	struct zynq_qspi *xqspi = (struct zynq_qspi *)dev_id;
+
+	intr_status = zynq_qspi_read(xqspi, ZYNQ_QSPI_STATUS_OFFSET);
+	zynq_qspi_write(xqspi, ZYNQ_QSPI_STATUS_OFFSET, intr_status);
+
+	if ((intr_status & ZYNQ_QSPI_IXR_TXNFULL_MASK) ||
+	    (intr_status & ZYNQ_QSPI_IXR_RXNEMTY_MASK)) {
+		/*
+		 * This bit is set when Tx FIFO has < THRESHOLD entries.
+		 * We have the THRESHOLD value set to 1,
+		 * so this bit indicates Tx FIFO is empty.
+		 */
+		txempty = !!(intr_status & ZYNQ_QSPI_IXR_TXNFULL_MASK);
+		/* Read out the data from the RX FIFO */
+		zynq_qspi_read_op(xqspi, ZYNQ_QSPI_RX_THRESHOLD);
+		if (xqspi->tx_bytes) {
+			/* There is more data to send */
+			zynq_qspi_write_op(xqspi, ZYNQ_QSPI_RX_THRESHOLD,
+					   txempty);
+		} else {
+			/*
+			 * Signal completion only once both the transmit and
+			 * receive sides have finished.
+			 */
+			if (!xqspi->rx_bytes) {
+				zynq_qspi_write(xqspi,
+						ZYNQ_QSPI_IDIS_OFFSET,
+						ZYNQ_QSPI_IXR_RXTX_MASK);
+				complete(&xqspi->data_completion);
+			}
+		}
+		return IRQ_HANDLED;
+	}
+
+	return IRQ_NONE;
+}
+
+/**
+ * zynq_qspi_exec_mem_op() - Initiates the QSPI transfer
+ * @mem: the SPI memory
+ * @op: the memory operation to execute
+ *
+ * Executes a memory operation.
+ *
+ * This function first selects the chip and starts the memory operation.
+ *
+ * Return: 0 in case of success, a negative error code otherwise.
+ */
+static int zynq_qspi_exec_mem_op(struct spi_mem *mem,
+				 const struct spi_mem_op *op)
+{
+	struct zynq_qspi *xqspi = spi_controller_get_devdata(mem->spi->master);
+	int err = 0, i;
+	u8 *tmpbuf;
+
+	dev_dbg(xqspi->dev, "cmd:%#x mode:%d.%d.%d.%d\n",
+		op->cmd.opcode, op->cmd.buswidth, op->addr.buswidth,
+		op->dummy.buswidth, op->data.buswidth);
+
+	zynq_qspi_chipselect(mem->spi, true);
+	zynq_qspi_config_op(xqspi, mem->spi);
+
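+	/*
+	 * Clock out the command, address, dummy and data phases in turn; the
+	 * interrupt handler refills/drains the FIFOs and signals completion
+	 * of each phase.
+	 */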
+	if (op->cmd.opcode) {
+		reinit_completion(&xqspi->data_completion);
+		xqspi->txbuf = (u8 *)&op->cmd.opcode;
+		xqspi->rxbuf = NULL;
+		xqspi->tx_bytes = sizeof(op->cmd.opcode);
+		xqspi->rx_bytes = sizeof(op->cmd.opcode);
+		zynq_qspi_write_op(xqspi, ZYNQ_QSPI_FIFO_DEPTH, true);
+		zynq_qspi_write(xqspi, ZYNQ_QSPI_IEN_OFFSET,
+				ZYNQ_QSPI_IXR_RXTX_MASK);
+		if (!wait_for_completion_interruptible_timeout(&xqspi->data_completion,
+							       msecs_to_jiffies(1000)))
+			err = -ETIMEDOUT;
+	}
+
+	if (op->addr.nbytes) {
+		for (i = 0; i < op->addr.nbytes; i++) {
+			xqspi->txbuf[i] = op->addr.val >>
+					(8 * (op->addr.nbytes - i - 1));
+		}
+
+		reinit_completion(&xqspi->data_completion);
+		xqspi->rxbuf = NULL;
+		xqspi->tx_bytes = op->addr.nbytes;
+		xqspi->rx_bytes = op->addr.nbytes;
+		zynq_qspi_write_op(xqspi, ZYNQ_QSPI_FIFO_DEPTH, true);
+		zynq_qspi_write(xqspi, ZYNQ_QSPI_IEN_OFFSET,
+				ZYNQ_QSPI_IXR_RXTX_MASK);
+		if (!wait_for_completion_interruptible_timeout(&xqspi->data_completion,
+							       msecs_to_jiffies(1000)))
+			err = -ETIMEDOUT;
+	}
+
+	if (op->dummy.nbytes) {
+		tmpbuf = kzalloc(op->dummy.nbytes, GFP_KERNEL);
+		if (!tmpbuf) {
+			zynq_qspi_chipselect(mem->spi, false);
+			return -ENOMEM;
+		}
+		memset(tmpbuf, 0xff, op->dummy.nbytes);
+		reinit_completion(&xqspi->data_completion);
+		xqspi->txbuf = tmpbuf;
+		xqspi->rxbuf = NULL;
+		xqspi->tx_bytes = op->dummy.nbytes;
+		xqspi->rx_bytes = op->dummy.nbytes;
+		zynq_qspi_write_op(xqspi, ZYNQ_QSPI_FIFO_DEPTH, true);
+		zynq_qspi_write(xqspi, ZYNQ_QSPI_IEN_OFFSET,
+				ZYNQ_QSPI_IXR_RXTX_MASK);
+		if (!wait_for_completion_interruptible_timeout(&xqspi->data_completion,
+							       msecs_to_jiffies(1000)))
+			err = -ETIMEDOUT;
+
+		kfree(tmpbuf);
+	}
+
+	if (op->data.nbytes) {
+		reinit_completion(&xqspi->data_completion);
+		if (op->data.dir == SPI_MEM_DATA_OUT) {
+			xqspi->txbuf = (u8 *)op->data.buf.out;
+			xqspi->tx_bytes = op->data.nbytes;
+			xqspi->rxbuf = NULL;
+			xqspi->rx_bytes = op->data.nbytes;
+		} else {
+			xqspi->txbuf = NULL;
+			xqspi->rxbuf = (u8 *)op->data.buf.in;
+			xqspi->rx_bytes = op->data.nbytes;
+			xqspi->tx_bytes = op->data.nbytes;
+		}
+
+		zynq_qspi_write_op(xqspi, ZYNQ_QSPI_FIFO_DEPTH, true);
+		zynq_qspi_write(xqspi, ZYNQ_QSPI_IEN_OFFSET,
+				ZYNQ_QSPI_IXR_RXTX_MASK);
+		if (!wait_for_completion_interruptible_timeout(&xqspi->data_completion,
+							       msecs_to_jiffies(1000)))
+			err = -ETIMEDOUT;
+	}
+	zynq_qspi_chipselect(mem->spi, false);
+
+	return err;
+}
+
+static const struct spi_controller_mem_ops zynq_qspi_mem_ops = {
+	.supports_op = zynq_qspi_supports_op,
+	.exec_op = zynq_qspi_exec_mem_op,
+};
+
+/**
+ * zynq_qspi_probe - Probe method for the QSPI driver
+ * @pdev:	Pointer to the platform_device structure
+ *
+ * This function initializes the driver data structures and the hardware.
+ *
+ * Return:	0 on success and error value on failure
+ */
+static int zynq_qspi_probe(struct platform_device *pdev)
+{
+	int ret = 0;
+	struct spi_controller *ctlr;
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	struct zynq_qspi *xqspi;
+	u32 num_cs;
+
+	ctlr = spi_alloc_master(&pdev->dev, sizeof(*xqspi));
+	if (!ctlr)
+		return -ENOMEM;
+
+	xqspi = spi_controller_get_devdata(ctlr);
+	xqspi->dev = dev;
+	platform_set_drvdata(pdev, xqspi);
+	xqspi->regs = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(xqspi->regs)) {
+		ret = PTR_ERR(xqspi->regs);
+		goto remove_master;
+	}
+
+	xqspi->pclk = devm_clk_get(&pdev->dev, "pclk");
+	if (IS_ERR(xqspi->pclk)) {
+		dev_err(&pdev->dev, "pclk clock not found.\n");
+		ret = PTR_ERR(xqspi->pclk);
+		goto remove_master;
+	}
+
+	init_completion(&xqspi->data_completion);
+
+	xqspi->refclk = devm_clk_get(&pdev->dev, "ref_clk");
+	if (IS_ERR(xqspi->refclk)) {
+		dev_err(&pdev->dev, "ref_clk clock not found.\n");
+		ret = PTR_ERR(xqspi->refclk);
+		goto remove_master;
+	}
+
+	ret = clk_prepare_enable(xqspi->pclk);
+	if (ret) {
+		dev_err(&pdev->dev, "Unable to enable APB clock.\n");
+		goto remove_master;
+	}
+
+	ret = clk_prepare_enable(xqspi->refclk);
+	if (ret) {
+		dev_err(&pdev->dev, "Unable to enable device clock.\n");
+		goto clk_dis_pclk;
+	}
+
+	/* QSPI controller initializations */
+	zynq_qspi_init_hw(xqspi);
+
+	xqspi->irq = platform_get_irq(pdev, 0);
+	if (xqspi->irq <= 0) {
+		ret = -ENXIO;
+		goto clk_dis_all;
+	}
+	ret = devm_request_irq(&pdev->dev, xqspi->irq, zynq_qspi_irq,
+			       0, pdev->name, xqspi);
+	if (ret != 0) {
+		ret = -ENXIO;
+		dev_err(&pdev->dev, "request_irq failed\n");
+		goto clk_dis_all;
+	}
+
+	ret = of_property_read_u32(np, "num-cs",
+				   &num_cs);
+	if (ret < 0)
+		ctlr->num_chipselect = ZYNQ_QSPI_DEFAULT_NUM_CS;
+	else
+		ctlr->num_chipselect = num_cs;
+
+	ctlr->mode_bits =  SPI_RX_DUAL | SPI_RX_QUAD |
+			    SPI_TX_DUAL | SPI_TX_QUAD;
+	ctlr->mem_ops = &zynq_qspi_mem_ops;
+	ctlr->setup = zynq_qspi_setup_op;
+	ctlr->max_speed_hz = clk_get_rate(xqspi->refclk) / 2;
+	ctlr->dev.of_node = np;
+	ret = devm_spi_register_controller(&pdev->dev, ctlr);
+	if (ret) {
+		dev_err(&pdev->dev, "spi_register_master failed\n");
+		goto clk_dis_all;
+	}
+
+	return ret;
+
+clk_dis_all:
+	clk_disable_unprepare(xqspi->refclk);
+clk_dis_pclk:
+	clk_disable_unprepare(xqspi->pclk);
+remove_master:
+	spi_controller_put(ctlr);
+
+	return ret;
+}
+
+/**
+ * zynq_qspi_remove - Remove method for the QSPI driver
+ * @pdev:	Pointer to the platform_device structure
+ *
+ * This function is called if a device is physically removed from the system or
+ * if the driver module is being unloaded. It frees all resources allocated to
+ * the device.
+ *
+ * Return:	0 on success and error value on failure
+ */
+static int zynq_qspi_remove(struct platform_device *pdev)
+{
+	struct zynq_qspi *xqspi = platform_get_drvdata(pdev);
+
+	zynq_qspi_write(xqspi, ZYNQ_QSPI_ENABLE_OFFSET, 0);
+
+	clk_disable_unprepare(xqspi->refclk);
+	clk_disable_unprepare(xqspi->pclk);
+
+	return 0;
+}
+
+static const struct of_device_id zynq_qspi_of_match[] = {
+	{ .compatible = "xlnx,zynq-qspi-1.0", },
+	{ /* end of table */ }
+};
+
+MODULE_DEVICE_TABLE(of, zynq_qspi_of_match);
+
+/*
+ * zynq_qspi_driver - This structure defines the QSPI platform driver
+ */
+static struct platform_driver zynq_qspi_driver = {
+	.probe = zynq_qspi_probe,
+	.remove = zynq_qspi_remove,
+	.driver = {
+		.name = "zynq-qspi",
+		.of_match_table = zynq_qspi_of_match,
+	},
+};
+
+module_platform_driver(zynq_qspi_driver);
+
+MODULE_AUTHOR("Xilinx, Inc.");
+MODULE_DESCRIPTION("Xilinx Zynq QSPI driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c
index cc4d310..60c4de4 100644
--- a/drivers/spi/spi-zynqmp-gqspi.c
+++ b/drivers/spi/spi-zynqmp-gqspi.c
@@ -1,19 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Xilinx Zynq UltraScale+ MPSoC Quad-SPI (QSPI) controller driver
  * (master mode only)
  *
  * Copyright (C) 2009 - 2015 Xilinx, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
 #include <linux/dmaengine.h>
+#include <linux/firmware/xlnx-zynqmp.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/module.h>
@@ -138,6 +135,7 @@
 
 #define SPI_AUTOSUSPEND_TIMEOUT		3000
 enum mode_type {GQSPI_MODE_IO, GQSPI_MODE_DMA};
+static const struct zynqmp_eemi_ops *eemi_ops;
 
 /**
  * struct zynqmp_qspi - Defines qspi driver instance
@@ -960,8 +958,7 @@
  */
 static int __maybe_unused zynqmp_runtime_suspend(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct spi_master *master = platform_get_drvdata(pdev);
+	struct spi_master *master = dev_get_drvdata(dev);
 	struct zynqmp_qspi *xqspi = spi_master_get_devdata(master);
 
 	clk_disable(xqspi->refclk);
@@ -980,8 +977,7 @@
  */
 static int __maybe_unused zynqmp_runtime_resume(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct spi_master *master = platform_get_drvdata(pdev);
+	struct spi_master *master = dev_get_drvdata(dev);
 	struct zynqmp_qspi *xqspi = spi_master_get_devdata(master);
 	int ret;
 
@@ -1020,9 +1016,12 @@
 	int ret = 0;
 	struct spi_master *master;
 	struct zynqmp_qspi *xqspi;
-	struct resource *res;
 	struct device *dev = &pdev->dev;
 
+	eemi_ops = zynqmp_pm_get_eemi_ops();
+	if (IS_ERR(eemi_ops))
+		return PTR_ERR(eemi_ops);
+
 	master = spi_alloc_master(&pdev->dev, sizeof(*xqspi));
 	if (!master)
 		return -ENOMEM;
@@ -1031,8 +1030,7 @@
 	master->dev.of_node = pdev->dev.of_node;
 	platform_set_drvdata(pdev, master);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	xqspi->regs = devm_ioremap_resource(&pdev->dev, res);
+	xqspi->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(xqspi->regs)) {
 		ret = PTR_ERR(xqspi->regs);
 		goto remove_master;
@@ -1077,7 +1075,6 @@
 	xqspi->irq = platform_get_irq(pdev, 0);
 	if (xqspi->irq <= 0) {
 		ret = -ENXIO;
-		dev_err(dev, "irq resource not found\n");
 		goto clk_dis_all;
 	}
 	ret = devm_request_irq(&pdev->dev, xqspi->irq, zynqmp_qspi_irq,
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 9da0bc5..f9502db 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -1,19 +1,8 @@
-/*
- * SPI init/core code
- *
- * Copyright (C) 2005 David Brownell
- * Copyright (C) 2008 Secret Lab Technologies Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
+// SPDX-License-Identifier: GPL-2.0-or-later
+// SPI init/core code
+//
+// Copyright (C) 2005 David Brownell
+// Copyright (C) 2008 Secret Lab Technologies Ltd.
 
 #include <linux/kernel.h>
 #include <linux/device.h>
@@ -30,6 +19,7 @@
 #include <linux/spi/spi.h>
 #include <linux/spi/spi-mem.h>
 #include <linux/of_gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/pm_runtime.h>
 #include <linux/pm_domain.h>
 #include <linux/property.h>
@@ -46,6 +36,8 @@
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/spi.h>
+EXPORT_TRACEPOINT_SYMBOL(spi_transfer_start);
+EXPORT_TRACEPOINT_SYMBOL(spi_transfer_stop);
 
 #include "internals.h"
 
@@ -60,6 +52,7 @@
 		spi->controller->cleanup(spi);
 
 	spi_controller_put(spi->controller);
+	kfree(spi->driver_override);
 	kfree(spi);
 }
 
@@ -77,6 +70,51 @@
 }
 static DEVICE_ATTR_RO(modalias);
 
+static ssize_t driver_override_store(struct device *dev,
+				     struct device_attribute *a,
+				     const char *buf, size_t count)
+{
+	struct spi_device *spi = to_spi_device(dev);
+	const char *end = memchr(buf, '\n', count);
+	const size_t len = end ? end - buf : count;
+	const char *driver_override, *old;
+
+	/* We need to keep extra room for a newline when displaying value */
+	if (len >= (PAGE_SIZE - 1))
+		return -EINVAL;
+
+	driver_override = kstrndup(buf, len, GFP_KERNEL);
+	if (!driver_override)
+		return -ENOMEM;
+
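+	/* Hold the device lock to serialize against concurrent driver binds */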
+	device_lock(dev);
+	old = spi->driver_override;
+	if (len) {
+		spi->driver_override = driver_override;
+	} else {
+		/* Empty string, disable driver override */
+		spi->driver_override = NULL;
+		kfree(driver_override);
+	}
+	device_unlock(dev);
+	kfree(old);
+
+	return count;
+}
+
+static ssize_t driver_override_show(struct device *dev,
+				    struct device_attribute *a, char *buf)
+{
+	const struct spi_device *spi = to_spi_device(dev);
+	ssize_t len;
+
+	device_lock(dev);
+	len = snprintf(buf, PAGE_SIZE, "%s\n", spi->driver_override ? : "");
+	device_unlock(dev);
+	return len;
+}
+static DEVICE_ATTR_RW(driver_override);
+
 #define SPI_STATISTICS_ATTRS(field, file)				\
 static ssize_t spi_controller_##field##_show(struct device *dev,	\
 					     struct device_attribute *attr, \
@@ -158,6 +196,7 @@
 
 static struct attribute *spi_dev_attrs[] = {
 	&dev_attr_modalias.attr,
+	&dev_attr_driver_override.attr,
 	NULL,
 };
 
@@ -305,6 +344,10 @@
 	const struct spi_device	*spi = to_spi_device(dev);
 	const struct spi_driver	*sdrv = to_spi_driver(drv);
 
+	/* Check override first, and if set, only use the named driver */
+	if (spi->driver_override)
+		return strcmp(spi->driver_override, drv->name) == 0;
+
 	/* Attempt an OF style match */
 	if (of_driver_match_device(dev, drv))
 		return 1;
@@ -538,7 +581,10 @@
 		goto done;
 	}
 
-	if (ctlr->cs_gpios)
+	/* Descriptors take precedence */
+	if (ctlr->cs_gpiods)
+		spi->cs_gpiod = ctlr->cs_gpiods[spi->chip_select];
+	else if (ctlr->cs_gpios)
 		spi->cs_gpio = ctlr->cs_gpios[spi->chip_select];
 
 	/* Drivers may modify this initial i/o setup, but will
@@ -732,8 +778,21 @@
 	if (spi->mode & SPI_CS_HIGH)
 		enable = !enable;
 
-	if (gpio_is_valid(spi->cs_gpio)) {
-		gpio_set_value(spi->cs_gpio, !enable);
+	if (spi->cs_gpiod || gpio_is_valid(spi->cs_gpio)) {
+		/*
+		 * Honour the SPI_NO_CS flag and invert the enable line, as
+		 * active low is default for SPI. Execution paths that handle
+		 * polarity inversion in gpiolib (such as device tree) will
+		 * enforce active high using the SPI_CS_HIGH resulting in a
+		 * double inversion through the code above.
+		 */
+		if (!(spi->mode & SPI_NO_CS)) {
+			if (spi->cs_gpiod)
+				gpiod_set_value_cansleep(spi->cs_gpiod,
+							 !enable);
+			else
+				gpio_set_value_cansleep(spi->cs_gpio, !enable);
+		}
 		/* Some SPI masters need both GPIO CS & slave_select */
 		if ((spi->controller->flags & SPI_MASTER_GPIO_SS) &&
 		    spi->controller->set_cs)
@@ -982,6 +1041,8 @@
 		if (max_tx || max_rx) {
 			list_for_each_entry(xfer, &msg->transfers,
 					    transfer_list) {
+				if (!xfer->len)
+					continue;
 				if (!xfer->tx_buf)
 					xfer->tx_buf = ctlr->dummy_tx;
 				if (!xfer->rx_buf)
@@ -993,6 +1054,96 @@
 	return __spi_map_msg(ctlr, msg);
 }
 
+static int spi_transfer_wait(struct spi_controller *ctlr,
+			     struct spi_message *msg,
+			     struct spi_transfer *xfer)
+{
+	struct spi_statistics *statm = &ctlr->statistics;
+	struct spi_statistics *stats = &msg->spi->statistics;
+	unsigned long long ms = 1;
+
+	if (spi_controller_is_slave(ctlr)) {
+		if (wait_for_completion_interruptible(&ctlr->xfer_completion)) {
+			dev_dbg(&msg->spi->dev, "SPI transfer interrupted\n");
+			return -EINTR;
+		}
+	} else {
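+		/*
+		 * Estimate the transfer time from its length and speed, then
+		 * double it and add 200 ms of slack before timing out.
+		 */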
+		ms = 8LL * 1000LL * xfer->len;
+		do_div(ms, xfer->speed_hz);
+		ms += ms + 200; /* some tolerance */
+
+		if (ms > UINT_MAX)
+			ms = UINT_MAX;
+
+		ms = wait_for_completion_timeout(&ctlr->xfer_completion,
+						 msecs_to_jiffies(ms));
+
+		if (ms == 0) {
+			SPI_STATISTICS_INCREMENT_FIELD(statm, timedout);
+			SPI_STATISTICS_INCREMENT_FIELD(stats, timedout);
+			dev_err(&msg->spi->dev,
+				"SPI transfer timed out\n");
+			return -ETIMEDOUT;
+		}
+	}
+
+	return 0;
+}
+
+static void _spi_transfer_delay_ns(u32 ns)
+{
+	if (!ns)
+		return;
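+	/* Busy-wait for delays up to ~10us, sleep for anything longer */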
+	if (ns <= 1000) {
+		ndelay(ns);
+	} else {
+		u32 us = DIV_ROUND_UP(ns, 1000);
+
+		if (us <= 10)
+			udelay(us);
+		else
+			usleep_range(us, us + DIV_ROUND_UP(us, 10));
+	}
+}
+
+static void _spi_transfer_cs_change_delay(struct spi_message *msg,
+					  struct spi_transfer *xfer)
+{
+	u32 delay = xfer->cs_change_delay;
+	u32 unit = xfer->cs_change_delay_unit;
+	u32 hz;
+
+	/* return early on "fast" mode - for everything but USECS */
+	if (!delay && unit != SPI_DELAY_UNIT_USECS)
+		return;
+
+	switch (unit) {
+	case SPI_DELAY_UNIT_USECS:
+		/* for compatibility use default of 10us */
+		if (!delay)
+			delay = 10000;
+		else
+			delay *= 1000;
+		break;
+	case SPI_DELAY_UNIT_NSECS: /* nothing to do here */
+		break;
+	case SPI_DELAY_UNIT_SCK:
+		/* if there is no effective speed known, then approximate
+		 * by underestimating with half the requested hz
+		 */
+		hz = xfer->effective_speed_hz ?: xfer->speed_hz / 2;
+		delay *= DIV_ROUND_UP(1000000000, hz);
+		break;
+	default:
+		dev_err_once(&msg->spi->dev,
+			     "Use of unsupported delay unit %i, using default of 10us\n",
+			     xfer->cs_change_delay_unit);
+		delay = 10000;
+	}
+	/* now sleep for the requested amount of time */
+	_spi_transfer_delay_ns(delay);
+}
+
 /*
  * spi_transfer_one_message - Default implementation of transfer_one_message()
  *
@@ -1006,7 +1157,6 @@
 	struct spi_transfer *xfer;
 	bool keep_cs = false;
 	int ret = 0;
-	unsigned long long ms = 1;
 	struct spi_statistics *statm = &ctlr->statistics;
 	struct spi_statistics *stats = &msg->spi->statistics;
 
@@ -1036,26 +1186,9 @@
 			}
 
 			if (ret > 0) {
-				ret = 0;
-				ms = 8LL * 1000LL * xfer->len;
-				do_div(ms, xfer->speed_hz);
-				ms += ms + 200; /* some tolerance */
-
-				if (ms > UINT_MAX)
-					ms = UINT_MAX;
-
-				ms = wait_for_completion_timeout(&ctlr->xfer_completion,
-								 msecs_to_jiffies(ms));
-			}
-
-			if (ms == 0) {
-				SPI_STATISTICS_INCREMENT_FIELD(statm,
-							       timedout);
-				SPI_STATISTICS_INCREMENT_FIELD(stats,
-							       timedout);
-				dev_err(&msg->spi->dev,
-					"SPI transfer timed out\n");
-				msg->status = -ETIMEDOUT;
+				ret = spi_transfer_wait(ctlr, msg, xfer);
+				if (ret < 0)
+					msg->status = ret;
 			}
 		} else {
 			if (xfer->len)
@@ -1069,14 +1202,8 @@
 		if (msg->status != -EINPROGRESS)
 			goto out;
 
-		if (xfer->delay_usecs) {
-			u16 us = xfer->delay_usecs;
-
-			if (us <= 10)
-				udelay(us);
-			else
-				usleep_range(us, us + DIV_ROUND_UP(us, 10));
-		}
+		if (xfer->delay_usecs)
+			_spi_transfer_delay_ns(xfer->delay_usecs * 1000);
 
 		if (xfer->cs_change) {
 			if (list_is_last(&xfer->transfer_list,
@@ -1084,7 +1211,7 @@
 				keep_cs = true;
 			} else {
 				spi_set_cs(msg->spi, false);
-				udelay(10);
+				_spi_transfer_cs_change_delay(msg, xfer);
 				spi_set_cs(msg->spi, true);
 			}
 		}
@@ -1138,8 +1265,9 @@
  */
 static void __spi_pump_messages(struct spi_controller *ctlr, bool in_kthread)
 {
-	unsigned long flags;
+	struct spi_message *msg;
 	bool was_busy = false;
+	unsigned long flags;
 	int ret;
 
 	/* Lock queue */
@@ -1198,10 +1326,10 @@
 	}
 
 	/* Extract head of queue */
-	ctlr->cur_msg =
-		list_first_entry(&ctlr->queue, struct spi_message, queue);
+	msg = list_first_entry(&ctlr->queue, struct spi_message, queue);
+	ctlr->cur_msg = msg;
 
-	list_del_init(&ctlr->cur_msg->queue);
+	list_del_init(&msg->queue);
 	if (ctlr->busy)
 		was_busy = true;
 	else
@@ -1228,37 +1356,42 @@
 		ret = ctlr->prepare_transfer_hardware(ctlr);
 		if (ret) {
 			dev_err(&ctlr->dev,
-				"failed to prepare transfer hardware\n");
+				"failed to prepare transfer hardware: %d\n",
+				ret);
 
 			if (ctlr->auto_runtime_pm)
 				pm_runtime_put(ctlr->dev.parent);
+
+			msg->status = ret;
+			spi_finalize_current_message(ctlr);
+
 			mutex_unlock(&ctlr->io_mutex);
 			return;
 		}
 	}
 
-	trace_spi_message_start(ctlr->cur_msg);
+	trace_spi_message_start(msg);
 
 	if (ctlr->prepare_message) {
-		ret = ctlr->prepare_message(ctlr, ctlr->cur_msg);
+		ret = ctlr->prepare_message(ctlr, msg);
 		if (ret) {
 			dev_err(&ctlr->dev, "failed to prepare message: %d\n",
 				ret);
-			ctlr->cur_msg->status = ret;
+			msg->status = ret;
 			spi_finalize_current_message(ctlr);
 			goto out;
 		}
 		ctlr->cur_msg_prepared = true;
 	}
 
-	ret = spi_map_msg(ctlr, ctlr->cur_msg);
+	ret = spi_map_msg(ctlr, msg);
 	if (ret) {
-		ctlr->cur_msg->status = ret;
+		msg->status = ret;
 		spi_finalize_current_message(ctlr);
 		goto out;
 	}
 
-	ret = ctlr->transfer_one_message(ctlr, ctlr->cur_msg);
+	ret = ctlr->transfer_one_message(ctlr, msg);
 	if (ret) {
 		dev_err(&ctlr->dev,
 			"failed to transfer one message from queue\n");
@@ -1285,10 +1418,32 @@
 	__spi_pump_messages(ctlr, true);
 }
 
+/**
+ * spi_set_thread_rt - set the controller to pump at realtime priority
+ * @ctlr: controller to boost priority of
+ *
+ * This can be called because the controller requested realtime priority
+ * (by setting the ->rt value before calling spi_register_controller()) or
+ * because a device on the bus said that its transfers needed realtime
+ * priority.
+ *
+ * NOTE: at the moment if any device on a bus says it needs realtime then
+ * the thread will be at realtime priority for all transfers on that
+ * controller.  If this eventually becomes a problem we may see if we can
+ * find a way to boost the priority only temporarily during relevant
+ * transfers.
+ */
+static void spi_set_thread_rt(struct spi_controller *ctlr)
+{
+	struct sched_param param = { .sched_priority = MAX_RT_PRIO / 2 };
+
+	dev_info(&ctlr->dev,
+		"will run message pump with realtime priority\n");
+	sched_setscheduler(ctlr->kworker_task, SCHED_FIFO, &param);
+}
+
 static int spi_init_queue(struct spi_controller *ctlr)
 {
-	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
-
 	ctlr->running = false;
 	ctlr->busy = false;
 
@@ -1308,11 +1463,8 @@
 	 * request and the scheduling of the message pump thread. Without this
 	 * setting the message pump thread will remain at default priority.
 	 */
-	if (ctlr->rt) {
-		dev_info(&ctlr->dev,
-			"will run message pump with realtime priority\n");
-		sched_setscheduler(ctlr->kworker_task, SCHED_FIFO, &param);
-	}
+	if (ctlr->rt)
+		spi_set_thread_rt(ctlr);
 
 	return 0;
 }
@@ -1555,13 +1707,21 @@
 		spi->mode |= SPI_CPHA;
 	if (of_property_read_bool(nc, "spi-cpol"))
 		spi->mode |= SPI_CPOL;
-	if (of_property_read_bool(nc, "spi-cs-high"))
-		spi->mode |= SPI_CS_HIGH;
 	if (of_property_read_bool(nc, "spi-3wire"))
 		spi->mode |= SPI_3WIRE;
 	if (of_property_read_bool(nc, "spi-lsb-first"))
 		spi->mode |= SPI_LSB_FIRST;
 
+	/*
+	 * For descriptors associated with the device, polarity inversion is
+	 * handled in the gpiolib, so all chip selects are "active high" in
+	 * the logical sense, the gpiolib will invert the line if need be.
+	 */
+	if (ctlr->use_gpio_descriptors)
+		spi->mode |= SPI_CS_HIGH;
+	else if (of_property_read_bool(nc, "spi-cs-high"))
+		spi->mode |= SPI_CS_HIGH;
+
 	/* Device DUAL/QUAD mode */
 	if (!of_property_read_u32(nc, "spi-tx-bus-width", &value)) {
 		switch (value) {
@@ -1573,6 +1733,9 @@
 		case 4:
 			spi->mode |= SPI_TX_QUAD;
 			break;
+		case 8:
+			spi->mode |= SPI_TX_OCTAL;
+			break;
 		default:
 			dev_warn(&ctlr->dev,
 				"spi-tx-bus-width %d not supported\n",
@@ -1591,6 +1754,9 @@
 		case 4:
 			spi->mode |= SPI_RX_QUAD;
 			break;
+		case 8:
+			spi->mode |= SPI_RX_OCTAL;
+			break;
 		default:
 			dev_warn(&ctlr->dev,
 				"spi-rx-bus-width %d not supported\n",
@@ -1600,7 +1766,7 @@
 	}
 
 	if (spi_controller_is_slave(ctlr)) {
-		if (strcmp(nc->name, "slave")) {
+		if (!of_node_name_eq(nc, "slave")) {
 			dev_err(&ctlr->dev, "%pOF is not called 'slave'\n",
 				nc);
 			return -EINVAL;
@@ -1706,9 +1872,18 @@
 #endif
 
 #ifdef CONFIG_ACPI
-static void acpi_spi_parse_apple_properties(struct spi_device *spi)
+struct acpi_spi_lookup {
+	struct spi_controller 	*ctlr;
+	u32			max_speed_hz;
+	u32			mode;
+	int			irq;
+	u8			bits_per_word;
+	u8			chip_select;
+};
+
+static void acpi_spi_parse_apple_properties(struct acpi_device *dev,
+					    struct acpi_spi_lookup *lookup)
 {
-	struct acpi_device *dev = ACPI_COMPANION(&spi->dev);
 	const union acpi_object *obj;
 
 	if (!x86_apple_machine)
@@ -1716,35 +1891,46 @@
 
 	if (!acpi_dev_get_property(dev, "spiSclkPeriod", ACPI_TYPE_BUFFER, &obj)
 	    && obj->buffer.length >= 4)
-		spi->max_speed_hz  = NSEC_PER_SEC / *(u32 *)obj->buffer.pointer;
+		lookup->max_speed_hz  = NSEC_PER_SEC / *(u32 *)obj->buffer.pointer;
 
 	if (!acpi_dev_get_property(dev, "spiWordSize", ACPI_TYPE_BUFFER, &obj)
 	    && obj->buffer.length == 8)
-		spi->bits_per_word = *(u64 *)obj->buffer.pointer;
+		lookup->bits_per_word = *(u64 *)obj->buffer.pointer;
 
 	if (!acpi_dev_get_property(dev, "spiBitOrder", ACPI_TYPE_BUFFER, &obj)
 	    && obj->buffer.length == 8 && !*(u64 *)obj->buffer.pointer)
-		spi->mode |= SPI_LSB_FIRST;
+		lookup->mode |= SPI_LSB_FIRST;
 
 	if (!acpi_dev_get_property(dev, "spiSPO", ACPI_TYPE_BUFFER, &obj)
 	    && obj->buffer.length == 8 &&  *(u64 *)obj->buffer.pointer)
-		spi->mode |= SPI_CPOL;
+		lookup->mode |= SPI_CPOL;
 
 	if (!acpi_dev_get_property(dev, "spiSPH", ACPI_TYPE_BUFFER, &obj)
 	    && obj->buffer.length == 8 &&  *(u64 *)obj->buffer.pointer)
-		spi->mode |= SPI_CPHA;
+		lookup->mode |= SPI_CPHA;
 }
 
 static int acpi_spi_add_resource(struct acpi_resource *ares, void *data)
 {
-	struct spi_device *spi = data;
-	struct spi_controller *ctlr = spi->controller;
+	struct acpi_spi_lookup *lookup = data;
+	struct spi_controller *ctlr = lookup->ctlr;
 
 	if (ares->type == ACPI_RESOURCE_TYPE_SERIAL_BUS) {
 		struct acpi_resource_spi_serialbus *sb;
+		acpi_handle parent_handle;
+		acpi_status status;
 
 		sb = &ares->data.spi_serial_bus;
 		if (sb->type == ACPI_RESOURCE_SERIAL_TYPE_SPI) {
+
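+			/*
+			 * Ignore SerialBus resources whose ResourceSource
+			 * refers to a different SPI host controller.
+			 */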
+			status = acpi_get_handle(NULL,
+						 sb->resource_source.string_ptr,
+						 &parent_handle);
+
+			if (ACPI_FAILURE(status) ||
+			    ACPI_HANDLE(ctlr->dev.parent) != parent_handle)
+				return -ENODEV;
+
 			/*
 			 * ACPI DeviceSelection numbering is handled by the
 			 * host controller driver in Windows and can vary
@@ -1757,25 +1943,25 @@
 						sb->device_selection);
 				if (cs < 0)
 					return cs;
-				spi->chip_select = cs;
+				lookup->chip_select = cs;
 			} else {
-				spi->chip_select = sb->device_selection;
+				lookup->chip_select = sb->device_selection;
 			}
 
-			spi->max_speed_hz = sb->connection_speed;
+			lookup->max_speed_hz = sb->connection_speed;
 
 			if (sb->clock_phase == ACPI_SPI_SECOND_PHASE)
-				spi->mode |= SPI_CPHA;
+				lookup->mode |= SPI_CPHA;
 			if (sb->clock_polarity == ACPI_SPI_START_HIGH)
-				spi->mode |= SPI_CPOL;
+				lookup->mode |= SPI_CPOL;
 			if (sb->device_polarity == ACPI_SPI_ACTIVE_HIGH)
-				spi->mode |= SPI_CS_HIGH;
+				lookup->mode |= SPI_CS_HIGH;
 		}
-	} else if (spi->irq < 0) {
+	} else if (lookup->irq < 0) {
 		struct resource r;
 
 		if (acpi_dev_resource_interrupt(ares, 0, &r))
-			spi->irq = r.start;
+			lookup->irq = r.start;
 	}
 
 	/* Always tell the ACPI core to skip this resource */
@@ -1785,7 +1971,9 @@
 static acpi_status acpi_register_spi_device(struct spi_controller *ctlr,
 					    struct acpi_device *adev)
 {
+	acpi_handle parent_handle = NULL;
 	struct list_head resource_list;
+	struct acpi_spi_lookup lookup = {};
 	struct spi_device *spi;
 	int ret;
 
@@ -1793,6 +1981,28 @@
 	    acpi_device_enumerated(adev))
 		return AE_OK;
 
+	lookup.ctlr		= ctlr;
+	lookup.irq		= -1;
+
+	INIT_LIST_HEAD(&resource_list);
+	ret = acpi_dev_get_resources(adev, &resource_list,
+				     acpi_spi_add_resource, &lookup);
+	acpi_dev_free_resource_list(&resource_list);
+
+	if (ret < 0)
+		/* found SPI in _CRS but it points to another controller */
+		return AE_OK;
+
+	if (!lookup.max_speed_hz &&
+	    !ACPI_FAILURE(acpi_get_parent(adev->handle, &parent_handle)) &&
+	    ACPI_HANDLE(ctlr->dev.parent) == parent_handle) {
+		/* Apple does not use _CRS but nested devices for SPI slaves */
+		acpi_spi_parse_apple_properties(adev, &lookup);
+	}
+
+	if (!lookup.max_speed_hz)
+		return AE_OK;
+
 	spi = spi_alloc_device(ctlr);
 	if (!spi) {
 		dev_err(&ctlr->dev, "failed to allocate SPI device for %s\n",
@@ -1801,19 +2011,11 @@
 	}
 
 	ACPI_COMPANION_SET(&spi->dev, adev);
-	spi->irq = -1;
-
-	INIT_LIST_HEAD(&resource_list);
-	ret = acpi_dev_get_resources(adev, &resource_list,
-				     acpi_spi_add_resource, spi);
-	acpi_dev_free_resource_list(&resource_list);
-
-	acpi_spi_parse_apple_properties(spi);
-
-	if (ret < 0 || !spi->max_speed_hz) {
-		spi_dev_put(spi);
-		return AE_OK;
-	}
+	spi->max_speed_hz	= lookup.max_speed_hz;
+	spi->mode		= lookup.mode;
+	spi->irq		= lookup.irq;
+	spi->bits_per_word	= lookup.bits_per_word;
+	spi->chip_select	= lookup.chip_select;
 
 	acpi_set_modalias(adev, acpi_device_hid(adev), spi->modalias,
 			  sizeof(spi->modalias));
@@ -1846,6 +2048,8 @@
 	return acpi_register_spi_device(ctlr, adev);
 }
 
+#define SPI_ACPI_ENUMERATE_MAX_DEPTH		32
+
 static void acpi_register_spi_devices(struct spi_controller *ctlr)
 {
 	acpi_status status;
@@ -1855,7 +2059,8 @@
 	if (!handle)
 		return;
 
-	status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1,
+	status = acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT,
+				     SPI_ACPI_ENUMERATE_MAX_DEPTH,
 				     acpi_spi_add_device, NULL, ctlr, NULL);
 	if (ACPI_FAILURE(status))
 		dev_warn(&ctlr->dev, "failed to enumerate SPI slaves\n");
@@ -1901,8 +2106,8 @@
 	return 1;
 }
 
-static ssize_t spi_slave_show(struct device *dev,
-			      struct device_attribute *attr, char *buf)
+static ssize_t slave_show(struct device *dev, struct device_attribute *attr,
+			  char *buf)
 {
 	struct spi_controller *ctlr = container_of(dev, struct spi_controller,
 						   dev);
@@ -1913,9 +2118,8 @@
 		       child ? to_spi_device(child)->modalias : NULL);
 }
 
-static ssize_t spi_slave_store(struct device *dev,
-			       struct device_attribute *attr, const char *buf,
-			       size_t count)
+static ssize_t slave_store(struct device *dev, struct device_attribute *attr,
+			   const char *buf, size_t count)
 {
 	struct spi_controller *ctlr = container_of(dev, struct spi_controller,
 						   dev);
@@ -1953,7 +2157,7 @@
 	return count;
 }
 
-static DEVICE_ATTR(slave, 0644, spi_slave_show, spi_slave_store);
+static DEVICE_ATTR_RW(slave);
 
 static struct attribute *spi_slave_attrs[] = {
 	&dev_attr_slave.attr,
@@ -1984,8 +2188,10 @@
  * __spi_alloc_controller - allocate an SPI master or slave controller
  * @dev: the controller, possibly using the platform_bus
  * @size: how much zeroed driver-private data to allocate; the pointer to this
- *	memory is in the driver_data field of the returned device,
- *	accessible with spi_controller_get_devdata().
+ *	memory is in the driver_data field of the returned device, accessible
+ *	with spi_controller_get_devdata(); the memory is cacheline aligned;
+ *	drivers granting DMA access to portions of their private data need to
+ *	round up @size using ALIGN(size, dma_get_cache_alignment()).
  * @slave: flag indicating whether to allocate an SPI master (false) or SPI
  *	slave (true) controller
  * Context: can sleep
@@ -2007,11 +2213,12 @@
 					      unsigned int size, bool slave)
 {
 	struct spi_controller	*ctlr;
+	size_t ctlr_size = ALIGN(sizeof(*ctlr), dma_get_cache_alignment());
 
 	if (!dev)
 		return NULL;
 
-	ctlr = kzalloc(size + sizeof(*ctlr), GFP_KERNEL);
+	ctlr = kzalloc(size + ctlr_size, GFP_KERNEL);
 	if (!ctlr)
 		return NULL;
 
@@ -2025,14 +2232,14 @@
 		ctlr->dev.class = &spi_master_class;
 	ctlr->dev.parent = dev;
 	pm_suspend_ignore_children(&ctlr->dev, true);
-	spi_controller_set_devdata(ctlr, &ctlr[1]);
+	spi_controller_set_devdata(ctlr, (void *)ctlr + ctlr_size);
 
 	return ctlr;
 }
 EXPORT_SYMBOL_GPL(__spi_alloc_controller);
 
 #ifdef CONFIG_OF
-static int of_spi_register_master(struct spi_controller *ctlr)
+static int of_spi_get_gpio_numbers(struct spi_controller *ctlr)
 {
 	int nb, i, *cs;
 	struct device_node *np = ctlr->dev.of_node;
@@ -2065,12 +2272,68 @@
 	return 0;
 }
 #else
-static int of_spi_register_master(struct spi_controller *ctlr)
+static int of_spi_get_gpio_numbers(struct spi_controller *ctlr)
 {
 	return 0;
 }
 #endif
 
+/**
+ * spi_get_gpio_descs() - grab chip select GPIOs for the master
+ * @ctlr: The SPI master to grab GPIO descriptors for
+ */
+static int spi_get_gpio_descs(struct spi_controller *ctlr)
+{
+	int nb, i;
+	struct gpio_desc **cs;
+	struct device *dev = &ctlr->dev;
+
+	nb = gpiod_count(dev, "cs");
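+	/* Keep the larger of the native chip-select count and the CS GPIO count */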
+	ctlr->num_chipselect = max_t(int, nb, ctlr->num_chipselect);
+
+	/* No GPIOs at all is fine, else return the error */
+	if (nb == 0 || nb == -ENOENT)
+		return 0;
+	else if (nb < 0)
+		return nb;
+
+	cs = devm_kcalloc(dev, ctlr->num_chipselect, sizeof(*cs),
+			  GFP_KERNEL);
+	if (!cs)
+		return -ENOMEM;
+	ctlr->cs_gpiods = cs;
+
+	for (i = 0; i < nb; i++) {
+		/*
+		 * Most chipselects are active low, the inverted
+		 * semantics are handled by special quirks in gpiolib,
+		 * so initializing them to GPIOD_OUT_LOW here means
+		 * "unasserted", in most cases this will drive the physical
+		 * line high.
+		 */
+		cs[i] = devm_gpiod_get_index_optional(dev, "cs", i,
+						      GPIOD_OUT_LOW);
+		if (IS_ERR(cs[i]))
+			return PTR_ERR(cs[i]);
+
+		if (cs[i]) {
+			/*
+			 * If we find a CS GPIO, name it after the device and
+			 * chip select line.
+			 */
+			char *gpioname;
+
+			gpioname = devm_kasprintf(dev, GFP_KERNEL, "%s CS%d",
+						  dev_name(dev), i);
+			if (!gpioname)
+				return -ENOMEM;
+			gpiod_set_consumer_name(cs[i], gpioname);
+		}
+	}
+
+	return 0;
+}
+
 static int spi_controller_check_ops(struct spi_controller *ctlr)
 {
 	/*
@@ -2118,7 +2381,7 @@
 {
 	struct device		*dev = ctlr->dev.parent;
 	struct boardinfo	*bi;
-	int			status = -ENODEV;
+	int			status;
 	int			id, first_dynamic;
 
 	if (!dev)
@@ -2132,17 +2395,6 @@
 	if (status)
 		return status;
 
-	if (!spi_controller_is_slave(ctlr)) {
-		status = of_spi_register_master(ctlr);
-		if (status)
-			return status;
-	}
-
-	/* even if it's just one always-selected device, there must
-	 * be at least one chipselect
-	 */
-	if (ctlr->num_chipselect == 0)
-		return -EINVAL;
 	if (ctlr->bus_num >= 0) {
 		/* devices with a fixed bus num must check-in with the num */
 		mutex_lock(&board_lock);
@@ -2194,6 +2446,32 @@
 	 * registration fails if the bus ID is in use.
 	 */
 	dev_set_name(&ctlr->dev, "spi%u", ctlr->bus_num);
+
+	if (!spi_controller_is_slave(ctlr)) {
+		if (ctlr->use_gpio_descriptors) {
+			status = spi_get_gpio_descs(ctlr);
+			if (status)
+				return status;
+			/*
+			 * A controller using GPIO descriptors always
+			 * supports SPI_CS_HIGH if need be.
+			 */
+			ctlr->mode_bits |= SPI_CS_HIGH;
+		} else {
+			/* Legacy code path for GPIOs from DT */
+			status = of_spi_get_gpio_numbers(ctlr);
+			if (status)
+				return status;
+		}
+	}
+
+	/*
+	 * Even if it's just one always-selected device, there must
+	 * be at least one chipselect.
+	 */
+	if (!ctlr->num_chipselect)
+		return -EINVAL;
+
 	status = device_add(&ctlr->dev);
 	if (status < 0) {
 		/* free bus id */
@@ -2303,7 +2581,6 @@
 {
 	struct spi_controller *found;
 	int id = ctlr->bus_num;
-	int dummy;
 
 	/* First make sure that this controller was ever added */
 	mutex_lock(&board_lock);
@@ -2317,7 +2594,7 @@
 	list_del(&ctlr->list);
 	mutex_unlock(&board_lock);
 
-	dummy = device_for_each_child(&ctlr->dev, NULL, __unregister);
+	device_for_each_child(&ctlr->dev, NULL, __unregister);
 	device_unregister(&ctlr->dev);
 	/* free bus id */
 	mutex_lock(&board_lock);
@@ -2466,12 +2743,9 @@
  */
 void spi_res_release(struct spi_controller *ctlr, struct spi_message *message)
 {
-	struct spi_res *res;
+	struct spi_res *res, *tmp;
 
-	while (!list_empty(&message->resources)) {
-		res = list_last_entry(&message->resources,
-				      struct spi_res, entry);
-
+	list_for_each_entry_safe_reverse(res, tmp, &message->resources, entry) {
 		if (res->release)
 			res->release(ctlr, message, res->data);
 
@@ -2535,8 +2809,7 @@
 
 	/* allocate the structure using spi_res */
 	rxfer = spi_res_alloc(msg->spi, __spi_replace_transfers_release,
-			      insert * sizeof(struct spi_transfer)
-			      + sizeof(struct spi_replaced_transfers)
+			      struct_size(rxfer, inserted_transfers, insert)
 			      + extradatasize,
 			      gfp);
 	if (!rxfer)
@@ -2626,11 +2899,6 @@
 	size_t offset;
 	size_t count, i;
 
-	/* warn once about this fact that we are splitting a transfer */
-	dev_warn_once(&msg->spi->dev,
-		      "spi_transfer of length %i exceed max length of %zu - needed to split transfers\n",
-		      xfer->len, maxsize);
-
 	/* calculate how many we have to replace */
 	count = DIV_ROUND_UP(xfer->len, maxsize);
 
@@ -2779,14 +3047,23 @@
 	/* if it is SPI_3WIRE mode, DUAL and QUAD should be forbidden
 	 */
 	if ((spi->mode & SPI_3WIRE) && (spi->mode &
-		(SPI_TX_DUAL | SPI_TX_QUAD | SPI_RX_DUAL | SPI_RX_QUAD)))
+		(SPI_TX_DUAL | SPI_TX_QUAD | SPI_TX_OCTAL |
+		 SPI_RX_DUAL | SPI_RX_QUAD | SPI_RX_OCTAL)))
 		return -EINVAL;
 	/* help drivers fail *cleanly* when they need options
 	 * that aren't supported with their current controller
+	 * SPI_CS_WORD has a fallback software implementation,
+	 * so it is ignored here.
 	 */
-	bad_bits = spi->mode & ~spi->controller->mode_bits;
+	bad_bits = spi->mode & ~(spi->controller->mode_bits | SPI_CS_WORD);
+	/* nothing prevents us from working with an active-high CS if it
+	 * is driven by a GPIO.
+	 */
+	if (gpio_is_valid(spi->cs_gpio))
+		bad_bits &= ~SPI_CS_HIGH;
 	ugly_bits = bad_bits &
-		    (SPI_TX_DUAL | SPI_TX_QUAD | SPI_RX_DUAL | SPI_RX_QUAD);
+		    (SPI_TX_DUAL | SPI_TX_QUAD | SPI_TX_OCTAL |
+		     SPI_RX_DUAL | SPI_RX_QUAD | SPI_RX_OCTAL);
 	if (ugly_bits) {
 		dev_warn(&spi->dev,
 			 "setup: ignoring unsupported mode bits %x\n",
@@ -2816,6 +3093,11 @@
 
 	spi_set_cs(spi, false);
 
+	if (spi->rt && !spi->controller->rt) {
+		spi->controller->rt = true;
+		spi_set_thread_rt(spi->controller);
+	}
+
 	dev_dbg(&spi->dev, "setup mode %d, %s%s%s%s%u bits/w, %u Hz max --> %d\n",
 			(int) (spi->mode & (SPI_CPOL | SPI_CPHA)),
 			(spi->mode & SPI_CS_HIGH) ? "cs_high, " : "",
@@ -2829,6 +3111,21 @@
 }
 EXPORT_SYMBOL_GPL(spi_setup);
 
+/**
+ * spi_set_cs_timing - configure CS setup, hold, and inactive delays
+ * @spi: the device that requires specific CS timing configuration
+ * @setup: CS setup time in terms of clock count
+ * @hold: CS hold time in terms of clock count
+ * @inactive_dly: CS inactive delay between transfers in terms of clock count
+ */
+void spi_set_cs_timing(struct spi_device *spi, u8 setup, u8 hold,
+		       u8 inactive_dly)
+{
+	if (spi->controller->set_cs_timing)
+		spi->controller->set_cs_timing(spi, setup, hold, inactive_dly);
+}
+EXPORT_SYMBOL_GPL(spi_set_cs_timing);
+
 static int __spi_validate(struct spi_device *spi, struct spi_message *message)
 {
 	struct spi_controller *ctlr = spi->controller;
@@ -2838,6 +3135,36 @@
 	if (list_empty(&message->transfers))
 		return -EINVAL;
 
+	/* If an SPI controller does not support toggling the CS line on each
+	 * transfer (indicated by the SPI_CS_WORD flag) or we are using a GPIO
+	 * for the CS line, we can emulate the CS-per-word hardware function by
+	 * splitting transfers into one-word transfers and ensuring that
+	 * cs_change is set for each transfer.
+	 */
+	if ((spi->mode & SPI_CS_WORD) && (!(ctlr->mode_bits & SPI_CS_WORD) ||
+					  spi->cs_gpiod ||
+					  gpio_is_valid(spi->cs_gpio))) {
+		size_t maxsize;
+		int ret;
+
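+		/* One word, rounded up to a whole number of bytes */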
+		maxsize = (spi->bits_per_word + 7) / 8;
+
+		/* spi_split_transfers_maxsize() requires message->spi */
+		message->spi = spi;
+
+		ret = spi_split_transfers_maxsize(ctlr, message, maxsize,
+						  GFP_KERNEL);
+		if (ret)
+			return ret;
+
+		list_for_each_entry(xfer, &message->transfers, transfer_list) {
+			/* don't change cs_change on the last entry in the list */
+			if (list_is_last(&xfer->transfer_list, &message->transfers))
+				break;
+			xfer->cs_change = 1;
+		}
+	}
+
 	/* Half-duplex links include original MicroWire, and ones with
 	 * only one data pin like SPI_3WIRE (switches direction) or where
 	 * either MOSI or MISO is missing.  They can also be caused by
@@ -2862,17 +3189,18 @@
 	 * it is not set for this transfer.
 	 * Set transfer tx_nbits and rx_nbits as single transfer default
 	 * (SPI_NBITS_SINGLE) if it is not set for this transfer.
+	 * Ensure transfer word_delay is at least as long as that required by
+	 * device itself.
 	 */
 	message->frame_length = 0;
 	list_for_each_entry(xfer, &message->transfers, transfer_list) {
+		xfer->effective_speed_hz = 0;
 		message->frame_length += xfer->len;
 		if (!xfer->bits_per_word)
 			xfer->bits_per_word = spi->bits_per_word;
 
 		if (!xfer->speed_hz)
 			xfer->speed_hz = spi->max_speed_hz;
-		if (!xfer->speed_hz)
-			xfer->speed_hz = ctlr->max_speed_hz;
 
 		if (ctlr->max_speed_hz && xfer->speed_hz > ctlr->max_speed_hz)
 			xfer->speed_hz = ctlr->max_speed_hz;
@@ -2932,6 +3260,9 @@
 				!(spi->mode & SPI_RX_QUAD))
 				return -EINVAL;
 		}
+
+		if (xfer->word_delay_usecs < spi->word_delay_usecs)
+			xfer->word_delay_usecs = spi->word_delay_usecs;
 	}
 
 	message->status = -EINPROGRESS;
@@ -3323,35 +3654,26 @@
 
 /*-------------------------------------------------------------------------*/
 
-#if IS_ENABLED(CONFIG_OF_DYNAMIC)
-static int __spi_of_device_match(struct device *dev, void *data)
-{
-	return dev->of_node == data;
-}
-
+#if IS_ENABLED(CONFIG_OF)
 /* must call put_device() when done with returned spi_device device */
-static struct spi_device *of_find_spi_device_by_node(struct device_node *node)
+struct spi_device *of_find_spi_device_by_node(struct device_node *node)
 {
-	struct device *dev = bus_find_device(&spi_bus_type, NULL, node,
-						__spi_of_device_match);
+	struct device *dev = bus_find_device_by_of_node(&spi_bus_type, node);
+
 	return dev ? to_spi_device(dev) : NULL;
 }
+EXPORT_SYMBOL_GPL(of_find_spi_device_by_node);
+#endif /* IS_ENABLED(CONFIG_OF) */
 
-static int __spi_of_controller_match(struct device *dev, const void *data)
-{
-	return dev->of_node == data;
-}
-
+#if IS_ENABLED(CONFIG_OF_DYNAMIC)
 /* the spi controllers are not using spi_bus, so we find it with another way */
 static struct spi_controller *of_find_spi_controller_by_node(struct device_node *node)
 {
 	struct device *dev;
 
-	dev = class_find_device(&spi_master_class, NULL, node,
-				__spi_of_controller_match);
+	dev = class_find_device_by_of_node(&spi_master_class, node);
 	if (!dev && IS_ENABLED(CONFIG_SPI_SLAVE))
-		dev = class_find_device(&spi_slave_class, NULL, node,
-					__spi_of_controller_match);
+		dev = class_find_device_by_of_node(&spi_slave_class, node);
 	if (!dev)
 		return NULL;
 
@@ -3422,11 +3744,6 @@
 	return ACPI_COMPANION(dev->parent) == data;
 }
 
-static int spi_acpi_device_match(struct device *dev, void *data)
-{
-	return ACPI_COMPANION(dev) == data;
-}
-
 static struct spi_controller *acpi_spi_find_controller_by_adev(struct acpi_device *adev)
 {
 	struct device *dev;
@@ -3446,8 +3763,7 @@
 {
 	struct device *dev;
 
-	dev = bus_find_device(&spi_bus_type, NULL, adev, spi_acpi_device_match);
-
+	dev = bus_find_device_by_acpi_dev(&spi_bus_type, adev);
 	return dev ? to_spi_device(dev) : NULL;
 }
 
@@ -3540,4 +3856,3 @@
  * include needing to have boardinfo data structures be much more public.
  */
 postcore_initcall(spi_init);
-
diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c
index cda1071..255786f 100644
--- a/drivers/spi/spidev.c
+++ b/drivers/spi/spidev.c
@@ -1,19 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Simple synchronous userspace interface to SPI devices
  *
  * Copyright (C) 2006 SWAPP
  *	Andrea Paterniani <a.paterniani@swapp-eng.it>
  * Copyright (C) 2007 David Brownell (simplification, cleanup)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/init.h>
@@ -276,17 +267,19 @@
 		k_tmp->bits_per_word = u_tmp->bits_per_word;
 		k_tmp->delay_usecs = u_tmp->delay_usecs;
 		k_tmp->speed_hz = u_tmp->speed_hz;
+		k_tmp->word_delay_usecs = u_tmp->word_delay_usecs;
 		if (!k_tmp->speed_hz)
 			k_tmp->speed_hz = spidev->speed_hz;
 #ifdef VERBOSE
 		dev_dbg(&spidev->spi->dev,
-			"  xfer len %u %s%s%s%dbits %u usec %uHz\n",
+			"  xfer len %u %s%s%s%dbits %u usec %u usec %uHz\n",
 			u_tmp->len,
 			u_tmp->rx_buf ? "rx " : "",
 			u_tmp->tx_buf ? "tx " : "",
 			u_tmp->cs_change ? "cs " : "",
 			u_tmp->bits_per_word ? : spidev->spi->bits_per_word,
 			u_tmp->delay_usecs,
+			u_tmp->word_delay_usecs,
 			u_tmp->speed_hz ? : spidev->spi->max_speed_hz);
 #endif
 		spi_message_add_tail(k_tmp, &msg);
@@ -591,7 +584,7 @@
 
 	spidev->users++;
 	filp->private_data = spidev;
-	nonseekable_open(inode, filp);
+	stream_open(inode, filp);
 
 	mutex_unlock(&device_list_lock);
 	return 0;
@@ -669,6 +662,9 @@
 	{ .compatible = "lineartechnology,ltc2488" },
 	{ .compatible = "ge,achc" },
 	{ .compatible = "semtech,sx1301" },
+	{ .compatible = "lwn,bk4" },
+	{ .compatible = "dh,dhcom-board" },
+	{ .compatible = "menlo,m53cpld" },
 	{},
 };
 MODULE_DEVICE_TABLE(of, spidev_dt_ids);
@@ -724,11 +720,9 @@
 	 * compatible string, it is a Linux implementation thing
 	 * rather than a description of the hardware.
 	 */
-	if (spi->dev.of_node && !of_match_device(spidev_dt_ids, &spi->dev)) {
-		dev_err(&spi->dev, "buggy DT: spidev listed directly in DT\n");
-		WARN_ON(spi->dev.of_node &&
-			!of_match_device(spidev_dt_ids, &spi->dev));
-	}
+	WARN(spi->dev.of_node &&
+	     of_device_is_compatible(spi->dev.of_node, "spidev"),
+	     "%pOF: buggy DT: spidev listed directly in DT\n", spi->dev.of_node);
 
 	spidev_probe_acpi(spi);