Linux v4.19.13 snapshot of drivers/ntb, presented as a series of diffs adding each file.
diff --git a/drivers/ntb/Kconfig b/drivers/ntb/Kconfig
new file mode 100644
index 0000000..95944e5
--- /dev/null
+++ b/drivers/ntb/Kconfig
@@ -0,0 +1,28 @@
+menuconfig NTB
+	tristate "Non-Transparent Bridge support"
+	depends on PCI
+	help
+	 The PCI-E Non-Transparent Bridge hardware is a point-to-point PCI-E
+	 bus connecting two systems.  When configured, writes to the device's
+	 PCI-mapped memory are mirrored to a buffer on the remote system.  The
+	 ntb Linux driver uses this point-to-point communication as a method
+	 to transfer data from one system to the other.
+
+	 If unsure, say N.
+
+if NTB
+
+source "drivers/ntb/hw/Kconfig"
+
+source "drivers/ntb/test/Kconfig"
+
+config NTB_TRANSPORT
+	tristate "NTB Transport Client"
+	help
+	 This is a transport driver that enables connected systems to exchange
+	 messages over the NTB hardware.  The transport exposes a queue-pair
+	 API to client drivers.
+
+	 If unsure, say N.
+
+endif # NTB
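The queue-pair API referred to in the NTB_TRANSPORT help is the one declared in
include/linux/ntb_transport.h and consumed by in-tree clients such as
drivers/net/ntb_netdev.c.  The sketch below only illustrates how a client might
attach to the transport: the my_rx/my_tx/my_event/my_attach names are made up
here, and only the ntb_transport_* calls and handler shapes are taken from that
header.

	#include <linux/ntb_transport.h>

	static void my_rx(struct ntb_transport_qp *qp, void *qp_data,
			  void *data, int len)
	{
		/* consume the received buffer, then repost it for reuse */
		ntb_transport_rx_enqueue(qp, data, data, len);
	}

	static void my_tx(struct ntb_transport_qp *qp, void *qp_data,
			  void *data, int len)
	{
		/* the transmit buffer has been consumed by the peer */
	}

	static void my_event(void *qp_data, int link_is_up)
	{
		/* transport queue link state changed */
	}

	static const struct ntb_queue_handlers my_handlers = {
		.rx_handler	= my_rx,
		.tx_handler	= my_tx,
		.event_handler	= my_event,
	};

	/* typically called from an ntb_transport_client probe() callback */
	static struct ntb_transport_qp *my_attach(struct device *client_dev)
	{
		struct ntb_transport_qp *qp;

		qp = ntb_transport_create_queue(NULL, client_dev, &my_handlers);
		if (!qp)
			return NULL;

		ntb_transport_link_up(qp);
		return qp;
	}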
diff --git a/drivers/ntb/Makefile b/drivers/ntb/Makefile
new file mode 100644
index 0000000..1921dec
--- /dev/null
+++ b/drivers/ntb/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_NTB) += ntb.o hw/ test/
+obj-$(CONFIG_NTB_TRANSPORT) += ntb_transport.o
diff --git a/drivers/ntb/hw/Kconfig b/drivers/ntb/hw/Kconfig
new file mode 100644
index 0000000..e51b581
--- /dev/null
+++ b/drivers/ntb/hw/Kconfig
@@ -0,0 +1,4 @@
+source "drivers/ntb/hw/amd/Kconfig"
+source "drivers/ntb/hw/idt/Kconfig"
+source "drivers/ntb/hw/intel/Kconfig"
+source "drivers/ntb/hw/mscc/Kconfig"
diff --git a/drivers/ntb/hw/Makefile b/drivers/ntb/hw/Makefile
new file mode 100644
index 0000000..923c442
--- /dev/null
+++ b/drivers/ntb/hw/Makefile
@@ -0,0 +1,4 @@
+obj-$(CONFIG_NTB_AMD)	+= amd/
+obj-$(CONFIG_NTB_IDT)	+= idt/
+obj-$(CONFIG_NTB_INTEL)	+= intel/
+obj-$(CONFIG_NTB_SWITCHTEC) += mscc/
diff --git a/drivers/ntb/hw/amd/Kconfig b/drivers/ntb/hw/amd/Kconfig
new file mode 100644
index 0000000..cfe903c
--- /dev/null
+++ b/drivers/ntb/hw/amd/Kconfig
@@ -0,0 +1,7 @@
+config NTB_AMD
+	tristate "AMD Non-Transparent Bridge support"
+	depends on X86_64
+	help
+	 This driver supports AMD NTB on capable Zeppelin hardware.
+
+	 If unsure, say N.
diff --git a/drivers/ntb/hw/amd/Makefile b/drivers/ntb/hw/amd/Makefile
new file mode 100644
index 0000000..ad54da9
--- /dev/null
+++ b/drivers/ntb/hw/amd/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_NTB_AMD) += ntb_hw_amd.o
diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c
new file mode 100644
index 0000000..efb214f
--- /dev/null
+++ b/drivers/ntb/hw/amd/ntb_hw_amd.c
@@ -0,0 +1,1155 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *   Copyright (C) 2016 T-Platforms. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *   Copyright (C) 2016 T-Platforms. All Rights Reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of AMD Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * AMD PCIe NTB Linux driver
+ *
+ * Contact Information:
+ * Xiangliang Yu <Xiangliang.Yu@amd.com>
+ */
+
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/acpi.h>
+#include <linux/pci.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/ntb.h>
+
+#include "ntb_hw_amd.h"
+
+#define NTB_NAME	"ntb_hw_amd"
+#define NTB_DESC	"AMD(R) PCI-E Non-Transparent Bridge Driver"
+#define NTB_VER		"1.0"
+
+MODULE_DESCRIPTION(NTB_DESC);
+MODULE_VERSION(NTB_VER);
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("AMD Inc.");
+
+static const struct file_operations amd_ntb_debugfs_info;
+static struct dentry *debugfs_dir;
+
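+/*
+ * Memory-window index to BAR number mapping: the AMD NTB exposes its memory
+ * windows through BAR1, BAR2 and BAR4, so index 0/1/2 maps to BAR 1/2/4
+ * respectively (hence the "1 << idx" below).
+ */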
+static int ndev_mw_to_bar(struct amd_ntb_dev *ndev, int idx)
+{
+	if (idx < 0 || idx >= ndev->mw_count)
+		return -EINVAL;
+
+	return 1 << idx;
+}
+
+static int amd_ntb_mw_count(struct ntb_dev *ntb, int pidx)
+{
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
+	return ntb_ndev(ntb)->mw_count;
+}
+
+static int amd_ntb_mw_get_align(struct ntb_dev *ntb, int pidx, int idx,
+				resource_size_t *addr_align,
+				resource_size_t *size_align,
+				resource_size_t *size_max)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	int bar;
+
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
+	bar = ndev_mw_to_bar(ndev, idx);
+	if (bar < 0)
+		return bar;
+
+	if (addr_align)
+		*addr_align = SZ_4K;
+
+	if (size_align)
+		*size_align = 1;
+
+	if (size_max)
+		*size_max = pci_resource_len(ndev->ntb.pdev, bar);
+
+	return 0;
+}
+
+static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx,
+				dma_addr_t addr, resource_size_t size)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	unsigned long xlat_reg, limit_reg = 0;
+	resource_size_t mw_size;
+	void __iomem *mmio, *peer_mmio;
+	u64 base_addr, limit, reg_val;
+	int bar;
+
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
+	bar = ndev_mw_to_bar(ndev, idx);
+	if (bar < 0)
+		return bar;
+
+	mw_size = pci_resource_len(ntb->pdev, bar);
+
+	/* make sure the range fits in the usable mw size */
+	if (size > mw_size)
+		return -EINVAL;
+
+	mmio = ndev->self_mmio;
+	peer_mmio = ndev->peer_mmio;
+
+	base_addr = pci_resource_start(ntb->pdev, bar);
+
+	if (bar != 1) {
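+		/*
+		 * BARs 2 and 4 are the 64-bit memory windows: their XLAT and
+		 * LIMIT registers sit 8 bytes apart (BAR23 vs BAR45), so
+		 * (bar - 2) << 2 selects the register pair for this BAR.
+		 */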
+		xlat_reg = AMD_BAR23XLAT_OFFSET + ((bar - 2) << 2);
+		limit_reg = AMD_BAR23LMT_OFFSET + ((bar - 2) << 2);
+
+		/* Set the limit if supported */
+		limit = size;
+
+		/* set and verify setting the translation address */
+		write64(addr, peer_mmio + xlat_reg);
+		reg_val = read64(peer_mmio + xlat_reg);
+		if (reg_val != addr) {
+			write64(0, peer_mmio + xlat_reg);
+			return -EIO;
+		}
+
+		/* set and verify setting the limit */
+		write64(limit, mmio + limit_reg);
+		reg_val = read64(mmio + limit_reg);
+		if (reg_val != limit) {
+			write64(base_addr, mmio + limit_reg);
+			write64(0, peer_mmio + xlat_reg);
+			return -EIO;
+		}
+	} else {
+		xlat_reg = AMD_BAR1XLAT_OFFSET;
+		limit_reg = AMD_BAR1LMT_OFFSET;
+
+		/* Set the limit if supported */
+		limit = size;
+
+		/* set and verify setting the translation address */
+		write64(addr, peer_mmio + xlat_reg);
+		reg_val = read64(peer_mmio + xlat_reg);
+		if (reg_val != addr) {
+			write64(0, peer_mmio + xlat_reg);
+			return -EIO;
+		}
+
+		/* set and verify setting the limit */
+		writel(limit, mmio + limit_reg);
+		reg_val = readl(mmio + limit_reg);
+		if (reg_val != limit) {
+			writel(base_addr, mmio + limit_reg);
+			writel(0, peer_mmio + xlat_reg);
+			return -EIO;
+		}
+	}
+
+	return 0;
+}
+
+static int amd_link_is_up(struct amd_ntb_dev *ndev)
+{
+	if (!ndev->peer_sta)
+		return NTB_LNK_STA_ACTIVE(ndev->cntl_sta);
+
+	if (ndev->peer_sta & AMD_LINK_UP_EVENT) {
+		ndev->peer_sta = 0;
+		return 1;
+	}
+
+	/* If peer_sta indicates a reset or D0 event, the ISR has already
+	 * started a timer to poll the hardware link status, so just clear
+	 * the status bit here.  If peer_sta indicates D3 or PME_TO, a
+	 * D0/reset event will occur when the system wakes up or powers on,
+	 * so do nothing here.
+	 */
+	if (ndev->peer_sta & AMD_PEER_RESET_EVENT)
+		ndev->peer_sta &= ~AMD_PEER_RESET_EVENT;
+	else if (ndev->peer_sta & (AMD_PEER_D0_EVENT | AMD_LINK_DOWN_EVENT))
+		ndev->peer_sta = 0;
+
+	return 0;
+}
+
+static u64 amd_ntb_link_is_up(struct ntb_dev *ntb,
+			      enum ntb_speed *speed,
+			      enum ntb_width *width)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	int ret = 0;
+
+	if (amd_link_is_up(ndev)) {
+		if (speed)
+			*speed = NTB_LNK_STA_SPEED(ndev->lnk_sta);
+		if (width)
+			*width = NTB_LNK_STA_WIDTH(ndev->lnk_sta);
+
+		dev_dbg(&ntb->pdev->dev, "link is up.\n");
+
+		ret = 1;
+	} else {
+		if (speed)
+			*speed = NTB_SPEED_NONE;
+		if (width)
+			*width = NTB_WIDTH_NONE;
+
+		dev_dbg(&ntb->pdev->dev, "link is down.\n");
+	}
+
+	return ret;
+}
+
+static int amd_ntb_link_enable(struct ntb_dev *ntb,
+			       enum ntb_speed max_speed,
+			       enum ntb_width max_width)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	void __iomem *mmio = ndev->self_mmio;
+	u32 ntb_ctl;
+
+	/* Enable event interrupt */
+	ndev->int_mask &= ~AMD_EVENT_INTMASK;
+	writel(ndev->int_mask, mmio + AMD_INTMASK_OFFSET);
+
+	if (ndev->ntb.topo == NTB_TOPO_SEC)
+		return -EINVAL;
+	dev_dbg(&ntb->pdev->dev, "Enabling Link.\n");
+
+	ntb_ctl = readl(mmio + AMD_CNTL_OFFSET);
+	ntb_ctl |= (PMM_REG_CTL | SMM_REG_CTL);
+	writel(ntb_ctl, mmio + AMD_CNTL_OFFSET);
+
+	return 0;
+}
+
+static int amd_ntb_link_disable(struct ntb_dev *ntb)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	void __iomem *mmio = ndev->self_mmio;
+	u32 ntb_ctl;
+
+	/* Disable event interrupt */
+	ndev->int_mask |= AMD_EVENT_INTMASK;
+	writel(ndev->int_mask, mmio + AMD_INTMASK_OFFSET);
+
+	if (ndev->ntb.topo == NTB_TOPO_SEC)
+		return -EINVAL;
+	dev_dbg(&ntb->pdev->dev, "Disabling Link.\n");
+
+	ntb_ctl = readl(mmio + AMD_CNTL_OFFSET);
+	ntb_ctl &= ~(PMM_REG_CTL | SMM_REG_CTL);
+	writel(ntb_ctl, mmio + AMD_CNTL_OFFSET);
+
+	return 0;
+}
+
+static int amd_ntb_peer_mw_count(struct ntb_dev *ntb)
+{
+	/* The same as for inbound MWs */
+	return ntb_ndev(ntb)->mw_count;
+}
+
+static int amd_ntb_peer_mw_get_addr(struct ntb_dev *ntb, int idx,
+				    phys_addr_t *base, resource_size_t *size)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	int bar;
+
+	bar = ndev_mw_to_bar(ndev, idx);
+	if (bar < 0)
+		return bar;
+
+	if (base)
+		*base = pci_resource_start(ndev->ntb.pdev, bar);
+
+	if (size)
+		*size = pci_resource_len(ndev->ntb.pdev, bar);
+
+	return 0;
+}
+
+static u64 amd_ntb_db_valid_mask(struct ntb_dev *ntb)
+{
+	return ntb_ndev(ntb)->db_valid_mask;
+}
+
+static int amd_ntb_db_vector_count(struct ntb_dev *ntb)
+{
+	return ntb_ndev(ntb)->db_count;
+}
+
+static u64 amd_ntb_db_vector_mask(struct ntb_dev *ntb, int db_vector)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+	if (db_vector < 0 || db_vector > ndev->db_count)
+		return 0;
+
+	return ntb_ndev(ntb)->db_valid_mask & (1 << db_vector);
+}
+
+static u64 amd_ntb_db_read(struct ntb_dev *ntb)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	void __iomem *mmio = ndev->self_mmio;
+
+	return (u64)readw(mmio + AMD_DBSTAT_OFFSET);
+}
+
+static int amd_ntb_db_clear(struct ntb_dev *ntb, u64 db_bits)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	void __iomem *mmio = ndev->self_mmio;
+
+	writew((u16)db_bits, mmio + AMD_DBSTAT_OFFSET);
+
+	return 0;
+}
+
+static int amd_ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	void __iomem *mmio = ndev->self_mmio;
+	unsigned long flags;
+
+	if (db_bits & ~ndev->db_valid_mask)
+		return -EINVAL;
+
+	spin_lock_irqsave(&ndev->db_mask_lock, flags);
+	ndev->db_mask |= db_bits;
+	writew((u16)ndev->db_mask, mmio + AMD_DBMASK_OFFSET);
+	spin_unlock_irqrestore(&ndev->db_mask_lock, flags);
+
+	return 0;
+}
+
+static int amd_ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	void __iomem *mmio = ndev->self_mmio;
+	unsigned long flags;
+
+	if (db_bits & ~ndev->db_valid_mask)
+		return -EINVAL;
+
+	spin_lock_irqsave(&ndev->db_mask_lock, flags);
+	ndev->db_mask &= ~db_bits;
+	writew((u16)ndev->db_mask, mmio + AMD_DBMASK_OFFSET);
+	spin_unlock_irqrestore(&ndev->db_mask_lock, flags);
+
+	return 0;
+}
+
+static int amd_ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	void __iomem *mmio = ndev->self_mmio;
+
+	writew((u16)db_bits, mmio + AMD_DBREQ_OFFSET);
+
+	return 0;
+}
+
+static int amd_ntb_spad_count(struct ntb_dev *ntb)
+{
+	return ntb_ndev(ntb)->spad_count;
+}
+
+static u32 amd_ntb_spad_read(struct ntb_dev *ntb, int idx)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	void __iomem *mmio = ndev->self_mmio;
+	u32 offset;
+
+	if (idx < 0 || idx >= ndev->spad_count)
+		return 0;
+
+	offset = ndev->self_spad + (idx << 2);
+	return readl(mmio + AMD_SPAD_OFFSET + offset);
+}
+
+static int amd_ntb_spad_write(struct ntb_dev *ntb,
+			      int idx, u32 val)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	void __iomem *mmio = ndev->self_mmio;
+	u32 offset;
+
+	if (idx < 0 || idx >= ndev->spad_count)
+		return -EINVAL;
+
+	offset = ndev->self_spad + (idx << 2);
+	writel(val, mmio + AMD_SPAD_OFFSET + offset);
+
+	return 0;
+}
+
+static u32 amd_ntb_peer_spad_read(struct ntb_dev *ntb, int pidx, int sidx)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	void __iomem *mmio = ndev->self_mmio;
+	u32 offset;
+
+	if (sidx < 0 || sidx >= ndev->spad_count)
+		return -EINVAL;
+
+	offset = ndev->peer_spad + (sidx << 2);
+	return readl(mmio + AMD_SPAD_OFFSET + offset);
+}
+
+static int amd_ntb_peer_spad_write(struct ntb_dev *ntb, int pidx,
+				   int sidx, u32 val)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	void __iomem *mmio = ndev->self_mmio;
+	u32 offset;
+
+	if (sidx < 0 || sidx >= ndev->spad_count)
+		return -EINVAL;
+
+	offset = ndev->peer_spad + (sidx << 2);
+	writel(val, mmio + AMD_SPAD_OFFSET + offset);
+
+	return 0;
+}
+
+static const struct ntb_dev_ops amd_ntb_ops = {
+	.mw_count		= amd_ntb_mw_count,
+	.mw_get_align		= amd_ntb_mw_get_align,
+	.mw_set_trans		= amd_ntb_mw_set_trans,
+	.peer_mw_count		= amd_ntb_peer_mw_count,
+	.peer_mw_get_addr	= amd_ntb_peer_mw_get_addr,
+	.link_is_up		= amd_ntb_link_is_up,
+	.link_enable		= amd_ntb_link_enable,
+	.link_disable		= amd_ntb_link_disable,
+	.db_valid_mask		= amd_ntb_db_valid_mask,
+	.db_vector_count	= amd_ntb_db_vector_count,
+	.db_vector_mask		= amd_ntb_db_vector_mask,
+	.db_read		= amd_ntb_db_read,
+	.db_clear		= amd_ntb_db_clear,
+	.db_set_mask		= amd_ntb_db_set_mask,
+	.db_clear_mask		= amd_ntb_db_clear_mask,
+	.peer_db_set		= amd_ntb_peer_db_set,
+	.spad_count		= amd_ntb_spad_count,
+	.spad_read		= amd_ntb_spad_read,
+	.spad_write		= amd_ntb_spad_write,
+	.peer_spad_read		= amd_ntb_peer_spad_read,
+	.peer_spad_write	= amd_ntb_peer_spad_write,
+};
+
+static void amd_ack_smu(struct amd_ntb_dev *ndev, u32 bit)
+{
+	void __iomem *mmio = ndev->self_mmio;
+	int reg;
+
+	reg = readl(mmio + AMD_SMUACK_OFFSET);
+	reg |= bit;
+	writel(reg, mmio + AMD_SMUACK_OFFSET);
+
+	ndev->peer_sta |= bit;
+}
+
+static void amd_handle_event(struct amd_ntb_dev *ndev, int vec)
+{
+	void __iomem *mmio = ndev->self_mmio;
+	struct device *dev = &ndev->ntb.pdev->dev;
+	u32 status;
+
+	status = readl(mmio + AMD_INTSTAT_OFFSET);
+	if (!(status & AMD_EVENT_INTMASK))
+		return;
+
+	dev_dbg(dev, "status = 0x%x and vec = %d\n", status, vec);
+
+	status &= AMD_EVENT_INTMASK;
+	switch (status) {
+	case AMD_PEER_FLUSH_EVENT:
+		dev_info(dev, "Flush is done.\n");
+		break;
+	case AMD_PEER_RESET_EVENT:
+		amd_ack_smu(ndev, AMD_PEER_RESET_EVENT);
+
+		/* link down first */
+		ntb_link_event(&ndev->ntb);
+		/* polling peer status */
+		schedule_delayed_work(&ndev->hb_timer, AMD_LINK_HB_TIMEOUT);
+
+		break;
+	case AMD_PEER_D3_EVENT:
+	case AMD_PEER_PMETO_EVENT:
+	case AMD_LINK_UP_EVENT:
+	case AMD_LINK_DOWN_EVENT:
+		amd_ack_smu(ndev, status);
+
+		/* link down */
+		ntb_link_event(&ndev->ntb);
+
+		break;
+	case AMD_PEER_D0_EVENT:
+		mmio = ndev->peer_mmio;
+		status = readl(mmio + AMD_PMESTAT_OFFSET);
+		/* check if this is WAKEUP event */
+		if (status & 0x1)
+			dev_info(dev, "Wakeup is done.\n");
+
+		amd_ack_smu(ndev, AMD_PEER_D0_EVENT);
+
+		/* start a timer to poll link status */
+		schedule_delayed_work(&ndev->hb_timer,
+				      AMD_LINK_HB_TIMEOUT);
+		break;
+	default:
+		dev_info(dev, "event status = 0x%x.\n", status);
+		break;
+	}
+}
+
+static irqreturn_t ndev_interrupt(struct amd_ntb_dev *ndev, int vec)
+{
+	dev_dbg(&ndev->ntb.pdev->dev, "vec %d\n", vec);
+
+	if (vec > (AMD_DB_CNT - 1) || (ndev->msix_vec_count == 1))
+		amd_handle_event(ndev, vec);
+
+	if (vec < AMD_DB_CNT)
+		ntb_db_event(&ndev->ntb, vec);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t ndev_vec_isr(int irq, void *dev)
+{
+	struct amd_ntb_vec *nvec = dev;
+
+	return ndev_interrupt(nvec->ndev, nvec->num);
+}
+
+static irqreturn_t ndev_irq_isr(int irq, void *dev)
+{
+	struct amd_ntb_dev *ndev = dev;
+
+	return ndev_interrupt(ndev, irq - ndev->ntb.pdev->irq);
+}
+
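+/*
+ * Interrupt setup policy: try MSI-X first (doorbell vectors plus event
+ * vectors), fall back to a single MSI vector, and finally to legacy INTx
+ * if neither can be enabled.
+ */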
+static int ndev_init_isr(struct amd_ntb_dev *ndev,
+			 int msix_min, int msix_max)
+{
+	struct pci_dev *pdev;
+	int rc, i, msix_count, node;
+
+	pdev = ndev->ntb.pdev;
+
+	node = dev_to_node(&pdev->dev);
+
+	ndev->db_mask = ndev->db_valid_mask;
+
+	/* Try to set up msix irq */
+	ndev->vec = kcalloc_node(msix_max, sizeof(*ndev->vec),
+				 GFP_KERNEL, node);
+	if (!ndev->vec)
+		goto err_msix_vec_alloc;
+
+	ndev->msix = kcalloc_node(msix_max, sizeof(*ndev->msix),
+				  GFP_KERNEL, node);
+	if (!ndev->msix)
+		goto err_msix_alloc;
+
+	for (i = 0; i < msix_max; ++i)
+		ndev->msix[i].entry = i;
+
+	msix_count = pci_enable_msix_range(pdev, ndev->msix,
+					   msix_min, msix_max);
+	if (msix_count < 0)
+		goto err_msix_enable;
+
+	/* NOTE: Disable MSIX if msix count is less than 16 because of
+	 * hardware limitation.
+	 */
+	if (msix_count < msix_min) {
+		pci_disable_msix(pdev);
+		goto err_msix_enable;
+	}
+
+	for (i = 0; i < msix_count; ++i) {
+		ndev->vec[i].ndev = ndev;
+		ndev->vec[i].num = i;
+		rc = request_irq(ndev->msix[i].vector, ndev_vec_isr, 0,
+				 "ndev_vec_isr", &ndev->vec[i]);
+		if (rc)
+			goto err_msix_request;
+	}
+
+	dev_dbg(&pdev->dev, "Using msix interrupts\n");
+	ndev->db_count = msix_min;
+	ndev->msix_vec_count = msix_max;
+	return 0;
+
+err_msix_request:
+	while (i-- > 0)
+		free_irq(ndev->msix[i].vector, &ndev->vec[i]);
+	pci_disable_msix(pdev);
+err_msix_enable:
+	kfree(ndev->msix);
+err_msix_alloc:
+	kfree(ndev->vec);
+err_msix_vec_alloc:
+	ndev->msix = NULL;
+	ndev->vec = NULL;
+
+	/* Try to set up msi irq */
+	rc = pci_enable_msi(pdev);
+	if (rc)
+		goto err_msi_enable;
+
+	rc = request_irq(pdev->irq, ndev_irq_isr, 0,
+			 "ndev_irq_isr", ndev);
+	if (rc)
+		goto err_msi_request;
+
+	dev_dbg(&pdev->dev, "Using msi interrupts\n");
+	ndev->db_count = 1;
+	ndev->msix_vec_count = 1;
+	return 0;
+
+err_msi_request:
+	pci_disable_msi(pdev);
+err_msi_enable:
+
+	/* Try to set up intx irq */
+	pci_intx(pdev, 1);
+
+	rc = request_irq(pdev->irq, ndev_irq_isr, IRQF_SHARED,
+			 "ndev_irq_isr", ndev);
+	if (rc)
+		goto err_intx_request;
+
+	dev_dbg(&pdev->dev, "Using intx interrupts\n");
+	ndev->db_count = 1;
+	ndev->msix_vec_count = 1;
+	return 0;
+
+err_intx_request:
+	return rc;
+}
+
+static void ndev_deinit_isr(struct amd_ntb_dev *ndev)
+{
+	struct pci_dev *pdev;
+	void __iomem *mmio = ndev->self_mmio;
+	int i;
+
+	pdev = ndev->ntb.pdev;
+
+	/* Mask all doorbell interrupts */
+	ndev->db_mask = ndev->db_valid_mask;
+	writel(ndev->db_mask, mmio + AMD_DBMASK_OFFSET);
+
+	if (ndev->msix) {
+		i = ndev->msix_vec_count;
+		while (i--)
+			free_irq(ndev->msix[i].vector, &ndev->vec[i]);
+		pci_disable_msix(pdev);
+		kfree(ndev->msix);
+		kfree(ndev->vec);
+	} else {
+		free_irq(pdev->irq, ndev);
+		if (pci_dev_msi_enabled(pdev))
+			pci_disable_msi(pdev);
+		else
+			pci_intx(pdev, 0);
+	}
+}
+
+static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf,
+				 size_t count, loff_t *offp)
+{
+	struct amd_ntb_dev *ndev;
+	void __iomem *mmio;
+	char *buf;
+	size_t buf_size;
+	ssize_t ret, off;
+	union { u64 v64; u32 v32; u16 v16; } u;
+
+	ndev = filp->private_data;
+	mmio = ndev->self_mmio;
+
+	buf_size = min(count, 0x800ul);
+
+	buf = kmalloc(buf_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	off = 0;
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "NTB Device Information:\n");
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "Connection Topology -\t%s\n",
+			 ntb_topo_string(ndev->ntb.topo));
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "LNK STA -\t\t%#06x\n", ndev->lnk_sta);
+
+	if (!amd_link_is_up(ndev)) {
+		off += scnprintf(buf + off, buf_size - off,
+				 "Link Status -\t\tDown\n");
+	} else {
+		off += scnprintf(buf + off, buf_size - off,
+				 "Link Status -\t\tUp\n");
+		off += scnprintf(buf + off, buf_size - off,
+				 "Link Speed -\t\tPCI-E Gen %u\n",
+				 NTB_LNK_STA_SPEED(ndev->lnk_sta));
+		off += scnprintf(buf + off, buf_size - off,
+				 "Link Width -\t\tx%u\n",
+				 NTB_LNK_STA_WIDTH(ndev->lnk_sta));
+	}
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "Memory Window Count -\t%u\n", ndev->mw_count);
+	off += scnprintf(buf + off, buf_size - off,
+			 "Scratchpad Count -\t%u\n", ndev->spad_count);
+	off += scnprintf(buf + off, buf_size - off,
+			 "Doorbell Count -\t%u\n", ndev->db_count);
+	off += scnprintf(buf + off, buf_size - off,
+			 "MSIX Vector Count -\t%u\n", ndev->msix_vec_count);
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "Doorbell Valid Mask -\t%#llx\n", ndev->db_valid_mask);
+
+	u.v32 = readl(ndev->self_mmio + AMD_DBMASK_OFFSET);
+	off += scnprintf(buf + off, buf_size - off,
+			 "Doorbell Mask -\t\t\t%#06x\n", u.v32);
+
+	u.v32 = readl(mmio + AMD_DBSTAT_OFFSET);
+	off += scnprintf(buf + off, buf_size - off,
+			 "Doorbell Bell -\t\t\t%#06x\n", u.v32);
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "\nNTB Incoming XLAT:\n");
+
+	u.v64 = read64(mmio + AMD_BAR1XLAT_OFFSET);
+	off += scnprintf(buf + off, buf_size - off,
+			 "XLAT1 -\t\t%#018llx\n", u.v64);
+
+	u.v64 = read64(ndev->self_mmio + AMD_BAR23XLAT_OFFSET);
+	off += scnprintf(buf + off, buf_size - off,
+			 "XLAT23 -\t\t%#018llx\n", u.v64);
+
+	u.v64 = read64(ndev->self_mmio + AMD_BAR45XLAT_OFFSET);
+	off += scnprintf(buf + off, buf_size - off,
+			 "XLAT45 -\t\t%#018llx\n", u.v64);
+
+	u.v32 = readl(mmio + AMD_BAR1LMT_OFFSET);
+	off += scnprintf(buf + off, buf_size - off,
+			 "LMT1 -\t\t\t%#06x\n", u.v32);
+
+	u.v64 = read64(ndev->self_mmio + AMD_BAR23LMT_OFFSET);
+	off += scnprintf(buf + off, buf_size - off,
+			 "LMT23 -\t\t\t%#018llx\n", u.v64);
+
+	u.v64 = read64(ndev->self_mmio + AMD_BAR45LMT_OFFSET);
+	off += scnprintf(buf + off, buf_size - off,
+			 "LMT45 -\t\t\t%#018llx\n", u.v64);
+
+	ret = simple_read_from_buffer(ubuf, count, offp, buf, off);
+	kfree(buf);
+	return ret;
+}
+
+static void ndev_init_debugfs(struct amd_ntb_dev *ndev)
+{
+	if (!debugfs_dir) {
+		ndev->debugfs_dir = NULL;
+		ndev->debugfs_info = NULL;
+	} else {
+		ndev->debugfs_dir =
+			debugfs_create_dir(pci_name(ndev->ntb.pdev),
+					   debugfs_dir);
+		if (!ndev->debugfs_dir)
+			ndev->debugfs_info = NULL;
+		else
+			ndev->debugfs_info =
+				debugfs_create_file("info", S_IRUSR,
+						    ndev->debugfs_dir, ndev,
+						    &amd_ntb_debugfs_info);
+	}
+}
+
+static void ndev_deinit_debugfs(struct amd_ntb_dev *ndev)
+{
+	debugfs_remove_recursive(ndev->debugfs_dir);
+}
+
+static inline void ndev_init_struct(struct amd_ntb_dev *ndev,
+				    struct pci_dev *pdev)
+{
+	ndev->ntb.pdev = pdev;
+	ndev->ntb.topo = NTB_TOPO_NONE;
+	ndev->ntb.ops = &amd_ntb_ops;
+	ndev->int_mask = AMD_EVENT_INTMASK;
+	spin_lock_init(&ndev->db_mask_lock);
+}
+
+static int amd_poll_link(struct amd_ntb_dev *ndev)
+{
+	void __iomem *mmio = ndev->peer_mmio;
+	u32 reg, stat;
+	int rc;
+
+	reg = readl(mmio + AMD_SIDEINFO_OFFSET);
+	reg &= NTB_LIN_STA_ACTIVE_BIT;
+
+	dev_dbg(&ndev->ntb.pdev->dev, "%s: reg_val = 0x%x.\n", __func__, reg);
+
+	if (reg == ndev->cntl_sta)
+		return 0;
+
+	ndev->cntl_sta = reg;
+
+	rc = pci_read_config_dword(ndev->ntb.pdev,
+				   AMD_LINK_STATUS_OFFSET, &stat);
+	if (rc)
+		return 0;
+	ndev->lnk_sta = stat;
+
+	return 1;
+}
+
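+/*
+ * Link heartbeat work: poll the peer side/link registers and report an NTB
+ * link event on any change; keep rescheduling itself until the link comes
+ * up, after which further polling is driven by peer events.
+ */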
+static void amd_link_hb(struct work_struct *work)
+{
+	struct amd_ntb_dev *ndev = hb_ndev(work);
+
+	if (amd_poll_link(ndev))
+		ntb_link_event(&ndev->ntb);
+
+	if (!amd_link_is_up(ndev))
+		schedule_delayed_work(&ndev->hb_timer, AMD_LINK_HB_TIMEOUT);
+}
+
+static int amd_init_isr(struct amd_ntb_dev *ndev)
+{
+	return ndev_init_isr(ndev, AMD_DB_CNT, AMD_MSIX_VECTOR_CNT);
+}
+
+static void amd_init_side_info(struct amd_ntb_dev *ndev)
+{
+	void __iomem *mmio = ndev->self_mmio;
+	unsigned int reg;
+
+	reg = readl(mmio + AMD_SIDEINFO_OFFSET);
+	if (!(reg & AMD_SIDE_READY)) {
+		reg |= AMD_SIDE_READY;
+		writel(reg, mmio + AMD_SIDEINFO_OFFSET);
+	}
+}
+
+static void amd_deinit_side_info(struct amd_ntb_dev *ndev)
+{
+	void __iomem *mmio = ndev->self_mmio;
+	unsigned int reg;
+
+	reg = readl(mmio + AMD_SIDEINFO_OFFSET);
+	if (reg & AMD_SIDE_READY) {
+		reg &= ~AMD_SIDE_READY;
+		writel(reg, mmio + AMD_SIDEINFO_OFFSET);
+		readl(mmio + AMD_SIDEINFO_OFFSET);
+	}
+}
+
+static int amd_init_ntb(struct amd_ntb_dev *ndev)
+{
+	void __iomem *mmio = ndev->self_mmio;
+
+	ndev->mw_count = AMD_MW_CNT;
+	ndev->spad_count = AMD_SPADS_CNT;
+	ndev->db_count = AMD_DB_CNT;
+
+	switch (ndev->ntb.topo) {
+	case NTB_TOPO_PRI:
+	case NTB_TOPO_SEC:
+		ndev->spad_count >>= 1;
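+		/*
+		 * The scratchpad registers are split in half between the two
+		 * sides: 8 registers (0x20 bytes) each, with the primary side
+		 * owning offset 0 and the secondary side offset 0x20.
+		 */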
+		if (ndev->ntb.topo == NTB_TOPO_PRI) {
+			ndev->self_spad = 0;
+			ndev->peer_spad = 0x20;
+		} else {
+			ndev->self_spad = 0x20;
+			ndev->peer_spad = 0;
+		}
+
+		INIT_DELAYED_WORK(&ndev->hb_timer, amd_link_hb);
+		schedule_delayed_work(&ndev->hb_timer, AMD_LINK_HB_TIMEOUT);
+
+		break;
+	default:
+		dev_err(&ndev->ntb.pdev->dev,
+			"AMD NTB does not support B2B mode.\n");
+		return -EINVAL;
+	}
+
+	ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1;
+
+	/* Mask event interrupts */
+	writel(ndev->int_mask, mmio + AMD_INTMASK_OFFSET);
+
+	return 0;
+}
+
+static enum ntb_topo amd_get_topo(struct amd_ntb_dev *ndev)
+{
+	void __iomem *mmio = ndev->self_mmio;
+	u32 info;
+
+	info = readl(mmio + AMD_SIDEINFO_OFFSET);
+	if (info & AMD_SIDE_MASK)
+		return NTB_TOPO_SEC;
+	else
+		return NTB_TOPO_PRI;
+}
+
+static int amd_init_dev(struct amd_ntb_dev *ndev)
+{
+	struct pci_dev *pdev;
+	int rc = 0;
+
+	pdev = ndev->ntb.pdev;
+
+	ndev->ntb.topo = amd_get_topo(ndev);
+	dev_dbg(&pdev->dev, "AMD NTB topo is %s\n",
+		ntb_topo_string(ndev->ntb.topo));
+
+	rc = amd_init_ntb(ndev);
+	if (rc)
+		return rc;
+
+	rc = amd_init_isr(ndev);
+	if (rc) {
+		dev_err(&pdev->dev, "failed to init ISR.\n");
+		return rc;
+	}
+
+	ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1;
+
+	return 0;
+}
+
+static void amd_deinit_dev(struct amd_ntb_dev *ndev)
+{
+	cancel_delayed_work_sync(&ndev->hb_timer);
+
+	ndev_deinit_isr(ndev);
+}
+
+static int amd_ntb_init_pci(struct amd_ntb_dev *ndev,
+			    struct pci_dev *pdev)
+{
+	int rc;
+
+	pci_set_drvdata(pdev, ndev);
+
+	rc = pci_enable_device(pdev);
+	if (rc)
+		goto err_pci_enable;
+
+	rc = pci_request_regions(pdev, NTB_NAME);
+	if (rc)
+		goto err_pci_regions;
+
+	pci_set_master(pdev);
+
+	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (rc) {
+		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+		if (rc)
+			goto err_dma_mask;
+		dev_warn(&pdev->dev, "Cannot DMA highmem\n");
+	}
+
+	rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (rc) {
+		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+		if (rc)
+			goto err_dma_mask;
+		dev_warn(&pdev->dev, "Cannot DMA consistent highmem\n");
+	}
+	rc = dma_coerce_mask_and_coherent(&ndev->ntb.dev,
+					  dma_get_mask(&pdev->dev));
+	if (rc)
+		goto err_dma_mask;
+
+	ndev->self_mmio = pci_iomap(pdev, 0, 0);
+	if (!ndev->self_mmio) {
+		rc = -EIO;
+		goto err_dma_mask;
+	}
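+	/* The peer's registers are mirrored into the same BAR 0 mapping at a
+	 * fixed 0x400 offset, so no separate ioremap is needed for peer_mmio.
+	 */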
+	ndev->peer_mmio = ndev->self_mmio + AMD_PEER_OFFSET;
+
+	return 0;
+
+err_dma_mask:
+	pci_clear_master(pdev);
+err_pci_regions:
+	pci_disable_device(pdev);
+err_pci_enable:
+	pci_set_drvdata(pdev, NULL);
+	return rc;
+}
+
+static void amd_ntb_deinit_pci(struct amd_ntb_dev *ndev)
+{
+	struct pci_dev *pdev = ndev->ntb.pdev;
+
+	pci_iounmap(pdev, ndev->self_mmio);
+
+	pci_clear_master(pdev);
+	pci_release_regions(pdev);
+	pci_disable_device(pdev);
+	pci_set_drvdata(pdev, NULL);
+}
+
+static int amd_ntb_pci_probe(struct pci_dev *pdev,
+			     const struct pci_device_id *id)
+{
+	struct amd_ntb_dev *ndev;
+	int rc, node;
+
+	node = dev_to_node(&pdev->dev);
+
+	ndev = kzalloc_node(sizeof(*ndev), GFP_KERNEL, node);
+	if (!ndev) {
+		rc = -ENOMEM;
+		goto err_ndev;
+	}
+
+	ndev_init_struct(ndev, pdev);
+
+	rc = amd_ntb_init_pci(ndev, pdev);
+	if (rc)
+		goto err_init_pci;
+
+	rc = amd_init_dev(ndev);
+	if (rc)
+		goto err_init_dev;
+
+	/* write side info */
+	amd_init_side_info(ndev);
+
+	amd_poll_link(ndev);
+
+	ndev_init_debugfs(ndev);
+
+	rc = ntb_register_device(&ndev->ntb);
+	if (rc)
+		goto err_register;
+
+	dev_info(&pdev->dev, "NTB device registered.\n");
+
+	return 0;
+
+err_register:
+	ndev_deinit_debugfs(ndev);
+	amd_deinit_dev(ndev);
+err_init_dev:
+	amd_ntb_deinit_pci(ndev);
+err_init_pci:
+	kfree(ndev);
+err_ndev:
+	return rc;
+}
+
+static void amd_ntb_pci_remove(struct pci_dev *pdev)
+{
+	struct amd_ntb_dev *ndev = pci_get_drvdata(pdev);
+
+	ntb_unregister_device(&ndev->ntb);
+	ndev_deinit_debugfs(ndev);
+	amd_deinit_side_info(ndev);
+	amd_deinit_dev(ndev);
+	amd_ntb_deinit_pci(ndev);
+	kfree(ndev);
+}
+
+static const struct file_operations amd_ntb_debugfs_info = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = ndev_debugfs_read,
+};
+
+static const struct pci_device_id amd_ntb_pci_tbl[] = {
+	{PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_NTB)},
+	{0}
+};
+MODULE_DEVICE_TABLE(pci, amd_ntb_pci_tbl);
+
+static struct pci_driver amd_ntb_pci_driver = {
+	.name		= KBUILD_MODNAME,
+	.id_table	= amd_ntb_pci_tbl,
+	.probe		= amd_ntb_pci_probe,
+	.remove		= amd_ntb_pci_remove,
+};
+
+static int __init amd_ntb_pci_driver_init(void)
+{
+	pr_info("%s %s\n", NTB_DESC, NTB_VER);
+
+	if (debugfs_initialized())
+		debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+
+	return pci_register_driver(&amd_ntb_pci_driver);
+}
+module_init(amd_ntb_pci_driver_init);
+
+static void __exit amd_ntb_pci_driver_exit(void)
+{
+	pci_unregister_driver(&amd_ntb_pci_driver);
+	debugfs_remove_recursive(debugfs_dir);
+}
+module_exit(amd_ntb_pci_driver_exit);
diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.h b/drivers/ntb/hw/amd/ntb_hw_amd.h
new file mode 100644
index 0000000..8f3617a
--- /dev/null
+++ b/drivers/ntb/hw/amd/ntb_hw_amd.h
@@ -0,0 +1,217 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of AMD Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * AMD PCIe NTB Linux driver
+ *
+ * Contact Information:
+ * Xiangliang Yu <Xiangliang.Yu@amd.com>
+ */
+
+#ifndef NTB_HW_AMD_H
+#define NTB_HW_AMD_H
+
+#include <linux/ntb.h>
+#include <linux/pci.h>
+
+#define PCI_DEVICE_ID_AMD_NTB	0x145B
+#define AMD_LINK_HB_TIMEOUT	msecs_to_jiffies(1000)
+#define AMD_LINK_STATUS_OFFSET	0x68
+#define NTB_LIN_STA_ACTIVE_BIT	0x00000002
+#define NTB_LNK_STA_SPEED_MASK	0x000F0000
+#define NTB_LNK_STA_WIDTH_MASK	0x03F00000
+#define NTB_LNK_STA_ACTIVE(x)	(!!((x) & NTB_LIN_STA_ACTIVE_BIT))
+#define NTB_LNK_STA_SPEED(x)	(((x) & NTB_LNK_STA_SPEED_MASK) >> 16)
+#define NTB_LNK_STA_WIDTH(x)	(((x) & NTB_LNK_STA_WIDTH_MASK) >> 20)
+
+#ifndef read64
+#ifdef readq
+#define read64 readq
+#else
+#define read64 _read64
+static inline u64 _read64(void __iomem *mmio)
+{
+	u64 low, high;
+
+	low = readl(mmio);
+	high = readl(mmio + sizeof(u32));
+	return low | (high << 32);
+}
+#endif
+#endif
+
+#ifndef write64
+#ifdef writeq
+#define write64 writeq
+#else
+#define write64 _write64
+static inline void _write64(u64 val, void __iomem *mmio)
+{
+	writel(val, mmio);
+	writel(val >> 32, mmio + sizeof(u32));
+}
+#endif
+#endif
+
+enum {
+	/* AMD NTB Capability */
+	AMD_MW_CNT		= 3,
+	AMD_DB_CNT		= 16,
+	AMD_MSIX_VECTOR_CNT	= 24,
+	AMD_SPADS_CNT		= 16,
+
+	/*  AMD NTB register offset */
+	AMD_CNTL_OFFSET		= 0x200,
+
+	/* NTB control register bits */
+	PMM_REG_CTL		= BIT(21),
+	SMM_REG_CTL		= BIT(20),
+	SMM_REG_ACC_PATH	= BIT(18),
+	PMM_REG_ACC_PATH	= BIT(17),
+	NTB_CLK_EN		= BIT(16),
+
+	AMD_STA_OFFSET		= 0x204,
+	AMD_PGSLV_OFFSET	= 0x208,
+	AMD_SPAD_MUX_OFFSET	= 0x20C,
+	AMD_SPAD_OFFSET		= 0x210,
+	AMD_RSMU_HCID		= 0x250,
+	AMD_RSMU_SIID		= 0x254,
+	AMD_PSION_OFFSET	= 0x300,
+	AMD_SSION_OFFSET	= 0x330,
+	AMD_MMINDEX_OFFSET	= 0x400,
+	AMD_MMDATA_OFFSET	= 0x404,
+	AMD_SIDEINFO_OFFSET	= 0x408,
+
+	AMD_SIDE_MASK		= BIT(0),
+	AMD_SIDE_READY		= BIT(1),
+
+	/* limit register */
+	AMD_ROMBARLMT_OFFSET	= 0x410,
+	AMD_BAR1LMT_OFFSET	= 0x414,
+	AMD_BAR23LMT_OFFSET	= 0x418,
+	AMD_BAR45LMT_OFFSET	= 0x420,
+	/* xlat address */
+	AMD_POMBARXLAT_OFFSET	= 0x428,
+	AMD_BAR1XLAT_OFFSET	= 0x430,
+	AMD_BAR23XLAT_OFFSET	= 0x438,
+	AMD_BAR45XLAT_OFFSET	= 0x440,
+	/* doorbell and interrupt */
+	AMD_DBFM_OFFSET		= 0x450,
+	AMD_DBREQ_OFFSET	= 0x454,
+	AMD_MIRRDBSTAT_OFFSET	= 0x458,
+	AMD_DBMASK_OFFSET	= 0x45C,
+	AMD_DBSTAT_OFFSET	= 0x460,
+	AMD_INTMASK_OFFSET	= 0x470,
+	AMD_INTSTAT_OFFSET	= 0x474,
+
+	/* event type */
+	AMD_PEER_FLUSH_EVENT	= BIT(0),
+	AMD_PEER_RESET_EVENT	= BIT(1),
+	AMD_PEER_D3_EVENT	= BIT(2),
+	AMD_PEER_PMETO_EVENT	= BIT(3),
+	AMD_PEER_D0_EVENT	= BIT(4),
+	AMD_LINK_UP_EVENT	= BIT(5),
+	AMD_LINK_DOWN_EVENT	= BIT(6),
+	AMD_EVENT_INTMASK	= (AMD_PEER_FLUSH_EVENT |
+				AMD_PEER_RESET_EVENT | AMD_PEER_D3_EVENT |
+				AMD_PEER_PMETO_EVENT | AMD_PEER_D0_EVENT |
+				AMD_LINK_UP_EVENT | AMD_LINK_DOWN_EVENT),
+
+	AMD_PMESTAT_OFFSET	= 0x480,
+	AMD_PMSGTRIG_OFFSET	= 0x490,
+	AMD_LTRLATENCY_OFFSET	= 0x494,
+	AMD_FLUSHTRIG_OFFSET	= 0x498,
+
+	/* SMU register*/
+	AMD_SMUACK_OFFSET	= 0x4A0,
+	AMD_SINRST_OFFSET	= 0x4A4,
+	AMD_RSPNUM_OFFSET	= 0x4A8,
+	AMD_SMU_SPADMUTEX	= 0x4B0,
+	AMD_SMU_SPADOFFSET	= 0x4B4,
+
+	AMD_PEER_OFFSET		= 0x400,
+};
+
+struct amd_ntb_dev;
+
+struct amd_ntb_vec {
+	struct amd_ntb_dev	*ndev;
+	int			num;
+};
+
+struct amd_ntb_dev {
+	struct ntb_dev ntb;
+
+	u32 ntb_side;
+	u32 lnk_sta;
+	u32 cntl_sta;
+	u32 peer_sta;
+
+	unsigned char mw_count;
+	unsigned char spad_count;
+	unsigned char db_count;
+	unsigned char msix_vec_count;
+
+	u64 db_valid_mask;
+	u64 db_mask;
+	u32 int_mask;
+
+	struct msix_entry *msix;
+	struct amd_ntb_vec *vec;
+
+	/* synchronize rmw access of db_mask and hw reg */
+	spinlock_t db_mask_lock;
+
+	void __iomem *self_mmio;
+	void __iomem *peer_mmio;
+	unsigned int self_spad;
+	unsigned int peer_spad;
+
+	struct delayed_work hb_timer;
+
+	struct dentry *debugfs_dir;
+	struct dentry *debugfs_info;
+};
+
+#define ntb_ndev(__ntb) container_of(__ntb, struct amd_ntb_dev, ntb)
+#define hb_ndev(__work) container_of(__work, struct amd_ntb_dev, hb_timer.work)
+
+#endif
diff --git a/drivers/ntb/hw/idt/Kconfig b/drivers/ntb/hw/idt/Kconfig
new file mode 100644
index 0000000..b360e56
--- /dev/null
+++ b/drivers/ntb/hw/idt/Kconfig
@@ -0,0 +1,31 @@
+config NTB_IDT
+	tristate "IDT PCIe-switch Non-Transparent Bridge support"
+	depends on PCI
+	help
+	 This driver supports the NTB function of capable IDT PCIe-switches.
+
+	 Some pre-initialization must be done before the IDT PCIe-switch
+	 exposes its NT-functions correctly.  This is done either by properly
+	 initializing the EEPROM connected to the master SMBus of the switch,
+	 or by the BIOS changing the corresponding register values over the
+	 slave-SMBus interface.  Either way, it must be done before the Linux
+	 kernel finishes PCI bus enumeration.
+
+	 First of all, partitions must be activated and properly assigned to
+	 all the ports whose NT-functions are intended to be activated (see
+	 the SWPARTxCTL and SWPORTxCTL registers).  Then all NT-function BARs
+	 must be enabled with a chosen valid aperture.  For the memory-window
+	 related BARs, the aperture setting determines the maximum size of the
+	 memory windows accepted by that BAR.  Note that BAR0 must map the PCI
+	 configuration space registers.
+
+	 It is worth noting that, since part of this driver relies on the BAR
+	 settings of peer NT-functions, the BAR setup cannot be done through
+	 kernel PCI fixups.  That is why the alternative pre-initialization
+	 techniques, i.e. the BIOS using the SMBus interface or the EEPROM,
+	 should be used.  Additionally, if temperature sensor information is
+	 to be printed to the system log, the corresponding registers must be
+	 initialized in the BIOS/EEPROM as well.
+
+	 If unsure, say N.
+
diff --git a/drivers/ntb/hw/idt/Makefile b/drivers/ntb/hw/idt/Makefile
new file mode 100644
index 0000000..a102cf1
--- /dev/null
+++ b/drivers/ntb/hw/idt/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_NTB_IDT) += ntb_hw_idt.o
diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.c b/drivers/ntb/hw/idt/ntb_hw_idt.c
new file mode 100644
index 0000000..dbe72f1
--- /dev/null
+++ b/drivers/ntb/hw/idt/ntb_hw_idt.c
@@ -0,0 +1,2715 @@
+/*
+ *   This file is provided under a GPLv2 license.  When using or
+ *   redistributing this file, you may do so under that license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright (C) 2016 T-Platforms All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify it
+ *   under the terms and conditions of the GNU General Public License,
+ *   version 2, as published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+ *   Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License along
+ *   with this program; if not, one can be found at http://www.gnu.org/licenses/.
+ *
+ *   The full GNU General Public License is included in this distribution in
+ *   the file called "COPYING".
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * IDT PCIe-switch NTB Linux driver
+ *
+ * Contact Information:
+ * Serge Semin <fancer.lancer@gmail.com>, <Sergey.Semin@t-platforms.ru>
+ */
+
+#include <linux/stddef.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/sizes.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/aer.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/debugfs.h>
+#include <linux/ntb.h>
+
+#include "ntb_hw_idt.h"
+
+#define NTB_NAME	"ntb_hw_idt"
+#define NTB_DESC	"IDT PCI-E Non-Transparent Bridge Driver"
+#define NTB_VER		"2.0"
+#define NTB_IRQNAME	"ntb_irq_idt"
+
+MODULE_DESCRIPTION(NTB_DESC);
+MODULE_VERSION(NTB_VER);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("T-platforms");
+
+/*
+ * NT Endpoint registers table simplifying a loop access to the functionally
+ * related registers
+ */
+static const struct idt_ntb_regs ntdata_tbl = {
+	{ {IDT_NT_BARSETUP0,	IDT_NT_BARLIMIT0,
+	   IDT_NT_BARLTBASE0,	IDT_NT_BARUTBASE0},
+	  {IDT_NT_BARSETUP1,	IDT_NT_BARLIMIT1,
+	   IDT_NT_BARLTBASE1,	IDT_NT_BARUTBASE1},
+	  {IDT_NT_BARSETUP2,	IDT_NT_BARLIMIT2,
+	   IDT_NT_BARLTBASE2,	IDT_NT_BARUTBASE2},
+	  {IDT_NT_BARSETUP3,	IDT_NT_BARLIMIT3,
+	   IDT_NT_BARLTBASE3,	IDT_NT_BARUTBASE3},
+	  {IDT_NT_BARSETUP4,	IDT_NT_BARLIMIT4,
+	   IDT_NT_BARLTBASE4,	IDT_NT_BARUTBASE4},
+	  {IDT_NT_BARSETUP5,	IDT_NT_BARLIMIT5,
+	   IDT_NT_BARLTBASE5,	IDT_NT_BARUTBASE5} },
+	{ {IDT_NT_INMSG0,	IDT_NT_OUTMSG0,	IDT_NT_INMSGSRC0},
+	  {IDT_NT_INMSG1,	IDT_NT_OUTMSG1,	IDT_NT_INMSGSRC1},
+	  {IDT_NT_INMSG2,	IDT_NT_OUTMSG2,	IDT_NT_INMSGSRC2},
+	  {IDT_NT_INMSG3,	IDT_NT_OUTMSG3,	IDT_NT_INMSGSRC3} }
+};
+
+/*
+ * NT Endpoint ports data table with the corresponding pcie command, link
+ * status, control and BAR-related registers
+ */
+static const struct idt_ntb_port portdata_tbl[IDT_MAX_NR_PORTS] = {
+/*0*/	{ IDT_SW_NTP0_PCIECMDSTS,	IDT_SW_NTP0_PCIELCTLSTS,
+	  IDT_SW_NTP0_NTCTL,
+	  IDT_SW_SWPORT0CTL,		IDT_SW_SWPORT0STS,
+	  { {IDT_SW_NTP0_BARSETUP0,	IDT_SW_NTP0_BARLIMIT0,
+	     IDT_SW_NTP0_BARLTBASE0,	IDT_SW_NTP0_BARUTBASE0},
+	    {IDT_SW_NTP0_BARSETUP1,	IDT_SW_NTP0_BARLIMIT1,
+	     IDT_SW_NTP0_BARLTBASE1,	IDT_SW_NTP0_BARUTBASE1},
+	    {IDT_SW_NTP0_BARSETUP2,	IDT_SW_NTP0_BARLIMIT2,
+	     IDT_SW_NTP0_BARLTBASE2,	IDT_SW_NTP0_BARUTBASE2},
+	    {IDT_SW_NTP0_BARSETUP3,	IDT_SW_NTP0_BARLIMIT3,
+	     IDT_SW_NTP0_BARLTBASE3,	IDT_SW_NTP0_BARUTBASE3},
+	    {IDT_SW_NTP0_BARSETUP4,	IDT_SW_NTP0_BARLIMIT4,
+	     IDT_SW_NTP0_BARLTBASE4,	IDT_SW_NTP0_BARUTBASE4},
+	    {IDT_SW_NTP0_BARSETUP5,	IDT_SW_NTP0_BARLIMIT5,
+	     IDT_SW_NTP0_BARLTBASE5,	IDT_SW_NTP0_BARUTBASE5} } },
+/*1*/	{0},
+/*2*/	{ IDT_SW_NTP2_PCIECMDSTS,	IDT_SW_NTP2_PCIELCTLSTS,
+	  IDT_SW_NTP2_NTCTL,
+	  IDT_SW_SWPORT2CTL,		IDT_SW_SWPORT2STS,
+	  { {IDT_SW_NTP2_BARSETUP0,	IDT_SW_NTP2_BARLIMIT0,
+	     IDT_SW_NTP2_BARLTBASE0,	IDT_SW_NTP2_BARUTBASE0},
+	    {IDT_SW_NTP2_BARSETUP1,	IDT_SW_NTP2_BARLIMIT1,
+	     IDT_SW_NTP2_BARLTBASE1,	IDT_SW_NTP2_BARUTBASE1},
+	    {IDT_SW_NTP2_BARSETUP2,	IDT_SW_NTP2_BARLIMIT2,
+	     IDT_SW_NTP2_BARLTBASE2,	IDT_SW_NTP2_BARUTBASE2},
+	    {IDT_SW_NTP2_BARSETUP3,	IDT_SW_NTP2_BARLIMIT3,
+	     IDT_SW_NTP2_BARLTBASE3,	IDT_SW_NTP2_BARUTBASE3},
+	    {IDT_SW_NTP2_BARSETUP4,	IDT_SW_NTP2_BARLIMIT4,
+	     IDT_SW_NTP2_BARLTBASE4,	IDT_SW_NTP2_BARUTBASE4},
+	    {IDT_SW_NTP2_BARSETUP5,	IDT_SW_NTP2_BARLIMIT5,
+	     IDT_SW_NTP2_BARLTBASE5,	IDT_SW_NTP2_BARUTBASE5} } },
+/*3*/	{0},
+/*4*/	{ IDT_SW_NTP4_PCIECMDSTS,	IDT_SW_NTP4_PCIELCTLSTS,
+	  IDT_SW_NTP4_NTCTL,
+	  IDT_SW_SWPORT4CTL,		IDT_SW_SWPORT4STS,
+	  { {IDT_SW_NTP4_BARSETUP0,	IDT_SW_NTP4_BARLIMIT0,
+	     IDT_SW_NTP4_BARLTBASE0,	IDT_SW_NTP4_BARUTBASE0},
+	    {IDT_SW_NTP4_BARSETUP1,	IDT_SW_NTP4_BARLIMIT1,
+	     IDT_SW_NTP4_BARLTBASE1,	IDT_SW_NTP4_BARUTBASE1},
+	    {IDT_SW_NTP4_BARSETUP2,	IDT_SW_NTP4_BARLIMIT2,
+	     IDT_SW_NTP4_BARLTBASE2,	IDT_SW_NTP4_BARUTBASE2},
+	    {IDT_SW_NTP4_BARSETUP3,	IDT_SW_NTP4_BARLIMIT3,
+	     IDT_SW_NTP4_BARLTBASE3,	IDT_SW_NTP4_BARUTBASE3},
+	    {IDT_SW_NTP4_BARSETUP4,	IDT_SW_NTP4_BARLIMIT4,
+	     IDT_SW_NTP4_BARLTBASE4,	IDT_SW_NTP4_BARUTBASE4},
+	    {IDT_SW_NTP4_BARSETUP5,	IDT_SW_NTP4_BARLIMIT5,
+	     IDT_SW_NTP4_BARLTBASE5,	IDT_SW_NTP4_BARUTBASE5} } },
+/*5*/	{0},
+/*6*/	{ IDT_SW_NTP6_PCIECMDSTS,	IDT_SW_NTP6_PCIELCTLSTS,
+	  IDT_SW_NTP6_NTCTL,
+	  IDT_SW_SWPORT6CTL,		IDT_SW_SWPORT6STS,
+	  { {IDT_SW_NTP6_BARSETUP0,	IDT_SW_NTP6_BARLIMIT0,
+	     IDT_SW_NTP6_BARLTBASE0,	IDT_SW_NTP6_BARUTBASE0},
+	    {IDT_SW_NTP6_BARSETUP1,	IDT_SW_NTP6_BARLIMIT1,
+	     IDT_SW_NTP6_BARLTBASE1,	IDT_SW_NTP6_BARUTBASE1},
+	    {IDT_SW_NTP6_BARSETUP2,	IDT_SW_NTP6_BARLIMIT2,
+	     IDT_SW_NTP6_BARLTBASE2,	IDT_SW_NTP6_BARUTBASE2},
+	    {IDT_SW_NTP6_BARSETUP3,	IDT_SW_NTP6_BARLIMIT3,
+	     IDT_SW_NTP6_BARLTBASE3,	IDT_SW_NTP6_BARUTBASE3},
+	    {IDT_SW_NTP6_BARSETUP4,	IDT_SW_NTP6_BARLIMIT4,
+	     IDT_SW_NTP6_BARLTBASE4,	IDT_SW_NTP6_BARUTBASE4},
+	    {IDT_SW_NTP6_BARSETUP5,	IDT_SW_NTP6_BARLIMIT5,
+	     IDT_SW_NTP6_BARLTBASE5,	IDT_SW_NTP6_BARUTBASE5} } },
+/*7*/	{0},
+/*8*/	{ IDT_SW_NTP8_PCIECMDSTS,	IDT_SW_NTP8_PCIELCTLSTS,
+	  IDT_SW_NTP8_NTCTL,
+	  IDT_SW_SWPORT8CTL,		IDT_SW_SWPORT8STS,
+	  { {IDT_SW_NTP8_BARSETUP0,	IDT_SW_NTP8_BARLIMIT0,
+	     IDT_SW_NTP8_BARLTBASE0,	IDT_SW_NTP8_BARUTBASE0},
+	    {IDT_SW_NTP8_BARSETUP1,	IDT_SW_NTP8_BARLIMIT1,
+	     IDT_SW_NTP8_BARLTBASE1,	IDT_SW_NTP8_BARUTBASE1},
+	    {IDT_SW_NTP8_BARSETUP2,	IDT_SW_NTP8_BARLIMIT2,
+	     IDT_SW_NTP8_BARLTBASE2,	IDT_SW_NTP8_BARUTBASE2},
+	    {IDT_SW_NTP8_BARSETUP3,	IDT_SW_NTP8_BARLIMIT3,
+	     IDT_SW_NTP8_BARLTBASE3,	IDT_SW_NTP8_BARUTBASE3},
+	    {IDT_SW_NTP8_BARSETUP4,	IDT_SW_NTP8_BARLIMIT4,
+	     IDT_SW_NTP8_BARLTBASE4,	IDT_SW_NTP8_BARUTBASE4},
+	    {IDT_SW_NTP8_BARSETUP5,	IDT_SW_NTP8_BARLIMIT5,
+	     IDT_SW_NTP8_BARLTBASE5,	IDT_SW_NTP8_BARUTBASE5} } },
+/*9*/	{0},
+/*10*/	{0},
+/*11*/	{0},
+/*12*/	{ IDT_SW_NTP12_PCIECMDSTS,	IDT_SW_NTP12_PCIELCTLSTS,
+	  IDT_SW_NTP12_NTCTL,
+	  IDT_SW_SWPORT12CTL,		IDT_SW_SWPORT12STS,
+	  { {IDT_SW_NTP12_BARSETUP0,	IDT_SW_NTP12_BARLIMIT0,
+	     IDT_SW_NTP12_BARLTBASE0,	IDT_SW_NTP12_BARUTBASE0},
+	    {IDT_SW_NTP12_BARSETUP1,	IDT_SW_NTP12_BARLIMIT1,
+	     IDT_SW_NTP12_BARLTBASE1,	IDT_SW_NTP12_BARUTBASE1},
+	    {IDT_SW_NTP12_BARSETUP2,	IDT_SW_NTP12_BARLIMIT2,
+	     IDT_SW_NTP12_BARLTBASE2,	IDT_SW_NTP12_BARUTBASE2},
+	    {IDT_SW_NTP12_BARSETUP3,	IDT_SW_NTP12_BARLIMIT3,
+	     IDT_SW_NTP12_BARLTBASE3,	IDT_SW_NTP12_BARUTBASE3},
+	    {IDT_SW_NTP12_BARSETUP4,	IDT_SW_NTP12_BARLIMIT4,
+	     IDT_SW_NTP12_BARLTBASE4,	IDT_SW_NTP12_BARUTBASE4},
+	    {IDT_SW_NTP12_BARSETUP5,	IDT_SW_NTP12_BARLIMIT5,
+	     IDT_SW_NTP12_BARLTBASE5,	IDT_SW_NTP12_BARUTBASE5} } },
+/*13*/	{0},
+/*14*/	{0},
+/*15*/	{0},
+/*16*/	{ IDT_SW_NTP16_PCIECMDSTS,	IDT_SW_NTP16_PCIELCTLSTS,
+	  IDT_SW_NTP16_NTCTL,
+	  IDT_SW_SWPORT16CTL,		IDT_SW_SWPORT16STS,
+	  { {IDT_SW_NTP16_BARSETUP0,	IDT_SW_NTP16_BARLIMIT0,
+	     IDT_SW_NTP16_BARLTBASE0,	IDT_SW_NTP16_BARUTBASE0},
+	    {IDT_SW_NTP16_BARSETUP1,	IDT_SW_NTP16_BARLIMIT1,
+	     IDT_SW_NTP16_BARLTBASE1,	IDT_SW_NTP16_BARUTBASE1},
+	    {IDT_SW_NTP16_BARSETUP2,	IDT_SW_NTP16_BARLIMIT2,
+	     IDT_SW_NTP16_BARLTBASE2,	IDT_SW_NTP16_BARUTBASE2},
+	    {IDT_SW_NTP16_BARSETUP3,	IDT_SW_NTP16_BARLIMIT3,
+	     IDT_SW_NTP16_BARLTBASE3,	IDT_SW_NTP16_BARUTBASE3},
+	    {IDT_SW_NTP16_BARSETUP4,	IDT_SW_NTP16_BARLIMIT4,
+	     IDT_SW_NTP16_BARLTBASE4,	IDT_SW_NTP16_BARUTBASE4},
+	    {IDT_SW_NTP16_BARSETUP5,	IDT_SW_NTP16_BARLIMIT5,
+	     IDT_SW_NTP16_BARLTBASE5,	IDT_SW_NTP16_BARUTBASE5} } },
+/*17*/	{0},
+/*18*/	{0},
+/*19*/	{0},
+/*20*/	{ IDT_SW_NTP20_PCIECMDSTS,	IDT_SW_NTP20_PCIELCTLSTS,
+	  IDT_SW_NTP20_NTCTL,
+	  IDT_SW_SWPORT20CTL,		IDT_SW_SWPORT20STS,
+	  { {IDT_SW_NTP20_BARSETUP0,	IDT_SW_NTP20_BARLIMIT0,
+	     IDT_SW_NTP20_BARLTBASE0,	IDT_SW_NTP20_BARUTBASE0},
+	    {IDT_SW_NTP20_BARSETUP1,	IDT_SW_NTP20_BARLIMIT1,
+	     IDT_SW_NTP20_BARLTBASE1,	IDT_SW_NTP20_BARUTBASE1},
+	    {IDT_SW_NTP20_BARSETUP2,	IDT_SW_NTP20_BARLIMIT2,
+	     IDT_SW_NTP20_BARLTBASE2,	IDT_SW_NTP20_BARUTBASE2},
+	    {IDT_SW_NTP20_BARSETUP3,	IDT_SW_NTP20_BARLIMIT3,
+	     IDT_SW_NTP20_BARLTBASE3,	IDT_SW_NTP20_BARUTBASE3},
+	    {IDT_SW_NTP20_BARSETUP4,	IDT_SW_NTP20_BARLIMIT4,
+	     IDT_SW_NTP20_BARLTBASE4,	IDT_SW_NTP20_BARUTBASE4},
+	    {IDT_SW_NTP20_BARSETUP5,	IDT_SW_NTP20_BARLIMIT5,
+	     IDT_SW_NTP20_BARLTBASE5,	IDT_SW_NTP20_BARUTBASE5} } },
+/*21*/	{0},
+/*22*/	{0},
+/*23*/	{0}
+};
+
+/*
+ * IDT PCIe-switch partitions table with the corresponding control, status
+ * and messages control registers
+ */
+static const struct idt_ntb_part partdata_tbl[IDT_MAX_NR_PARTS] = {
+/*0*/	{ IDT_SW_SWPART0CTL,	IDT_SW_SWPART0STS,
+	  {IDT_SW_SWP0MSGCTL0,	IDT_SW_SWP0MSGCTL1,
+	   IDT_SW_SWP0MSGCTL2,	IDT_SW_SWP0MSGCTL3} },
+/*1*/	{ IDT_SW_SWPART1CTL,	IDT_SW_SWPART1STS,
+	  {IDT_SW_SWP1MSGCTL0,	IDT_SW_SWP1MSGCTL1,
+	   IDT_SW_SWP1MSGCTL2,	IDT_SW_SWP1MSGCTL3} },
+/*2*/	{ IDT_SW_SWPART2CTL,	IDT_SW_SWPART2STS,
+	  {IDT_SW_SWP2MSGCTL0,	IDT_SW_SWP2MSGCTL1,
+	   IDT_SW_SWP2MSGCTL2,	IDT_SW_SWP2MSGCTL3} },
+/*3*/	{ IDT_SW_SWPART3CTL,	IDT_SW_SWPART3STS,
+	  {IDT_SW_SWP3MSGCTL0,	IDT_SW_SWP3MSGCTL1,
+	   IDT_SW_SWP3MSGCTL2,	IDT_SW_SWP3MSGCTL3} },
+/*4*/	{ IDT_SW_SWPART4CTL,	IDT_SW_SWPART4STS,
+	  {IDT_SW_SWP4MSGCTL0,	IDT_SW_SWP4MSGCTL1,
+	   IDT_SW_SWP4MSGCTL2,	IDT_SW_SWP4MSGCTL3} },
+/*5*/	{ IDT_SW_SWPART5CTL,	IDT_SW_SWPART5STS,
+	  {IDT_SW_SWP5MSGCTL0,	IDT_SW_SWP5MSGCTL1,
+	   IDT_SW_SWP5MSGCTL2,	IDT_SW_SWP5MSGCTL3} },
+/*6*/	{ IDT_SW_SWPART6CTL,	IDT_SW_SWPART6STS,
+	  {IDT_SW_SWP6MSGCTL0,	IDT_SW_SWP6MSGCTL1,
+	   IDT_SW_SWP6MSGCTL2,	IDT_SW_SWP6MSGCTL3} },
+/*7*/	{ IDT_SW_SWPART7CTL,	IDT_SW_SWPART7STS,
+	  {IDT_SW_SWP7MSGCTL0,	IDT_SW_SWP7MSGCTL1,
+	   IDT_SW_SWP7MSGCTL2,	IDT_SW_SWP7MSGCTL3} }
+};
+
+/*
+ * DebugFS directory to place the driver debug file
+ */
+static struct dentry *dbgfs_topdir;
+
+/*=============================================================================
+ *                1. IDT PCIe-switch registers IO-functions
+ *
+ *    Besides the ordinary configuration space registers, the IDT PCIe-switch
+ * exposes global configuration registers, which are used to determine the
+ * state of the other device ports and to be notified of switch-related
+ * events.  Additionally, the configuration space registers of all the IDT
+ * PCIe-switch functions are mapped to the Global Address space, so each
+ * function can determine the configuration of any other PCI-function.
+ *    The functions declared in this chapter encapsulate access to the
+ * configuration and global registers, so the driver code just needs to
+ * provide the IDT NTB hardware descriptor and a register address.
+ *=============================================================================
+ */
+
+/*
+ * idt_nt_write() - PCI configuration space registers write method
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @reg:	Register to write data to
+ * @data:	Value to write to the register
+ *
+ * IDT PCIe-switch registers are all Little endian.
+ */
+static void idt_nt_write(struct idt_ntb_dev *ndev,
+			 const unsigned int reg, const u32 data)
+{
+	/*
+	 * It is an obvious bug to request a register beyond the maximum
+	 * possible offset, or to request an unaligned one.
+	 */
+	if (WARN_ON(reg > IDT_REG_PCI_MAX || !IS_ALIGNED(reg, IDT_REG_ALIGN)))
+		return;
+
+	/* Just write the value to the specified register */
+	iowrite32(data, ndev->cfgspc + (ptrdiff_t)reg);
+}
+
+/*
+ * idt_nt_read() - PCI configuration space registers read method
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @reg:	Register to read data from
+ *
+ * IDT PCIe-switch Global configuration registers are all Little endian.
+ *
+ * Return: register value
+ */
+static u32 idt_nt_read(struct idt_ntb_dev *ndev, const unsigned int reg)
+{
+	/*
+	 * It is an obvious bug to request a register beyond the maximum
+	 * possible offset, or to request an unaligned one.
+	 */
+	if (WARN_ON(reg > IDT_REG_PCI_MAX || !IS_ALIGNED(reg, IDT_REG_ALIGN)))
+		return ~0;
+
+	/* Just read the value from the specified register */
+	return ioread32(ndev->cfgspc + (ptrdiff_t)reg);
+}
+
+/*
+ * idt_sw_write() - Global registers write method
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @reg:	Register to write data to
+ * @data:	Value to write to the register
+ *
+ * IDT PCIe-switch Global configuration registers are all Little endian.
+ */
+static void idt_sw_write(struct idt_ntb_dev *ndev,
+			 const unsigned int reg, const u32 data)
+{
+	unsigned long irqflags;
+
+	/*
+	 * It is an obvious bug to request a register beyond the maximum
+	 * possible offset, or to request an unaligned one.
+	 */
+	if (WARN_ON(reg > IDT_REG_SW_MAX || !IS_ALIGNED(reg, IDT_REG_ALIGN)))
+		return;
+
+	/* Lock GASA registers operations */
+	spin_lock_irqsave(&ndev->gasa_lock, irqflags);
+	/* Set the global register address */
+	iowrite32((u32)reg, ndev->cfgspc + (ptrdiff_t)IDT_NT_GASAADDR);
+	/* Put the new value of the register */
+	iowrite32(data, ndev->cfgspc + (ptrdiff_t)IDT_NT_GASADATA);
+	/* Make sure the PCIe transactions are executed */
+	mmiowb();
+	/* Unlock GASA registers operations */
+	spin_unlock_irqrestore(&ndev->gasa_lock, irqflags);
+}
+
+/*
+ * idt_sw_read() - Global registers read method
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @reg:	Register to read data from
+ *
+ * IDT PCIe-switch Global configuration registers are all Little endian.
+ *
+ * Return: register value
+ */
+static u32 idt_sw_read(struct idt_ntb_dev *ndev, const unsigned int reg)
+{
+	unsigned long irqflags;
+	u32 data;
+
+	/*
+	 * It is an obvious bug to request a register beyond the maximum
+	 * possible offset, or to request an unaligned one.
+	 */
+	if (WARN_ON(reg > IDT_REG_SW_MAX || !IS_ALIGNED(reg, IDT_REG_ALIGN)))
+		return ~0;
+
+	/* Lock GASA registers operations */
+	spin_lock_irqsave(&ndev->gasa_lock, irqflags);
+	/* Set the global register address */
+	iowrite32((u32)reg, ndev->cfgspc + (ptrdiff_t)IDT_NT_GASAADDR);
+	/* Get the data of the register (the read op acts as an MMIO barrier) */
+	data = ioread32(ndev->cfgspc + (ptrdiff_t)IDT_NT_GASADATA);
+	/* Unlock GASA registers operations */
+	spin_unlock_irqrestore(&ndev->gasa_lock, irqflags);
+
+	return data;
+}
+
+/*
+ * idt_reg_set_bits() - set bits of a passed register
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @reg:	Register to change bits of
+ * @reg_lock:	Register access spin lock
+ * @valid_mask:	Mask of valid bits
+ * @set_bits:	Bitmask to set
+ *
+ * Helper method to check whether a passed bitfield is valid and set
+ * corresponding bits of a register.
+ *
+ * WARNING! Make sure the passed register isn't accessed via the plain
+ * idt_nt_write() method (the read method is ok to be used concurrently).
+ *
+ * Return: zero on success, negative error on invalid bitmask.
+ */
+static inline int idt_reg_set_bits(struct idt_ntb_dev *ndev, unsigned int reg,
+				   spinlock_t *reg_lock,
+				   u64 valid_mask, u64 set_bits)
+{
+	unsigned long irqflags;
+	u32 data;
+
+	if (set_bits & ~(u64)valid_mask)
+		return -EINVAL;
+
+	/* Lock access to the register until the change is written back */
+	spin_lock_irqsave(reg_lock, irqflags);
+	data = idt_nt_read(ndev, reg) | (u32)set_bits;
+	idt_nt_write(ndev, reg, data);
+	/* Unlock the register */
+	spin_unlock_irqrestore(reg_lock, irqflags);
+
+	return 0;
+}
+
+/*
+ * idt_reg_clear_bits() - clear bits of a passed register
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @reg:	Register to change bits of
+ * @reg_lock:	Register access spin lock
+ * @clear_bits:	Bitmask to clear
+ *
+ * Helper method to check whether a passed bitfield is valid and clear
+ * corresponding bits of a register.
+ *
+ * NOTE! Invalid bits are always considered cleared, so it's not an error
+ * to try to clear them again.
+ *
+ * WARNING! Make sure the passed register isn't concurrently accessed via the
+ * plain idt_nt_write() method (the read method is safe to use concurrently).
+ */
+static inline void idt_reg_clear_bits(struct idt_ntb_dev *ndev,
+				     unsigned int reg, spinlock_t *reg_lock,
+				     u64 clear_bits)
+{
+	unsigned long irqflags;
+	u32 data;
+
+	/* Lock access to the register unless the change is written back */
+	spin_lock_irqsave(reg_lock, irqflags);
+	data = idt_nt_read(ndev, reg) & ~(u32)clear_bits;
+	idt_nt_write(ndev, reg, data);
+	/* Unlock the register */
+	spin_unlock_irqrestore(reg_lock, irqflags);
+}
+
+/*===========================================================================
+ *                           2. Ports operations
+ *
+ *    IDT PCIe-switches can have from 3 up to 8 ports with NT-functions
+ * possibly enabled, so all of the ports need to be scanned for an activated
+ * NTB. The NTB API then enumerates only the ports with NTB enabled.
+ *===========================================================================
+ */
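+
+/*
+ * For illustration only (not part of this driver): client drivers reach the
+ * callbacks below through the generic NTB port API. A rough sketch of how a
+ * client might walk the enumerated peers (ntb being the client's ntb_dev):
+ *
+ *	int pidx, port, peer_cnt = ntb_peer_port_count(ntb);
+ *
+ *	for (pidx = 0; pidx < peer_cnt; pidx++)
+ *		port = ntb_peer_port_number(ntb, pidx);
+ *
+ * while ntb_peer_port_idx(ntb, port) performs the reverse port -> index
+ * lookup and ntb_port_number(ntb) returns the local port.
+ */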
+
+/*
+ * idt_scan_ports() - scan IDT PCIe-switch ports collecting info in the tables
+ * @ndev:	Pointer to the PCI device descriptor
+ *
+ * Return: zero on success, otherwise a negative error number.
+ */
+static int idt_scan_ports(struct idt_ntb_dev *ndev)
+{
+	unsigned char pidx, port, part;
+	u32 data, portsts, partsts;
+
+	/* Retrieve the local port number */
+	data = idt_nt_read(ndev, IDT_NT_PCIELCAP);
+	ndev->port = GET_FIELD(PCIELCAP_PORTNUM, data);
+
+	/* Retrieve the local partition number */
+	portsts = idt_sw_read(ndev, portdata_tbl[ndev->port].sts);
+	ndev->part = GET_FIELD(SWPORTxSTS_SWPART, portsts);
+
+	/* Initialize port/partition -> index tables with invalid values */
+	memset(ndev->port_idx_map, -EINVAL, sizeof(ndev->port_idx_map));
+	memset(ndev->part_idx_map, -EINVAL, sizeof(ndev->part_idx_map));
+
+	/*
+	 * Walk over all the possible ports checking whether any of them has
+	 * NT-function activated
+	 */
+	ndev->peer_cnt = 0;
+	for (pidx = 0; pidx < ndev->swcfg->port_cnt; pidx++) {
+		port = ndev->swcfg->ports[pidx];
+		/* Skip local port */
+		if (port == ndev->port)
+			continue;
+
+		/* Read the port status register to get its partition */
+		portsts = idt_sw_read(ndev, portdata_tbl[port].sts);
+		part = GET_FIELD(SWPORTxSTS_SWPART, portsts);
+
+		/* Retrieve the partition status */
+		partsts = idt_sw_read(ndev, partdata_tbl[part].sts);
+		/* Check if partition state is active and port has NTB */
+		if (IS_FLD_SET(SWPARTxSTS_STATE, partsts, ACT) &&
+		    (IS_FLD_SET(SWPORTxSTS_MODE, portsts, NT) ||
+		     IS_FLD_SET(SWPORTxSTS_MODE, portsts, USNT) ||
+		     IS_FLD_SET(SWPORTxSTS_MODE, portsts, USNTDMA) ||
+		     IS_FLD_SET(SWPORTxSTS_MODE, portsts, NTDMA))) {
+			/* Save the port and partition numbers */
+			ndev->peers[ndev->peer_cnt].port = port;
+			ndev->peers[ndev->peer_cnt].part = part;
+			/* Fill in the port/partition -> index tables */
+			ndev->port_idx_map[port] = ndev->peer_cnt;
+			ndev->part_idx_map[part] = ndev->peer_cnt;
+			ndev->peer_cnt++;
+		}
+	}
+
+	dev_dbg(&ndev->ntb.pdev->dev, "Local port: %hhu, num of peers: %hhu\n",
+		ndev->port, ndev->peer_cnt);
+
+	/* It's pointless to keep this driver loaded if there is no peer */
+	if (ndev->peer_cnt == 0) {
+		dev_warn(&ndev->ntb.pdev->dev, "No active peer found\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+/*
+ * idt_ntb_port_number() - get the local port number
+ * @ntb:	NTB device context.
+ *
+ * Return: the local port number
+ */
+static int idt_ntb_port_number(struct ntb_dev *ntb)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	return ndev->port;
+}
+
+/*
+ * idt_ntb_peer_port_count() - get the number of peer ports
+ * @ntb:	NTB device context.
+ *
+ * Return the count of detected peer NT-functions.
+ *
+ * Return: number of peer ports
+ */
+static int idt_ntb_peer_port_count(struct ntb_dev *ntb)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	return ndev->peer_cnt;
+}
+
+/*
+ * idt_ntb_peer_port_number() - get peer port by given index
+ * @ntb:	NTB device context.
+ * @pidx:	Peer port index.
+ *
+ * Return: peer port or negative error
+ */
+static int idt_ntb_peer_port_number(struct ntb_dev *ntb, int pidx)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	if (pidx < 0 || ndev->peer_cnt <= pidx)
+		return -EINVAL;
+
+	/* Return the detected NT-function port number */
+	return ndev->peers[pidx].port;
+}
+
+/*
+ * idt_ntb_peer_port_idx() - get peer port index by given port number
+ * @ntb:	NTB device context.
+ * @port:	Peer port number.
+ *
+ * The internal port -> index table is pre-initialized with -EINVAL values,
+ * so we just need to return the looked-up value.
+ *
+ * Return: peer NT-function port index or negative error
+ */
+static int idt_ntb_peer_port_idx(struct ntb_dev *ntb, int port)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	if (port < 0 || IDT_MAX_NR_PORTS <= port)
+		return -EINVAL;
+
+	return ndev->port_idx_map[port];
+}
+
+/*===========================================================================
+ *                         3. Link status operations
+ *    There is no ready-to-use method to notify peer ports when the NTB link
+ * is set up or goes down, so the Global Signal event is used instead.
+ * Whenever a port changes its local NTB link state, it raises the global
+ * signal and clears the corresponding global status bit. All the other ports
+ * then receive a notification of that, which makes the client drivers aware
+ * of a possible NTB link state change.
+ *    Additionally, each active NT-function is subscribed to the PCIe link
+ * state changes of the peer ports.
+ *===========================================================================
+ */
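+
+/*
+ * For illustration only (not part of this driver): the events set up here end
+ * up in ntb_link_event() calls, which invoke the link_event handler a client
+ * driver registered via ntb_set_ctx(). A rough sketch of the client side,
+ * where my_ctx and my_link_event are hypothetical names:
+ *
+ *	static void my_link_event(void *ctx)
+ *	{
+ *		struct my_ctx *priv = ctx;
+ *		u64 up_map = ntb_link_is_up(priv->ntb, NULL, NULL);
+ *		...
+ *	}
+ *
+ *	static const struct ntb_ctx_ops my_ctx_ops = {
+ *		.link_event = my_link_event,
+ *	};
+ *
+ *	ntb_set_ctx(ntb, priv, &my_ctx_ops);
+ *	ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
+ *
+ * Bit N of up_map then reflects the link state of the peer with port index N,
+ * as implemented by idt_ntb_link_is_up() below.
+ */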
+
+static void idt_ntb_local_link_disable(struct idt_ntb_dev *ndev);
+
+/*
+ * idt_init_link() - Initialize NTB link state notification subsystem
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * Function performs the basic initialization of some global registers
+ * needed to enable IRQ-based notifications of PCIe Link Up/Down and
+ * Global Signal events.
+ * NOTE Since it's impossible to determine when all the NTB peer drivers are
+ * unloaded, and since these registers may be accessed concurrently, every
+ * driver preinitializes them with the same values and leaves them untouched
+ * on local driver unload.
+ */
+static void idt_init_link(struct idt_ntb_dev *ndev)
+{
+	u32 part_mask, port_mask, se_mask;
+	unsigned char pidx;
+
+	/* Initialize spin locker of Mapping Table access registers */
+	spin_lock_init(&ndev->mtbl_lock);
+
+	/* Walk over all detected peers collecting port and partition masks */
+	port_mask = ~BIT(ndev->port);
+	part_mask = ~BIT(ndev->part);
+	for (pidx = 0; pidx < ndev->peer_cnt; pidx++) {
+		port_mask &= ~BIT(ndev->peers[pidx].port);
+		part_mask &= ~BIT(ndev->peers[pidx].part);
+	}
+
+	/* Clean the Link Up/Down and Global Signal status registers */
+	idt_sw_write(ndev, IDT_SW_SELINKUPSTS, (u32)-1);
+	idt_sw_write(ndev, IDT_SW_SELINKDNSTS, (u32)-1);
+	idt_sw_write(ndev, IDT_SW_SEGSIGSTS, (u32)-1);
+
+	/* Unmask NT-activated partitions to receive Global Switch events */
+	idt_sw_write(ndev, IDT_SW_SEPMSK, part_mask);
+
+	/* Enable PCIe Link Up events of NT-activated ports */
+	idt_sw_write(ndev, IDT_SW_SELINKUPMSK, port_mask);
+
+	/* Enable PCIe Link Down events of NT-activated ports */
+	idt_sw_write(ndev, IDT_SW_SELINKDNMSK, port_mask);
+
+	/* Unmask NT-activated partitions to receive Global Signal events */
+	idt_sw_write(ndev, IDT_SW_SEGSIGMSK, part_mask);
+
+	/* Unmask Link Up/Down and Global Switch Events */
+	se_mask = ~(IDT_SEMSK_LINKUP | IDT_SEMSK_LINKDN | IDT_SEMSK_GSIGNAL);
+	idt_sw_write(ndev, IDT_SW_SEMSK, se_mask);
+
+	dev_dbg(&ndev->ntb.pdev->dev, "NTB link status events initialized");
+}
+
+/*
+ * idt_deinit_link() - deinitialize link subsystem
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * Just disable the local NTB link.
+ */
+static void idt_deinit_link(struct idt_ntb_dev *ndev)
+{
+	/* Disable the link */
+	idt_ntb_local_link_disable(ndev);
+
+	dev_dbg(&ndev->ntb.pdev->dev, "NTB link status events deinitialized");
+}
+
+/*
+ * idt_se_isr() - switch events ISR
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @ntint_sts:	NT-function interrupt status
+ *
+ * This driver doesn't support IDT PCIe-switch dynamic reconfigurations,
+ * Failover capability, etc, so switch events are utilized to notify of
+ * PCIe and NTB link events.
+ * The method is called from PCIe ISR bottom-half routine.
+ */
+static void idt_se_isr(struct idt_ntb_dev *ndev, u32 ntint_sts)
+{
+	u32 sests;
+
+	/* Read Switch Events status */
+	sests = idt_sw_read(ndev, IDT_SW_SESTS);
+
+	/* Clean the Link Up/Down and Global Signal status registers */
+	idt_sw_write(ndev, IDT_SW_SELINKUPSTS, (u32)-1);
+	idt_sw_write(ndev, IDT_SW_SELINKDNSTS, (u32)-1);
+	idt_sw_write(ndev, IDT_SW_SEGSIGSTS, (u32)-1);
+
+	/* Clean the corresponding interrupt bit */
+	idt_nt_write(ndev, IDT_NT_NTINTSTS, IDT_NTINTSTS_SEVENT);
+
+	dev_dbg(&ndev->ntb.pdev->dev, "SE IRQ detected %#08x (SESTS %#08x)",
+			  ntint_sts, sests);
+
+	/* Notify the client driver of possible link state change */
+	ntb_link_event(&ndev->ntb);
+}
+
+/*
+ * idt_ntb_local_link_enable() - enable the local NTB link.
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * In order to enable the NTB link we need:
+ * - enable Completion TLPs translation
+ * - initialize mapping table to enable the Request ID translation
+ * - notify peers of NTB link state change
+ */
+static void idt_ntb_local_link_enable(struct idt_ntb_dev *ndev)
+{
+	u32 reqid, mtbldata = 0;
+	unsigned long irqflags;
+
+	/* Enable the ID protection and Completion TLPs translation */
+	idt_nt_write(ndev, IDT_NT_NTCTL, IDT_NTCTL_CPEN);
+
+	/* Retrieve the current Requester ID (Bus:Device:Function) */
+	reqid = idt_nt_read(ndev, IDT_NT_REQIDCAP);
+
+	/*
+	 * Set the corresponding NT Mapping table entry of port partition index
+	 * with the data to perform the Request ID translation
+	 */
+	mtbldata = SET_FIELD(NTMTBLDATA_REQID, 0, reqid) |
+		   SET_FIELD(NTMTBLDATA_PART, 0, ndev->part) |
+		   IDT_NTMTBLDATA_VALID;
+	spin_lock_irqsave(&ndev->mtbl_lock, irqflags);
+	idt_nt_write(ndev, IDT_NT_NTMTBLADDR, ndev->part);
+	idt_nt_write(ndev, IDT_NT_NTMTBLDATA, mtbldata);
+	mmiowb();
+	spin_unlock_irqrestore(&ndev->mtbl_lock, irqflags);
+
+	/* Notify the peers by setting and clearing the global signal bit */
+	idt_nt_write(ndev, IDT_NT_NTGSIGNAL, IDT_NTGSIGNAL_SET);
+	idt_sw_write(ndev, IDT_SW_SEGSIGSTS, (u32)1 << ndev->part);
+}
+
+/*
+ * idt_ntb_local_link_disable() - disable the local NTB link.
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * In order to disable the NTB link we need:
+ * - disable Completion TLPs translation
+ * - clear corresponding mapping table entry
+ * - notify peers of NTB link state change
+ */
+static void idt_ntb_local_link_disable(struct idt_ntb_dev *ndev)
+{
+	unsigned long irqflags;
+
+	/* Disable Completion TLPs translation */
+	idt_nt_write(ndev, IDT_NT_NTCTL, 0);
+
+	/* Clear the corresponding NT Mapping table entry */
+	spin_lock_irqsave(&ndev->mtbl_lock, irqflags);
+	idt_nt_write(ndev, IDT_NT_NTMTBLADDR, ndev->part);
+	idt_nt_write(ndev, IDT_NT_NTMTBLDATA, 0);
+	mmiowb();
+	spin_unlock_irqrestore(&ndev->mtbl_lock, irqflags);
+
+	/* Notify the peers by setting and clearing the global signal bit */
+	idt_nt_write(ndev, IDT_NT_NTGSIGNAL, IDT_NTGSIGNAL_SET);
+	idt_sw_write(ndev, IDT_SW_SEGSIGSTS, (u32)1 << ndev->part);
+}
+
+/*
+ * idt_ntb_local_link_is_up() - test whether the local NTB link is up
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * Local link is up under the following conditions:
+ * - Bus mastering is enabled
+ * - NTCTL has Completion TLPs translation enabled
+ * - Mapping table permits Request TLPs translation
+ * NOTE: We don't need to check PCIe link state since it's obviously
+ * up while we are able to communicate with IDT PCIe-switch
+ *
+ * Return: true if link is up, otherwise false
+ */
+static bool idt_ntb_local_link_is_up(struct idt_ntb_dev *ndev)
+{
+	unsigned long irqflags;
+	u32 data;
+
+	/* Read the local Bus Master Enable status */
+	data = idt_nt_read(ndev, IDT_NT_PCICMDSTS);
+	if (!(data & IDT_PCICMDSTS_BME))
+		return false;
+
+	/* Read the local Completion TLPs translation enable status */
+	data = idt_nt_read(ndev, IDT_NT_NTCTL);
+	if (!(data & IDT_NTCTL_CPEN))
+		return false;
+
+	/* Read Mapping table entry corresponding to the local partition */
+	spin_lock_irqsave(&ndev->mtbl_lock, irqflags);
+	idt_nt_write(ndev, IDT_NT_NTMTBLADDR, ndev->part);
+	data = idt_nt_read(ndev, IDT_NT_NTMTBLDATA);
+	spin_unlock_irqrestore(&ndev->mtbl_lock, irqflags);
+
+	return !!(data & IDT_NTMTBLDATA_VALID);
+}
+
+/*
+ * idt_ntb_peer_link_is_up() - test whether peer NTB link is up
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @pidx:	Peer port index
+ *
+ * Peer link is up under the following conditions:
+ * - PCIe link is up
+ * - Bus mastering is enabled
+ * - NTCTL has Completion TLPs translation enabled
+ * - Mapping table permits Request TLPs translation
+ *
+ * Return: true if link is up, otherwise false
+ */
+static bool idt_ntb_peer_link_is_up(struct idt_ntb_dev *ndev, int pidx)
+{
+	unsigned long irqflags;
+	unsigned char port;
+	u32 data;
+
+	/* Retrieve the device port number */
+	port = ndev->peers[pidx].port;
+
+	/* Check whether PCIe link is up */
+	data = idt_sw_read(ndev, portdata_tbl[port].sts);
+	if (!(data & IDT_SWPORTxSTS_LINKUP))
+		return false;
+
+	/* Check whether bus mastering is enabled on the peer port */
+	data = idt_sw_read(ndev, portdata_tbl[port].pcicmdsts);
+	if (!(data & IDT_PCICMDSTS_BME))
+		return false;
+
+	/* Check if Completion TLPs translation is enabled on the peer port */
+	data = idt_sw_read(ndev, portdata_tbl[port].ntctl);
+	if (!(data & IDT_NTCTL_CPEN))
+		return false;
+
+	/* Read Mapping table entry corresponding to the peer partition */
+	spin_lock_irqsave(&ndev->mtbl_lock, irqflags);
+	idt_nt_write(ndev, IDT_NT_NTMTBLADDR, ndev->peers[pidx].part);
+	data = idt_nt_read(ndev, IDT_NT_NTMTBLDATA);
+	spin_unlock_irqrestore(&ndev->mtbl_lock, irqflags);
+
+	return !!(data & IDT_NTMTBLDATA_VALID);
+}
+
+/*
+ * idt_ntb_link_is_up() - get the current ntb link state (NTB API callback)
+ * @ntb:	NTB device context.
+ * @speed:	OUT - The link speed expressed as PCIe generation number.
+ * @width:	OUT - The link width expressed as the number of PCIe lanes.
+ *
+ * Get the bitfield of NTB link states for all peer ports
+ *
+ * Return: bitfield of indexed ports link state: bit is set/cleared if the
+ *         link is up/down respectively.
+ */
+static u64 idt_ntb_link_is_up(struct ntb_dev *ntb,
+			      enum ntb_speed *speed, enum ntb_width *width)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+	unsigned char pidx;
+	u64 status;
+	u32 data;
+
+	/* Retrieve the local link speed and width */
+	if (speed != NULL || width != NULL) {
+		data = idt_nt_read(ndev, IDT_NT_PCIELCTLSTS);
+		if (speed != NULL)
+			*speed = GET_FIELD(PCIELCTLSTS_CLS, data);
+		if (width != NULL)
+			*width = GET_FIELD(PCIELCTLSTS_NLW, data);
+	}
+
+	/* If local NTB link isn't up then all the links are considered down */
+	if (!idt_ntb_local_link_is_up(ndev))
+		return 0;
+
+	/* Collect all the peer ports link states into the bitfield */
+	status = 0;
+	for (pidx = 0; pidx < ndev->peer_cnt; pidx++) {
+		if (idt_ntb_peer_link_is_up(ndev, pidx))
+			status |= ((u64)1 << pidx);
+	}
+
+	return status;
+}
+
+/*
+ * idt_ntb_link_enable() - enable local port ntb link (NTB API callback)
+ * @ntb:	NTB device context.
+ * @speed:	The maximum link speed expressed as PCIe generation number.
+ * @width:	The maximum link width expressed as the number of PCIe lanes.
+ *
+ * Enable just local NTB link. PCIe link parameters are ignored.
+ *
+ * Return: always zero.
+ */
+static int idt_ntb_link_enable(struct ntb_dev *ntb, enum ntb_speed speed,
+			       enum ntb_width width)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	/* Just enable the local NTB link */
+	idt_ntb_local_link_enable(ndev);
+
+	dev_dbg(&ndev->ntb.pdev->dev, "Local NTB link enabled");
+
+	return 0;
+}
+
+/*
+ * idt_ntb_link_disable() - disable local port ntb link (NTB API callback)
+ * @ntb:	NTB device context.
+ *
+ * Disable just local NTB link.
+ *
+ * Return: always zero.
+ */
+static int idt_ntb_link_disable(struct ntb_dev *ntb)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	/* Just disable the local NTB link */
+	idt_ntb_local_link_disable(ndev);
+
+	dev_dbg(&ndev->ntb.pdev->dev, "Local NTB link disabled");
+
+	return 0;
+}
+
+/*=============================================================================
+ *                         4. Memory Window operations
+ *
+ *    IDT PCIe-switches have two types of memory windows: MWs with direct
+ * address translation and MWs with LUT-based translation. The first type
+ * simply maps the corresponding BAR address space to a memory space of the
+ * specified target port, i.e. it implements a plain one-to-one mapping. A
+ * lookup table in its turn can map one BAR address space to up to 24
+ * different memory spaces of different ports.
+ *    NT-function BARs can be set up to implement either direct or lookup
+ * table based address translation, so:
+ * BAR0 - NT configuration registers space/direct address translation
+ * BAR1 - direct address translation/upper address of BAR0x64
+ * BAR2 - direct address translation/Lookup table with either 12 or 24 entries
+ * BAR3 - direct address translation/upper address of BAR2x64
+ * BAR4 - direct address translation/Lookup table with either 12 or 24 entries
+ * BAR5 - direct address translation/upper address of BAR4x64
+ *    Additionally, BAR2 and BAR4 can't have a 24-entry LUT enabled at the
+ * same time. Since the BARs setup can be rather complicated, this driver
+ * implements a scanning algorithm that covers all the possible memory window
+ * configurations.
+ *
+ * NOTE 1 The BARs must be set up before the Linux kernel enumerates the
+ * NT-function of any port, so this driver sees a fixed memory windows
+ * configuration. Hence all the initialization must be performed either by the
+ * platform BIOS or by an EEPROM connected to the IDT PCIe-switch master
+ * SMBus.
+ *
+ * NOTE 2 This driver expects BAR0 to map the NT-function configuration space.
+ * A simple calculation gives an upper boundary of 29 possible memory windows
+ * per NT-function if all the BARs are of 32-bit type.
+ *=============================================================================
+ */
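+
+/*
+ * For illustration only (not part of this driver): from the client driver
+ * point of view the DIR/LUT distinction is hidden behind the generic NTB API.
+ * A rough, error-handling-free sketch of programming one outbound window to
+ * reach a peer buffer, where peer_dma_addr is a hypothetical DMA address the
+ * peer has advertised (e.g. over the message registers described below) and
+ * is assumed to satisfy the alignment reported by ntb_mw_get_align():
+ *
+ *	phys_addr_t base;
+ *	resource_size_t size;
+ *	void __iomem *mw;
+ *
+ *	ntb_peer_mw_get_addr(ntb, 0, &base, &size);
+ *	ntb_peer_mw_set_trans(ntb, pidx, 0, peer_dma_addr, size);
+ *	mw = ioremap_wc(base, size);
+ *
+ * Writes to mw are then translated to the peer's memory starting at
+ * peer_dma_addr.
+ */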
+
+/*
+ * idt_get_mw_count() - get memory window count
+ * @mw_type:	Memory window type
+ *
+ * Return: number of memory windows for the specified memory window type
+ */
+static inline unsigned char idt_get_mw_count(enum idt_mw_type mw_type)
+{
+	switch (mw_type) {
+	case IDT_MW_DIR:
+		return 1;
+	case IDT_MW_LUT12:
+		return 12;
+	case IDT_MW_LUT24:
+		return 24;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * idt_get_mw_name() - get memory window name
+ * @mw_type:	Memory window type
+ *
+ * Return: pointer to a string with name
+ */
+static inline char *idt_get_mw_name(enum idt_mw_type mw_type)
+{
+	switch (mw_type) {
+	case IDT_MW_DIR:
+		return "DIR  ";
+	case IDT_MW_LUT12:
+		return "LUT12";
+	case IDT_MW_LUT24:
+		return "LUT24";
+	default:
+		break;
+	}
+
+	return "unknown";
+}
+
+/*
+ * idt_scan_mws() - scan memory windows of the port
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @port:	Port to get number of memory windows for
+ * @mw_cnt:	Out - number of memory windows
+ *
+ * It walks over the BAR setup registers of the specified port and determines
+ * the memory window parameters of any that are activated.
+ *
+ * Return: array of memory windows
+ */
+static struct idt_mw_cfg *idt_scan_mws(struct idt_ntb_dev *ndev, int port,
+				       unsigned char *mw_cnt)
+{
+	struct idt_mw_cfg mws[IDT_MAX_NR_MWS], *ret_mws;
+	const struct idt_ntb_bar *bars;
+	enum idt_mw_type mw_type;
+	unsigned char widx, bidx, en_cnt;
+	bool bar_64bit = false;
+	int aprt_size;
+	u32 data;
+
+	/* Retrieve the array of the BARs registers */
+	bars = portdata_tbl[port].bars;
+
+	/* Scan all the BARs belonging to the port */
+	*mw_cnt = 0;
+	for (bidx = 0; bidx < IDT_BAR_CNT; bidx += 1 + bar_64bit) {
+		/* Read BARSETUP register value */
+		data = idt_sw_read(ndev, bars[bidx].setup);
+
+		/* Skip disabled BARs */
+		if (!(data & IDT_BARSETUP_EN)) {
+			bar_64bit = false;
+			continue;
+		}
+
+		/* Skip next BARSETUP if current one has 64bit addressing */
+		bar_64bit = IS_FLD_SET(BARSETUP_TYPE, data, 64);
+
+		/* Skip configuration space mapping BARs */
+		if (data & IDT_BARSETUP_MODE_CFG)
+			continue;
+
+		/* Retrieve MW type/entries count and aperture size */
+		mw_type = GET_FIELD(BARSETUP_ATRAN, data);
+		en_cnt = idt_get_mw_count(mw_type);
+		aprt_size = (u64)1 << GET_FIELD(BARSETUP_SIZE, data);
+
+		/* Save configurations of all available memory windows */
+		for (widx = 0; widx < en_cnt; widx++, (*mw_cnt)++) {
+			/*
+			 * IDT can expose a limited number of MWs, so it's a bug
+			 * to have more than the driver expects
+			 */
+			if (*mw_cnt >= IDT_MAX_NR_MWS)
+				return ERR_PTR(-EINVAL);
+
+			/* Save basic MW info */
+			mws[*mw_cnt].type = mw_type;
+			mws[*mw_cnt].bar = bidx;
+			mws[*mw_cnt].idx = widx;
+			/* It's always DWORD aligned */
+			mws[*mw_cnt].addr_align = IDT_TRANS_ALIGN;
+			/* DIR and LUT approaches configure MWs differently */
+			if (mw_type == IDT_MW_DIR)
+				mws[*mw_cnt].size_max = aprt_size;
+			else if (mw_type == IDT_MW_LUT12)
+				mws[*mw_cnt].size_max = aprt_size / 16;
+			else
+				mws[*mw_cnt].size_max = aprt_size / 32;
+			mws[*mw_cnt].size_align = (mw_type == IDT_MW_DIR) ?
+				IDT_DIR_SIZE_ALIGN : mws[*mw_cnt].size_max;
+		}
+	}
+
+	/* Allocate memory for memory window descriptors */
+	ret_mws = devm_kcalloc(&ndev->ntb.pdev->dev, *mw_cnt,
+				sizeof(*ret_mws), GFP_KERNEL);
+	if (IS_ERR_OR_NULL(ret_mws))
+		return ERR_PTR(-ENOMEM);
+
+	/* Copy the info of detected memory windows */
+	memcpy(ret_mws, mws, (*mw_cnt)*sizeof(*ret_mws));
+
+	return ret_mws;
+}
+
+/*
+ * idt_init_mws() - initialize memory windows subsystem
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * Scan BAR setup registers of local and peer ports to determine the
+ * outbound and inbound memory windows parameters
+ *
+ * Return: zero on success, otherwise a negative error number
+ */
+static int idt_init_mws(struct idt_ntb_dev *ndev)
+{
+	struct idt_ntb_peer *peer;
+	unsigned char pidx;
+
+	/* Scan memory windows of the local port */
+	ndev->mws = idt_scan_mws(ndev, ndev->port, &ndev->mw_cnt);
+	if (IS_ERR(ndev->mws)) {
+		dev_err(&ndev->ntb.pdev->dev,
+			"Failed to scan mws of local port %hhu", ndev->port);
+		return PTR_ERR(ndev->mws);
+	}
+
+	/* Scan memory windows of the peer ports */
+	for (pidx = 0; pidx < ndev->peer_cnt; pidx++) {
+		peer = &ndev->peers[pidx];
+		peer->mws = idt_scan_mws(ndev, peer->port, &peer->mw_cnt);
+		if (IS_ERR(peer->mws)) {
+			dev_err(&ndev->ntb.pdev->dev,
+				"Failed to scan mws of port %hhu", peer->port);
+			return PTR_ERR(peer->mws);
+		}
+	}
+
+	/* Initialize spin locker of the LUT registers */
+	spin_lock_init(&ndev->lut_lock);
+
+	dev_dbg(&ndev->ntb.pdev->dev, "Outbound and inbound MWs initialized");
+
+	return 0;
+}
+
+/*
+ * idt_ntb_mw_count() - number of inbound memory windows (NTB API callback)
+ * @ntb:	NTB device context.
+ * @pidx:	Port index of peer device.
+ *
+ * The value is returned for the specified peer, so generally speaking it can
+ * be different for different ports depending on the IDT PCIe-switch
+ * initialization.
+ *
+ * Return: the number of memory windows.
+ */
+static int idt_ntb_mw_count(struct ntb_dev *ntb, int pidx)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	if (pidx < 0 || ndev->peer_cnt <= pidx)
+		return -EINVAL;
+
+	return ndev->peers[pidx].mw_cnt;
+}
+
+/*
+ * idt_ntb_mw_get_align() - inbound memory window parameters (NTB API callback)
+ * @ntb:	NTB device context.
+ * @pidx:	Port index of peer device.
+ * @widx:	Memory window index.
+ * @addr_align:	OUT - the base alignment for translating the memory window
+ * @size_align:	OUT - the size alignment for translating the memory window
+ * @size_max:	OUT - the maximum size of the memory window
+ *
+ * The peer memory window parameters have already been determined, so just
+ * return the corresponding values, which must not change within a session.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+static int idt_ntb_mw_get_align(struct ntb_dev *ntb, int pidx, int widx,
+				resource_size_t *addr_align,
+				resource_size_t *size_align,
+				resource_size_t *size_max)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+	struct idt_ntb_peer *peer;
+
+	if (pidx < 0 || ndev->peer_cnt <= pidx)
+		return -EINVAL;
+
+	peer = &ndev->peers[pidx];
+
+	if (widx < 0 || peer->mw_cnt <= widx)
+		return -EINVAL;
+
+	if (addr_align != NULL)
+		*addr_align = peer->mws[widx].addr_align;
+
+	if (size_align != NULL)
+		*size_align = peer->mws[widx].size_align;
+
+	if (size_max != NULL)
+		*size_max = peer->mws[widx].size_max;
+
+	return 0;
+}
+
+/*
+ * idt_ntb_peer_mw_count() - number of outbound memory windows
+ *			     (NTB API callback)
+ * @ntb:	NTB device context.
+ *
+ * The outbound memory window parameters have been determined based on the
+ * BAR setup register values, which are mostly constant within one session.
+ *
+ * Return: the number of memory windows.
+ */
+static int idt_ntb_peer_mw_count(struct ntb_dev *ntb)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	return ndev->mw_cnt;
+}
+
+/*
+ * idt_ntb_peer_mw_get_addr() - get map address of an outbound memory window
+ *				(NTB API callback)
+ * @ntb:	NTB device context.
+ * @widx:	Memory window index (within ntb_peer_mw_count() return value).
+ * @base:	OUT - the base address of mapping region.
+ * @size:	OUT - the size of mapping region.
+ *
+ * Return just the parameters of the BAR resource mapping. The size reflects
+ * just the size of the mapped resource.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+static int idt_ntb_peer_mw_get_addr(struct ntb_dev *ntb, int widx,
+				    phys_addr_t *base, resource_size_t *size)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	if (widx < 0 || ndev->mw_cnt <= widx)
+		return -EINVAL;
+
+	/* Mapping address is just properly shifted BAR resource start */
+	if (base != NULL)
+		*base = pci_resource_start(ntb->pdev, ndev->mws[widx].bar) +
+			ndev->mws[widx].idx * ndev->mws[widx].size_max;
+
+	/* Mapping size has already been calculated at MWs scanning */
+	if (size != NULL)
+		*size = ndev->mws[widx].size_max;
+
+	return 0;
+}
+
+/*
+ * idt_ntb_peer_mw_set_trans() - set a translation address of a memory window
+ *				 (NTB API callback)
+ * @ntb:	NTB device context.
+ * @pidx:	Port index of peer device the translation address received from.
+ * @widx:	Memory window index.
+ * @addr:	The dma address of the shared memory to access.
+ * @size:	The size of the shared memory to access.
+ *
+ * Direct address translation and LUT-based translation are initialized a bit
+ * differently, although the parameter restrictions are determined by the same
+ * code.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+static int idt_ntb_peer_mw_set_trans(struct ntb_dev *ntb, int pidx, int widx,
+				     u64 addr, resource_size_t size)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+	struct idt_mw_cfg *mw_cfg;
+	u32 data = 0, lutoff = 0;
+
+	if (pidx < 0 || ndev->peer_cnt <= pidx)
+		return -EINVAL;
+
+	if (widx < 0 || ndev->mw_cnt <= widx)
+		return -EINVAL;
+
+	/*
+	 * Retrieve the memory window config to make sure the passed arguments
+	 * fit its restrictions
+	 */
+	mw_cfg = &ndev->mws[widx];
+	if (!IS_ALIGNED(addr, mw_cfg->addr_align))
+		return -EINVAL;
+	if (!IS_ALIGNED(size, mw_cfg->size_align) || size > mw_cfg->size_max)
+		return -EINVAL;
+
+	/* DIR and LUT based translations are initialized differently */
+	if (mw_cfg->type == IDT_MW_DIR) {
+		const struct idt_ntb_bar *bar = &ntdata_tbl.bars[mw_cfg->bar];
+		u64 limit;
+		/* Set destination partition of translation */
+		data = idt_nt_read(ndev, bar->setup);
+		data = SET_FIELD(BARSETUP_TPART, data, ndev->peers[pidx].part);
+		idt_nt_write(ndev, bar->setup, data);
+		/* Set translation base address */
+		idt_nt_write(ndev, bar->ltbase, (u32)addr);
+		idt_nt_write(ndev, bar->utbase, (u32)(addr >> 32));
+		/* Set the custom BAR aperture limit */
+		limit = pci_resource_start(ntb->pdev, mw_cfg->bar) + size;
+		idt_nt_write(ndev, bar->limit, (u32)limit);
+		if (IS_FLD_SET(BARSETUP_TYPE, data, 64))
+			idt_nt_write(ndev, (bar + 1)->limit, (limit >> 32));
+	} else {
+		unsigned long irqflags;
+		/* Initialize corresponding LUT entry */
+		lutoff = SET_FIELD(LUTOFFSET_INDEX, 0, mw_cfg->idx) |
+			 SET_FIELD(LUTOFFSET_BAR, 0, mw_cfg->bar);
+		data = SET_FIELD(LUTUDATA_PART, 0, ndev->peers[pidx].part) |
+			IDT_LUTUDATA_VALID;
+		spin_lock_irqsave(&ndev->lut_lock, irqflags);
+		idt_nt_write(ndev, IDT_NT_LUTOFFSET, lutoff);
+		idt_nt_write(ndev, IDT_NT_LUTLDATA, (u32)addr);
+		idt_nt_write(ndev, IDT_NT_LUTMDATA, (u32)(addr >> 32));
+		idt_nt_write(ndev, IDT_NT_LUTUDATA, data);
+		mmiowb();
+		spin_unlock_irqrestore(&ndev->lut_lock, irqflags);
+		/* Limit address isn't specified since size is fixed for LUT */
+	}
+
+	return 0;
+}
+
+/*
+ * idt_ntb_peer_mw_clear_trans() - clear the outbound MW translation address
+ *				   (NTB API callback)
+ * @ntb:	NTB device context.
+ * @pidx:	Port index of peer device.
+ * @widx:	Memory window index.
+ *
+ * It effectively disables the translation over the specified outbound MW.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+static int idt_ntb_peer_mw_clear_trans(struct ntb_dev *ntb, int pidx,
+					int widx)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+	struct idt_mw_cfg *mw_cfg;
+
+	if (pidx < 0 || ndev->peer_cnt <= pidx)
+		return -EINVAL;
+
+	if (widx < 0 || ndev->mw_cnt <= widx)
+		return -EINVAL;
+
+	mw_cfg = &ndev->mws[widx];
+
+	/* DIR and LUT based translations are initialized differently */
+	if (mw_cfg->type == IDT_MW_DIR) {
+		const struct idt_ntb_bar *bar = &ntdata_tbl.bars[mw_cfg->bar];
+		u32 data;
+		/* Read BARSETUP to check BAR type */
+		data = idt_nt_read(ndev, bar->setup);
+		/* Disable translation by specifying zero BAR limit */
+		idt_nt_write(ndev, bar->limit, 0);
+		if (IS_FLD_SET(BARSETUP_TYPE, data, 64))
+			idt_nt_write(ndev, (bar + 1)->limit, 0);
+	} else {
+		unsigned long irqflags;
+		u32 lutoff;
+		/* Clear the corresponding LUT entry */
+		lutoff = SET_FIELD(LUTOFFSET_INDEX, 0, mw_cfg->idx) |
+			 SET_FIELD(LUTOFFSET_BAR, 0, mw_cfg->bar);
+		spin_lock_irqsave(&ndev->lut_lock, irqflags);
+		idt_nt_write(ndev, IDT_NT_LUTOFFSET, lutoff);
+		idt_nt_write(ndev, IDT_NT_LUTLDATA, 0);
+		idt_nt_write(ndev, IDT_NT_LUTMDATA, 0);
+		idt_nt_write(ndev, IDT_NT_LUTUDATA, 0);
+		mmiowb();
+		spin_unlock_irqrestore(&ndev->lut_lock, irqflags);
+	}
+
+	return 0;
+}
+
+/*=============================================================================
+ *                          5. Doorbell operations
+ *
+ *    The doorbell functionality of IDT PCIe-switches is pretty unusual. First
+ * of all there is a global doorbell register whose state can be changed by
+ * any NT-function of the IDT device in accordance with global permissions.
+ * These permission configs are not covered by the NTB API, so they must be
+ * set up by either BIOS or EEPROM settings. The state of the global doorbell
+ * is in turn reflected to the local inbound doorbell registers of the
+ * NT-functions. This can lead to situations where a client driver sets some
+ * peer doorbell bits and gets them bounced back to its local inbound doorbell
+ * if the permissions are granted.
+ *    Secondly there is just one IRQ vector for Doorbell, Message, Temperature
+ * and Switch events, so if a client driver has left any of the doorbell bits
+ * set and some other event occurs, the driver will be notified of a Doorbell
+ * event again.
+ *=============================================================================
+ */
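+
+/*
+ * For illustration only (not part of this driver): a rough sketch of typical
+ * doorbell usage through the generic NTB API, where my_ctx and my_db_event
+ * are hypothetical names and my_db_event is registered via the db_event
+ * member of struct ntb_ctx_ops.
+ *
+ * Ring doorbell bit 0 on the peer:
+ *
+ *	ntb_peer_db_set(ntb, BIT_ULL(0));
+ *
+ * Handle it on the receiving side:
+ *
+ *	static void my_db_event(void *ctx, int vec)
+ *	{
+ *		struct my_ctx *priv = ctx;
+ *		u64 db = ntb_db_read(priv->ntb);
+ *
+ *		ntb_db_clear(priv->ntb, db);
+ *	}
+ *
+ * Clearing all the set bits in the handler is what rearms the DBELL
+ * interrupt, as described in idt_db_isr() below, and ntb_db_valid_mask()
+ * reports which bits are usable (IDT_DBELL_MASK here).
+ */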
+
+/*
+ * idt_db_isr() - doorbell event ISR
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @ntint_sts:	NT-function interrupt status
+ *
+ * A doorbell event happens when the DBELL bit of NTINTSTS switches from 0 to
+ * 1. That occurs only when unmasked doorbell bits are set on a completely
+ * zeroed doorbell register.
+ * The method is called from PCIe ISR bottom-half routine.
+ */
+static void idt_db_isr(struct idt_ntb_dev *ndev, u32 ntint_sts)
+{
+	/*
+	 * Doorbell IRQ status will be cleaned only when client
+	 * driver unsets all the doorbell bits.
+	 */
+	dev_dbg(&ndev->ntb.pdev->dev, "DB IRQ detected %#08x", ntint_sts);
+
+	/* Notify the client driver of possible doorbell state change */
+	ntb_db_event(&ndev->ntb, 0);
+}
+
+/*
+ * idt_ntb_db_valid_mask() - get a mask of doorbell bits supported by the ntb
+ *			     (NTB API callback)
+ * @ntb:	NTB device context.
+ *
+ * IDT PCIe-switches expose just one Doorbell register of DWORD size.
+ *
+ * Return: A mask of doorbell bits supported by the ntb.
+ */
+static u64 idt_ntb_db_valid_mask(struct ntb_dev *ntb)
+{
+	return IDT_DBELL_MASK;
+}
+
+/*
+ * idt_ntb_db_read() - read the local doorbell register (NTB API callback)
+ * @ntb:	NTB device context.
+ *
+ * There is just one inbound doorbell register per NT-function, so
+ * this method returns its value.
+ *
+ * Return: The bits currently set in the local doorbell register.
+ */
+static u64 idt_ntb_db_read(struct ntb_dev *ntb)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	return idt_nt_read(ndev, IDT_NT_INDBELLSTS);
+}
+
+/*
+ * idt_ntb_db_clear() - clear bits in the local doorbell register
+ *			(NTB API callback)
+ * @ntb:	NTB device context.
+ * @db_bits:	Doorbell bits to clear.
+ *
+ * Clear bits of inbound doorbell register by writing ones to it.
+ *
+ * NOTE! Invalid bits are always considered cleared, so it's not an error
+ * to try to clear them again.
+ *
+ * Return: always zero as success.
+ */
+static int idt_ntb_db_clear(struct ntb_dev *ntb, u64 db_bits)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	idt_nt_write(ndev, IDT_NT_INDBELLSTS, (u32)db_bits);
+
+	return 0;
+}
+
+/*
+ * idt_ntb_db_read_mask() - read the local doorbell mask (NTB API callback)
+ * @ntb:	NTB device context.
+ *
+ * Each inbound doorbell bit can be masked from generating an IRQ by setting
+ * the corresponding bit in the inbound doorbell mask, so this method returns
+ * the value of that register.
+ *
+ * Return: The bits currently set in the local doorbell mask register.
+ */
+static u64 idt_ntb_db_read_mask(struct ntb_dev *ntb)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	return idt_nt_read(ndev, IDT_NT_INDBELLMSK);
+}
+
+/*
+ * idt_ntb_db_set_mask() - set bits in the local doorbell mask
+ *			   (NTB API callback)
+ * @ntb:	NTB device context.
+ * @db_bits:	Doorbell mask bits to set.
+ *
+ * The inbound doorbell mask register value must be read, then OR'ed with
+ * the passed bitfield and only then written back.
+ *
+ * Return: zero on success, negative error if invalid argument passed.
+ */
+static int idt_ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	return idt_reg_set_bits(ndev, IDT_NT_INDBELLMSK, &ndev->db_mask_lock,
+				IDT_DBELL_MASK, db_bits);
+}
+
+/*
+ * idt_ntb_db_clear_mask() - clear bits in the local doorbell mask
+ *			     (NTB API callback)
+ * @ntb:	NTB device context.
+ * @db_bits:	Doorbell bits to clear.
+ *
+ * The method just clears the mask bits specified by the passed bitfield.
+ * The IDT PCIe-switch generates an interrupt upon such an unmasking only if
+ * no unmasked doorbell bit had been set before it; otherwise no new IRQ is
+ * raised, since there is only one IRQ vector for all doorbells.
+ *
+ * Return: always zero as success
+ */
+static int idt_ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	idt_reg_clear_bits(ndev, IDT_NT_INDBELLMSK, &ndev->db_mask_lock,
+			   db_bits);
+
+	return 0;
+}
+
+/*
+ * idt_ntb_peer_db_set() - set bits in the peer doorbell register
+ *			   (NTB API callback)
+ * @ntb:	NTB device context.
+ * @db_bits:	Doorbell bits to set.
+ *
+ * IDT PCIe-switches expose a local outbound doorbell register to change the
+ * peer inbound doorbell register state.
+ *
+ * Return: zero on success, negative error if invalid argument passed.
+ */
+static int idt_ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	if (db_bits & ~(u64)IDT_DBELL_MASK)
+		return -EINVAL;
+
+	idt_nt_write(ndev, IDT_NT_OUTDBELLSET, (u32)db_bits);
+	return 0;
+}
+
+/*=============================================================================
+ *                          6. Messaging operations
+ *
+ *    Each NT-function of an IDT PCIe-switch has four inbound and four
+ * outbound message registers. An outbound message register can be connected
+ * to one or even several peer inbound message registers by the global
+ * configuration. Since the NTB API permits a one-to-one message register
+ * mapping only, the driver acts in accordance with that restriction.
+ *=============================================================================
+ */
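+
+/*
+ * For illustration only (not part of this driver): a rough sketch of message
+ * register usage through the generic NTB API, with hypothetical pidx/data
+ * values and error handling omitted.
+ *
+ * Sender:
+ *
+ *	ntb_peer_msg_write(ntb, pidx, 0, data);
+ *	if (ntb_msg_read_sts(ntb) & ntb_msg_outbits(ntb))
+ *		the peer hasn't freed the register yet, so retry later
+ *
+ * Receiver (e.g. from the msg_event context callback):
+ *
+ *	int src_pidx;
+ *	u32 data = ntb_msg_read(ntb, &src_pidx, 0);
+ *
+ *	ntb_msg_clear_sts(ntb, ntb_msg_inbits(ntb));
+ *
+ * Clearing the inbound status bits is what rearms the MSG interrupt, as
+ * described in idt_msg_isr() below.
+ */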
+
+/*
+ * idt_init_msg() - initialize messaging interface
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * Just initialize the message register routing table lockers.
+ */
+static void idt_init_msg(struct idt_ntb_dev *ndev)
+{
+	unsigned char midx;
+
+	/* Init the messages routing table lockers */
+	for (midx = 0; midx < IDT_MSG_CNT; midx++)
+		spin_lock_init(&ndev->msg_locks[midx]);
+
+	dev_dbg(&ndev->ntb.pdev->dev, "NTB Messaging initialized");
+}
+
+/*
+ * idt_msg_isr() - message event ISR
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @ntint_sts:	NT-function interrupt status
+ *
+ * A message event happens when the MSG bit of NTINTSTS switches from 0 to 1.
+ * That occurs only when unmasked message status bits are set on a
+ * completely zeroed message status register.
+ * The method is called from PCIe ISR bottom-half routine.
+ */
+static void idt_msg_isr(struct idt_ntb_dev *ndev, u32 ntint_sts)
+{
+	/*
+	 * Message IRQ status will be cleaned only when client
+	 * driver unsets all the message status bits.
+	 */
+	dev_dbg(&ndev->ntb.pdev->dev, "Message IRQ detected %#08x", ntint_sts);
+
+	/* Notify the client driver of possible message status change */
+	ntb_msg_event(&ndev->ntb);
+}
+
+/*
+ * idt_ntb_msg_count() - get the number of message registers (NTB API callback)
+ * @ntb:	NTB device context.
+ *
+ * IDT PCIe-switches support four message registers.
+ *
+ * Return: the number of message registers.
+ */
+static int idt_ntb_msg_count(struct ntb_dev *ntb)
+{
+	return IDT_MSG_CNT;
+}
+
+/*
+ * idt_ntb_msg_inbits() - get a bitfield of inbound message registers status
+ *			  (NTB API callback)
+ * @ntb:	NTB device context.
+ *
+ * The NT message status register is shared between the inbound and outbound
+ * message register statuses.
+ *
+ * Return: bitfield of inbound message registers.
+ */
+static u64 idt_ntb_msg_inbits(struct ntb_dev *ntb)
+{
+	return (u64)IDT_INMSG_MASK;
+}
+
+/*
+ * idt_ntb_msg_outbits() - get a bitfield of outbound message registers status
+ *			  (NTB API callback)
+ * @ntb:	NTB device context.
+ *
+ * The NT message status register is shared between the inbound and outbound
+ * message register statuses.
+ *
+ * Return: bitfield of outbound message registers.
+ */
+static u64 idt_ntb_msg_outbits(struct ntb_dev *ntb)
+{
+	return (u64)IDT_OUTMSG_MASK;
+}
+
+/*
+ * idt_ntb_msg_read_sts() - read the message registers status (NTB API callback)
+ * @ntb:	NTB device context.
+ *
+ * IDT PCIe-switches expose message status registers to notify drivers of
+ * incoming data and of delivery failures when a peer message register hasn't
+ * been freed.
+ *
+ * Return: status bits of message registers
+ */
+static u64 idt_ntb_msg_read_sts(struct ntb_dev *ntb)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	return idt_nt_read(ndev, IDT_NT_MSGSTS);
+}
+
+/*
+ * idt_ntb_msg_clear_sts() - clear status bits of message registers
+ *			     (NTB API callback)
+ * @ntb:	NTB device context.
+ * @sts_bits:	Status bits to clear.
+ *
+ * Clear bits in the status register by writing ones.
+ *
+ * NOTE! Invalid bits are always considered cleared, so it's not an error
+ * to try to clear them again.
+ *
+ * Return: always zero as success.
+ */
+static int idt_ntb_msg_clear_sts(struct ntb_dev *ntb, u64 sts_bits)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	idt_nt_write(ndev, IDT_NT_MSGSTS, sts_bits);
+
+	return 0;
+}
+
+/*
+ * idt_ntb_msg_set_mask() - set mask of message register status bits
+ *			    (NTB API callback)
+ * @ntb:	NTB device context.
+ * @mask_bits:	Mask bits.
+ *
+ * Mask the message status bits from raising an IRQ.
+ *
+ * Return: zero on success, negative error if invalid argument passed.
+ */
+static int idt_ntb_msg_set_mask(struct ntb_dev *ntb, u64 mask_bits)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	return idt_reg_set_bits(ndev, IDT_NT_MSGSTSMSK, &ndev->msg_mask_lock,
+				IDT_MSG_MASK, mask_bits);
+}
+
+/*
+ * idt_ntb_msg_clear_mask() - clear message registers mask
+ *			      (NTB API callback)
+ * @ntb:	NTB device context.
+ * @mask_bits:	Mask bits.
+ *
+ * Clear the IRQ mask of the message status bits.
+ *
+ * Return: always zero as success.
+ */
+static int idt_ntb_msg_clear_mask(struct ntb_dev *ntb, u64 mask_bits)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	idt_reg_clear_bits(ndev, IDT_NT_MSGSTSMSK, &ndev->msg_mask_lock,
+			   mask_bits);
+
+	return 0;
+}
+
+/*
+ * idt_ntb_msg_read() - read message register with specified index
+ *			(NTB API callback)
+ * @ntb:	NTB device context.
+ * @pidx:	OUT - Port index of the peer device the message was retrieved from
+ * @midx:	Message register index
+ *
+ * Read data from the specified message register and source register.
+ *
+ * Return: inbound message register value.
+ */
+static u32 idt_ntb_msg_read(struct ntb_dev *ntb, int *pidx, int midx)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	if (midx < 0 || IDT_MSG_CNT <= midx)
+		return ~(u32)0;
+
+	/* Retrieve source port index of the message */
+	if (pidx != NULL) {
+		u32 srcpart;
+
+		srcpart = idt_nt_read(ndev, ntdata_tbl.msgs[midx].src);
+		*pidx = ndev->part_idx_map[srcpart];
+
+		/* Sanity check partition index (for initial case) */
+		if (*pidx == -EINVAL)
+			*pidx = 0;
+	}
+
+	/* Retrieve data of the corresponding message register */
+	return idt_nt_read(ndev, ntdata_tbl.msgs[midx].in);
+}
+
+/*
+ * idt_ntb_peer_msg_write() - write data to the specified message register
+ *			      (NTB API callback)
+ * @ntb:	NTB device context.
+ * @pidx:	Port index of the peer device the message is being sent to
+ * @midx:	Message register index
+ * @msg:	Data to send
+ *
+ * Just try to send the data to a peer. The message status register should be
+ * checked by the client driver.
+ *
+ * Return: zero on success, negative error if invalid argument passed.
+ */
+static int idt_ntb_peer_msg_write(struct ntb_dev *ntb, int pidx, int midx,
+				  u32 msg)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+	unsigned long irqflags;
+	u32 swpmsgctl = 0;
+
+	if (midx < 0 || IDT_MSG_CNT <= midx)
+		return -EINVAL;
+
+	if (pidx < 0 || ndev->peer_cnt <= pidx)
+		return -EINVAL;
+
+	/* Collect the routing information */
+	swpmsgctl = SET_FIELD(SWPxMSGCTL_REG, 0, midx) |
+		    SET_FIELD(SWPxMSGCTL_PART, 0, ndev->peers[pidx].part);
+
+	/* Lock the messages routing table of the specified register */
+	spin_lock_irqsave(&ndev->msg_locks[midx], irqflags);
+	/* Set the route and send the data */
+	idt_sw_write(ndev, partdata_tbl[ndev->part].msgctl[midx], swpmsgctl);
+	idt_nt_write(ndev, ntdata_tbl.msgs[midx].out, msg);
+	mmiowb();
+	/* Unlock the messages routing table */
+	spin_unlock_irqrestore(&ndev->msg_locks[midx], irqflags);
+
+	/* Client driver shall check the status register */
+	return 0;
+}
+
+/*=============================================================================
+ *                      7. Temperature sensor operations
+ *
+ *    The IDT PCIe-switch has an embedded temperature sensor, which can be
+ * used to warn user-space of possible chip overheating. Since the workload
+ * temperature can differ between platforms, the temperature thresholds as
+ * well as the general sensor settings must be set up as part of the
+ * BIOS/EEPROM initialization, including the actual sensor enabling.
+ *=============================================================================
+ */
+
+/*
+ * idt_read_temp() - read temperature from chip sensor
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @val:	OUT - integer value of temperature
+ * @frac:	OUT - fraction
+ */
+static void idt_read_temp(struct idt_ntb_dev *ndev, unsigned char *val,
+			  unsigned char *frac)
+{
+	u32 data;
+
+	/* Read the data from TEMP field of the TMPSTS register */
+	data = idt_sw_read(ndev, IDT_SW_TMPSTS);
+	data = GET_FIELD(TMPSTS_TEMP, data);
+	/* TEMP field has one fractional bit and seven integer bits */
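+	/* E.g. a raw reading of 91 (0b1011011) decodes to 45 + 0.5 = 45.5C */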
+	*val = data >> 1;
+	*frac = ((data & 0x1) ? 5 : 0);
+}
+
+/*
+ * idt_temp_isr() - temperature sensor alarm events ISR
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @ntint_sts:	NT-function interrupt status
+ *
+ * It handles events of the temperature crossing the alarm thresholds. Since
+ * reading the TMPALARM register clears it, the function doesn't analyze the
+ * read value; instead the current temperature value is just printed to the
+ * log as a warning.
+ * The method is called from PCIe ISR bottom-half routine.
+ */
+static void idt_temp_isr(struct idt_ntb_dev *ndev, u32 ntint_sts)
+{
+	unsigned char val, frac;
+
+	/* Read the current temperature value */
+	idt_read_temp(ndev, &val, &frac);
+
+	/* Read the temperature alarm to clean the alarm status out */
+	/*(void)idt_sw_read(ndev, IDT_SW_TMPALARM);*/
+
+	/* Clean the corresponding interrupt bit */
+	idt_nt_write(ndev, IDT_NT_NTINTSTS, IDT_NTINTSTS_TMPSENSOR);
+
+	dev_dbg(&ndev->ntb.pdev->dev,
+		"Temp sensor IRQ detected %#08x", ntint_sts);
+
+	/* Print temperature value to log */
+	dev_warn(&ndev->ntb.pdev->dev, "Temperature %hhu.%hhu", val, frac);
+}
+
+/*=============================================================================
+ *                           8. ISRs related operations
+ *
+ *    The IDT PCIe-switch has a rather peculiar IRQ system. There is just one
+ * interrupt vector for the doorbell and message registers, so the hardware
+ * driver can't determine the actual IRQ source if, for example, a message
+ * event happens while any unmasked doorbell bit is still set. A similar
+ * situation arises when switch or temperature sensor events pop up. The
+ * difference is that the SEVENT and TMPSENSOR bits of the NT interrupt status
+ * register can be cleared by the IRQ handler, so the next interrupt request
+ * won't falsely handle the corresponding events.
+ *    The hardware driver has only a bottom-half (threaded) handler of the
+ * IRQ, since once any event has happened the device won't raise it again
+ * before the last one is handled by clearing the corresponding NTINTSTS bit.
+ *=============================================================================
+ */
+
+static irqreturn_t idt_thread_isr(int irq, void *devid);
+
+/*
+ * idt_init_isr() - initialize PCIe interrupt handler
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * Return: zero on success, otherwise a negative error number.
+ */
+static int idt_init_isr(struct idt_ntb_dev *ndev)
+{
+	struct pci_dev *pdev = ndev->ntb.pdev;
+	u32 ntint_mask;
+	int ret;
+
+	/* Allocate just one interrupt vector for the ISR */
+	ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI | PCI_IRQ_LEGACY);
+	if (ret != 1) {
+		dev_err(&pdev->dev, "Failed to allocate IRQ vector");
+		return ret;
+	}
+
+	/* Retrieve the IRQ vector */
+	ret = pci_irq_vector(pdev, 0);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "Failed to get IRQ vector");
+		goto err_free_vectors;
+	}
+
+	/* Set the IRQ handler */
+	ret = devm_request_threaded_irq(&pdev->dev, ret, NULL, idt_thread_isr,
+					IRQF_ONESHOT, NTB_IRQNAME, ndev);
+	if (ret != 0) {
+		dev_err(&pdev->dev, "Failed to set MSI IRQ handler, %d", ret);
+		goto err_free_vectors;
+	}
+
+	/* Unmask Message/Doorbell/SE/Temperature interrupts */
+	ntint_mask = idt_nt_read(ndev, IDT_NT_NTINTMSK) & ~IDT_NTINTMSK_ALL;
+	idt_nt_write(ndev, IDT_NT_NTINTMSK, ntint_mask);
+
+	/* From now on the interrupts are enabled */
+	dev_dbg(&pdev->dev, "NTB interrupts initialized");
+
+	return 0;
+
+err_free_vectors:
+	pci_free_irq_vectors(pdev);
+
+	return ret;
+}
+
+
+/*
+ * idt_deinit_isr() - deinitialize PCIe interrupt handler
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * Disable corresponding interrupts and free allocated IRQ vectors.
+ */
+static void idt_deinit_isr(struct idt_ntb_dev *ndev)
+{
+	struct pci_dev *pdev = ndev->ntb.pdev;
+	u32 ntint_mask;
+
+	/* Mask interrupts back */
+	ntint_mask = idt_nt_read(ndev, IDT_NT_NTINTMSK) | IDT_NTINTMSK_ALL;
+	idt_nt_write(ndev, IDT_NT_NTINTMSK, ntint_mask);
+
+	/* Manually free the IRQ, otherwise pci_free_irq_vectors() will fail */
+	devm_free_irq(&pdev->dev, pci_irq_vector(pdev, 0), ndev);
+
+	/* Free allocated IRQ vectors */
+	pci_free_irq_vectors(pdev);
+
+	dev_dbg(&pdev->dev, "NTB interrupts deinitialized");
+}
+
+/*
+ * idt_thread_isr() - NT function interrupts handler
+ * @irq:	IRQ number
+ * @devid:	Custom data (IDT NTB hardware driver descriptor)
+ *
+ * It reads the current NT interrupt status register and handles all the
+ * events it declares.
+ * The method is the bottom-half routine of the default PCIe IRQ handler.
+ */
+static irqreturn_t idt_thread_isr(int irq, void *devid)
+{
+	struct idt_ntb_dev *ndev = devid;
+	bool handled = false;
+	u32 ntint_sts;
+
+	/* Read the NT interrupts status register */
+	ntint_sts = idt_nt_read(ndev, IDT_NT_NTINTSTS);
+
+	/* Handle messaging interrupts */
+	if (ntint_sts & IDT_NTINTSTS_MSG) {
+		idt_msg_isr(ndev, ntint_sts);
+		handled = true;
+	}
+
+	/* Handle doorbell interrupts */
+	if (ntint_sts & IDT_NTINTSTS_DBELL) {
+		idt_db_isr(ndev, ntint_sts);
+		handled = true;
+	}
+
+	/* Handle switch event interrupts */
+	if (ntint_sts & IDT_NTINTSTS_SEVENT) {
+		idt_se_isr(ndev, ntint_sts);
+		handled = true;
+	}
+
+	/* Handle temperature sensor interrupt */
+	if (ntint_sts & IDT_NTINTSTS_TMPSENSOR) {
+		idt_temp_isr(ndev, ntint_sts);
+		handled = true;
+	}
+
+	dev_dbg(&ndev->ntb.pdev->dev, "IDT IRQs 0x%08x handled", ntint_sts);
+
+	return handled ? IRQ_HANDLED : IRQ_NONE;
+}
+
+/*===========================================================================
+ *                     9. NTB hardware driver initialization
+ *===========================================================================
+ */
+
+/*
+ * NTB API operations
+ */
+static const struct ntb_dev_ops idt_ntb_ops = {
+	.port_number		= idt_ntb_port_number,
+	.peer_port_count	= idt_ntb_peer_port_count,
+	.peer_port_number	= idt_ntb_peer_port_number,
+	.peer_port_idx		= idt_ntb_peer_port_idx,
+	.link_is_up		= idt_ntb_link_is_up,
+	.link_enable		= idt_ntb_link_enable,
+	.link_disable		= idt_ntb_link_disable,
+	.mw_count		= idt_ntb_mw_count,
+	.mw_get_align		= idt_ntb_mw_get_align,
+	.peer_mw_count		= idt_ntb_peer_mw_count,
+	.peer_mw_get_addr	= idt_ntb_peer_mw_get_addr,
+	.peer_mw_set_trans	= idt_ntb_peer_mw_set_trans,
+	.peer_mw_clear_trans	= idt_ntb_peer_mw_clear_trans,
+	.db_valid_mask		= idt_ntb_db_valid_mask,
+	.db_read		= idt_ntb_db_read,
+	.db_clear		= idt_ntb_db_clear,
+	.db_read_mask		= idt_ntb_db_read_mask,
+	.db_set_mask		= idt_ntb_db_set_mask,
+	.db_clear_mask		= idt_ntb_db_clear_mask,
+	.peer_db_set		= idt_ntb_peer_db_set,
+	.msg_count		= idt_ntb_msg_count,
+	.msg_inbits		= idt_ntb_msg_inbits,
+	.msg_outbits		= idt_ntb_msg_outbits,
+	.msg_read_sts		= idt_ntb_msg_read_sts,
+	.msg_clear_sts		= idt_ntb_msg_clear_sts,
+	.msg_set_mask		= idt_ntb_msg_set_mask,
+	.msg_clear_mask		= idt_ntb_msg_clear_mask,
+	.msg_read		= idt_ntb_msg_read,
+	.peer_msg_write		= idt_ntb_peer_msg_write
+};
+
+/*
+ * idt_register_device() - register IDT NTB device
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * Return: zero on success, otherwise a negative error number.
+ */
+static int idt_register_device(struct idt_ntb_dev *ndev)
+{
+	int ret;
+
+	/* Initialize the rest of NTB device structure and register it */
+	ndev->ntb.ops = &idt_ntb_ops;
+	ndev->ntb.topo = NTB_TOPO_SWITCH;
+
+	ret = ntb_register_device(&ndev->ntb);
+	if (ret != 0) {
+		dev_err(&ndev->ntb.pdev->dev, "Failed to register NTB device");
+		return ret;
+	}
+
+	dev_dbg(&ndev->ntb.pdev->dev, "NTB device successfully registered");
+
+	return 0;
+}
+
+/*
+ * idt_unregister_device() - unregister IDT NTB device
+ * @ndev:	IDT NTB hardware driver descriptor
+ */
+static void idt_unregister_device(struct idt_ntb_dev *ndev)
+{
+	/* Just unregister the NTB device */
+	ntb_unregister_device(&ndev->ntb);
+
+	dev_dbg(&ndev->ntb.pdev->dev, "NTB device unregistered");
+}
+
+/*=============================================================================
+ *                        10. DebugFS node initialization
+ *=============================================================================
+ */
+
+static ssize_t idt_dbgfs_info_read(struct file *filp, char __user *ubuf,
+				   size_t count, loff_t *offp);
+
+/*
+ * Driver DebugFS info file operations
+ */
+static const struct file_operations idt_dbgfs_info_ops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = idt_dbgfs_info_read
+};
+
+/*
+ * idt_dbgfs_info_read() - DebugFS read info node callback
+ * @filp:	File node descriptor.
+ * @ubuf:	User-space buffer to put data to
+ * @count:	Size of the buffer
+ * @offp:	Offset within the file
+ */
+static ssize_t idt_dbgfs_info_read(struct file *filp, char __user *ubuf,
+				   size_t count, loff_t *offp)
+{
+	struct idt_ntb_dev *ndev = filp->private_data;
+	unsigned char temp, frac, idx, pidx, cnt;
+	ssize_t ret = 0, off = 0;
+	unsigned long irqflags;
+	enum ntb_speed speed;
+	enum ntb_width width;
+	char *strbuf;
+	size_t size;
+	u32 data;
+
+	/* Let's limit the buffer size the way the Intel/AMD drivers do */
+	size = min_t(size_t, count, 0x1000U);
+
+	/* Allocate the memory for the buffer */
+	strbuf = kmalloc(size, GFP_KERNEL);
+	if (strbuf == NULL)
+		return -ENOMEM;
+
+	/* Put the data into the string buffer */
+	off += scnprintf(strbuf + off, size - off,
+		"\n\t\tIDT NTB device Information:\n\n");
+
+	/* General local device configurations */
+	off += scnprintf(strbuf + off, size - off,
+		"Local Port %hhu, Partition %hhu\n", ndev->port, ndev->part);
+
+	/* Peer ports information */
+	off += scnprintf(strbuf + off, size - off, "Peers:\n");
+	for (idx = 0; idx < ndev->peer_cnt; idx++) {
+		off += scnprintf(strbuf + off, size - off,
+			"\t%hhu. Port %hhu, Partition %hhu\n",
+			idx, ndev->peers[idx].port, ndev->peers[idx].part);
+	}
+
+	/* Links status */
+	data = idt_ntb_link_is_up(&ndev->ntb, &speed, &width);
+	off += scnprintf(strbuf + off, size - off,
+		"NTB link status\t- 0x%08x, ", data);
+	off += scnprintf(strbuf + off, size - off, "PCIe Gen %d x%d lanes\n",
+		speed, width);
+
+	/* Mapping table entries */
+	off += scnprintf(strbuf + off, size - off, "NTB Mapping Table:\n");
+	for (idx = 0; idx < IDT_MTBL_ENTRY_CNT; idx++) {
+		spin_lock_irqsave(&ndev->mtbl_lock, irqflags);
+		idt_nt_write(ndev, IDT_NT_NTMTBLADDR, idx);
+		data = idt_nt_read(ndev, IDT_NT_NTMTBLDATA);
+		spin_unlock_irqrestore(&ndev->mtbl_lock, irqflags);
+
+		/* Print valid entries only */
+		if (data & IDT_NTMTBLDATA_VALID) {
+			off += scnprintf(strbuf + off, size - off,
+				"\t%hhu. Partition %d, Requester ID 0x%04x\n",
+				idx, GET_FIELD(NTMTBLDATA_PART, data),
+				GET_FIELD(NTMTBLDATA_REQID, data));
+		}
+	}
+	off += scnprintf(strbuf + off, size - off, "\n");
+
+	/* Outbound memory windows information */
+	off += scnprintf(strbuf + off, size - off,
+		"Outbound Memory Windows:\n");
+	for (idx = 0; idx < ndev->mw_cnt; idx += cnt) {
+		data = ndev->mws[idx].type;
+		cnt = idt_get_mw_count(data);
+
+		/* Print Memory Window information */
+		if (data == IDT_MW_DIR)
+			off += scnprintf(strbuf + off, size - off,
+				"\t%hhu.\t", idx);
+		else
+			off += scnprintf(strbuf + off, size - off,
+				"\t%hhu-%hhu.\t", idx, idx + cnt - 1);
+
+		off += scnprintf(strbuf + off, size - off, "%s BAR%hhu, ",
+			idt_get_mw_name(data), ndev->mws[idx].bar);
+
+		off += scnprintf(strbuf + off, size - off,
+			"Address align 0x%08llx, ", ndev->mws[idx].addr_align);
+
+		off += scnprintf(strbuf + off, size - off,
+			"Size align 0x%08llx, Size max %llu\n",
+			ndev->mws[idx].size_align, ndev->mws[idx].size_max);
+	}
+
+	/* Inbound memory windows information */
+	for (pidx = 0; pidx < ndev->peer_cnt; pidx++) {
+		off += scnprintf(strbuf + off, size - off,
+			"Inbound Memory Windows for peer %hhu (Port %hhu):\n",
+			pidx, ndev->peers[pidx].port);
+
+		/* Print Memory Windows information */
+		for (idx = 0; idx < ndev->peers[pidx].mw_cnt; idx += cnt) {
+			data = ndev->peers[pidx].mws[idx].type;
+			cnt = idt_get_mw_count(data);
+
+			if (data == IDT_MW_DIR)
+				off += scnprintf(strbuf + off, size - off,
+					"\t%hhu.\t", idx);
+			else
+				off += scnprintf(strbuf + off, size - off,
+					"\t%hhu-%hhu.\t", idx, idx + cnt - 1);
+
+			off += scnprintf(strbuf + off, size - off,
+				"%s BAR%hhu, ", idt_get_mw_name(data),
+				ndev->peers[pidx].mws[idx].bar);
+
+			off += scnprintf(strbuf + off, size - off,
+				"Address align 0x%08llx, ",
+				ndev->peers[pidx].mws[idx].addr_align);
+
+			off += scnprintf(strbuf + off, size - off,
+				"Size align 0x%08llx, Size max %llu\n",
+				ndev->peers[pidx].mws[idx].size_align,
+				ndev->peers[pidx].mws[idx].size_max);
+		}
+	}
+	off += scnprintf(strbuf + off, size - off, "\n");
+
+	/* Doorbell information */
+	data = idt_sw_read(ndev, IDT_SW_GDBELLSTS);
+	off += scnprintf(strbuf + off, size - off,
+		 "Global Doorbell state\t- 0x%08x\n", data);
+	data = idt_ntb_db_read(&ndev->ntb);
+	off += scnprintf(strbuf + off, size - off,
+		 "Local  Doorbell state\t- 0x%08x\n", data);
+	data = idt_nt_read(ndev, IDT_NT_INDBELLMSK);
+	off += scnprintf(strbuf + off, size - off,
+		 "Local  Doorbell mask\t- 0x%08x\n", data);
+	off += scnprintf(strbuf + off, size - off, "\n");
+
+	/* Messaging information */
+	off += scnprintf(strbuf + off, size - off,
+		 "Message event valid\t- 0x%08x\n", IDT_MSG_MASK);
+	data = idt_ntb_msg_read_sts(&ndev->ntb);
+	off += scnprintf(strbuf + off, size - off,
+		 "Message event status\t- 0x%08x\n", data);
+	data = idt_nt_read(ndev, IDT_NT_MSGSTSMSK);
+	off += scnprintf(strbuf + off, size - off,
+		 "Message event mask\t- 0x%08x\n", data);
+	off += scnprintf(strbuf + off, size - off,
+		 "Message data:\n");
+	for (idx = 0; idx < IDT_MSG_CNT; idx++) {
+		int src;
+		data = idt_ntb_msg_read(&ndev->ntb, &src, idx);
+		off += scnprintf(strbuf + off, size - off,
+			"\t%hhu. 0x%08x from peer %hhu (Port %hhu)\n",
+			idx, data, src, ndev->peers[src].port);
+	}
+	off += scnprintf(strbuf + off, size - off, "\n");
+
+	/* Current temperature */
+	idt_read_temp(ndev, &temp, &frac);
+	off += scnprintf(strbuf + off, size - off,
+		"Switch temperature\t\t- %hhu.%hhuC\n", temp, frac);
+
+	/* Copy the buffer to the User Space */
+	ret = simple_read_from_buffer(ubuf, count, offp, strbuf, off);
+	kfree(strbuf);
+
+	return ret;
+}
+
+/*
+ * idt_init_dbgfs() - initialize DebugFS node
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * Return: zero on success, otherwise a negative error number.
+ */
+static int idt_init_dbgfs(struct idt_ntb_dev *ndev)
+{
+	char devname[64];
+
+	/* If the top directory is not created then do nothing */
+	if (IS_ERR_OR_NULL(dbgfs_topdir)) {
+		dev_info(&ndev->ntb.pdev->dev, "Top DebugFS directory absent");
+		return PTR_ERR(dbgfs_topdir);
+	}
+
+	/* Create the info file node */
+	snprintf(devname, 64, "info:%s", pci_name(ndev->ntb.pdev));
+	ndev->dbgfs_info = debugfs_create_file(devname, 0400, dbgfs_topdir,
+		ndev, &idt_dbgfs_info_ops);
+	if (IS_ERR(ndev->dbgfs_info)) {
+		dev_dbg(&ndev->ntb.pdev->dev, "Failed to create DebugFS node");
+		return PTR_ERR(ndev->dbgfs_info);
+	}
+
+	dev_dbg(&ndev->ntb.pdev->dev, "NTB device DebugFS node created");
+
+	return 0;
+}
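+
+/*
+ * Usage illustration (assumed, not part of the driver): with the top
+ * directory registered by idt_pci_driver_init() below, the node shows up as
+ * /sys/kernel/debug/ntb_hw_idt/info:<pci-device>, and a plain read returns
+ * the report formatted by idt_dbgfs_info_read() above, e.g. from user space
+ * with a hypothetical PCI address:
+ *
+ *	char buf[0x1000];
+ *	int fd = open("/sys/kernel/debug/ntb_hw_idt/info:0000:01:00.0",
+ *		      O_RDONLY);
+ *	ssize_t n = read(fd, buf, sizeof(buf));
+ */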
+
+/*
+ * idt_deinit_dbgfs() - deinitialize DebugFS node
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * Just discard the info node from DebugFS
+ */
+static void idt_deinit_dbgfs(struct idt_ntb_dev *ndev)
+{
+	debugfs_remove(ndev->dbgfs_info);
+
+	dev_dbg(&ndev->ntb.pdev->dev, "NTB device DebugFS node discarded");
+}
+
+/*=============================================================================
+ *                     11. Basic PCIe device initialization
+ *=============================================================================
+ */
+
+/*
+ * idt_check_setup() - Check whether the IDT PCIe-switch is properly
+ *		       pre-initialized
+ * @pdev:	Pointer to the PCI device descriptor
+ *
+ * Return: zero on success, otherwise a negative error number.
+ */
+static int idt_check_setup(struct pci_dev *pdev)
+{
+	u32 data;
+	int ret;
+
+	/* Read the BARSETUP0 */
+	ret = pci_read_config_dword(pdev, IDT_NT_BARSETUP0, &data);
+	if (ret != 0) {
+		dev_err(&pdev->dev,
+			"Failed to read BARSETUP0 config register");
+		return ret;
+	}
+
+	/* Check whether BAR0 is enabled and maps the configuration space */
+	if (!(data & IDT_BARSETUP_EN) || !(data & IDT_BARSETUP_MODE_CFG)) {
+		dev_err(&pdev->dev, "BAR0 doesn't map config space");
+		return -EINVAL;
+	}
+
+	/* Configuration space BAR0 must have a certain size */
+	if ((data & IDT_BARSETUP_SIZE_MASK) != IDT_BARSETUP_SIZE_CFG) {
+		dev_err(&pdev->dev, "Invalid size of config space");
+		return -EINVAL;
+	}
+
+	dev_dbg(&pdev->dev, "NTB device pre-initialized correctly");
+
+	return 0;
+}
+
+/*
+ * idt_create_dev() - create the IDT PCIe-switch driver descriptor
+ * @pdev:	Pointer to the PCI device descriptor
+ * @id:		IDT PCIe-device configuration
+ *
+ * It just allocates memory for the IDT PCIe-switch device structure and
+ * initializes some commonly used fields.
+ *
+ * No release method is needed, since managed device resources are used for
+ * the memory allocation.
+ *
+ * Return: pointer to the descriptor, otherwise a negative error number.
+ */
+static struct idt_ntb_dev *idt_create_dev(struct pci_dev *pdev,
+					  const struct pci_device_id *id)
+{
+	struct idt_ntb_dev *ndev;
+
+	/* Allocate memory for the IDT PCIe-device descriptor */
+	ndev = devm_kzalloc(&pdev->dev, sizeof(*ndev), GFP_KERNEL);
+	if (IS_ERR_OR_NULL(ndev)) {
+		dev_err(&pdev->dev, "Memory allocation failed for descriptor");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/* Save the IDT PCIe-switch ports configuration */
+	ndev->swcfg = (struct idt_89hpes_cfg *)id->driver_data;
+	/* Save the PCI-device pointer inside the NTB device structure */
+	ndev->ntb.pdev = pdev;
+
+	/* Initialize the Doorbell, Message and GASA register spin locks */
+	spin_lock_init(&ndev->db_mask_lock);
+	spin_lock_init(&ndev->msg_mask_lock);
+	spin_lock_init(&ndev->gasa_lock);
+
+	dev_info(&pdev->dev, "IDT %s discovered", ndev->swcfg->name);
+
+	dev_dbg(&pdev->dev, "NTB device descriptor created");
+
+	return ndev;
+}
+
+/*
+ * idt_init_pci() - initialize the basic PCI-related subsystem
+ * @ndev:	Pointer to the IDT PCIe-switch driver descriptor
+ *
+ * Managed device resources will be freed automatically in case of failure or
+ * driver detachment.
+ *
+ * Return: zero on success, otherwise negative error number.
+ */
+static int idt_init_pci(struct idt_ntb_dev *ndev)
+{
+	struct pci_dev *pdev = ndev->ntb.pdev;
+	int ret;
+
+	/* Initialize the bit mask of PCI/NTB DMA */
+	ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (ret != 0) {
+		ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+		if (ret != 0) {
+			dev_err(&pdev->dev, "Failed to set DMA bit mask\n");
+			return ret;
+		}
+		dev_warn(&pdev->dev, "Cannot set DMA highmem bit mask\n");
+	}
+	ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (ret != 0) {
+		ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+		if (ret != 0) {
+			dev_err(&pdev->dev,
+				"Failed to set consistent DMA bit mask\n");
+			return ret;
+		}
+		dev_warn(&pdev->dev,
+			"Cannot set consistent DMA highmem bit mask\n");
+	}
+	ret = dma_coerce_mask_and_coherent(&ndev->ntb.dev,
+					   dma_get_mask(&pdev->dev));
+	if (ret != 0) {
+		dev_err(&pdev->dev, "Failed to set NTB device DMA bit mask\n");
+		return ret;
+	}
+
+	/*
+	 * Enable the device advanced error reporting. It's not critical to
+	 * have AER disabled in the kernel.
+	 */
+	ret = pci_enable_pcie_error_reporting(pdev);
+	if (ret != 0)
+		dev_warn(&pdev->dev, "PCIe AER capability disabled\n");
+	else /* Cleanup uncorrectable error status before getting to init */
+		pci_cleanup_aer_uncorrect_error_status(pdev);
+
+	/* First enable the PCI device */
+	ret = pcim_enable_device(pdev);
+	if (ret != 0) {
+		dev_err(&pdev->dev, "Failed to enable PCIe device\n");
+		goto err_disable_aer;
+	}
+
+	/*
+	 * Enable bus mastering, which effectively enables MSI IRQs and
+	 * request TLP translation
+	 */
+	pci_set_master(pdev);
+
+	/* Request all BARs resources and map BAR0 only */
+	ret = pcim_iomap_regions_request_all(pdev, 1, NTB_NAME);
+	if (ret != 0) {
+		dev_err(&pdev->dev, "Failed to request resources\n");
+		goto err_clear_master;
+	}
+
+	/* Retrieve virtual address of BAR0 - PCI configuration space */
+	ndev->cfgspc = pcim_iomap_table(pdev)[0];
+
+	/* Put the IDT driver data pointer to the PCI-device private pointer */
+	pci_set_drvdata(pdev, ndev);
+
+	dev_dbg(&pdev->dev, "NT-function PCIe interface initialized");
+
+	return 0;
+
+err_clear_master:
+	pci_clear_master(pdev);
+err_disable_aer:
+	(void)pci_disable_pcie_error_reporting(pdev);
+
+	return ret;
+}
+
+/*
+ * idt_deinit_pci() - deinitialize the basic PCI-related subsystem
+ * @ndev:	Pointer to the IDT PCIe-switch driver descriptor
+ *
+ * Managed resources will be freed on the driver detachment
+ */
+static void idt_deinit_pci(struct idt_ntb_dev *ndev)
+{
+	struct pci_dev *pdev = ndev->ntb.pdev;
+
+	/* Clean up the PCI-device private data pointer */
+	pci_set_drvdata(pdev, NULL);
+
+	/* Clear bus mastering, disabling the request TLP translation */
+	pci_clear_master(pdev);
+
+	/* Disable the AER capability */
+	(void)pci_disable_pcie_error_reporting(pdev);
+
+	dev_dbg(&pdev->dev, "NT-function PCIe interface cleared");
+}
+
+/*===========================================================================
+ *                       12. PCI bus callback functions
+ *===========================================================================
+ */
+
+/*
+ * idt_pci_probe() - PCI device probe callback
+ * @pdev:	Pointer to PCI device structure
+ * @id:		PCIe device custom descriptor
+ *
+ * Return: zero on success, otherwise negative error number
+ */
+static int idt_pci_probe(struct pci_dev *pdev,
+			 const struct pci_device_id *id)
+{
+	struct idt_ntb_dev *ndev;
+	int ret;
+
+	/* Check whether IDT PCIe-switch is properly pre-initialized */
+	ret = idt_check_setup(pdev);
+	if (ret != 0)
+		return ret;
+
+	/* Allocate the memory for IDT NTB device data */
+	ndev = idt_create_dev(pdev, id);
+	if (IS_ERR_OR_NULL(ndev))
+		return PTR_ERR(ndev);
+
+	/* Initialize the basic PCI subsystem of the device */
+	ret = idt_init_pci(ndev);
+	if (ret != 0)
+		return ret;
+
+	/* Scan ports of the IDT PCIe-switch */
+	(void)idt_scan_ports(ndev);
+
+	/* Initialize NTB link events subsystem */
+	idt_init_link(ndev);
+
+	/* Initialize MWs subsystem */
+	ret = idt_init_mws(ndev);
+	if (ret != 0)
+		goto err_deinit_link;
+
+	/* Initialize Messaging subsystem */
+	idt_init_msg(ndev);
+
+	/* Initialize IDT interrupts handler */
+	ret = idt_init_isr(ndev);
+	if (ret != 0)
+		goto err_deinit_link;
+
+	/* Register IDT NTB devices on the NTB bus */
+	ret = idt_register_device(ndev);
+	if (ret != 0)
+		goto err_deinit_isr;
+
+	/* Initialize DebugFS info node */
+	(void)idt_init_dbgfs(ndev);
+
+	/* IDT PCIe-switch NTB driver is finally initialized */
+	dev_info(&pdev->dev, "IDT NTB device is ready");
+
+	/* May the force be with us... */
+	return 0;
+
+err_deinit_isr:
+	idt_deinit_isr(ndev);
+err_deinit_link:
+	idt_deinit_link(ndev);
+	idt_deinit_pci(ndev);
+
+	return ret;
+}
+
+/*
+ * idt_pci_remove() - PCI device remove callback
+ * @pdev:	Pointer to PCI device structure
+ */
+static void idt_pci_remove(struct pci_dev *pdev)
+{
+	struct idt_ntb_dev *ndev = pci_get_drvdata(pdev);
+
+	/* Deinit the DebugFS node */
+	idt_deinit_dbgfs(ndev);
+
+	/* Unregister NTB device */
+	idt_unregister_device(ndev);
+
+	/* Stop the interrupts handling */
+	idt_deinit_isr(ndev);
+
+	/* Deinitialize link event subsystem */
+	idt_deinit_link(ndev);
+
+	/* Deinit basic PCI subsystem */
+	idt_deinit_pci(ndev);
+
+	/* IDT PCIe-switch NTB driver is finally removed */
+	dev_info(&pdev->dev, "IDT NTB device is removed");
+
+	/* Sayonara... */
+}
+
+/*
+ * IDT PCIe-switch models ports configuration structures
+ */
+static const struct idt_89hpes_cfg idt_89hpes24nt6ag2_config = {
+	.name = "89HPES24NT6AG2",
+	.port_cnt = 6, .ports = {0, 2, 4, 6, 8, 12}
+};
+static const struct idt_89hpes_cfg idt_89hpes32nt8ag2_config = {
+	.name = "89HPES32NT8AG2",
+	.port_cnt = 8, .ports = {0, 2, 4, 6, 8, 12, 16, 20}
+};
+static const struct idt_89hpes_cfg idt_89hpes32nt8bg2_config = {
+	.name = "89HPES32NT8BG2",
+	.port_cnt = 8, .ports = {0, 2, 4, 6, 8, 12, 16, 20}
+};
+static const struct idt_89hpes_cfg idt_89hpes12nt12g2_config = {
+	.name = "89HPES12NT12G2",
+	.port_cnt = 3, .ports = {0, 8, 16}
+};
+static const struct idt_89hpes_cfg idt_89hpes16nt16g2_config = {
+	.name = "89HPES16NT16G2",
+	.port_cnt = 4, .ports = {0, 8, 12, 16}
+};
+static const struct idt_89hpes_cfg idt_89hpes24nt24g2_config = {
+	.name = "89HPES24NT24G2",
+	.port_cnt = 8, .ports = {0, 2, 4, 6, 8, 12, 16, 20}
+};
+static const struct idt_89hpes_cfg idt_89hpes32nt24ag2_config = {
+	.name = "89HPES32NT24AG2",
+	.port_cnt = 8, .ports = {0, 2, 4, 6, 8, 12, 16, 20}
+};
+static const struct idt_89hpes_cfg idt_89hpes32nt24bg2_config = {
+	.name = "89HPES32NT24BG2",
+	.port_cnt = 8, .ports = {0, 2, 4, 6, 8, 12, 16, 20}
+};
+
+/*
+ * PCI-ids table of the supported IDT PCIe-switch devices
+ */
+static const struct pci_device_id idt_pci_tbl[] = {
+	{IDT_PCI_DEVICE_IDS(89HPES24NT6AG2,  idt_89hpes24nt6ag2_config)},
+	{IDT_PCI_DEVICE_IDS(89HPES32NT8AG2,  idt_89hpes32nt8ag2_config)},
+	{IDT_PCI_DEVICE_IDS(89HPES32NT8BG2,  idt_89hpes32nt8bg2_config)},
+	{IDT_PCI_DEVICE_IDS(89HPES12NT12G2,  idt_89hpes12nt12g2_config)},
+	{IDT_PCI_DEVICE_IDS(89HPES16NT16G2,  idt_89hpes16nt16g2_config)},
+	{IDT_PCI_DEVICE_IDS(89HPES24NT24G2,  idt_89hpes24nt24g2_config)},
+	{IDT_PCI_DEVICE_IDS(89HPES32NT24AG2, idt_89hpes32nt24ag2_config)},
+	{IDT_PCI_DEVICE_IDS(89HPES32NT24BG2, idt_89hpes32nt24bg2_config)},
+	{0}
+};
+MODULE_DEVICE_TABLE(pci, idt_pci_tbl);
+
+/*
+ * IDT PCIe-switch NT-function device driver structure definition
+ */
+static struct pci_driver idt_pci_driver = {
+	.name		= KBUILD_MODNAME,
+	.probe		= idt_pci_probe,
+	.remove		= idt_pci_remove,
+	.id_table	= idt_pci_tbl,
+};
+
+static int __init idt_pci_driver_init(void)
+{
+	pr_info("%s %s\n", NTB_DESC, NTB_VER);
+
+	/* Create the top DebugFS directory if the FS is initialized */
+	if (debugfs_initialized())
+		dbgfs_topdir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+
+	/* Register the NTB hardware driver to handle the PCI device */
+	return pci_register_driver(&idt_pci_driver);
+}
+module_init(idt_pci_driver_init);
+
+static void __exit idt_pci_driver_exit(void)
+{
+	/* Unregister the NTB hardware driver */
+	pci_unregister_driver(&idt_pci_driver);
+
+	/* Discard the top DebugFS directory */
+	debugfs_remove_recursive(dbgfs_topdir);
+}
+module_exit(idt_pci_driver_exit);
+
diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.h b/drivers/ntb/hw/idt/ntb_hw_idt.h
new file mode 100644
index 0000000..856fd18
--- /dev/null
+++ b/drivers/ntb/hw/idt/ntb_hw_idt.h
@@ -0,0 +1,1149 @@
+/*
+ *   This file is provided under a GPLv2 license.  When using or
+ *   redistributing this file, you may do so under that license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright (C) 2016 T-Platforms All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify it
+ *   under the terms and conditions of the GNU General Public License,
+ *   version 2, as published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+ *   Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License along
+ *   with this program; if not, one can be found http://www.gnu.org/licenses/.
+ *
+ *   The full GNU General Public License is included in this distribution in
+ *   the file called "COPYING".
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * IDT PCIe-switch NTB Linux driver
+ *
+ * Contact Information:
+ * Serge Semin <fancer.lancer@gmail.com>, <Sergey.Semin@t-platforms.ru>
+ */
+
+#ifndef NTB_HW_IDT_H
+#define NTB_HW_IDT_H
+
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/ntb.h>
+
+
+/*
+ * This macro is used to create a struct pci_device_id entry that matches
+ * the supported IDT PCIe-switches
+ * @devname: Capitalized name of the particular device
+ * @data: Configuration structure passed to the driver of the particular device
+ */
+#define IDT_PCI_DEVICE_IDS(devname, data) \
+	.vendor = PCI_VENDOR_ID_IDT, .device = PCI_DEVICE_ID_IDT_##devname, \
+	.subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, \
+	.class = (PCI_CLASS_BRIDGE_OTHER << 8), .class_mask = (0xFFFF00), \
+	.driver_data = (kernel_ulong_t)&data
+
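+/*
+ * For illustration only (not part of the driver): an idt_pci_tbl entry such
+ * as IDT_PCI_DEVICE_IDS(89HPES24NT6AG2, idt_89hpes24nt6ag2_config) expands
+ * to the following initializer:
+ *
+ *	{ .vendor = PCI_VENDOR_ID_IDT,
+ *	  .device = PCI_DEVICE_ID_IDT_89HPES24NT6AG2,
+ *	  .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID,
+ *	  .class = (PCI_CLASS_BRIDGE_OTHER << 8), .class_mask = (0xFFFF00),
+ *	  .driver_data = (kernel_ulong_t)&idt_89hpes24nt6ag2_config }
+ */
+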
+/*
+ * IDT PCIe-switches device IDs
+ */
+#define PCI_DEVICE_ID_IDT_89HPES24NT6AG2  0x8091
+#define PCI_DEVICE_ID_IDT_89HPES32NT8AG2  0x808F
+#define PCI_DEVICE_ID_IDT_89HPES32NT8BG2  0x8088
+#define PCI_DEVICE_ID_IDT_89HPES12NT12G2  0x8092
+#define PCI_DEVICE_ID_IDT_89HPES16NT16G2  0x8090
+#define PCI_DEVICE_ID_IDT_89HPES24NT24G2  0x808E
+#define PCI_DEVICE_ID_IDT_89HPES32NT24AG2 0x808C
+#define PCI_DEVICE_ID_IDT_89HPES32NT24BG2 0x808A
+
+/*
+ * NT-function Configuration Space registers
+ * NOTE 1) The IDT PCIe-switch internal data is little-endian,
+ *      so this must be taken into account in the driver
+ *      internals.
+ *      2) Additionally, the registers should be accessed either
+ *      with byte-enables corresponding to their native size or
+ *      with the size of one DWORD.
+ *
+ * So to simplify the driver code, only DWORD-sized read/write
+ * operations are used.
+ */
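+
+/*
+ * Illustrative sketch only (the driver's actual accessors are defined in
+ * ntb_hw_idt.c): following the DWORD-only rule above, every NT-function
+ * register access boils down to a 32-bit MMIO operation on the BAR0-mapped
+ * configuration space, along the lines of:
+ *
+ *	static u32 example_nt_read(struct idt_ntb_dev *ndev, unsigned int reg)
+ *	{
+ *		return ioread32(ndev->cfgspc + (ptrdiff_t)reg);
+ *	}
+ *
+ *	static void example_nt_write(struct idt_ntb_dev *ndev,
+ *				     unsigned int reg, u32 data)
+ *	{
+ *		iowrite32(data, ndev->cfgspc + (ptrdiff_t)reg);
+ *	}
+ */
+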
+/* PCI Express Configuration Space */
+/* PCI Express command/status register	(DWORD) */
+#define IDT_NT_PCICMDSTS		0x00004U
+/* PCI Express Device Capabilities	(DWORD) */
+#define IDT_NT_PCIEDCAP			0x00044U
+/* PCI Express Device Control/Status	(WORD+WORD) */
+#define IDT_NT_PCIEDCTLSTS		0x00048U
+/* PCI Express Link Capabilities	(DWORD) */
+#define IDT_NT_PCIELCAP			0x0004CU
+/* PCI Express Link Control/Status	(WORD+WORD) */
+#define IDT_NT_PCIELCTLSTS		0x00050U
+/* PCI Express Device Capabilities 2	(DWORD) */
+#define IDT_NT_PCIEDCAP2		0x00064U
+/* PCI Express Device Control 2		(WORD+WORD) */
+#define IDT_NT_PCIEDCTL2		0x00068U
+/* PCI Power Management Control and Status (DWORD) */
+#define IDT_NT_PMCSR			0x000C4U
+/*==========================================*/
+/* IDT Proprietary NT-port-specific registers */
+/* NT-function main control registers */
+/* NT Endpoint Control			(DWORD) */
+#define IDT_NT_NTCTL			0x00400U
+/* NT Endpoint Interrupt Status/Mask	(DWORD) */
+#define IDT_NT_NTINTSTS			0x00404U
+#define IDT_NT_NTINTMSK			0x00408U
+/* NT Endpoint Signal Data		(DWORD) */
+#define IDT_NT_NTSDATA			0x0040CU
+/* NT Endpoint Global Signal		(DWORD) */
+#define IDT_NT_NTGSIGNAL		0x00410U
+/* Internal Error Reporting Mask 0/1	(DWORD) */
+#define IDT_NT_NTIERRORMSK0		0x00414U
+#define IDT_NT_NTIERRORMSK1		0x00418U
+/* Doorbell registers */
+/* NT Outbound Doorbell Set		(DWORD) */
+#define IDT_NT_OUTDBELLSET		0x00420U
+/* NT Inbound Doorbell Status/Mask	(DWORD) */
+#define IDT_NT_INDBELLSTS		0x00428U
+#define IDT_NT_INDBELLMSK		0x0042CU
+/* Message registers */
+/* Outbound Message N			(DWORD) */
+#define IDT_NT_OUTMSG0			0x00430U
+#define IDT_NT_OUTMSG1			0x00434U
+#define IDT_NT_OUTMSG2			0x00438U
+#define IDT_NT_OUTMSG3			0x0043CU
+/* Inbound Message N			(DWORD) */
+#define IDT_NT_INMSG0			0x00440U
+#define IDT_NT_INMSG1			0x00444U
+#define IDT_NT_INMSG2			0x00448U
+#define IDT_NT_INMSG3			0x0044CU
+/* Inbound Message Source N		(DWORD) */
+#define IDT_NT_INMSGSRC0		0x00450U
+#define IDT_NT_INMSGSRC1		0x00454U
+#define IDT_NT_INMSGSRC2		0x00458U
+#define IDT_NT_INMSGSRC3		0x0045CU
+/* Message Status			(DWORD) */
+#define IDT_NT_MSGSTS			0x00460U
+/* Message Status Mask			(DWORD) */
+#define IDT_NT_MSGSTSMSK		0x00464U
+/* BAR-setup registers */
+/* BAR N Setup/Limit Address/Lower and Upper Translated Base Address (DWORD) */
+#define IDT_NT_BARSETUP0		0x00470U
+#define IDT_NT_BARLIMIT0		0x00474U
+#define IDT_NT_BARLTBASE0		0x00478U
+#define IDT_NT_BARUTBASE0		0x0047CU
+#define IDT_NT_BARSETUP1		0x00480U
+#define IDT_NT_BARLIMIT1		0x00484U
+#define IDT_NT_BARLTBASE1		0x00488U
+#define IDT_NT_BARUTBASE1		0x0048CU
+#define IDT_NT_BARSETUP2		0x00490U
+#define IDT_NT_BARLIMIT2		0x00494U
+#define IDT_NT_BARLTBASE2		0x00498U
+#define IDT_NT_BARUTBASE2		0x0049CU
+#define IDT_NT_BARSETUP3		0x004A0U
+#define IDT_NT_BARLIMIT3		0x004A4U
+#define IDT_NT_BARLTBASE3		0x004A8U
+#define IDT_NT_BARUTBASE3		0x004ACU
+#define IDT_NT_BARSETUP4		0x004B0U
+#define IDT_NT_BARLIMIT4		0x004B4U
+#define IDT_NT_BARLTBASE4		0x004B8U
+#define IDT_NT_BARUTBASE4		0x004BCU
+#define IDT_NT_BARSETUP5		0x004C0U
+#define IDT_NT_BARLIMIT5		0x004C4U
+#define IDT_NT_BARLTBASE5		0x004C8U
+#define IDT_NT_BARUTBASE5		0x004CCU
+/* NT mapping table registers */
+/* NT Mapping Table Address/Status/Data	(DWORD) */
+#define IDT_NT_NTMTBLADDR		0x004D0U
+#define IDT_NT_NTMTBLSTS		0x004D4U
+#define IDT_NT_NTMTBLDATA		0x004D8U
+/* Requester ID (Bus:Device:Function) Capture	(DWORD) */
+#define IDT_NT_REQIDCAP			0x004DCU
+/* Memory Windows Lookup table registers */
+/* Lookup Table Offset/Lower, Middle and Upper data	(DWORD) */
+#define IDT_NT_LUTOFFSET		0x004E0U
+#define IDT_NT_LUTLDATA			0x004E4U
+#define IDT_NT_LUTMDATA			0x004E8U
+#define IDT_NT_LUTUDATA			0x004ECU
+/* NT Endpoint Uncorrectable/Correctable Errors Emulation registers (DWORD) */
+#define IDT_NT_NTUEEM			0x004F0U
+#define IDT_NT_NTCEEM			0x004F4U
+/* Global Address Space Access/Data registers	(DWORD) */
+#define IDT_NT_GASAADDR			0x00FF8U
+#define IDT_NT_GASADATA			0x00FFCU
+
+/*
+ * IDT PCIe-switch Global Configuration and Status registers
+ */
+/* Port N Configuration register in global space */
+/* PCI Express command/status and link control/status registers (WORD+WORD) */
+#define IDT_SW_NTP0_PCIECMDSTS		0x01004U
+#define IDT_SW_NTP0_PCIELCTLSTS		0x01050U
+/* NT-function control register		(DWORD) */
+#define IDT_SW_NTP0_NTCTL		0x01400U
+/* BAR setup/limit/base address registers (DWORD) */
+#define IDT_SW_NTP0_BARSETUP0		0x01470U
+#define IDT_SW_NTP0_BARLIMIT0		0x01474U
+#define IDT_SW_NTP0_BARLTBASE0		0x01478U
+#define IDT_SW_NTP0_BARUTBASE0		0x0147CU
+#define IDT_SW_NTP0_BARSETUP1		0x01480U
+#define IDT_SW_NTP0_BARLIMIT1		0x01484U
+#define IDT_SW_NTP0_BARLTBASE1		0x01488U
+#define IDT_SW_NTP0_BARUTBASE1		0x0148CU
+#define IDT_SW_NTP0_BARSETUP2		0x01490U
+#define IDT_SW_NTP0_BARLIMIT2		0x01494U
+#define IDT_SW_NTP0_BARLTBASE2		0x01498U
+#define IDT_SW_NTP0_BARUTBASE2		0x0149CU
+#define IDT_SW_NTP0_BARSETUP3		0x014A0U
+#define IDT_SW_NTP0_BARLIMIT3		0x014A4U
+#define IDT_SW_NTP0_BARLTBASE3		0x014A8U
+#define IDT_SW_NTP0_BARUTBASE3		0x014ACU
+#define IDT_SW_NTP0_BARSETUP4		0x014B0U
+#define IDT_SW_NTP0_BARLIMIT4		0x014B4U
+#define IDT_SW_NTP0_BARLTBASE4		0x014B8U
+#define IDT_SW_NTP0_BARUTBASE4		0x014BCU
+#define IDT_SW_NTP0_BARSETUP5		0x014C0U
+#define IDT_SW_NTP0_BARLIMIT5		0x014C4U
+#define IDT_SW_NTP0_BARLTBASE5		0x014C8U
+#define IDT_SW_NTP0_BARUTBASE5		0x014CCU
+/* PCI Express command/status and link control/status registers (WORD+WORD) */
+#define IDT_SW_NTP2_PCIECMDSTS		0x05004U
+#define IDT_SW_NTP2_PCIELCTLSTS		0x05050U
+/* NT-function control register		(DWORD) */
+#define IDT_SW_NTP2_NTCTL		0x05400U
+/* BAR setup/limit/base address registers (DWORD) */
+#define IDT_SW_NTP2_BARSETUP0		0x05470U
+#define IDT_SW_NTP2_BARLIMIT0		0x05474U
+#define IDT_SW_NTP2_BARLTBASE0		0x05478U
+#define IDT_SW_NTP2_BARUTBASE0		0x0547CU
+#define IDT_SW_NTP2_BARSETUP1		0x05480U
+#define IDT_SW_NTP2_BARLIMIT1		0x05484U
+#define IDT_SW_NTP2_BARLTBASE1		0x05488U
+#define IDT_SW_NTP2_BARUTBASE1		0x0548CU
+#define IDT_SW_NTP2_BARSETUP2		0x05490U
+#define IDT_SW_NTP2_BARLIMIT2		0x05494U
+#define IDT_SW_NTP2_BARLTBASE2		0x05498U
+#define IDT_SW_NTP2_BARUTBASE2		0x0549CU
+#define IDT_SW_NTP2_BARSETUP3		0x054A0U
+#define IDT_SW_NTP2_BARLIMIT3		0x054A4U
+#define IDT_SW_NTP2_BARLTBASE3		0x054A8U
+#define IDT_SW_NTP2_BARUTBASE3		0x054ACU
+#define IDT_SW_NTP2_BARSETUP4		0x054B0U
+#define IDT_SW_NTP2_BARLIMIT4		0x054B4U
+#define IDT_SW_NTP2_BARLTBASE4		0x054B8U
+#define IDT_SW_NTP2_BARUTBASE4		0x054BCU
+#define IDT_SW_NTP2_BARSETUP5		0x054C0U
+#define IDT_SW_NTP2_BARLIMIT5		0x054C4U
+#define IDT_SW_NTP2_BARLTBASE5		0x054C8U
+#define IDT_SW_NTP2_BARUTBASE5		0x054CCU
+/* PCI Express command/status and link control/status registers (WORD+WORD) */
+#define IDT_SW_NTP4_PCIECMDSTS		0x09004U
+#define IDT_SW_NTP4_PCIELCTLSTS		0x09050U
+/* NT-function control register		(DWORD) */
+#define IDT_SW_NTP4_NTCTL		0x09400U
+/* BAR setup/limit/base address registers (DWORD) */
+#define IDT_SW_NTP4_BARSETUP0		0x09470U
+#define IDT_SW_NTP4_BARLIMIT0		0x09474U
+#define IDT_SW_NTP4_BARLTBASE0		0x09478U
+#define IDT_SW_NTP4_BARUTBASE0		0x0947CU
+#define IDT_SW_NTP4_BARSETUP1		0x09480U
+#define IDT_SW_NTP4_BARLIMIT1		0x09484U
+#define IDT_SW_NTP4_BARLTBASE1		0x09488U
+#define IDT_SW_NTP4_BARUTBASE1		0x0948CU
+#define IDT_SW_NTP4_BARSETUP2		0x09490U
+#define IDT_SW_NTP4_BARLIMIT2		0x09494U
+#define IDT_SW_NTP4_BARLTBASE2		0x09498U
+#define IDT_SW_NTP4_BARUTBASE2		0x0949CU
+#define IDT_SW_NTP4_BARSETUP3		0x094A0U
+#define IDT_SW_NTP4_BARLIMIT3		0x094A4U
+#define IDT_SW_NTP4_BARLTBASE3		0x094A8U
+#define IDT_SW_NTP4_BARUTBASE3		0x094ACU
+#define IDT_SW_NTP4_BARSETUP4		0x094B0U
+#define IDT_SW_NTP4_BARLIMIT4		0x094B4U
+#define IDT_SW_NTP4_BARLTBASE4		0x094B8U
+#define IDT_SW_NTP4_BARUTBASE4		0x094BCU
+#define IDT_SW_NTP4_BARSETUP5		0x094C0U
+#define IDT_SW_NTP4_BARLIMIT5		0x094C4U
+#define IDT_SW_NTP4_BARLTBASE5		0x094C8U
+#define IDT_SW_NTP4_BARUTBASE5		0x094CCU
+/* PCI Express command/status and link control/status registers (WORD+WORD) */
+#define IDT_SW_NTP6_PCIECMDSTS		0x0D004U
+#define IDT_SW_NTP6_PCIELCTLSTS		0x0D050U
+/* NT-function control register		(DWORD) */
+#define IDT_SW_NTP6_NTCTL		0x0D400U
+/* BAR setup/limit/base address registers (DWORD) */
+#define IDT_SW_NTP6_BARSETUP0		0x0D470U
+#define IDT_SW_NTP6_BARLIMIT0		0x0D474U
+#define IDT_SW_NTP6_BARLTBASE0		0x0D478U
+#define IDT_SW_NTP6_BARUTBASE0		0x0D47CU
+#define IDT_SW_NTP6_BARSETUP1		0x0D480U
+#define IDT_SW_NTP6_BARLIMIT1		0x0D484U
+#define IDT_SW_NTP6_BARLTBASE1		0x0D488U
+#define IDT_SW_NTP6_BARUTBASE1		0x0D48CU
+#define IDT_SW_NTP6_BARSETUP2		0x0D490U
+#define IDT_SW_NTP6_BARLIMIT2		0x0D494U
+#define IDT_SW_NTP6_BARLTBASE2		0x0D498U
+#define IDT_SW_NTP6_BARUTBASE2		0x0D49CU
+#define IDT_SW_NTP6_BARSETUP3		0x0D4A0U
+#define IDT_SW_NTP6_BARLIMIT3		0x0D4A4U
+#define IDT_SW_NTP6_BARLTBASE3		0x0D4A8U
+#define IDT_SW_NTP6_BARUTBASE3		0x0D4ACU
+#define IDT_SW_NTP6_BARSETUP4		0x0D4B0U
+#define IDT_SW_NTP6_BARLIMIT4		0x0D4B4U
+#define IDT_SW_NTP6_BARLTBASE4		0x0D4B8U
+#define IDT_SW_NTP6_BARUTBASE4		0x0D4BCU
+#define IDT_SW_NTP6_BARSETUP5		0x0D4C0U
+#define IDT_SW_NTP6_BARLIMIT5		0x0D4C4U
+#define IDT_SW_NTP6_BARLTBASE5		0x0D4C8U
+#define IDT_SW_NTP6_BARUTBASE5		0x0D4CCU
+/* PCI Express command/status and link control/status registers (WORD+WORD) */
+#define IDT_SW_NTP8_PCIECMDSTS		0x11004U
+#define IDT_SW_NTP8_PCIELCTLSTS		0x11050U
+/* NT-function control register		(DWORD) */
+#define IDT_SW_NTP8_NTCTL		0x11400U
+/* BAR setup/limit/base address registers (DWORD) */
+#define IDT_SW_NTP8_BARSETUP0		0x11470U
+#define IDT_SW_NTP8_BARLIMIT0		0x11474U
+#define IDT_SW_NTP8_BARLTBASE0		0x11478U
+#define IDT_SW_NTP8_BARUTBASE0		0x1147CU
+#define IDT_SW_NTP8_BARSETUP1		0x11480U
+#define IDT_SW_NTP8_BARLIMIT1		0x11484U
+#define IDT_SW_NTP8_BARLTBASE1		0x11488U
+#define IDT_SW_NTP8_BARUTBASE1		0x1148CU
+#define IDT_SW_NTP8_BARSETUP2		0x11490U
+#define IDT_SW_NTP8_BARLIMIT2		0x11494U
+#define IDT_SW_NTP8_BARLTBASE2		0x11498U
+#define IDT_SW_NTP8_BARUTBASE2		0x1149CU
+#define IDT_SW_NTP8_BARSETUP3		0x114A0U
+#define IDT_SW_NTP8_BARLIMIT3		0x114A4U
+#define IDT_SW_NTP8_BARLTBASE3		0x114A8U
+#define IDT_SW_NTP8_BARUTBASE3		0x114ACU
+#define IDT_SW_NTP8_BARSETUP4		0x114B0U
+#define IDT_SW_NTP8_BARLIMIT4		0x114B4U
+#define IDT_SW_NTP8_BARLTBASE4		0x114B8U
+#define IDT_SW_NTP8_BARUTBASE4		0x114BCU
+#define IDT_SW_NTP8_BARSETUP5		0x114C0U
+#define IDT_SW_NTP8_BARLIMIT5		0x114C4U
+#define IDT_SW_NTP8_BARLTBASE5		0x114C8U
+#define IDT_SW_NTP8_BARUTBASE5		0x114CCU
+/* PCI Express command/status and link control/status registers (WORD+WORD) */
+#define IDT_SW_NTP12_PCIECMDSTS		0x19004U
+#define IDT_SW_NTP12_PCIELCTLSTS	0x19050U
+/* NT-function control register		(DWORD) */
+#define IDT_SW_NTP12_NTCTL		0x19400U
+/* BAR setup/limit/base address registers (DWORD) */
+#define IDT_SW_NTP12_BARSETUP0		0x19470U
+#define IDT_SW_NTP12_BARLIMIT0		0x19474U
+#define IDT_SW_NTP12_BARLTBASE0		0x19478U
+#define IDT_SW_NTP12_BARUTBASE0		0x1947CU
+#define IDT_SW_NTP12_BARSETUP1		0x19480U
+#define IDT_SW_NTP12_BARLIMIT1		0x19484U
+#define IDT_SW_NTP12_BARLTBASE1		0x19488U
+#define IDT_SW_NTP12_BARUTBASE1		0x1948CU
+#define IDT_SW_NTP12_BARSETUP2		0x19490U
+#define IDT_SW_NTP12_BARLIMIT2		0x19494U
+#define IDT_SW_NTP12_BARLTBASE2		0x19498U
+#define IDT_SW_NTP12_BARUTBASE2		0x1949CU
+#define IDT_SW_NTP12_BARSETUP3		0x194A0U
+#define IDT_SW_NTP12_BARLIMIT3		0x194A4U
+#define IDT_SW_NTP12_BARLTBASE3		0x194A8U
+#define IDT_SW_NTP12_BARUTBASE3		0x194ACU
+#define IDT_SW_NTP12_BARSETUP4		0x194B0U
+#define IDT_SW_NTP12_BARLIMIT4		0x194B4U
+#define IDT_SW_NTP12_BARLTBASE4		0x194B8U
+#define IDT_SW_NTP12_BARUTBASE4		0x194BCU
+#define IDT_SW_NTP12_BARSETUP5		0x194C0U
+#define IDT_SW_NTP12_BARLIMIT5		0x194C4U
+#define IDT_SW_NTP12_BARLTBASE5		0x194C8U
+#define IDT_SW_NTP12_BARUTBASE5		0x194CCU
+/* PCI Express command/status and link control/status registers (WORD+WORD) */
+#define IDT_SW_NTP16_PCIECMDSTS		0x21004U
+#define IDT_SW_NTP16_PCIELCTLSTS	0x21050U
+/* NT-function control register		(DWORD) */
+#define IDT_SW_NTP16_NTCTL		0x21400U
+/* BAR setup/limit/base address registers (DWORD) */
+#define IDT_SW_NTP16_BARSETUP0		0x21470U
+#define IDT_SW_NTP16_BARLIMIT0		0x21474U
+#define IDT_SW_NTP16_BARLTBASE0		0x21478U
+#define IDT_SW_NTP16_BARUTBASE0		0x2147CU
+#define IDT_SW_NTP16_BARSETUP1		0x21480U
+#define IDT_SW_NTP16_BARLIMIT1		0x21484U
+#define IDT_SW_NTP16_BARLTBASE1		0x21488U
+#define IDT_SW_NTP16_BARUTBASE1		0x2148CU
+#define IDT_SW_NTP16_BARSETUP2		0x21490U
+#define IDT_SW_NTP16_BARLIMIT2		0x21494U
+#define IDT_SW_NTP16_BARLTBASE2		0x21498U
+#define IDT_SW_NTP16_BARUTBASE2		0x2149CU
+#define IDT_SW_NTP16_BARSETUP3		0x214A0U
+#define IDT_SW_NTP16_BARLIMIT3		0x214A4U
+#define IDT_SW_NTP16_BARLTBASE3		0x214A8U
+#define IDT_SW_NTP16_BARUTBASE3		0x214ACU
+#define IDT_SW_NTP16_BARSETUP4		0x214B0U
+#define IDT_SW_NTP16_BARLIMIT4		0x214B4U
+#define IDT_SW_NTP16_BARLTBASE4		0x214B8U
+#define IDT_SW_NTP16_BARUTBASE4		0x214BCU
+#define IDT_SW_NTP16_BARSETUP5		0x214C0U
+#define IDT_SW_NTP16_BARLIMIT5		0x214C4U
+#define IDT_SW_NTP16_BARLTBASE5		0x214C8U
+#define IDT_SW_NTP16_BARUTBASE5		0x214CCU
+/* PCI Express command/status and link control/status registers (WORD+WORD) */
+#define IDT_SW_NTP20_PCIECMDSTS		0x29004U
+#define IDT_SW_NTP20_PCIELCTLSTS	0x29050U
+/* NT-function control register		(DWORD) */
+#define IDT_SW_NTP20_NTCTL		0x29400U
+/* BAR setup/limit/base address registers (DWORD) */
+#define IDT_SW_NTP20_BARSETUP0		0x29470U
+#define IDT_SW_NTP20_BARLIMIT0		0x29474U
+#define IDT_SW_NTP20_BARLTBASE0		0x29478U
+#define IDT_SW_NTP20_BARUTBASE0		0x2947CU
+#define IDT_SW_NTP20_BARSETUP1		0x29480U
+#define IDT_SW_NTP20_BARLIMIT1		0x29484U
+#define IDT_SW_NTP20_BARLTBASE1		0x29488U
+#define IDT_SW_NTP20_BARUTBASE1		0x2948CU
+#define IDT_SW_NTP20_BARSETUP2		0x29490U
+#define IDT_SW_NTP20_BARLIMIT2		0x29494U
+#define IDT_SW_NTP20_BARLTBASE2		0x29498U
+#define IDT_SW_NTP20_BARUTBASE2		0x2949CU
+#define IDT_SW_NTP20_BARSETUP3		0x294A0U
+#define IDT_SW_NTP20_BARLIMIT3		0x294A4U
+#define IDT_SW_NTP20_BARLTBASE3		0x294A8U
+#define IDT_SW_NTP20_BARUTBASE3		0x294ACU
+#define IDT_SW_NTP20_BARSETUP4		0x294B0U
+#define IDT_SW_NTP20_BARLIMIT4		0x294B4U
+#define IDT_SW_NTP20_BARLTBASE4		0x294B8U
+#define IDT_SW_NTP20_BARUTBASE4		0x294BCU
+#define IDT_SW_NTP20_BARSETUP5		0x294C0U
+#define IDT_SW_NTP20_BARLIMIT5		0x294C4U
+#define IDT_SW_NTP20_BARLTBASE5		0x294C8U
+#define IDT_SW_NTP20_BARUTBASE5		0x294CCU
+/* IDT PCIe-switch control register	(DWORD) */
+#define IDT_SW_CTL			0x3E000U
+/* Boot Configuration Vector Status	(DWORD) */
+#define IDT_SW_BCVSTS			0x3E004U
+/* Port Clocking Mode			(DWORD) */
+#define IDT_SW_PCLKMODE			0x3E008U
+/* Reset Drain Delay			(DWORD) */
+#define IDT_SW_RDRAINDELAY		0x3E080U
+/* Port Operating Mode Change Drain Delay (DWORD) */
+#define IDT_SW_POMCDELAY		0x3E084U
+/* Side Effect Delay			(DWORD) */
+#define IDT_SW_SEDELAY			0x3E088U
+/* Upstream Secondary Bus Reset Delay	(DWORD) */
+#define IDT_SW_SSBRDELAY		0x3E08CU
+/* Switch partition N Control/Status/Failover registers */
+#define IDT_SW_SWPART0CTL		0x3E100U
+#define IDT_SW_SWPART0STS		0x3E104U
+#define IDT_SW_SWPART0FCTL		0x3E108U
+#define IDT_SW_SWPART1CTL		0x3E120U
+#define IDT_SW_SWPART1STS		0x3E124U
+#define IDT_SW_SWPART1FCTL		0x3E128U
+#define IDT_SW_SWPART2CTL		0x3E140U
+#define IDT_SW_SWPART2STS		0x3E144U
+#define IDT_SW_SWPART2FCTL		0x3E148U
+#define IDT_SW_SWPART3CTL		0x3E160U
+#define IDT_SW_SWPART3STS		0x3E164U
+#define IDT_SW_SWPART3FCTL		0x3E168U
+#define IDT_SW_SWPART4CTL		0x3E180U
+#define IDT_SW_SWPART4STS		0x3E184U
+#define IDT_SW_SWPART4FCTL		0x3E188U
+#define IDT_SW_SWPART5CTL		0x3E1A0U
+#define IDT_SW_SWPART5STS		0x3E1A4U
+#define IDT_SW_SWPART5FCTL		0x3E1A8U
+#define IDT_SW_SWPART6CTL		0x3E1C0U
+#define IDT_SW_SWPART6STS		0x3E1C4U
+#define IDT_SW_SWPART6FCTL		0x3E1C8U
+#define IDT_SW_SWPART7CTL		0x3E1E0U
+#define IDT_SW_SWPART7STS		0x3E1E4U
+#define IDT_SW_SWPART7FCTL		0x3E1E8U
+/* Switch port N control and status registers */
+#define IDT_SW_SWPORT0CTL		0x3E200U
+#define IDT_SW_SWPORT0STS		0x3E204U
+#define IDT_SW_SWPORT0FCTL		0x3E208U
+#define IDT_SW_SWPORT2CTL		0x3E240U
+#define IDT_SW_SWPORT2STS		0x3E244U
+#define IDT_SW_SWPORT2FCTL		0x3E248U
+#define IDT_SW_SWPORT4CTL		0x3E280U
+#define IDT_SW_SWPORT4STS		0x3E284U
+#define IDT_SW_SWPORT4FCTL		0x3E288U
+#define IDT_SW_SWPORT6CTL		0x3E2C0U
+#define IDT_SW_SWPORT6STS		0x3E2C4U
+#define IDT_SW_SWPORT6FCTL		0x3E2C8U
+#define IDT_SW_SWPORT8CTL		0x3E300U
+#define IDT_SW_SWPORT8STS		0x3E304U
+#define IDT_SW_SWPORT8FCTL		0x3E308U
+#define IDT_SW_SWPORT12CTL		0x3E380U
+#define IDT_SW_SWPORT12STS		0x3E384U
+#define IDT_SW_SWPORT12FCTL		0x3E388U
+#define IDT_SW_SWPORT16CTL		0x3E400U
+#define IDT_SW_SWPORT16STS		0x3E404U
+#define IDT_SW_SWPORT16FCTL		0x3E408U
+#define IDT_SW_SWPORT20CTL		0x3E480U
+#define IDT_SW_SWPORT20STS		0x3E484U
+#define IDT_SW_SWPORT20FCTL		0x3E488U
+/* Switch Event registers */
+/* Switch Event Status/Mask/Partition mask (DWORD) */
+#define IDT_SW_SESTS			0x3EC00U
+#define IDT_SW_SEMSK			0x3EC04U
+#define IDT_SW_SEPMSK			0x3EC08U
+/* Switch Event Link Up/Down Status/Mask (DWORD) */
+#define IDT_SW_SELINKUPSTS		0x3EC0CU
+#define IDT_SW_SELINKUPMSK		0x3EC10U
+#define IDT_SW_SELINKDNSTS		0x3EC14U
+#define IDT_SW_SELINKDNMSK		0x3EC18U
+/* Switch Event Fundamental Reset Status/Mask (DWORD) */
+#define IDT_SW_SEFRSTSTS		0x3EC1CU
+#define IDT_SW_SEFRSTMSK		0x3EC20U
+/* Switch Event Hot Reset Status/Mask	(DWORD) */
+#define IDT_SW_SEHRSTSTS		0x3EC24U
+#define IDT_SW_SEHRSTMSK		0x3EC28U
+/* Switch Event Failover Mask		(DWORD) */
+#define IDT_SW_SEFOVRMSK		0x3EC2CU
+/* Switch Event Global Signal Status/Mask (DWORD) */
+#define IDT_SW_SEGSIGSTS		0x3EC30U
+#define IDT_SW_SEGSIGMSK		0x3EC34U
+/* NT Global Doorbell Status		(DWORD) */
+#define IDT_SW_GDBELLSTS		0x3EC3CU
+/* Switch partition N message M control (msgs routing table) (DWORD) */
+#define IDT_SW_SWP0MSGCTL0		0x3EE00U
+#define IDT_SW_SWP1MSGCTL0		0x3EE04U
+#define IDT_SW_SWP2MSGCTL0		0x3EE08U
+#define IDT_SW_SWP3MSGCTL0		0x3EE0CU
+#define IDT_SW_SWP4MSGCTL0		0x3EE10U
+#define IDT_SW_SWP5MSGCTL0		0x3EE14U
+#define IDT_SW_SWP6MSGCTL0		0x3EE18U
+#define IDT_SW_SWP7MSGCTL0		0x3EE1CU
+#define IDT_SW_SWP0MSGCTL1		0x3EE20U
+#define IDT_SW_SWP1MSGCTL1		0x3EE24U
+#define IDT_SW_SWP2MSGCTL1		0x3EE28U
+#define IDT_SW_SWP3MSGCTL1		0x3EE2CU
+#define IDT_SW_SWP4MSGCTL1		0x3EE30U
+#define IDT_SW_SWP5MSGCTL1		0x3EE34U
+#define IDT_SW_SWP6MSGCTL1		0x3EE38U
+#define IDT_SW_SWP7MSGCTL1		0x3EE3CU
+#define IDT_SW_SWP0MSGCTL2		0x3EE40U
+#define IDT_SW_SWP1MSGCTL2		0x3EE44U
+#define IDT_SW_SWP2MSGCTL2		0x3EE48U
+#define IDT_SW_SWP3MSGCTL2		0x3EE4CU
+#define IDT_SW_SWP4MSGCTL2		0x3EE50U
+#define IDT_SW_SWP5MSGCTL2		0x3EE54U
+#define IDT_SW_SWP6MSGCTL2		0x3EE58U
+#define IDT_SW_SWP7MSGCTL2		0x3EE5CU
+#define IDT_SW_SWP0MSGCTL3		0x3EE60U
+#define IDT_SW_SWP1MSGCTL3		0x3EE64U
+#define IDT_SW_SWP2MSGCTL3		0x3EE68U
+#define IDT_SW_SWP3MSGCTL3		0x3EE6CU
+#define IDT_SW_SWP4MSGCTL3		0x3EE70U
+#define IDT_SW_SWP5MSGCTL3		0x3EE74U
+#define IDT_SW_SWP6MSGCTL3		0x3EE78U
+#define IDT_SW_SWP7MSGCTL3		0x3EE7CU
+/* SMBus Status and Control registers	(DWORD) */
+#define IDT_SW_SMBUSSTS			0x3F188U
+#define IDT_SW_SMBUSCTL			0x3F18CU
+/* Serial EEPROM Interface		(DWORD) */
+#define IDT_SW_EEPROMINTF		0x3F190U
+/* MBus I/O Expander Address N		(DWORD) */
+#define IDT_SW_IOEXPADDR0		0x3F198U
+#define IDT_SW_IOEXPADDR1		0x3F19CU
+#define IDT_SW_IOEXPADDR2		0x3F1A0U
+#define IDT_SW_IOEXPADDR3		0x3F1A4U
+#define IDT_SW_IOEXPADDR4		0x3F1A8U
+#define IDT_SW_IOEXPADDR5		0x3F1ACU
+/* General Purpose Events Control and Status registers (DWORD) */
+#define IDT_SW_GPECTL			0x3F1B0U
+#define IDT_SW_GPESTS			0x3F1B4U
+/* Temperature sensor Control/Status/Alarm/Adjustment/Slope registers */
+#define IDT_SW_TMPCTL			0x3F1D4U
+#define IDT_SW_TMPSTS			0x3F1D8U
+#define IDT_SW_TMPALARM			0x3F1DCU
+#define IDT_SW_TMPADJ			0x3F1E0U
+#define IDT_SW_TSSLOPE			0x3F1E4U
+/* SMBus Configuration Block header log	(DWORD) */
+#define IDT_SW_SMBUSCBHL		0x3F1E8U
+
+/*
+ * Common registers related constants
+ * @IDT_REG_ALIGN:	Registers alignment used in the driver
+ * @IDT_REG_PCI_MAX:	Maximum PCI configuration space register value
+ * @IDT_REG_SW_MAX:	Maximum global register value
+ */
+#define IDT_REG_ALIGN			4
+#define IDT_REG_PCI_MAX			0x00FFFU
+#define IDT_REG_SW_MAX			0x3FFFFU
+
+/*
+ * PCICMDSTS register fields related constants
+ * @IDT_PCICMDSTS_IOAE:	I/O access enable
+ * @IDT_PCICMDSTS_MAE:	Memory access enable
+ * @IDT_PCICMDSTS_BME:	Bus master enable
+ */
+#define IDT_PCICMDSTS_IOAE		0x00000001U
+#define IDT_PCICMDSTS_MAE		0x00000002U
+#define IDT_PCICMDSTS_BME		0x00000004U
+
+/*
+ * PCIEDCAP register fields related constants
+ * @IDT_PCIEDCAP_MPAYLOAD_MASK:	 Maximum payload size mask
+ * @IDT_PCIEDCAP_MPAYLOAD_FLD:	 Maximum payload size field offset
+ * @IDT_PCIEDCAP_MPAYLOAD_S128:	 Max supported payload size of 128 bytes
+ * @IDT_PCIEDCAP_MPAYLOAD_S256:	 Max supported payload size of 256 bytes
+ * @IDT_PCIEDCAP_MPAYLOAD_S512:	 Max supported payload size of 512 bytes
+ * @IDT_PCIEDCAP_MPAYLOAD_S1024: Max supported payload size of 1024 bytes
+ * @IDT_PCIEDCAP_MPAYLOAD_S2048: Max supported payload size of 2048 bytes
+ */
+#define IDT_PCIEDCAP_MPAYLOAD_MASK	0x00000007U
+#define IDT_PCIEDCAP_MPAYLOAD_FLD	0
+#define IDT_PCIEDCAP_MPAYLOAD_S128	0x00000000U
+#define IDT_PCIEDCAP_MPAYLOAD_S256	0x00000001U
+#define IDT_PCIEDCAP_MPAYLOAD_S512	0x00000002U
+#define IDT_PCIEDCAP_MPAYLOAD_S1024	0x00000003U
+#define IDT_PCIEDCAP_MPAYLOAD_S2048	0x00000004U
+
+/*
+ * PCIEDCTLSTS registers fields related constants
+ * @IDT_PCIEDCTL_MPS_MASK:	Maximum payload size mask
+ * @IDT_PCIEDCTL_MPS_FLD:	MPS field offset
+ * @IDT_PCIEDCTL_MPS_S128:	Max payload size of 128 bytes
+ * @IDT_PCIEDCTL_MPS_S256:	Max payload size of 256 bytes
+ * @IDT_PCIEDCTL_MPS_S512:	Max payload size of 512 bytes
+ * @IDT_PCIEDCTL_MPS_S1024:	Max payload size of 1024 bytes
+ * @IDT_PCIEDCTL_MPS_S2048:	Max payload size of 2048 bytes
+ * @IDT_PCIEDCTL_MPS_S4096:	Max payload size of 4096 bytes
+ */
+#define IDT_PCIEDCTLSTS_MPS_MASK	0x000000E0U
+#define IDT_PCIEDCTLSTS_MPS_FLD		5
+#define IDT_PCIEDCTLSTS_MPS_S128	0x00000000U
+#define IDT_PCIEDCTLSTS_MPS_S256	0x00000020U
+#define IDT_PCIEDCTLSTS_MPS_S512	0x00000040U
+#define IDT_PCIEDCTLSTS_MPS_S1024	0x00000060U
+#define IDT_PCIEDCTLSTS_MPS_S2048	0x00000080U
+#define IDT_PCIEDCTLSTS_MPS_S4096	0x000000A0U
+
+/*
+ * PCIELCAP register fields related constants
+ * @IDT_PCIELCAP_PORTNUM_MASK:	Port number field mask
+ * @IDT_PCIELCAP_PORTNUM_FLD:	Port number field offset
+ */
+#define IDT_PCIELCAP_PORTNUM_MASK	0xFF000000U
+#define IDT_PCIELCAP_PORTNUM_FLD	24
+
+/*
+ * PCIELCTLSTS registers fields related constants
+ * @IDT_PCIELSTS_CLS_MASK:	Current link speed mask
+ * @IDT_PCIELSTS_CLS_FLD:	Current link speed field offset
+ * @IDT_PCIELSTS_NLW_MASK:	Negotiated link width mask
+ * @IDT_PCIELSTS_NLW_FLD:	Negotiated link width field offset
+ * @IDT_PCIELSTS_SCLK_COM:	Common slot clock configuration
+ */
+#define IDT_PCIELCTLSTS_CLS_MASK	0x000F0000U
+#define IDT_PCIELCTLSTS_CLS_FLD		16
+#define IDT_PCIELCTLSTS_NLW_MASK	0x03F00000U
+#define IDT_PCIELCTLSTS_NLW_FLD		20
+#define IDT_PCIELCTLSTS_SCLK_COM	0x10000000U
+
+/*
+ * NTCTL register fields related constants
+ * @IDT_NTCTL_IDPROTDIS:	ID Protection check disable (disable MTBL)
+ * @IDT_NTCTL_CPEN:		Completion enable
+ * @IDT_NTCTL_RNS:		Request no snoop processing (if MTBL disabled)
+ * @IDT_NTCTL_ATP:		Address type processing (if MTBL disabled)
+ */
+#define IDT_NTCTL_IDPROTDIS		0x00000001U
+#define IDT_NTCTL_CPEN			0x00000002U
+#define IDT_NTCTL_RNS			0x00000004U
+#define IDT_NTCTL_ATP			0x00000008U
+
+/*
+ * NTINTSTS register fields related constants
+ * @IDT_NTINTSTS_MSG:		Message interrupt bit
+ * @IDT_NTINTSTS_DBELL:		Doorbell interrupt bit
+ * @IDT_NTINTSTS_SEVENT:	Switch Event interrupt bit
+ * @IDT_NTINTSTS_TMPSENSOR:	Temperature sensor interrupt bit
+ */
+#define IDT_NTINTSTS_MSG		0x00000001U
+#define IDT_NTINTSTS_DBELL		0x00000002U
+#define IDT_NTINTSTS_SEVENT		0x00000008U
+#define IDT_NTINTSTS_TMPSENSOR		0x00000080U
+
+/*
+ * NTINTMSK register fields related constants
+ * @IDT_NTINTMSK_MSG:		Message interrupt mask bit
+ * @IDT_NTINTMSK_DBELL:		Doorbell interrupt mask bit
+ * @IDT_NTINTMSK_SEVENT:	Switch Event interrupt mask bit
+ * @IDT_NTINTMSK_TMPSENSOR:	Temperature sensor interrupt mask bit
+ * @IDT_NTINTMSK_ALL:		All the useful interrupts mask
+ */
+#define IDT_NTINTMSK_MSG		0x00000001U
+#define IDT_NTINTMSK_DBELL		0x00000002U
+#define IDT_NTINTMSK_SEVENT		0x00000008U
+#define IDT_NTINTMSK_TMPSENSOR		0x00000080U
+#define IDT_NTINTMSK_ALL \
+	(IDT_NTINTMSK_MSG | IDT_NTINTMSK_DBELL | \
+	 IDT_NTINTMSK_SEVENT | IDT_NTINTMSK_TMPSENSOR)
+
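+/*
+ * Usage illustration (assumed, mirroring the ISR setup in ntb_hw_idt.c):
+ * unmasking all the serviced interrupt sources is a single DWORD write to
+ * the NTINTMSK register, e.g.
+ *
+ *	idt_nt_write(ndev, IDT_NT_NTINTMSK, ~(u32)IDT_NTINTMSK_ALL);
+ */
+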
+/*
+ * NTGSIGNAL register fields related constants
+ * @IDT_NTGSIGNAL_SET:	Set global signal of the local partition
+ */
+#define IDT_NTGSIGNAL_SET		0x00000001U
+
+/*
+ * BARSETUP register fields related constants
+ * @IDT_BARSETUP_TYPE_MASK:	Mask of the TYPE field
+ * @IDT_BARSETUP_TYPE_32:	32-bit addressing BAR
+ * @IDT_BARSETUP_TYPE_64:	64-bit addressing BAR
+ * @IDT_BARSETUP_PREF:		Value of the BAR prefetchable field
+ * @IDT_BARSETUP_SIZE_MASK:	Mask of the SIZE field
+ * @IDT_BARSETUP_SIZE_FLD:	SIZE field offset
+ * @IDT_BARSETUP_SIZE_CFG:	SIZE field value in case of config space MODE
+ * @IDT_BARSETUP_MODE_CFG:	Configuration space BAR mode
+ * @IDT_BARSETUP_ATRAN_MASK:	ATRAN field mask
+ * @IDT_BARSETUP_ATRAN_FLD:	ATRAN field offset
+ * @IDT_BARSETUP_ATRAN_DIR:	Direct address translation memory window
+ * @IDT_BARSETUP_ATRAN_LUT12:	12-entry lookup table
+ * @IDT_BARSETUP_ATRAN_LUT24:	24-entry lookup table
+ * @IDT_BARSETUP_TPART_MASK:	TPART field mask
+ * @IDT_BARSETUP_TPART_FLD:	TPART field offset
+ * @IDT_BARSETUP_EN:		BAR enable bit
+ */
+#define IDT_BARSETUP_TYPE_MASK		0x00000006U
+#define IDT_BARSETUP_TYPE_FLD		0
+#define IDT_BARSETUP_TYPE_32		0x00000000U
+#define IDT_BARSETUP_TYPE_64		0x00000004U
+#define IDT_BARSETUP_PREF		0x00000008U
+#define IDT_BARSETUP_SIZE_MASK		0x000003F0U
+#define IDT_BARSETUP_SIZE_FLD		4
+#define IDT_BARSETUP_SIZE_CFG		0x000000C0U
+#define IDT_BARSETUP_MODE_CFG		0x00000400U
+#define IDT_BARSETUP_ATRAN_MASK		0x00001800U
+#define IDT_BARSETUP_ATRAN_FLD		11
+#define IDT_BARSETUP_ATRAN_DIR		0x00000000U
+#define IDT_BARSETUP_ATRAN_LUT12	0x00000800U
+#define IDT_BARSETUP_ATRAN_LUT24	0x00001000U
+#define IDT_BARSETUP_TPART_MASK		0x0000E000U
+#define IDT_BARSETUP_TPART_FLD		13
+#define IDT_BARSETUP_EN			0x80000000U
+
+/*
+ * NTMTBLDATA register fields related constants
+ * @IDT_NTMTBLDATA_VALID:	Set the MTBL entry being valid
+ * @IDT_NTMTBLDATA_REQID_MASK:	Bus:Device:Function field mask
+ * @IDT_NTMTBLDATA_REQID_FLD:	Bus:Device:Function field offset
+ * @IDT_NTMTBLDATA_PART_MASK:	Partition field mask
+ * @IDT_NTMTBLDATA_PART_FLD:	Partition field offset
+ * @IDT_NTMTBLDATA_ATP_TRANS:	Enable AT field translation on request TLPs
+ * @IDT_NTMTBLDATA_CNS_INV:	Enable No Snoop attribute inversion of
+ *				Completion TLPs
+ * @IDT_NTMTBLDATA_RNS_INV:	Enable No Snoop attribute inversion of
+ *				Request TLPs
+ */
+#define IDT_NTMTBLDATA_VALID		0x00000001U
+#define IDT_NTMTBLDATA_REQID_MASK	0x0001FFFEU
+#define IDT_NTMTBLDATA_REQID_FLD	1
+#define IDT_NTMTBLDATA_PART_MASK	0x000E0000U
+#define IDT_NTMTBLDATA_PART_FLD		17
+#define IDT_NTMTBLDATA_ATP_TRANS	0x20000000U
+#define IDT_NTMTBLDATA_CNS_INV		0x40000000U
+#define IDT_NTMTBLDATA_RNS_INV		0x80000000U
+
+/*
+ * REQIDCAP register fields related constants
+ * @IDT_REQIDCAP_REQID_MASK:	Request ID field mask
+ * @IDT_REQIDCAP_REQID_FLD:	Request ID field offset
+ */
+#define IDT_REQIDCAP_REQID_MASK		0x0000FFFFU
+#define IDT_REQIDCAP_REQID_FLD		0
+
+/*
+ * LUTOFFSET register fields related constants
+ * @IDT_LUTOFFSET_INDEX_MASK:	Lookup table index field mask
+ * @IDT_LUTOFFSET_INDEX_FLD:	Lookup table index field offset
+ * @IDT_LUTOFFSET_BAR_MASK:	Lookup table BAR select field mask
+ * @IDT_LUTOFFSET_BAR_FLD:	Lookup table BAR select field offset
+ */
+#define IDT_LUTOFFSET_INDEX_MASK	0x0000001FU
+#define IDT_LUTOFFSET_INDEX_FLD		0
+#define IDT_LUTOFFSET_BAR_MASK		0x00000700U
+#define IDT_LUTOFFSET_BAR_FLD		8
+
+/*
+ * LUTUDATA register fields related constants
+ * @IDT_LUTUDATA_PART_MASK:	Partition field mask
+ * @IDT_LUTUDATA_PART_FLD:	Partition field offset
+ * @IDT_LUTUDATA_VALID:		Lookup table entry valid bit
+ */
+#define IDT_LUTUDATA_PART_MASK		0x0000000FU
+#define IDT_LUTUDATA_PART_FLD		0
+#define IDT_LUTUDATA_VALID		0x80000000U
+
+/*
+ * SWPARTxSTS register fields related constants
+ * @IDT_SWPARTxSTS_SCI:		Switch partition state change initiated
+ * @IDT_SWPARTxSTS_SCC:		Switch partition state change completed
+ * @IDT_SWPARTxSTS_STATE_MASK:	Switch partition state mask
+ * @IDT_SWPARTxSTS_STATE_FLD:	Switch partition state field offset
+ * @IDT_SWPARTxSTS_STATE_DIS:	Switch partition disabled
+ * @IDT_SWPARTxSTS_STATE_ACT:	Switch partition enabled
+ * @IDT_SWPARTxSTS_STATE_RES:	Switch partition in reset
+ * @IDT_SWPARTxSTS_US:		Switch partition has upstream port
+ * @IDT_SWPARTxSTS_USID_MASK:	Switch partition upstream port ID mask
+ * @IDT_SWPARTxSTS_USID_FLD:	Switch partition upstream port ID field offset
+ * @IDT_SWPARTxSTS_NT:		Upstream port has NT function
+ * @IDT_SWPARTxSTS_DMA:		Upstream port has DMA function
+ */
+#define IDT_SWPARTxSTS_SCI		0x00000001U
+#define IDT_SWPARTxSTS_SCC		0x00000002U
+#define IDT_SWPARTxSTS_STATE_MASK	0x00000060U
+#define IDT_SWPARTxSTS_STATE_FLD	5
+#define IDT_SWPARTxSTS_STATE_DIS	0x00000000U
+#define IDT_SWPARTxSTS_STATE_ACT	0x00000020U
+#define IDT_SWPARTxSTS_STATE_RES	0x00000060U
+#define IDT_SWPARTxSTS_US		0x00000100U
+#define IDT_SWPARTxSTS_USID_MASK	0x00003E00U
+#define IDT_SWPARTxSTS_USID_FLD		9
+#define IDT_SWPARTxSTS_NT		0x00004000U
+#define IDT_SWPARTxSTS_DMA		0x00008000U
+
+/*
+ * SWPORTxSTS register fields related constants
+ * @IDT_SWPORTxSTS_OMCI:	Operation mode change initiated
+ * @IDT_SWPORTxSTS_OMCC:	Operation mode change completed
+ * @IDT_SWPORTxSTS_LINKUP:	Link up status
+ * @IDT_SWPORTxSTS_DS:		Port lanes behave as downstream lanes
+ * @IDT_SWPORTxSTS_MODE_MASK:	Port mode field mask
+ * @IDT_SWPORTxSTS_MODE_FLD:	Port mode field offset
+ * @IDT_SWPORTxSTS_MODE_DIS:	Port mode - disabled
+ * @IDT_SWPORTxSTS_MODE_DS:	Port mode - downstream switch port
+ * @IDT_SWPORTxSTS_MODE_US:	Port mode - upstream switch port
+ * @IDT_SWPORTxSTS_MODE_NT:	Port mode - NT function
+ * @IDT_SWPORTxSTS_MODE_USNT:	Port mode - upstream switch port with NTB
+ * @IDT_SWPORTxSTS_MODE_UNAT:	Port mode - unattached
+ * @IDT_SWPORTxSTS_MODE_USDMA:	Port mode - upstream switch port with DMA
+ * @IDT_SWPORTxSTS_MODE_USNTDMA:Port mode - upstream port with NTB and DMA
+ * @IDT_SWPORTxSTS_MODE_NTDMA:	Port mode - NT function with DMA
+ * @IDT_SWPORTxSTS_SWPART_MASK:	Port partition field mask
+ * @IDT_SWPORTxSTS_SWPART_FLD:	Port partition field offset
+ * @IDT_SWPORTxSTS_DEVNUM_MASK:	Port device number field mask
+ * @IDT_SWPORTxSTS_DEVNUM_FLD:	Port device number field offset
+ */
+#define IDT_SWPORTxSTS_OMCI		0x00000001U
+#define IDT_SWPORTxSTS_OMCC		0x00000002U
+#define IDT_SWPORTxSTS_LINKUP		0x00000010U
+#define IDT_SWPORTxSTS_DS		0x00000020U
+#define IDT_SWPORTxSTS_MODE_MASK	0x000003C0U
+#define IDT_SWPORTxSTS_MODE_FLD		6
+#define IDT_SWPORTxSTS_MODE_DIS		0x00000000U
+#define IDT_SWPORTxSTS_MODE_DS		0x00000040U
+#define IDT_SWPORTxSTS_MODE_US		0x00000080U
+#define IDT_SWPORTxSTS_MODE_NT		0x000000C0U
+#define IDT_SWPORTxSTS_MODE_USNT	0x00000100U
+#define IDT_SWPORTxSTS_MODE_UNAT	0x00000140U
+#define IDT_SWPORTxSTS_MODE_USDMA	0x00000180U
+#define IDT_SWPORTxSTS_MODE_USNTDMA	0x000001C0U
+#define IDT_SWPORTxSTS_MODE_NTDMA	0x00000200U
+#define IDT_SWPORTxSTS_SWPART_MASK	0x00001C00U
+#define IDT_SWPORTxSTS_SWPART_FLD	10
+#define IDT_SWPORTxSTS_DEVNUM_MASK	0x001F0000U
+#define IDT_SWPORTxSTS_DEVNUM_FLD	16
+
+/*
+ * SEMSK register fields related constants
+ * @IDT_SEMSK_LINKUP:	Link Up event mask bit
+ * @IDT_SEMSK_LINKDN:	Link Down event mask bit
+ * @IDT_SEMSK_GSIGNAL:	Global Signal event mask bit
+ */
+#define IDT_SEMSK_LINKUP		0x00000001U
+#define IDT_SEMSK_LINKDN		0x00000002U
+#define IDT_SEMSK_GSIGNAL		0x00000020U
+
+/*
+ * SWPxMSGCTL register fields related constants
+ * @IDT_SWPxMSGCTL_REG_MASK:	Register select field mask
+ * @IDT_SWPxMSGCTL_REG_FLD:	Register select field offset
+ * @IDT_SWPxMSGCTL_PART_MASK:	Partition select field mask
+ * @IDT_SWPxMSGCTL_PART_FLD:	Partition select field offset
+ */
+#define IDT_SWPxMSGCTL_REG_MASK		0x00000003U
+#define IDT_SWPxMSGCTL_REG_FLD		0
+#define IDT_SWPxMSGCTL_PART_MASK	0x00000070U
+#define IDT_SWPxMSGCTL_PART_FLD		4
+
+/*
+ * TMPSTS register fields related constants
+ * @IDT_TMPSTS_TEMP_MASK:	Current temperature field mask
+ * @IDT_TMPSTS_TEMP_FLD:	Current temperature field offset
+ */
+#define IDT_TMPSTS_TEMP_MASK		0x000000FFU
+#define IDT_TMPSTS_TEMP_FLD		0
+
+/*
+ * Helper macros to get/set the corresponding field value
+ * @GET_FIELD:		Retrieve the value of the corresponding field
+ * @SET_FIELD:		Set the specified field up
+ * @IS_FLD_SET:		Check whether a field is set to the specified value
+ */
+#define GET_FIELD(field, data) \
+	(((u32)(data) & IDT_ ##field## _MASK) >> IDT_ ##field## _FLD)
+#define SET_FIELD(field, data, value) \
+	(((u32)(data) & ~IDT_ ##field## _MASK) | \
+	 ((u32)(value) << IDT_ ##field## _FLD))
+#define IS_FLD_SET(field, data, value) \
+	(((u32)(data) & IDT_ ##field## _MASK) == IDT_ ##field## _ ##value)
+
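+/*
+ * For illustration only: with the constants above,
+ * GET_FIELD(NTMTBLDATA_PART, data) expands to
+ *
+ *	(((u32)(data) & IDT_NTMTBLDATA_PART_MASK) >> IDT_NTMTBLDATA_PART_FLD)
+ *
+ * which is how the DebugFS dump in ntb_hw_idt.c retrieves the partition
+ * number of a mapping table entry.
+ */
+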
+/*
+ * Useful registers masks:
+ * @IDT_DBELL_MASK:	Doorbell bits mask
+ * @IDT_OUTMSG_MASK:	Out messages status bits mask
+ * @IDT_INMSG_MASK:	In messages status bits mask
+ * @IDT_MSG_MASK:	Any message status bits mask
+ */
+#define IDT_DBELL_MASK		((u32)0xFFFFFFFFU)
+#define IDT_OUTMSG_MASK		((u32)0x0000000FU)
+#define IDT_INMSG_MASK		((u32)0x000F0000U)
+#define IDT_MSG_MASK		(IDT_INMSG_MASK | IDT_OUTMSG_MASK)
+
+/*
+ * Number of IDT NTB resources:
+ * @IDT_MSG_CNT:	Number of Message registers
+ * @IDT_BAR_CNT:	Number of BARs of each port
+ * @IDT_MTBL_ENTRY_CNT:	Number of mapping table entries
+ */
+#define IDT_MSG_CNT		4
+#define IDT_BAR_CNT		6
+#define IDT_MTBL_ENTRY_CNT	64
+
+/*
+ * General IDT PCIe-switch constants
+ * @IDT_MAX_NR_PORTS:	Maximum number of ports per IDT PCIe-switch
+ * @IDT_MAX_NR_PARTS:	Maximum number of partitions per IDT PCIe-switch
+ * @IDT_MAX_NR_PEERS:	Maximum number of NT-peers per IDT PCIe-switch
+ * @IDT_MAX_NR_MWS:	Maximum number of Memory Widows
+ * @IDT_PCIE_REGSIZE:	Size of the registers in bytes
+ * @IDT_TRANS_ALIGN:	Alignment of translated base address
+ * @IDT_DIR_SIZE_ALIGN:	Alignment of size setting for direct translated MWs.
+ *			Even though the lower 10 bits are reserved, IDT
+ *			treats them as ones, so effectively there is no
+ *			size alignment limit for DIR address translation.
+ */
+#define IDT_MAX_NR_PORTS	24
+#define IDT_MAX_NR_PARTS	8
+#define IDT_MAX_NR_PEERS	8
+#define IDT_MAX_NR_MWS		29
+#define IDT_PCIE_REGSIZE	4
+#define IDT_TRANS_ALIGN		4
+#define IDT_DIR_SIZE_ALIGN	1
+
+/*
+ * IDT Memory Windows type. Depending on the device settings, IDT supports
+ * Direct Address Translation MW registers and Lookup Table registers
+ * @IDT_MW_DIR:		Direct address translation
+ * @IDT_MW_LUT12:	12-entry lookup table entry
+ * @IDT_MW_LUT24:	24-entry lookup table entry
+ *
+ * NOTE: These values are exactly those of the BARSETUP ATRAN field
+ */
+enum idt_mw_type {
+	IDT_MW_DIR = 0x0,
+	IDT_MW_LUT12 = 0x1,
+	IDT_MW_LUT24 = 0x2
+};
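+
+/*
+ * Illustration only: per the NOTE above, the memory window type can be taken
+ * straight from a BARSETUP register value, e.g.
+ *
+ *	enum idt_mw_type type = GET_FIELD(BARSETUP_ATRAN, data);
+ */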
+
+/*
+ * IDT PCIe-switch model private data
+ * @name:	Device name
+ * @port_cnt:	Total number of NT endpoint ports
+ * @ports:	Port ids
+ */
+struct idt_89hpes_cfg {
+	char *name;
+	unsigned char port_cnt;
+	unsigned char ports[];
+};
+
+/*
+ * Memory window configuration structure
+ * @type:	Type of the memory window (direct address translation or lookup
+ *		table)
+ *
+ * @bar:	PCIe BAR the memory window belongs to
+ * @idx:	Index of the memory window within the BAR
+ *
+ * @addr_align:	Alignment of translated address
+ * @size_align:	Alignment of memory window size
+ * @size_max:	Maximum size of memory window
+ */
+struct idt_mw_cfg {
+	enum idt_mw_type type;
+
+	unsigned char bar;
+	unsigned char idx;
+
+	u64 addr_align;
+	u64 size_align;
+	u64 size_max;
+};
+
+/*
+ * Description structure of peer IDT NT-functions:
+ * @port:		NT-function port
+ * @part:		NT-function partition
+ *
+ * @mw_cnt:		Number of memory windows supported by NT-function
+ * @mws:		Array of memory windows descriptors
+ */
+struct idt_ntb_peer {
+	unsigned char port;
+	unsigned char part;
+
+	unsigned char mw_cnt;
+	struct idt_mw_cfg *mws;
+};
+
+/*
+ * Description structure of local IDT NT-function:
+ * @ntb:		Linux NTB-device description structure
+ * @swcfg:		Pointer to the structure of local IDT PCIe-switch
+ *			specific configurations
+ *
+ * @port:		Local NT-function port
+ * @part:		Local NT-function partition
+ *
+ * @peer_cnt:		Number of peers with activated NTB-function
+ * @peers:		Array of peer descriptor structures
+ * @port_idx_map:	Map of port number -> peer index
+ * @part_idx_map:	Map of partition number -> peer index
+ *
+ * @mtbl_lock:		Mapping table access lock
+ *
+ * @mw_cnt:		Number of memory windows supported by NT-function
+ * @mws:		Array of memory windows descriptors
+ * @lut_lock:		Lookup table access lock
+ *
+ * @msg_locks:		Message registers access locks (one per message register)
+ *
+ * @cfgspc:		Virtual address of the memory mapped configuration
+ *			space of the NT-function
+ * @db_mask_lock:	Doorbell mask register lock
+ * @msg_mask_lock:	Message mask register lock
+ * @gasa_lock:		GASA registers access lock
+ *
+ * @dbgfs_info:		DebugFS info node
+ */
+struct idt_ntb_dev {
+	struct ntb_dev ntb;
+	struct idt_89hpes_cfg *swcfg;
+
+	unsigned char port;
+	unsigned char part;
+
+	unsigned char peer_cnt;
+	struct idt_ntb_peer peers[IDT_MAX_NR_PEERS];
+	char port_idx_map[IDT_MAX_NR_PORTS];
+	char part_idx_map[IDT_MAX_NR_PARTS];
+
+	spinlock_t mtbl_lock;
+
+	unsigned char mw_cnt;
+	struct idt_mw_cfg *mws;
+	spinlock_t lut_lock;
+
+	spinlock_t msg_locks[IDT_MSG_CNT];
+
+	void __iomem *cfgspc;
+	spinlock_t db_mask_lock;
+	spinlock_t msg_mask_lock;
+	spinlock_t gasa_lock;
+
+	struct dentry *dbgfs_info;
+};
+#define to_ndev_ntb(__ntb) container_of(__ntb, struct idt_ntb_dev, ntb)
+
+/*
+ * Descriptor of the IDT PCIe-switch BAR resources
+ * @setup:	BAR setup register
+ * @limit:	BAR limit register
+ * @ltbase:	Lower translated base address
+ * @utbase:	Upper translated base address
+ */
+struct idt_ntb_bar {
+	unsigned int setup;
+	unsigned int limit;
+	unsigned int ltbase;
+	unsigned int utbase;
+};
+
+/*
+ * Descriptor of the IDT PCIe-switch message resources
+ * @in:		Inbound message register
+ * @out:	Outbound message register
+ * @src:	Source of inbound message register
+ */
+struct idt_ntb_msg {
+	unsigned int in;
+	unsigned int out;
+	unsigned int src;
+};
+
+/*
+ * Descriptor of the IDT PCIe-switch NT-function specific parameters in the
+ * PCI Configuration Space
+ * @bars:	BARs related registers
+ * @msgs:	Messaging related registers
+ */
+struct idt_ntb_regs {
+	struct idt_ntb_bar bars[IDT_BAR_CNT];
+	struct idt_ntb_msg msgs[IDT_MSG_CNT];
+};
+
+/*
+ * Descriptor of the IDT PCIe-switch port specific parameters in the
+ * Global Configuration Space
+ * @pcicmdsts:	 PCI command/status register
+ * @pcielctlsts: PCIe link control/status
+ * @ntctl:	 NT-function control register
+ *
+ * @ctl:	Port control register
+ * @sts:	Port status register
+ *
+ * @bars:	BARs related registers
+ */
+struct idt_ntb_port {
+	unsigned int pcicmdsts;
+	unsigned int pcielctlsts;
+	unsigned int ntctl;
+
+	unsigned int ctl;
+	unsigned int sts;
+
+	struct idt_ntb_bar bars[IDT_BAR_CNT];
+};
+
+/*
+ * Descriptor of the IDT PCIe-switch partition specific parameters.
+ * @ctl:	Partition control register in the Global Address Space
+ * @sts:	Partition status register in the Global Address Space
+ * @msgctl:	Messages control registers
+ */
+struct idt_ntb_part {
+	unsigned int ctl;
+	unsigned int sts;
+	unsigned int msgctl[IDT_MSG_CNT];
+};
+
+#endif /* NTB_HW_IDT_H */
diff --git a/drivers/ntb/hw/intel/Kconfig b/drivers/ntb/hw/intel/Kconfig
new file mode 100644
index 0000000..91f995e
--- /dev/null
+++ b/drivers/ntb/hw/intel/Kconfig
@@ -0,0 +1,7 @@
+config NTB_INTEL
+	tristate "Intel Non-Transparent Bridge support"
+	depends on X86_64
+	help
+	 This driver supports Intel NTB on capable Xeon and Atom hardware.
+
+	 If unsure, say N.
diff --git a/drivers/ntb/hw/intel/Makefile b/drivers/ntb/hw/intel/Makefile
new file mode 100644
index 0000000..4ff22af
--- /dev/null
+++ b/drivers/ntb/hw/intel/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_NTB_INTEL) += ntb_hw_intel.o
+ntb_hw_intel-y := ntb_hw_gen1.o ntb_hw_gen3.o
diff --git a/drivers/ntb/hw/intel/ntb_hw_gen1.c b/drivers/ntb/hw/intel/ntb_hw_gen1.c
new file mode 100644
index 0000000..6aa5732
--- /dev/null
+++ b/drivers/ntb/hw/intel/ntb_hw_gen1.c
@@ -0,0 +1,2070 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2012 Intel Corporation. All rights reserved.
+ *   Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ *   Copyright (C) 2016 T-Platforms. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2012 Intel Corporation. All rights reserved.
+ *   Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ *   Copyright (C) 2016 T-Platforms. All Rights Reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Intel PCIe NTB Linux driver
+ */
+
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/ntb.h>
+
+#include "ntb_hw_intel.h"
+#include "ntb_hw_gen1.h"
+#include "ntb_hw_gen3.h"
+
+#define NTB_NAME	"ntb_hw_intel"
+#define NTB_DESC	"Intel(R) PCI-E Non-Transparent Bridge Driver"
+#define NTB_VER		"2.0"
+
+MODULE_DESCRIPTION(NTB_DESC);
+MODULE_VERSION(NTB_VER);
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Intel Corporation");
+
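+/*
+ * Per-BAR register offset helpers: the per-BAR registers are laid out 4 bytes
+ * apart, bar0_off() indexing from BAR 0 and bar2_off() from BAR 2.
+ */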
+#define bar0_off(base, bar) ((base) + ((bar) << 2))
+#define bar2_off(base, bar) bar0_off(base, (bar) - 2)
+
+static const struct intel_ntb_reg xeon_reg;
+static const struct intel_ntb_alt_reg xeon_pri_reg;
+static const struct intel_ntb_alt_reg xeon_sec_reg;
+static const struct intel_ntb_alt_reg xeon_b2b_reg;
+static const struct intel_ntb_xlat_reg xeon_pri_xlat;
+static const struct intel_ntb_xlat_reg xeon_sec_xlat;
+static const struct ntb_dev_ops intel_ntb_ops;
+
+static const struct file_operations intel_ntb_debugfs_info;
+static struct dentry *debugfs_dir;
+
+static int b2b_mw_idx = -1;
+module_param(b2b_mw_idx, int, 0644);
+MODULE_PARM_DESC(b2b_mw_idx, "Use this mw idx to access the peer ntb.  A "
+		 "value of zero or positive starts from first mw idx, and a "
+		 "negative value starts from last mw idx.  Both sides MUST "
+		 "set the same value here!");
+
+static unsigned int b2b_mw_share;
+module_param(b2b_mw_share, uint, 0644);
+MODULE_PARM_DESC(b2b_mw_share, "If the b2b mw is large enough, configure the "
+		 "ntb so that the peer ntb only occupies the first half of "
+		 "the mw, so the second half can still be used as a mw.  Both "
+		 "sides MUST set the same value here!");
+
+module_param_named(xeon_b2b_usd_bar2_addr64,
+		   xeon_b2b_usd_addr.bar2_addr64, ullong, 0644);
+MODULE_PARM_DESC(xeon_b2b_usd_bar2_addr64,
+		 "XEON B2B USD BAR 2 64-bit address");
+
+module_param_named(xeon_b2b_usd_bar4_addr64,
+		   xeon_b2b_usd_addr.bar4_addr64, ullong, 0644);
+MODULE_PARM_DESC(xeon_b2b_usd_bar4_addr64,
+		 "XEON B2B USD BAR 4 64-bit address");
+
+module_param_named(xeon_b2b_usd_bar4_addr32,
+		   xeon_b2b_usd_addr.bar4_addr32, ullong, 0644);
+MODULE_PARM_DESC(xeon_b2b_usd_bar4_addr32,
+		 "XEON B2B USD split-BAR 4 32-bit address");
+
+module_param_named(xeon_b2b_usd_bar5_addr32,
+		   xeon_b2b_usd_addr.bar5_addr32, ullong, 0644);
+MODULE_PARM_DESC(xeon_b2b_usd_bar5_addr32,
+		 "XEON B2B USD split-BAR 5 32-bit address");
+
+module_param_named(xeon_b2b_dsd_bar2_addr64,
+		   xeon_b2b_dsd_addr.bar2_addr64, ullong, 0644);
+MODULE_PARM_DESC(xeon_b2b_dsd_bar2_addr64,
+		 "XEON B2B DSD BAR 2 64-bit address");
+
+module_param_named(xeon_b2b_dsd_bar4_addr64,
+		   xeon_b2b_dsd_addr.bar4_addr64, ullong, 0644);
+MODULE_PARM_DESC(xeon_b2b_dsd_bar4_addr64,
+		 "XEON B2B DSD BAR 4 64-bit address");
+
+module_param_named(xeon_b2b_dsd_bar4_addr32,
+		   xeon_b2b_dsd_addr.bar4_addr32, ullong, 0644);
+MODULE_PARM_DESC(xeon_b2b_dsd_bar4_addr32,
+		 "XEON B2B DSD split-BAR 4 32-bit address");
+
+module_param_named(xeon_b2b_dsd_bar5_addr32,
+		   xeon_b2b_dsd_addr.bar5_addr32, ullong, 0644);
+MODULE_PARM_DESC(xeon_b2b_dsd_bar5_addr32,
+		 "XEON B2B DSD split-BAR 5 32-bit address");
+
+
+static int xeon_init_isr(struct intel_ntb_dev *ndev);
+
+static inline void ndev_reset_unsafe_flags(struct intel_ntb_dev *ndev)
+{
+	ndev->unsafe_flags = 0;
+	ndev->unsafe_flags_ignore = 0;
+
+	/* Only B2B has a workaround to avoid SDOORBELL */
+	if (ndev->hwerr_flags & NTB_HWERR_SDOORBELL_LOCKUP)
+		if (!ntb_topo_is_b2b(ndev->ntb.topo))
+			ndev->unsafe_flags |= NTB_UNSAFE_DB;
+
+	/* No low level workaround to avoid SB01BASE */
+	if (ndev->hwerr_flags & NTB_HWERR_SB01BASE_LOCKUP) {
+		ndev->unsafe_flags |= NTB_UNSAFE_DB;
+		ndev->unsafe_flags |= NTB_UNSAFE_SPAD;
+	}
+}
+
+static inline int ndev_is_unsafe(struct intel_ntb_dev *ndev,
+				 unsigned long flag)
+{
+	return !!(flag & ndev->unsafe_flags & ~ndev->unsafe_flags_ignore);
+}
+
+static inline int ndev_ignore_unsafe(struct intel_ntb_dev *ndev,
+				     unsigned long flag)
+{
+	flag &= ndev->unsafe_flags;
+	ndev->unsafe_flags_ignore |= flag;
+
+	return !!flag;
+}
+
+int ndev_mw_to_bar(struct intel_ntb_dev *ndev, int idx)
+{
+	if (idx < 0 || idx >= ndev->mw_count)
+		return -EINVAL;
+	return ndev->reg->mw_bar[idx];
+}
+
+static inline int ndev_db_addr(struct intel_ntb_dev *ndev,
+			       phys_addr_t *db_addr, resource_size_t *db_size,
+			       phys_addr_t reg_addr, unsigned long reg)
+{
+	if (ndev_is_unsafe(ndev, NTB_UNSAFE_DB))
+		pr_warn_once("%s: NTB unsafe doorbell access", __func__);
+
+	if (db_addr) {
+		*db_addr = reg_addr + reg;
+		dev_dbg(&ndev->ntb.pdev->dev, "Peer db addr %llx\n", *db_addr);
+	}
+
+	if (db_size) {
+		*db_size = ndev->reg->db_size;
+		dev_dbg(&ndev->ntb.pdev->dev, "Peer db size %llx\n", *db_size);
+	}
+
+	return 0;
+}
+
+u64 ndev_db_read(struct intel_ntb_dev *ndev,
+			       void __iomem *mmio)
+{
+	if (ndev_is_unsafe(ndev, NTB_UNSAFE_DB))
+		pr_warn_once("%s: NTB unsafe doorbell access", __func__);
+
+	return ndev->reg->db_ioread(mmio);
+}
+
+int ndev_db_write(struct intel_ntb_dev *ndev, u64 db_bits,
+				void __iomem *mmio)
+{
+	if (ndev_is_unsafe(ndev, NTB_UNSAFE_DB))
+		pr_warn_once("%s: NTB unsafe doorbell access", __func__);
+
+	if (db_bits & ~ndev->db_valid_mask)
+		return -EINVAL;
+
+	ndev->reg->db_iowrite(db_bits, mmio);
+
+	return 0;
+}
+
+static inline int ndev_db_set_mask(struct intel_ntb_dev *ndev, u64 db_bits,
+				   void __iomem *mmio)
+{
+	unsigned long irqflags;
+
+	if (ndev_is_unsafe(ndev, NTB_UNSAFE_DB))
+		pr_warn_once("%s: NTB unsafe doorbell access", __func__);
+
+	if (db_bits & ~ndev->db_valid_mask)
+		return -EINVAL;
+
+	spin_lock_irqsave(&ndev->db_mask_lock, irqflags);
+	{
+		ndev->db_mask |= db_bits;
+		ndev->reg->db_iowrite(ndev->db_mask, mmio);
+	}
+	spin_unlock_irqrestore(&ndev->db_mask_lock, irqflags);
+
+	return 0;
+}
+
+static inline int ndev_db_clear_mask(struct intel_ntb_dev *ndev, u64 db_bits,
+				     void __iomem *mmio)
+{
+	unsigned long irqflags;
+
+	if (ndev_is_unsafe(ndev, NTB_UNSAFE_DB))
+		pr_warn_once("%s: NTB unsafe doorbell access", __func__);
+
+	if (db_bits & ~ndev->db_valid_mask)
+		return -EINVAL;
+
+	spin_lock_irqsave(&ndev->db_mask_lock, irqflags);
+	{
+		ndev->db_mask &= ~db_bits;
+		ndev->reg->db_iowrite(ndev->db_mask, mmio);
+	}
+	spin_unlock_irqrestore(&ndev->db_mask_lock, irqflags);
+
+	return 0;
+}
+
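+/*
+ * Doorbell bits serviced by a given interrupt vector: each vector covers a
+ * contiguous group of db_vec_shift doorbell bits.
+ */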
+static inline int ndev_vec_mask(struct intel_ntb_dev *ndev, int db_vector)
+{
+	u64 shift, mask;
+
+	shift = ndev->db_vec_shift;
+	mask = BIT_ULL(shift) - 1;
+
+	return mask << (shift * db_vector);
+}
+
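+/*
+ * Scratchpad registers are 32 bits each and laid out contiguously, so
+ * scratchpad idx lives at byte offset idx * 4.
+ */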
+static inline int ndev_spad_addr(struct intel_ntb_dev *ndev, int idx,
+				 phys_addr_t *spad_addr, phys_addr_t reg_addr,
+				 unsigned long reg)
+{
+	if (ndev_is_unsafe(ndev, NTB_UNSAFE_SPAD))
+		pr_warn_once("%s: NTB unsafe scratchpad access", __func__);
+
+	if (idx < 0 || idx >= ndev->spad_count)
+		return -EINVAL;
+
+	if (spad_addr) {
+		*spad_addr = reg_addr + reg + (idx << 2);
+		dev_dbg(&ndev->ntb.pdev->dev, "Peer spad addr %llx\n",
+			*spad_addr);
+	}
+
+	return 0;
+}
+
+static inline u32 ndev_spad_read(struct intel_ntb_dev *ndev, int idx,
+				 void __iomem *mmio)
+{
+	if (ndev_is_unsafe(ndev, NTB_UNSAFE_SPAD))
+		pr_warn_once("%s: NTB unsafe scratchpad access", __func__);
+
+	if (idx < 0 || idx >= ndev->spad_count)
+		return 0;
+
+	return ioread32(mmio + (idx << 2));
+}
+
+static inline int ndev_spad_write(struct intel_ntb_dev *ndev, int idx, u32 val,
+				  void __iomem *mmio)
+{
+	if (ndev_is_unsafe(ndev, NTB_UNSAFE_SPAD))
+		pr_warn_once("%s: NTB unsafe scratchpad access", __func__);
+
+	if (idx < 0 || idx >= ndev->spad_count)
+		return -EINVAL;
+
+	iowrite32(val, mmio + (idx << 2));
+
+	return 0;
+}
+
+static irqreturn_t ndev_interrupt(struct intel_ntb_dev *ndev, int vec)
+{
+	u64 vec_mask;
+
+	vec_mask = ndev_vec_mask(ndev, vec);
+
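+	/* On hardware with the broken 32nd MSI-X vector, link events are
+	 * signalled on vector 31, so include the link bit in its mask.
+	 */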
+	if ((ndev->hwerr_flags & NTB_HWERR_MSIX_VECTOR32_BAD) && (vec == 31))
+		vec_mask |= ndev->db_link_mask;
+
+	dev_dbg(&ndev->ntb.pdev->dev, "vec %d vec_mask %llx\n", vec, vec_mask);
+
+	ndev->last_ts = jiffies;
+
+	if (vec_mask & ndev->db_link_mask) {
+		if (ndev->reg->poll_link(ndev))
+			ntb_link_event(&ndev->ntb);
+	}
+
+	if (vec_mask & ndev->db_valid_mask)
+		ntb_db_event(&ndev->ntb, vec);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t ndev_vec_isr(int irq, void *dev)
+{
+	struct intel_ntb_vec *nvec = dev;
+
+	dev_dbg(&nvec->ndev->ntb.pdev->dev, "irq: %d  nvec->num: %d\n",
+		irq, nvec->num);
+
+	return ndev_interrupt(nvec->ndev, nvec->num);
+}
+
+static irqreturn_t ndev_irq_isr(int irq, void *dev)
+{
+	struct intel_ntb_dev *ndev = dev;
+
+	return ndev_interrupt(ndev, irq - ndev->ntb.pdev->irq);
+}
+
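+/*
+ * Set up device interrupts: try MSI-X first, fall back to MSI and finally to
+ * legacy INTx.  All doorbell bits are masked before any vector is requested.
+ */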
+int ndev_init_isr(struct intel_ntb_dev *ndev,
+			 int msix_min, int msix_max,
+			 int msix_shift, int total_shift)
+{
+	struct pci_dev *pdev;
+	int rc, i, msix_count, node;
+
+	pdev = ndev->ntb.pdev;
+
+	node = dev_to_node(&pdev->dev);
+
+	/* Mask all doorbell interrupts */
+	ndev->db_mask = ndev->db_valid_mask;
+	ndev->reg->db_iowrite(ndev->db_mask,
+			      ndev->self_mmio +
+			      ndev->self_reg->db_mask);
+
+	/* Try to set up msix irq */
+
+	ndev->vec = kcalloc_node(msix_max, sizeof(*ndev->vec),
+				 GFP_KERNEL, node);
+	if (!ndev->vec)
+		goto err_msix_vec_alloc;
+
+	ndev->msix = kcalloc_node(msix_max, sizeof(*ndev->msix),
+				  GFP_KERNEL, node);
+	if (!ndev->msix)
+		goto err_msix_alloc;
+
+	for (i = 0; i < msix_max; ++i)
+		ndev->msix[i].entry = i;
+
+	msix_count = pci_enable_msix_range(pdev, ndev->msix,
+					   msix_min, msix_max);
+	if (msix_count < 0)
+		goto err_msix_enable;
+
+	for (i = 0; i < msix_count; ++i) {
+		ndev->vec[i].ndev = ndev;
+		ndev->vec[i].num = i;
+		rc = request_irq(ndev->msix[i].vector, ndev_vec_isr, 0,
+				 "ndev_vec_isr", &ndev->vec[i]);
+		if (rc)
+			goto err_msix_request;
+	}
+
+	dev_dbg(&pdev->dev, "Using %d msix interrupts\n", msix_count);
+	ndev->db_vec_count = msix_count;
+	ndev->db_vec_shift = msix_shift;
+	return 0;
+
+err_msix_request:
+	while (i-- > 0)
+		free_irq(ndev->msix[i].vector, &ndev->vec[i]);
+	pci_disable_msix(pdev);
+err_msix_enable:
+	kfree(ndev->msix);
+err_msix_alloc:
+	kfree(ndev->vec);
+err_msix_vec_alloc:
+	ndev->msix = NULL;
+	ndev->vec = NULL;
+
+	/* Try to set up msi irq */
+
+	rc = pci_enable_msi(pdev);
+	if (rc)
+		goto err_msi_enable;
+
+	rc = request_irq(pdev->irq, ndev_irq_isr, 0,
+			 "ndev_irq_isr", ndev);
+	if (rc)
+		goto err_msi_request;
+
+	dev_dbg(&pdev->dev, "Using msi interrupts\n");
+	ndev->db_vec_count = 1;
+	ndev->db_vec_shift = total_shift;
+	return 0;
+
+err_msi_request:
+	pci_disable_msi(pdev);
+err_msi_enable:
+
+	/* Try to set up intx irq */
+
+	pci_intx(pdev, 1);
+
+	rc = request_irq(pdev->irq, ndev_irq_isr, IRQF_SHARED,
+			 "ndev_irq_isr", ndev);
+	if (rc)
+		goto err_intx_request;
+
+	dev_dbg(&pdev->dev, "Using intx interrupts\n");
+	ndev->db_vec_count = 1;
+	ndev->db_vec_shift = total_shift;
+	return 0;
+
+err_intx_request:
+	return rc;
+}
+
+static void ndev_deinit_isr(struct intel_ntb_dev *ndev)
+{
+	struct pci_dev *pdev;
+	int i;
+
+	pdev = ndev->ntb.pdev;
+
+	/* Mask all doorbell interrupts */
+	ndev->db_mask = ndev->db_valid_mask;
+	ndev->reg->db_iowrite(ndev->db_mask,
+			      ndev->self_mmio +
+			      ndev->self_reg->db_mask);
+
+	if (ndev->msix) {
+		i = ndev->db_vec_count;
+		while (i--)
+			free_irq(ndev->msix[i].vector, &ndev->vec[i]);
+		pci_disable_msix(pdev);
+		kfree(ndev->msix);
+		kfree(ndev->vec);
+	} else {
+		free_irq(pdev->irq, ndev);
+		if (pci_dev_msi_enabled(pdev))
+			pci_disable_msi(pdev);
+	}
+}
+
+static ssize_t ndev_ntb_debugfs_read(struct file *filp, char __user *ubuf,
+				     size_t count, loff_t *offp)
+{
+	struct intel_ntb_dev *ndev;
+	struct pci_dev *pdev;
+	void __iomem *mmio;
+	char *buf;
+	size_t buf_size;
+	ssize_t ret, off;
+	union { u64 v64; u32 v32; u16 v16; u8 v8; } u;
+
+	ndev = filp->private_data;
+	pdev = ndev->ntb.pdev;
+	mmio = ndev->self_mmio;
+
+	buf_size = min(count, 0x800ul);
+
+	buf = kmalloc(buf_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	off = 0;
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "NTB Device Information:\n");
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "Connection Topology -\t%s\n",
+			 ntb_topo_string(ndev->ntb.topo));
+
+	if (ndev->b2b_idx != UINT_MAX) {
+		off += scnprintf(buf + off, buf_size - off,
+				 "B2B MW Idx -\t\t%u\n", ndev->b2b_idx);
+		off += scnprintf(buf + off, buf_size - off,
+				 "B2B Offset -\t\t%#lx\n", ndev->b2b_off);
+	}
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "BAR4 Split -\t\t%s\n",
+			 ndev->bar4_split ? "yes" : "no");
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "NTB CTL -\t\t%#06x\n", ndev->ntb_ctl);
+	off += scnprintf(buf + off, buf_size - off,
+			 "LNK STA -\t\t%#06x\n", ndev->lnk_sta);
+
+	if (!ndev->reg->link_is_up(ndev)) {
+		off += scnprintf(buf + off, buf_size - off,
+				 "Link Status -\t\tDown\n");
+	} else {
+		off += scnprintf(buf + off, buf_size - off,
+				 "Link Status -\t\tUp\n");
+		off += scnprintf(buf + off, buf_size - off,
+				 "Link Speed -\t\tPCI-E Gen %u\n",
+				 NTB_LNK_STA_SPEED(ndev->lnk_sta));
+		off += scnprintf(buf + off, buf_size - off,
+				 "Link Width -\t\tx%u\n",
+				 NTB_LNK_STA_WIDTH(ndev->lnk_sta));
+	}
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "Memory Window Count -\t%u\n", ndev->mw_count);
+	off += scnprintf(buf + off, buf_size - off,
+			 "Scratchpad Count -\t%u\n", ndev->spad_count);
+	off += scnprintf(buf + off, buf_size - off,
+			 "Doorbell Count -\t%u\n", ndev->db_count);
+	off += scnprintf(buf + off, buf_size - off,
+			 "Doorbell Vector Count -\t%u\n", ndev->db_vec_count);
+	off += scnprintf(buf + off, buf_size - off,
+			 "Doorbell Vector Shift -\t%u\n", ndev->db_vec_shift);
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "Doorbell Valid Mask -\t%#llx\n", ndev->db_valid_mask);
+	off += scnprintf(buf + off, buf_size - off,
+			 "Doorbell Link Mask -\t%#llx\n", ndev->db_link_mask);
+	off += scnprintf(buf + off, buf_size - off,
+			 "Doorbell Mask Cached -\t%#llx\n", ndev->db_mask);
+
+	u.v64 = ndev_db_read(ndev, mmio + ndev->self_reg->db_mask);
+	off += scnprintf(buf + off, buf_size - off,
+			 "Doorbell Mask -\t\t%#llx\n", u.v64);
+
+	u.v64 = ndev_db_read(ndev, mmio + ndev->self_reg->db_bell);
+	off += scnprintf(buf + off, buf_size - off,
+			 "Doorbell Bell -\t\t%#llx\n", u.v64);
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "\nNTB Window Size:\n");
+
+	pci_read_config_byte(pdev, XEON_PBAR23SZ_OFFSET, &u.v8);
+	off += scnprintf(buf + off, buf_size - off,
+			 "PBAR23SZ %hhu\n", u.v8);
+	if (!ndev->bar4_split) {
+		pci_read_config_byte(pdev, XEON_PBAR45SZ_OFFSET, &u.v8);
+		off += scnprintf(buf + off, buf_size - off,
+				 "PBAR45SZ %hhu\n", u.v8);
+	} else {
+		pci_read_config_byte(pdev, XEON_PBAR4SZ_OFFSET, &u.v8);
+		off += scnprintf(buf + off, buf_size - off,
+				 "PBAR4SZ %hhu\n", u.v8);
+		pci_read_config_byte(pdev, XEON_PBAR5SZ_OFFSET, &u.v8);
+		off += scnprintf(buf + off, buf_size - off,
+				 "PBAR5SZ %hhu\n", u.v8);
+	}
+
+	pci_read_config_byte(pdev, XEON_SBAR23SZ_OFFSET, &u.v8);
+	off += scnprintf(buf + off, buf_size - off,
+			 "SBAR23SZ %hhu\n", u.v8);
+	if (!ndev->bar4_split) {
+		pci_read_config_byte(pdev, XEON_SBAR45SZ_OFFSET, &u.v8);
+		off += scnprintf(buf + off, buf_size - off,
+				 "SBAR45SZ %hhu\n", u.v8);
+	} else {
+		pci_read_config_byte(pdev, XEON_SBAR4SZ_OFFSET, &u.v8);
+		off += scnprintf(buf + off, buf_size - off,
+				 "SBAR4SZ %hhu\n", u.v8);
+		pci_read_config_byte(pdev, XEON_SBAR5SZ_OFFSET, &u.v8);
+		off += scnprintf(buf + off, buf_size - off,
+				 "SBAR5SZ %hhu\n", u.v8);
+	}
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "\nNTB Incoming XLAT:\n");
+
+	u.v64 = ioread64(mmio + bar2_off(ndev->xlat_reg->bar2_xlat, 2));
+	off += scnprintf(buf + off, buf_size - off,
+			 "XLAT23 -\t\t%#018llx\n", u.v64);
+
+	if (ndev->bar4_split) {
+		u.v32 = ioread32(mmio + bar2_off(ndev->xlat_reg->bar2_xlat, 4));
+		off += scnprintf(buf + off, buf_size - off,
+				 "XLAT4 -\t\t\t%#06x\n", u.v32);
+
+		u.v32 = ioread32(mmio + bar2_off(ndev->xlat_reg->bar2_xlat, 5));
+		off += scnprintf(buf + off, buf_size - off,
+				 "XLAT5 -\t\t\t%#06x\n", u.v32);
+	} else {
+		u.v64 = ioread64(mmio + bar2_off(ndev->xlat_reg->bar2_xlat, 4));
+		off += scnprintf(buf + off, buf_size - off,
+				 "XLAT45 -\t\t%#018llx\n", u.v64);
+	}
+
+	u.v64 = ioread64(mmio + bar2_off(ndev->xlat_reg->bar2_limit, 2));
+	off += scnprintf(buf + off, buf_size - off,
+			 "LMT23 -\t\t\t%#018llx\n", u.v64);
+
+	if (ndev->bar4_split) {
+		u.v32 = ioread32(mmio + bar2_off(ndev->xlat_reg->bar2_limit, 4));
+		off += scnprintf(buf + off, buf_size - off,
+				 "LMT4 -\t\t\t%#06x\n", u.v32);
+		u.v32 = ioread32(mmio + bar2_off(ndev->xlat_reg->bar2_limit, 5));
+		off += scnprintf(buf + off, buf_size - off,
+				 "LMT5 -\t\t\t%#06x\n", u.v32);
+	} else {
+		u.v64 = ioread64(mmio + bar2_off(ndev->xlat_reg->bar2_limit, 4));
+		off += scnprintf(buf + off, buf_size - off,
+				 "LMT45 -\t\t\t%#018llx\n", u.v64);
+	}
+
+	if (pdev_is_gen1(pdev)) {
+		if (ntb_topo_is_b2b(ndev->ntb.topo)) {
+			off += scnprintf(buf + off, buf_size - off,
+					 "\nNTB Outgoing B2B XLAT:\n");
+
+			u.v64 = ioread64(mmio + XEON_PBAR23XLAT_OFFSET);
+			off += scnprintf(buf + off, buf_size - off,
+					 "B2B XLAT23 -\t\t%#018llx\n", u.v64);
+
+			if (ndev->bar4_split) {
+				u.v32 = ioread32(mmio + XEON_PBAR4XLAT_OFFSET);
+				off += scnprintf(buf + off, buf_size - off,
+						 "B2B XLAT4 -\t\t%#06x\n",
+						 u.v32);
+				u.v32 = ioread32(mmio + XEON_PBAR5XLAT_OFFSET);
+				off += scnprintf(buf + off, buf_size - off,
+						 "B2B XLAT5 -\t\t%#06x\n",
+						 u.v32);
+			} else {
+				u.v64 = ioread64(mmio + XEON_PBAR45XLAT_OFFSET);
+				off += scnprintf(buf + off, buf_size - off,
+						 "B2B XLAT45 -\t\t%#018llx\n",
+						 u.v64);
+			}
+
+			u.v64 = ioread64(mmio + XEON_PBAR23LMT_OFFSET);
+			off += scnprintf(buf + off, buf_size - off,
+					 "B2B LMT23 -\t\t%#018llx\n", u.v64);
+
+			if (ndev->bar4_split) {
+				u.v32 = ioread32(mmio + XEON_PBAR4LMT_OFFSET);
+				off += scnprintf(buf + off, buf_size - off,
+						 "B2B LMT4 -\t\t%#06x\n",
+						 u.v32);
+				u.v32 = ioread32(mmio + XEON_PBAR5LMT_OFFSET);
+				off += scnprintf(buf + off, buf_size - off,
+						 "B2B LMT5 -\t\t%#06x\n",
+						 u.v32);
+			} else {
+				u.v64 = ioread64(mmio + XEON_PBAR45LMT_OFFSET);
+				off += scnprintf(buf + off, buf_size - off,
+						 "B2B LMT45 -\t\t%#018llx\n",
+						 u.v64);
+			}
+
+			off += scnprintf(buf + off, buf_size - off,
+					 "\nNTB Secondary BAR:\n");
+
+			u.v64 = ioread64(mmio + XEON_SBAR0BASE_OFFSET);
+			off += scnprintf(buf + off, buf_size - off,
+					 "SBAR01 -\t\t%#018llx\n", u.v64);
+
+			u.v64 = ioread64(mmio + XEON_SBAR23BASE_OFFSET);
+			off += scnprintf(buf + off, buf_size - off,
+					 "SBAR23 -\t\t%#018llx\n", u.v64);
+
+			if (ndev->bar4_split) {
+				u.v32 = ioread32(mmio + XEON_SBAR4BASE_OFFSET);
+				off += scnprintf(buf + off, buf_size - off,
+						 "SBAR4 -\t\t\t%#06x\n", u.v32);
+				u.v32 = ioread32(mmio + XEON_SBAR5BASE_OFFSET);
+				off += scnprintf(buf + off, buf_size - off,
+						 "SBAR5 -\t\t\t%#06x\n", u.v32);
+			} else {
+				u.v64 = ioread64(mmio + XEON_SBAR45BASE_OFFSET);
+				off += scnprintf(buf + off, buf_size - off,
+						 "SBAR45 -\t\t%#018llx\n",
+						 u.v64);
+			}
+		}
+
+		off += scnprintf(buf + off, buf_size - off,
+				 "\nXEON NTB Statistics:\n");
+
+		u.v16 = ioread16(mmio + XEON_USMEMMISS_OFFSET);
+		off += scnprintf(buf + off, buf_size - off,
+				 "Upstream Memory Miss -\t%u\n", u.v16);
+
+		off += scnprintf(buf + off, buf_size - off,
+				 "\nXEON NTB Hardware Errors:\n");
+
+		if (!pci_read_config_word(pdev,
+					  XEON_DEVSTS_OFFSET, &u.v16))
+			off += scnprintf(buf + off, buf_size - off,
+					 "DEVSTS -\t\t%#06x\n", u.v16);
+
+		if (!pci_read_config_word(pdev,
+					  XEON_LINK_STATUS_OFFSET, &u.v16))
+			off += scnprintf(buf + off, buf_size - off,
+					 "LNKSTS -\t\t%#06x\n", u.v16);
+
+		if (!pci_read_config_dword(pdev,
+					   XEON_UNCERRSTS_OFFSET, &u.v32))
+			off += scnprintf(buf + off, buf_size - off,
+					 "UNCERRSTS -\t\t%#06x\n", u.v32);
+
+		if (!pci_read_config_dword(pdev,
+					   XEON_CORERRSTS_OFFSET, &u.v32))
+			off += scnprintf(buf + off, buf_size - off,
+					 "CORERRSTS -\t\t%#06x\n", u.v32);
+	}
+
+	ret = simple_read_from_buffer(ubuf, count, offp, buf, off);
+	kfree(buf);
+	return ret;
+}
+
+static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf,
+				 size_t count, loff_t *offp)
+{
+	struct intel_ntb_dev *ndev = filp->private_data;
+
+	if (pdev_is_gen1(ndev->ntb.pdev))
+		return ndev_ntb_debugfs_read(filp, ubuf, count, offp);
+	else if (pdev_is_gen3(ndev->ntb.pdev))
+		return ndev_ntb3_debugfs_read(filp, ubuf, count, offp);
+
+	return -ENXIO;
+}
+
+static void ndev_init_debugfs(struct intel_ntb_dev *ndev)
+{
+	if (!debugfs_dir) {
+		ndev->debugfs_dir = NULL;
+		ndev->debugfs_info = NULL;
+	} else {
+		ndev->debugfs_dir =
+			debugfs_create_dir(pci_name(ndev->ntb.pdev),
+					   debugfs_dir);
+		if (!ndev->debugfs_dir)
+			ndev->debugfs_info = NULL;
+		else
+			ndev->debugfs_info =
+				debugfs_create_file("info", S_IRUSR,
+						    ndev->debugfs_dir, ndev,
+						    &intel_ntb_debugfs_info);
+	}
+}
+
+static void ndev_deinit_debugfs(struct intel_ntb_dev *ndev)
+{
+	debugfs_remove_recursive(ndev->debugfs_dir);
+}
+
+int intel_ntb_mw_count(struct ntb_dev *ntb, int pidx)
+{
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
+	return ntb_ndev(ntb)->mw_count;
+}
+
+int intel_ntb_mw_get_align(struct ntb_dev *ntb, int pidx, int idx,
+			   resource_size_t *addr_align,
+			   resource_size_t *size_align,
+			   resource_size_t *size_max)
+{
+	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
+	resource_size_t bar_size, mw_size;
+	int bar;
+
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
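+	/* When the b2b window occupies its whole BAR, that BAR is hidden from
+	 * clients, so skip it when mapping a window index to a BAR.
+	 */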
+	if (idx >= ndev->b2b_idx && !ndev->b2b_off)
+		idx += 1;
+
+	bar = ndev_mw_to_bar(ndev, idx);
+	if (bar < 0)
+		return bar;
+
+	bar_size = pci_resource_len(ndev->ntb.pdev, bar);
+
+	if (idx == ndev->b2b_idx)
+		mw_size = bar_size - ndev->b2b_off;
+	else
+		mw_size = bar_size;
+
+	if (addr_align)
+		*addr_align = pci_resource_len(ndev->ntb.pdev, bar);
+
+	if (size_align)
+		*size_align = 1;
+
+	if (size_max)
+		*size_max = mw_size;
+
+	return 0;
+}
+
+static int intel_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx,
+				  dma_addr_t addr, resource_size_t size)
+{
+	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
+	unsigned long base_reg, xlat_reg, limit_reg;
+	resource_size_t bar_size, mw_size;
+	void __iomem *mmio;
+	u64 base, limit, reg_val;
+	int bar;
+
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
+	if (idx >= ndev->b2b_idx && !ndev->b2b_off)
+		idx += 1;
+
+	bar = ndev_mw_to_bar(ndev, idx);
+	if (bar < 0)
+		return bar;
+
+	bar_size = pci_resource_len(ndev->ntb.pdev, bar);
+
+	if (idx == ndev->b2b_idx)
+		mw_size = bar_size - ndev->b2b_off;
+	else
+		mw_size = bar_size;
+
+	/* hardware requires that addr is aligned to bar size */
+	if (addr & (bar_size - 1))
+		return -EINVAL;
+
+	/* make sure the range fits in the usable mw size */
+	if (size > mw_size)
+		return -EINVAL;
+
+	mmio = ndev->self_mmio;
+	base_reg = bar0_off(ndev->xlat_reg->bar0_base, bar);
+	xlat_reg = bar2_off(ndev->xlat_reg->bar2_xlat, bar);
+	limit_reg = bar2_off(ndev->xlat_reg->bar2_limit, bar);
+
+	if (bar < 4 || !ndev->bar4_split) {
+		base = ioread64(mmio + base_reg) & NTB_BAR_MASK_64;
+
+		/* Set the limit if supported and size is not the full mw_size */
+		if (limit_reg && size != mw_size)
+			limit = base + size;
+		else
+			limit = 0;
+
+		/* set and verify setting the translation address */
+		iowrite64(addr, mmio + xlat_reg);
+		reg_val = ioread64(mmio + xlat_reg);
+		if (reg_val != addr) {
+			iowrite64(0, mmio + xlat_reg);
+			return -EIO;
+		}
+
+		/* set and verify setting the limit */
+		iowrite64(limit, mmio + limit_reg);
+		reg_val = ioread64(mmio + limit_reg);
+		if (reg_val != limit) {
+			iowrite64(base, mmio + limit_reg);
+			iowrite64(0, mmio + xlat_reg);
+			return -EIO;
+		}
+	} else {
+		/* split bar addr range must all be 32 bit */
+		if (addr & (~0ull << 32))
+			return -EINVAL;
+		if ((addr + size) & (~0ull << 32))
+			return -EINVAL;
+
+		base = ioread32(mmio + base_reg) & NTB_BAR_MASK_32;
+
+		/* Set the limit if supported and size is not the full mw_size */
+		if (limit_reg && size != mw_size)
+			limit = base + size;
+		else
+			limit = 0;
+
+		/* set and verify setting the translation address */
+		iowrite32(addr, mmio + xlat_reg);
+		reg_val = ioread32(mmio + xlat_reg);
+		if (reg_val != addr) {
+			iowrite32(0, mmio + xlat_reg);
+			return -EIO;
+		}
+
+		/* set and verify setting the limit */
+		iowrite32(limit, mmio + limit_reg);
+		reg_val = ioread32(mmio + limit_reg);
+		if (reg_val != limit) {
+			iowrite32(base, mmio + limit_reg);
+			iowrite32(0, mmio + xlat_reg);
+			return -EIO;
+		}
+	}
+
+	return 0;
+}
+
+u64 intel_ntb_link_is_up(struct ntb_dev *ntb, enum ntb_speed *speed,
+			 enum ntb_width *width)
+{
+	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
+
+	if (ndev->reg->link_is_up(ndev)) {
+		if (speed)
+			*speed = NTB_LNK_STA_SPEED(ndev->lnk_sta);
+		if (width)
+			*width = NTB_LNK_STA_WIDTH(ndev->lnk_sta);
+		return 1;
+	} else {
+		/* TODO MAYBE: is it possible to observe the link speed and
+		 * width while link is training? */
+		if (speed)
+			*speed = NTB_SPEED_NONE;
+		if (width)
+			*width = NTB_WIDTH_NONE;
+		return 0;
+	}
+}
+
+static int intel_ntb_link_enable(struct ntb_dev *ntb,
+				 enum ntb_speed max_speed,
+				 enum ntb_width max_width)
+{
+	struct intel_ntb_dev *ndev;
+	u32 ntb_ctl;
+
+	ndev = container_of(ntb, struct intel_ntb_dev, ntb);
+
+	if (ndev->ntb.topo == NTB_TOPO_SEC)
+		return -EINVAL;
+
+	dev_dbg(&ntb->pdev->dev,
+		"Enabling link with max_speed %d max_width %d\n",
+		max_speed, max_width);
+	if (max_speed != NTB_SPEED_AUTO)
+		dev_dbg(&ntb->pdev->dev, "ignoring max_speed %d\n", max_speed);
+	if (max_width != NTB_WIDTH_AUTO)
+		dev_dbg(&ntb->pdev->dev, "ignoring max_width %d\n", max_width);
+
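+	/* Unlock the configuration, clear the link-disable bit and enable
+	 * snooping on the memory window BARs.
+	 */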
+	ntb_ctl = ioread32(ndev->self_mmio + ndev->reg->ntb_ctl);
+	ntb_ctl &= ~(NTB_CTL_DISABLE | NTB_CTL_CFG_LOCK);
+	ntb_ctl |= NTB_CTL_P2S_BAR2_SNOOP | NTB_CTL_S2P_BAR2_SNOOP;
+	ntb_ctl |= NTB_CTL_P2S_BAR4_SNOOP | NTB_CTL_S2P_BAR4_SNOOP;
+	if (ndev->bar4_split)
+		ntb_ctl |= NTB_CTL_P2S_BAR5_SNOOP | NTB_CTL_S2P_BAR5_SNOOP;
+	iowrite32(ntb_ctl, ndev->self_mmio + ndev->reg->ntb_ctl);
+
+	return 0;
+}
+
+int intel_ntb_link_disable(struct ntb_dev *ntb)
+{
+	struct intel_ntb_dev *ndev;
+	u32 ntb_cntl;
+
+	ndev = container_of(ntb, struct intel_ntb_dev, ntb);
+
+	if (ndev->ntb.topo == NTB_TOPO_SEC)
+		return -EINVAL;
+
+	dev_dbg(&ntb->pdev->dev, "Disabling link\n");
+
+	/* Bring NTB link down */
+	ntb_cntl = ioread32(ndev->self_mmio + ndev->reg->ntb_ctl);
+	ntb_cntl &= ~(NTB_CTL_P2S_BAR2_SNOOP | NTB_CTL_S2P_BAR2_SNOOP);
+	ntb_cntl &= ~(NTB_CTL_P2S_BAR4_SNOOP | NTB_CTL_S2P_BAR4_SNOOP);
+	if (ndev->bar4_split)
+		ntb_cntl &= ~(NTB_CTL_P2S_BAR5_SNOOP | NTB_CTL_S2P_BAR5_SNOOP);
+	ntb_cntl |= NTB_CTL_DISABLE | NTB_CTL_CFG_LOCK;
+	iowrite32(ntb_cntl, ndev->self_mmio + ndev->reg->ntb_ctl);
+
+	return 0;
+}
+
+int intel_ntb_peer_mw_count(struct ntb_dev *ntb)
+{
+	/* Numbers of inbound and outbound memory windows match */
+	return ntb_ndev(ntb)->mw_count;
+}
+
+int intel_ntb_peer_mw_get_addr(struct ntb_dev *ntb, int idx,
+			       phys_addr_t *base, resource_size_t *size)
+{
+	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
+	int bar;
+
+	if (idx >= ndev->b2b_idx && !ndev->b2b_off)
+		idx += 1;
+
+	bar = ndev_mw_to_bar(ndev, idx);
+	if (bar < 0)
+		return bar;
+
+	if (base)
+		*base = pci_resource_start(ndev->ntb.pdev, bar) +
+			(idx == ndev->b2b_idx ? ndev->b2b_off : 0);
+
+	if (size)
+		*size = pci_resource_len(ndev->ntb.pdev, bar) -
+			(idx == ndev->b2b_idx ? ndev->b2b_off : 0);
+
+	return 0;
+}
+
+static int intel_ntb_db_is_unsafe(struct ntb_dev *ntb)
+{
+	return ndev_ignore_unsafe(ntb_ndev(ntb), NTB_UNSAFE_DB);
+}
+
+u64 intel_ntb_db_valid_mask(struct ntb_dev *ntb)
+{
+	return ntb_ndev(ntb)->db_valid_mask;
+}
+
+int intel_ntb_db_vector_count(struct ntb_dev *ntb)
+{
+	struct intel_ntb_dev *ndev;
+
+	ndev = container_of(ntb, struct intel_ntb_dev, ntb);
+
+	return ndev->db_vec_count;
+}
+
+u64 intel_ntb_db_vector_mask(struct ntb_dev *ntb, int db_vector)
+{
+	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
+
+	if (db_vector < 0 || db_vector > ndev->db_vec_count)
+		return 0;
+
+	return ndev->db_valid_mask & ndev_vec_mask(ndev, db_vector);
+}
+
+static u64 intel_ntb_db_read(struct ntb_dev *ntb)
+{
+	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
+
+	return ndev_db_read(ndev,
+			    ndev->self_mmio +
+			    ndev->self_reg->db_bell);
+}
+
+static int intel_ntb_db_clear(struct ntb_dev *ntb, u64 db_bits)
+{
+	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
+
+	return ndev_db_write(ndev, db_bits,
+			     ndev->self_mmio +
+			     ndev->self_reg->db_bell);
+}
+
+int intel_ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits)
+{
+	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
+
+	return ndev_db_set_mask(ndev, db_bits,
+				ndev->self_mmio +
+				ndev->self_reg->db_mask);
+}
+
+int intel_ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits)
+{
+	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
+
+	return ndev_db_clear_mask(ndev, db_bits,
+				  ndev->self_mmio +
+				  ndev->self_reg->db_mask);
+}
+
+int intel_ntb_peer_db_addr(struct ntb_dev *ntb, phys_addr_t *db_addr,
+			   resource_size_t *db_size)
+{
+	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
+
+	return ndev_db_addr(ndev, db_addr, db_size, ndev->peer_addr,
+			    ndev->peer_reg->db_bell);
+}
+
+static int intel_ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits)
+{
+	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
+
+	return ndev_db_write(ndev, db_bits,
+			     ndev->peer_mmio +
+			     ndev->peer_reg->db_bell);
+}
+
+int intel_ntb_spad_is_unsafe(struct ntb_dev *ntb)
+{
+	return ndev_ignore_unsafe(ntb_ndev(ntb), NTB_UNSAFE_SPAD);
+}
+
+int intel_ntb_spad_count(struct ntb_dev *ntb)
+{
+	struct intel_ntb_dev *ndev;
+
+	ndev = container_of(ntb, struct intel_ntb_dev, ntb);
+
+	return ndev->spad_count;
+}
+
+u32 intel_ntb_spad_read(struct ntb_dev *ntb, int idx)
+{
+	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
+
+	return ndev_spad_read(ndev, idx,
+			      ndev->self_mmio +
+			      ndev->self_reg->spad);
+}
+
+int intel_ntb_spad_write(struct ntb_dev *ntb, int idx, u32 val)
+{
+	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
+
+	return ndev_spad_write(ndev, idx, val,
+			       ndev->self_mmio +
+			       ndev->self_reg->spad);
+}
+
+int intel_ntb_peer_spad_addr(struct ntb_dev *ntb, int pidx, int sidx,
+			     phys_addr_t *spad_addr)
+{
+	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
+
+	return ndev_spad_addr(ndev, sidx, spad_addr, ndev->peer_addr,
+			      ndev->peer_reg->spad);
+}
+
+u32 intel_ntb_peer_spad_read(struct ntb_dev *ntb, int pidx, int sidx)
+{
+	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
+
+	return ndev_spad_read(ndev, sidx,
+			      ndev->peer_mmio +
+			      ndev->peer_reg->spad);
+}
+
+int intel_ntb_peer_spad_write(struct ntb_dev *ntb, int pidx, int sidx,
+			      u32 val)
+{
+	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
+
+	return ndev_spad_write(ndev, sidx, val,
+			       ndev->peer_mmio +
+			       ndev->peer_reg->spad);
+}
+
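+/* Xeon doorbell registers are only 16 bits wide */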
+static u64 xeon_db_ioread(void __iomem *mmio)
+{
+	return (u64)ioread16(mmio);
+}
+
+static void xeon_db_iowrite(u64 bits, void __iomem *mmio)
+{
+	iowrite16((u16)bits, mmio);
+}
+
+static int xeon_poll_link(struct intel_ntb_dev *ndev)
+{
+	u16 reg_val;
+	int rc;
+
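+	/* Clear the pending link-event doorbell bit before sampling the link
+	 * status (doorbell bits are write-one-to-clear).
+	 */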
+	ndev->reg->db_iowrite(ndev->db_link_mask,
+			      ndev->self_mmio +
+			      ndev->self_reg->db_bell);
+
+	rc = pci_read_config_word(ndev->ntb.pdev,
+				  XEON_LINK_STATUS_OFFSET, &reg_val);
+	if (rc)
+		return 0;
+
+	if (reg_val == ndev->lnk_sta)
+		return 0;
+
+	ndev->lnk_sta = reg_val;
+
+	return 1;
+}
+
+int xeon_link_is_up(struct intel_ntb_dev *ndev)
+{
+	if (ndev->ntb.topo == NTB_TOPO_SEC)
+		return 1;
+
+	return NTB_LNK_STA_ACTIVE(ndev->lnk_sta);
+}
+
+enum ntb_topo xeon_ppd_topo(struct intel_ntb_dev *ndev, u8 ppd)
+{
+	switch (ppd & XEON_PPD_TOPO_MASK) {
+	case XEON_PPD_TOPO_B2B_USD:
+		return NTB_TOPO_B2B_USD;
+
+	case XEON_PPD_TOPO_B2B_DSD:
+		return NTB_TOPO_B2B_DSD;
+
+	case XEON_PPD_TOPO_PRI_USD:
+	case XEON_PPD_TOPO_PRI_DSD: /* accept bogus PRI_DSD */
+		return NTB_TOPO_PRI;
+
+	case XEON_PPD_TOPO_SEC_USD:
+	case XEON_PPD_TOPO_SEC_DSD: /* accept bogus SEC_DSD */
+		return NTB_TOPO_SEC;
+	}
+
+	return NTB_TOPO_NONE;
+}
+
+static inline int xeon_ppd_bar4_split(struct intel_ntb_dev *ndev, u8 ppd)
+{
+	if (ppd & XEON_PPD_SPLIT_BAR_MASK) {
+		dev_dbg(&ndev->ntb.pdev->dev, "PPD %d split bar\n", ppd);
+		return 1;
+	}
+	return 0;
+}
+
+static int xeon_init_isr(struct intel_ntb_dev *ndev)
+{
+	return ndev_init_isr(ndev, XEON_DB_MSIX_VECTOR_COUNT,
+			     XEON_DB_MSIX_VECTOR_COUNT,
+			     XEON_DB_MSIX_VECTOR_SHIFT,
+			     XEON_DB_TOTAL_SHIFT);
+}
+
+static void xeon_deinit_isr(struct intel_ntb_dev *ndev)
+{
+	ndev_deinit_isr(ndev);
+}
+
+static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev,
+			     const struct intel_b2b_addr *addr,
+			     const struct intel_b2b_addr *peer_addr)
+{
+	struct pci_dev *pdev;
+	void __iomem *mmio;
+	resource_size_t bar_size;
+	phys_addr_t bar_addr;
+	int b2b_bar;
+	u8 bar_sz;
+
+	pdev = ndev->ntb.pdev;
+	mmio = ndev->self_mmio;
+
+	if (ndev->b2b_idx == UINT_MAX) {
+		dev_dbg(&pdev->dev, "not using b2b mw\n");
+		b2b_bar = 0;
+		ndev->b2b_off = 0;
+	} else {
+		b2b_bar = ndev_mw_to_bar(ndev, ndev->b2b_idx);
+		if (b2b_bar < 0)
+			return -EIO;
+
+		dev_dbg(&pdev->dev, "using b2b mw bar %d\n", b2b_bar);
+
+		bar_size = pci_resource_len(ndev->ntb.pdev, b2b_bar);
+
+		dev_dbg(&pdev->dev, "b2b bar size %#llx\n", bar_size);
+
+		if (b2b_mw_share && XEON_B2B_MIN_SIZE <= bar_size >> 1) {
+			dev_dbg(&pdev->dev, "b2b using first half of bar\n");
+			ndev->b2b_off = bar_size >> 1;
+		} else if (XEON_B2B_MIN_SIZE <= bar_size) {
+			dev_dbg(&pdev->dev, "b2b using whole bar\n");
+			ndev->b2b_off = 0;
+			--ndev->mw_count;
+		} else {
+			dev_dbg(&pdev->dev, "b2b bar size is too small\n");
+			return -EIO;
+		}
+	}
+
+	/* Reset the secondary bar sizes to match the primary bar sizes,
+	 * except disable or halve the size of the b2b secondary bar.
+	 *
+	 * Note: code for each specific bar size register, because the register
+	 * offsets are not in a consistent order (bar5sz comes after ppd, odd).
+	 */
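+	/* The BAR size registers encode the size as a power-of-two exponent,
+	 * so decrementing the value halves the secondary BAR and writing zero
+	 * disables it.
+	 */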
+	pci_read_config_byte(pdev, XEON_PBAR23SZ_OFFSET, &bar_sz);
+	dev_dbg(&pdev->dev, "PBAR23SZ %#x\n", bar_sz);
+	if (b2b_bar == 2) {
+		if (ndev->b2b_off)
+			bar_sz -= 1;
+		else
+			bar_sz = 0;
+	}
+	pci_write_config_byte(pdev, XEON_SBAR23SZ_OFFSET, bar_sz);
+	pci_read_config_byte(pdev, XEON_SBAR23SZ_OFFSET, &bar_sz);
+	dev_dbg(&pdev->dev, "SBAR23SZ %#x\n", bar_sz);
+
+	if (!ndev->bar4_split) {
+		pci_read_config_byte(pdev, XEON_PBAR45SZ_OFFSET, &bar_sz);
+		dev_dbg(&pdev->dev, "PBAR45SZ %#x\n", bar_sz);
+		if (b2b_bar == 4) {
+			if (ndev->b2b_off)
+				bar_sz -= 1;
+			else
+				bar_sz = 0;
+		}
+		pci_write_config_byte(pdev, XEON_SBAR45SZ_OFFSET, bar_sz);
+		pci_read_config_byte(pdev, XEON_SBAR45SZ_OFFSET, &bar_sz);
+		dev_dbg(&pdev->dev, "SBAR45SZ %#x\n", bar_sz);
+	} else {
+		pci_read_config_byte(pdev, XEON_PBAR4SZ_OFFSET, &bar_sz);
+		dev_dbg(&pdev->dev, "PBAR4SZ %#x\n", bar_sz);
+		if (b2b_bar == 4) {
+			if (ndev->b2b_off)
+				bar_sz -= 1;
+			else
+				bar_sz = 0;
+		}
+		pci_write_config_byte(pdev, XEON_SBAR4SZ_OFFSET, bar_sz);
+		pci_read_config_byte(pdev, XEON_SBAR4SZ_OFFSET, &bar_sz);
+		dev_dbg(&pdev->dev, "SBAR4SZ %#x\n", bar_sz);
+
+		pci_read_config_byte(pdev, XEON_PBAR5SZ_OFFSET, &bar_sz);
+		dev_dbg(&pdev->dev, "PBAR5SZ %#x\n", bar_sz);
+		if (b2b_bar == 5) {
+			if (ndev->b2b_off)
+				bar_sz -= 1;
+			else
+				bar_sz = 0;
+		}
+		pci_write_config_byte(pdev, XEON_SBAR5SZ_OFFSET, bar_sz);
+		pci_read_config_byte(pdev, XEON_SBAR5SZ_OFFSET, &bar_sz);
+		dev_dbg(&pdev->dev, "SBAR5SZ %#x\n", bar_sz);
+	}
+
+	/* SBAR01 hit by first part of the b2b bar */
+	if (b2b_bar == 0)
+		bar_addr = addr->bar0_addr;
+	else if (b2b_bar == 2)
+		bar_addr = addr->bar2_addr64;
+	else if (b2b_bar == 4 && !ndev->bar4_split)
+		bar_addr = addr->bar4_addr64;
+	else if (b2b_bar == 4)
+		bar_addr = addr->bar4_addr32;
+	else if (b2b_bar == 5)
+		bar_addr = addr->bar5_addr32;
+	else
+		return -EIO;
+
+	dev_dbg(&pdev->dev, "SBAR01 %#018llx\n", bar_addr);
+	iowrite64(bar_addr, mmio + XEON_SBAR0BASE_OFFSET);
+
+	/* Other SBAR are normally hit by the PBAR xlat, except for b2b bar.
+	 * The b2b bar is either disabled above, or configured half-size, and
+	 * it starts at the PBAR xlat + offset.
+	 */
+
+	bar_addr = addr->bar2_addr64 + (b2b_bar == 2 ? ndev->b2b_off : 0);
+	iowrite64(bar_addr, mmio + XEON_SBAR23BASE_OFFSET);
+	bar_addr = ioread64(mmio + XEON_SBAR23BASE_OFFSET);
+	dev_dbg(&pdev->dev, "SBAR23 %#018llx\n", bar_addr);
+
+	if (!ndev->bar4_split) {
+		bar_addr = addr->bar4_addr64 +
+			(b2b_bar == 4 ? ndev->b2b_off : 0);
+		iowrite64(bar_addr, mmio + XEON_SBAR45BASE_OFFSET);
+		bar_addr = ioread64(mmio + XEON_SBAR45BASE_OFFSET);
+		dev_dbg(&pdev->dev, "SBAR45 %#018llx\n", bar_addr);
+	} else {
+		bar_addr = addr->bar4_addr32 +
+			(b2b_bar == 4 ? ndev->b2b_off : 0);
+		iowrite32(bar_addr, mmio + XEON_SBAR4BASE_OFFSET);
+		bar_addr = ioread32(mmio + XEON_SBAR4BASE_OFFSET);
+		dev_dbg(&pdev->dev, "SBAR4 %#010llx\n", bar_addr);
+
+		bar_addr = addr->bar5_addr32 +
+			(b2b_bar == 5 ? ndev->b2b_off : 0);
+		iowrite32(bar_addr, mmio + XEON_SBAR5BASE_OFFSET);
+		bar_addr = ioread32(mmio + XEON_SBAR5BASE_OFFSET);
+		dev_dbg(&pdev->dev, "SBAR5 %#010llx\n", bar_addr);
+	}
+
+	/* setup incoming bar limits == base addrs (zero length windows) */
+
+	bar_addr = addr->bar2_addr64 + (b2b_bar == 2 ? ndev->b2b_off : 0);
+	iowrite64(bar_addr, mmio + XEON_SBAR23LMT_OFFSET);
+	bar_addr = ioread64(mmio + XEON_SBAR23LMT_OFFSET);
+	dev_dbg(&pdev->dev, "SBAR23LMT %#018llx\n", bar_addr);
+
+	if (!ndev->bar4_split) {
+		bar_addr = addr->bar4_addr64 +
+			(b2b_bar == 4 ? ndev->b2b_off : 0);
+		iowrite64(bar_addr, mmio + XEON_SBAR45LMT_OFFSET);
+		bar_addr = ioread64(mmio + XEON_SBAR45LMT_OFFSET);
+		dev_dbg(&pdev->dev, "SBAR45LMT %#018llx\n", bar_addr);
+	} else {
+		bar_addr = addr->bar4_addr32 +
+			(b2b_bar == 4 ? ndev->b2b_off : 0);
+		iowrite32(bar_addr, mmio + XEON_SBAR4LMT_OFFSET);
+		bar_addr = ioread32(mmio + XEON_SBAR4LMT_OFFSET);
+		dev_dbg(&pdev->dev, "SBAR4LMT %#010llx\n", bar_addr);
+
+		bar_addr = addr->bar5_addr32 +
+			(b2b_bar == 5 ? ndev->b2b_off : 0);
+		iowrite32(bar_addr, mmio + XEON_SBAR5LMT_OFFSET);
+		bar_addr = ioread32(mmio + XEON_SBAR5LMT_OFFSET);
+		dev_dbg(&pdev->dev, "SBAR5LMT %#05llx\n", bar_addr);
+	}
+
+	/* zero incoming translation addrs */
+	iowrite64(0, mmio + XEON_SBAR23XLAT_OFFSET);
+
+	if (!ndev->bar4_split) {
+		iowrite64(0, mmio + XEON_SBAR45XLAT_OFFSET);
+	} else {
+		iowrite32(0, mmio + XEON_SBAR4XLAT_OFFSET);
+		iowrite32(0, mmio + XEON_SBAR5XLAT_OFFSET);
+	}
+
+	/* zero outgoing translation limits (whole bar size windows) */
+	iowrite64(0, mmio + XEON_PBAR23LMT_OFFSET);
+	if (!ndev->bar4_split) {
+		iowrite64(0, mmio + XEON_PBAR45LMT_OFFSET);
+	} else {
+		iowrite32(0, mmio + XEON_PBAR4LMT_OFFSET);
+		iowrite32(0, mmio + XEON_PBAR5LMT_OFFSET);
+	}
+
+	/* set outgoing translation offsets */
+	bar_addr = peer_addr->bar2_addr64;
+	iowrite64(bar_addr, mmio + XEON_PBAR23XLAT_OFFSET);
+	bar_addr = ioread64(mmio + XEON_PBAR23XLAT_OFFSET);
+	dev_dbg(&pdev->dev, "PBAR23XLAT %#018llx\n", bar_addr);
+
+	if (!ndev->bar4_split) {
+		bar_addr = peer_addr->bar4_addr64;
+		iowrite64(bar_addr, mmio + XEON_PBAR45XLAT_OFFSET);
+		bar_addr = ioread64(mmio + XEON_PBAR45XLAT_OFFSET);
+		dev_dbg(&pdev->dev, "PBAR45XLAT %#018llx\n", bar_addr);
+	} else {
+		bar_addr = peer_addr->bar4_addr32;
+		iowrite32(bar_addr, mmio + XEON_PBAR4XLAT_OFFSET);
+		bar_addr = ioread32(mmio + XEON_PBAR4XLAT_OFFSET);
+		dev_dbg(&pdev->dev, "PBAR4XLAT %#010llx\n", bar_addr);
+
+		bar_addr = peer_addr->bar5_addr32;
+		iowrite32(bar_addr, mmio + XEON_PBAR5XLAT_OFFSET);
+		bar_addr = ioread32(mmio + XEON_PBAR5XLAT_OFFSET);
+		dev_dbg(&pdev->dev, "PBAR5XLAT %#010llx\n", bar_addr);
+	}
+
+	/* set the translation offset for b2b registers */
+	if (b2b_bar == 0)
+		bar_addr = peer_addr->bar0_addr;
+	else if (b2b_bar == 2)
+		bar_addr = peer_addr->bar2_addr64;
+	else if (b2b_bar == 4 && !ndev->bar4_split)
+		bar_addr = peer_addr->bar4_addr64;
+	else if (b2b_bar == 4)
+		bar_addr = peer_addr->bar4_addr32;
+	else if (b2b_bar == 5)
+		bar_addr = peer_addr->bar5_addr32;
+	else
+		return -EIO;
+
+	/* B2B_XLAT_OFFSET is 64bit, but can only take 32bit writes */
+	dev_dbg(&pdev->dev, "B2BXLAT %#018llx\n", bar_addr);
+	iowrite32(bar_addr, mmio + XEON_B2B_XLAT_OFFSETL);
+	iowrite32(bar_addr >> 32, mmio + XEON_B2B_XLAT_OFFSETU);
+
+	if (b2b_bar) {
+		/* map peer ntb mmio config space registers */
+		ndev->peer_mmio = pci_iomap(pdev, b2b_bar,
+					    XEON_B2B_MIN_SIZE);
+		if (!ndev->peer_mmio)
+			return -EIO;
+
+		ndev->peer_addr = pci_resource_start(pdev, b2b_bar);
+	}
+
+	return 0;
+}
+
+static int xeon_init_ntb(struct intel_ntb_dev *ndev)
+{
+	struct device *dev = &ndev->ntb.pdev->dev;
+	int rc;
+	u32 ntb_ctl;
+
+	if (ndev->bar4_split)
+		ndev->mw_count = HSX_SPLIT_BAR_MW_COUNT;
+	else
+		ndev->mw_count = XEON_MW_COUNT;
+
+	ndev->spad_count = XEON_SPAD_COUNT;
+	ndev->db_count = XEON_DB_COUNT;
+	ndev->db_link_mask = XEON_DB_LINK_BIT;
+
+	switch (ndev->ntb.topo) {
+	case NTB_TOPO_PRI:
+		if (ndev->hwerr_flags & NTB_HWERR_SDOORBELL_LOCKUP) {
+			dev_err(dev, "NTB Primary config disabled\n");
+			return -EINVAL;
+		}
+
+		/* enable link to allow secondary side device to appear */
+		ntb_ctl = ioread32(ndev->self_mmio + ndev->reg->ntb_ctl);
+		ntb_ctl &= ~NTB_CTL_DISABLE;
+		iowrite32(ntb_ctl, ndev->self_mmio + ndev->reg->ntb_ctl);
+
+		/* use half the spads for the peer */
+		ndev->spad_count >>= 1;
+		ndev->self_reg = &xeon_pri_reg;
+		ndev->peer_reg = &xeon_sec_reg;
+		ndev->xlat_reg = &xeon_sec_xlat;
+		break;
+
+	case NTB_TOPO_SEC:
+		if (ndev->hwerr_flags & NTB_HWERR_SDOORBELL_LOCKUP) {
+			dev_err(dev, "NTB Secondary config disabled\n");
+			return -EINVAL;
+		}
+		/* use half the spads for the peer */
+		ndev->spad_count >>= 1;
+		ndev->self_reg = &xeon_sec_reg;
+		ndev->peer_reg = &xeon_pri_reg;
+		ndev->xlat_reg = &xeon_pri_xlat;
+		break;
+
+	case NTB_TOPO_B2B_USD:
+	case NTB_TOPO_B2B_DSD:
+		ndev->self_reg = &xeon_pri_reg;
+		ndev->peer_reg = &xeon_b2b_reg;
+		ndev->xlat_reg = &xeon_sec_xlat;
+
+		if (ndev->hwerr_flags & NTB_HWERR_SDOORBELL_LOCKUP) {
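+			/* With the SDOORBELL lockup errata, avoid the
+			 * secondary/B2B registers and reach the peer's
+			 * primary registers through the b2b memory window
+			 * instead.
+			 */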
+			ndev->peer_reg = &xeon_pri_reg;
+
+			if (b2b_mw_idx < 0)
+				ndev->b2b_idx = b2b_mw_idx + ndev->mw_count;
+			else
+				ndev->b2b_idx = b2b_mw_idx;
+
+			if (ndev->b2b_idx >= ndev->mw_count) {
+				dev_dbg(dev,
+					"b2b_mw_idx %d invalid for mw_count %u\n",
+					b2b_mw_idx, ndev->mw_count);
+				return -EINVAL;
+			}
+
+			dev_dbg(dev, "setting up b2b mw idx %d means %d\n",
+				b2b_mw_idx, ndev->b2b_idx);
+
+		} else if (ndev->hwerr_flags & NTB_HWERR_B2BDOORBELL_BIT14) {
+			dev_warn(dev, "Reduce doorbell count by 1\n");
+			ndev->db_count -= 1;
+		}
+
+		if (ndev->ntb.topo == NTB_TOPO_B2B_USD) {
+			rc = xeon_setup_b2b_mw(ndev,
+					       &xeon_b2b_dsd_addr,
+					       &xeon_b2b_usd_addr);
+		} else {
+			rc = xeon_setup_b2b_mw(ndev,
+					       &xeon_b2b_usd_addr,
+					       &xeon_b2b_dsd_addr);
+		}
+		if (rc)
+			return rc;
+
+		/* Enable Bus Master and Memory Space on the secondary side */
+		iowrite16(PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER,
+			  ndev->self_mmio + XEON_SPCICMD_OFFSET);
+
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1;
+
+	ndev->reg->db_iowrite(ndev->db_valid_mask,
+			      ndev->self_mmio +
+			      ndev->self_reg->db_mask);
+
+	return 0;
+}
+
+static int xeon_init_dev(struct intel_ntb_dev *ndev)
+{
+	struct pci_dev *pdev;
+	u8 ppd;
+	int rc, mem;
+
+	pdev = ndev->ntb.pdev;
+
+	switch (pdev->device) {
+	/* There is a Xeon hardware errata related to writes to SDOORBELL or
+	 * B2BDOORBELL in conjunction with inbound access to NTB MMIO Space,
+	 * which may hang the system.  To workaround this use the second memory
+	 * window to access the interrupt and scratch pad registers on the
+	 * remote system.
+	 */
+	case PCI_DEVICE_ID_INTEL_NTB_SS_JSF:
+	case PCI_DEVICE_ID_INTEL_NTB_PS_JSF:
+	case PCI_DEVICE_ID_INTEL_NTB_B2B_JSF:
+	case PCI_DEVICE_ID_INTEL_NTB_SS_SNB:
+	case PCI_DEVICE_ID_INTEL_NTB_PS_SNB:
+	case PCI_DEVICE_ID_INTEL_NTB_B2B_SNB:
+	case PCI_DEVICE_ID_INTEL_NTB_SS_IVT:
+	case PCI_DEVICE_ID_INTEL_NTB_PS_IVT:
+	case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT:
+	case PCI_DEVICE_ID_INTEL_NTB_SS_HSX:
+	case PCI_DEVICE_ID_INTEL_NTB_PS_HSX:
+	case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX:
+	case PCI_DEVICE_ID_INTEL_NTB_SS_BDX:
+	case PCI_DEVICE_ID_INTEL_NTB_PS_BDX:
+	case PCI_DEVICE_ID_INTEL_NTB_B2B_BDX:
+		ndev->hwerr_flags |= NTB_HWERR_SDOORBELL_LOCKUP;
+		break;
+	}
+
+	switch (pdev->device) {
+	/* There is a hardware errata related to accessing any register in
+	 * SB01BASE in the presence of bidirectional traffic crossing the NTB.
+	 */
+	case PCI_DEVICE_ID_INTEL_NTB_SS_IVT:
+	case PCI_DEVICE_ID_INTEL_NTB_PS_IVT:
+	case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT:
+	case PCI_DEVICE_ID_INTEL_NTB_SS_HSX:
+	case PCI_DEVICE_ID_INTEL_NTB_PS_HSX:
+	case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX:
+	case PCI_DEVICE_ID_INTEL_NTB_SS_BDX:
+	case PCI_DEVICE_ID_INTEL_NTB_PS_BDX:
+	case PCI_DEVICE_ID_INTEL_NTB_B2B_BDX:
+		ndev->hwerr_flags |= NTB_HWERR_SB01BASE_LOCKUP;
+		break;
+	}
+
+	switch (pdev->device) {
+	/* HW Errata on bit 14 of b2bdoorbell register.  Writes will not be
+	 * mirrored to the remote system.  Shrink the number of bits by one,
+	 * since bit 14 is the last bit.
+	 */
+	case PCI_DEVICE_ID_INTEL_NTB_SS_JSF:
+	case PCI_DEVICE_ID_INTEL_NTB_PS_JSF:
+	case PCI_DEVICE_ID_INTEL_NTB_B2B_JSF:
+	case PCI_DEVICE_ID_INTEL_NTB_SS_SNB:
+	case PCI_DEVICE_ID_INTEL_NTB_PS_SNB:
+	case PCI_DEVICE_ID_INTEL_NTB_B2B_SNB:
+	case PCI_DEVICE_ID_INTEL_NTB_SS_IVT:
+	case PCI_DEVICE_ID_INTEL_NTB_PS_IVT:
+	case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT:
+	case PCI_DEVICE_ID_INTEL_NTB_SS_HSX:
+	case PCI_DEVICE_ID_INTEL_NTB_PS_HSX:
+	case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX:
+	case PCI_DEVICE_ID_INTEL_NTB_SS_BDX:
+	case PCI_DEVICE_ID_INTEL_NTB_PS_BDX:
+	case PCI_DEVICE_ID_INTEL_NTB_B2B_BDX:
+		ndev->hwerr_flags |= NTB_HWERR_B2BDOORBELL_BIT14;
+		break;
+	}
+
+	ndev->reg = &xeon_reg;
+
+	rc = pci_read_config_byte(pdev, XEON_PPD_OFFSET, &ppd);
+	if (rc)
+		return -EIO;
+
+	ndev->ntb.topo = xeon_ppd_topo(ndev, ppd);
+	dev_dbg(&pdev->dev, "ppd %#x topo %s\n", ppd,
+		ntb_topo_string(ndev->ntb.topo));
+	if (ndev->ntb.topo == NTB_TOPO_NONE)
+		return -EINVAL;
+
+	if (ndev->ntb.topo != NTB_TOPO_SEC) {
+		ndev->bar4_split = xeon_ppd_bar4_split(ndev, ppd);
+		dev_dbg(&pdev->dev, "ppd %#x bar4_split %d\n",
+			ppd, ndev->bar4_split);
+	} else {
+		/* This is a way for transparent BAR to figure out if we are
+		 * doing split BAR or not. There is no way for the hw on the
+		 * transparent side to know and set the PPD.
+		 */
+		mem = pci_select_bars(pdev, IORESOURCE_MEM);
+		ndev->bar4_split = hweight32(mem) ==
+			HSX_SPLIT_BAR_MW_COUNT + 1;
+		dev_dbg(&pdev->dev, "mem %#x bar4_split %d\n",
+			mem, ndev->bar4_split);
+	}
+
+	rc = xeon_init_ntb(ndev);
+	if (rc)
+		return rc;
+
+	return xeon_init_isr(ndev);
+}
+
+static void xeon_deinit_dev(struct intel_ntb_dev *ndev)
+{
+	xeon_deinit_isr(ndev);
+}
+
+static int intel_ntb_init_pci(struct intel_ntb_dev *ndev, struct pci_dev *pdev)
+{
+	int rc;
+
+	pci_set_drvdata(pdev, ndev);
+
+	rc = pci_enable_device(pdev);
+	if (rc)
+		goto err_pci_enable;
+
+	rc = pci_request_regions(pdev, NTB_NAME);
+	if (rc)
+		goto err_pci_regions;
+
+	pci_set_master(pdev);
+
+	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (rc) {
+		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+		if (rc)
+			goto err_dma_mask;
+		dev_warn(&pdev->dev, "Cannot DMA highmem\n");
+	}
+
+	rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (rc) {
+		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+		if (rc)
+			goto err_dma_mask;
+		dev_warn(&pdev->dev, "Cannot DMA consistent highmem\n");
+	}
+	rc = dma_coerce_mask_and_coherent(&ndev->ntb.dev,
+					  dma_get_mask(&pdev->dev));
+	if (rc)
+		goto err_dma_mask;
+
+	ndev->self_mmio = pci_iomap(pdev, 0, 0);
+	if (!ndev->self_mmio) {
+		rc = -EIO;
+		goto err_mmio;
+	}
+	ndev->peer_mmio = ndev->self_mmio;
+	ndev->peer_addr = pci_resource_start(pdev, 0);
+
+	return 0;
+
+err_mmio:
+err_dma_mask:
+	pci_clear_master(pdev);
+	pci_release_regions(pdev);
+err_pci_regions:
+	pci_disable_device(pdev);
+err_pci_enable:
+	pci_set_drvdata(pdev, NULL);
+	return rc;
+}
+
+static void intel_ntb_deinit_pci(struct intel_ntb_dev *ndev)
+{
+	struct pci_dev *pdev = ndev->ntb.pdev;
+
+	if (ndev->peer_mmio && ndev->peer_mmio != ndev->self_mmio)
+		pci_iounmap(pdev, ndev->peer_mmio);
+	pci_iounmap(pdev, ndev->self_mmio);
+
+	pci_clear_master(pdev);
+	pci_release_regions(pdev);
+	pci_disable_device(pdev);
+	pci_set_drvdata(pdev, NULL);
+}
+
+static inline void ndev_init_struct(struct intel_ntb_dev *ndev,
+				    struct pci_dev *pdev)
+{
+	ndev->ntb.pdev = pdev;
+	ndev->ntb.topo = NTB_TOPO_NONE;
+	ndev->ntb.ops = &intel_ntb_ops;
+
+	ndev->b2b_off = 0;
+	ndev->b2b_idx = UINT_MAX;
+
+	ndev->bar4_split = 0;
+
+	ndev->mw_count = 0;
+	ndev->spad_count = 0;
+	ndev->db_count = 0;
+	ndev->db_vec_count = 0;
+	ndev->db_vec_shift = 0;
+
+	ndev->ntb_ctl = 0;
+	ndev->lnk_sta = 0;
+
+	ndev->db_valid_mask = 0;
+	ndev->db_link_mask = 0;
+	ndev->db_mask = 0;
+
+	spin_lock_init(&ndev->db_mask_lock);
+}
+
+static int intel_ntb_pci_probe(struct pci_dev *pdev,
+			       const struct pci_device_id *id)
+{
+	struct intel_ntb_dev *ndev;
+	int rc, node;
+
+	node = dev_to_node(&pdev->dev);
+
+	if (pdev_is_gen1(pdev)) {
+		ndev = kzalloc_node(sizeof(*ndev), GFP_KERNEL, node);
+		if (!ndev) {
+			rc = -ENOMEM;
+			goto err_ndev;
+		}
+
+		ndev_init_struct(ndev, pdev);
+
+		rc = intel_ntb_init_pci(ndev, pdev);
+		if (rc)
+			goto err_init_pci;
+
+		rc = xeon_init_dev(ndev);
+		if (rc)
+			goto err_init_dev;
+
+	} else if (pdev_is_gen3(pdev)) {
+		ndev = kzalloc_node(sizeof(*ndev), GFP_KERNEL, node);
+		if (!ndev) {
+			rc = -ENOMEM;
+			goto err_ndev;
+		}
+
+		ndev_init_struct(ndev, pdev);
+		ndev->ntb.ops = &intel_ntb3_ops;
+
+		rc = intel_ntb_init_pci(ndev, pdev);
+		if (rc)
+			goto err_init_pci;
+
+		rc = gen3_init_dev(ndev);
+		if (rc)
+			goto err_init_dev;
+
+	} else {
+		rc = -EINVAL;
+		goto err_ndev;
+	}
+
+	ndev_reset_unsafe_flags(ndev);
+
+	ndev->reg->poll_link(ndev);
+
+	ndev_init_debugfs(ndev);
+
+	rc = ntb_register_device(&ndev->ntb);
+	if (rc)
+		goto err_register;
+
+	dev_info(&pdev->dev, "NTB device registered.\n");
+
+	return 0;
+
+err_register:
+	ndev_deinit_debugfs(ndev);
+	if (pdev_is_gen1(pdev) || pdev_is_gen3(pdev))
+		xeon_deinit_dev(ndev);
+err_init_dev:
+	intel_ntb_deinit_pci(ndev);
+err_init_pci:
+	kfree(ndev);
+err_ndev:
+	return rc;
+}
+
+static void intel_ntb_pci_remove(struct pci_dev *pdev)
+{
+	struct intel_ntb_dev *ndev = pci_get_drvdata(pdev);
+
+	ntb_unregister_device(&ndev->ntb);
+	ndev_deinit_debugfs(ndev);
+	if (pdev_is_gen1(pdev) || pdev_is_gen3(pdev))
+		xeon_deinit_dev(ndev);
+	intel_ntb_deinit_pci(ndev);
+	kfree(ndev);
+}
+
+static const struct intel_ntb_reg xeon_reg = {
+	.poll_link		= xeon_poll_link,
+	.link_is_up		= xeon_link_is_up,
+	.db_ioread		= xeon_db_ioread,
+	.db_iowrite		= xeon_db_iowrite,
+	.db_size		= sizeof(u32),
+	.ntb_ctl		= XEON_NTBCNTL_OFFSET,
+	.mw_bar			= {2, 4, 5},
+};
+
+static const struct intel_ntb_alt_reg xeon_pri_reg = {
+	.db_bell		= XEON_PDOORBELL_OFFSET,
+	.db_mask		= XEON_PDBMSK_OFFSET,
+	.spad			= XEON_SPAD_OFFSET,
+};
+
+static const struct intel_ntb_alt_reg xeon_sec_reg = {
+	.db_bell		= XEON_SDOORBELL_OFFSET,
+	.db_mask		= XEON_SDBMSK_OFFSET,
+	/* second half of the scratchpads */
+	.spad			= XEON_SPAD_OFFSET + (XEON_SPAD_COUNT << 1),
+};
+
+static const struct intel_ntb_alt_reg xeon_b2b_reg = {
+	.db_bell		= XEON_B2B_DOORBELL_OFFSET,
+	.spad			= XEON_B2B_SPAD_OFFSET,
+};
+
+static const struct intel_ntb_xlat_reg xeon_pri_xlat = {
+	/* Note: no primary .bar0_base visible to the secondary side.
+	 *
+	 * The secondary side cannot get the base address stored in primary
+	 * bars.  The base address is necessary to set the limit register to
+	 * any value other than zero, or unlimited.
+	 *
+	 * WITHOUT THE BASE ADDRESS, THE SECONDARY SIDE CANNOT DISABLE the
+	 * window by setting the limit equal to base, nor can it limit the size
+	 * of the memory window by setting the limit to base + size.
+	 */
+	.bar2_limit		= XEON_PBAR23LMT_OFFSET,
+	.bar2_xlat		= XEON_PBAR23XLAT_OFFSET,
+};
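To make the limit-register behaviour described above concrete: on the side where the base address is known, a limit can either bound the window or disable it. A minimal sketch (helper name hypothetical; XEON_SBAR23LMT_OFFSET is defined in ntb_hw_gen1.h later in this patch):

/* Sketch: limit == base disables the window; base + size bounds it. */
static void example_set_sbar23_limit(void __iomem *mmio, u64 base,
				     resource_size_t size)
{
	iowrite64(size ? base + size : base, mmio + XEON_SBAR23LMT_OFFSET);
}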
+
+static const struct intel_ntb_xlat_reg xeon_sec_xlat = {
+	.bar0_base		= XEON_SBAR0BASE_OFFSET,
+	.bar2_limit		= XEON_SBAR23LMT_OFFSET,
+	.bar2_xlat		= XEON_SBAR23XLAT_OFFSET,
+};
+
+struct intel_b2b_addr xeon_b2b_usd_addr = {
+	.bar2_addr64		= XEON_B2B_BAR2_ADDR64,
+	.bar4_addr64		= XEON_B2B_BAR4_ADDR64,
+	.bar4_addr32		= XEON_B2B_BAR4_ADDR32,
+	.bar5_addr32		= XEON_B2B_BAR5_ADDR32,
+};
+
+struct intel_b2b_addr xeon_b2b_dsd_addr = {
+	.bar2_addr64		= XEON_B2B_BAR2_ADDR64,
+	.bar4_addr64		= XEON_B2B_BAR4_ADDR64,
+	.bar4_addr32		= XEON_B2B_BAR4_ADDR32,
+	.bar5_addr32		= XEON_B2B_BAR5_ADDR32,
+};
+
+/* operations for primary side of local ntb */
+static const struct ntb_dev_ops intel_ntb_ops = {
+	.mw_count		= intel_ntb_mw_count,
+	.mw_get_align		= intel_ntb_mw_get_align,
+	.mw_set_trans		= intel_ntb_mw_set_trans,
+	.peer_mw_count		= intel_ntb_peer_mw_count,
+	.peer_mw_get_addr	= intel_ntb_peer_mw_get_addr,
+	.link_is_up		= intel_ntb_link_is_up,
+	.link_enable		= intel_ntb_link_enable,
+	.link_disable		= intel_ntb_link_disable,
+	.db_is_unsafe		= intel_ntb_db_is_unsafe,
+	.db_valid_mask		= intel_ntb_db_valid_mask,
+	.db_vector_count	= intel_ntb_db_vector_count,
+	.db_vector_mask		= intel_ntb_db_vector_mask,
+	.db_read		= intel_ntb_db_read,
+	.db_clear		= intel_ntb_db_clear,
+	.db_set_mask		= intel_ntb_db_set_mask,
+	.db_clear_mask		= intel_ntb_db_clear_mask,
+	.peer_db_addr		= intel_ntb_peer_db_addr,
+	.peer_db_set		= intel_ntb_peer_db_set,
+	.spad_is_unsafe		= intel_ntb_spad_is_unsafe,
+	.spad_count		= intel_ntb_spad_count,
+	.spad_read		= intel_ntb_spad_read,
+	.spad_write		= intel_ntb_spad_write,
+	.peer_spad_addr		= intel_ntb_peer_spad_addr,
+	.peer_spad_read		= intel_ntb_peer_spad_read,
+	.peer_spad_write	= intel_ntb_peer_spad_write,
+};
+
+static const struct file_operations intel_ntb_debugfs_info = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = ndev_debugfs_read,
+};
+
+static const struct pci_device_id intel_ntb_pci_tbl[] = {
+	{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_JSF)},
+	{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_SNB)},
+	{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_IVT)},
+	{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_HSX)},
+	{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_BDX)},
+	{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_JSF)},
+	{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_SNB)},
+	{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_IVT)},
+	{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_HSX)},
+	{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_BDX)},
+	{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_JSF)},
+	{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_SNB)},
+	{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_IVT)},
+	{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_HSX)},
+	{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_BDX)},
+	{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_SKX)},
+	{0}
+};
+MODULE_DEVICE_TABLE(pci, intel_ntb_pci_tbl);
+
+static struct pci_driver intel_ntb_pci_driver = {
+	.name = KBUILD_MODNAME,
+	.id_table = intel_ntb_pci_tbl,
+	.probe = intel_ntb_pci_probe,
+	.remove = intel_ntb_pci_remove,
+};
+
+static int __init intel_ntb_pci_driver_init(void)
+{
+	pr_info("%s %s\n", NTB_DESC, NTB_VER);
+
+	if (debugfs_initialized())
+		debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+
+	return pci_register_driver(&intel_ntb_pci_driver);
+}
+module_init(intel_ntb_pci_driver_init);
+
+static void __exit intel_ntb_pci_driver_exit(void)
+{
+	pci_unregister_driver(&intel_ntb_pci_driver);
+
+	debugfs_remove_recursive(debugfs_dir);
+}
+module_exit(intel_ntb_pci_driver_exit);
diff --git a/drivers/ntb/hw/intel/ntb_hw_gen1.h b/drivers/ntb/hw/intel/ntb_hw_gen1.h
new file mode 100644
index 0000000..ad8ec14
--- /dev/null
+++ b/drivers/ntb/hw/intel/ntb_hw_gen1.h
@@ -0,0 +1,182 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2012-2017 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2012-2017 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _NTB_INTEL_GEN1_H_
+#define _NTB_INTEL_GEN1_H_
+
+#include "ntb_hw_intel.h"
+
+/* Intel Gen1 Xeon hardware */
+#define XEON_PBAR23LMT_OFFSET		0x0000
+#define XEON_PBAR45LMT_OFFSET		0x0008
+#define XEON_PBAR4LMT_OFFSET		0x0008
+#define XEON_PBAR5LMT_OFFSET		0x000c
+#define XEON_PBAR23XLAT_OFFSET		0x0010
+#define XEON_PBAR45XLAT_OFFSET		0x0018
+#define XEON_PBAR4XLAT_OFFSET		0x0018
+#define XEON_PBAR5XLAT_OFFSET		0x001c
+#define XEON_SBAR23LMT_OFFSET		0x0020
+#define XEON_SBAR45LMT_OFFSET		0x0028
+#define XEON_SBAR4LMT_OFFSET		0x0028
+#define XEON_SBAR5LMT_OFFSET		0x002c
+#define XEON_SBAR23XLAT_OFFSET		0x0030
+#define XEON_SBAR45XLAT_OFFSET		0x0038
+#define XEON_SBAR4XLAT_OFFSET		0x0038
+#define XEON_SBAR5XLAT_OFFSET		0x003c
+#define XEON_SBAR0BASE_OFFSET		0x0040
+#define XEON_SBAR23BASE_OFFSET		0x0048
+#define XEON_SBAR45BASE_OFFSET		0x0050
+#define XEON_SBAR4BASE_OFFSET		0x0050
+#define XEON_SBAR5BASE_OFFSET		0x0054
+#define XEON_SBDF_OFFSET		0x005c
+#define XEON_NTBCNTL_OFFSET		0x0058
+#define XEON_PDOORBELL_OFFSET		0x0060
+#define XEON_PDBMSK_OFFSET		0x0062
+#define XEON_SDOORBELL_OFFSET		0x0064
+#define XEON_SDBMSK_OFFSET		0x0066
+#define XEON_USMEMMISS_OFFSET		0x0070
+#define XEON_SPAD_OFFSET		0x0080
+#define XEON_PBAR23SZ_OFFSET		0x00d0
+#define XEON_PBAR45SZ_OFFSET		0x00d1
+#define XEON_PBAR4SZ_OFFSET		0x00d1
+#define XEON_SBAR23SZ_OFFSET		0x00d2
+#define XEON_SBAR45SZ_OFFSET		0x00d3
+#define XEON_SBAR4SZ_OFFSET		0x00d3
+#define XEON_PPD_OFFSET			0x00d4
+#define XEON_PBAR5SZ_OFFSET		0x00d5
+#define XEON_SBAR5SZ_OFFSET		0x00d6
+#define XEON_WCCNTRL_OFFSET		0x00e0
+#define XEON_UNCERRSTS_OFFSET		0x014c
+#define XEON_CORERRSTS_OFFSET		0x0158
+#define XEON_LINK_STATUS_OFFSET		0x01a2
+#define XEON_SPCICMD_OFFSET		0x0504
+#define XEON_DEVCTRL_OFFSET		0x0598
+#define XEON_DEVSTS_OFFSET		0x059a
+#define XEON_SLINK_STATUS_OFFSET	0x05a2
+#define XEON_B2B_SPAD_OFFSET		0x0100
+#define XEON_B2B_DOORBELL_OFFSET	0x0140
+#define XEON_B2B_XLAT_OFFSETL		0x0144
+#define XEON_B2B_XLAT_OFFSETU		0x0148
+#define XEON_PPD_CONN_MASK		0x03
+#define XEON_PPD_CONN_TRANSPARENT	0x00
+#define XEON_PPD_CONN_B2B		0x01
+#define XEON_PPD_CONN_RP		0x02
+#define XEON_PPD_DEV_MASK		0x10
+#define XEON_PPD_DEV_USD		0x00
+#define XEON_PPD_DEV_DSD		0x10
+#define XEON_PPD_SPLIT_BAR_MASK		0x40
+
+#define XEON_PPD_TOPO_MASK	(XEON_PPD_CONN_MASK | XEON_PPD_DEV_MASK)
+#define XEON_PPD_TOPO_PRI_USD	(XEON_PPD_CONN_RP | XEON_PPD_DEV_USD)
+#define XEON_PPD_TOPO_PRI_DSD	(XEON_PPD_CONN_RP | XEON_PPD_DEV_DSD)
+#define XEON_PPD_TOPO_SEC_USD	(XEON_PPD_CONN_TRANSPARENT | XEON_PPD_DEV_USD)
+#define XEON_PPD_TOPO_SEC_DSD	(XEON_PPD_CONN_TRANSPARENT | XEON_PPD_DEV_DSD)
+#define XEON_PPD_TOPO_B2B_USD	(XEON_PPD_CONN_B2B | XEON_PPD_DEV_USD)
+#define XEON_PPD_TOPO_B2B_DSD	(XEON_PPD_CONN_B2B | XEON_PPD_DEV_DSD)
+
+#define XEON_MW_COUNT			2
+#define HSX_SPLIT_BAR_MW_COUNT		3
+#define XEON_DB_COUNT			15
+#define XEON_DB_LINK			15
+#define XEON_DB_LINK_BIT			BIT_ULL(XEON_DB_LINK)
+#define XEON_DB_MSIX_VECTOR_COUNT	4
+#define XEON_DB_MSIX_VECTOR_SHIFT	5
+#define XEON_DB_TOTAL_SHIFT		16
+#define XEON_SPAD_COUNT			16
+
+/* Use the following addresses for translation between b2b ntb devices in case
+ * the hardware default values are not reliable. */
+#define XEON_B2B_BAR0_ADDR	0x1000000000000000ull
+#define XEON_B2B_BAR2_ADDR64	0x2000000000000000ull
+#define XEON_B2B_BAR4_ADDR64	0x4000000000000000ull
+#define XEON_B2B_BAR4_ADDR32	0x20000000u
+#define XEON_B2B_BAR5_ADDR32	0x40000000u
+
+/* The peer ntb secondary config space is 32KB fixed size */
+#define XEON_B2B_MIN_SIZE		0x8000
+
+/* flags to indicate hardware errata */
+#define NTB_HWERR_SDOORBELL_LOCKUP	BIT_ULL(0)
+#define NTB_HWERR_SB01BASE_LOCKUP	BIT_ULL(1)
+#define NTB_HWERR_B2BDOORBELL_BIT14	BIT_ULL(2)
+#define NTB_HWERR_MSIX_VECTOR32_BAD	BIT_ULL(3)
+
+extern struct intel_b2b_addr xeon_b2b_usd_addr;
+extern struct intel_b2b_addr xeon_b2b_dsd_addr;
+
+int ndev_init_isr(struct intel_ntb_dev *ndev, int msix_min, int msix_max,
+		int msix_shift, int total_shift);
+enum ntb_topo xeon_ppd_topo(struct intel_ntb_dev *ndev, u8 ppd);
+u64 ndev_db_read(struct intel_ntb_dev *ndev, void __iomem *mmio);
+int ndev_db_write(struct intel_ntb_dev *ndev, u64 db_bits,
+				void __iomem *mmio);
+int ndev_mw_to_bar(struct intel_ntb_dev *ndev, int idx);
+int intel_ntb_mw_count(struct ntb_dev *ntb, int pidx);
+int intel_ntb_mw_get_align(struct ntb_dev *ntb, int pidx, int idx,
+		resource_size_t *addr_align, resource_size_t *size_align,
+		resource_size_t *size_max);
+int intel_ntb_peer_mw_count(struct ntb_dev *ntb);
+int intel_ntb_peer_mw_get_addr(struct ntb_dev *ntb, int idx,
+		phys_addr_t *base, resource_size_t *size);
+u64 intel_ntb_link_is_up(struct ntb_dev *ntb, enum ntb_speed *speed,
+		enum ntb_width *width);
+int intel_ntb_link_disable(struct ntb_dev *ntb);
+u64 intel_ntb_db_valid_mask(struct ntb_dev *ntb);
+int intel_ntb_db_vector_count(struct ntb_dev *ntb);
+u64 intel_ntb_db_vector_mask(struct ntb_dev *ntb, int db_vector);
+int intel_ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits);
+int intel_ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits);
+int intel_ntb_peer_db_addr(struct ntb_dev *ntb, phys_addr_t *db_addr,
+		resource_size_t *db_size);
+int intel_ntb_spad_is_unsafe(struct ntb_dev *ntb);
+int intel_ntb_spad_count(struct ntb_dev *ntb);
+u32 intel_ntb_spad_read(struct ntb_dev *ntb, int idx);
+int intel_ntb_spad_write(struct ntb_dev *ntb, int idx, u32 val);
+u32 intel_ntb_peer_spad_read(struct ntb_dev *ntb, int pidx, int sidx);
+int intel_ntb_peer_spad_write(struct ntb_dev *ntb, int pidx, int sidx,
+		u32 val);
+int intel_ntb_peer_spad_addr(struct ntb_dev *ntb, int pidx, int sidx,
+				    phys_addr_t *spad_addr);
+int xeon_link_is_up(struct intel_ntb_dev *ndev);
+
+#endif
diff --git a/drivers/ntb/hw/intel/ntb_hw_gen3.c b/drivers/ntb/hw/intel/ntb_hw_gen3.c
new file mode 100644
index 0000000..b3fa247
--- /dev/null
+++ b/drivers/ntb/hw/intel/ntb_hw_gen3.c
@@ -0,0 +1,597 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Intel PCIe GEN3 NTB Linux driver
+ *
+ */
+
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/ntb.h>
+
+#include "ntb_hw_intel.h"
+#include "ntb_hw_gen1.h"
+#include "ntb_hw_gen3.h"
+
+static int gen3_poll_link(struct intel_ntb_dev *ndev);
+
+static const struct intel_ntb_reg gen3_reg = {
+	.poll_link		= gen3_poll_link,
+	.link_is_up		= xeon_link_is_up,
+	.db_ioread		= gen3_db_ioread,
+	.db_iowrite		= gen3_db_iowrite,
+	.db_size		= sizeof(u32),
+	.ntb_ctl		= GEN3_NTBCNTL_OFFSET,
+	.mw_bar			= {2, 4},
+};
+
+static const struct intel_ntb_alt_reg gen3_pri_reg = {
+	.db_bell		= GEN3_EM_DOORBELL_OFFSET,
+	.db_clear		= GEN3_IM_INT_STATUS_OFFSET,
+	.db_mask		= GEN3_IM_INT_DISABLE_OFFSET,
+	.spad			= GEN3_IM_SPAD_OFFSET,
+};
+
+static const struct intel_ntb_alt_reg gen3_b2b_reg = {
+	.db_bell		= GEN3_IM_DOORBELL_OFFSET,
+	.db_clear		= GEN3_EM_INT_STATUS_OFFSET,
+	.db_mask		= GEN3_EM_INT_DISABLE_OFFSET,
+	.spad			= GEN3_B2B_SPAD_OFFSET,
+};
+
+static const struct intel_ntb_xlat_reg gen3_sec_xlat = {
+/*	.bar0_base		= GEN3_EMBAR0_OFFSET, */
+	.bar2_limit		= GEN3_IMBAR1XLMT_OFFSET,
+	.bar2_xlat		= GEN3_IMBAR1XBASE_OFFSET,
+};
+
+static int gen3_poll_link(struct intel_ntb_dev *ndev)
+{
+	u16 reg_val;
+	int rc;
+
+	ndev->reg->db_iowrite(ndev->db_link_mask,
+			      ndev->self_mmio +
+			      ndev->self_reg->db_clear);
+
+	rc = pci_read_config_word(ndev->ntb.pdev,
+				  GEN3_LINK_STATUS_OFFSET, &reg_val);
+	if (rc)
+		return 0;
+
+	if (reg_val == ndev->lnk_sta)
+		return 0;
+
+	ndev->lnk_sta = reg_val;
+
+	return 1;
+}
+
+static int gen3_init_isr(struct intel_ntb_dev *ndev)
+{
+	int i;
+
+	/*
+	 * The MSIX vectors and the interrupt status bits are not lined up
+	 * on Skylake. By default the link status bit is bit 32, but it is
+	 * routed to MSIX vector 0. Fix the routing so they line up: the
+	 * vectors at reset are 1-32,0 and must be reprogrammed to 0-32.
+	 */
+
+	for (i = 0; i < GEN3_DB_MSIX_VECTOR_COUNT; i++)
+		iowrite8(i, ndev->self_mmio + GEN3_INTVEC_OFFSET + i);
+
+	/* move link status down one as workaround */
+	if (ndev->hwerr_flags & NTB_HWERR_MSIX_VECTOR32_BAD) {
+		iowrite8(GEN3_DB_MSIX_VECTOR_COUNT - 2,
+			 ndev->self_mmio + GEN3_INTVEC_OFFSET +
+			 (GEN3_DB_MSIX_VECTOR_COUNT - 1));
+	}
+
+	return ndev_init_isr(ndev, GEN3_DB_MSIX_VECTOR_COUNT,
+			     GEN3_DB_MSIX_VECTOR_COUNT,
+			     GEN3_DB_MSIX_VECTOR_SHIFT,
+			     GEN3_DB_TOTAL_SHIFT);
+}
+
+static int gen3_setup_b2b_mw(struct intel_ntb_dev *ndev,
+			    const struct intel_b2b_addr *addr,
+			    const struct intel_b2b_addr *peer_addr)
+{
+	struct pci_dev *pdev;
+	void __iomem *mmio;
+	phys_addr_t bar_addr;
+
+	pdev = ndev->ntb.pdev;
+	mmio = ndev->self_mmio;
+
+	/* setup incoming bar limits == base addrs (zero length windows) */
+	bar_addr = addr->bar2_addr64;
+	iowrite64(bar_addr, mmio + GEN3_IMBAR1XLMT_OFFSET);
+	bar_addr = ioread64(mmio + GEN3_IMBAR1XLMT_OFFSET);
+	dev_dbg(&pdev->dev, "IMBAR1XLMT %#018llx\n", bar_addr);
+
+	bar_addr = addr->bar4_addr64;
+	iowrite64(bar_addr, mmio + GEN3_IMBAR2XLMT_OFFSET);
+	bar_addr = ioread64(mmio + GEN3_IMBAR2XLMT_OFFSET);
+	dev_dbg(&pdev->dev, "IMBAR2XLMT %#018llx\n", bar_addr);
+
+	/* zero incoming translation addrs */
+	iowrite64(0, mmio + GEN3_IMBAR1XBASE_OFFSET);
+	iowrite64(0, mmio + GEN3_IMBAR2XBASE_OFFSET);
+
+	ndev->peer_mmio = ndev->self_mmio;
+
+	return 0;
+}
+
+static int gen3_init_ntb(struct intel_ntb_dev *ndev)
+{
+	int rc;
+
+
+	ndev->mw_count = XEON_MW_COUNT;
+	ndev->spad_count = GEN3_SPAD_COUNT;
+	ndev->db_count = GEN3_DB_COUNT;
+	ndev->db_link_mask = GEN3_DB_LINK_BIT;
+
+	/* doorbell bit 31 is used for the link as part of the workaround */
+	if (ndev->hwerr_flags & NTB_HWERR_MSIX_VECTOR32_BAD)
+		ndev->db_link_mask |= BIT_ULL(31);
+
+	switch (ndev->ntb.topo) {
+	case NTB_TOPO_B2B_USD:
+	case NTB_TOPO_B2B_DSD:
+		ndev->self_reg = &gen3_pri_reg;
+		ndev->peer_reg = &gen3_b2b_reg;
+		ndev->xlat_reg = &gen3_sec_xlat;
+
+		if (ndev->ntb.topo == NTB_TOPO_B2B_USD) {
+			rc = gen3_setup_b2b_mw(ndev,
+					      &xeon_b2b_dsd_addr,
+					      &xeon_b2b_usd_addr);
+		} else {
+			rc = gen3_setup_b2b_mw(ndev,
+					      &xeon_b2b_usd_addr,
+					      &xeon_b2b_dsd_addr);
+		}
+
+		if (rc)
+			return rc;
+
+		/* Enable Bus Master and Memory Space on the secondary side */
+		iowrite16(PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER,
+			  ndev->self_mmio + GEN3_SPCICMD_OFFSET);
+
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1;
+
+	ndev->reg->db_iowrite(ndev->db_valid_mask,
+			      ndev->self_mmio +
+			      ndev->self_reg->db_mask);
+
+	return 0;
+}
+
+int gen3_init_dev(struct intel_ntb_dev *ndev)
+{
+	struct pci_dev *pdev;
+	u8 ppd;
+	int rc;
+
+	pdev = ndev->ntb.pdev;
+
+	ndev->reg = &gen3_reg;
+
+	rc = pci_read_config_byte(pdev, XEON_PPD_OFFSET, &ppd);
+	if (rc)
+		return -EIO;
+
+	ndev->ntb.topo = xeon_ppd_topo(ndev, ppd);
+	dev_dbg(&pdev->dev, "ppd %#x topo %s\n", ppd,
+		ntb_topo_string(ndev->ntb.topo));
+	if (ndev->ntb.topo == NTB_TOPO_NONE)
+		return -EINVAL;
+
+	ndev->hwerr_flags |= NTB_HWERR_MSIX_VECTOR32_BAD;
+
+	rc = gen3_init_ntb(ndev);
+	if (rc)
+		return rc;
+
+	return gen3_init_isr(ndev);
+}
+
+ssize_t ndev_ntb3_debugfs_read(struct file *filp, char __user *ubuf,
+				      size_t count, loff_t *offp)
+{
+	struct intel_ntb_dev *ndev;
+	void __iomem *mmio;
+	char *buf;
+	size_t buf_size;
+	ssize_t ret, off;
+	union { u64 v64; u32 v32; u16 v16; } u;
+
+	ndev = filp->private_data;
+	mmio = ndev->self_mmio;
+
+	buf_size = min(count, 0x800ul);
+
+	buf = kmalloc(buf_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	off = 0;
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "NTB Device Information:\n");
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "Connection Topology -\t%s\n",
+			 ntb_topo_string(ndev->ntb.topo));
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "NTB CTL -\t\t%#06x\n", ndev->ntb_ctl);
+	off += scnprintf(buf + off, buf_size - off,
+			 "LNK STA -\t\t%#06x\n", ndev->lnk_sta);
+
+	if (!ndev->reg->link_is_up(ndev))
+		off += scnprintf(buf + off, buf_size - off,
+				 "Link Status -\t\tDown\n");
+	else {
+		off += scnprintf(buf + off, buf_size - off,
+				 "Link Status -\t\tUp\n");
+		off += scnprintf(buf + off, buf_size - off,
+				 "Link Speed -\t\tPCI-E Gen %u\n",
+				 NTB_LNK_STA_SPEED(ndev->lnk_sta));
+		off += scnprintf(buf + off, buf_size - off,
+				 "Link Width -\t\tx%u\n",
+				 NTB_LNK_STA_WIDTH(ndev->lnk_sta));
+	}
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "Memory Window Count -\t%u\n", ndev->mw_count);
+	off += scnprintf(buf + off, buf_size - off,
+			 "Scratchpad Count -\t%u\n", ndev->spad_count);
+	off += scnprintf(buf + off, buf_size - off,
+			 "Doorbell Count -\t%u\n", ndev->db_count);
+	off += scnprintf(buf + off, buf_size - off,
+			 "Doorbell Vector Count -\t%u\n", ndev->db_vec_count);
+	off += scnprintf(buf + off, buf_size - off,
+			 "Doorbell Vector Shift -\t%u\n", ndev->db_vec_shift);
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "Doorbell Valid Mask -\t%#llx\n", ndev->db_valid_mask);
+	off += scnprintf(buf + off, buf_size - off,
+			 "Doorbell Link Mask -\t%#llx\n", ndev->db_link_mask);
+	off += scnprintf(buf + off, buf_size - off,
+			 "Doorbell Mask Cached -\t%#llx\n", ndev->db_mask);
+
+	u.v64 = ndev_db_read(ndev, mmio + ndev->self_reg->db_mask);
+	off += scnprintf(buf + off, buf_size - off,
+			 "Doorbell Mask -\t\t%#llx\n", u.v64);
+
+	u.v64 = ndev_db_read(ndev, mmio + ndev->self_reg->db_bell);
+	off += scnprintf(buf + off, buf_size - off,
+			 "Doorbell Bell -\t\t%#llx\n", u.v64);
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "\nNTB Incoming XLAT:\n");
+
+	u.v64 = ioread64(mmio + GEN3_IMBAR1XBASE_OFFSET);
+	off += scnprintf(buf + off, buf_size - off,
+			 "IMBAR1XBASE -\t\t%#018llx\n", u.v64);
+
+	u.v64 = ioread64(mmio + GEN3_IMBAR2XBASE_OFFSET);
+	off += scnprintf(buf + off, buf_size - off,
+			 "IMBAR2XBASE -\t\t%#018llx\n", u.v64);
+
+	u.v64 = ioread64(mmio + GEN3_IMBAR1XLMT_OFFSET);
+	off += scnprintf(buf + off, buf_size - off,
+			 "IMBAR1XLMT -\t\t\t%#018llx\n", u.v64);
+
+	u.v64 = ioread64(mmio + GEN3_IMBAR2XLMT_OFFSET);
+	off += scnprintf(buf + off, buf_size - off,
+			 "IMBAR2XLMT -\t\t\t%#018llx\n", u.v64);
+
+	if (ntb_topo_is_b2b(ndev->ntb.topo)) {
+		off += scnprintf(buf + off, buf_size - off,
+				 "\nNTB Outgoing B2B XLAT:\n");
+
+		u.v64 = ioread64(mmio + GEN3_EMBAR1XBASE_OFFSET);
+		off += scnprintf(buf + off, buf_size - off,
+				 "EMBAR1XBASE -\t\t%#018llx\n", u.v64);
+
+		u.v64 = ioread64(mmio + GEN3_EMBAR2XBASE_OFFSET);
+		off += scnprintf(buf + off, buf_size - off,
+				 "EMBAR2XBASE -\t\t%#018llx\n", u.v64);
+
+		u.v64 = ioread64(mmio + GEN3_EMBAR1XLMT_OFFSET);
+		off += scnprintf(buf + off, buf_size - off,
+				 "EMBAR1XLMT -\t\t%#018llx\n", u.v64);
+
+		u.v64 = ioread64(mmio + GEN3_EMBAR2XLMT_OFFSET);
+		off += scnprintf(buf + off, buf_size - off,
+				 "EMBAR2XLMT -\t\t%#018llx\n", u.v64);
+
+		off += scnprintf(buf + off, buf_size - off,
+				 "\nNTB Secondary BAR:\n");
+
+		u.v64 = ioread64(mmio + GEN3_EMBAR0_OFFSET);
+		off += scnprintf(buf + off, buf_size - off,
+				 "EMBAR0 -\t\t%#018llx\n", u.v64);
+
+		u.v64 = ioread64(mmio + GEN3_EMBAR1_OFFSET);
+		off += scnprintf(buf + off, buf_size - off,
+				 "EMBAR1 -\t\t%#018llx\n", u.v64);
+
+		u.v64 = ioread64(mmio + GEN3_EMBAR2_OFFSET);
+		off += scnprintf(buf + off, buf_size - off,
+				 "EMBAR2 -\t\t%#018llx\n", u.v64);
+	}
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "\nNTB Statistics:\n");
+
+	u.v16 = ioread16(mmio + GEN3_USMEMMISS_OFFSET);
+	off += scnprintf(buf + off, buf_size - off,
+			 "Upstream Memory Miss -\t%u\n", u.v16);
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "\nNTB Hardware Errors:\n");
+
+	if (!pci_read_config_word(ndev->ntb.pdev,
+				  GEN3_DEVSTS_OFFSET, &u.v16))
+		off += scnprintf(buf + off, buf_size - off,
+				 "DEVSTS -\t\t%#06x\n", u.v16);
+
+	if (!pci_read_config_word(ndev->ntb.pdev,
+				  GEN3_LINK_STATUS_OFFSET, &u.v16))
+		off += scnprintf(buf + off, buf_size - off,
+				 "LNKSTS -\t\t%#06x\n", u.v16);
+
+	if (!pci_read_config_dword(ndev->ntb.pdev,
+				   GEN3_UNCERRSTS_OFFSET, &u.v32))
+		off += scnprintf(buf + off, buf_size - off,
+				 "UNCERRSTS -\t\t%#06x\n", u.v32);
+
+	if (!pci_read_config_dword(ndev->ntb.pdev,
+				   GEN3_CORERRSTS_OFFSET, &u.v32))
+		off += scnprintf(buf + off, buf_size - off,
+				 "CORERRSTS -\t\t%#06x\n", u.v32);
+
+	ret = simple_read_from_buffer(ubuf, count, offp, buf, off);
+	kfree(buf);
+	return ret;
+}
+
+static int intel_ntb3_link_enable(struct ntb_dev *ntb,
+				  enum ntb_speed max_speed,
+				  enum ntb_width max_width)
+{
+	struct intel_ntb_dev *ndev;
+	u32 ntb_ctl;
+
+	ndev = container_of(ntb, struct intel_ntb_dev, ntb);
+
+	dev_dbg(&ntb->pdev->dev,
+		"Enabling link with max_speed %d max_width %d\n",
+		max_speed, max_width);
+
+	if (max_speed != NTB_SPEED_AUTO)
+		dev_dbg(&ntb->pdev->dev, "ignoring max_speed %d\n", max_speed);
+	if (max_width != NTB_WIDTH_AUTO)
+		dev_dbg(&ntb->pdev->dev, "ignoring max_width %d\n", max_width);
+
+	ntb_ctl = ioread32(ndev->self_mmio + ndev->reg->ntb_ctl);
+	ntb_ctl &= ~(NTB_CTL_DISABLE | NTB_CTL_CFG_LOCK);
+	ntb_ctl |= NTB_CTL_P2S_BAR2_SNOOP | NTB_CTL_S2P_BAR2_SNOOP;
+	ntb_ctl |= NTB_CTL_P2S_BAR4_SNOOP | NTB_CTL_S2P_BAR4_SNOOP;
+	iowrite32(ntb_ctl, ndev->self_mmio + ndev->reg->ntb_ctl);
+
+	return 0;
+}
+
+static int intel_ntb3_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx,
+				   dma_addr_t addr, resource_size_t size)
+{
+	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
+	unsigned long xlat_reg, limit_reg;
+	resource_size_t bar_size, mw_size;
+	void __iomem *mmio;
+	u64 base, limit, reg_val;
+	int bar;
+
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
+	if (idx >= ndev->b2b_idx && !ndev->b2b_off)
+		idx += 1;
+
+	bar = ndev_mw_to_bar(ndev, idx);
+	if (bar < 0)
+		return bar;
+
+	bar_size = pci_resource_len(ndev->ntb.pdev, bar);
+
+	if (idx == ndev->b2b_idx)
+		mw_size = bar_size - ndev->b2b_off;
+	else
+		mw_size = bar_size;
+
+	/* hardware requires that addr is aligned to bar size */
+	if (addr & (bar_size - 1))
+		return -EINVAL;
+
+	/* make sure the range fits in the usable mw size */
+	if (size > mw_size)
+		return -EINVAL;
+
+	mmio = ndev->self_mmio;
+	xlat_reg = ndev->xlat_reg->bar2_xlat + (idx * 0x10);
+	limit_reg = ndev->xlat_reg->bar2_limit + (idx * 0x10);
+	base = pci_resource_start(ndev->ntb.pdev, bar);
+
+	/* Set the limit if supported and size is not the full mw_size */
+	if (limit_reg && size != mw_size)
+		limit = base + size;
+	else
+		limit = base + mw_size;
+
+	/* set and verify setting the translation address */
+	iowrite64(addr, mmio + xlat_reg);
+	reg_val = ioread64(mmio + xlat_reg);
+	if (reg_val != addr) {
+		iowrite64(0, mmio + xlat_reg);
+		return -EIO;
+	}
+
+	dev_dbg(&ntb->pdev->dev, "BAR %d IMBARXBASE: %#Lx\n", bar, reg_val);
+
+	/* set and verify setting the limit */
+	iowrite64(limit, mmio + limit_reg);
+	reg_val = ioread64(mmio + limit_reg);
+	if (reg_val != limit) {
+		iowrite64(base, mmio + limit_reg);
+		iowrite64(0, mmio + xlat_reg);
+		return -EIO;
+	}
+
+	dev_dbg(&ntb->pdev->dev, "BAR %d IMBARXLMT: %#Lx\n", bar, reg_val);
+
+	/* setup the EP */
+	limit_reg = ndev->xlat_reg->bar2_limit + (idx * 0x10) + 0x4000;
+	base = ioread64(mmio + GEN3_EMBAR1_OFFSET + (8 * idx));
+	base &= ~0xf;
+
+	if (limit_reg && size != mw_size)
+		limit = base + size;
+	else
+		limit = base + mw_size;
+
+	/* set and verify setting the limit */
+	iowrite64(limit, mmio + limit_reg);
+	reg_val = ioread64(mmio + limit_reg);
+	if (reg_val != limit) {
+		iowrite64(base, mmio + limit_reg);
+		iowrite64(0, mmio + xlat_reg);
+		return -EIO;
+	}
+
+	dev_dbg(&ntb->pdev->dev, "BAR %d EMBARXLMT: %#Lx\n", bar, reg_val);
+
+	return 0;
+}
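From a client's point of view, the alignment rule enforced above means the inbound buffer must be aligned to the BAR size. A rough usage sketch through the generic ntb.h API (helper name hypothetical):

/* Sketch: query alignment, allocate a coherent buffer, program the window. */
static int example_setup_inbound_mw(struct ntb_dev *ntb, int widx,
				    void **buf, dma_addr_t *dma)
{
	resource_size_t addr_align, size_align, size_max;
	int rc;

	rc = ntb_mw_get_align(ntb, NTB_DEF_PEER_IDX, widx,
			      &addr_align, &size_align, &size_max);
	if (rc)
		return rc;

	/* dma_alloc_coherent() of size_max typically returns a size-aligned
	 * buffer; very large CMA allocations may not be (see the alignment
	 * check in the Switchtec driver further down).
	 */
	*buf = dma_alloc_coherent(&ntb->pdev->dev, size_max, dma, GFP_KERNEL);
	if (!*buf)
		return -ENOMEM;

	return ntb_mw_set_trans(ntb, NTB_DEF_PEER_IDX, widx, *dma, size_max);
}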
+
+static int intel_ntb3_peer_db_set(struct ntb_dev *ntb, u64 db_bits)
+{
+	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
+	int bit;
+
+	if (db_bits & ~ndev->db_valid_mask)
+		return -EINVAL;
+
+	while (db_bits) {
+		bit = __ffs(db_bits);
+		iowrite32(1, ndev->peer_mmio +
+				ndev->peer_reg->db_bell + (bit * 4));
+		db_bits &= db_bits - 1;
+	}
+
+	return 0;
+}
+
+static u64 intel_ntb3_db_read(struct ntb_dev *ntb)
+{
+	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
+
+	return ndev_db_read(ndev,
+			    ndev->self_mmio +
+			    ndev->self_reg->db_clear);
+}
+
+static int intel_ntb3_db_clear(struct ntb_dev *ntb, u64 db_bits)
+{
+	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
+
+	return ndev_db_write(ndev, db_bits,
+			     ndev->self_mmio +
+			     ndev->self_reg->db_clear);
+}
+
+const struct ntb_dev_ops intel_ntb3_ops = {
+	.mw_count		= intel_ntb_mw_count,
+	.mw_get_align		= intel_ntb_mw_get_align,
+	.mw_set_trans		= intel_ntb3_mw_set_trans,
+	.peer_mw_count		= intel_ntb_peer_mw_count,
+	.peer_mw_get_addr	= intel_ntb_peer_mw_get_addr,
+	.link_is_up		= intel_ntb_link_is_up,
+	.link_enable		= intel_ntb3_link_enable,
+	.link_disable		= intel_ntb_link_disable,
+	.db_valid_mask		= intel_ntb_db_valid_mask,
+	.db_vector_count	= intel_ntb_db_vector_count,
+	.db_vector_mask		= intel_ntb_db_vector_mask,
+	.db_read		= intel_ntb3_db_read,
+	.db_clear		= intel_ntb3_db_clear,
+	.db_set_mask		= intel_ntb_db_set_mask,
+	.db_clear_mask		= intel_ntb_db_clear_mask,
+	.peer_db_addr		= intel_ntb_peer_db_addr,
+	.peer_db_set		= intel_ntb3_peer_db_set,
+	.spad_is_unsafe		= intel_ntb_spad_is_unsafe,
+	.spad_count		= intel_ntb_spad_count,
+	.spad_read		= intel_ntb_spad_read,
+	.spad_write		= intel_ntb_spad_write,
+	.peer_spad_addr		= intel_ntb_peer_spad_addr,
+	.peer_spad_read		= intel_ntb_peer_spad_read,
+	.peer_spad_write	= intel_ntb_peer_spad_write,
+};
+
diff --git a/drivers/ntb/hw/intel/ntb_hw_gen3.h b/drivers/ntb/hw/intel/ntb_hw_gen3.h
new file mode 100644
index 0000000..75fb86c
--- /dev/null
+++ b/drivers/ntb/hw/intel/ntb_hw_gen3.h
@@ -0,0 +1,110 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2012-2017 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2012-2017 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _NTB_INTEL_GEN3_H_
+#define _NTB_INTEL_GEN3_H_
+
+#include "ntb_hw_intel.h"
+
+/* Intel Skylake Xeon hardware */
+#define GEN3_IMBAR1SZ_OFFSET		0x00d0
+#define GEN3_IMBAR2SZ_OFFSET		0x00d1
+#define GEN3_EMBAR1SZ_OFFSET		0x00d2
+#define GEN3_EMBAR2SZ_OFFSET		0x00d3
+#define GEN3_DEVCTRL_OFFSET		0x0098
+#define GEN3_DEVSTS_OFFSET		0x009a
+#define GEN3_UNCERRSTS_OFFSET		0x014c
+#define GEN3_CORERRSTS_OFFSET		0x0158
+#define GEN3_LINK_STATUS_OFFSET		0x01a2
+
+#define GEN3_NTBCNTL_OFFSET		0x0000
+#define GEN3_IMBAR1XBASE_OFFSET		0x0010		/* SBAR2XLAT */
+#define GEN3_IMBAR1XLMT_OFFSET		0x0018		/* SBAR2LMT */
+#define GEN3_IMBAR2XBASE_OFFSET		0x0020		/* SBAR4XLAT */
+#define GEN3_IMBAR2XLMT_OFFSET		0x0028		/* SBAR4LMT */
+#define GEN3_IM_INT_STATUS_OFFSET	0x0040
+#define GEN3_IM_INT_DISABLE_OFFSET	0x0048
+#define GEN3_IM_SPAD_OFFSET		0x0080		/* SPAD */
+#define GEN3_USMEMMISS_OFFSET		0x0070
+#define GEN3_INTVEC_OFFSET		0x00d0
+#define GEN3_IM_DOORBELL_OFFSET		0x0100		/* SDOORBELL0 */
+#define GEN3_B2B_SPAD_OFFSET		0x0180		/* B2B SPAD */
+#define GEN3_EMBAR0XBASE_OFFSET		0x4008		/* B2B_XLAT */
+#define GEN3_EMBAR1XBASE_OFFSET		0x4010		/* PBAR2XLAT */
+#define GEN3_EMBAR1XLMT_OFFSET		0x4018		/* PBAR2LMT */
+#define GEN3_EMBAR2XBASE_OFFSET		0x4020		/* PBAR4XLAT */
+#define GEN3_EMBAR2XLMT_OFFSET		0x4028		/* PBAR4LMT */
+#define GEN3_EM_INT_STATUS_OFFSET	0x4040
+#define GEN3_EM_INT_DISABLE_OFFSET	0x4048
+#define GEN3_EM_SPAD_OFFSET		0x4080		/* remote SPAD */
+#define GEN3_EM_DOORBELL_OFFSET		0x4100		/* PDOORBELL0 */
+#define GEN3_SPCICMD_OFFSET		0x4504		/* SPCICMD */
+#define GEN3_EMBAR0_OFFSET		0x4510		/* SBAR0BASE */
+#define GEN3_EMBAR1_OFFSET		0x4518		/* SBAR23BASE */
+#define GEN3_EMBAR2_OFFSET		0x4520		/* SBAR45BASE */
+
+#define GEN3_DB_COUNT			32
+#define GEN3_DB_LINK			32
+#define GEN3_DB_LINK_BIT		BIT_ULL(GEN3_DB_LINK)
+#define GEN3_DB_MSIX_VECTOR_COUNT	33
+#define GEN3_DB_MSIX_VECTOR_SHIFT	1
+#define GEN3_DB_TOTAL_SHIFT		33
+#define GEN3_SPAD_COUNT			16
+
+static inline u64 gen3_db_ioread(void __iomem *mmio)
+{
+	return ioread64(mmio);
+}
+
+static inline void gen3_db_iowrite(u64 bits, void __iomem *mmio)
+{
+	iowrite64(bits, mmio);
+}
+
+ssize_t ndev_ntb3_debugfs_read(struct file *filp, char __user *ubuf,
+				      size_t count, loff_t *offp);
+int gen3_init_dev(struct intel_ntb_dev *ndev);
+
+extern const struct ntb_dev_ops intel_ntb3_ops;
+
+#endif
diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.h b/drivers/ntb/hw/intel/ntb_hw_intel.h
new file mode 100644
index 0000000..c49ff89
--- /dev/null
+++ b/drivers/ntb/hw/intel/ntb_hw_intel.h
@@ -0,0 +1,250 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2012 Intel Corporation. All rights reserved.
+ *   Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2012 Intel Corporation. All rights reserved.
+ *   Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Intel PCIe NTB Linux driver
+ *
+ * Contact Information:
+ * Jon Mason <jon.mason@intel.com>
+ */
+
+#ifndef NTB_HW_INTEL_H
+#define NTB_HW_INTEL_H
+
+#include <linux/ntb.h>
+#include <linux/pci.h>
+
+/* PCI device IDs */
+#define PCI_DEVICE_ID_INTEL_NTB_B2B_JSF	0x3725
+#define PCI_DEVICE_ID_INTEL_NTB_PS_JSF	0x3726
+#define PCI_DEVICE_ID_INTEL_NTB_SS_JSF	0x3727
+#define PCI_DEVICE_ID_INTEL_NTB_B2B_SNB	0x3C0D
+#define PCI_DEVICE_ID_INTEL_NTB_PS_SNB	0x3C0E
+#define PCI_DEVICE_ID_INTEL_NTB_SS_SNB	0x3C0F
+#define PCI_DEVICE_ID_INTEL_NTB_B2B_IVT	0x0E0D
+#define PCI_DEVICE_ID_INTEL_NTB_PS_IVT	0x0E0E
+#define PCI_DEVICE_ID_INTEL_NTB_SS_IVT	0x0E0F
+#define PCI_DEVICE_ID_INTEL_NTB_B2B_HSX	0x2F0D
+#define PCI_DEVICE_ID_INTEL_NTB_PS_HSX	0x2F0E
+#define PCI_DEVICE_ID_INTEL_NTB_SS_HSX	0x2F0F
+#define PCI_DEVICE_ID_INTEL_NTB_B2B_BDX	0x6F0D
+#define PCI_DEVICE_ID_INTEL_NTB_PS_BDX	0x6F0E
+#define PCI_DEVICE_ID_INTEL_NTB_SS_BDX	0x6F0F
+#define PCI_DEVICE_ID_INTEL_NTB_B2B_SKX	0x201C
+
+/* Ntb control and link status */
+#define NTB_CTL_CFG_LOCK		BIT(0)
+#define NTB_CTL_DISABLE			BIT(1)
+#define NTB_CTL_S2P_BAR2_SNOOP		BIT(2)
+#define NTB_CTL_P2S_BAR2_SNOOP		BIT(4)
+#define NTB_CTL_S2P_BAR4_SNOOP		BIT(6)
+#define NTB_CTL_P2S_BAR4_SNOOP		BIT(8)
+#define NTB_CTL_S2P_BAR5_SNOOP		BIT(12)
+#define NTB_CTL_P2S_BAR5_SNOOP		BIT(14)
+
+#define NTB_LNK_STA_ACTIVE_BIT		0x2000
+#define NTB_LNK_STA_SPEED_MASK		0x000f
+#define NTB_LNK_STA_WIDTH_MASK		0x03f0
+#define NTB_LNK_STA_ACTIVE(x)		(!!((x) & NTB_LNK_STA_ACTIVE_BIT))
+#define NTB_LNK_STA_SPEED(x)		((x) & NTB_LNK_STA_SPEED_MASK)
+#define NTB_LNK_STA_WIDTH(x)		(((x) & NTB_LNK_STA_WIDTH_MASK) >> 4)
+
+/* flags to indicate unsafe api */
+#define NTB_UNSAFE_DB			BIT_ULL(0)
+#define NTB_UNSAFE_SPAD			BIT_ULL(1)
+
+#define NTB_BAR_MASK_64			~(0xfull)
+#define NTB_BAR_MASK_32			~(0xfu)
+
+struct intel_ntb_dev;
+
+struct intel_ntb_reg {
+	int (*poll_link)(struct intel_ntb_dev *ndev);
+	int (*link_is_up)(struct intel_ntb_dev *ndev);
+	u64 (*db_ioread)(void __iomem *mmio);
+	void (*db_iowrite)(u64 db_bits, void __iomem *mmio);
+	unsigned long			ntb_ctl;
+	resource_size_t			db_size;
+	int				mw_bar[];
+};
+
+struct intel_ntb_alt_reg {
+	unsigned long			db_bell;
+	unsigned long			db_mask;
+	unsigned long			db_clear;
+	unsigned long			spad;
+};
+
+struct intel_ntb_xlat_reg {
+	unsigned long			bar0_base;
+	unsigned long			bar2_xlat;
+	unsigned long			bar2_limit;
+};
+
+struct intel_b2b_addr {
+	phys_addr_t			bar0_addr;
+	phys_addr_t			bar2_addr64;
+	phys_addr_t			bar4_addr64;
+	phys_addr_t			bar4_addr32;
+	phys_addr_t			bar5_addr32;
+};
+
+struct intel_ntb_vec {
+	struct intel_ntb_dev		*ndev;
+	int				num;
+};
+
+struct intel_ntb_dev {
+	struct ntb_dev			ntb;
+
+	/* offset of peer bar0 in b2b bar */
+	unsigned long			b2b_off;
+	/* mw idx used to access peer bar0 */
+	unsigned int			b2b_idx;
+
+	/* BAR45 is split into BAR4 and BAR5 */
+	bool				bar4_split;
+
+	u32				ntb_ctl;
+	u32				lnk_sta;
+
+	unsigned char			mw_count;
+	unsigned char			spad_count;
+	unsigned char			db_count;
+	unsigned char			db_vec_count;
+	unsigned char			db_vec_shift;
+
+	u64				db_valid_mask;
+	u64				db_link_mask;
+	u64				db_mask;
+
+	/* synchronize rmw access of db_mask and hw reg */
+	spinlock_t			db_mask_lock;
+
+	struct msix_entry		*msix;
+	struct intel_ntb_vec		*vec;
+
+	const struct intel_ntb_reg	*reg;
+	const struct intel_ntb_alt_reg	*self_reg;
+	const struct intel_ntb_alt_reg	*peer_reg;
+	const struct intel_ntb_xlat_reg	*xlat_reg;
+	void				__iomem *self_mmio;
+	void				__iomem *peer_mmio;
+	phys_addr_t			peer_addr;
+
+	unsigned long			last_ts;
+	struct delayed_work		hb_timer;
+
+	unsigned long			hwerr_flags;
+	unsigned long			unsafe_flags;
+	unsigned long			unsafe_flags_ignore;
+
+	struct dentry			*debugfs_dir;
+	struct dentry			*debugfs_info;
+};
+
+#define ntb_ndev(__ntb) container_of(__ntb, struct intel_ntb_dev, ntb)
+#define hb_ndev(__work) container_of(__work, struct intel_ntb_dev, \
+				     hb_timer.work)
+
+static inline int pdev_is_gen1(struct pci_dev *pdev)
+{
+	switch (pdev->device) {
+	case PCI_DEVICE_ID_INTEL_NTB_SS_JSF:
+	case PCI_DEVICE_ID_INTEL_NTB_SS_SNB:
+	case PCI_DEVICE_ID_INTEL_NTB_SS_IVT:
+	case PCI_DEVICE_ID_INTEL_NTB_SS_HSX:
+	case PCI_DEVICE_ID_INTEL_NTB_SS_BDX:
+	case PCI_DEVICE_ID_INTEL_NTB_PS_JSF:
+	case PCI_DEVICE_ID_INTEL_NTB_PS_SNB:
+	case PCI_DEVICE_ID_INTEL_NTB_PS_IVT:
+	case PCI_DEVICE_ID_INTEL_NTB_PS_HSX:
+	case PCI_DEVICE_ID_INTEL_NTB_PS_BDX:
+	case PCI_DEVICE_ID_INTEL_NTB_B2B_JSF:
+	case PCI_DEVICE_ID_INTEL_NTB_B2B_SNB:
+	case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT:
+	case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX:
+	case PCI_DEVICE_ID_INTEL_NTB_B2B_BDX:
+		return 1;
+	}
+	return 0;
+}
+
+static inline int pdev_is_gen3(struct pci_dev *pdev)
+{
+	if (pdev->device == PCI_DEVICE_ID_INTEL_NTB_B2B_SKX)
+		return 1;
+
+	return 0;
+}
+
+#ifndef ioread64
+#ifdef readq
+#define ioread64 readq
+#else
+#define ioread64 _ioread64
+static inline u64 _ioread64(void __iomem *mmio)
+{
+	u64 low, high;
+
+	low = ioread32(mmio);
+	high = ioread32(mmio + sizeof(u32));
+	return low | (high << 32);
+}
+#endif
+#endif
+
+#ifndef iowrite64
+#ifdef writeq
+#define iowrite64 writeq
+#else
+#define iowrite64 _iowrite64
+static inline void _iowrite64(u64 val, void __iomem *mmio)
+{
+	iowrite32(val, mmio);
+	iowrite32(val >> 32, mmio + sizeof(u32));
+}
+#endif
+#endif
+
+#endif
diff --git a/drivers/ntb/hw/mscc/Kconfig b/drivers/ntb/hw/mscc/Kconfig
new file mode 100644
index 0000000..013ed67
--- /dev/null
+++ b/drivers/ntb/hw/mscc/Kconfig
@@ -0,0 +1,9 @@
+config NTB_SWITCHTEC
+	tristate "MicroSemi Switchtec Non-Transparent Bridge Support"
+	select PCI_SW_SWITCHTEC
+	help
+	 Enables NTB support for Switchtec PCI switches. This also
+	 selects the Switchtec management driver as they share the same
+	 hardware interface.
+
+	 If unsure, say N.
diff --git a/drivers/ntb/hw/mscc/Makefile b/drivers/ntb/hw/mscc/Makefile
new file mode 100644
index 0000000..064686e
--- /dev/null
+++ b/drivers/ntb/hw/mscc/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_NTB_SWITCHTEC) += ntb_hw_switchtec.o
diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
new file mode 100644
index 0000000..5ee5f40
--- /dev/null
+++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
@@ -0,0 +1,1588 @@
+/*
+ * Microsemi Switchtec(tm) PCIe Management Driver
+ * Copyright (c) 2017, Microsemi Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/switchtec.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/interrupt.h>
+#include <linux/ntb.h>
+#include <linux/pci.h>
+
+MODULE_DESCRIPTION("Microsemi Switchtec(tm) NTB Driver");
+MODULE_VERSION("0.1");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Microsemi Corporation");
+
+static ulong max_mw_size = SZ_2M;
+module_param(max_mw_size, ulong, 0644);
+MODULE_PARM_DESC(max_mw_size,
+	"Max memory window size reported to the upper layer");
+
+static bool use_lut_mws;
+module_param(use_lut_mws, bool, 0644);
+MODULE_PARM_DESC(use_lut_mws,
+		 "Enable the use of the LUT based memory windows");
+
+#ifndef ioread64
+#ifdef readq
+#define ioread64 readq
+#else
+#define ioread64 _ioread64
+static inline u64 _ioread64(void __iomem *mmio)
+{
+	u64 low, high;
+
+	low = ioread32(mmio);
+	high = ioread32(mmio + sizeof(u32));
+	return low | (high << 32);
+}
+#endif
+#endif
+
+#ifndef iowrite64
+#ifdef writeq
+#define iowrite64 writeq
+#else
+#define iowrite64 _iowrite64
+static inline void _iowrite64(u64 val, void __iomem *mmio)
+{
+	iowrite32(val, mmio);
+	iowrite32(val >> 32, mmio + sizeof(u32));
+}
+#endif
+#endif
+
+#define SWITCHTEC_NTB_MAGIC 0x45CC0001
+#define MAX_MWS     128
+
+struct shared_mw {
+	u32 magic;
+	u32 link_sta;
+	u32 partition_id;
+	u64 mw_sizes[MAX_MWS];
+	u32 spad[128];
+};
+
+#define MAX_DIRECT_MW ARRAY_SIZE(((struct ntb_ctrl_regs *)(0))->bar_entry)
+#define LUT_SIZE SZ_64K
+
+struct switchtec_ntb {
+	struct ntb_dev ntb;
+	struct switchtec_dev *stdev;
+
+	int self_partition;
+	int peer_partition;
+
+	int doorbell_irq;
+	int message_irq;
+
+	struct ntb_info_regs __iomem *mmio_ntb;
+	struct ntb_ctrl_regs __iomem *mmio_ctrl;
+	struct ntb_dbmsg_regs __iomem *mmio_dbmsg;
+	struct ntb_ctrl_regs __iomem *mmio_self_ctrl;
+	struct ntb_ctrl_regs __iomem *mmio_peer_ctrl;
+	struct ntb_dbmsg_regs __iomem *mmio_self_dbmsg;
+	struct ntb_dbmsg_regs __iomem *mmio_peer_dbmsg;
+
+	void __iomem *mmio_xlink_win;
+
+	struct shared_mw *self_shared;
+	struct shared_mw __iomem *peer_shared;
+	dma_addr_t self_shared_dma;
+
+	u64 db_mask;
+	u64 db_valid_mask;
+	int db_shift;
+	int db_peer_shift;
+
+	/* synchronize rmw access of db_mask and hw reg */
+	spinlock_t db_mask_lock;
+
+	int nr_direct_mw;
+	int nr_lut_mw;
+	int nr_rsvd_luts;
+	int direct_mw_to_bar[MAX_DIRECT_MW];
+
+	int peer_nr_direct_mw;
+	int peer_nr_lut_mw;
+	int peer_direct_mw_to_bar[MAX_DIRECT_MW];
+
+	bool link_is_up;
+	enum ntb_speed link_speed;
+	enum ntb_width link_width;
+	struct work_struct link_reinit_work;
+};
+
+static struct switchtec_ntb *ntb_sndev(struct ntb_dev *ntb)
+{
+	return container_of(ntb, struct switchtec_ntb, ntb);
+}
+
+static int switchtec_ntb_part_op(struct switchtec_ntb *sndev,
+				 struct ntb_ctrl_regs __iomem *ctl,
+				 u32 op, int wait_status)
+{
+	static const char * const op_text[] = {
+		[NTB_CTRL_PART_OP_LOCK] = "lock",
+		[NTB_CTRL_PART_OP_CFG] = "configure",
+		[NTB_CTRL_PART_OP_RESET] = "reset",
+	};
+
+	int i;
+	u32 ps;
+	int status;
+
+	switch (op) {
+	case NTB_CTRL_PART_OP_LOCK:
+		status = NTB_CTRL_PART_STATUS_LOCKING;
+		break;
+	case NTB_CTRL_PART_OP_CFG:
+		status = NTB_CTRL_PART_STATUS_CONFIGURING;
+		break;
+	case NTB_CTRL_PART_OP_RESET:
+		status = NTB_CTRL_PART_STATUS_RESETTING;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	iowrite32(op, &ctl->partition_op);
+
+	for (i = 0; i < 1000; i++) {
+		if (msleep_interruptible(50) != 0) {
+			iowrite32(NTB_CTRL_PART_OP_RESET, &ctl->partition_op);
+			return -EINTR;
+		}
+
+		ps = ioread32(&ctl->partition_status) & 0xFFFF;
+
+		if (ps != status)
+			break;
+	}
+
+	if (ps == wait_status)
+		return 0;
+
+	if (ps == status) {
+		dev_err(&sndev->stdev->dev,
+			"Timed out while performing %s (%d). (%08x)\n",
+			op_text[op], op,
+			ioread32(&ctl->partition_status));
+
+		return -ETIMEDOUT;
+	}
+
+	return -EIO;
+}
+
+static int switchtec_ntb_send_msg(struct switchtec_ntb *sndev, int idx,
+				  u32 val)
+{
+	if (idx < 0 || idx >= ARRAY_SIZE(sndev->mmio_peer_dbmsg->omsg))
+		return -EINVAL;
+
+	iowrite32(val, &sndev->mmio_peer_dbmsg->omsg[idx].msg);
+
+	return 0;
+}
+
+static int switchtec_ntb_mw_count(struct ntb_dev *ntb, int pidx)
+{
+	struct switchtec_ntb *sndev = ntb_sndev(ntb);
+	int nr_direct_mw = sndev->peer_nr_direct_mw;
+	int nr_lut_mw = sndev->peer_nr_lut_mw - sndev->nr_rsvd_luts;
+
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
+	if (!use_lut_mws)
+		nr_lut_mw = 0;
+
+	return nr_direct_mw + nr_lut_mw;
+}
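A quick worked example of the count above (values hypothetical):

/* Assuming peer_nr_direct_mw == 2, peer_nr_lut_mw == 32 and one reserved
 * LUT: with use_lut_mws the count is 2 + (32 - 1) = 33, otherwise just 2.
 */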
+
+static int lut_index(struct switchtec_ntb *sndev, int mw_idx)
+{
+	return mw_idx - sndev->nr_direct_mw + sndev->nr_rsvd_luts;
+}
+
+static int peer_lut_index(struct switchtec_ntb *sndev, int mw_idx)
+{
+	return mw_idx - sndev->peer_nr_direct_mw + sndev->nr_rsvd_luts;
+}
+
+static int switchtec_ntb_mw_get_align(struct ntb_dev *ntb, int pidx,
+				      int widx, resource_size_t *addr_align,
+				      resource_size_t *size_align,
+				      resource_size_t *size_max)
+{
+	struct switchtec_ntb *sndev = ntb_sndev(ntb);
+	int lut;
+	resource_size_t size;
+
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
+	lut = widx >= sndev->peer_nr_direct_mw;
+	size = ioread64(&sndev->peer_shared->mw_sizes[widx]);
+
+	if (size == 0)
+		return -EINVAL;
+
+	if (addr_align)
+		*addr_align = lut ? size : SZ_4K;
+
+	if (size_align)
+		*size_align = lut ? size : SZ_4K;
+
+	if (size_max)
+		*size_max = size;
+
+	return 0;
+}
+
+static void switchtec_ntb_mw_clr_direct(struct switchtec_ntb *sndev, int idx)
+{
+	struct ntb_ctrl_regs __iomem *ctl = sndev->mmio_peer_ctrl;
+	int bar = sndev->peer_direct_mw_to_bar[idx];
+	u32 ctl_val;
+
+	ctl_val = ioread32(&ctl->bar_entry[bar].ctl);
+	ctl_val &= ~NTB_CTRL_BAR_DIR_WIN_EN;
+	iowrite32(ctl_val, &ctl->bar_entry[bar].ctl);
+	iowrite32(0, &ctl->bar_entry[bar].win_size);
+	iowrite64(sndev->self_partition, &ctl->bar_entry[bar].xlate_addr);
+}
+
+static void switchtec_ntb_mw_clr_lut(struct switchtec_ntb *sndev, int idx)
+{
+	struct ntb_ctrl_regs __iomem *ctl = sndev->mmio_peer_ctrl;
+
+	iowrite64(0, &ctl->lut_entry[peer_lut_index(sndev, idx)]);
+}
+
+static void switchtec_ntb_mw_set_direct(struct switchtec_ntb *sndev, int idx,
+					dma_addr_t addr, resource_size_t size)
+{
+	int xlate_pos = ilog2(size);
+	int bar = sndev->peer_direct_mw_to_bar[idx];
+	struct ntb_ctrl_regs __iomem *ctl = sndev->mmio_peer_ctrl;
+	u32 ctl_val;
+
+	ctl_val = ioread32(&ctl->bar_entry[bar].ctl);
+	ctl_val |= NTB_CTRL_BAR_DIR_WIN_EN;
+
+	iowrite32(ctl_val, &ctl->bar_entry[bar].ctl);
+	iowrite32(xlate_pos | size, &ctl->bar_entry[bar].win_size);
+	iowrite64(sndev->self_partition | addr,
+		  &ctl->bar_entry[bar].xlate_addr);
+}
+
+static void switchtec_ntb_mw_set_lut(struct switchtec_ntb *sndev, int idx,
+				     dma_addr_t addr, resource_size_t size)
+{
+	struct ntb_ctrl_regs __iomem *ctl = sndev->mmio_peer_ctrl;
+
+	iowrite64((NTB_CTRL_LUT_EN | (sndev->self_partition << 1) | addr),
+		  &ctl->lut_entry[peer_lut_index(sndev, idx)]);
+}
+
+static int switchtec_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int widx,
+				      dma_addr_t addr, resource_size_t size)
+{
+	struct switchtec_ntb *sndev = ntb_sndev(ntb);
+	struct ntb_ctrl_regs __iomem *ctl = sndev->mmio_peer_ctrl;
+	int xlate_pos = ilog2(size);
+	int nr_direct_mw = sndev->peer_nr_direct_mw;
+	int rc;
+
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
+	dev_dbg(&sndev->stdev->dev, "MW %d: part %d addr %pad size %pap\n",
+		widx, pidx, &addr, &size);
+
+	if (widx >= switchtec_ntb_mw_count(ntb, pidx))
+		return -EINVAL;
+
+	if (xlate_pos < 12)
+		return -EINVAL;
+
+	if (!IS_ALIGNED(addr, BIT_ULL(xlate_pos))) {
+		/*
+		 * In certain circumstances we can get a buffer that is
+		 * not aligned to its size. (Most of the time
+		 * dma_alloc_coherent ensures this). This can happen when
+		 * using large buffers allocated by the CMA
+		 * (see CONFIG_CMA_ALIGNMENT)
+		 */
+		dev_err(&sndev->stdev->dev,
+			"ERROR: Memory window address is not aligned to it's size!\n");
+		return -EINVAL;
+	}
+
+	rc = switchtec_ntb_part_op(sndev, ctl, NTB_CTRL_PART_OP_LOCK,
+				   NTB_CTRL_PART_STATUS_LOCKED);
+	if (rc)
+		return rc;
+
+	if (addr == 0 || size == 0) {
+		if (widx < nr_direct_mw)
+			switchtec_ntb_mw_clr_direct(sndev, widx);
+		else
+			switchtec_ntb_mw_clr_lut(sndev, widx);
+	} else {
+		if (widx < nr_direct_mw)
+			switchtec_ntb_mw_set_direct(sndev, widx, addr, size);
+		else
+			switchtec_ntb_mw_set_lut(sndev, widx, addr, size);
+	}
+
+	rc = switchtec_ntb_part_op(sndev, ctl, NTB_CTRL_PART_OP_CFG,
+				   NTB_CTRL_PART_STATUS_NORMAL);
+
+	if (rc == -EIO) {
+		dev_err(&sndev->stdev->dev,
+			"Hardware reported an error configuring mw %d: %08x\n",
+			widx, ioread32(&ctl->bar_error));
+
+		if (widx < nr_direct_mw)
+			switchtec_ntb_mw_clr_direct(sndev, widx);
+		else
+			switchtec_ntb_mw_clr_lut(sndev, widx);
+
+		switchtec_ntb_part_op(sndev, ctl, NTB_CTRL_PART_OP_CFG,
+				      NTB_CTRL_PART_STATUS_NORMAL);
+	}
+
+	return rc;
+}
+
+static int switchtec_ntb_peer_mw_count(struct ntb_dev *ntb)
+{
+	struct switchtec_ntb *sndev = ntb_sndev(ntb);
+	int nr_lut_mw = sndev->nr_lut_mw - sndev->nr_rsvd_luts;
+
+	return sndev->nr_direct_mw + (use_lut_mws ? nr_lut_mw : 0);
+}
+
+static int switchtec_ntb_direct_get_addr(struct switchtec_ntb *sndev,
+					 int idx, phys_addr_t *base,
+					 resource_size_t *size)
+{
+	int bar = sndev->direct_mw_to_bar[idx];
+	size_t offset = 0;
+
+	if (bar < 0)
+		return -EINVAL;
+
+	if (idx == 0) {
+		/*
+		 * This is the direct BAR shared with the LUTs
+		 * which means the actual window will be offset
+		 * by the size of all the LUT entries.
+		 */
+
+		offset = LUT_SIZE * sndev->nr_lut_mw;
+	}
+
+	if (base)
+		*base = pci_resource_start(sndev->ntb.pdev, bar) + offset;
+
+	if (size) {
+		*size = pci_resource_len(sndev->ntb.pdev, bar) - offset;
+		if (offset && *size > offset)
+			*size = offset;
+
+		if (*size > max_mw_size)
+			*size = max_mw_size;
+	}
+
+	return 0;
+}
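A short worked example of the offset logic above, assuming two LUT windows:

/* With nr_lut_mw == 2 and LUT_SIZE == SZ_64K:
 *   direct MW 0 base = pci_resource_start(pdev, bar) + 2 * 64K (128K offset)
 *   reported size    = min(BAR length - 128K, 128K, max_mw_size)
 */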
+
+static int switchtec_ntb_lut_get_addr(struct switchtec_ntb *sndev,
+				      int idx, phys_addr_t *base,
+				      resource_size_t *size)
+{
+	int bar = sndev->direct_mw_to_bar[0];
+	int offset;
+
+	offset = LUT_SIZE * lut_index(sndev, idx);
+
+	if (base)
+		*base = pci_resource_start(sndev->ntb.pdev, bar) + offset;
+
+	if (size)
+		*size = LUT_SIZE;
+
+	return 0;
+}
+
+static int switchtec_ntb_peer_mw_get_addr(struct ntb_dev *ntb, int idx,
+					  phys_addr_t *base,
+					  resource_size_t *size)
+{
+	struct switchtec_ntb *sndev = ntb_sndev(ntb);
+
+	if (idx < sndev->nr_direct_mw)
+		return switchtec_ntb_direct_get_addr(sndev, idx, base, size);
+	else if (idx < switchtec_ntb_peer_mw_count(ntb))
+		return switchtec_ntb_lut_get_addr(sndev, idx, base, size);
+	else
+		return -EINVAL;
+}
+
+static void switchtec_ntb_part_link_speed(struct switchtec_ntb *sndev,
+					  int partition,
+					  enum ntb_speed *speed,
+					  enum ntb_width *width)
+{
+	struct switchtec_dev *stdev = sndev->stdev;
+
+	u32 pff = ioread32(&stdev->mmio_part_cfg[partition].vep_pff_inst_id);
+	u32 linksta = ioread32(&stdev->mmio_pff_csr[pff].pci_cap_region[13]);
+
+	if (speed)
+		*speed = (linksta >> 16) & 0xF;
+
+	if (width)
+		*width = (linksta >> 20) & 0x3F;
+}
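+/*
+ * Note (inference from the shifts above): the dword read from the PFF CSR
+ * appears to hold PCIe Link Control in its low 16 bits and Link Status in
+ * its high 16 bits, so bits 19:16 carry the current link speed and bits
+ * 25:20 the negotiated width.  Those raw encodings map directly onto
+ * enum ntb_speed / enum ntb_width (e.g. 3 -> NTB_SPEED_GEN3,
+ * 16 -> NTB_WIDTH_16).
+ */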
+
+static void switchtec_ntb_set_link_speed(struct switchtec_ntb *sndev)
+{
+	enum ntb_speed self_speed, peer_speed;
+	enum ntb_width self_width, peer_width;
+
+	if (!sndev->link_is_up) {
+		sndev->link_speed = NTB_SPEED_NONE;
+		sndev->link_width = NTB_WIDTH_NONE;
+		return;
+	}
+
+	switchtec_ntb_part_link_speed(sndev, sndev->self_partition,
+				      &self_speed, &self_width);
+	switchtec_ntb_part_link_speed(sndev, sndev->peer_partition,
+				      &peer_speed, &peer_width);
+
+	sndev->link_speed = min(self_speed, peer_speed);
+	sndev->link_width = min(self_width, peer_width);
+}
+
+static int crosslink_is_enabled(struct switchtec_ntb *sndev)
+{
+	struct ntb_info_regs __iomem *inf = sndev->mmio_ntb;
+
+	return ioread8(&inf->ntp_info[sndev->peer_partition].xlink_enabled);
+}
+
+static void crosslink_init_dbmsgs(struct switchtec_ntb *sndev)
+{
+	int i;
+	u32 msg_map = 0;
+
+	if (!crosslink_is_enabled(sndev))
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(sndev->mmio_peer_dbmsg->imsg); i++) {
+		int m = i | sndev->self_partition << 2;
+
+		msg_map |= m << i * 8;
+	}
+
+	iowrite32(msg_map, &sndev->mmio_peer_dbmsg->msg_map);
+	iowrite64(sndev->db_valid_mask << sndev->db_peer_shift,
+		  &sndev->mmio_peer_dbmsg->odb_mask);
+}
+
+enum switchtec_msg {
+	LINK_MESSAGE = 0,
+	MSG_LINK_UP = 1,
+	MSG_LINK_DOWN = 2,
+	MSG_CHECK_LINK = 3,
+	MSG_LINK_FORCE_DOWN = 4,
+};
+
+static int switchtec_ntb_reinit_peer(struct switchtec_ntb *sndev);
+
+static void link_reinit_work(struct work_struct *work)
+{
+	struct switchtec_ntb *sndev;
+
+	sndev = container_of(work, struct switchtec_ntb, link_reinit_work);
+
+	switchtec_ntb_reinit_peer(sndev);
+}
+
+static void switchtec_ntb_check_link(struct switchtec_ntb *sndev,
+				     enum switchtec_msg msg)
+{
+	int link_sta;
+	int old = sndev->link_is_up;
+
+	if (msg == MSG_LINK_FORCE_DOWN) {
+		schedule_work(&sndev->link_reinit_work);
+
+		if (sndev->link_is_up) {
+			sndev->link_is_up = 0;
+			ntb_link_event(&sndev->ntb);
+			dev_info(&sndev->stdev->dev, "ntb link forced down\n");
+		}
+
+		return;
+	}
+
+	link_sta = sndev->self_shared->link_sta;
+	if (link_sta) {
+		/*
+		 * The peer's magic and link_sta are adjacent 32-bit fields
+		 * in its shared window, so a single 64-bit read returns the
+		 * magic in the low half and the peer's link state in the
+		 * high half.
+		 */
+		u64 peer = ioread64(&sndev->peer_shared->magic);
+
+		if ((peer & 0xFFFFFFFF) == SWITCHTEC_NTB_MAGIC)
+			link_sta = peer >> 32;
+		else
+			link_sta = 0;
+	}
+
+	sndev->link_is_up = link_sta;
+	switchtec_ntb_set_link_speed(sndev);
+
+	if (link_sta != old) {
+		switchtec_ntb_send_msg(sndev, LINK_MESSAGE, MSG_CHECK_LINK);
+		ntb_link_event(&sndev->ntb);
+		dev_info(&sndev->stdev->dev, "ntb link %s\n",
+			 link_sta ? "up" : "down");
+
+		if (link_sta)
+			crosslink_init_dbmsgs(sndev);
+	}
+}
+
+static void switchtec_ntb_link_notification(struct switchtec_dev *stdev)
+{
+	struct switchtec_ntb *sndev = stdev->sndev;
+
+	switchtec_ntb_check_link(sndev, MSG_CHECK_LINK);
+}
+
+static u64 switchtec_ntb_link_is_up(struct ntb_dev *ntb,
+				    enum ntb_speed *speed,
+				    enum ntb_width *width)
+{
+	struct switchtec_ntb *sndev = ntb_sndev(ntb);
+
+	if (speed)
+		*speed = sndev->link_speed;
+	if (width)
+		*width = sndev->link_width;
+
+	return sndev->link_is_up;
+}
+
+static int switchtec_ntb_link_enable(struct ntb_dev *ntb,
+				     enum ntb_speed max_speed,
+				     enum ntb_width max_width)
+{
+	struct switchtec_ntb *sndev = ntb_sndev(ntb);
+
+	dev_dbg(&sndev->stdev->dev, "enabling link\n");
+
+	sndev->self_shared->link_sta = 1;
+	switchtec_ntb_send_msg(sndev, LINK_MESSAGE, MSG_LINK_UP);
+
+	switchtec_ntb_check_link(sndev, MSG_CHECK_LINK);
+
+	return 0;
+}
+
+static int switchtec_ntb_link_disable(struct ntb_dev *ntb)
+{
+	struct switchtec_ntb *sndev = ntb_sndev(ntb);
+
+	dev_dbg(&sndev->stdev->dev, "disabling link\n");
+
+	sndev->self_shared->link_sta = 0;
+	switchtec_ntb_send_msg(sndev, LINK_MESSAGE, MSG_LINK_DOWN);
+
+	switchtec_ntb_check_link(sndev, MSG_CHECK_LINK);
+
+	return 0;
+}
+
+static u64 switchtec_ntb_db_valid_mask(struct ntb_dev *ntb)
+{
+	struct switchtec_ntb *sndev = ntb_sndev(ntb);
+
+	return sndev->db_valid_mask;
+}
+
+static int switchtec_ntb_db_vector_count(struct ntb_dev *ntb)
+{
+	return 1;
+}
+
+static u64 switchtec_ntb_db_vector_mask(struct ntb_dev *ntb, int db_vector)
+{
+	struct switchtec_ntb *sndev = ntb_sndev(ntb);
+
+	if (db_vector < 0 || db_vector > 1)
+		return 0;
+
+	return sndev->db_valid_mask;
+}
+
+static u64 switchtec_ntb_db_read(struct ntb_dev *ntb)
+{
+	u64 ret;
+	struct switchtec_ntb *sndev = ntb_sndev(ntb);
+
+	ret = ioread64(&sndev->mmio_self_dbmsg->idb) >> sndev->db_shift;
+
+	return ret & sndev->db_valid_mask;
+}
+
+static int switchtec_ntb_db_clear(struct ntb_dev *ntb, u64 db_bits)
+{
+	struct switchtec_ntb *sndev = ntb_sndev(ntb);
+
+	iowrite64(db_bits << sndev->db_shift, &sndev->mmio_self_dbmsg->idb);
+
+	return 0;
+}
+
+static int switchtec_ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits)
+{
+	unsigned long irqflags;
+	struct switchtec_ntb *sndev = ntb_sndev(ntb);
+
+	if (db_bits & ~sndev->db_valid_mask)
+		return -EINVAL;
+
+	spin_lock_irqsave(&sndev->db_mask_lock, irqflags);
+
+	sndev->db_mask |= db_bits << sndev->db_shift;
+	iowrite64(~sndev->db_mask, &sndev->mmio_self_dbmsg->idb_mask);
+
+	spin_unlock_irqrestore(&sndev->db_mask_lock, irqflags);
+
+	return 0;
+}
+
+static int switchtec_ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits)
+{
+	unsigned long irqflags;
+	struct switchtec_ntb *sndev = ntb_sndev(ntb);
+
+	if (db_bits & ~sndev->db_valid_mask)
+		return -EINVAL;
+
+	spin_lock_irqsave(&sndev->db_mask_lock, irqflags);
+
+	sndev->db_mask &= ~(db_bits << sndev->db_shift);
+	iowrite64(~sndev->db_mask, &sndev->mmio_self_dbmsg->idb_mask);
+
+	spin_unlock_irqrestore(&sndev->db_mask_lock, irqflags);
+
+	return 0;
+}
+
+static u64 switchtec_ntb_db_read_mask(struct ntb_dev *ntb)
+{
+	struct switchtec_ntb *sndev = ntb_sndev(ntb);
+
+	return (sndev->db_mask >> sndev->db_shift) & sndev->db_valid_mask;
+}
+
+static int switchtec_ntb_peer_db_addr(struct ntb_dev *ntb,
+				      phys_addr_t *db_addr,
+				      resource_size_t *db_size)
+{
+	struct switchtec_ntb *sndev = ntb_sndev(ntb);
+	unsigned long offset;
+
+	offset = (unsigned long)sndev->mmio_peer_dbmsg->odb -
+		(unsigned long)sndev->stdev->mmio;
+
+	offset += sndev->db_shift / 8;
+
+	if (db_addr)
+		*db_addr = pci_resource_start(ntb->pdev, 0) + offset;
+	if (db_size)
+		*db_size = sizeof(u32);
+
+	return 0;
+}
+
+static int switchtec_ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits)
+{
+	struct switchtec_ntb *sndev = ntb_sndev(ntb);
+
+	iowrite64(db_bits << sndev->db_peer_shift,
+		  &sndev->mmio_peer_dbmsg->odb);
+
+	return 0;
+}
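+/*
+ * Illustrative sketch (not part of the driver): through the <linux/ntb.h>
+ * wrappers one side rings a doorbell and the peer consumes it from its
+ * db_event callback, e.g.
+ *
+ *	ntb_peer_db_set(ntb, BIT_ULL(0));	(local side)
+ *
+ *	u64 db = ntb_db_read(ntb);		(peer, in its db_event handler)
+ *	ntb_db_clear(ntb, db);
+ *
+ * The db_shift/db_peer_shift applied above keep each partition in its own
+ * half of the shared 64-bit doorbell register, except in crosslink mode.
+ */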
+
+static int switchtec_ntb_spad_count(struct ntb_dev *ntb)
+{
+	struct switchtec_ntb *sndev = ntb_sndev(ntb);
+
+	return ARRAY_SIZE(sndev->self_shared->spad);
+}
+
+static u32 switchtec_ntb_spad_read(struct ntb_dev *ntb, int idx)
+{
+	struct switchtec_ntb *sndev = ntb_sndev(ntb);
+
+	if (idx < 0 || idx >= ARRAY_SIZE(sndev->self_shared->spad))
+		return 0;
+
+	if (!sndev->self_shared)
+		return 0;
+
+	return sndev->self_shared->spad[idx];
+}
+
+static int switchtec_ntb_spad_write(struct ntb_dev *ntb, int idx, u32 val)
+{
+	struct switchtec_ntb *sndev = ntb_sndev(ntb);
+
+	if (idx < 0 || idx >= ARRAY_SIZE(sndev->self_shared->spad))
+		return -EINVAL;
+
+	if (!sndev->self_shared)
+		return -EIO;
+
+	sndev->self_shared->spad[idx] = val;
+
+	return 0;
+}
+
+static u32 switchtec_ntb_peer_spad_read(struct ntb_dev *ntb, int pidx,
+					int sidx)
+{
+	struct switchtec_ntb *sndev = ntb_sndev(ntb);
+
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
+	if (sidx < 0 || sidx >= ARRAY_SIZE(sndev->peer_shared->spad))
+		return 0;
+
+	if (!sndev->peer_shared)
+		return 0;
+
+	return ioread32(&sndev->peer_shared->spad[sidx]);
+}
+
+static int switchtec_ntb_peer_spad_write(struct ntb_dev *ntb, int pidx,
+					 int sidx, u32 val)
+{
+	struct switchtec_ntb *sndev = ntb_sndev(ntb);
+
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
+	if (sidx < 0 || sidx >= ARRAY_SIZE(sndev->peer_shared->spad))
+		return -EINVAL;
+
+	if (!sndev->peer_shared)
+		return -EIO;
+
+	iowrite32(val, &sndev->peer_shared->spad[sidx]);
+
+	return 0;
+}
+
+static int switchtec_ntb_peer_spad_addr(struct ntb_dev *ntb, int pidx,
+					int sidx, phys_addr_t *spad_addr)
+{
+	struct switchtec_ntb *sndev = ntb_sndev(ntb);
+	unsigned long offset;
+
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
+	offset = (unsigned long)&sndev->peer_shared->spad[sidx] -
+		(unsigned long)sndev->stdev->mmio;
+
+	if (spad_addr)
+		*spad_addr = pci_resource_start(ntb->pdev, 0) + offset;
+
+	return 0;
+}
+
+static const struct ntb_dev_ops switchtec_ntb_ops = {
+	.mw_count		= switchtec_ntb_mw_count,
+	.mw_get_align		= switchtec_ntb_mw_get_align,
+	.mw_set_trans		= switchtec_ntb_mw_set_trans,
+	.peer_mw_count		= switchtec_ntb_peer_mw_count,
+	.peer_mw_get_addr	= switchtec_ntb_peer_mw_get_addr,
+	.link_is_up		= switchtec_ntb_link_is_up,
+	.link_enable		= switchtec_ntb_link_enable,
+	.link_disable		= switchtec_ntb_link_disable,
+	.db_valid_mask		= switchtec_ntb_db_valid_mask,
+	.db_vector_count	= switchtec_ntb_db_vector_count,
+	.db_vector_mask		= switchtec_ntb_db_vector_mask,
+	.db_read		= switchtec_ntb_db_read,
+	.db_clear		= switchtec_ntb_db_clear,
+	.db_set_mask		= switchtec_ntb_db_set_mask,
+	.db_clear_mask		= switchtec_ntb_db_clear_mask,
+	.db_read_mask		= switchtec_ntb_db_read_mask,
+	.peer_db_addr		= switchtec_ntb_peer_db_addr,
+	.peer_db_set		= switchtec_ntb_peer_db_set,
+	.spad_count		= switchtec_ntb_spad_count,
+	.spad_read		= switchtec_ntb_spad_read,
+	.spad_write		= switchtec_ntb_spad_write,
+	.peer_spad_read		= switchtec_ntb_peer_spad_read,
+	.peer_spad_write	= switchtec_ntb_peer_spad_write,
+	.peer_spad_addr		= switchtec_ntb_peer_spad_addr,
+};
+
+static int switchtec_ntb_init_sndev(struct switchtec_ntb *sndev)
+{
+	u64 tpart_vec;
+	int self;
+	u64 part_map;
+	int bit;
+
+	sndev->ntb.pdev = sndev->stdev->pdev;
+	sndev->ntb.topo = NTB_TOPO_SWITCH;
+	sndev->ntb.ops = &switchtec_ntb_ops;
+
+	INIT_WORK(&sndev->link_reinit_work, link_reinit_work);
+
+	sndev->self_partition = sndev->stdev->partition;
+
+	sndev->mmio_ntb = sndev->stdev->mmio_ntb;
+
+	self = sndev->self_partition;
+	tpart_vec = ioread32(&sndev->mmio_ntb->ntp_info[self].target_part_high);
+	tpart_vec <<= 32;
+	tpart_vec |= ioread32(&sndev->mmio_ntb->ntp_info[self].target_part_low);
+
+	part_map = ioread64(&sndev->mmio_ntb->ep_map);
+	part_map &= ~(1 << sndev->self_partition);
+
+	if (!ffs(tpart_vec)) {
+		if (sndev->stdev->partition_count != 2) {
+			dev_err(&sndev->stdev->dev,
+				"ntb target partition not defined\n");
+			return -ENODEV;
+		}
+
+		bit = ffs(part_map);
+		if (!bit) {
+			dev_err(&sndev->stdev->dev,
+				"peer partition is not NT partition\n");
+			return -ENODEV;
+		}
+
+		sndev->peer_partition = bit - 1;
+	} else {
+		if (ffs(tpart_vec) != fls(tpart_vec)) {
+			dev_err(&sndev->stdev->dev,
+				"ntb driver only supports 1 pair of 1-1 ntb mapping\n");
+			return -ENODEV;
+		}
+
+		sndev->peer_partition = ffs(tpart_vec) - 1;
+		if (!(part_map & (1 << sndev->peer_partition))) {
+			dev_err(&sndev->stdev->dev,
+				"ntb target partition is not NT partition\n");
+			return -ENODEV;
+		}
+	}
+
+	dev_dbg(&sndev->stdev->dev, "Partition ID %d of %d\n",
+		sndev->self_partition, sndev->stdev->partition_count);
+
+	sndev->mmio_ctrl = (void * __iomem)sndev->mmio_ntb +
+		SWITCHTEC_NTB_REG_CTRL_OFFSET;
+	sndev->mmio_dbmsg = (void * __iomem)sndev->mmio_ntb +
+		SWITCHTEC_NTB_REG_DBMSG_OFFSET;
+
+	sndev->mmio_self_ctrl = &sndev->mmio_ctrl[sndev->self_partition];
+	sndev->mmio_peer_ctrl = &sndev->mmio_ctrl[sndev->peer_partition];
+	sndev->mmio_self_dbmsg = &sndev->mmio_dbmsg[sndev->self_partition];
+	sndev->mmio_peer_dbmsg = sndev->mmio_self_dbmsg;
+
+	return 0;
+}
+
+static int config_rsvd_lut_win(struct switchtec_ntb *sndev,
+			       struct ntb_ctrl_regs __iomem *ctl,
+			       int lut_idx, int partition, u64 addr)
+{
+	int peer_bar = sndev->peer_direct_mw_to_bar[0];
+	u32 ctl_val;
+	int rc;
+
+	rc = switchtec_ntb_part_op(sndev, ctl, NTB_CTRL_PART_OP_LOCK,
+				   NTB_CTRL_PART_STATUS_LOCKED);
+	if (rc)
+		return rc;
+
+	ctl_val = ioread32(&ctl->bar_entry[peer_bar].ctl);
+	ctl_val &= 0xFF;
+	ctl_val |= NTB_CTRL_BAR_LUT_WIN_EN;
+	ctl_val |= ilog2(LUT_SIZE) << 8;
+	ctl_val |= (sndev->nr_lut_mw - 1) << 14;
+	iowrite32(ctl_val, &ctl->bar_entry[peer_bar].ctl);
+
+	iowrite64((NTB_CTRL_LUT_EN | (partition << 1) | addr),
+		  &ctl->lut_entry[lut_idx]);
+
+	rc = switchtec_ntb_part_op(sndev, ctl, NTB_CTRL_PART_OP_CFG,
+				   NTB_CTRL_PART_STATUS_NORMAL);
+	if (rc) {
+		u32 bar_error, lut_error;
+
+		bar_error = ioread32(&ctl->bar_error);
+		lut_error = ioread32(&ctl->lut_error);
+		dev_err(&sndev->stdev->dev,
+			"Error setting up reserved lut window: %08x / %08x\n",
+			bar_error, lut_error);
+		return rc;
+	}
+
+	return 0;
+}
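+/*
+ * Worked example (illustrative, assuming LUT_SIZE is 64K and
+ * NTB_CTRL_LUT_EN is bit 0 as defined earlier in this file): for
+ * partition 1 and a 64K-aligned address of 0x10000000, the entry written
+ * above is
+ *
+ *	NTB_CTRL_LUT_EN | (1 << 1) | 0x10000000 = 0x10000003
+ *
+ * i.e. bit 0 enables the entry, the partition number sits just above it,
+ * and the translated address occupies the high bits left free by the
+ * LUT_SIZE alignment.
+ */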
+
+static int config_req_id_table(struct switchtec_ntb *sndev,
+			       struct ntb_ctrl_regs __iomem *mmio_ctrl,
+			       int *req_ids, int count)
+{
+	int i, rc = 0;
+	u32 error;
+	u32 proxy_id;
+
+	if (ioread32(&mmio_ctrl->req_id_table_size) < count) {
+		dev_err(&sndev->stdev->dev,
+			"Not enough requester IDs available.\n");
+		return -EFAULT;
+	}
+
+	rc = switchtec_ntb_part_op(sndev, mmio_ctrl,
+				   NTB_CTRL_PART_OP_LOCK,
+				   NTB_CTRL_PART_STATUS_LOCKED);
+	if (rc)
+		return rc;
+
+	iowrite32(NTB_PART_CTRL_ID_PROT_DIS,
+		  &mmio_ctrl->partition_ctrl);
+
+	for (i = 0; i < count; i++) {
+		iowrite32(req_ids[i] << 16 | NTB_CTRL_REQ_ID_EN,
+			  &mmio_ctrl->req_id_table[i]);
+
+		proxy_id = ioread32(&mmio_ctrl->req_id_table[i]);
+		dev_dbg(&sndev->stdev->dev,
+			"Requester ID %02X:%02X.%X -> BB:%02X.%X\n",
+			req_ids[i] >> 8, (req_ids[i] >> 3) & 0x1F,
+			req_ids[i] & 0x7, (proxy_id >> 4) & 0x1F,
+			(proxy_id >> 1) & 0x7);
+	}
+
+	rc = switchtec_ntb_part_op(sndev, mmio_ctrl,
+				   NTB_CTRL_PART_OP_CFG,
+				   NTB_CTRL_PART_STATUS_NORMAL);
+
+	if (rc == -EIO) {
+		error = ioread32(&mmio_ctrl->req_id_error);
+		dev_err(&sndev->stdev->dev,
+			"Error setting up the requester ID table: %08x\n",
+			error);
+	}
+
+	return 0;
+}
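+/*
+ * Example of the decode used in the dev_dbg() above (illustrative): a
+ * requester ID is a standard PCI BDF, so req_id 0x0100 prints as 01:00.0
+ * (bus = id >> 8, device = (id >> 3) & 0x1F, function = id & 0x7), while
+ * the value read back shows the proxy device/function the switch will use
+ * in its place on the far side of the bridge.
+ */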
+
+static int crosslink_setup_mws(struct switchtec_ntb *sndev, int ntb_lut_idx,
+			       u64 *mw_addrs, int mw_count)
+{
+	int rc, i;
+	struct ntb_ctrl_regs __iomem *ctl = sndev->mmio_self_ctrl;
+	u64 addr;
+	size_t size, offset;
+	int bar;
+	int xlate_pos;
+	u32 ctl_val;
+
+	rc = switchtec_ntb_part_op(sndev, ctl, NTB_CTRL_PART_OP_LOCK,
+				   NTB_CTRL_PART_STATUS_LOCKED);
+	if (rc)
+		return rc;
+
+	for (i = 0; i < sndev->nr_lut_mw; i++) {
+		if (i == ntb_lut_idx)
+			continue;
+
+		addr = mw_addrs[0] + LUT_SIZE * i;
+
+		iowrite64((NTB_CTRL_LUT_EN | (sndev->peer_partition << 1) |
+			   addr),
+			  &ctl->lut_entry[i]);
+	}
+
+	sndev->nr_direct_mw = min_t(int, sndev->nr_direct_mw, mw_count);
+
+	for (i = 0; i < sndev->nr_direct_mw; i++) {
+		bar = sndev->direct_mw_to_bar[i];
+		offset = (i == 0) ? LUT_SIZE * sndev->nr_lut_mw : 0;
+		addr = mw_addrs[i] + offset;
+		size = pci_resource_len(sndev->ntb.pdev, bar) - offset;
+		xlate_pos = ilog2(size);
+
+		if (offset && size > offset)
+			size = offset;
+
+		ctl_val = ioread32(&ctl->bar_entry[bar].ctl);
+		ctl_val |= NTB_CTRL_BAR_DIR_WIN_EN;
+
+		iowrite32(ctl_val, &ctl->bar_entry[bar].ctl);
+		iowrite32(xlate_pos | size, &ctl->bar_entry[bar].win_size);
+		iowrite64(sndev->peer_partition | addr,
+			  &ctl->bar_entry[bar].xlate_addr);
+	}
+
+	rc = switchtec_ntb_part_op(sndev, ctl, NTB_CTRL_PART_OP_CFG,
+				   NTB_CTRL_PART_STATUS_NORMAL);
+	if (rc) {
+		u32 bar_error, lut_error;
+
+		bar_error = ioread32(&ctl->bar_error);
+		lut_error = ioread32(&ctl->lut_error);
+		dev_err(&sndev->stdev->dev,
+			"Error setting up cross link windows: %08x / %08x\n",
+			bar_error, lut_error);
+		return rc;
+	}
+
+	return 0;
+}
+
+static int crosslink_setup_req_ids(struct switchtec_ntb *sndev,
+	struct ntb_ctrl_regs __iomem *mmio_ctrl)
+{
+	int req_ids[16];
+	int i;
+	u32 proxy_id;
+
+	for (i = 0; i < ARRAY_SIZE(req_ids); i++) {
+		proxy_id = ioread32(&sndev->mmio_self_ctrl->req_id_table[i]);
+
+		if (!(proxy_id & NTB_CTRL_REQ_ID_EN))
+			break;
+
+		req_ids[i] = ((proxy_id >> 1) & 0xFF);
+	}
+
+	return config_req_id_table(sndev, mmio_ctrl, req_ids, i);
+}
+
+/*
+ * In crosslink configuration there is a virtual partition in the
+ * middle of the two switches. The BARs in this partition have to be
+ * enumerated and assigned addresses.
+ */
+static int crosslink_enum_partition(struct switchtec_ntb *sndev,
+				    u64 *bar_addrs)
+{
+	struct part_cfg_regs __iomem *part_cfg =
+		&sndev->stdev->mmio_part_cfg_all[sndev->peer_partition];
+	u32 pff = ioread32(&part_cfg->vep_pff_inst_id);
+	struct pff_csr_regs __iomem *mmio_pff =
+		&sndev->stdev->mmio_pff_csr[pff];
+	const u64 bar_space = 0x1000000000LL;
+	u64 bar_addr;
+	int bar_cnt = 0;
+	int i;
+
+	iowrite16(0x6, &mmio_pff->pcicmd);
+
+	for (i = 0; i < ARRAY_SIZE(mmio_pff->pci_bar64); i++) {
+		iowrite64(bar_space * i, &mmio_pff->pci_bar64[i]);
+		bar_addr = ioread64(&mmio_pff->pci_bar64[i]);
+		bar_addr &= ~0xf;
+
+		dev_dbg(&sndev->stdev->dev,
+			"Crosslink BAR%d addr: %llx\n",
+			i, bar_addr);
+
+		if (bar_addr != bar_space * i)
+			continue;
+
+		bar_addrs[bar_cnt++] = bar_addr;
+	}
+
+	return bar_cnt;
+}
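+/*
+ * Note (illustrative): bar_space above is 0x1000000000, so the crosslink
+ * partition's 64-bit BARs are programmed 64 GiB apart (0, 64G, 128G, ...);
+ * a BAR that does not read back the full address is skipped.  The 0x6
+ * written to pcicmd enables memory space decoding and bus mastering
+ * (PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER) for the crosslink PFF.
+ */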
+
+static int switchtec_ntb_init_crosslink(struct switchtec_ntb *sndev)
+{
+	int rc;
+	int bar = sndev->direct_mw_to_bar[0];
+	const int ntb_lut_idx = 1;
+	u64 bar_addrs[6];
+	u64 addr;
+	int offset;
+	int bar_cnt;
+
+	if (!crosslink_is_enabled(sndev))
+		return 0;
+
+	dev_info(&sndev->stdev->dev, "Using crosslink configuration\n");
+	sndev->ntb.topo = NTB_TOPO_CROSSLINK;
+
+	bar_cnt = crosslink_enum_partition(sndev, bar_addrs);
+	if (bar_cnt < sndev->nr_direct_mw + 1) {
+		dev_err(&sndev->stdev->dev,
+			"Error enumerating crosslink partition\n");
+		return -EINVAL;
+	}
+
+	addr = (bar_addrs[0] + SWITCHTEC_GAS_NTB_OFFSET +
+		SWITCHTEC_NTB_REG_DBMSG_OFFSET +
+		sizeof(struct ntb_dbmsg_regs) * sndev->peer_partition);
+
+	offset = addr & (LUT_SIZE - 1);
+	addr -= offset;
+
+	rc = config_rsvd_lut_win(sndev, sndev->mmio_self_ctrl, ntb_lut_idx,
+				 sndev->peer_partition, addr);
+	if (rc)
+		return rc;
+
+	rc = crosslink_setup_mws(sndev, ntb_lut_idx, &bar_addrs[1],
+				 bar_cnt - 1);
+	if (rc)
+		return rc;
+
+	rc = crosslink_setup_req_ids(sndev, sndev->mmio_peer_ctrl);
+	if (rc)
+		return rc;
+
+	sndev->mmio_xlink_win = pci_iomap_range(sndev->stdev->pdev, bar,
+						LUT_SIZE, LUT_SIZE);
+	if (!sndev->mmio_xlink_win) {
+		rc = -ENOMEM;
+		return rc;
+	}
+
+	sndev->mmio_peer_dbmsg = sndev->mmio_xlink_win + offset;
+	sndev->nr_rsvd_luts++;
+
+	crosslink_init_dbmsgs(sndev);
+
+	return 0;
+}
+
+static void switchtec_ntb_deinit_crosslink(struct switchtec_ntb *sndev)
+{
+	if (sndev->mmio_xlink_win)
+		pci_iounmap(sndev->stdev->pdev, sndev->mmio_xlink_win);
+}
+
+static int map_bars(int *map, struct ntb_ctrl_regs __iomem *ctrl)
+{
+	int i;
+	int cnt = 0;
+
+	for (i = 0; i < ARRAY_SIZE(ctrl->bar_entry); i++) {
+		u32 r = ioread32(&ctrl->bar_entry[i].ctl);
+
+		if (r & NTB_CTRL_BAR_VALID)
+			map[cnt++] = i;
+	}
+
+	return cnt;
+}
+
+static void switchtec_ntb_init_mw(struct switchtec_ntb *sndev)
+{
+	sndev->nr_direct_mw = map_bars(sndev->direct_mw_to_bar,
+				       sndev->mmio_self_ctrl);
+
+	sndev->nr_lut_mw = ioread16(&sndev->mmio_self_ctrl->lut_table_entries);
+	sndev->nr_lut_mw = rounddown_pow_of_two(sndev->nr_lut_mw);
+
+	dev_dbg(&sndev->stdev->dev, "MWs: %d direct, %d lut\n",
+		sndev->nr_direct_mw, sndev->nr_lut_mw);
+
+	sndev->peer_nr_direct_mw = map_bars(sndev->peer_direct_mw_to_bar,
+					    sndev->mmio_peer_ctrl);
+
+	sndev->peer_nr_lut_mw =
+		ioread16(&sndev->mmio_peer_ctrl->lut_table_entries);
+	sndev->peer_nr_lut_mw = rounddown_pow_of_two(sndev->peer_nr_lut_mw);
+
+	dev_dbg(&sndev->stdev->dev, "Peer MWs: %d direct, %d lut\n",
+		sndev->peer_nr_direct_mw, sndev->peer_nr_lut_mw);
+}
+
+/*
+ * There are 64 doorbells in the switch hardware, but they are
+ * shared among all partitions. So we must split them in half
+ * (32 for each partition). However, the message interrupts are
+ * also shared with the top 4 doorbells, so we just limit this to
+ * 28 doorbells per partition.
+ *
+ * In crosslink mode, each side has its own dbmsg register, so
+ * they can each use all 60 of the available doorbells.
+ */
+static void switchtec_ntb_init_db(struct switchtec_ntb *sndev)
+{
+	sndev->db_mask = 0x0FFFFFFFFFFFFFFFULL;
+
+	if (sndev->mmio_peer_dbmsg != sndev->mmio_self_dbmsg) {
+		sndev->db_shift = 0;
+		sndev->db_peer_shift = 0;
+		sndev->db_valid_mask = sndev->db_mask;
+	} else if (sndev->self_partition < sndev->peer_partition) {
+		sndev->db_shift = 0;
+		sndev->db_peer_shift = 32;
+		sndev->db_valid_mask = 0x0FFFFFFF;
+	} else {
+		sndev->db_shift = 32;
+		sndev->db_peer_shift = 0;
+		sndev->db_valid_mask = 0x0FFFFFFF;
+	}
+
+	iowrite64(~sndev->db_mask, &sndev->mmio_self_dbmsg->idb_mask);
+	iowrite64(sndev->db_valid_mask << sndev->db_peer_shift,
+		  &sndev->mmio_peer_dbmsg->odb_mask);
+
+	dev_dbg(&sndev->stdev->dev, "dbs: shift %d/%d, mask %016llx\n",
+		sndev->db_shift, sndev->db_peer_shift, sndev->db_valid_mask);
+}
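+/*
+ * Mask arithmetic for the above (illustrative): 0x0FFFFFFFFFFFFFFF has the
+ * low 60 bits set, matching the 60 doorbells usable in crosslink mode,
+ * while 0x0FFFFFFF covers 28 doorbells per partition in the shared case;
+ * the partition with the higher ID shifts its doorbells up by 32 into the
+ * top half of the register.
+ */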
+
+static void switchtec_ntb_init_msgs(struct switchtec_ntb *sndev)
+{
+	int i;
+	u32 msg_map = 0;
+
+	for (i = 0; i < ARRAY_SIZE(sndev->mmio_self_dbmsg->imsg); i++) {
+		int m = i | sndev->peer_partition << 2;
+
+		msg_map |= m << i * 8;
+	}
+
+	iowrite32(msg_map, &sndev->mmio_self_dbmsg->msg_map);
+
+	for (i = 0; i < ARRAY_SIZE(sndev->mmio_self_dbmsg->imsg); i++)
+		iowrite64(NTB_DBMSG_IMSG_STATUS | NTB_DBMSG_IMSG_MASK,
+			  &sndev->mmio_self_dbmsg->imsg[i]);
+}
+
+static int
+switchtec_ntb_init_req_id_table(struct switchtec_ntb *sndev)
+{
+	int req_ids[2];
+
+	/*
+	 * Root Complex Requester ID (which is 0:00.0)
+	 */
+	req_ids[0] = 0;
+
+	/*
+	 * Host Bridge Requester ID (as read from the mmap address)
+	 */
+	req_ids[1] = ioread16(&sndev->mmio_ntb->requester_id);
+
+	return config_req_id_table(sndev, sndev->mmio_self_ctrl, req_ids,
+				   ARRAY_SIZE(req_ids));
+}
+
+static void switchtec_ntb_init_shared(struct switchtec_ntb *sndev)
+{
+	int i;
+
+	memset(sndev->self_shared, 0, LUT_SIZE);
+	sndev->self_shared->magic = SWITCHTEC_NTB_MAGIC;
+	sndev->self_shared->partition_id = sndev->stdev->partition;
+
+	for (i = 0; i < sndev->nr_direct_mw; i++) {
+		int bar = sndev->direct_mw_to_bar[i];
+		resource_size_t sz = pci_resource_len(sndev->stdev->pdev, bar);
+
+		if (i == 0)
+			sz = min_t(resource_size_t, sz,
+				   LUT_SIZE * sndev->nr_lut_mw);
+
+		sndev->self_shared->mw_sizes[i] = sz;
+	}
+
+	for (i = 0; i < sndev->nr_lut_mw; i++) {
+		int idx = sndev->nr_direct_mw + i;
+
+		sndev->self_shared->mw_sizes[idx] = LUT_SIZE;
+	}
+}
+
+static int switchtec_ntb_init_shared_mw(struct switchtec_ntb *sndev)
+{
+	int self_bar = sndev->direct_mw_to_bar[0];
+	int rc;
+
+	sndev->nr_rsvd_luts++;
+	sndev->self_shared = dma_zalloc_coherent(&sndev->stdev->pdev->dev,
+						 LUT_SIZE,
+						 &sndev->self_shared_dma,
+						 GFP_KERNEL);
+	if (!sndev->self_shared) {
+		dev_err(&sndev->stdev->dev,
+			"unable to allocate memory for shared mw\n");
+		return -ENOMEM;
+	}
+
+	switchtec_ntb_init_shared(sndev);
+
+	rc = config_rsvd_lut_win(sndev, sndev->mmio_peer_ctrl, 0,
+				 sndev->self_partition,
+				 sndev->self_shared_dma);
+	if (rc)
+		goto unalloc_and_exit;
+
+	sndev->peer_shared = pci_iomap(sndev->stdev->pdev, self_bar, LUT_SIZE);
+	if (!sndev->peer_shared) {
+		rc = -ENOMEM;
+		goto unalloc_and_exit;
+	}
+
+	dev_dbg(&sndev->stdev->dev, "Shared MW Ready\n");
+	return 0;
+
+unalloc_and_exit:
+	dma_free_coherent(&sndev->stdev->pdev->dev, LUT_SIZE,
+			  sndev->self_shared, sndev->self_shared_dma);
+
+	return rc;
+}
+
+static void switchtec_ntb_deinit_shared_mw(struct switchtec_ntb *sndev)
+{
+	if (sndev->peer_shared)
+		pci_iounmap(sndev->stdev->pdev, sndev->peer_shared);
+
+	if (sndev->self_shared)
+		dma_free_coherent(&sndev->stdev->pdev->dev, LUT_SIZE,
+				  sndev->self_shared,
+				  sndev->self_shared_dma);
+	sndev->nr_rsvd_luts--;
+}
+
+static irqreturn_t switchtec_ntb_doorbell_isr(int irq, void *dev)
+{
+	struct switchtec_ntb *sndev = dev;
+
+	dev_dbg(&sndev->stdev->dev, "doorbell\n");
+
+	ntb_db_event(&sndev->ntb, 0);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t switchtec_ntb_message_isr(int irq, void *dev)
+{
+	int i;
+	struct switchtec_ntb *sndev = dev;
+
+	for (i = 0; i < ARRAY_SIZE(sndev->mmio_self_dbmsg->imsg); i++) {
+		u64 msg = ioread64(&sndev->mmio_self_dbmsg->imsg[i]);
+
+		if (msg & NTB_DBMSG_IMSG_STATUS) {
+			dev_dbg(&sndev->stdev->dev, "message: %d %08x\n",
+				i, (u32)msg);
+			iowrite8(1, &sndev->mmio_self_dbmsg->imsg[i].status);
+
+			if (i == LINK_MESSAGE)
+				switchtec_ntb_check_link(sndev, msg);
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int switchtec_ntb_init_db_msg_irq(struct switchtec_ntb *sndev)
+{
+	int i;
+	int rc;
+	int doorbell_irq = 0;
+	int message_irq = 0;
+	int event_irq;
+	int idb_vecs = sizeof(sndev->mmio_self_dbmsg->idb_vec_map);
+
+	event_irq = ioread32(&sndev->stdev->mmio_part_cfg->vep_vector_number);
+
+	while (doorbell_irq == event_irq)
+		doorbell_irq++;
+	while (message_irq == doorbell_irq ||
+	       message_irq == event_irq)
+		message_irq++;
+
+	dev_dbg(&sndev->stdev->dev, "irqs - event: %d, db: %d, msgs: %d\n",
+		event_irq, doorbell_irq, message_irq);
+
+	for (i = 0; i < idb_vecs - 4; i++)
+		iowrite8(doorbell_irq,
+			 &sndev->mmio_self_dbmsg->idb_vec_map[i]);
+
+	for (; i < idb_vecs; i++)
+		iowrite8(message_irq,
+			 &sndev->mmio_self_dbmsg->idb_vec_map[i]);
+
+	sndev->doorbell_irq = pci_irq_vector(sndev->stdev->pdev, doorbell_irq);
+	sndev->message_irq = pci_irq_vector(sndev->stdev->pdev, message_irq);
+
+	rc = request_irq(sndev->doorbell_irq,
+			 switchtec_ntb_doorbell_isr, 0,
+			 "switchtec_ntb_doorbell", sndev);
+	if (rc)
+		return rc;
+
+	rc = request_irq(sndev->message_irq,
+			 switchtec_ntb_message_isr, 0,
+			 "switchtec_ntb_message", sndev);
+	if (rc) {
+		free_irq(sndev->doorbell_irq, sndev);
+		return rc;
+	}
+
+	return 0;
+}
+
+static void switchtec_ntb_deinit_db_msg_irq(struct switchtec_ntb *sndev)
+{
+	free_irq(sndev->doorbell_irq, sndev);
+	free_irq(sndev->message_irq, sndev);
+}
+
+static int switchtec_ntb_reinit_peer(struct switchtec_ntb *sndev)
+{
+	dev_info(&sndev->stdev->dev, "peer reinitialized\n");
+	switchtec_ntb_deinit_shared_mw(sndev);
+	switchtec_ntb_init_mw(sndev);
+	return switchtec_ntb_init_shared_mw(sndev);
+}
+
+static int switchtec_ntb_add(struct device *dev,
+			     struct class_interface *class_intf)
+{
+	struct switchtec_dev *stdev = to_stdev(dev);
+	struct switchtec_ntb *sndev;
+	int rc;
+
+	stdev->sndev = NULL;
+
+	if (stdev->pdev->class != (PCI_CLASS_BRIDGE_OTHER << 8))
+		return -ENODEV;
+
+	sndev = kzalloc_node(sizeof(*sndev), GFP_KERNEL, dev_to_node(dev));
+	if (!sndev)
+		return -ENOMEM;
+
+	sndev->stdev = stdev;
+	rc = switchtec_ntb_init_sndev(sndev);
+	if (rc)
+		goto free_and_exit;
+
+	switchtec_ntb_init_mw(sndev);
+
+	rc = switchtec_ntb_init_req_id_table(sndev);
+	if (rc)
+		goto free_and_exit;
+
+	rc = switchtec_ntb_init_crosslink(sndev);
+	if (rc)
+		goto free_and_exit;
+
+	switchtec_ntb_init_db(sndev);
+	switchtec_ntb_init_msgs(sndev);
+
+	rc = switchtec_ntb_init_shared_mw(sndev);
+	if (rc)
+		goto deinit_crosslink;
+
+	rc = switchtec_ntb_init_db_msg_irq(sndev);
+	if (rc)
+		goto deinit_shared_and_exit;
+
+	/*
+	 * If this host crashed, the other host may think the link is
+	 * still up. Tell it to force the link down (it will come back up
+	 * once we register the ntb device).
+	 */
+	switchtec_ntb_send_msg(sndev, LINK_MESSAGE, MSG_LINK_FORCE_DOWN);
+
+	rc = ntb_register_device(&sndev->ntb);
+	if (rc)
+		goto deinit_and_exit;
+
+	stdev->sndev = sndev;
+	stdev->link_notifier = switchtec_ntb_link_notification;
+	dev_info(dev, "NTB device registered\n");
+
+	return 0;
+
+deinit_and_exit:
+	switchtec_ntb_deinit_db_msg_irq(sndev);
+deinit_shared_and_exit:
+	switchtec_ntb_deinit_shared_mw(sndev);
+deinit_crosslink:
+	switchtec_ntb_deinit_crosslink(sndev);
+free_and_exit:
+	kfree(sndev);
+	dev_err(dev, "failed to register ntb device: %d\n", rc);
+	return rc;
+}
+
+static void switchtec_ntb_remove(struct device *dev,
+				 struct class_interface *class_intf)
+{
+	struct switchtec_dev *stdev = to_stdev(dev);
+	struct switchtec_ntb *sndev = stdev->sndev;
+
+	if (!sndev)
+		return;
+
+	stdev->link_notifier = NULL;
+	stdev->sndev = NULL;
+	ntb_unregister_device(&sndev->ntb);
+	switchtec_ntb_deinit_db_msg_irq(sndev);
+	switchtec_ntb_deinit_shared_mw(sndev);
+	switchtec_ntb_deinit_crosslink(sndev);
+	kfree(sndev);
+	dev_info(dev, "ntb device unregistered\n");
+}
+
+static struct class_interface switchtec_interface  = {
+	.add_dev = switchtec_ntb_add,
+	.remove_dev = switchtec_ntb_remove,
+};
+
+static int __init switchtec_ntb_init(void)
+{
+	switchtec_interface.class = switchtec_class;
+	return class_interface_register(&switchtec_interface);
+}
+module_init(switchtec_ntb_init);
+
+static void __exit switchtec_ntb_exit(void)
+{
+	class_interface_unregister(&switchtec_interface);
+}
+module_exit(switchtec_ntb_exit);
diff --git a/drivers/ntb/ntb.c b/drivers/ntb/ntb.c
new file mode 100644
index 0000000..2581ab7
--- /dev/null
+++ b/drivers/ntb/ntb.c
@@ -0,0 +1,318 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ *   Copyright (C) 2016 T-Platforms. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *   General Public License for more details.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ *   Copyright (C) 2016 T-Platforms. All Rights Reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * PCIe NTB Linux driver
+ *
+ * Contact Information:
+ * Allen Hubbe <Allen.Hubbe@emc.com>
+ */
+
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <linux/ntb.h>
+#include <linux/pci.h>
+
+#define DRIVER_NAME			"ntb"
+#define DRIVER_DESCRIPTION		"PCIe NTB Driver Framework"
+
+#define DRIVER_VERSION			"1.0"
+#define DRIVER_RELDATE			"24 March 2015"
+#define DRIVER_AUTHOR			"Allen Hubbe <Allen.Hubbe@emc.com>"
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESCRIPTION);
+
+static struct bus_type ntb_bus;
+static void ntb_dev_release(struct device *dev);
+
+int __ntb_register_client(struct ntb_client *client, struct module *mod,
+			  const char *mod_name)
+{
+	if (!client)
+		return -EINVAL;
+	if (!ntb_client_ops_is_valid(&client->ops))
+		return -EINVAL;
+
+	memset(&client->drv, 0, sizeof(client->drv));
+	client->drv.bus = &ntb_bus;
+	client->drv.name = mod_name;
+	client->drv.owner = mod;
+
+	return driver_register(&client->drv);
+}
+EXPORT_SYMBOL(__ntb_register_client);
+
+void ntb_unregister_client(struct ntb_client *client)
+{
+	driver_unregister(&client->drv);
+}
+EXPORT_SYMBOL(ntb_unregister_client);
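+/*
+ * Illustrative sketch (not part of this file): a minimal client pairs the
+ * two calls above, usually via the ntb_register_client() convenience macro
+ * from <linux/ntb.h>.  Names prefixed with my_ are placeholders.
+ *
+ *	static int my_probe(struct ntb_client *c, struct ntb_dev *ntb)
+ *	{
+ *		return ntb_set_ctx(ntb, NULL, &my_ctx_ops);
+ *	}
+ *
+ *	static void my_remove(struct ntb_client *c, struct ntb_dev *ntb)
+ *	{
+ *		ntb_clear_ctx(ntb);
+ *	}
+ *
+ *	static struct ntb_client my_client = {
+ *		.ops = { .probe = my_probe, .remove = my_remove },
+ *	};
+ *
+ *	ntb_register_client(&my_client);	(from the client's module_init)
+ *	ntb_unregister_client(&my_client);	(from its module_exit)
+ */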
+
+int ntb_register_device(struct ntb_dev *ntb)
+{
+	if (!ntb)
+		return -EINVAL;
+	if (!ntb->pdev)
+		return -EINVAL;
+	if (!ntb->ops)
+		return -EINVAL;
+	if (!ntb_dev_ops_is_valid(ntb->ops))
+		return -EINVAL;
+
+	init_completion(&ntb->released);
+
+	ntb->dev.bus = &ntb_bus;
+	ntb->dev.parent = &ntb->pdev->dev;
+	ntb->dev.release = ntb_dev_release;
+	dev_set_name(&ntb->dev, "%s", pci_name(ntb->pdev));
+
+	ntb->ctx = NULL;
+	ntb->ctx_ops = NULL;
+	spin_lock_init(&ntb->ctx_lock);
+
+	return device_register(&ntb->dev);
+}
+EXPORT_SYMBOL(ntb_register_device);
+
+void ntb_unregister_device(struct ntb_dev *ntb)
+{
+	device_unregister(&ntb->dev);
+	wait_for_completion(&ntb->released);
+}
+EXPORT_SYMBOL(ntb_unregister_device);
+
+int ntb_set_ctx(struct ntb_dev *ntb, void *ctx,
+		const struct ntb_ctx_ops *ctx_ops)
+{
+	unsigned long irqflags;
+
+	if (!ntb_ctx_ops_is_valid(ctx_ops))
+		return -EINVAL;
+	if (ntb->ctx_ops)
+		return -EINVAL;
+
+	spin_lock_irqsave(&ntb->ctx_lock, irqflags);
+	{
+		ntb->ctx = ctx;
+		ntb->ctx_ops = ctx_ops;
+	}
+	spin_unlock_irqrestore(&ntb->ctx_lock, irqflags);
+
+	return 0;
+}
+EXPORT_SYMBOL(ntb_set_ctx);
+
+void ntb_clear_ctx(struct ntb_dev *ntb)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&ntb->ctx_lock, irqflags);
+	{
+		ntb->ctx_ops = NULL;
+		ntb->ctx = NULL;
+	}
+	spin_unlock_irqrestore(&ntb->ctx_lock, irqflags);
+}
+EXPORT_SYMBOL(ntb_clear_ctx);
+
+void ntb_link_event(struct ntb_dev *ntb)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&ntb->ctx_lock, irqflags);
+	{
+		if (ntb->ctx_ops && ntb->ctx_ops->link_event)
+			ntb->ctx_ops->link_event(ntb->ctx);
+	}
+	spin_unlock_irqrestore(&ntb->ctx_lock, irqflags);
+}
+EXPORT_SYMBOL(ntb_link_event);
+
+void ntb_db_event(struct ntb_dev *ntb, int vector)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&ntb->ctx_lock, irqflags);
+	{
+		if (ntb->ctx_ops && ntb->ctx_ops->db_event)
+			ntb->ctx_ops->db_event(ntb->ctx, vector);
+	}
+	spin_unlock_irqrestore(&ntb->ctx_lock, irqflags);
+}
+EXPORT_SYMBOL(ntb_db_event);
+
+void ntb_msg_event(struct ntb_dev *ntb)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&ntb->ctx_lock, irqflags);
+	{
+		if (ntb->ctx_ops && ntb->ctx_ops->msg_event)
+			ntb->ctx_ops->msg_event(ntb->ctx);
+	}
+	spin_unlock_irqrestore(&ntb->ctx_lock, irqflags);
+}
+EXPORT_SYMBOL(ntb_msg_event);
+
+int ntb_default_port_number(struct ntb_dev *ntb)
+{
+	switch (ntb->topo) {
+	case NTB_TOPO_PRI:
+	case NTB_TOPO_B2B_USD:
+		return NTB_PORT_PRI_USD;
+	case NTB_TOPO_SEC:
+	case NTB_TOPO_B2B_DSD:
+		return NTB_PORT_SEC_DSD;
+	default:
+		break;
+	}
+
+	return -EINVAL;
+}
+EXPORT_SYMBOL(ntb_default_port_number);
+
+int ntb_default_peer_port_count(struct ntb_dev *ntb)
+{
+	return NTB_DEF_PEER_CNT;
+}
+EXPORT_SYMBOL(ntb_default_peer_port_count);
+
+int ntb_default_peer_port_number(struct ntb_dev *ntb, int pidx)
+{
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
+	switch (ntb->topo) {
+	case NTB_TOPO_PRI:
+	case NTB_TOPO_B2B_USD:
+		return NTB_PORT_SEC_DSD;
+	case NTB_TOPO_SEC:
+	case NTB_TOPO_B2B_DSD:
+		return NTB_PORT_PRI_USD;
+	default:
+		break;
+	}
+
+	return -EINVAL;
+}
+EXPORT_SYMBOL(ntb_default_peer_port_number);
+
+int ntb_default_peer_port_idx(struct ntb_dev *ntb, int port)
+{
+	int peer_port = ntb_default_peer_port_number(ntb, NTB_DEF_PEER_IDX);
+
+	if (peer_port == -EINVAL || port != peer_port)
+		return -EINVAL;
+
+	return 0;
+}
+EXPORT_SYMBOL(ntb_default_peer_port_idx);
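+/*
+ * Summary of the default mapping above (illustrative): in the PRI and
+ * B2B_USD topologies the local port is NTB_PORT_PRI_USD and its single
+ * peer (index NTB_DEF_PEER_IDX) is NTB_PORT_SEC_DSD; in SEC and B2B_DSD
+ * the roles are reversed.  Any other topology or port value is rejected
+ * with -EINVAL.
+ */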
+
+static int ntb_probe(struct device *dev)
+{
+	struct ntb_dev *ntb;
+	struct ntb_client *client;
+	int rc;
+
+	get_device(dev);
+	ntb = dev_ntb(dev);
+	client = drv_ntb_client(dev->driver);
+
+	rc = client->ops.probe(client, ntb);
+	if (rc)
+		put_device(dev);
+
+	return rc;
+}
+
+static int ntb_remove(struct device *dev)
+{
+	struct ntb_dev *ntb;
+	struct ntb_client *client;
+
+	if (dev->driver) {
+		ntb = dev_ntb(dev);
+		client = drv_ntb_client(dev->driver);
+
+		client->ops.remove(client, ntb);
+		put_device(dev);
+	}
+
+	return 0;
+}
+
+static void ntb_dev_release(struct device *dev)
+{
+	struct ntb_dev *ntb = dev_ntb(dev);
+
+	complete(&ntb->released);
+}
+
+static struct bus_type ntb_bus = {
+	.name = "ntb",
+	.probe = ntb_probe,
+	.remove = ntb_remove,
+};
+
+static int __init ntb_driver_init(void)
+{
+	return bus_register(&ntb_bus);
+}
+module_init(ntb_driver_init);
+
+static void __exit ntb_driver_exit(void)
+{
+	bus_unregister(&ntb_bus);
+}
+module_exit(ntb_driver_exit);
+
diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
new file mode 100644
index 0000000..9398959
--- /dev/null
+++ b/drivers/ntb/ntb_transport.c
@@ -0,0 +1,2261 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2012 Intel Corporation. All rights reserved.
+ *   Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2012 Intel Corporation. All rights reserved.
+ *   Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * PCIe NTB Transport Linux driver
+ *
+ * Contact Information:
+ * Jon Mason <jon.mason@intel.com>
+ */
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include "linux/ntb.h"
+#include "linux/ntb_transport.h"
+
+#define NTB_TRANSPORT_VERSION	4
+#define NTB_TRANSPORT_VER	"4"
+#define NTB_TRANSPORT_NAME	"ntb_transport"
+#define NTB_TRANSPORT_DESC	"Software Queue-Pair Transport over NTB"
+#define NTB_TRANSPORT_MIN_SPADS (MW0_SZ_HIGH + 2)
+
+MODULE_DESCRIPTION(NTB_TRANSPORT_DESC);
+MODULE_VERSION(NTB_TRANSPORT_VER);
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Intel Corporation");
+
+static unsigned long max_mw_size;
+module_param(max_mw_size, ulong, 0644);
+MODULE_PARM_DESC(max_mw_size, "Limit size of large memory windows");
+
+static unsigned int transport_mtu = 0x10000;
+module_param(transport_mtu, uint, 0644);
+MODULE_PARM_DESC(transport_mtu, "Maximum size of NTB transport packets");
+
+static unsigned char max_num_clients;
+module_param(max_num_clients, byte, 0644);
+MODULE_PARM_DESC(max_num_clients, "Maximum number of NTB transport clients");
+
+static unsigned int copy_bytes = 1024;
+module_param(copy_bytes, uint, 0644);
+MODULE_PARM_DESC(copy_bytes, "Threshold under which NTB will use the CPU to copy instead of DMA");
+
+static bool use_dma;
+module_param(use_dma, bool, 0644);
+MODULE_PARM_DESC(use_dma, "Use DMA engine to perform large data copy");
+
+static struct dentry *nt_debugfs_dir;
+
+/* Only two-port NTB devices are supported */
+#define PIDX		NTB_DEF_PEER_IDX
+
+struct ntb_queue_entry {
+	/* ntb_queue list reference */
+	struct list_head entry;
+	/* pointers to data to be transferred */
+	void *cb_data;
+	void *buf;
+	unsigned int len;
+	unsigned int flags;
+	int retries;
+	int errors;
+	unsigned int tx_index;
+	unsigned int rx_index;
+
+	struct ntb_transport_qp *qp;
+	union {
+		struct ntb_payload_header __iomem *tx_hdr;
+		struct ntb_payload_header *rx_hdr;
+	};
+};
+
+struct ntb_rx_info {
+	unsigned int entry;
+};
+
+struct ntb_transport_qp {
+	struct ntb_transport_ctx *transport;
+	struct ntb_dev *ndev;
+	void *cb_data;
+	struct dma_chan *tx_dma_chan;
+	struct dma_chan *rx_dma_chan;
+
+	bool client_ready;
+	bool link_is_up;
+	bool active;
+
+	u8 qp_num;	/* Only 64 QPs are allowed.  0-63 */
+	u64 qp_bit;
+
+	struct ntb_rx_info __iomem *rx_info;
+	struct ntb_rx_info *remote_rx_info;
+
+	void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data,
+			   void *data, int len);
+	struct list_head tx_free_q;
+	spinlock_t ntb_tx_free_q_lock;
+	void __iomem *tx_mw;
+	dma_addr_t tx_mw_phys;
+	unsigned int tx_index;
+	unsigned int tx_max_entry;
+	unsigned int tx_max_frame;
+
+	void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data,
+			   void *data, int len);
+	struct list_head rx_post_q;
+	struct list_head rx_pend_q;
+	struct list_head rx_free_q;
+	/* ntb_rx_q_lock: synchronize access to rx_XXXX_q */
+	spinlock_t ntb_rx_q_lock;
+	void *rx_buff;
+	unsigned int rx_index;
+	unsigned int rx_max_entry;
+	unsigned int rx_max_frame;
+	unsigned int rx_alloc_entry;
+	dma_cookie_t last_cookie;
+	struct tasklet_struct rxc_db_work;
+
+	void (*event_handler)(void *data, int status);
+	struct delayed_work link_work;
+	struct work_struct link_cleanup;
+
+	struct dentry *debugfs_dir;
+	struct dentry *debugfs_stats;
+
+	/* Stats */
+	u64 rx_bytes;
+	u64 rx_pkts;
+	u64 rx_ring_empty;
+	u64 rx_err_no_buf;
+	u64 rx_err_oflow;
+	u64 rx_err_ver;
+	u64 rx_memcpy;
+	u64 rx_async;
+	u64 tx_bytes;
+	u64 tx_pkts;
+	u64 tx_ring_full;
+	u64 tx_err_no_buf;
+	u64 tx_memcpy;
+	u64 tx_async;
+};
+
+struct ntb_transport_mw {
+	phys_addr_t phys_addr;
+	resource_size_t phys_size;
+	void __iomem *vbase;
+	size_t xlat_size;
+	size_t buff_size;
+	void *virt_addr;
+	dma_addr_t dma_addr;
+};
+
+struct ntb_transport_client_dev {
+	struct list_head entry;
+	struct ntb_transport_ctx *nt;
+	struct device dev;
+};
+
+struct ntb_transport_ctx {
+	struct list_head entry;
+	struct list_head client_devs;
+
+	struct ntb_dev *ndev;
+
+	struct ntb_transport_mw *mw_vec;
+	struct ntb_transport_qp *qp_vec;
+	unsigned int mw_count;
+	unsigned int qp_count;
+	u64 qp_bitmap;
+	u64 qp_bitmap_free;
+
+	bool link_is_up;
+	struct delayed_work link_work;
+	struct work_struct link_cleanup;
+
+	struct dentry *debugfs_node_dir;
+};
+
+enum {
+	DESC_DONE_FLAG = BIT(0),
+	LINK_DOWN_FLAG = BIT(1),
+};
+
+struct ntb_payload_header {
+	unsigned int ver;
+	unsigned int len;
+	unsigned int flags;
+};
+
+enum {
+	VERSION = 0,
+	QP_LINKS,
+	NUM_QPS,
+	NUM_MWS,
+	MW0_SZ_HIGH,
+	MW0_SZ_LOW,
+};
+
+#define dev_client_dev(__dev) \
+	container_of((__dev), struct ntb_transport_client_dev, dev)
+
+#define drv_client(__drv) \
+	container_of((__drv), struct ntb_transport_client, driver)
+
+#define QP_TO_MW(nt, qp)	((qp) % nt->mw_count)
+#define NTB_QP_DEF_NUM_ENTRIES	100
+#define NTB_LINK_DOWN_TIMEOUT	10
+
+static void ntb_transport_rxc_db(unsigned long data);
+static const struct ntb_ctx_ops ntb_transport_ops;
+static struct ntb_client ntb_transport_client;
+static int ntb_async_tx_submit(struct ntb_transport_qp *qp,
+			       struct ntb_queue_entry *entry);
+static void ntb_memcpy_tx(struct ntb_queue_entry *entry, void __iomem *offset);
+static int ntb_async_rx_submit(struct ntb_queue_entry *entry, void *offset);
+static void ntb_memcpy_rx(struct ntb_queue_entry *entry, void *offset);
+
+
+static int ntb_transport_bus_match(struct device *dev,
+				   struct device_driver *drv)
+{
+	return !strncmp(dev_name(dev), drv->name, strlen(drv->name));
+}
+
+static int ntb_transport_bus_probe(struct device *dev)
+{
+	const struct ntb_transport_client *client;
+	int rc = -EINVAL;
+
+	get_device(dev);
+
+	client = drv_client(dev->driver);
+	rc = client->probe(dev);
+	if (rc)
+		put_device(dev);
+
+	return rc;
+}
+
+static int ntb_transport_bus_remove(struct device *dev)
+{
+	const struct ntb_transport_client *client;
+
+	client = drv_client(dev->driver);
+	client->remove(dev);
+
+	put_device(dev);
+
+	return 0;
+}
+
+static struct bus_type ntb_transport_bus = {
+	.name = "ntb_transport",
+	.match = ntb_transport_bus_match,
+	.probe = ntb_transport_bus_probe,
+	.remove = ntb_transport_bus_remove,
+};
+
+static LIST_HEAD(ntb_transport_list);
+
+static int ntb_bus_init(struct ntb_transport_ctx *nt)
+{
+	list_add_tail(&nt->entry, &ntb_transport_list);
+	return 0;
+}
+
+static void ntb_bus_remove(struct ntb_transport_ctx *nt)
+{
+	struct ntb_transport_client_dev *client_dev, *cd;
+
+	list_for_each_entry_safe(client_dev, cd, &nt->client_devs, entry) {
+		dev_err(client_dev->dev.parent, "%s still attached to bus, removing\n",
+			dev_name(&client_dev->dev));
+		list_del(&client_dev->entry);
+		device_unregister(&client_dev->dev);
+	}
+
+	list_del(&nt->entry);
+}
+
+static void ntb_transport_client_release(struct device *dev)
+{
+	struct ntb_transport_client_dev *client_dev;
+
+	client_dev = dev_client_dev(dev);
+	kfree(client_dev);
+}
+
+/**
+ * ntb_transport_unregister_client_dev - Unregister NTB client device
+ * @device_name: Name of NTB client device
+ *
+ * Unregister an NTB client device from the NTB transport layer
+ */
+void ntb_transport_unregister_client_dev(char *device_name)
+{
+	struct ntb_transport_client_dev *client, *cd;
+	struct ntb_transport_ctx *nt;
+
+	list_for_each_entry(nt, &ntb_transport_list, entry)
+		list_for_each_entry_safe(client, cd, &nt->client_devs, entry)
+			if (!strncmp(dev_name(&client->dev), device_name,
+				     strlen(device_name))) {
+				list_del(&client->entry);
+				device_unregister(&client->dev);
+			}
+}
+EXPORT_SYMBOL_GPL(ntb_transport_unregister_client_dev);
+
+/**
+ * ntb_transport_register_client_dev - Register NTB client device
+ * @device_name: Name of NTB client device
+ *
+ * Register an NTB client device with the NTB transport layer
+ */
+int ntb_transport_register_client_dev(char *device_name)
+{
+	struct ntb_transport_client_dev *client_dev;
+	struct ntb_transport_ctx *nt;
+	int node;
+	int rc, i = 0;
+
+	if (list_empty(&ntb_transport_list))
+		return -ENODEV;
+
+	list_for_each_entry(nt, &ntb_transport_list, entry) {
+		struct device *dev;
+
+		node = dev_to_node(&nt->ndev->dev);
+
+		client_dev = kzalloc_node(sizeof(*client_dev),
+					  GFP_KERNEL, node);
+		if (!client_dev) {
+			rc = -ENOMEM;
+			goto err;
+		}
+
+		dev = &client_dev->dev;
+
+		/* setup and register client devices */
+		dev_set_name(dev, "%s%d", device_name, i);
+		dev->bus = &ntb_transport_bus;
+		dev->release = ntb_transport_client_release;
+		dev->parent = &nt->ndev->dev;
+
+		rc = device_register(dev);
+		if (rc) {
+			kfree(client_dev);
+			goto err;
+		}
+
+		list_add_tail(&client_dev->entry, &nt->client_devs);
+		i++;
+	}
+
+	return 0;
+
+err:
+	ntb_transport_unregister_client_dev(device_name);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(ntb_transport_register_client_dev);
+
+/**
+ * ntb_transport_register_client - Register NTB client driver
+ * @drv: NTB client driver to be registered
+ *
+ * Register an NTB client driver with the NTB transport layer
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+int ntb_transport_register_client(struct ntb_transport_client *drv)
+{
+	drv->driver.bus = &ntb_transport_bus;
+
+	if (list_empty(&ntb_transport_list))
+		return -ENODEV;
+
+	return driver_register(&drv->driver);
+}
+EXPORT_SYMBOL_GPL(ntb_transport_register_client);
+
+/**
+ * ntb_transport_unregister_client - Unregister NTB client driver
+ * @drv: NTB client driver to be unregistered
+ *
+ * Unregister an NTB client driver from the NTB transport layer
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+void ntb_transport_unregister_client(struct ntb_transport_client *drv)
+{
+	driver_unregister(&drv->driver);
+}
+EXPORT_SYMBOL_GPL(ntb_transport_unregister_client);
+
+static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count,
+			    loff_t *offp)
+{
+	struct ntb_transport_qp *qp;
+	char *buf;
+	ssize_t ret, out_offset, out_count;
+
+	qp = filp->private_data;
+
+	if (!qp || !qp->link_is_up)
+		return 0;
+
+	out_count = 1000;
+
+	buf = kmalloc(out_count, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	out_offset = 0;
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "\nNTB QP stats:\n\n");
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "rx_bytes - \t%llu\n", qp->rx_bytes);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "rx_pkts - \t%llu\n", qp->rx_pkts);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "rx_memcpy - \t%llu\n", qp->rx_memcpy);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "rx_async - \t%llu\n", qp->rx_async);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "rx_ring_empty - %llu\n", qp->rx_ring_empty);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "rx_err_no_buf - %llu\n", qp->rx_err_no_buf);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "rx_err_oflow - \t%llu\n", qp->rx_err_oflow);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "rx_err_ver - \t%llu\n", qp->rx_err_ver);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "rx_buff - \t0x%p\n", qp->rx_buff);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "rx_index - \t%u\n", qp->rx_index);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "rx_max_entry - \t%u\n", qp->rx_max_entry);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "rx_alloc_entry - \t%u\n\n", qp->rx_alloc_entry);
+
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "tx_bytes - \t%llu\n", qp->tx_bytes);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "tx_pkts - \t%llu\n", qp->tx_pkts);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "tx_memcpy - \t%llu\n", qp->tx_memcpy);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "tx_async - \t%llu\n", qp->tx_async);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "tx_ring_full - \t%llu\n", qp->tx_ring_full);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "tx_err_no_buf - %llu\n", qp->tx_err_no_buf);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "tx_mw - \t0x%p\n", qp->tx_mw);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "tx_index (H) - \t%u\n", qp->tx_index);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "RRI (T) - \t%u\n",
+			       qp->remote_rx_info->entry);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "tx_max_entry - \t%u\n", qp->tx_max_entry);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "free tx - \t%u\n",
+			       ntb_transport_tx_free_entry(qp));
+
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "\n");
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "Using TX DMA - \t%s\n",
+			       qp->tx_dma_chan ? "Yes" : "No");
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "Using RX DMA - \t%s\n",
+			       qp->rx_dma_chan ? "Yes" : "No");
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "QP Link - \t%s\n",
+			       qp->link_is_up ? "Up" : "Down");
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "\n");
+
+	if (out_offset > out_count)
+		out_offset = out_count;
+
+	ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset);
+	kfree(buf);
+	return ret;
+}
+
+static const struct file_operations ntb_qp_debugfs_stats = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = debugfs_read,
+};
+
+static void ntb_list_add(spinlock_t *lock, struct list_head *entry,
+			 struct list_head *list)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(lock, flags);
+	list_add_tail(entry, list);
+	spin_unlock_irqrestore(lock, flags);
+}
+
+static struct ntb_queue_entry *ntb_list_rm(spinlock_t *lock,
+					   struct list_head *list)
+{
+	struct ntb_queue_entry *entry;
+	unsigned long flags;
+
+	spin_lock_irqsave(lock, flags);
+	if (list_empty(list)) {
+		entry = NULL;
+		goto out;
+	}
+	entry = list_first_entry(list, struct ntb_queue_entry, entry);
+	list_del(&entry->entry);
+
+out:
+	spin_unlock_irqrestore(lock, flags);
+
+	return entry;
+}
+
+static struct ntb_queue_entry *ntb_list_mv(spinlock_t *lock,
+					   struct list_head *list,
+					   struct list_head *to_list)
+{
+	struct ntb_queue_entry *entry;
+	unsigned long flags;
+
+	spin_lock_irqsave(lock, flags);
+
+	if (list_empty(list)) {
+		entry = NULL;
+	} else {
+		entry = list_first_entry(list, struct ntb_queue_entry, entry);
+		list_move_tail(&entry->entry, to_list);
+	}
+
+	spin_unlock_irqrestore(lock, flags);
+
+	return entry;
+}
+
+static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt,
+				     unsigned int qp_num)
+{
+	struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
+	struct ntb_transport_mw *mw;
+	struct ntb_dev *ndev = nt->ndev;
+	struct ntb_queue_entry *entry;
+	unsigned int rx_size, num_qps_mw;
+	unsigned int mw_num, mw_count, qp_count;
+	unsigned int i;
+	int node;
+
+	mw_count = nt->mw_count;
+	qp_count = nt->qp_count;
+
+	mw_num = QP_TO_MW(nt, qp_num);
+	mw = &nt->mw_vec[mw_num];
+
+	if (!mw->virt_addr)
+		return -ENOMEM;
+
+	if (mw_num < qp_count % mw_count)
+		num_qps_mw = qp_count / mw_count + 1;
+	else
+		num_qps_mw = qp_count / mw_count;
+
+	rx_size = (unsigned int)mw->xlat_size / num_qps_mw;
+	qp->rx_buff = mw->virt_addr + rx_size * (qp_num / mw_count);
+	rx_size -= sizeof(struct ntb_rx_info);
+
+	qp->remote_rx_info = qp->rx_buff + rx_size;
+
+	/* Due to housekeeping, there must be at least 2 buffers */
+	qp->rx_max_frame = min(transport_mtu, rx_size / 2);
+	qp->rx_max_entry = rx_size / qp->rx_max_frame;
+	qp->rx_index = 0;
+
+	/*
+	 * Check whether the ring now holds more frames than we have
+	 * queue entries allocated; if so, allocate the additional
+	 * entries so the entry count stays in sync with the frames.
+	 */
+	node = dev_to_node(&ndev->dev);
+	for (i = qp->rx_alloc_entry; i < qp->rx_max_entry; i++) {
+		entry = kzalloc_node(sizeof(*entry), GFP_KERNEL, node);
+		if (!entry)
+			return -ENOMEM;
+
+		entry->qp = qp;
+		ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry,
+			     &qp->rx_free_q);
+		qp->rx_alloc_entry++;
+	}
+
+	qp->remote_rx_info->entry = qp->rx_max_entry - 1;
+
+	/* setup the hdr offsets with 0's */
+	for (i = 0; i < qp->rx_max_entry; i++) {
+		void *offset = (qp->rx_buff + qp->rx_max_frame * (i + 1) -
+				sizeof(struct ntb_payload_header));
+		memset(offset, 0, sizeof(struct ntb_payload_header));
+	}
+
+	qp->rx_pkts = 0;
+	qp->tx_pkts = 0;
+	qp->tx_index = 0;
+
+	return 0;
+}
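+
+/*
+ * Worked example of the split above (illustrative values only, assuming
+ * QP_TO_MW() maps qp_num modulo mw_count): with qp_count = 5 and
+ * mw_count = 2, QPs 0, 2 and 4 share MW0 while QPs 1 and 3 share MW1.
+ * For MW0, mw_num (0) < qp_count % mw_count (1), so num_qps_mw becomes
+ * 5 / 2 + 1 = 3 and each of its QPs receives xlat_size / 3 bytes of the
+ * window, with sizeof(struct ntb_rx_info) reserved at the end of every
+ * slice for the remote_rx_info housekeeping area.
+ */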
+
+static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw)
+{
+	struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];
+	struct pci_dev *pdev = nt->ndev->pdev;
+
+	if (!mw->virt_addr)
+		return;
+
+	ntb_mw_clear_trans(nt->ndev, PIDX, num_mw);
+	dma_free_coherent(&pdev->dev, mw->buff_size,
+			  mw->virt_addr, mw->dma_addr);
+	mw->xlat_size = 0;
+	mw->buff_size = 0;
+	mw->virt_addr = NULL;
+}
+
+static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw,
+		      resource_size_t size)
+{
+	struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];
+	struct pci_dev *pdev = nt->ndev->pdev;
+	size_t xlat_size, buff_size;
+	resource_size_t xlat_align;
+	resource_size_t xlat_align_size;
+	int rc;
+
+	if (!size)
+		return -EINVAL;
+
+	rc = ntb_mw_get_align(nt->ndev, PIDX, num_mw, &xlat_align,
+			      &xlat_align_size, NULL);
+	if (rc)
+		return rc;
+
+	xlat_size = round_up(size, xlat_align_size);
+	buff_size = round_up(size, xlat_align);
+
+	/* No need to re-setup */
+	if (mw->xlat_size == xlat_size)
+		return 0;
+
+	if (mw->buff_size)
+		ntb_free_mw(nt, num_mw);
+
+	/* Alloc memory for receiving data.  Must be aligned */
+	mw->xlat_size = xlat_size;
+	mw->buff_size = buff_size;
+
+	mw->virt_addr = dma_alloc_coherent(&pdev->dev, buff_size,
+					   &mw->dma_addr, GFP_KERNEL);
+	if (!mw->virt_addr) {
+		mw->xlat_size = 0;
+		mw->buff_size = 0;
+		dev_err(&pdev->dev, "Unable to alloc MW buff of size %zu\n",
+			buff_size);
+		return -ENOMEM;
+	}
+
+	/*
+	 * We must ensure that the allocated memory address is BAR-size
+	 * aligned in order for the XLAT register to take the value.  This
+	 * is a requirement of the hardware.  It is recommended to set up
+	 * CMA for BAR sizes equal to or greater than 4MB.
+	 */
+	if (!IS_ALIGNED(mw->dma_addr, xlat_align)) {
+		dev_err(&pdev->dev, "DMA memory %pad is not aligned\n",
+			&mw->dma_addr);
+		ntb_free_mw(nt, num_mw);
+		return -ENOMEM;
+	}
+
+	/* Notify HW the memory location of the receive buffer */
+	rc = ntb_mw_set_trans(nt->ndev, PIDX, num_mw, mw->dma_addr,
+			      mw->xlat_size);
+	if (rc) {
+		dev_err(&pdev->dev, "Unable to set mw%d translation", num_mw);
+		ntb_free_mw(nt, num_mw);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp)
+{
+	qp->link_is_up = false;
+	qp->active = false;
+
+	qp->tx_index = 0;
+	qp->rx_index = 0;
+	qp->rx_bytes = 0;
+	qp->rx_pkts = 0;
+	qp->rx_ring_empty = 0;
+	qp->rx_err_no_buf = 0;
+	qp->rx_err_oflow = 0;
+	qp->rx_err_ver = 0;
+	qp->rx_memcpy = 0;
+	qp->rx_async = 0;
+	qp->tx_bytes = 0;
+	qp->tx_pkts = 0;
+	qp->tx_ring_full = 0;
+	qp->tx_err_no_buf = 0;
+	qp->tx_memcpy = 0;
+	qp->tx_async = 0;
+}
+
+static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp)
+{
+	struct ntb_transport_ctx *nt = qp->transport;
+	struct pci_dev *pdev = nt->ndev->pdev;
+
+	dev_info(&pdev->dev, "qp %d: Link Cleanup\n", qp->qp_num);
+
+	cancel_delayed_work_sync(&qp->link_work);
+	ntb_qp_link_down_reset(qp);
+
+	if (qp->event_handler)
+		qp->event_handler(qp->cb_data, qp->link_is_up);
+}
+
+static void ntb_qp_link_cleanup_work(struct work_struct *work)
+{
+	struct ntb_transport_qp *qp = container_of(work,
+						   struct ntb_transport_qp,
+						   link_cleanup);
+	struct ntb_transport_ctx *nt = qp->transport;
+
+	ntb_qp_link_cleanup(qp);
+
+	if (nt->link_is_up)
+		schedule_delayed_work(&qp->link_work,
+				      msecs_to_jiffies(NTB_LINK_DOWN_TIMEOUT));
+}
+
+static void ntb_qp_link_down(struct ntb_transport_qp *qp)
+{
+	schedule_work(&qp->link_cleanup);
+}
+
+static void ntb_transport_link_cleanup(struct ntb_transport_ctx *nt)
+{
+	struct ntb_transport_qp *qp;
+	u64 qp_bitmap_alloc;
+	unsigned int i, count;
+
+	qp_bitmap_alloc = nt->qp_bitmap & ~nt->qp_bitmap_free;
+
+	/* Pass along the info to any clients */
+	for (i = 0; i < nt->qp_count; i++)
+		if (qp_bitmap_alloc & BIT_ULL(i)) {
+			qp = &nt->qp_vec[i];
+			ntb_qp_link_cleanup(qp);
+			cancel_work_sync(&qp->link_cleanup);
+			cancel_delayed_work_sync(&qp->link_work);
+		}
+
+	if (!nt->link_is_up)
+		cancel_delayed_work_sync(&nt->link_work);
+
+	/* The scratchpad registers keep their values if the remote side
+	 * goes down; blast them now so they hold a sane value the next
+	 * time they are accessed.
+	 */
+	count = ntb_spad_count(nt->ndev);
+	for (i = 0; i < count; i++)
+		ntb_spad_write(nt->ndev, i, 0);
+}
+
+static void ntb_transport_link_cleanup_work(struct work_struct *work)
+{
+	struct ntb_transport_ctx *nt =
+		container_of(work, struct ntb_transport_ctx, link_cleanup);
+
+	ntb_transport_link_cleanup(nt);
+}
+
+static void ntb_transport_event_callback(void *data)
+{
+	struct ntb_transport_ctx *nt = data;
+
+	if (ntb_link_is_up(nt->ndev, NULL, NULL) == 1)
+		schedule_delayed_work(&nt->link_work, 0);
+	else
+		schedule_work(&nt->link_cleanup);
+}
+
+static void ntb_transport_link_work(struct work_struct *work)
+{
+	struct ntb_transport_ctx *nt =
+		container_of(work, struct ntb_transport_ctx, link_work.work);
+	struct ntb_dev *ndev = nt->ndev;
+	struct pci_dev *pdev = ndev->pdev;
+	resource_size_t size;
+	u32 val;
+	int rc = 0, i, spad;
+
+	/* send the local info, in the opposite order of the way we read it */
+	for (i = 0; i < nt->mw_count; i++) {
+		size = nt->mw_vec[i].phys_size;
+
+		if (max_mw_size && size > max_mw_size)
+			size = max_mw_size;
+
+		spad = MW0_SZ_HIGH + (i * 2);
+		ntb_peer_spad_write(ndev, PIDX, spad, upper_32_bits(size));
+
+		spad = MW0_SZ_LOW + (i * 2);
+		ntb_peer_spad_write(ndev, PIDX, spad, lower_32_bits(size));
+	}
+
+	ntb_peer_spad_write(ndev, PIDX, NUM_MWS, nt->mw_count);
+
+	ntb_peer_spad_write(ndev, PIDX, NUM_QPS, nt->qp_count);
+
+	ntb_peer_spad_write(ndev, PIDX, VERSION, NTB_TRANSPORT_VERSION);
+
+	/* Query the remote side for its info */
+	val = ntb_spad_read(ndev, VERSION);
+	dev_dbg(&pdev->dev, "Remote version = %d\n", val);
+	if (val != NTB_TRANSPORT_VERSION)
+		goto out;
+
+	val = ntb_spad_read(ndev, NUM_QPS);
+	dev_dbg(&pdev->dev, "Remote max number of qps = %d\n", val);
+	if (val != nt->qp_count)
+		goto out;
+
+	val = ntb_spad_read(ndev, NUM_MWS);
+	dev_dbg(&pdev->dev, "Remote number of mws = %d\n", val);
+	if (val != nt->mw_count)
+		goto out;
+
+	for (i = 0; i < nt->mw_count; i++) {
+		u64 val64;
+
+		val = ntb_spad_read(ndev, MW0_SZ_HIGH + (i * 2));
+		val64 = (u64)val << 32;
+
+		val = ntb_spad_read(ndev, MW0_SZ_LOW + (i * 2));
+		val64 |= val;
+
+		dev_dbg(&pdev->dev, "Remote MW%d size = %#llx\n", i, val64);
+
+		rc = ntb_set_mw(nt, i, val64);
+		if (rc)
+			goto out1;
+	}
+
+	nt->link_is_up = true;
+
+	for (i = 0; i < nt->qp_count; i++) {
+		struct ntb_transport_qp *qp = &nt->qp_vec[i];
+
+		ntb_transport_setup_qp_mw(nt, i);
+
+		if (qp->client_ready)
+			schedule_delayed_work(&qp->link_work, 0);
+	}
+
+	return;
+
+out1:
+	for (i = 0; i < nt->mw_count; i++)
+		ntb_free_mw(nt, i);
+
+	/* if there's an actual failure, we should just bail */
+	if (rc < 0)
+		return;
+
+out:
+	if (ntb_link_is_up(ndev, NULL, NULL) == 1)
+		schedule_delayed_work(&nt->link_work,
+				      msecs_to_jiffies(NTB_LINK_DOWN_TIMEOUT));
+}
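+
+/*
+ * Handshake summary (a sketch of what ntb_transport_link_work() does above):
+ * each side publishes its parameters to the peer scratchpads in the reverse
+ * order of how they are read back, i.e.
+ *
+ *   peer writes:  MW0_SZ_HIGH/MW0_SZ_LOW ... NUM_MWS, NUM_QPS, VERSION
+ *   local reads:  VERSION, NUM_QPS, NUM_MWS, MW0_SZ_HIGH/MW0_SZ_LOW ...
+ *
+ * so a matching VERSION implies the values written before it are already
+ * valid; any mismatch simply reschedules the work until both sides agree.
+ */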
+
+static void ntb_qp_link_work(struct work_struct *work)
+{
+	struct ntb_transport_qp *qp = container_of(work,
+						   struct ntb_transport_qp,
+						   link_work.work);
+	struct pci_dev *pdev = qp->ndev->pdev;
+	struct ntb_transport_ctx *nt = qp->transport;
+	int val;
+
+	WARN_ON(!nt->link_is_up);
+
+	val = ntb_spad_read(nt->ndev, QP_LINKS);
+
+	ntb_peer_spad_write(nt->ndev, PIDX, QP_LINKS, val | BIT(qp->qp_num));
+
+	/* query remote spad for qp ready bits */
+	dev_dbg_ratelimited(&pdev->dev, "Remote QP link status = %x\n", val);
+
+	/* See if the remote side is up */
+	if (val & BIT(qp->qp_num)) {
+		dev_info(&pdev->dev, "qp %d: Link Up\n", qp->qp_num);
+		qp->link_is_up = true;
+		qp->active = true;
+
+		if (qp->event_handler)
+			qp->event_handler(qp->cb_data, qp->link_is_up);
+
+		if (qp->active)
+			tasklet_schedule(&qp->rxc_db_work);
+	} else if (nt->link_is_up)
+		schedule_delayed_work(&qp->link_work,
+				      msecs_to_jiffies(NTB_LINK_DOWN_TIMEOUT));
+}
+
+static int ntb_transport_init_queue(struct ntb_transport_ctx *nt,
+				    unsigned int qp_num)
+{
+	struct ntb_transport_qp *qp;
+	phys_addr_t mw_base;
+	resource_size_t mw_size;
+	unsigned int num_qps_mw, tx_size;
+	unsigned int mw_num, mw_count, qp_count;
+	u64 qp_offset;
+
+	mw_count = nt->mw_count;
+	qp_count = nt->qp_count;
+
+	mw_num = QP_TO_MW(nt, qp_num);
+
+	qp = &nt->qp_vec[qp_num];
+	qp->qp_num = qp_num;
+	qp->transport = nt;
+	qp->ndev = nt->ndev;
+	qp->client_ready = false;
+	qp->event_handler = NULL;
+	ntb_qp_link_down_reset(qp);
+
+	if (mw_num < qp_count % mw_count)
+		num_qps_mw = qp_count / mw_count + 1;
+	else
+		num_qps_mw = qp_count / mw_count;
+
+	mw_base = nt->mw_vec[mw_num].phys_addr;
+	mw_size = nt->mw_vec[mw_num].phys_size;
+
+	if (max_mw_size && mw_size > max_mw_size)
+		mw_size = max_mw_size;
+
+	tx_size = (unsigned int)mw_size / num_qps_mw;
+	qp_offset = tx_size * (qp_num / mw_count);
+
+	qp->tx_mw = nt->mw_vec[mw_num].vbase + qp_offset;
+	if (!qp->tx_mw)
+		return -EINVAL;
+
+	qp->tx_mw_phys = mw_base + qp_offset;
+	if (!qp->tx_mw_phys)
+		return -EINVAL;
+
+	tx_size -= sizeof(struct ntb_rx_info);
+	qp->rx_info = qp->tx_mw + tx_size;
+
+	/* Due to housekeeping, there must be at least 2 buffers */
+	qp->tx_max_frame = min(transport_mtu, tx_size / 2);
+	qp->tx_max_entry = tx_size / qp->tx_max_frame;
+
+	if (nt->debugfs_node_dir) {
+		char debugfs_name[4];
+
+		snprintf(debugfs_name, 4, "qp%d", qp_num);
+		qp->debugfs_dir = debugfs_create_dir(debugfs_name,
+						     nt->debugfs_node_dir);
+
+		qp->debugfs_stats = debugfs_create_file("stats", S_IRUSR,
+							qp->debugfs_dir, qp,
+							&ntb_qp_debugfs_stats);
+	} else {
+		qp->debugfs_dir = NULL;
+		qp->debugfs_stats = NULL;
+	}
+
+	INIT_DELAYED_WORK(&qp->link_work, ntb_qp_link_work);
+	INIT_WORK(&qp->link_cleanup, ntb_qp_link_cleanup_work);
+
+	spin_lock_init(&qp->ntb_rx_q_lock);
+	spin_lock_init(&qp->ntb_tx_free_q_lock);
+
+	INIT_LIST_HEAD(&qp->rx_post_q);
+	INIT_LIST_HEAD(&qp->rx_pend_q);
+	INIT_LIST_HEAD(&qp->rx_free_q);
+	INIT_LIST_HEAD(&qp->tx_free_q);
+
+	tasklet_init(&qp->rxc_db_work, ntb_transport_rxc_db,
+		     (unsigned long)qp);
+
+	return 0;
+}
+
+static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
+{
+	struct ntb_transport_ctx *nt;
+	struct ntb_transport_mw *mw;
+	unsigned int mw_count, qp_count, spad_count, max_mw_count_for_spads;
+	u64 qp_bitmap;
+	int node;
+	int rc, i;
+
+	mw_count = ntb_peer_mw_count(ndev);
+
+	if (!ndev->ops->mw_set_trans) {
+		dev_err(&ndev->dev, "Inbound MW based NTB API is required\n");
+		return -EINVAL;
+	}
+
+	if (ntb_db_is_unsafe(ndev))
+		dev_dbg(&ndev->dev,
+			"doorbell is unsafe, proceed anyway...\n");
+	if (ntb_spad_is_unsafe(ndev))
+		dev_dbg(&ndev->dev,
+			"scratchpad is unsafe, proceed anyway...\n");
+
+	if (ntb_peer_port_count(ndev) != NTB_DEF_PEER_CNT)
+		dev_warn(&ndev->dev, "Multi-port NTB devices unsupported\n");
+
+	node = dev_to_node(&ndev->dev);
+
+	nt = kzalloc_node(sizeof(*nt), GFP_KERNEL, node);
+	if (!nt)
+		return -ENOMEM;
+
+	nt->ndev = ndev;
+	spad_count = ntb_spad_count(ndev);
+
+	/* Limit the MWs based on the availability of scratchpads */
+
+	if (spad_count < NTB_TRANSPORT_MIN_SPADS) {
+		nt->mw_count = 0;
+		rc = -EINVAL;
+		goto err;
+	}
+
+	max_mw_count_for_spads = (spad_count - MW0_SZ_HIGH) / 2;
+	nt->mw_count = min(mw_count, max_mw_count_for_spads);
+
+	nt->mw_vec = kcalloc_node(mw_count, sizeof(*nt->mw_vec),
+				  GFP_KERNEL, node);
+	if (!nt->mw_vec) {
+		rc = -ENOMEM;
+		goto err;
+	}
+
+	for (i = 0; i < mw_count; i++) {
+		mw = &nt->mw_vec[i];
+
+		rc = ntb_peer_mw_get_addr(ndev, i, &mw->phys_addr,
+					  &mw->phys_size);
+		if (rc)
+			goto err1;
+
+		mw->vbase = ioremap_wc(mw->phys_addr, mw->phys_size);
+		if (!mw->vbase) {
+			rc = -ENOMEM;
+			goto err1;
+		}
+
+		mw->buff_size = 0;
+		mw->xlat_size = 0;
+		mw->virt_addr = NULL;
+		mw->dma_addr = 0;
+	}
+
+	qp_bitmap = ntb_db_valid_mask(ndev);
+
+	qp_count = ilog2(qp_bitmap);
+	if (max_num_clients && max_num_clients < qp_count)
+		qp_count = max_num_clients;
+	else if (nt->mw_count < qp_count)
+		qp_count = nt->mw_count;
+
+	qp_bitmap &= BIT_ULL(qp_count) - 1;
+
+	nt->qp_count = qp_count;
+	nt->qp_bitmap = qp_bitmap;
+	nt->qp_bitmap_free = qp_bitmap;
+
+	nt->qp_vec = kcalloc_node(qp_count, sizeof(*nt->qp_vec),
+				  GFP_KERNEL, node);
+	if (!nt->qp_vec) {
+		rc = -ENOMEM;
+		goto err1;
+	}
+
+	if (nt_debugfs_dir) {
+		nt->debugfs_node_dir =
+			debugfs_create_dir(pci_name(ndev->pdev),
+					   nt_debugfs_dir);
+	}
+
+	for (i = 0; i < qp_count; i++) {
+		rc = ntb_transport_init_queue(nt, i);
+		if (rc)
+			goto err2;
+	}
+
+	INIT_DELAYED_WORK(&nt->link_work, ntb_transport_link_work);
+	INIT_WORK(&nt->link_cleanup, ntb_transport_link_cleanup_work);
+
+	rc = ntb_set_ctx(ndev, nt, &ntb_transport_ops);
+	if (rc)
+		goto err2;
+
+	INIT_LIST_HEAD(&nt->client_devs);
+	rc = ntb_bus_init(nt);
+	if (rc)
+		goto err3;
+
+	nt->link_is_up = false;
+	ntb_link_enable(ndev, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
+	ntb_link_event(ndev);
+
+	return 0;
+
+err3:
+	ntb_clear_ctx(ndev);
+err2:
+	kfree(nt->qp_vec);
+err1:
+	while (i--) {
+		mw = &nt->mw_vec[i];
+		iounmap(mw->vbase);
+	}
+	kfree(nt->mw_vec);
+err:
+	kfree(nt);
+	return rc;
+}
+
+static void ntb_transport_free(struct ntb_client *self, struct ntb_dev *ndev)
+{
+	struct ntb_transport_ctx *nt = ndev->ctx;
+	struct ntb_transport_qp *qp;
+	u64 qp_bitmap_alloc;
+	int i;
+
+	ntb_transport_link_cleanup(nt);
+	cancel_work_sync(&nt->link_cleanup);
+	cancel_delayed_work_sync(&nt->link_work);
+
+	qp_bitmap_alloc = nt->qp_bitmap & ~nt->qp_bitmap_free;
+
+	/* verify that all the qp's are freed */
+	for (i = 0; i < nt->qp_count; i++) {
+		qp = &nt->qp_vec[i];
+		if (qp_bitmap_alloc & BIT_ULL(i))
+			ntb_transport_free_queue(qp);
+		debugfs_remove_recursive(qp->debugfs_dir);
+	}
+
+	ntb_link_disable(ndev);
+	ntb_clear_ctx(ndev);
+
+	ntb_bus_remove(nt);
+
+	for (i = nt->mw_count; i--; ) {
+		ntb_free_mw(nt, i);
+		iounmap(nt->mw_vec[i].vbase);
+	}
+
+	kfree(nt->qp_vec);
+	kfree(nt->mw_vec);
+	kfree(nt);
+}
+
+static void ntb_complete_rxc(struct ntb_transport_qp *qp)
+{
+	struct ntb_queue_entry *entry;
+	void *cb_data;
+	unsigned int len;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&qp->ntb_rx_q_lock, irqflags);
+
+	while (!list_empty(&qp->rx_post_q)) {
+		entry = list_first_entry(&qp->rx_post_q,
+					 struct ntb_queue_entry, entry);
+		if (!(entry->flags & DESC_DONE_FLAG))
+			break;
+
+		entry->rx_hdr->flags = 0;
+		iowrite32(entry->rx_index, &qp->rx_info->entry);
+
+		cb_data = entry->cb_data;
+		len = entry->len;
+
+		list_move_tail(&entry->entry, &qp->rx_free_q);
+
+		spin_unlock_irqrestore(&qp->ntb_rx_q_lock, irqflags);
+
+		if (qp->rx_handler && qp->client_ready)
+			qp->rx_handler(qp, qp->cb_data, cb_data, len);
+
+		spin_lock_irqsave(&qp->ntb_rx_q_lock, irqflags);
+	}
+
+	spin_unlock_irqrestore(&qp->ntb_rx_q_lock, irqflags);
+}
+
+static void ntb_rx_copy_callback(void *data,
+				 const struct dmaengine_result *res)
+{
+	struct ntb_queue_entry *entry = data;
+
+	/* we need to check DMA results if we are using DMA */
+	if (res) {
+		enum dmaengine_tx_result dma_err = res->result;
+
+		switch (dma_err) {
+		case DMA_TRANS_READ_FAILED:
+		case DMA_TRANS_WRITE_FAILED:
+			entry->errors++;
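+			/* fall through - redo the transfer with CPU memcpy */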
+		case DMA_TRANS_ABORTED:
+		{
+			struct ntb_transport_qp *qp = entry->qp;
+			void *offset = qp->rx_buff + qp->rx_max_frame *
+					qp->rx_index;
+
+			ntb_memcpy_rx(entry, offset);
+			qp->rx_memcpy++;
+			return;
+		}
+
+		case DMA_TRANS_NOERROR:
+		default:
+			break;
+		}
+	}
+
+	entry->flags |= DESC_DONE_FLAG;
+
+	ntb_complete_rxc(entry->qp);
+}
+
+static void ntb_memcpy_rx(struct ntb_queue_entry *entry, void *offset)
+{
+	void *buf = entry->buf;
+	size_t len = entry->len;
+
+	memcpy(buf, offset, len);
+
+	/* Ensure that the data is fully copied out before clearing the flag */
+	wmb();
+
+	ntb_rx_copy_callback(entry, NULL);
+}
+
+static int ntb_async_rx_submit(struct ntb_queue_entry *entry, void *offset)
+{
+	struct dma_async_tx_descriptor *txd;
+	struct ntb_transport_qp *qp = entry->qp;
+	struct dma_chan *chan = qp->rx_dma_chan;
+	struct dma_device *device;
+	size_t pay_off, buff_off, len;
+	struct dmaengine_unmap_data *unmap;
+	dma_cookie_t cookie;
+	void *buf = entry->buf;
+
+	len = entry->len;
+	device = chan->device;
+	pay_off = (size_t)offset & ~PAGE_MASK;
+	buff_off = (size_t)buf & ~PAGE_MASK;
+
+	if (!is_dma_copy_aligned(device, pay_off, buff_off, len))
+		goto err;
+
+	unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOWAIT);
+	if (!unmap)
+		goto err;
+
+	unmap->len = len;
+	unmap->addr[0] = dma_map_page(device->dev, virt_to_page(offset),
+				      pay_off, len, DMA_TO_DEVICE);
+	if (dma_mapping_error(device->dev, unmap->addr[0]))
+		goto err_get_unmap;
+
+	unmap->to_cnt = 1;
+
+	unmap->addr[1] = dma_map_page(device->dev, virt_to_page(buf),
+				      buff_off, len, DMA_FROM_DEVICE);
+	if (dma_mapping_error(device->dev, unmap->addr[1]))
+		goto err_get_unmap;
+
+	unmap->from_cnt = 1;
+
+	txd = device->device_prep_dma_memcpy(chan, unmap->addr[1],
+					     unmap->addr[0], len,
+					     DMA_PREP_INTERRUPT);
+	if (!txd)
+		goto err_get_unmap;
+
+	txd->callback_result = ntb_rx_copy_callback;
+	txd->callback_param = entry;
+	dma_set_unmap(txd, unmap);
+
+	cookie = dmaengine_submit(txd);
+	if (dma_submit_error(cookie))
+		goto err_set_unmap;
+
+	dmaengine_unmap_put(unmap);
+
+	qp->last_cookie = cookie;
+
+	qp->rx_async++;
+
+	return 0;
+
+err_set_unmap:
+	dmaengine_unmap_put(unmap);
+err_get_unmap:
+	dmaengine_unmap_put(unmap);
+err:
+	return -ENXIO;
+}
+
+static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset)
+{
+	struct ntb_transport_qp *qp = entry->qp;
+	struct dma_chan *chan = qp->rx_dma_chan;
+	int res;
+
+	if (!chan)
+		goto err;
+
+	if (entry->len < copy_bytes)
+		goto err;
+
+	res = ntb_async_rx_submit(entry, offset);
+	if (res < 0)
+		goto err;
+
+	if (!entry->retries)
+		qp->rx_async++;
+
+	return;
+
+err:
+	ntb_memcpy_rx(entry, offset);
+	qp->rx_memcpy++;
+}
+
+static int ntb_process_rxc(struct ntb_transport_qp *qp)
+{
+	struct ntb_payload_header *hdr;
+	struct ntb_queue_entry *entry;
+	void *offset;
+
+	offset = qp->rx_buff + qp->rx_max_frame * qp->rx_index;
+	hdr = offset + qp->rx_max_frame - sizeof(struct ntb_payload_header);
+
+	dev_dbg(&qp->ndev->pdev->dev, "qp %d: RX ver %u len %d flags %x\n",
+		qp->qp_num, hdr->ver, hdr->len, hdr->flags);
+
+	if (!(hdr->flags & DESC_DONE_FLAG)) {
+		dev_dbg(&qp->ndev->pdev->dev, "done flag not set\n");
+		qp->rx_ring_empty++;
+		return -EAGAIN;
+	}
+
+	if (hdr->flags & LINK_DOWN_FLAG) {
+		dev_dbg(&qp->ndev->pdev->dev, "link down flag set\n");
+		ntb_qp_link_down(qp);
+		hdr->flags = 0;
+		return -EAGAIN;
+	}
+
+	if (hdr->ver != (u32)qp->rx_pkts) {
+		dev_dbg(&qp->ndev->pdev->dev,
+			"version mismatch, expected %llu - got %u\n",
+			qp->rx_pkts, hdr->ver);
+		qp->rx_err_ver++;
+		return -EIO;
+	}
+
+	entry = ntb_list_mv(&qp->ntb_rx_q_lock, &qp->rx_pend_q, &qp->rx_post_q);
+	if (!entry) {
+		dev_dbg(&qp->ndev->pdev->dev, "no receive buffer\n");
+		qp->rx_err_no_buf++;
+		return -EAGAIN;
+	}
+
+	entry->rx_hdr = hdr;
+	entry->rx_index = qp->rx_index;
+
+	if (hdr->len > entry->len) {
+		dev_dbg(&qp->ndev->pdev->dev,
+			"receive buffer overflow! Wanted %d got %d\n",
+			hdr->len, entry->len);
+		qp->rx_err_oflow++;
+
+		entry->len = -EIO;
+		entry->flags |= DESC_DONE_FLAG;
+
+		ntb_complete_rxc(qp);
+	} else {
+		dev_dbg(&qp->ndev->pdev->dev,
+			"RX OK index %u ver %u size %d into buf size %d\n",
+			qp->rx_index, hdr->ver, hdr->len, entry->len);
+
+		qp->rx_bytes += hdr->len;
+		qp->rx_pkts++;
+
+		entry->len = hdr->len;
+
+		ntb_async_rx(entry, offset);
+	}
+
+	qp->rx_index++;
+	qp->rx_index %= qp->rx_max_entry;
+
+	return 0;
+}
+
+static void ntb_transport_rxc_db(unsigned long data)
+{
+	struct ntb_transport_qp *qp = (void *)data;
+	int rc, i;
+
+	dev_dbg(&qp->ndev->pdev->dev, "%s: doorbell %d received\n",
+		__func__, qp->qp_num);
+
+	/* Limit the number of packets processed in a single interrupt to
+	 * provide fairness to others
+	 */
+	for (i = 0; i < qp->rx_max_entry; i++) {
+		rc = ntb_process_rxc(qp);
+		if (rc)
+			break;
+	}
+
+	if (i && qp->rx_dma_chan)
+		dma_async_issue_pending(qp->rx_dma_chan);
+
+	if (i == qp->rx_max_entry) {
+		/* there is more work to do */
+		if (qp->active)
+			tasklet_schedule(&qp->rxc_db_work);
+	} else if (ntb_db_read(qp->ndev) & BIT_ULL(qp->qp_num)) {
+		/* the doorbell bit is set: clear it */
+		ntb_db_clear(qp->ndev, BIT_ULL(qp->qp_num));
+		/* ntb_db_read ensures ntb_db_clear write is committed */
+		ntb_db_read(qp->ndev);
+
+		/* an interrupt may have arrived between finishing
+		 * ntb_process_rxc and clearing the doorbell bit:
+		 * there might be some more work to do.
+		 */
+		if (qp->active)
+			tasklet_schedule(&qp->rxc_db_work);
+	}
+}
+
+static void ntb_tx_copy_callback(void *data,
+				 const struct dmaengine_result *res)
+{
+	struct ntb_queue_entry *entry = data;
+	struct ntb_transport_qp *qp = entry->qp;
+	struct ntb_payload_header __iomem *hdr = entry->tx_hdr;
+
+	/* we need to check DMA results if we are using DMA */
+	if (res) {
+		enum dmaengine_tx_result dma_err = res->result;
+
+		switch (dma_err) {
+		case DMA_TRANS_READ_FAILED:
+		case DMA_TRANS_WRITE_FAILED:
+			entry->errors++;
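+			/* fall through - resubmit via CPU memcpy below */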
+		case DMA_TRANS_ABORTED:
+		{
+			void __iomem *offset =
+				qp->tx_mw + qp->tx_max_frame *
+				entry->tx_index;
+
+			/* resubmit via CPU */
+			ntb_memcpy_tx(entry, offset);
+			qp->tx_memcpy++;
+			return;
+		}
+
+		case DMA_TRANS_NOERROR:
+		default:
+			break;
+		}
+	}
+
+	iowrite32(entry->flags | DESC_DONE_FLAG, &hdr->flags);
+
+	ntb_peer_db_set(qp->ndev, BIT_ULL(qp->qp_num));
+
+	/* The entry length can only be zero if the packet is intended to be a
+	 * "link down" or similar.  Since no payload is being sent in these
+	 * cases, there is nothing to add to the completion queue.
+	 */
+	if (entry->len > 0) {
+		qp->tx_bytes += entry->len;
+
+		if (qp->tx_handler)
+			qp->tx_handler(qp, qp->cb_data, entry->cb_data,
+				       entry->len);
+	}
+
+	ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry, &qp->tx_free_q);
+}
+
+static void ntb_memcpy_tx(struct ntb_queue_entry *entry, void __iomem *offset)
+{
+#ifdef ARCH_HAS_NOCACHE_UACCESS
+	/*
+	 * Using non-temporal mov to improve performance on non-cached
+	 * writes, even though we aren't actually copying from user space.
+	 */
+	__copy_from_user_inatomic_nocache(offset, entry->buf, entry->len);
+#else
+	memcpy_toio(offset, entry->buf, entry->len);
+#endif
+
+	/* Ensure that the data is fully copied out before setting the flags */
+	wmb();
+
+	ntb_tx_copy_callback(entry, NULL);
+}
+
+static int ntb_async_tx_submit(struct ntb_transport_qp *qp,
+			       struct ntb_queue_entry *entry)
+{
+	struct dma_async_tx_descriptor *txd;
+	struct dma_chan *chan = qp->tx_dma_chan;
+	struct dma_device *device;
+	size_t len = entry->len;
+	void *buf = entry->buf;
+	size_t dest_off, buff_off;
+	struct dmaengine_unmap_data *unmap;
+	dma_addr_t dest;
+	dma_cookie_t cookie;
+
+	device = chan->device;
+	dest = qp->tx_mw_phys + qp->tx_max_frame * entry->tx_index;
+	buff_off = (size_t)buf & ~PAGE_MASK;
+	dest_off = (size_t)dest & ~PAGE_MASK;
+
+	if (!is_dma_copy_aligned(device, buff_off, dest_off, len))
+		goto err;
+
+	unmap = dmaengine_get_unmap_data(device->dev, 1, GFP_NOWAIT);
+	if (!unmap)
+		goto err;
+
+	unmap->len = len;
+	unmap->addr[0] = dma_map_page(device->dev, virt_to_page(buf),
+				      buff_off, len, DMA_TO_DEVICE);
+	if (dma_mapping_error(device->dev, unmap->addr[0]))
+		goto err_get_unmap;
+
+	unmap->to_cnt = 1;
+
+	txd = device->device_prep_dma_memcpy(chan, dest, unmap->addr[0], len,
+					     DMA_PREP_INTERRUPT);
+	if (!txd)
+		goto err_get_unmap;
+
+	txd->callback_result = ntb_tx_copy_callback;
+	txd->callback_param = entry;
+	dma_set_unmap(txd, unmap);
+
+	cookie = dmaengine_submit(txd);
+	if (dma_submit_error(cookie))
+		goto err_set_unmap;
+
+	dmaengine_unmap_put(unmap);
+
+	dma_async_issue_pending(chan);
+
+	return 0;
+err_set_unmap:
+	dmaengine_unmap_put(unmap);
+err_get_unmap:
+	dmaengine_unmap_put(unmap);
+err:
+	return -ENXIO;
+}
+
+static void ntb_async_tx(struct ntb_transport_qp *qp,
+			 struct ntb_queue_entry *entry)
+{
+	struct ntb_payload_header __iomem *hdr;
+	struct dma_chan *chan = qp->tx_dma_chan;
+	void __iomem *offset;
+	int res;
+
+	entry->tx_index = qp->tx_index;
+	offset = qp->tx_mw + qp->tx_max_frame * entry->tx_index;
+	hdr = offset + qp->tx_max_frame - sizeof(struct ntb_payload_header);
+	entry->tx_hdr = hdr;
+
+	iowrite32(entry->len, &hdr->len);
+	iowrite32((u32)qp->tx_pkts, &hdr->ver);
+
+	if (!chan)
+		goto err;
+
+	if (entry->len < copy_bytes)
+		goto err;
+
+	res = ntb_async_tx_submit(qp, entry);
+	if (res < 0)
+		goto err;
+
+	if (!entry->retries)
+		qp->tx_async++;
+
+	return;
+
+err:
+	ntb_memcpy_tx(entry, offset);
+	qp->tx_memcpy++;
+}
+
+static int ntb_process_tx(struct ntb_transport_qp *qp,
+			  struct ntb_queue_entry *entry)
+{
+	if (qp->tx_index == qp->remote_rx_info->entry) {
+		qp->tx_ring_full++;
+		return -EAGAIN;
+	}
+
+	if (entry->len > qp->tx_max_frame - sizeof(struct ntb_payload_header)) {
+		if (qp->tx_handler)
+			qp->tx_handler(qp, qp->cb_data, NULL, -EIO);
+
+		ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry,
+			     &qp->tx_free_q);
+		return 0;
+	}
+
+	ntb_async_tx(qp, entry);
+
+	qp->tx_index++;
+	qp->tx_index %= qp->tx_max_entry;
+
+	qp->tx_pkts++;
+
+	return 0;
+}
+
+static void ntb_send_link_down(struct ntb_transport_qp *qp)
+{
+	struct pci_dev *pdev = qp->ndev->pdev;
+	struct ntb_queue_entry *entry;
+	int i, rc;
+
+	if (!qp->link_is_up)
+		return;
+
+	dev_info(&pdev->dev, "qp %d: Send Link Down\n", qp->qp_num);
+
+	for (i = 0; i < NTB_LINK_DOWN_TIMEOUT; i++) {
+		entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q);
+		if (entry)
+			break;
+		msleep(100);
+	}
+
+	if (!entry)
+		return;
+
+	entry->cb_data = NULL;
+	entry->buf = NULL;
+	entry->len = 0;
+	entry->flags = LINK_DOWN_FLAG;
+
+	rc = ntb_process_tx(qp, entry);
+	if (rc)
+		dev_err(&pdev->dev, "ntb: QP%d unable to send linkdown msg\n",
+			qp->qp_num);
+
+	ntb_qp_link_down_reset(qp);
+}
+
+static bool ntb_dma_filter_fn(struct dma_chan *chan, void *node)
+{
+	return dev_to_node(&chan->dev->device) == (int)(unsigned long)node;
+}
+
+/**
+ * ntb_transport_create_queue - Create a new NTB transport layer queue
+ * @data: pointer for callback data
+ * @client_dev: &struct device pointer of the NTB client requesting the queue
+ * @handlers: pointer to the struct holding the rx, tx and event callbacks
+ *
+ * Create a new NTB transport layer queue and provide the queue with a callback
+ * routine for both transmit and receive.  The receive callback routine will be
+ * used to pass up data when the transport has received it on the queue.  The
+ * transmit callback routine will be called when the transport has completed the
+ * transmission of the data on the queue and the data is ready to be freed.
+ *
+ * RETURNS: pointer to newly created ntb_queue, NULL on error.
+ */
+struct ntb_transport_qp *
+ntb_transport_create_queue(void *data, struct device *client_dev,
+			   const struct ntb_queue_handlers *handlers)
+{
+	struct ntb_dev *ndev;
+	struct pci_dev *pdev;
+	struct ntb_transport_ctx *nt;
+	struct ntb_queue_entry *entry;
+	struct ntb_transport_qp *qp;
+	u64 qp_bit;
+	unsigned int free_queue;
+	dma_cap_mask_t dma_mask;
+	int node;
+	int i;
+
+	ndev = dev_ntb(client_dev->parent);
+	pdev = ndev->pdev;
+	nt = ndev->ctx;
+
+	node = dev_to_node(&ndev->dev);
+
+	free_queue = ffs(nt->qp_bitmap_free);
+	if (!free_queue)
+		goto err;
+
+	/* decrement free_queue to make it zero based */
+	free_queue--;
+
+	qp = &nt->qp_vec[free_queue];
+	qp_bit = BIT_ULL(qp->qp_num);
+
+	nt->qp_bitmap_free &= ~qp_bit;
+
+	qp->cb_data = data;
+	qp->rx_handler = handlers->rx_handler;
+	qp->tx_handler = handlers->tx_handler;
+	qp->event_handler = handlers->event_handler;
+
+	dma_cap_zero(dma_mask);
+	dma_cap_set(DMA_MEMCPY, dma_mask);
+
+	if (use_dma) {
+		qp->tx_dma_chan =
+			dma_request_channel(dma_mask, ntb_dma_filter_fn,
+					    (void *)(unsigned long)node);
+		if (!qp->tx_dma_chan)
+			dev_info(&pdev->dev, "Unable to allocate TX DMA channel\n");
+
+		qp->rx_dma_chan =
+			dma_request_channel(dma_mask, ntb_dma_filter_fn,
+					    (void *)(unsigned long)node);
+		if (!qp->rx_dma_chan)
+			dev_info(&pdev->dev, "Unable to allocate RX DMA channel\n");
+	} else {
+		qp->tx_dma_chan = NULL;
+		qp->rx_dma_chan = NULL;
+	}
+
+	dev_dbg(&pdev->dev, "Using %s memcpy for TX\n",
+		qp->tx_dma_chan ? "DMA" : "CPU");
+
+	dev_dbg(&pdev->dev, "Using %s memcpy for RX\n",
+		qp->rx_dma_chan ? "DMA" : "CPU");
+
+	for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) {
+		entry = kzalloc_node(sizeof(*entry), GFP_KERNEL, node);
+		if (!entry)
+			goto err1;
+
+		entry->qp = qp;
+		ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry,
+			     &qp->rx_free_q);
+	}
+	qp->rx_alloc_entry = NTB_QP_DEF_NUM_ENTRIES;
+
+	for (i = 0; i < qp->tx_max_entry; i++) {
+		entry = kzalloc_node(sizeof(*entry), GFP_KERNEL, node);
+		if (!entry)
+			goto err2;
+
+		entry->qp = qp;
+		ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry,
+			     &qp->tx_free_q);
+	}
+
+	ntb_db_clear(qp->ndev, qp_bit);
+	ntb_db_clear_mask(qp->ndev, qp_bit);
+
+	dev_info(&pdev->dev, "NTB Transport QP %d created\n", qp->qp_num);
+
+	return qp;
+
+err2:
+	while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q)))
+		kfree(entry);
+err1:
+	qp->rx_alloc_entry = 0;
+	while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q)))
+		kfree(entry);
+	if (qp->tx_dma_chan)
+		dma_release_channel(qp->tx_dma_chan);
+	if (qp->rx_dma_chan)
+		dma_release_channel(qp->rx_dma_chan);
+	nt->qp_bitmap_free |= qp_bit;
+err:
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(ntb_transport_create_queue);
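+
+/*
+ * Minimal client usage sketch (illustrative only; priv, client_dev and the
+ * my_* callbacks are hypothetical client-side names, not part of this
+ * driver):
+ *
+ *	static const struct ntb_queue_handlers my_handlers = {
+ *		.rx_handler	= my_rx_cb,
+ *		.tx_handler	= my_tx_cb,
+ *		.event_handler	= my_event_cb,
+ *	};
+ *
+ *	qp = ntb_transport_create_queue(priv, client_dev, &my_handlers);
+ *	if (!qp)
+ *		return -EIO;
+ */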
+
+/**
+ * ntb_transport_free_queue - Frees NTB transport queue
+ * @qp: NTB queue to be freed
+ *
+ * Frees NTB transport queue
+ */
+void ntb_transport_free_queue(struct ntb_transport_qp *qp)
+{
+	struct pci_dev *pdev;
+	struct ntb_queue_entry *entry;
+	u64 qp_bit;
+
+	if (!qp)
+		return;
+
+	pdev = qp->ndev->pdev;
+
+	qp->active = false;
+
+	if (qp->tx_dma_chan) {
+		struct dma_chan *chan = qp->tx_dma_chan;
+		/* Setting the dma_chan to NULL will force any new traffic to
+		 * be processed by the CPU instead of the DMA engine
+		 */
+		qp->tx_dma_chan = NULL;
+
+		/* Try to be nice and wait for any queued DMA engine
+		 * transactions to process before smashing it with a rock
+		 */
+		dma_sync_wait(chan, qp->last_cookie);
+		dmaengine_terminate_all(chan);
+		dma_release_channel(chan);
+	}
+
+	if (qp->rx_dma_chan) {
+		struct dma_chan *chan = qp->rx_dma_chan;
+		/* Setting the dma_chan to NULL will force any new traffic to
+		 * be processed by the CPU instead of the DMA engine
+		 */
+		qp->rx_dma_chan = NULL;
+
+		/* Try to be nice and wait for any queued DMA engine
+		 * transactions to process before smashing it with a rock
+		 */
+		dma_sync_wait(chan, qp->last_cookie);
+		dmaengine_terminate_all(chan);
+		dma_release_channel(chan);
+	}
+
+	qp_bit = BIT_ULL(qp->qp_num);
+
+	ntb_db_set_mask(qp->ndev, qp_bit);
+	tasklet_kill(&qp->rxc_db_work);
+
+	cancel_delayed_work_sync(&qp->link_work);
+
+	qp->cb_data = NULL;
+	qp->rx_handler = NULL;
+	qp->tx_handler = NULL;
+	qp->event_handler = NULL;
+
+	while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q)))
+		kfree(entry);
+
+	while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_pend_q))) {
+		dev_warn(&pdev->dev, "Freeing item from non-empty rx_pend_q\n");
+		kfree(entry);
+	}
+
+	while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_post_q))) {
+		dev_warn(&pdev->dev, "Freeing item from non-empty rx_post_q\n");
+		kfree(entry);
+	}
+
+	while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q)))
+		kfree(entry);
+
+	qp->transport->qp_bitmap_free |= qp_bit;
+
+	dev_info(&pdev->dev, "NTB Transport QP %d freed\n", qp->qp_num);
+}
+EXPORT_SYMBOL_GPL(ntb_transport_free_queue);
+
+/**
+ * ntb_transport_rx_remove - Dequeues enqueued rx packet
+ * @qp: NTB transport layer queue from which the buffer is dequeued
+ * @len: pointer to a variable into which the buffer's length is written
+ *
+ * Dequeues unused buffers from receive queue.  Should only be used during
+ * shutdown of qp.
+ *
+ * RETURNS: NULL on error, or a pointer to the dequeued buffer on success.
+ */
+void *ntb_transport_rx_remove(struct ntb_transport_qp *qp, unsigned int *len)
+{
+	struct ntb_queue_entry *entry;
+	void *buf;
+
+	if (!qp || qp->client_ready)
+		return NULL;
+
+	entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_pend_q);
+	if (!entry)
+		return NULL;
+
+	buf = entry->cb_data;
+	*len = entry->len;
+
+	ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_free_q);
+
+	return buf;
+}
+EXPORT_SYMBOL_GPL(ntb_transport_rx_remove);
+
+/**
+ * ntb_transport_rx_enqueue - Enqueue a new NTB queue entry
+ * @qp: NTB transport layer queue the entry is to be enqueued on
+ * @cb: per buffer pointer for callback function to use
+ * @data: pointer to data buffer that incoming packets will be copied into
+ * @len: length of the data buffer
+ *
+ * Enqueue a new receive buffer onto the transport queue into which an NTB
+ * payload can be received.
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+int ntb_transport_rx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
+			     unsigned int len)
+{
+	struct ntb_queue_entry *entry;
+
+	if (!qp)
+		return -EINVAL;
+
+	entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q);
+	if (!entry)
+		return -ENOMEM;
+
+	entry->cb_data = cb;
+	entry->buf = data;
+	entry->len = len;
+	entry->flags = 0;
+	entry->retries = 0;
+	entry->errors = 0;
+	entry->rx_index = 0;
+
+	ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_pend_q);
+
+	if (qp->active)
+		tasklet_schedule(&qp->rxc_db_work);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ntb_transport_rx_enqueue);
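+
+/*
+ * Usage sketch (illustrative; NUM_RX_BUFS and the allocation strategy are
+ * hypothetical): a client typically pre-posts a pool of receive buffers and
+ * only then signals readiness with ntb_transport_link_up():
+ *
+ *	for (i = 0; i < NUM_RX_BUFS; i++) {
+ *		len = ntb_transport_max_size(qp);
+ *		buf = kmalloc(len, GFP_KERNEL);
+ *		if (!buf || ntb_transport_rx_enqueue(qp, buf, buf, len))
+ *			break;
+ *	}
+ *	ntb_transport_link_up(qp);
+ */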
+
+/**
+ * ntb_transport_tx_enqueue - Enqueue a new NTB queue entry
+ * @qp: NTB transport layer queue the entry is to be enqueued on
+ * @cb: per buffer pointer for callback function to use
+ * @data: pointer to data buffer that will be sent
+ * @len: length of the data buffer
+ *
+ * Enqueue a new transmit buffer onto the transport queue from which an NTB
+ * payload will be transmitted.  This assumes that a lock is being held to
+ * serialize access to the qp.
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
+			     unsigned int len)
+{
+	struct ntb_queue_entry *entry;
+	int rc;
+
+	if (!qp || !qp->link_is_up || !len)
+		return -EINVAL;
+
+	entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q);
+	if (!entry) {
+		qp->tx_err_no_buf++;
+		return -EBUSY;
+	}
+
+	entry->cb_data = cb;
+	entry->buf = data;
+	entry->len = len;
+	entry->flags = 0;
+	entry->errors = 0;
+	entry->retries = 0;
+	entry->tx_index = 0;
+
+	rc = ntb_process_tx(qp, entry);
+	if (rc)
+		ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry,
+			     &qp->tx_free_q);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(ntb_transport_tx_enqueue);
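+
+/*
+ * Usage sketch (illustrative; the skb-style buffer is hypothetical): the cb
+ * pointer passed here is handed back to the tx_handler once the payload has
+ * been copied to the peer, at which point the buffer may be freed:
+ *
+ *	rc = ntb_transport_tx_enqueue(qp, skb, skb->data, skb->len);
+ *	if (rc)
+ *		... no free entry or ring full: back off and retry later ...
+ */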
+
+/**
+ * ntb_transport_link_up - Notify NTB transport of client readiness to use queue
+ * @qp: NTB transport layer queue to be enabled
+ *
+ * Notify NTB transport layer of client readiness to use queue
+ */
+void ntb_transport_link_up(struct ntb_transport_qp *qp)
+{
+	if (!qp)
+		return;
+
+	qp->client_ready = true;
+
+	if (qp->transport->link_is_up)
+		schedule_delayed_work(&qp->link_work, 0);
+}
+EXPORT_SYMBOL_GPL(ntb_transport_link_up);
+
+/**
+ * ntb_transport_link_down - Notify NTB transport to no longer enqueue data
+ * @qp: NTB transport layer queue to be disabled
+ *
+ * Notify NTB transport layer of client's desire to no longer receive data on
+ * transport queue specified.  It is the client's responsibility to ensure all
+ * entries on queue are purged or otherwise handled appropriately.
+ */
+void ntb_transport_link_down(struct ntb_transport_qp *qp)
+{
+	int val;
+
+	if (!qp)
+		return;
+
+	qp->client_ready = false;
+
+	val = ntb_spad_read(qp->ndev, QP_LINKS);
+
+	ntb_peer_spad_write(qp->ndev, PIDX, QP_LINKS, val & ~BIT(qp->qp_num));
+
+	if (qp->link_is_up)
+		ntb_send_link_down(qp);
+	else
+		cancel_delayed_work_sync(&qp->link_work);
+}
+EXPORT_SYMBOL_GPL(ntb_transport_link_down);
+
+/**
+ * ntb_transport_link_query - Query transport link state
+ * @qp: NTB transport layer queue to be queried
+ *
+ * Query connectivity to the remote system of the NTB transport queue
+ *
+ * RETURNS: true for link up or false for link down
+ */
+bool ntb_transport_link_query(struct ntb_transport_qp *qp)
+{
+	if (!qp)
+		return false;
+
+	return qp->link_is_up;
+}
+EXPORT_SYMBOL_GPL(ntb_transport_link_query);
+
+/**
+ * ntb_transport_qp_num - Query the qp number
+ * @qp: NTB transport layer queue to be queried
+ *
+ * Query qp number of the NTB transport queue
+ *
+ * RETURNS: a zero based number specifying the qp number
+ */
+unsigned char ntb_transport_qp_num(struct ntb_transport_qp *qp)
+{
+	if (!qp)
+		return 0;
+
+	return qp->qp_num;
+}
+EXPORT_SYMBOL_GPL(ntb_transport_qp_num);
+
+/**
+ * ntb_transport_max_size - Query the max payload size of a qp
+ * @qp: NTB transport layer queue to be queried
+ *
+ * Query the maximum payload size permissible on the given qp
+ *
+ * RETURNS: the max payload size of a qp
+ */
+unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp)
+{
+	unsigned int max_size;
+	unsigned int copy_align;
+	struct dma_chan *rx_chan, *tx_chan;
+
+	if (!qp)
+		return 0;
+
+	rx_chan = qp->rx_dma_chan;
+	tx_chan = qp->tx_dma_chan;
+
+	copy_align = max(rx_chan ? rx_chan->device->copy_align : 0,
+			 tx_chan ? tx_chan->device->copy_align : 0);
+
+	/* If DMA engine usage is possible, try to find the max size for that */
+	max_size = qp->tx_max_frame - sizeof(struct ntb_payload_header);
+	max_size = round_down(max_size, 1 << copy_align);
+
+	return max_size;
+}
+EXPORT_SYMBOL_GPL(ntb_transport_max_size);
+
+unsigned int ntb_transport_tx_free_entry(struct ntb_transport_qp *qp)
+{
+	unsigned int head = qp->tx_index;
+	unsigned int tail = qp->remote_rx_info->entry;
+
+	return tail > head ? tail - head : qp->tx_max_entry + tail - head;
+}
+EXPORT_SYMBOL_GPL(ntb_transport_tx_free_entry);
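+
+/*
+ * Ring arithmetic example (illustrative numbers): with tx_max_entry = 8,
+ * head (tx_index) = 6 and tail (remote_rx_info->entry) = 2, the free count
+ * is 8 + 2 - 6 = 4; with head = 1 and tail = 5 it is simply 5 - 1 = 4.
+ */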
+
+static void ntb_transport_doorbell_callback(void *data, int vector)
+{
+	struct ntb_transport_ctx *nt = data;
+	struct ntb_transport_qp *qp;
+	u64 db_bits;
+	unsigned int qp_num;
+
+	db_bits = (nt->qp_bitmap & ~nt->qp_bitmap_free &
+		   ntb_db_vector_mask(nt->ndev, vector));
+
+	while (db_bits) {
+		qp_num = __ffs(db_bits);
+		qp = &nt->qp_vec[qp_num];
+
+		if (qp->active)
+			tasklet_schedule(&qp->rxc_db_work);
+
+		db_bits &= ~BIT_ULL(qp_num);
+	}
+}
+
+static const struct ntb_ctx_ops ntb_transport_ops = {
+	.link_event = ntb_transport_event_callback,
+	.db_event = ntb_transport_doorbell_callback,
+};
+
+static struct ntb_client ntb_transport_client = {
+	.ops = {
+		.probe = ntb_transport_probe,
+		.remove = ntb_transport_free,
+	},
+};
+
+static int __init ntb_transport_init(void)
+{
+	int rc;
+
+	pr_info("%s, version %s\n", NTB_TRANSPORT_DESC, NTB_TRANSPORT_VER);
+
+	if (debugfs_initialized())
+		nt_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+
+	rc = bus_register(&ntb_transport_bus);
+	if (rc)
+		goto err_bus;
+
+	rc = ntb_register_client(&ntb_transport_client);
+	if (rc)
+		goto err_client;
+
+	return 0;
+
+err_client:
+	bus_unregister(&ntb_transport_bus);
+err_bus:
+	debugfs_remove_recursive(nt_debugfs_dir);
+	return rc;
+}
+module_init(ntb_transport_init);
+
+static void __exit ntb_transport_exit(void)
+{
+	ntb_unregister_client(&ntb_transport_client);
+	bus_unregister(&ntb_transport_bus);
+	debugfs_remove_recursive(nt_debugfs_dir);
+}
+module_exit(ntb_transport_exit);
diff --git a/drivers/ntb/test/Kconfig b/drivers/ntb/test/Kconfig
new file mode 100644
index 0000000..a5d0eda
--- /dev/null
+++ b/drivers/ntb/test/Kconfig
@@ -0,0 +1,27 @@
+config NTB_PINGPONG
+	tristate "NTB Ping Pong Test Client"
+	help
+	 This is a simple ping pong driver that exercises the scratchpads and
+	 doorbells of the ntb hardware.  This driver may be used to test that
+	 your ntb hardware and drivers are functioning at a basic level.
+
+	 If unsure, say N.
+
+config NTB_TOOL
+	tristate "NTB Debugging Tool Test Client"
+	help
+	 This is a simple debugging driver that enables the doorbell and
+	 scratchpad registers to be read and written from the debugfs.  This
+	 enables more complicated debugging to be scripted from user space.
+	 This driver may be used to test that your ntb hardware and drivers are
+	 functioning at a basic level.
+
+	 If unsure, say N.
+
+config NTB_PERF
+	tristate "NTB RAW Perf Measuring Tool"
+	help
+	 This is a tool to measure raw NTB performance by transferring data
+	 to and from the window without additional software interaction.
+
+	 If unsure, say N.
diff --git a/drivers/ntb/test/Makefile b/drivers/ntb/test/Makefile
new file mode 100644
index 0000000..9e77e0b
--- /dev/null
+++ b/drivers/ntb/test/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_NTB_PINGPONG) += ntb_pingpong.o
+obj-$(CONFIG_NTB_TOOL) += ntb_tool.o
+obj-$(CONFIG_NTB_PERF) += ntb_perf.o
diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c
new file mode 100644
index 0000000..2a9d6b0
--- /dev/null
+++ b/drivers/ntb/test/ntb_perf.c
@@ -0,0 +1,1515 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2017 T-Platforms. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2017 T-Platforms. All Rights Reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * PCIe NTB Perf Linux driver
+ */
+
+/*
+ * How to use this tool, by example.
+ *
+ * Assuming $DBG_DIR is something like:
+ * '/sys/kernel/debug/ntb_perf/0000:00:03.0'
+ * Suppose aside from local device there is at least one remote device
+ * connected to NTB with index 0.
+ *-----------------------------------------------------------------------------
+ * Eg: install driver with specified chunk/total orders and dma-enabled flag
+ *
+ * root@self# insmod ntb_perf.ko chunk_order=19 total_order=28 use_dma
+ *-----------------------------------------------------------------------------
+ * Eg: check NTB ports (index) and MW mapping information
+ *
+ * root@self# cat $DBG_DIR/info
+ *-----------------------------------------------------------------------------
+ * Eg: start performance test with peer (index 0) and get the test metrics
+ *
+ * root@self# echo 0 > $DBG_DIR/run
+ * root@self# cat $DBG_DIR/run
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/pci.h>
+#include <linux/ktime.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/sizes.h>
+#include <linux/workqueue.h>
+#include <linux/debugfs.h>
+#include <linux/random.h>
+#include <linux/ntb.h>
+
+#define DRIVER_NAME		"ntb_perf"
+#define DRIVER_VERSION		"2.0"
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_AUTHOR("Dave Jiang <dave.jiang@intel.com>");
+MODULE_DESCRIPTION("PCIe NTB Performance Measurement Tool");
+
+#define MAX_THREADS_CNT		32
+#define DEF_THREADS_CNT		1
+#define MAX_CHUNK_SIZE		SZ_1M
+#define MAX_CHUNK_ORDER		20 /* no larger than 1M */
+
+#define DMA_TRIES		100
+#define DMA_MDELAY		10
+
+#define MSG_TRIES		500
+#define MSG_UDELAY_LOW		1000
+#define MSG_UDELAY_HIGH		2000
+
+#define PERF_BUF_LEN 1024
+
+static unsigned long max_mw_size;
+module_param(max_mw_size, ulong, 0644);
+MODULE_PARM_DESC(max_mw_size, "Upper limit of memory window size");
+
+static unsigned char chunk_order = 19; /* 512K */
+module_param(chunk_order, byte, 0644);
+MODULE_PARM_DESC(chunk_order, "Data chunk order [2^n] to transfer");
+
+static unsigned char total_order = 30; /* 1G */
+module_param(total_order, byte, 0644);
+MODULE_PARM_DESC(total_order, "Total data order [2^n] to transfer");
+
+static bool use_dma; /* default to 0 */
+module_param(use_dma, bool, 0644);
+MODULE_PARM_DESC(use_dma, "Use DMA engine to measure performance");
+
+/*==============================================================================
+ *                         Perf driver data definition
+ *==============================================================================
+ */
+
+enum perf_cmd {
+	PERF_CMD_INVAL = -1, /* invalid spad command */
+	PERF_CMD_SSIZE = 0, /* send out buffer size */
+	PERF_CMD_RSIZE = 1, /* recv in  buffer size */
+	PERF_CMD_SXLAT = 2, /* send in  buffer xlat */
+	PERF_CMD_RXLAT = 3, /* recv out buffer xlat */
+	PERF_CMD_CLEAR = 4, /* clear allocated memory */
+	PERF_STS_DONE  = 5, /* init is done */
+	PERF_STS_LNKUP = 6, /* link up state flag */
+};
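+
+/*
+ * Command flow sketch (as implemented by perf_cmd_recv() and
+ * perf_service_work() below): on link up each side sends PERF_CMD_SSIZE
+ * with its outbuf size; the receiver treats it as PERF_CMD_RSIZE, allocates
+ * an inbuf and answers with PERF_CMD_SXLAT carrying the inbuf xlat address;
+ * the peer handles that as PERF_CMD_RXLAT, programs its outbound translation
+ * and sets PERF_STS_DONE, after which the test can be run.
+ */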
+
+struct perf_ctx;
+
+struct perf_peer {
+	struct perf_ctx	*perf;
+	int pidx;
+	int gidx;
+
+	/* Outbound MW params */
+	u64 outbuf_xlat;
+	resource_size_t outbuf_size;
+	void __iomem *outbuf;
+
+	/* Inbound MW params */
+	dma_addr_t inbuf_xlat;
+	resource_size_t inbuf_size;
+	void		*inbuf;
+
+	/* NTB connection setup service */
+	struct work_struct	service;
+	unsigned long		sts;
+};
+#define to_peer_service(__work) \
+	container_of(__work, struct perf_peer, service)
+
+struct perf_thread {
+	struct perf_ctx *perf;
+	int tidx;
+
+	/* DMA-based test sync parameters */
+	atomic_t dma_sync;
+	wait_queue_head_t dma_wait;
+	struct dma_chan *dma_chan;
+
+	/* Data source and measured statistics */
+	void *src;
+	u64 copied;
+	ktime_t duration;
+	int status;
+	struct work_struct work;
+};
+#define to_thread_work(__work) \
+	container_of(__work, struct perf_thread, work)
+
+struct perf_ctx {
+	struct ntb_dev *ntb;
+
+	/* Global device index and peers descriptors */
+	int gidx;
+	int pcnt;
+	struct perf_peer *peers;
+
+	/* Performance measuring work-threads interface */
+	unsigned long busy_flag;
+	wait_queue_head_t twait;
+	atomic_t tsync;
+	u8 tcnt;
+	struct perf_peer *test_peer;
+	struct perf_thread threads[MAX_THREADS_CNT];
+
+	/* Scratchpad/Message IO operations */
+	int (*cmd_send)(struct perf_peer *peer, enum perf_cmd cmd, u64 data);
+	int (*cmd_recv)(struct perf_ctx *perf, int *pidx, enum perf_cmd *cmd,
+			u64 *data);
+
+	struct dentry *dbgfs_dir;
+};
+
+/*
+ * Scratchpads-base commands interface
+ */
+#define PERF_SPAD_CNT(_pcnt) \
+	(3*((_pcnt) + 1))
+#define PERF_SPAD_CMD(_gidx) \
+	(3*(_gidx))
+#define PERF_SPAD_LDATA(_gidx) \
+	(3*(_gidx) + 1)
+#define PERF_SPAD_HDATA(_gidx) \
+	(3*(_gidx) + 2)
+#define PERF_SPAD_NOTIFY(_gidx) \
+	(BIT_ULL(_gidx))
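+
+/*
+ * Layout example (illustrative): for a device with global index 1 the
+ * command triplet lives in scratchpads 3 (CMD), 4 (LDATA) and 5 (HDATA),
+ * and PERF_SPAD_CNT() reserves 3 * (pcnt + 1) scratchpads overall so each
+ * port, including the local one, owns its own triplet.
+ */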
+
+/*
+ * Messages-base commands interface
+ */
+#define PERF_MSG_CNT		3
+#define PERF_MSG_CMD		0
+#define PERF_MSG_LDATA		1
+#define PERF_MSG_HDATA		2
+
+/*==============================================================================
+ *                           Static data declarations
+ *==============================================================================
+ */
+
+static struct dentry *perf_dbgfs_topdir;
+
+static struct workqueue_struct *perf_wq __read_mostly;
+
+/*==============================================================================
+ *                  NTB cross-link commands execution service
+ *==============================================================================
+ */
+
+static void perf_terminate_test(struct perf_ctx *perf);
+
+static inline bool perf_link_is_up(struct perf_peer *peer)
+{
+	u64 link;
+
+	link = ntb_link_is_up(peer->perf->ntb, NULL, NULL);
+	return !!(link & BIT_ULL_MASK(peer->pidx));
+}
+
+static int perf_spad_cmd_send(struct perf_peer *peer, enum perf_cmd cmd,
+			      u64 data)
+{
+	struct perf_ctx *perf = peer->perf;
+	int try;
+	u32 sts;
+
+	dev_dbg(&perf->ntb->dev, "CMD send: %d 0x%llx\n", cmd, data);
+
+	/*
+	 * Perform a predefined number of attempts before giving up.  The
+	 * data is sent to the port-specific scratchpad to prevent a
+	 * multi-port access race condition.  Additionally, there is no need
+	 * for local locking, since only the thread-safe service work uses
+	 * this method.
+	 */
+	for (try = 0; try < MSG_TRIES; try++) {
+		if (!perf_link_is_up(peer))
+			return -ENOLINK;
+
+		sts = ntb_peer_spad_read(perf->ntb, peer->pidx,
+					 PERF_SPAD_CMD(perf->gidx));
+		if (sts != PERF_CMD_INVAL) {
+			usleep_range(MSG_UDELAY_LOW, MSG_UDELAY_HIGH);
+			continue;
+		}
+
+		ntb_peer_spad_write(perf->ntb, peer->pidx,
+				    PERF_SPAD_LDATA(perf->gidx),
+				    lower_32_bits(data));
+		ntb_peer_spad_write(perf->ntb, peer->pidx,
+				    PERF_SPAD_HDATA(perf->gidx),
+				    upper_32_bits(data));
+		mmiowb();
+		ntb_peer_spad_write(perf->ntb, peer->pidx,
+				    PERF_SPAD_CMD(perf->gidx),
+				    cmd);
+		mmiowb();
+		ntb_peer_db_set(perf->ntb, PERF_SPAD_NOTIFY(peer->gidx));
+
+		dev_dbg(&perf->ntb->dev, "DB ring peer %#llx\n",
+			PERF_SPAD_NOTIFY(peer->gidx));
+
+		break;
+	}
+
+	return try < MSG_TRIES ? 0 : -EAGAIN;
+}
+
+static int perf_spad_cmd_recv(struct perf_ctx *perf, int *pidx,
+			      enum perf_cmd *cmd, u64 *data)
+{
+	struct perf_peer *peer;
+	u32 val;
+
+	ntb_db_clear(perf->ntb, PERF_SPAD_NOTIFY(perf->gidx));
+
+	/*
+	 * Scan all the peers from the start, since the cleared DB bit may
+	 * have been set by any of them.  This gives peers with smaller
+	 * indexes higher priority, but it keeps the spad and message code
+	 * unified and simple.
+	 */
+	for (*pidx = 0; *pidx < perf->pcnt; (*pidx)++) {
+		peer = &perf->peers[*pidx];
+
+		if (!perf_link_is_up(peer))
+			continue;
+
+		val = ntb_spad_read(perf->ntb, PERF_SPAD_CMD(peer->gidx));
+		if (val == PERF_CMD_INVAL)
+			continue;
+
+		*cmd = val;
+
+		val = ntb_spad_read(perf->ntb, PERF_SPAD_LDATA(peer->gidx));
+		*data = val;
+
+		val = ntb_spad_read(perf->ntb, PERF_SPAD_HDATA(peer->gidx));
+		*data |= (u64)val << 32;
+
+		/* Next command can be retrieved from now */
+		ntb_spad_write(perf->ntb, PERF_SPAD_CMD(peer->gidx),
+			       PERF_CMD_INVAL);
+
+		dev_dbg(&perf->ntb->dev, "CMD recv: %d 0x%llx\n", *cmd, *data);
+
+		return 0;
+	}
+
+	return -ENODATA;
+}
+
+static int perf_msg_cmd_send(struct perf_peer *peer, enum perf_cmd cmd,
+			     u64 data)
+{
+	struct perf_ctx *perf = peer->perf;
+	int try, ret;
+	u64 outbits;
+
+	dev_dbg(&perf->ntb->dev, "CMD send: %d 0x%llx\n", cmd, data);
+
+	/*
+	 * Perform a predefined number of attempts before giving up.  Message
+	 * registers are free of race conditions when accessed from different
+	 * ports, so there is no need to split the registers by global device
+	 * index.  Local locking is also unnecessary, since this method is
+	 * only used from the service work.
+	 */
+	outbits = ntb_msg_outbits(perf->ntb);
+	for (try = 0; try < MSG_TRIES; try++) {
+		if (!perf_link_is_up(peer))
+			return -ENOLINK;
+
+		ret = ntb_msg_clear_sts(perf->ntb, outbits);
+		if (ret)
+			return ret;
+
+		ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_LDATA,
+				   lower_32_bits(data));
+
+		if (ntb_msg_read_sts(perf->ntb) & outbits) {
+			usleep_range(MSG_UDELAY_LOW, MSG_UDELAY_HIGH);
+			continue;
+		}
+
+		ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_HDATA,
+				   upper_32_bits(data));
+		mmiowb();
+
+		/* This call shall trigger peer message event */
+		ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_CMD, cmd);
+
+		break;
+	}
+
+	return try < MSG_TRIES ? 0 : -EAGAIN;
+}
+
+static int perf_msg_cmd_recv(struct perf_ctx *perf, int *pidx,
+			     enum perf_cmd *cmd, u64 *data)
+{
+	u64 inbits;
+	u32 val;
+
+	inbits = ntb_msg_inbits(perf->ntb);
+
+	if (hweight64(ntb_msg_read_sts(perf->ntb) & inbits) < 3)
+		return -ENODATA;
+
+	val = ntb_msg_read(perf->ntb, pidx, PERF_MSG_CMD);
+	*cmd = val;
+
+	val = ntb_msg_read(perf->ntb, pidx, PERF_MSG_LDATA);
+	*data = val;
+
+	val = ntb_msg_read(perf->ntb, pidx, PERF_MSG_HDATA);
+	*data |= (u64)val << 32;
+
+	/* Next command can be retrieved from now */
+	ntb_msg_clear_sts(perf->ntb, inbits);
+
+	dev_dbg(&perf->ntb->dev, "CMD recv: %d 0x%llx\n", *cmd, *data);
+
+	return 0;
+}
+
+static int perf_cmd_send(struct perf_peer *peer, enum perf_cmd cmd, u64 data)
+{
+	struct perf_ctx *perf = peer->perf;
+
+	if (cmd == PERF_CMD_SSIZE || cmd == PERF_CMD_SXLAT)
+		return perf->cmd_send(peer, cmd, data);
+
+	dev_err(&perf->ntb->dev, "Send invalid command\n");
+	return -EINVAL;
+}
+
+static int perf_cmd_exec(struct perf_peer *peer, enum perf_cmd cmd)
+{
+	switch (cmd) {
+	case PERF_CMD_SSIZE:
+	case PERF_CMD_RSIZE:
+	case PERF_CMD_SXLAT:
+	case PERF_CMD_RXLAT:
+	case PERF_CMD_CLEAR:
+		break;
+	default:
+		dev_err(&peer->perf->ntb->dev, "Exec invalid command\n");
+		return -EINVAL;
+	}
+
+	/* No need for a memory barrier, since bit ops have internal locking */
+	set_bit(cmd, &peer->sts);
+
+	dev_dbg(&peer->perf->ntb->dev, "CMD exec: %d\n", cmd);
+
+	(void)queue_work(system_highpri_wq, &peer->service);
+
+	return 0;
+}
+
+static int perf_cmd_recv(struct perf_ctx *perf)
+{
+	struct perf_peer *peer;
+	int ret, pidx, cmd;
+	u64 data;
+
+	while (!(ret = perf->cmd_recv(perf, &pidx, &cmd, &data))) {
+		peer = &perf->peers[pidx];
+
+		switch (cmd) {
+		case PERF_CMD_SSIZE:
+			peer->inbuf_size = data;
+			return perf_cmd_exec(peer, PERF_CMD_RSIZE);
+		case PERF_CMD_SXLAT:
+			peer->outbuf_xlat = data;
+			return perf_cmd_exec(peer, PERF_CMD_RXLAT);
+		default:
+			dev_err(&perf->ntb->dev, "Recv invalid command\n");
+			return -EINVAL;
+		}
+	}
+
+	/* Return 0 if no data left to process, otherwise an error */
+	return ret == -ENODATA ? 0 : ret;
+}
+
+static void perf_link_event(void *ctx)
+{
+	struct perf_ctx *perf = ctx;
+	struct perf_peer *peer;
+	bool lnk_up;
+	int pidx;
+
+	for (pidx = 0; pidx < perf->pcnt; pidx++) {
+		peer = &perf->peers[pidx];
+
+		lnk_up = perf_link_is_up(peer);
+
+		if (lnk_up &&
+		    !test_and_set_bit(PERF_STS_LNKUP, &peer->sts)) {
+			perf_cmd_exec(peer, PERF_CMD_SSIZE);
+		} else if (!lnk_up &&
+			   test_and_clear_bit(PERF_STS_LNKUP, &peer->sts)) {
+			perf_cmd_exec(peer, PERF_CMD_CLEAR);
+		}
+	}
+}
+
+static void perf_db_event(void *ctx, int vec)
+{
+	struct perf_ctx *perf = ctx;
+
+	dev_dbg(&perf->ntb->dev, "DB vec %d mask %#llx bits %#llx\n", vec,
+		ntb_db_vector_mask(perf->ntb, vec), ntb_db_read(perf->ntb));
+
+	/* Just receive all available commands */
+	(void)perf_cmd_recv(perf);
+}
+
+static void perf_msg_event(void *ctx)
+{
+	struct perf_ctx *perf = ctx;
+
+	dev_dbg(&perf->ntb->dev, "Msg status bits %#llx\n",
+		ntb_msg_read_sts(perf->ntb));
+
+	/* Messages are only sent one-by-one */
+	(void)perf_cmd_recv(perf);
+}
+
+static const struct ntb_ctx_ops perf_ops = {
+	.link_event = perf_link_event,
+	.db_event = perf_db_event,
+	.msg_event = perf_msg_event
+};
+
+static void perf_free_outbuf(struct perf_peer *peer)
+{
+	(void)ntb_peer_mw_clear_trans(peer->perf->ntb, peer->pidx, peer->gidx);
+}
+
+static int perf_setup_outbuf(struct perf_peer *peer)
+{
+	struct perf_ctx *perf = peer->perf;
+	int ret;
+
+	/* Outbuf size can be unaligned due to custom max_mw_size */
+	ret = ntb_peer_mw_set_trans(perf->ntb, peer->pidx, peer->gidx,
+				    peer->outbuf_xlat, peer->outbuf_size);
+	if (ret) {
+		dev_err(&perf->ntb->dev, "Failed to set outbuf translation\n");
+		return ret;
+	}
+
+	/* Initialization is finally done */
+	set_bit(PERF_STS_DONE, &peer->sts);
+
+	return 0;
+}
+
+static void perf_free_inbuf(struct perf_peer *peer)
+{
+	if (!peer->inbuf)
+		return;
+
+	(void)ntb_mw_clear_trans(peer->perf->ntb, peer->pidx, peer->gidx);
+	dma_free_coherent(&peer->perf->ntb->dev, peer->inbuf_size,
+			  peer->inbuf, peer->inbuf_xlat);
+	peer->inbuf = NULL;
+}
+
+static int perf_setup_inbuf(struct perf_peer *peer)
+{
+	resource_size_t xlat_align, size_align, size_max;
+	struct perf_ctx *perf = peer->perf;
+	int ret;
+
+	/* Get inbound MW parameters */
+	ret = ntb_mw_get_align(perf->ntb, peer->pidx, perf->gidx,
+			       &xlat_align, &size_align, &size_max);
+	if (ret) {
+		dev_err(&perf->ntb->dev, "Couldn't get inbuf restrictions\n");
+		return ret;
+	}
+
+	if (peer->inbuf_size > size_max) {
+		dev_err(&perf->ntb->dev, "Too big inbuf size %pa > %pa\n",
+			&peer->inbuf_size, &size_max);
+		return -EINVAL;
+	}
+
+	peer->inbuf_size = round_up(peer->inbuf_size, size_align);
+
+	perf_free_inbuf(peer);
+
+	peer->inbuf = dma_alloc_coherent(&perf->ntb->dev, peer->inbuf_size,
+					 &peer->inbuf_xlat, GFP_KERNEL);
+	if (!peer->inbuf) {
+		dev_err(&perf->ntb->dev, "Failed to alloc inbuf of %pa\n",
+			&peer->inbuf_size);
+		return -ENOMEM;
+	}
+	if (!IS_ALIGNED(peer->inbuf_xlat, xlat_align)) {
+		ret = -EINVAL;
+		dev_err(&perf->ntb->dev, "Unaligned inbuf allocated\n");
+		goto err_free_inbuf;
+	}
+
+	ret = ntb_mw_set_trans(perf->ntb, peer->pidx, peer->gidx,
+			       peer->inbuf_xlat, peer->inbuf_size);
+	if (ret) {
+		dev_err(&perf->ntb->dev, "Failed to set inbuf translation\n");
+		goto err_free_inbuf;
+	}
+
+	/*
+	 * We submit the inbuf xlat transmission command for execution here to
+	 * follow the code architecture, even though this method is called from
+	 * the service work itself, so the command will be executed right after
+	 * it returns.
+	 */
+	(void)perf_cmd_exec(peer, PERF_CMD_SXLAT);
+
+	return 0;
+
+err_free_inbuf:
+	perf_free_inbuf(peer);
+
+	return ret;
+}
+
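+/*
+ * The command service work drives the buffer negotiation with a peer:
+ * SSIZE sends the local outbuf size; the peer handles it as RSIZE by
+ * allocating an inbuf of that size and replying with SXLAT (its inbuf
+ * translation address), which is handled here as RXLAT by programming
+ * the outbuf translation and marking the peer ready. CLEAR tears the
+ * buffers down, terminating a test if one is running.
+ */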
+static void perf_service_work(struct work_struct *work)
+{
+	struct perf_peer *peer = to_peer_service(work);
+
+	if (test_and_clear_bit(PERF_CMD_SSIZE, &peer->sts))
+		perf_cmd_send(peer, PERF_CMD_SSIZE, peer->outbuf_size);
+
+	if (test_and_clear_bit(PERF_CMD_RSIZE, &peer->sts))
+		perf_setup_inbuf(peer);
+
+	if (test_and_clear_bit(PERF_CMD_SXLAT, &peer->sts))
+		perf_cmd_send(peer, PERF_CMD_SXLAT, peer->inbuf_xlat);
+
+	if (test_and_clear_bit(PERF_CMD_RXLAT, &peer->sts))
+		perf_setup_outbuf(peer);
+
+	if (test_and_clear_bit(PERF_CMD_CLEAR, &peer->sts)) {
+		clear_bit(PERF_STS_DONE, &peer->sts);
+		if (test_bit(0, &peer->perf->busy_flag) &&
+		    peer == peer->perf->test_peer) {
+			dev_warn(&peer->perf->ntb->dev,
+				"Freeing while test on-fly\n");
+			perf_terminate_test(peer->perf);
+		}
+		perf_free_outbuf(peer);
+		perf_free_inbuf(peer);
+	}
+}
+
+static int perf_init_service(struct perf_ctx *perf)
+{
+	u64 mask;
+
+	if (ntb_peer_mw_count(perf->ntb) < perf->pcnt + 1) {
+		dev_err(&perf->ntb->dev, "Not enough memory windows\n");
+		return -EINVAL;
+	}
+
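+	/*
+	 * Message registers are the preferred command transport; scratchpads
+	 * paired with a doorbell are used as the fallback below.
+	 */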
+	if (ntb_msg_count(perf->ntb) >= PERF_MSG_CNT) {
+		perf->cmd_send = perf_msg_cmd_send;
+		perf->cmd_recv = perf_msg_cmd_recv;
+
+		dev_dbg(&perf->ntb->dev, "Message service initialized\n");
+
+		return 0;
+	}
+
+	dev_dbg(&perf->ntb->dev, "Message service unsupported\n");
+
+	mask = GENMASK_ULL(perf->pcnt, 0);
+	if (ntb_spad_count(perf->ntb) >= PERF_SPAD_CNT(perf->pcnt) &&
+	    (ntb_db_valid_mask(perf->ntb) & mask) == mask) {
+		perf->cmd_send = perf_spad_cmd_send;
+		perf->cmd_recv = perf_spad_cmd_recv;
+
+		dev_dbg(&perf->ntb->dev, "Scratchpad service initialized\n");
+
+		return 0;
+	}
+
+	dev_dbg(&perf->ntb->dev, "Scratchpad service unsupported\n");
+
+	dev_err(&perf->ntb->dev, "Command services unsupported\n");
+
+	return -EINVAL;
+}
+
+static int perf_enable_service(struct perf_ctx *perf)
+{
+	u64 mask, incmd_bit;
+	int ret, sidx, scnt;
+
+	mask = ntb_db_valid_mask(perf->ntb);
+	(void)ntb_db_set_mask(perf->ntb, mask);
+
+	ret = ntb_set_ctx(perf->ntb, perf, &perf_ops);
+	if (ret)
+		return ret;
+
+	if (perf->cmd_send == perf_msg_cmd_send) {
+		u64 inbits, outbits;
+
+		inbits = ntb_msg_inbits(perf->ntb);
+		outbits = ntb_msg_outbits(perf->ntb);
+		(void)ntb_msg_set_mask(perf->ntb, inbits | outbits);
+
+		incmd_bit = BIT_ULL(__ffs64(inbits));
+		ret = ntb_msg_clear_mask(perf->ntb, incmd_bit);
+
+		dev_dbg(&perf->ntb->dev, "MSG sts unmasked %#llx\n", incmd_bit);
+	} else {
+		scnt = ntb_spad_count(perf->ntb);
+		for (sidx = 0; sidx < scnt; sidx++)
+			ntb_spad_write(perf->ntb, sidx, PERF_CMD_INVAL);
+		incmd_bit = PERF_SPAD_NOTIFY(perf->gidx);
+		ret = ntb_db_clear_mask(perf->ntb, incmd_bit);
+
+		dev_dbg(&perf->ntb->dev, "DB bits unmasked %#llx\n", incmd_bit);
+	}
+	if (ret) {
+		ntb_clear_ctx(perf->ntb);
+		return ret;
+	}
+
+	ntb_link_enable(perf->ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
+	/* Might not be necessary */
+	ntb_link_event(perf->ntb);
+
+	return 0;
+}
+
+static void perf_disable_service(struct perf_ctx *perf)
+{
+	int pidx;
+
+	ntb_link_disable(perf->ntb);
+
+	if (perf->cmd_send == perf_msg_cmd_send) {
+		u64 inbits;
+
+		inbits = ntb_msg_inbits(perf->ntb);
+		(void)ntb_msg_set_mask(perf->ntb, inbits);
+	} else {
+		(void)ntb_db_set_mask(perf->ntb, PERF_SPAD_NOTIFY(perf->gidx));
+	}
+
+	ntb_clear_ctx(perf->ntb);
+
+	for (pidx = 0; pidx < perf->pcnt; pidx++)
+		perf_cmd_exec(&perf->peers[pidx], PERF_CMD_CLEAR);
+
+	for (pidx = 0; pidx < perf->pcnt; pidx++)
+		flush_work(&perf->peers[pidx].service);
+}
+
+/*==============================================================================
+ *                      Performance measuring work-thread
+ *==============================================================================
+ */
+
+static void perf_dma_copy_callback(void *data)
+{
+	struct perf_thread *pthr = data;
+
+	atomic_dec(&pthr->dma_sync);
+	wake_up(&pthr->dma_wait);
+}
+
+static int perf_copy_chunk(struct perf_thread *pthr,
+			   void __iomem *dst, void *src, size_t len)
+{
+	struct dma_async_tx_descriptor *tx;
+	struct dmaengine_unmap_data *unmap;
+	struct device *dma_dev;
+	int try = 0, ret = 0;
+
+	if (!use_dma) {
+		memcpy_toio(dst, src, len);
+		goto ret_check_tsync;
+	}
+
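+	/*
+	 * DMA path: map the source and destination pages, prepare a memcpy
+	 * descriptor (retrying while the engine is busy) and submit it; the
+	 * completion callback decrements dma_sync and wakes the waiter.
+	 */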
+	dma_dev = pthr->dma_chan->device->dev;
+
+	if (!is_dma_copy_aligned(pthr->dma_chan->device, offset_in_page(src),
+				 offset_in_page(dst), len))
+		return -EIO;
+
+	unmap = dmaengine_get_unmap_data(dma_dev, 2, GFP_NOWAIT);
+	if (!unmap)
+		return -ENOMEM;
+
+	unmap->len = len;
+	unmap->addr[0] = dma_map_page(dma_dev, virt_to_page(src),
+		offset_in_page(src), len, DMA_TO_DEVICE);
+	if (dma_mapping_error(dma_dev, unmap->addr[0])) {
+		ret = -EIO;
+		goto err_free_resource;
+	}
+	unmap->to_cnt = 1;
+
+	unmap->addr[1] = dma_map_page(dma_dev, virt_to_page(dst),
+		offset_in_page(dst), len, DMA_FROM_DEVICE);
+	if (dma_mapping_error(dma_dev, unmap->addr[1])) {
+		ret = -EIO;
+		goto err_free_resource;
+	}
+	unmap->from_cnt = 1;
+
+	do {
+		tx = dmaengine_prep_dma_memcpy(pthr->dma_chan, unmap->addr[1],
+			unmap->addr[0], len, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+		if (!tx)
+			msleep(DMA_MDELAY);
+	} while (!tx && (try++ < DMA_TRIES));
+
+	if (!tx) {
+		ret = -EIO;
+		goto err_free_resource;
+	}
+
+	tx->callback = perf_dma_copy_callback;
+	tx->callback_param = pthr;
+	dma_set_unmap(tx, unmap);
+
+	ret = dma_submit_error(dmaengine_submit(tx));
+	if (ret) {
+		dmaengine_unmap_put(unmap);
+		goto err_free_resource;
+	}
+
+	dmaengine_unmap_put(unmap);
+
+	atomic_inc(&pthr->dma_sync);
+	dma_async_issue_pending(pthr->dma_chan);
+
+ret_check_tsync:
+	return likely(atomic_read(&pthr->perf->tsync) > 0) ? 0 : -EINTR;
+
+err_free_resource:
+	dmaengine_unmap_put(unmap);
+
+	return ret;
+}
+
+static bool perf_dma_filter(struct dma_chan *chan, void *data)
+{
+	struct perf_ctx *perf = data;
+	int node;
+
+	node = dev_to_node(&perf->ntb->dev);
+
+	return node == NUMA_NO_NODE || node == dev_to_node(chan->device->dev);
+}
+
+static int perf_init_test(struct perf_thread *pthr)
+{
+	struct perf_ctx *perf = pthr->perf;
+	dma_cap_mask_t dma_mask;
+
+	pthr->src = kmalloc_node(perf->test_peer->outbuf_size, GFP_KERNEL,
+				 dev_to_node(&perf->ntb->dev));
+	if (!pthr->src)
+		return -ENOMEM;
+
+	get_random_bytes(pthr->src, perf->test_peer->outbuf_size);
+
+	if (!use_dma)
+		return 0;
+
+	dma_cap_zero(dma_mask);
+	dma_cap_set(DMA_MEMCPY, dma_mask);
+	pthr->dma_chan = dma_request_channel(dma_mask, perf_dma_filter, perf);
+	if (!pthr->dma_chan) {
+		dev_err(&perf->ntb->dev, "%d: Failed to get DMA channel\n",
+			pthr->tidx);
+		atomic_dec(&perf->tsync);
+		wake_up(&perf->twait);
+		kfree(pthr->src);
+		return -ENODEV;
+	}
+
+	atomic_set(&pthr->dma_sync, 0);
+
+	return 0;
+}
+
+static int perf_run_test(struct perf_thread *pthr)
+{
+	struct perf_peer *peer = pthr->perf->test_peer;
+	struct perf_ctx *perf = pthr->perf;
+	void __iomem *flt_dst, *bnd_dst;
+	u64 total_size, chunk_size;
+	void *flt_src;
+	int ret = 0;
+
+	total_size = 1ULL << total_order;
+	chunk_size = 1ULL << chunk_order;
+	chunk_size = min_t(u64, peer->outbuf_size, chunk_size);
+
+	flt_src = pthr->src;
+	bnd_dst = peer->outbuf + peer->outbuf_size;
+	flt_dst = peer->outbuf;
+
+	pthr->duration = ktime_get();
+
+	/* The copied field is cleared at the test launch stage */
+	while (pthr->copied < total_size) {
+		ret = perf_copy_chunk(pthr, flt_dst, flt_src, chunk_size);
+		if (ret) {
+			dev_err(&perf->ntb->dev, "%d: Got error %d on test\n",
+				pthr->tidx, ret);
+			return ret;
+		}
+
+		pthr->copied += chunk_size;
+
+		flt_dst += chunk_size;
+		flt_src += chunk_size;
+		if (flt_dst >= bnd_dst || flt_dst < peer->outbuf) {
+			flt_dst = peer->outbuf;
+			flt_src = pthr->src;
+		}
+
+		/* Yield the CPU to give other threads a chance to run */
+		schedule();
+	}
+
+	return 0;
+}
+
+static int perf_sync_test(struct perf_thread *pthr)
+{
+	struct perf_ctx *perf = pthr->perf;
+
+	if (!use_dma)
+		goto no_dma_ret;
+
+	wait_event(pthr->dma_wait,
+		   (atomic_read(&pthr->dma_sync) == 0 ||
+		    atomic_read(&perf->tsync) < 0));
+
+	if (atomic_read(&perf->tsync) < 0)
+		return -EINTR;
+
+no_dma_ret:
+	pthr->duration = ktime_sub(ktime_get(), pthr->duration);
+
+	dev_dbg(&perf->ntb->dev, "%d: copied %llu bytes\n",
+		pthr->tidx, pthr->copied);
+
+	dev_dbg(&perf->ntb->dev, "%d: lasted %llu usecs\n",
+		pthr->tidx, ktime_to_us(pthr->duration));
+
+	dev_dbg(&perf->ntb->dev, "%d: %llu MBytes/s\n", pthr->tidx,
+		div64_u64(pthr->copied, ktime_to_us(pthr->duration)));
+
+	return 0;
+}
+
+static void perf_clear_test(struct perf_thread *pthr)
+{
+	struct perf_ctx *perf = pthr->perf;
+
+	if (!use_dma)
+		goto no_dma_notify;
+
+	/*
+	 * If the test finished without errors, termination isn't needed.
+	 * We call it anyway just to be sure the transfers have completed.
+	 */
+	(void)dmaengine_terminate_sync(pthr->dma_chan);
+
+	dma_release_channel(pthr->dma_chan);
+
+no_dma_notify:
+	atomic_dec(&perf->tsync);
+	wake_up(&perf->twait);
+	kfree(pthr->src);
+}
+
+static void perf_thread_work(struct work_struct *work)
+{
+	struct perf_thread *pthr = to_thread_work(work);
+	int ret;
+
+	/*
+	 * Perform the stages in compliance with the use_dma flag value.
+	 * The test status is changed only if an error happens; otherwise
+	 * status -ENODATA is kept while the test is in progress. Results
+	 * synchronization is performed only if the test finished without
+	 * an error or interruption.
+	 */
+	ret = perf_init_test(pthr);
+	if (ret) {
+		pthr->status = ret;
+		return;
+	}
+
+	ret = perf_run_test(pthr);
+	if (ret) {
+		pthr->status = ret;
+		goto err_clear_test;
+	}
+
+	pthr->status = perf_sync_test(pthr);
+
+err_clear_test:
+	perf_clear_test(pthr);
+}
+
+static int perf_set_tcnt(struct perf_ctx *perf, u8 tcnt)
+{
+	if (tcnt == 0 || tcnt > MAX_THREADS_CNT)
+		return -EINVAL;
+
+	if (test_and_set_bit_lock(0, &perf->busy_flag))
+		return -EBUSY;
+
+	perf->tcnt = tcnt;
+
+	clear_bit_unlock(0, &perf->busy_flag);
+
+	return 0;
+}
+
+static void perf_terminate_test(struct perf_ctx *perf)
+{
+	int tidx;
+
+	atomic_set(&perf->tsync, -1);
+	wake_up(&perf->twait);
+
+	for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) {
+		wake_up(&perf->threads[tidx].dma_wait);
+		cancel_work_sync(&perf->threads[tidx].work);
+	}
+}
+
+static int perf_submit_test(struct perf_peer *peer)
+{
+	struct perf_ctx *perf = peer->perf;
+	struct perf_thread *pthr;
+	int tidx, ret;
+
+	if (!test_bit(PERF_STS_DONE, &peer->sts))
+		return -ENOLINK;
+
+	if (test_and_set_bit_lock(0, &perf->busy_flag))
+		return -EBUSY;
+
+	perf->test_peer = peer;
+	atomic_set(&perf->tsync, perf->tcnt);
+
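+	/*
+	 * Kick off up to tcnt thread works; each one decrements tsync when
+	 * done, so the wait below completes once all threads have finished
+	 * or the test has been terminated (tsync set negative).
+	 */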
+	for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) {
+		pthr = &perf->threads[tidx];
+
+		pthr->status = -ENODATA;
+		pthr->copied = 0;
+		pthr->duration = ktime_set(0, 0);
+		if (tidx < perf->tcnt)
+			(void)queue_work(perf_wq, &pthr->work);
+	}
+
+	ret = wait_event_interruptible(perf->twait,
+				       atomic_read(&perf->tsync) <= 0);
+	if (ret == -ERESTARTSYS) {
+		perf_terminate_test(perf);
+		ret = -EINTR;
+	}
+
+	clear_bit_unlock(0, &perf->busy_flag);
+
+	return ret;
+}
+
+static int perf_read_stats(struct perf_ctx *perf, char *buf,
+			   size_t size, ssize_t *pos)
+{
+	struct perf_thread *pthr;
+	int tidx;
+
+	if (test_and_set_bit_lock(0, &perf->busy_flag))
+		return -EBUSY;
+
+	(*pos) += scnprintf(buf + *pos, size - *pos,
+		"    Peer %d test statistics:\n", perf->test_peer->pidx);
+
+	for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) {
+		pthr = &perf->threads[tidx];
+
+		if (pthr->status == -ENODATA)
+			continue;
+
+		if (pthr->status) {
+			(*pos) += scnprintf(buf + *pos, size - *pos,
+				"%d: error status %d\n", tidx, pthr->status);
+			continue;
+		}
+
+		(*pos) += scnprintf(buf + *pos, size - *pos,
+			"%d: copied %llu bytes in %llu usecs, %llu MBytes/s\n",
+			tidx, pthr->copied, ktime_to_us(pthr->duration),
+			div64_u64(pthr->copied, ktime_to_us(pthr->duration)));
+	}
+
+	clear_bit_unlock(0, &perf->busy_flag);
+
+	return 0;
+}
+
+static void perf_init_threads(struct perf_ctx *perf)
+{
+	struct perf_thread *pthr;
+	int tidx;
+
+	perf->tcnt = DEF_THREADS_CNT;
+	perf->test_peer = &perf->peers[0];
+	init_waitqueue_head(&perf->twait);
+
+	for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) {
+		pthr = &perf->threads[tidx];
+
+		pthr->perf = perf;
+		pthr->tidx = tidx;
+		pthr->status = -ENODATA;
+		init_waitqueue_head(&pthr->dma_wait);
+		INIT_WORK(&pthr->work, perf_thread_work);
+	}
+}
+
+static void perf_clear_threads(struct perf_ctx *perf)
+{
+	perf_terminate_test(perf);
+}
+
+/*==============================================================================
+ *                               DebugFS nodes
+ *==============================================================================
+ */
+
+static ssize_t perf_dbgfs_read_info(struct file *filep, char __user *ubuf,
+				    size_t size, loff_t *offp)
+{
+	struct perf_ctx *perf = filep->private_data;
+	struct perf_peer *peer;
+	size_t buf_size;
+	ssize_t pos = 0;
+	int ret, pidx;
+	char *buf;
+
+	buf_size = min_t(size_t, size, 0x1000U);
+
+	buf = kmalloc(buf_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	pos += scnprintf(buf + pos, buf_size - pos,
+		"    Performance measuring tool info:\n\n");
+
+	pos += scnprintf(buf + pos, buf_size - pos,
+		"Local port %d, Global index %d\n", ntb_port_number(perf->ntb),
+		perf->gidx);
+	pos += scnprintf(buf + pos, buf_size - pos, "Test status: ");
+	if (test_bit(0, &perf->busy_flag)) {
+		pos += scnprintf(buf + pos, buf_size - pos,
+			"on-fly with port %d (%d)\n",
+			ntb_peer_port_number(perf->ntb, perf->test_peer->pidx),
+			perf->test_peer->pidx);
+	} else {
+		pos += scnprintf(buf + pos, buf_size - pos, "idle\n");
+	}
+
+	for (pidx = 0; pidx < perf->pcnt; pidx++) {
+		peer = &perf->peers[pidx];
+
+		pos += scnprintf(buf + pos, buf_size - pos,
+			"Port %d (%d), Global index %d:\n",
+			ntb_peer_port_number(perf->ntb, peer->pidx), peer->pidx,
+			peer->gidx);
+
+		pos += scnprintf(buf + pos, buf_size - pos,
+			"\tLink status: %s\n",
+			test_bit(PERF_STS_LNKUP, &peer->sts) ? "up" : "down");
+
+		pos += scnprintf(buf + pos, buf_size - pos,
+			"\tOut buffer addr 0x%pK\n", peer->outbuf);
+
+		pos += scnprintf(buf + pos, buf_size - pos,
+			"\tOut buffer size %pa\n", &peer->outbuf_size);
+
+		pos += scnprintf(buf + pos, buf_size - pos,
+			"\tOut buffer xlat 0x%016llx[p]\n", peer->outbuf_xlat);
+
+		if (!peer->inbuf) {
+			pos += scnprintf(buf + pos, buf_size - pos,
+				"\tIn buffer addr: unallocated\n");
+			continue;
+		}
+
+		pos += scnprintf(buf + pos, buf_size - pos,
+			"\tIn buffer addr 0x%pK\n", peer->inbuf);
+
+		pos += scnprintf(buf + pos, buf_size - pos,
+			"\tIn buffer size %pa\n", &peer->inbuf_size);
+
+		pos += scnprintf(buf + pos, buf_size - pos,
+			"\tIn buffer xlat %pad[p]\n", &peer->inbuf_xlat);
+	}
+
+	ret = simple_read_from_buffer(ubuf, size, offp, buf, pos);
+	kfree(buf);
+
+	return ret;
+}
+
+static const struct file_operations perf_dbgfs_info = {
+	.open = simple_open,
+	.read = perf_dbgfs_read_info
+};
+
+static ssize_t perf_dbgfs_read_run(struct file *filep, char __user *ubuf,
+				   size_t size, loff_t *offp)
+{
+	struct perf_ctx *perf = filep->private_data;
+	ssize_t ret, pos = 0;
+	char *buf;
+
+	buf = kmalloc(PERF_BUF_LEN, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	ret = perf_read_stats(perf, buf, PERF_BUF_LEN, &pos);
+	if (ret)
+		goto err_free;
+
+	ret = simple_read_from_buffer(ubuf, size, offp, buf, pos);
+err_free:
+	kfree(buf);
+
+	return ret;
+}
+
+static ssize_t perf_dbgfs_write_run(struct file *filep, const char __user *ubuf,
+				    size_t size, loff_t *offp)
+{
+	struct perf_ctx *perf = filep->private_data;
+	struct perf_peer *peer;
+	int pidx, ret;
+
+	ret = kstrtoint_from_user(ubuf, size, 0, &pidx);
+	if (ret)
+		return ret;
+
+	if (pidx < 0 || pidx >= perf->pcnt)
+		return -EINVAL;
+
+	peer = &perf->peers[pidx];
+
+	ret = perf_submit_test(peer);
+	if (ret)
+		return ret;
+
+	return size;
+}
+
+static const struct file_operations perf_dbgfs_run = {
+	.open = simple_open,
+	.read = perf_dbgfs_read_run,
+	.write = perf_dbgfs_write_run
+};
+
+static ssize_t perf_dbgfs_read_tcnt(struct file *filep, char __user *ubuf,
+				    size_t size, loff_t *offp)
+{
+	struct perf_ctx *perf = filep->private_data;
+	char buf[8];
+	ssize_t pos;
+
+	pos = scnprintf(buf, sizeof(buf), "%hhu\n", perf->tcnt);
+
+	return simple_read_from_buffer(ubuf, size, offp, buf, pos);
+}
+
+static ssize_t perf_dbgfs_write_tcnt(struct file *filep,
+				     const char __user *ubuf,
+				     size_t size, loff_t *offp)
+{
+	struct perf_ctx *perf = filep->private_data;
+	int ret;
+	u8 val;
+
+	ret = kstrtou8_from_user(ubuf, size, 0, &val);
+	if (ret)
+		return ret;
+
+	ret = perf_set_tcnt(perf, val);
+	if (ret)
+		return ret;
+
+	return size;
+}
+
+static const struct file_operations perf_dbgfs_tcnt = {
+	.open = simple_open,
+	.read = perf_dbgfs_read_tcnt,
+	.write = perf_dbgfs_write_tcnt
+};
+
+static void perf_setup_dbgfs(struct perf_ctx *perf)
+{
+	struct pci_dev *pdev = perf->ntb->pdev;
+
+	perf->dbgfs_dir = debugfs_create_dir(pci_name(pdev), perf_dbgfs_topdir);
+	if (!perf->dbgfs_dir) {
+		dev_warn(&perf->ntb->dev, "DebugFS unsupported\n");
+		return;
+	}
+
+	debugfs_create_file("info", 0600, perf->dbgfs_dir, perf,
+			    &perf_dbgfs_info);
+
+	debugfs_create_file("run", 0600, perf->dbgfs_dir, perf,
+			    &perf_dbgfs_run);
+
+	debugfs_create_file("threads_count", 0600, perf->dbgfs_dir, perf,
+			    &perf_dbgfs_tcnt);
+
+	/* They are made read-only for test execution safety and integrity */
+	debugfs_create_u8("chunk_order", 0500, perf->dbgfs_dir, &chunk_order);
+
+	debugfs_create_u8("total_order", 0500, perf->dbgfs_dir, &total_order);
+
+	debugfs_create_bool("use_dma", 0500, perf->dbgfs_dir, &use_dma);
+}
+
+static void perf_clear_dbgfs(struct perf_ctx *perf)
+{
+	debugfs_remove_recursive(perf->dbgfs_dir);
+}
+
+/*==============================================================================
+ *                        Basic driver initialization
+ *==============================================================================
+ */
+
+static struct perf_ctx *perf_create_data(struct ntb_dev *ntb)
+{
+	struct perf_ctx *perf;
+
+	perf = devm_kzalloc(&ntb->dev, sizeof(*perf), GFP_KERNEL);
+	if (!perf)
+		return ERR_PTR(-ENOMEM);
+
+	perf->pcnt = ntb_peer_port_count(ntb);
+	perf->peers = devm_kcalloc(&ntb->dev, perf->pcnt, sizeof(*perf->peers),
+				  GFP_KERNEL);
+	if (!perf->peers)
+		return ERR_PTR(-ENOMEM);
+
+	perf->ntb = ntb;
+
+	return perf;
+}
+
+static int perf_setup_peer_mw(struct perf_peer *peer)
+{
+	struct perf_ctx *perf = peer->perf;
+	phys_addr_t phys_addr;
+	int ret;
+
+	/* Get outbound MW parameters and map it */
+	ret = ntb_peer_mw_get_addr(perf->ntb, peer->gidx, &phys_addr,
+				   &peer->outbuf_size);
+	if (ret)
+		return ret;
+
+	peer->outbuf = devm_ioremap_wc(&perf->ntb->dev, phys_addr,
+					peer->outbuf_size);
+	if (!peer->outbuf)
+		return -ENOMEM;
+
+	if (max_mw_size && peer->outbuf_size > max_mw_size) {
+		peer->outbuf_size = max_mw_size;
+		dev_warn(&peer->perf->ntb->dev,
+			"Peer %d outbuf reduced to %pa\n", peer->pidx,
+			&peer->outbuf_size);
+	}
+
+	return 0;
+}
+
+static int perf_init_peers(struct perf_ctx *perf)
+{
+	struct perf_peer *peer;
+	int pidx, lport, ret;
+
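+	/*
+	 * Assign global indices: peers with a port number above the local one
+	 * are shifted up by one, and the freed slot (the first such peer
+	 * index) is used as the local global index.
+	 */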
+	lport = ntb_port_number(perf->ntb);
+	perf->gidx = -1;
+	for (pidx = 0; pidx < perf->pcnt; pidx++) {
+		peer = &perf->peers[pidx];
+
+		peer->perf = perf;
+		peer->pidx = pidx;
+		if (lport < ntb_peer_port_number(perf->ntb, pidx)) {
+			if (perf->gidx == -1)
+				perf->gidx = pidx;
+			peer->gidx = pidx + 1;
+		} else {
+			peer->gidx = pidx;
+		}
+		INIT_WORK(&peer->service, perf_service_work);
+	}
+	if (perf->gidx == -1)
+		perf->gidx = pidx;
+
+	for (pidx = 0; pidx < perf->pcnt; pidx++) {
+		ret = perf_setup_peer_mw(&perf->peers[pidx]);
+		if (ret)
+			return ret;
+	}
+
+	dev_dbg(&perf->ntb->dev, "Global port index %d\n", perf->gidx);
+
+	return 0;
+}
+
+static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
+{
+	struct perf_ctx *perf;
+	int ret;
+
+	perf = perf_create_data(ntb);
+	if (IS_ERR(perf))
+		return PTR_ERR(perf);
+
+	ret = perf_init_peers(perf);
+	if (ret)
+		return ret;
+
+	perf_init_threads(perf);
+
+	ret = perf_init_service(perf);
+	if (ret)
+		return ret;
+
+	ret = perf_enable_service(perf);
+	if (ret)
+		return ret;
+
+	perf_setup_dbgfs(perf);
+
+	return 0;
+}
+
+static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb)
+{
+	struct perf_ctx *perf = ntb->ctx;
+
+	perf_clear_dbgfs(perf);
+
+	perf_disable_service(perf);
+
+	perf_clear_threads(perf);
+}
+
+static struct ntb_client perf_client = {
+	.ops = {
+		.probe = perf_probe,
+		.remove = perf_remove
+	}
+};
+
+static int __init perf_init(void)
+{
+	int ret;
+
+	if (chunk_order > MAX_CHUNK_ORDER) {
+		chunk_order = MAX_CHUNK_ORDER;
+		pr_info("Chunk order reduced to %hhu\n", chunk_order);
+	}
+
+	if (total_order < chunk_order) {
+		total_order = chunk_order;
+		pr_info("Total data order reduced to %hhu\n", total_order);
+	}
+
+	perf_wq = alloc_workqueue("perf_wq", WQ_UNBOUND | WQ_SYSFS, 0);
+	if (!perf_wq)
+		return -ENOMEM;
+
+	if (debugfs_initialized())
+		perf_dbgfs_topdir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+
+	ret = ntb_register_client(&perf_client);
+	if (ret) {
+		debugfs_remove_recursive(perf_dbgfs_topdir);
+		destroy_workqueue(perf_wq);
+	}
+
+	return ret;
+}
+module_init(perf_init);
+
+static void __exit perf_exit(void)
+{
+	ntb_unregister_client(&perf_client);
+	debugfs_remove_recursive(perf_dbgfs_topdir);
+	destroy_workqueue(perf_wq);
+}
+module_exit(perf_exit);
+
diff --git a/drivers/ntb/test/ntb_pingpong.c b/drivers/ntb/test/ntb_pingpong.c
new file mode 100644
index 0000000..65865e4
--- /dev/null
+++ b/drivers/ntb/test/ntb_pingpong.c
@@ -0,0 +1,438 @@
+/*
+ *   This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ *   Copyright (C) 2017 T-Platforms. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *   General Public License for more details.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ *   Copyright (C) 2017 T-Platforms. All Rights Reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * PCIe NTB Pingpong Linux driver
+ */
+
+/*
+ * How to use this tool, by example.
+ *
+ * Assuming $DBG_DIR is something like:
+ * '/sys/kernel/debug/ntb_pingpong/0000:00:03.0'
+ * Suppose aside from local device there is at least one remote device
+ * connected to NTB with index 0.
+ *-----------------------------------------------------------------------------
+ * Eg: install driver with specified delay between doorbell event and response
+ *
+ * root@self# insmod ntb_pingpong.ko delay_ms=1000
+ *-----------------------------------------------------------------------------
+ * Eg: get number of ping-pong cycles performed
+ *
+ * root@self# cat $DBG_DIR/count
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/bitops.h>
+
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/hrtimer.h>
+#include <linux/debugfs.h>
+
+#include <linux/ntb.h>
+
+#define DRIVER_NAME		"ntb_pingpong"
+#define DRIVER_VERSION		"2.0"
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_AUTHOR("Allen Hubbe <Allen.Hubbe@emc.com>");
+MODULE_DESCRIPTION("PCIe NTB Simple Pingpong Client");
+
+static unsigned int unsafe;
+module_param(unsafe, uint, 0644);
+MODULE_PARM_DESC(unsafe, "Run even though ntb operations may be unsafe");
+
+static unsigned int delay_ms = 1000;
+module_param(delay_ms, uint, 0644);
+MODULE_PARM_DESC(delay_ms, "Milliseconds to delay the response to peer");
+
+struct pp_ctx {
+	struct ntb_dev *ntb;
+	struct hrtimer timer;
+	u64 in_db;
+	u64 out_db;
+	int out_pidx;
+	u64 nmask;
+	u64 pmask;
+	atomic_t count;
+	spinlock_t lock;
+	struct dentry *dbgfs_dir;
+};
+#define to_pp_timer(__timer) \
+	container_of(__timer, struct pp_ctx, timer)
+
+static struct dentry *pp_dbgfs_topdir;
+
+static int pp_find_next_peer(struct pp_ctx *pp)
+{
+	u64 link, out_db;
+	int pidx;
+
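+	/*
+	 * The link value is a bitmap with one bit per peer: prefer a link-up
+	 * peer from nmask (port numbers above the local one), otherwise fall
+	 * back to one from pmask (port numbers below it).
+	 */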
+	link = ntb_link_is_up(pp->ntb, NULL, NULL);
+
+	/* Find next available peer */
+	if (link & pp->nmask) {
+		pidx = __ffs64(link & pp->nmask);
+		out_db = BIT_ULL(pidx + 1);
+	} else if (link & pp->pmask) {
+		pidx = __ffs64(link & pp->pmask);
+		out_db = BIT_ULL(pidx);
+	} else {
+		return -ENODEV;
+	}
+
+	spin_lock(&pp->lock);
+	pp->out_pidx = pidx;
+	pp->out_db = out_db;
+	spin_unlock(&pp->lock);
+
+	return 0;
+}
+
+static void pp_setup(struct pp_ctx *pp)
+{
+	int ret;
+
+	ntb_db_set_mask(pp->ntb, pp->in_db);
+
+	hrtimer_cancel(&pp->timer);
+
+	ret = pp_find_next_peer(pp);
+	if (ret == -ENODEV) {
+		dev_dbg(&pp->ntb->dev, "Got no peers, so cancel\n");
+		return;
+	}
+
+	dev_dbg(&pp->ntb->dev, "Ping-pong started with port %d, db %#llx\n",
+		ntb_peer_port_number(pp->ntb, pp->out_pidx), pp->out_db);
+
+	hrtimer_start(&pp->timer, ms_to_ktime(delay_ms), HRTIMER_MODE_REL);
+}
+
+static void pp_clear(struct pp_ctx *pp)
+{
+	hrtimer_cancel(&pp->timer);
+
+	ntb_db_set_mask(pp->ntb, pp->in_db);
+
+	dev_dbg(&pp->ntb->dev, "Ping-pong cancelled\n");
+}
+
+static void pp_ping(struct pp_ctx *pp)
+{
+	u32 count;
+
+	count = atomic_read(&pp->count);
+
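+	/*
+	 * Publish the counter via scratchpad and message register, ring the
+	 * peer's doorbell and unmask our inbound doorbell to await the pong.
+	 */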
+	spin_lock(&pp->lock);
+	ntb_peer_spad_write(pp->ntb, pp->out_pidx, 0, count);
+	ntb_peer_msg_write(pp->ntb, pp->out_pidx, 0, count);
+
+	dev_dbg(&pp->ntb->dev, "Ping port %d spad %#x, msg %#x\n",
+		ntb_peer_port_number(pp->ntb, pp->out_pidx), count, count);
+
+	ntb_peer_db_set(pp->ntb, pp->out_db);
+	ntb_db_clear_mask(pp->ntb, pp->in_db);
+	spin_unlock(&pp->lock);
+}
+
+static void pp_pong(struct pp_ctx *pp)
+{
+	u32 msg_data = -1, spad_data = -1;
+	int pidx = 0;
+
+	/* Read pong data */
+	spad_data = ntb_spad_read(pp->ntb, 0);
+	msg_data = ntb_msg_read(pp->ntb, &pidx, 0);
+	ntb_msg_clear_sts(pp->ntb, -1);
+
+	/*
+	 * Scratchpad and message data may differ, since the message register
+	 * can't be rewritten unless its status is cleared. Additionally,
+	 * either of them might be unsupported.
+	 */
+	dev_dbg(&pp->ntb->dev, "Pong spad %#x, msg %#x (port %d)\n",
+		spad_data, msg_data, ntb_peer_port_number(pp->ntb, pidx));
+
+	atomic_inc(&pp->count);
+
+	ntb_db_set_mask(pp->ntb, pp->in_db);
+	ntb_db_clear(pp->ntb, pp->in_db);
+
+	hrtimer_start(&pp->timer, ms_to_ktime(delay_ms), HRTIMER_MODE_REL);
+}
+
+static enum hrtimer_restart pp_timer_func(struct hrtimer *t)
+{
+	struct pp_ctx *pp = to_pp_timer(t);
+
+	pp_ping(pp);
+
+	return HRTIMER_NORESTART;
+}
+
+static void pp_link_event(void *ctx)
+{
+	struct pp_ctx *pp = ctx;
+
+	pp_setup(pp);
+}
+
+static void pp_db_event(void *ctx, int vec)
+{
+	struct pp_ctx *pp = ctx;
+
+	pp_pong(pp);
+}
+
+static const struct ntb_ctx_ops pp_ops = {
+	.link_event = pp_link_event,
+	.db_event = pp_db_event
+};
+
+static int pp_check_ntb(struct ntb_dev *ntb)
+{
+	u64 pmask;
+
+	if (ntb_db_is_unsafe(ntb)) {
+		dev_dbg(&ntb->dev, "Doorbell is unsafe\n");
+		if (!unsafe)
+			return -EINVAL;
+	}
+
+	if (ntb_spad_is_unsafe(ntb)) {
+		dev_dbg(&ntb->dev, "Scratchpad is unsafe\n");
+		if (!unsafe)
+			return -EINVAL;
+	}
+
+	pmask = GENMASK_ULL(ntb_peer_port_count(ntb), 0);
+	if ((ntb_db_valid_mask(ntb) & pmask) != pmask) {
+		dev_err(&ntb->dev, "Unsupported DB configuration\n");
+		return -EINVAL;
+	}
+
+	if (ntb_spad_count(ntb) < 1 && ntb_msg_count(ntb) < 1) {
+		dev_err(&ntb->dev, "Scratchpads and messages unsupported\n");
+		return -EINVAL;
+	} else if (ntb_spad_count(ntb) < 1) {
+		dev_dbg(&ntb->dev, "Scratchpads unsupported\n");
+	} else if (ntb_msg_count(ntb) < 1) {
+		dev_dbg(&ntb->dev, "Messages unsupported\n");
+	}
+
+	return 0;
+}
+
+static struct pp_ctx *pp_create_data(struct ntb_dev *ntb)
+{
+	struct pp_ctx *pp;
+
+	pp = devm_kzalloc(&ntb->dev, sizeof(*pp), GFP_KERNEL);
+	if (!pp)
+		return ERR_PTR(-ENOMEM);
+
+	pp->ntb = ntb;
+	atomic_set(&pp->count, 0);
+	spin_lock_init(&pp->lock);
+	hrtimer_init(&pp->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	pp->timer.function = pp_timer_func;
+
+	return pp;
+}
+
+static void pp_init_flds(struct pp_ctx *pp)
+{
+	int pidx, lport, pcnt;
+
+	/* Find global port index */
+	lport = ntb_port_number(pp->ntb);
+	pcnt = ntb_peer_port_count(pp->ntb);
+	for (pidx = 0; pidx < pcnt; pidx++) {
+		if (lport < ntb_peer_port_number(pp->ntb, pidx))
+			break;
+	}
+
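+	/*
+	 * The loop above finds the first peer with a port number above the
+	 * local one; that index is the local position: its bit serves as the
+	 * inbound doorbell, pmask covers the preceding peers and nmask the
+	 * remaining ones.
+	 */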
+	pp->in_db = BIT_ULL(pidx);
+	pp->pmask = GENMASK_ULL(pidx, 0) >> 1;
+	pp->nmask = GENMASK_ULL(pcnt - 1, pidx);
+
+	dev_dbg(&pp->ntb->dev, "Inbound db %#llx, prev %#llx, next %#llx\n",
+		pp->in_db, pp->pmask, pp->nmask);
+}
+
+static int pp_mask_events(struct pp_ctx *pp)
+{
+	u64 db_mask, msg_mask;
+	int ret;
+
+	db_mask = ntb_db_valid_mask(pp->ntb);
+	ret = ntb_db_set_mask(pp->ntb, db_mask);
+	if (ret)
+		return ret;
+
+	/* Skip message events masking if unsupported */
+	if (ntb_msg_count(pp->ntb) < 1)
+		return 0;
+
+	msg_mask = ntb_msg_outbits(pp->ntb) | ntb_msg_inbits(pp->ntb);
+	return ntb_msg_set_mask(pp->ntb, msg_mask);
+}
+
+static int pp_setup_ctx(struct pp_ctx *pp)
+{
+	int ret;
+
+	ret = ntb_set_ctx(pp->ntb, pp, &pp_ops);
+	if (ret)
+		return ret;
+
+	ntb_link_enable(pp->ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
+	/* Might not be necessary */
+	ntb_link_event(pp->ntb);
+
+	return 0;
+}
+
+static void pp_clear_ctx(struct pp_ctx *pp)
+{
+	ntb_link_disable(pp->ntb);
+
+	ntb_clear_ctx(pp->ntb);
+}
+
+static void pp_setup_dbgfs(struct pp_ctx *pp)
+{
+	struct pci_dev *pdev = pp->ntb->pdev;
+	void *ret;
+
+	pp->dbgfs_dir = debugfs_create_dir(pci_name(pdev), pp_dbgfs_topdir);
+
+	ret = debugfs_create_atomic_t("count", 0600, pp->dbgfs_dir, &pp->count);
+	if (!ret)
+		dev_warn(&pp->ntb->dev, "DebugFS unsupported\n");
+}
+
+static void pp_clear_dbgfs(struct pp_ctx *pp)
+{
+	debugfs_remove_recursive(pp->dbgfs_dir);
+}
+
+static int pp_probe(struct ntb_client *client, struct ntb_dev *ntb)
+{
+	struct pp_ctx *pp;
+	int ret;
+
+	ret = pp_check_ntb(ntb);
+	if (ret)
+		return ret;
+
+	pp = pp_create_data(ntb);
+	if (IS_ERR(pp))
+		return PTR_ERR(pp);
+
+	pp_init_flds(pp);
+
+	ret = pp_mask_events(pp);
+	if (ret)
+		return ret;
+
+	ret = pp_setup_ctx(pp);
+	if (ret)
+		return ret;
+
+	pp_setup_dbgfs(pp);
+
+	return 0;
+}
+
+static void pp_remove(struct ntb_client *client, struct ntb_dev *ntb)
+{
+	struct pp_ctx *pp = ntb->ctx;
+
+	pp_clear_dbgfs(pp);
+
+	pp_clear_ctx(pp);
+
+	pp_clear(pp);
+}
+
+static struct ntb_client pp_client = {
+	.ops = {
+		.probe = pp_probe,
+		.remove = pp_remove
+	}
+};
+
+static int __init pp_init(void)
+{
+	int ret;
+
+	if (debugfs_initialized())
+		pp_dbgfs_topdir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+
+	ret = ntb_register_client(&pp_client);
+	if (ret)
+		debugfs_remove_recursive(pp_dbgfs_topdir);
+
+	return ret;
+}
+module_init(pp_init);
+
+static void __exit pp_exit(void)
+{
+	ntb_unregister_client(&pp_client);
+	debugfs_remove_recursive(pp_dbgfs_topdir);
+}
+module_exit(pp_exit);
+
diff --git a/drivers/ntb/test/ntb_tool.c b/drivers/ntb/test/ntb_tool.c
new file mode 100644
index 0000000..d592c0f
--- /dev/null
+++ b/drivers/ntb/test/ntb_tool.c
@@ -0,0 +1,1693 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ *   Copyright (C) 2017 T-Platforms All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *   General Public License for more details.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ *   Copyright (C) 2017 T-Platforms All Rights Reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * PCIe NTB Debugging Tool Linux driver
+ */
+
+/*
+ * How to use this tool, by example.
+ *
+ * Assuming $DBG_DIR is something like:
+ * '/sys/kernel/debug/ntb_tool/0000:00:03.0'
+ * Suppose aside from local device there is at least one remote device
+ * connected to NTB with index 0.
+ *-----------------------------------------------------------------------------
+ * Eg: check local/peer device information.
+ *
+ * # Get local device port number
+ * root@self# cat $DBG_DIR/port
+ *
+ * # Check local device functionality
+ * root@self# ls $DBG_DIR
+ * db            msg1          msg_sts     peer4/        port
+ * db_event      msg2          peer0/      peer5/        spad0
+ * db_mask       msg3          peer1/      peer_db       spad1
+ * link          msg_event     peer2/      peer_db_mask  spad2
+ * msg0          msg_mask      peer3/      peer_spad     spad3
+ * # As one can see it supports:
+ * # 1) four inbound message registers
+ * # 2) four inbound scratchpads
+ * # 3) up to six peer devices
+ *
+ * # Check peer device port number
+ * root@self# cat $DBG_DIR/peer0/port
+ *
+ * # Check peer device(s) functionality to be used
+ * root@self# ls $DBG_DIR/peer0
+ * link             mw_trans0       mw_trans6        port
+ * link_event       mw_trans1       mw_trans7        spad0
+ * msg0             mw_trans2       peer_mw_trans0   spad1
+ * msg1             mw_trans3       peer_mw_trans1   spad2
+ * msg2             mw_trans4       peer_mw_trans2   spad3
+ * msg3             mw_trans5       peer_mw_trans3
+ * # As one can see we got:
+ * # 1) four outbound message registers
+ * # 2) four outbound scratchpads
+ * # 3) eight inbound memory windows
+ * # 4) four outbound memory windows
+ *-----------------------------------------------------------------------------
+ * Eg: NTB link tests
+ *
+ * # Set local link up/down
+ * root@self# echo Y > $DBG_DIR/link
+ * root@self# echo N > $DBG_DIR/link
+ *
+ * # Check if link with peer device is up/down:
+ * root@self# cat $DBG_DIR/peer0/link
+ *
+ * # Block until the link is up/down
+ * root@self# echo Y > $DBG_DIR/peer0/link_event
+ * root@self# echo N > $DBG_DIR/peer0/link_event
+ *-----------------------------------------------------------------------------
+ * Eg: Doorbell registers tests (some functionality might be absent)
+ *
+ * # Set/clear/get local doorbell
+ * root@self# echo 's 1' > $DBG_DIR/db
+ * root@self# echo 'c 1' > $DBG_DIR/db
+ * root@self# cat  $DBG_DIR/db
+ *
+ * # Set/clear/get local doorbell mask
+ * root@self# echo 's 1' > $DBG_DIR/db_mask
+ * root@self# echo 'c 1' > $DBG_DIR/db_mask
+ * root@self# cat $DBG_DIR/db_mask
+ *
+ * # Ring/clear/get peer doorbell
+ * root@peer# echo 's 1' > $DBG_DIR/peer_db
+ * root@peer# echo 'c 1' > $DBG_DIR/peer_db
+ * root@peer# cat $DBG_DIR/peer_db
+ *
+ * # Set/clear/get peer doorbell mask
+ * root@self# echo 's 1' > $DBG_DIR/peer_db_mask
+ * root@self# echo 'c 1' > $DBG_DIR/peer_db_mask
+ * root@self# cat $DBG_DIR/peer_db_mask
+ *
+ * # Block until local doorbell is set with specified value
+ * root@self# echo 1 > $DBG_DIR/db_event
+ *-----------------------------------------------------------------------------
+ * Eg: Message registers tests (functionality might be absent)
+ *
+ * # Set/clear/get in/out message registers status
+ * root@self# echo 's 1' > $DBG_DIR/msg_sts
+ * root@self# echo 'c 1' > $DBG_DIR/msg_sts
+ * root@self# cat $DBG_DIR/msg_sts
+ *
+ * # Set/clear in/out message registers mask
+ * root@self# echo 's 1' > $DBG_DIR/msg_mask
+ * root@self# echo 'c 1' > $DBG_DIR/msg_mask
+ *
+ * # Get inbound message register #0 value and the source port index
+ * root@self# cat  $DBG_DIR/msg0
+ *
+ * # Send some data to peer over outbound message register #0
+ * root@self# echo 0x01020304 > $DBG_DIR/peer0/msg0
+ *-----------------------------------------------------------------------------
+ * Eg: Scratchpad registers tests (functionality might be absent)
+ *
+ * # Write/read to/from local scratchpad register #0
+ * root@peer# echo 0x01020304 > $DBG_DIR/spad0
+ * root@peer# cat $DBG_DIR/spad0
+ *
+ * # Write/read to/from peer scratchpad register #0
+ * root@peer# echo 0x01020304 > $DBG_DIR/peer0/spad0
+ * root@peer# cat $DBG_DIR/peer0/spad0
+ *-----------------------------------------------------------------------------
+ * Eg: Memory windows tests
+ *
+ * # Create inbound memory window buffer of specified size/get its base address
+ * root@peer# echo 16384 > $DBG_DIR/peer0/mw_trans0
+ * root@peer# cat $DBG_DIR/peer0/mw_trans0
+ *
+ * # Write/read data to/from inbound memory window
+ * root@peer# echo Hello > $DBG_DIR/peer0/mw0
+ * root@peer# head -c 7 $DBG_DIR/peer0/mw0
+ *
+ * # Map outbound memory window/check its settings (on peer device)
+ * root@peer# echo 0xADD0BA5E:16384 > $DBG_DIR/peer0/peer_mw_trans0
+ * root@peer# cat $DBG_DIR/peer0/peer_mw_trans0
+ *
+ * # Write/read data to/from outbound memory window (on peer device)
+ * root@peer# echo olleH > $DBG_DIR/peer0/peer_mw0
+ * root@peer# head -c 7 $DBG_DIR/peer0/peer_mw0
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#include <linux/ntb.h>
+
+#define DRIVER_NAME		"ntb_tool"
+#define DRIVER_VERSION		"2.0"
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_AUTHOR("Allen Hubbe <Allen.Hubbe@emc.com>");
+MODULE_DESCRIPTION("PCIe NTB Debugging Tool");
+
+/*
+ * Inbound and outbound memory window descriptor. Which union members are
+ * used depends on the MW type the structure describes: mm_base/dma_base are
+ * the virtual and DMA addresses of an inbound MW, while io_base/tr_base are
+ * the MMIO-mapped virtual and xlat addresses of an outbound MW respectively.
+ */
+struct tool_mw {
+	int widx;
+	int pidx;
+	struct tool_ctx *tc;
+	union {
+		u8 *mm_base;
+		u8 __iomem *io_base;
+	};
+	union {
+		dma_addr_t dma_base;
+		u64 tr_base;
+	};
+	resource_size_t size;
+	struct dentry *dbgfs_file;
+};
+
+/*
+ * A wrapper structure is used to tell which peer's DebugFS directory an
+ * outbound MW is being referenced from in the corresponding IO operations.
+ */
+struct tool_mw_wrap {
+	int pidx;
+	struct tool_mw *mw;
+};
+
+struct tool_msg {
+	int midx;
+	int pidx;
+	struct tool_ctx *tc;
+};
+
+struct tool_spad {
+	int sidx;
+	int pidx;
+	struct tool_ctx *tc;
+};
+
+struct tool_peer {
+	int pidx;
+	struct tool_ctx *tc;
+	int inmw_cnt;
+	struct tool_mw *inmws;
+	int outmw_cnt;
+	struct tool_mw_wrap *outmws;
+	int outmsg_cnt;
+	struct tool_msg *outmsgs;
+	int outspad_cnt;
+	struct tool_spad *outspads;
+	struct dentry *dbgfs_dir;
+};
+
+struct tool_ctx {
+	struct ntb_dev *ntb;
+	wait_queue_head_t link_wq;
+	wait_queue_head_t db_wq;
+	wait_queue_head_t msg_wq;
+	int outmw_cnt;
+	struct tool_mw *outmws;
+	int peer_cnt;
+	struct tool_peer *peers;
+	int inmsg_cnt;
+	struct tool_msg *inmsgs;
+	int inspad_cnt;
+	struct tool_spad *inspads;
+	struct dentry *dbgfs_dir;
+};
+
+#define TOOL_FOPS_RDWR(__name, __read, __write) \
+	const struct file_operations __name = {	\
+		.owner = THIS_MODULE,		\
+		.open = simple_open,		\
+		.read = __read,			\
+		.write = __write,		\
+	}
+
+#define TOOL_BUF_LEN 32
+
+static struct dentry *tool_dbgfs_topdir;
+
+/*==============================================================================
+ *                               NTB events handlers
+ *==============================================================================
+ */
+
+static void tool_link_event(void *ctx)
+{
+	struct tool_ctx *tc = ctx;
+	enum ntb_speed speed;
+	enum ntb_width width;
+	int up;
+
+	up = ntb_link_is_up(tc->ntb, &speed, &width);
+
+	dev_dbg(&tc->ntb->dev, "link is %s speed %d width %d\n",
+		up ? "up" : "down", speed, width);
+
+	wake_up(&tc->link_wq);
+}
+
+static void tool_db_event(void *ctx, int vec)
+{
+	struct tool_ctx *tc = ctx;
+	u64 db_bits, db_mask;
+
+	db_mask = ntb_db_vector_mask(tc->ntb, vec);
+	db_bits = ntb_db_read(tc->ntb);
+
+	dev_dbg(&tc->ntb->dev, "doorbell vec %d mask %#llx bits %#llx\n",
+		vec, db_mask, db_bits);
+
+	wake_up(&tc->db_wq);
+}
+
+static void tool_msg_event(void *ctx)
+{
+	struct tool_ctx *tc = ctx;
+	u64 msg_sts;
+
+	msg_sts = ntb_msg_read_sts(tc->ntb);
+
+	dev_dbg(&tc->ntb->dev, "message bits %#llx\n", msg_sts);
+
+	wake_up(&tc->msg_wq);
+}
+
+static const struct ntb_ctx_ops tool_ops = {
+	.link_event = tool_link_event,
+	.db_event = tool_db_event,
+	.msg_event = tool_msg_event
+};
+
+/*==============================================================================
+ *                        Common read/write methods
+ *==============================================================================
+ */
+
+static ssize_t tool_fn_read(struct tool_ctx *tc, char __user *ubuf,
+			    size_t size, loff_t *offp,
+			    u64 (*fn_read)(struct ntb_dev *))
+{
+	size_t buf_size;
+	char buf[TOOL_BUF_LEN];
+	ssize_t pos;
+
+	if (!fn_read)
+		return -EINVAL;
+
+	buf_size = min(size, sizeof(buf));
+
+	pos = scnprintf(buf, buf_size, "%#llx\n", fn_read(tc->ntb));
+
+	return simple_read_from_buffer(ubuf, size, offp, buf, pos);
+}
+
+static ssize_t tool_fn_write(struct tool_ctx *tc,
+			     const char __user *ubuf,
+			     size_t size, loff_t *offp,
+			     int (*fn_set)(struct ntb_dev *, u64),
+			     int (*fn_clear)(struct ntb_dev *, u64))
+{
+	char *buf, cmd;
+	ssize_t ret;
+	u64 bits;
+	int n;
+
+	buf = kmalloc(size + 1, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	ret = simple_write_to_buffer(buf, size, offp, ubuf, size);
+	if (ret < 0) {
+		kfree(buf);
+		return ret;
+	}
+
+	buf[size] = 0;
+
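+	/* Expected input format: 's <bits>' to set or 'c <bits>' to clear */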
+	n = sscanf(buf, "%c %lli", &cmd, &bits);
+
+	kfree(buf);
+
+	if (n != 2) {
+		ret = -EINVAL;
+	} else if (cmd == 's') {
+		if (!fn_set)
+			ret = -EINVAL;
+		else
+			ret = fn_set(tc->ntb, bits);
+	} else if (cmd == 'c') {
+		if (!fn_clear)
+			ret = -EINVAL;
+		else
+			ret = fn_clear(tc->ntb, bits);
+	} else {
+		ret = -EINVAL;
+	}
+
+	return ret ? : size;
+}
+
+/*==============================================================================
+ *                            Port read/write methods
+ *==============================================================================
+ */
+
+static ssize_t tool_port_read(struct file *filep, char __user *ubuf,
+			      size_t size, loff_t *offp)
+{
+	struct tool_ctx *tc = filep->private_data;
+	char buf[TOOL_BUF_LEN];
+	int pos;
+
+	pos = scnprintf(buf, sizeof(buf), "%d\n", ntb_port_number(tc->ntb));
+
+	return simple_read_from_buffer(ubuf, size, offp, buf, pos);
+}
+
+static TOOL_FOPS_RDWR(tool_port_fops,
+		      tool_port_read,
+		      NULL);
+
+static ssize_t tool_peer_port_read(struct file *filep, char __user *ubuf,
+				   size_t size, loff_t *offp)
+{
+	struct tool_peer *peer = filep->private_data;
+	struct tool_ctx *tc = peer->tc;
+	char buf[TOOL_BUF_LEN];
+	int pos;
+
+	pos = scnprintf(buf, sizeof(buf), "%d\n",
+		ntb_peer_port_number(tc->ntb, peer->pidx));
+
+	return simple_read_from_buffer(ubuf, size, offp, buf, pos);
+}
+
+static TOOL_FOPS_RDWR(tool_peer_port_fops,
+		      tool_peer_port_read,
+		      NULL);
+
+static int tool_init_peers(struct tool_ctx *tc)
+{
+	int pidx;
+
+	tc->peer_cnt = ntb_peer_port_count(tc->ntb);
+	tc->peers = devm_kcalloc(&tc->ntb->dev, tc->peer_cnt,
+				 sizeof(*tc->peers), GFP_KERNEL);
+	if (tc->peers == NULL)
+		return -ENOMEM;
+
+	for (pidx = 0; pidx < tc->peer_cnt; pidx++) {
+		tc->peers[pidx].pidx = pidx;
+		tc->peers[pidx].tc = tc;
+	}
+
+	return 0;
+}
+
+/*==============================================================================
+ *                       Link state read/write methods
+ *==============================================================================
+ */
+
+static ssize_t tool_link_write(struct file *filep, const char __user *ubuf,
+			       size_t size, loff_t *offp)
+{
+	struct tool_ctx *tc = filep->private_data;
+	bool val;
+	int ret;
+
+	ret = kstrtobool_from_user(ubuf, size, &val);
+	if (ret)
+		return ret;
+
+	if (val)
+		ret = ntb_link_enable(tc->ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
+	else
+		ret = ntb_link_disable(tc->ntb);
+
+	if (ret)
+		return ret;
+
+	return size;
+}
+
+static TOOL_FOPS_RDWR(tool_link_fops,
+		      NULL,
+		      tool_link_write);
+
+static ssize_t tool_peer_link_read(struct file *filep, char __user *ubuf,
+				   size_t size, loff_t *offp)
+{
+	struct tool_peer *peer = filep->private_data;
+	struct tool_ctx *tc = peer->tc;
+	char buf[3];
+
+	if (ntb_link_is_up(tc->ntb, NULL, NULL) & BIT(peer->pidx))
+		buf[0] = 'Y';
+	else
+		buf[0] = 'N';
+	buf[1] = '\n';
+	buf[2] = '\0';
+
+	return simple_read_from_buffer(ubuf, size, offp, buf, 3);
+}
+
+static TOOL_FOPS_RDWR(tool_peer_link_fops,
+		      tool_peer_link_read,
+		      NULL);
+
+static ssize_t tool_peer_link_event_write(struct file *filep,
+					  const char __user *ubuf,
+					  size_t size, loff_t *offp)
+{
+	struct tool_peer *peer = filep->private_data;
+	struct tool_ctx *tc = peer->tc;
+	u64 link_msk;
+	bool val;
+	int ret;
+
+	ret = kstrtobool_from_user(ubuf, size, &val);
+	if (ret)
+		return ret;
+
+	link_msk = BIT_ULL_MASK(peer->pidx);
+
+	if (wait_event_interruptible(tc->link_wq,
+		!!(ntb_link_is_up(tc->ntb, NULL, NULL) & link_msk) == val))
+		return -ERESTART;
+
+	return size;
+}
+
+static TOOL_FOPS_RDWR(tool_peer_link_event_fops,
+		      NULL,
+		      tool_peer_link_event_write);
+
+/*==============================================================================
+ *                  Memory windows read/write/setting methods
+ *==============================================================================
+ */
+
+static ssize_t tool_mw_read(struct file *filep, char __user *ubuf,
+			    size_t size, loff_t *offp)
+{
+	struct tool_mw *inmw = filep->private_data;
+
+	if (inmw->mm_base == NULL)
+		return -ENXIO;
+
+	return simple_read_from_buffer(ubuf, size, offp,
+				       inmw->mm_base, inmw->size);
+}
+
+static ssize_t tool_mw_write(struct file *filep, const char __user *ubuf,
+			     size_t size, loff_t *offp)
+{
+	struct tool_mw *inmw = filep->private_data;
+
+	if (inmw->mm_base == NULL)
+		return -ENXIO;
+
+	return simple_write_to_buffer(inmw->mm_base, inmw->size, offp,
+				      ubuf, size);
+}
+
+static TOOL_FOPS_RDWR(tool_mw_fops,
+		      tool_mw_read,
+		      tool_mw_write);
+
+static int tool_setup_mw(struct tool_ctx *tc, int pidx, int widx,
+			 size_t req_size)
+{
+	resource_size_t size, addr_align, size_align;
+	struct tool_mw *inmw = &tc->peers[pidx].inmws[widx];
+	char buf[TOOL_BUF_LEN];
+	int ret;
+
+	if (inmw->mm_base != NULL)
+		return 0;
+
+	ret = ntb_mw_get_align(tc->ntb, pidx, widx, &addr_align,
+				&size_align, &size);
+	if (ret)
+		return ret;
+
+	inmw->size = min_t(resource_size_t, req_size, size);
+	inmw->size = round_up(inmw->size, addr_align);
+	inmw->size = round_up(inmw->size, size_align);
+	inmw->mm_base = dma_alloc_coherent(&tc->ntb->dev, inmw->size,
+					   &inmw->dma_base, GFP_KERNEL);
+	if (!inmw->mm_base)
+		return -ENOMEM;
+
+	if (!IS_ALIGNED(inmw->dma_base, addr_align)) {
+		ret = -ENOMEM;
+		goto err_free_dma;
+	}
+
+	ret = ntb_mw_set_trans(tc->ntb, pidx, widx, inmw->dma_base, inmw->size);
+	if (ret)
+		goto err_free_dma;
+
+	snprintf(buf, sizeof(buf), "mw%d", widx);
+	inmw->dbgfs_file = debugfs_create_file(buf, 0600,
+					       tc->peers[pidx].dbgfs_dir, inmw,
+					       &tool_mw_fops);
+
+	return 0;
+
+err_free_dma:
+	dma_free_coherent(&tc->ntb->dev, inmw->size, inmw->mm_base,
+			  inmw->dma_base);
+	inmw->mm_base = NULL;
+	inmw->dma_base = 0;
+	inmw->size = 0;
+
+	return ret;
+}
+
+static void tool_free_mw(struct tool_ctx *tc, int pidx, int widx)
+{
+	struct tool_mw *inmw = &tc->peers[pidx].inmws[widx];
+
+	debugfs_remove(inmw->dbgfs_file);
+
+	if (inmw->mm_base != NULL) {
+		ntb_mw_clear_trans(tc->ntb, pidx, widx);
+		dma_free_coherent(&tc->ntb->dev, inmw->size,
+				  inmw->mm_base, inmw->dma_base);
+	}
+
+	inmw->mm_base = NULL;
+	inmw->dma_base = 0;
+	inmw->size = 0;
+	inmw->dbgfs_file = NULL;
+}
+
+static ssize_t tool_mw_trans_read(struct file *filep, char __user *ubuf,
+				  size_t size, loff_t *offp)
+{
+	struct tool_mw *inmw = filep->private_data;
+	resource_size_t addr_align;
+	resource_size_t size_align;
+	resource_size_t size_max;
+	ssize_t ret, off = 0;
+	size_t buf_size;
+	char *buf;
+
+	buf_size = min_t(size_t, size, 512);
+
+	buf = kmalloc(buf_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	ret = ntb_mw_get_align(inmw->tc->ntb, inmw->pidx, inmw->widx,
+			       &addr_align, &size_align, &size_max);
+	if (ret)
+		goto err;
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "Inbound MW     \t%d\n",
+			 inmw->widx);
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "Port           \t%d (%d)\n",
+			 ntb_peer_port_number(inmw->tc->ntb, inmw->pidx),
+			 inmw->pidx);
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "Window Address \t0x%pK\n", inmw->mm_base);
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "DMA Address    \t%pad\n",
+			 &inmw->dma_base);
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "Window Size    \t%pa[p]\n",
+			 &inmw->size);
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "Alignment      \t%pa[p]\n",
+			 &addr_align);
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "Size Alignment \t%pa[p]\n",
+			 &size_align);
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "Size Max       \t%pa[p]\n",
+			 &size_max);
+
+	ret = simple_read_from_buffer(ubuf, size, offp, buf, off);
+
+err:
+	kfree(buf);
+
+	return ret;
+}
+
+static ssize_t tool_mw_trans_write(struct file *filep, const char __user *ubuf,
+				   size_t size, loff_t *offp)
+{
+	struct tool_mw *inmw = filep->private_data;
+	unsigned int val;
+	int ret;
+
+	ret = kstrtouint_from_user(ubuf, size, 0, &val);
+	if (ret)
+		return ret;
+
+	tool_free_mw(inmw->tc, inmw->pidx, inmw->widx);
+	if (val) {
+		ret = tool_setup_mw(inmw->tc, inmw->pidx, inmw->widx, val);
+		if (ret)
+			return ret;
+	}
+
+	return size;
+}
+
+static TOOL_FOPS_RDWR(tool_mw_trans_fops,
+		      tool_mw_trans_read,
+		      tool_mw_trans_write);
+
+static ssize_t tool_peer_mw_read(struct file *filep, char __user *ubuf,
+				 size_t size, loff_t *offp)
+{
+	struct tool_mw *outmw = filep->private_data;
+	loff_t pos = *offp;
+	ssize_t ret;
+	void *buf;
+
+	if (outmw->io_base == NULL)
+		return -EIO;
+
+	if (pos >= outmw->size || !size)
+		return 0;
+
+	if (size > outmw->size - pos)
+		size = outmw->size - pos;
+
+	buf = kmalloc(size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
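+	/*
+	 * copy_to_user() returns the number of bytes left uncopied; fail only
+	 * if nothing at all could be copied.
+	 */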
+	memcpy_fromio(buf, outmw->io_base + pos, size);
+	ret = copy_to_user(ubuf, buf, size);
+	if (ret == size) {
+		ret = -EFAULT;
+		goto err_free;
+	}
+
+	size -= ret;
+	*offp = pos + size;
+	ret = size;
+
+err_free:
+	kfree(buf);
+
+	return ret;
+}
+
+static ssize_t tool_peer_mw_write(struct file *filep, const char __user *ubuf,
+				  size_t size, loff_t *offp)
+{
+	struct tool_mw *outmw = filep->private_data;
+	ssize_t ret;
+	loff_t pos = *offp;
+	void *buf;
+
+	if (outmw->io_base == NULL)
+		return -EIO;
+
+	if (pos >= outmw->size || !size)
+		return 0;
+	if (size > outmw->size - pos)
+		size = outmw->size - pos;
+
+	buf = kmalloc(size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	ret = copy_from_user(buf, ubuf, size);
+	if (ret == size) {
+		ret = -EFAULT;
+		goto err_free;
+	}
+
+	size -= ret;
+	*offp = pos + size;
+	ret = size;
+
+	memcpy_toio(outmw->io_base + pos, buf, size);
+
+err_free:
+	kfree(buf);
+
+	return ret;
+}
+
+static TOOL_FOPS_RDWR(tool_peer_mw_fops,
+		      tool_peer_mw_read,
+		      tool_peer_mw_write);
+
+static int tool_setup_peer_mw(struct tool_ctx *tc, int pidx, int widx,
+			      u64 req_addr, size_t req_size)
+{
+	struct tool_mw *outmw = &tc->outmws[widx];
+	resource_size_t map_size;
+	phys_addr_t map_base;
+	char buf[TOOL_BUF_LEN];
+	int ret;
+
+	if (outmw->io_base != NULL)
+		return 0;
+
+	ret = ntb_peer_mw_get_addr(tc->ntb, widx, &map_base, &map_size);
+	if (ret)
+		return ret;
+
+	ret = ntb_peer_mw_set_trans(tc->ntb, pidx, widx, req_addr, req_size);
+	if (ret)
+		return ret;
+
+	outmw->io_base = ioremap_wc(map_base, map_size);
+	if (outmw->io_base == NULL) {
+		ret = -EFAULT;
+		goto err_clear_trans;
+	}
+
+	outmw->tr_base = req_addr;
+	outmw->size = req_size;
+	outmw->pidx = pidx;
+
+	snprintf(buf, sizeof(buf), "peer_mw%d", widx);
+	outmw->dbgfs_file = debugfs_create_file(buf, 0600,
+					       tc->peers[pidx].dbgfs_dir, outmw,
+					       &tool_peer_mw_fops);
+
+	return 0;
+
+err_clear_trans:
+	ntb_peer_mw_clear_trans(tc->ntb, pidx, widx);
+
+	return ret;
+}
+
+static void tool_free_peer_mw(struct tool_ctx *tc, int widx)
+{
+	struct tool_mw *outmw = &tc->outmws[widx];
+
+	debugfs_remove(outmw->dbgfs_file);
+
+	if (outmw->io_base != NULL) {
+		iounmap(tc->outmws[widx].io_base);
+		ntb_peer_mw_clear_trans(tc->ntb, outmw->pidx, widx);
+	}
+
+	outmw->io_base = NULL;
+	outmw->tr_base = 0;
+	outmw->size = 0;
+	outmw->pidx = -1;
+	outmw->dbgfs_file = NULL;
+}
+
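+/*
+ * "peer_mw_trans<N>" debugfs file (one per outbound window of each peer).
+ * Reading prints the local mapping and the currently programmed translation;
+ * writing an "<addr>:<size>" pair sets up the peer translation via
+ * tool_setup_peer_mw(), and a zero size tears it down.
+ */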
+static ssize_t tool_peer_mw_trans_read(struct file *filep, char __user *ubuf,
+					size_t size, loff_t *offp)
+{
+	struct tool_mw_wrap *outmw_wrap = filep->private_data;
+	struct tool_mw *outmw = outmw_wrap->mw;
+	resource_size_t map_size;
+	phys_addr_t map_base;
+	ssize_t off = 0;
+	size_t buf_size;
+	char *buf;
+	int ret;
+
+	ret = ntb_peer_mw_get_addr(outmw->tc->ntb, outmw->widx,
+				  &map_base, &map_size);
+	if (ret)
+		return ret;
+
+	buf_size = min_t(size_t, size, 512);
+
+	buf = kmalloc(buf_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "Outbound MW:        \t%d\n", outmw->widx);
+
+	if (outmw->io_base != NULL) {
+		off += scnprintf(buf + off, buf_size - off,
+			"Port attached       \t%d (%d)\n",
+			ntb_peer_port_number(outmw->tc->ntb, outmw->pidx),
+			outmw->pidx);
+	} else {
+		off += scnprintf(buf + off, buf_size - off,
+				 "Port attached       \t-1 (-1)\n");
+	}
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "Virtual address     \t0x%pK\n", outmw->io_base);
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "Phys Address        \t%pa[p]\n", &map_base);
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "Mapping Size        \t%pa[p]\n", &map_size);
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "Translation Address \t0x%016llx\n", outmw->tr_base);
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "Window Size         \t%pa[p]\n", &outmw->size);
+
+	ret = simple_read_from_buffer(ubuf, size, offp, buf, off);
+	kfree(buf);
+
+	return ret;
+}
+
+static ssize_t tool_peer_mw_trans_write(struct file *filep,
+					const char __user *ubuf,
+					size_t size, loff_t *offp)
+{
+	struct tool_mw_wrap *outmw_wrap = filep->private_data;
+	struct tool_mw *outmw = outmw_wrap->mw;
+	size_t buf_size, wsize;
+	char buf[TOOL_BUF_LEN];
+	int ret, n;
+	u64 addr;
+
+	buf_size = min(size, (sizeof(buf) - 1));
+	if (copy_from_user(buf, ubuf, buf_size))
+		return -EFAULT;
+
+	buf[buf_size] = '\0';
+
+	n = sscanf(buf, "%lli:%zi", &addr, &wsize);
+	if (n != 2)
+		return -EINVAL;
+
+	tool_free_peer_mw(outmw->tc, outmw->widx);
+	if (wsize) {
+		ret = tool_setup_peer_mw(outmw->tc, outmw_wrap->pidx,
+					 outmw->widx, addr, wsize);
+		if (ret)
+			return ret;
+	}
+
+	return size;
+}
+
+static TOOL_FOPS_RDWR(tool_peer_mw_trans_fops,
+		      tool_peer_mw_trans_read,
+		      tool_peer_mw_trans_write);
+
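+/*
+ * Allocate the bookkeeping for every outbound window and for each peer's
+ * inbound windows.  The per-peer wrapper structs let the "peer_mw_trans<N>"
+ * files refer back to the shared outbound window descriptors in tc->outmws.
+ */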
+static int tool_init_mws(struct tool_ctx *tc)
+{
+	int widx, pidx;
+
+	/* Initialize outbound memory windows */
+	tc->outmw_cnt = ntb_peer_mw_count(tc->ntb);
+	tc->outmws = devm_kcalloc(&tc->ntb->dev, tc->outmw_cnt,
+				  sizeof(*tc->outmws), GFP_KERNEL);
+	if (tc->outmws == NULL)
+		return -ENOMEM;
+
+	for (widx = 0; widx < tc->outmw_cnt; widx++) {
+		tc->outmws[widx].widx = widx;
+		tc->outmws[widx].pidx = -1;
+		tc->outmws[widx].tc = tc;
+	}
+
+	/* Initialize inbound memory windows and the outbound MW wrappers */
+	for (pidx = 0; pidx < tc->peer_cnt; pidx++) {
+		tc->peers[pidx].inmw_cnt = ntb_mw_count(tc->ntb, pidx);
+		tc->peers[pidx].inmws =
+			devm_kcalloc(&tc->ntb->dev, tc->peers[pidx].inmw_cnt,
+				    sizeof(*tc->peers[pidx].inmws), GFP_KERNEL);
+		if (tc->peers[pidx].inmws == NULL)
+			return -ENOMEM;
+
+		for (widx = 0; widx < tc->peers[pidx].inmw_cnt; widx++) {
+			tc->peers[pidx].inmws[widx].widx = widx;
+			tc->peers[pidx].inmws[widx].pidx = pidx;
+			tc->peers[pidx].inmws[widx].tc = tc;
+		}
+
+		tc->peers[pidx].outmw_cnt = ntb_peer_mw_count(tc->ntb);
+		tc->peers[pidx].outmws =
+			devm_kcalloc(&tc->ntb->dev, tc->peers[pidx].outmw_cnt,
+				   sizeof(*tc->peers[pidx].outmws), GFP_KERNEL);
+		if (tc->peers[pidx].outmws == NULL)
+			return -ENOMEM;
+
+		for (widx = 0; widx < tc->peers[pidx].outmw_cnt; widx++) {
+			tc->peers[pidx].outmws[widx].pidx = pidx;
+			tc->peers[pidx].outmws[widx].mw = &tc->outmws[widx];
+		}
+	}
+
+	return 0;
+}
+
+static void tool_clear_mws(struct tool_ctx *tc)
+{
+	int widx, pidx;
+
+	/* Free outbound memory windows */
+	for (widx = 0; widx < tc->outmw_cnt; widx++)
+		tool_free_peer_mw(tc, widx);
+
+	/* Free inbound memory windows */
+	for (pidx = 0; pidx < tc->peer_cnt; pidx++)
+		for (widx = 0; widx < tc->peers[pidx].inmw_cnt; widx++)
+			tool_free_mw(tc, pidx, widx);
+}
+
+/*==============================================================================
+ *                       Doorbell read/write methods
+ *==============================================================================
+ */
+
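+/*
+ * "db", "db_valid_mask", "db_mask", "peer_db" and "peer_db_mask" all use the
+ * generic tool_fn_read()/tool_fn_write() helpers defined earlier in this
+ * file: a read prints the value returned by the given ntb_dev_ops callback,
+ * and a write (where permitted) hands the parsed set/clear command to the
+ * matching set/clear callbacks.
+ */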
+static ssize_t tool_db_read(struct file *filep, char __user *ubuf,
+			    size_t size, loff_t *offp)
+{
+	struct tool_ctx *tc = filep->private_data;
+
+	return tool_fn_read(tc, ubuf, size, offp, tc->ntb->ops->db_read);
+}
+
+static ssize_t tool_db_write(struct file *filep, const char __user *ubuf,
+			     size_t size, loff_t *offp)
+{
+	struct tool_ctx *tc = filep->private_data;
+
+	return tool_fn_write(tc, ubuf, size, offp, tc->ntb->ops->db_set,
+			     tc->ntb->ops->db_clear);
+}
+
+static TOOL_FOPS_RDWR(tool_db_fops,
+		      tool_db_read,
+		      tool_db_write);
+
+static ssize_t tool_db_valid_mask_read(struct file *filep, char __user *ubuf,
+				       size_t size, loff_t *offp)
+{
+	struct tool_ctx *tc = filep->private_data;
+
+	return tool_fn_read(tc, ubuf, size, offp, tc->ntb->ops->db_valid_mask);
+}
+
+static TOOL_FOPS_RDWR(tool_db_valid_mask_fops,
+		      tool_db_valid_mask_read,
+		      NULL);
+
+static ssize_t tool_db_mask_read(struct file *filep, char __user *ubuf,
+				 size_t size, loff_t *offp)
+{
+	struct tool_ctx *tc = filep->private_data;
+
+	return tool_fn_read(tc, ubuf, size, offp, tc->ntb->ops->db_read_mask);
+}
+
+static ssize_t tool_db_mask_write(struct file *filep, const char __user *ubuf,
+			       size_t size, loff_t *offp)
+{
+	struct tool_ctx *tc = filep->private_data;
+
+	return tool_fn_write(tc, ubuf, size, offp, tc->ntb->ops->db_set_mask,
+			     tc->ntb->ops->db_clear_mask);
+}
+
+static TOOL_FOPS_RDWR(tool_db_mask_fops,
+		      tool_db_mask_read,
+		      tool_db_mask_write);
+
+static ssize_t tool_peer_db_read(struct file *filep, char __user *ubuf,
+				 size_t size, loff_t *offp)
+{
+	struct tool_ctx *tc = filep->private_data;
+
+	return tool_fn_read(tc, ubuf, size, offp, tc->ntb->ops->peer_db_read);
+}
+
+static ssize_t tool_peer_db_write(struct file *filep, const char __user *ubuf,
+				  size_t size, loff_t *offp)
+{
+	struct tool_ctx *tc = filep->private_data;
+
+	return tool_fn_write(tc, ubuf, size, offp, tc->ntb->ops->peer_db_set,
+			     tc->ntb->ops->peer_db_clear);
+}
+
+static TOOL_FOPS_RDWR(tool_peer_db_fops,
+		      tool_peer_db_read,
+		      tool_peer_db_write);
+
+static ssize_t tool_peer_db_mask_read(struct file *filep, char __user *ubuf,
+				   size_t size, loff_t *offp)
+{
+	struct tool_ctx *tc = filep->private_data;
+
+	return tool_fn_read(tc, ubuf, size, offp,
+			    tc->ntb->ops->peer_db_read_mask);
+}
+
+static ssize_t tool_peer_db_mask_write(struct file *filep,
+				       const char __user *ubuf,
+				       size_t size, loff_t *offp)
+{
+	struct tool_ctx *tc = filep->private_data;
+
+	return tool_fn_write(tc, ubuf, size, offp,
+			     tc->ntb->ops->peer_db_set_mask,
+			     tc->ntb->ops->peer_db_clear_mask);
+}
+
+static TOOL_FOPS_RDWR(tool_peer_db_mask_fops,
+		      tool_peer_db_mask_read,
+		      tool_peer_db_mask_write);
+
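+/*
+ * "db_event": writing a doorbell value blocks the caller on db_wq until
+ * ntb_db_read() matches that value (the wait is interruptible), which lets
+ * user space synchronise on a doorbell pattern raised by the peer.
+ */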
+static ssize_t tool_db_event_write(struct file *filep,
+				   const char __user *ubuf,
+				   size_t size, loff_t *offp)
+{
+	struct tool_ctx *tc = filep->private_data;
+	u64 val;
+	int ret;
+
+	ret = kstrtou64_from_user(ubuf, size, 0, &val);
+	if (ret)
+		return ret;
+
+	if (wait_event_interruptible(tc->db_wq, ntb_db_read(tc->ntb) == val))
+		return -ERESTART;
+
+	return size;
+}
+
+static TOOL_FOPS_RDWR(tool_db_event_fops,
+		      NULL,
+		      tool_db_event_write);
+
+/*==============================================================================
+ *                       Scratchpads read/write methods
+ *==============================================================================
+ */
+
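+/*
+ * Local scratchpads are exposed as "spad<N>" in the device directory; the
+ * "spad<N>" files under "peer<P>/" access the peer's scratchpads through
+ * ntb_peer_spad_read()/ntb_peer_spad_write().  A read prints the current
+ * register value in hex, a write stores the parsed u32.
+ */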
+static ssize_t tool_spad_read(struct file *filep, char __user *ubuf,
+			      size_t size, loff_t *offp)
+{
+	struct tool_spad *spad = filep->private_data;
+	char buf[TOOL_BUF_LEN];
+	ssize_t pos;
+
+	if (!spad->tc->ntb->ops->spad_read)
+		return -EINVAL;
+
+	pos = scnprintf(buf, sizeof(buf), "%#x\n",
+		ntb_spad_read(spad->tc->ntb, spad->sidx));
+
+	return simple_read_from_buffer(ubuf, size, offp, buf, pos);
+}
+
+static ssize_t tool_spad_write(struct file *filep, const char __user *ubuf,
+			       size_t size, loff_t *offp)
+{
+	struct tool_spad *spad = filep->private_data;
+	u32 val;
+	int ret;
+
+	if (!spad->tc->ntb->ops->spad_write) {
+		dev_dbg(&spad->tc->ntb->dev, "no spad write fn\n");
+		return -EINVAL;
+	}
+
+	ret = kstrtou32_from_user(ubuf, size, 0, &val);
+	if (ret)
+		return ret;
+
+	ret = ntb_spad_write(spad->tc->ntb, spad->sidx, val);
+
+	return ret ?: size;
+}
+
+static TOOL_FOPS_RDWR(tool_spad_fops,
+		      tool_spad_read,
+		      tool_spad_write);
+
+static ssize_t tool_peer_spad_read(struct file *filep, char __user *ubuf,
+				   size_t size, loff_t *offp)
+{
+	struct tool_spad *spad = filep->private_data;
+	char buf[TOOL_BUF_LEN];
+	ssize_t pos;
+
+	if (!spad->tc->ntb->ops->peer_spad_read)
+		return -EINVAL;
+
+	pos = scnprintf(buf, sizeof(buf), "%#x\n",
+		ntb_peer_spad_read(spad->tc->ntb, spad->pidx, spad->sidx));
+
+	return simple_read_from_buffer(ubuf, size, offp, buf, pos);
+}
+
+static ssize_t tool_peer_spad_write(struct file *filep, const char __user *ubuf,
+				    size_t size, loff_t *offp)
+{
+	struct tool_spad *spad = filep->private_data;
+	u32 val;
+	int ret;
+
+	if (!spad->tc->ntb->ops->peer_spad_write) {
+		dev_dbg(&spad->tc->ntb->dev, "no spad write fn\n");
+		return -EINVAL;
+	}
+
+	ret = kstrtou32_from_user(ubuf, size, 0, &val);
+	if (ret)
+		return ret;
+
+	ret = ntb_peer_spad_write(spad->tc->ntb, spad->pidx, spad->sidx, val);
+
+	return ret ?: size;
+}
+
+static TOOL_FOPS_RDWR(tool_peer_spad_fops,
+		      tool_peer_spad_read,
+		      tool_peer_spad_write);
+
+static int tool_init_spads(struct tool_ctx *tc)
+{
+	int sidx, pidx;
+
+	/* Initialize inbound scratchpad structures */
+	tc->inspad_cnt = ntb_spad_count(tc->ntb);
+	tc->inspads = devm_kcalloc(&tc->ntb->dev, tc->inspad_cnt,
+				   sizeof(*tc->inspads), GFP_KERNEL);
+	if (tc->inspads == NULL)
+		return -ENOMEM;
+
+	for (sidx = 0; sidx < tc->inspad_cnt; sidx++) {
+		tc->inspads[sidx].sidx = sidx;
+		tc->inspads[sidx].pidx = -1;
+		tc->inspads[sidx].tc = tc;
+	}
+
+	/* Initialize outbound scratchpad structures */
+	for (pidx = 0; pidx < tc->peer_cnt; pidx++) {
+		tc->peers[pidx].outspad_cnt = ntb_spad_count(tc->ntb);
+		tc->peers[pidx].outspads =
+			devm_kcalloc(&tc->ntb->dev, tc->peers[pidx].outspad_cnt,
+				sizeof(*tc->peers[pidx].outspads), GFP_KERNEL);
+		if (tc->peers[pidx].outspads == NULL)
+			return -ENOMEM;
+
+		for (sidx = 0; sidx < tc->peers[pidx].outspad_cnt; sidx++) {
+			tc->peers[pidx].outspads[sidx].sidx = sidx;
+			tc->peers[pidx].outspads[sidx].pidx = pidx;
+			tc->peers[pidx].outspads[sidx].tc = tc;
+		}
+	}
+
+	return 0;
+}
+
+/*==============================================================================
+ *                       Messages read/write methods
+ *==============================================================================
+ */
+
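+/*
+ * Message registers: the local "msg<N>" files print the last inbound value
+ * together with the source peer index ("<value><-<pidx>"), while "msg<N>"
+ * under "peer<P>/" sends a value with ntb_peer_msg_write().  "msg_sts",
+ * "msg_inbits", "msg_outbits" and "msg_mask" use the generic helpers to
+ * expose the corresponding ntb_dev_ops callbacks.
+ */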
+static ssize_t tool_inmsg_read(struct file *filep, char __user *ubuf,
+			       size_t size, loff_t *offp)
+{
+	struct tool_msg *msg = filep->private_data;
+	char buf[TOOL_BUF_LEN];
+	ssize_t pos;
+	u32 data;
+	int pidx;
+
+	data = ntb_msg_read(msg->tc->ntb, &pidx, msg->midx);
+
+	pos = scnprintf(buf, sizeof(buf), "0x%08x<-%d\n", data, pidx);
+
+	return simple_read_from_buffer(ubuf, size, offp, buf, pos);
+}
+
+static TOOL_FOPS_RDWR(tool_inmsg_fops,
+		      tool_inmsg_read,
+		      NULL);
+
+static ssize_t tool_outmsg_write(struct file *filep,
+				 const char __user *ubuf,
+				 size_t size, loff_t *offp)
+{
+	struct tool_msg *msg = filep->private_data;
+	u32 val;
+	int ret;
+
+	ret = kstrtou32_from_user(ubuf, size, 0, &val);
+	if (ret)
+		return ret;
+
+	ret = ntb_peer_msg_write(msg->tc->ntb, msg->pidx, msg->midx, val);
+
+	return ret ? : size;
+}
+
+static TOOL_FOPS_RDWR(tool_outmsg_fops,
+		      NULL,
+		      tool_outmsg_write);
+
+static ssize_t tool_msg_sts_read(struct file *filep, char __user *ubuf,
+				 size_t size, loff_t *offp)
+{
+	struct tool_ctx *tc = filep->private_data;
+
+	return tool_fn_read(tc, ubuf, size, offp, tc->ntb->ops->msg_read_sts);
+}
+
+static ssize_t tool_msg_sts_write(struct file *filep, const char __user *ubuf,
+				  size_t size, loff_t *offp)
+{
+	struct tool_ctx *tc = filep->private_data;
+
+	return tool_fn_write(tc, ubuf, size, offp, NULL,
+			     tc->ntb->ops->msg_clear_sts);
+}
+
+static TOOL_FOPS_RDWR(tool_msg_sts_fops,
+		      tool_msg_sts_read,
+		      tool_msg_sts_write);
+
+static ssize_t tool_msg_inbits_read(struct file *filep, char __user *ubuf,
+				    size_t size, loff_t *offp)
+{
+	struct tool_ctx *tc = filep->private_data;
+
+	return tool_fn_read(tc, ubuf, size, offp, tc->ntb->ops->msg_inbits);
+}
+
+static TOOL_FOPS_RDWR(tool_msg_inbits_fops,
+		      tool_msg_inbits_read,
+		      NULL);
+
+static ssize_t tool_msg_outbits_read(struct file *filep, char __user *ubuf,
+				     size_t size, loff_t *offp)
+{
+	struct tool_ctx *tc = filep->private_data;
+
+	return tool_fn_read(tc, ubuf, size, offp, tc->ntb->ops->msg_outbits);
+}
+
+static TOOL_FOPS_RDWR(tool_msg_outbits_fops,
+		      tool_msg_outbits_read,
+		      NULL);
+
+static ssize_t tool_msg_mask_write(struct file *filep, const char __user *ubuf,
+				   size_t size, loff_t *offp)
+{
+	struct tool_ctx *tc = filep->private_data;
+
+	return tool_fn_write(tc, ubuf, size, offp,
+			     tc->ntb->ops->msg_set_mask,
+			     tc->ntb->ops->msg_clear_mask);
+}
+
+static TOOL_FOPS_RDWR(tool_msg_mask_fops,
+		      NULL,
+		      tool_msg_mask_write);
+
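+/*
+ * "msg_event": like "db_event", but the caller sleeps on msg_wq until the
+ * message status register read by ntb_msg_read_sts() matches the written
+ * value.
+ */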
+static ssize_t tool_msg_event_write(struct file *filep,
+				    const char __user *ubuf,
+				    size_t size, loff_t *offp)
+{
+	struct tool_ctx *tc = filep->private_data;
+	u64 val;
+	int ret;
+
+	ret = kstrtou64_from_user(ubuf, size, 0, &val);
+	if (ret)
+		return ret;
+
+	if (wait_event_interruptible(tc->msg_wq,
+		ntb_msg_read_sts(tc->ntb) == val))
+		return -ERESTART;
+
+	return size;
+}
+
+static TOOL_FOPS_RDWR(tool_msg_event_fops,
+		      NULL,
+		      tool_msg_event_write);
+
+static int tool_init_msgs(struct tool_ctx *tc)
+{
+	int midx, pidx;
+
+	/* Initialize inbound message structures */
+	tc->inmsg_cnt = ntb_msg_count(tc->ntb);
+	tc->inmsgs = devm_kcalloc(&tc->ntb->dev, tc->inmsg_cnt,
+				   sizeof(*tc->inmsgs), GFP_KERNEL);
+	if (tc->inmsgs == NULL)
+		return -ENOMEM;
+
+	for (midx = 0; midx < tc->inmsg_cnt; midx++) {
+		tc->inmsgs[midx].midx = midx;
+		tc->inmsgs[midx].pidx = -1;
+		tc->inmsgs[midx].tc = tc;
+	}
+
+	/* Initialize outbound message structures */
+	for (pidx = 0; pidx < tc->peer_cnt; pidx++) {
+		tc->peers[pidx].outmsg_cnt = ntb_msg_count(tc->ntb);
+		tc->peers[pidx].outmsgs =
+			devm_kcalloc(&tc->ntb->dev, tc->peers[pidx].outmsg_cnt,
+				sizeof(*tc->peers[pidx].outmsgs), GFP_KERNEL);
+		if (tc->peers[pidx].outmsgs == NULL)
+			return -ENOMEM;
+
+		for (midx = 0; midx < tc->peers[pidx].outmsg_cnt; midx++) {
+			tc->peers[pidx].outmsgs[midx].midx = midx;
+			tc->peers[pidx].outmsgs[midx].pidx = pidx;
+			tc->peers[pidx].outmsgs[midx].tc = tc;
+		}
+	}
+
+	return 0;
+}
+
+/*==============================================================================
+ *                          Initialization methods
+ *==============================================================================
+ */
+
+static struct tool_ctx *tool_create_data(struct ntb_dev *ntb)
+{
+	struct tool_ctx *tc;
+
+	tc = devm_kzalloc(&ntb->dev, sizeof(*tc), GFP_KERNEL);
+	if (tc == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	tc->ntb = ntb;
+	init_waitqueue_head(&tc->link_wq);
+	init_waitqueue_head(&tc->db_wq);
+	init_waitqueue_head(&tc->msg_wq);
+
+	if (ntb_db_is_unsafe(ntb))
+		dev_dbg(&ntb->dev, "doorbell is unsafe\n");
+
+	if (ntb_spad_is_unsafe(ntb))
+		dev_dbg(&ntb->dev, "scratchpad is unsafe\n");
+
+	return tc;
+}
+
+static void tool_clear_data(struct tool_ctx *tc)
+{
+	wake_up(&tc->link_wq);
+	wake_up(&tc->db_wq);
+	wake_up(&tc->msg_wq);
+}
+
+static int tool_init_ntb(struct tool_ctx *tc)
+{
+	return ntb_set_ctx(tc->ntb, tc, &tool_ops);
+}
+
+static void tool_clear_ntb(struct tool_ctx *tc)
+{
+	ntb_clear_ctx(tc->ntb);
+	ntb_link_disable(tc->ntb);
+}
+
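+/*
+ * Build the per-device debugfs tree under <debugfs>/<KBUILD_MODNAME>/<dev>:
+ * the doorbell, scratchpad and message files sit at the top level, and one
+ * "peer<P>" directory per peer holds the port/link controls plus the
+ * per-window and per-scratchpad files.  The "mw<N>"/"peer_mw<N>" data files
+ * are added later by tool_setup_mw()/tool_setup_peer_mw() once a window is
+ * actually configured.
+ */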
+static void tool_setup_dbgfs(struct tool_ctx *tc)
+{
+	int pidx, widx, sidx, midx;
+	char buf[TOOL_BUF_LEN];
+
+	/* This module is useless without debugfs... */
+	if (!tool_dbgfs_topdir) {
+		tc->dbgfs_dir = NULL;
+		return;
+	}
+
+	tc->dbgfs_dir = debugfs_create_dir(dev_name(&tc->ntb->dev),
+					   tool_dbgfs_topdir);
+	if (!tc->dbgfs_dir)
+		return;
+
+	debugfs_create_file("port", 0600, tc->dbgfs_dir,
+			    tc, &tool_port_fops);
+
+	debugfs_create_file("link", 0600, tc->dbgfs_dir,
+			    tc, &tool_link_fops);
+
+	debugfs_create_file("db", 0600, tc->dbgfs_dir,
+			    tc, &tool_db_fops);
+
+	debugfs_create_file("db_valid_mask", 0600, tc->dbgfs_dir,
+			    tc, &tool_db_valid_mask_fops);
+
+	debugfs_create_file("db_mask", 0600, tc->dbgfs_dir,
+			    tc, &tool_db_mask_fops);
+
+	debugfs_create_file("db_event", 0600, tc->dbgfs_dir,
+			    tc, &tool_db_event_fops);
+
+	debugfs_create_file("peer_db", 0600, tc->dbgfs_dir,
+			    tc, &tool_peer_db_fops);
+
+	debugfs_create_file("peer_db_mask", 0600, tc->dbgfs_dir,
+			    tc, &tool_peer_db_mask_fops);
+
+	if (tc->inspad_cnt != 0) {
+		for (sidx = 0; sidx < tc->inspad_cnt; sidx++) {
+			snprintf(buf, sizeof(buf), "spad%d", sidx);
+
+			debugfs_create_file(buf, 0600, tc->dbgfs_dir,
+					   &tc->inspads[sidx], &tool_spad_fops);
+		}
+	}
+
+	if (tc->inmsg_cnt != 0) {
+		for (midx = 0; midx < tc->inmsg_cnt; midx++) {
+			snprintf(buf, sizeof(buf), "msg%d", midx);
+			debugfs_create_file(buf, 0600, tc->dbgfs_dir,
+					   &tc->inmsgs[midx], &tool_inmsg_fops);
+		}
+
+		debugfs_create_file("msg_sts", 0600, tc->dbgfs_dir,
+				    tc, &tool_msg_sts_fops);
+
+		debugfs_create_file("msg_inbits", 0600, tc->dbgfs_dir,
+				    tc, &tool_msg_inbits_fops);
+
+		debugfs_create_file("msg_outbits", 0600, tc->dbgfs_dir,
+				    tc, &tool_msg_outbits_fops);
+
+		debugfs_create_file("msg_mask", 0600, tc->dbgfs_dir,
+				    tc, &tool_msg_mask_fops);
+
+		debugfs_create_file("msg_event", 0600, tc->dbgfs_dir,
+				    tc, &tool_msg_event_fops);
+	}
+
+	for (pidx = 0; pidx < tc->peer_cnt; pidx++) {
+		snprintf(buf, sizeof(buf), "peer%d", pidx);
+		tc->peers[pidx].dbgfs_dir =
+			debugfs_create_dir(buf, tc->dbgfs_dir);
+
+		debugfs_create_file("port", 0600,
+				    tc->peers[pidx].dbgfs_dir,
+				    &tc->peers[pidx], &tool_peer_port_fops);
+
+		debugfs_create_file("link", 0200,
+				    tc->peers[pidx].dbgfs_dir,
+				    &tc->peers[pidx], &tool_peer_link_fops);
+
+		debugfs_create_file("link_event", 0200,
+				  tc->peers[pidx].dbgfs_dir,
+				  &tc->peers[pidx], &tool_peer_link_event_fops);
+
+		for (widx = 0; widx < tc->peers[pidx].inmw_cnt; widx++) {
+			snprintf(buf, sizeof(buf), "mw_trans%d", widx);
+			debugfs_create_file(buf, 0600,
+					    tc->peers[pidx].dbgfs_dir,
+					    &tc->peers[pidx].inmws[widx],
+					    &tool_mw_trans_fops);
+		}
+
+		for (widx = 0; widx < tc->peers[pidx].outmw_cnt; widx++) {
+			snprintf(buf, sizeof(buf), "peer_mw_trans%d", widx);
+			debugfs_create_file(buf, 0600,
+					    tc->peers[pidx].dbgfs_dir,
+					    &tc->peers[pidx].outmws[widx],
+					    &tool_peer_mw_trans_fops);
+		}
+
+		for (sidx = 0; sidx < tc->peers[pidx].outspad_cnt; sidx++) {
+			snprintf(buf, sizeof(buf), "spad%d", sidx);
+
+			debugfs_create_file(buf, 0600,
+					    tc->peers[pidx].dbgfs_dir,
+					    &tc->peers[pidx].outspads[sidx],
+					    &tool_peer_spad_fops);
+		}
+
+		for (midx = 0; midx < tc->peers[pidx].outmsg_cnt; midx++) {
+			snprintf(buf, sizeof(buf), "msg%d", midx);
+			debugfs_create_file(buf, 0600,
+					    tc->peers[pidx].dbgfs_dir,
+					    &tc->peers[pidx].outmsgs[midx],
+					    &tool_outmsg_fops);
+		}
+	}
+}
+
+static void tool_clear_dbgfs(struct tool_ctx *tc)
+{
+	debugfs_remove_recursive(tc->dbgfs_dir);
+}
+
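+/*
+ * Client probe: allocate the context, discover the peers, memory windows,
+ * scratchpads and message registers, register the NTB context ops and then
+ * populate debugfs.  All allocations are devm-managed, so the error paths
+ * only need to undo the window setup and wake any sleepers.
+ */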
+static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb)
+{
+	struct tool_ctx *tc;
+	int ret;
+
+	tc = tool_create_data(ntb);
+	if (IS_ERR(tc))
+		return PTR_ERR(tc);
+
+	ret = tool_init_peers(tc);
+	if (ret != 0)
+		goto err_clear_data;
+
+	ret = tool_init_mws(tc);
+	if (ret != 0)
+		goto err_clear_data;
+
+	ret = tool_init_spads(tc);
+	if (ret != 0)
+		goto err_clear_mws;
+
+	ret = tool_init_msgs(tc);
+	if (ret != 0)
+		goto err_clear_mws;
+
+	ret = tool_init_ntb(tc);
+	if (ret != 0)
+		goto err_clear_mws;
+
+	tool_setup_dbgfs(tc);
+
+	return 0;
+
+err_clear_mws:
+	tool_clear_mws(tc);
+
+err_clear_data:
+	tool_clear_data(tc);
+
+	return ret;
+}
+
+static void tool_remove(struct ntb_client *self, struct ntb_dev *ntb)
+{
+	struct tool_ctx *tc = ntb->ctx;
+
+	tool_clear_dbgfs(tc);
+
+	tool_clear_ntb(tc);
+
+	tool_clear_mws(tc);
+
+	tool_clear_data(tc);
+}
+
+static struct ntb_client tool_client = {
+	.ops = {
+		.probe = tool_probe,
+		.remove = tool_remove,
+	}
+};
+
+static int __init tool_init(void)
+{
+	int ret;
+
+	if (debugfs_initialized())
+		tool_dbgfs_topdir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+
+	ret = ntb_register_client(&tool_client);
+	if (ret)
+		debugfs_remove_recursive(tool_dbgfs_topdir);
+
+	return ret;
+}
+module_init(tool_init);
+
+static void __exit tool_exit(void)
+{
+	ntb_unregister_client(&tool_client);
+	debugfs_remove_recursive(tool_dbgfs_topdir);
+}
+module_exit(tool_exit);
+