v4.19.13 snapshot.
diff --git a/drivers/firmware/tegra/Kconfig b/drivers/firmware/tegra/Kconfig
new file mode 100644
index 0000000..ff2730d
--- /dev/null
+++ b/drivers/firmware/tegra/Kconfig
@@ -0,0 +1,25 @@
+menu "Tegra firmware driver"
+
+config TEGRA_IVC
+	bool "Tegra IVC protocol"
+	depends on ARCH_TEGRA
+	help
+	  IVC (Inter-VM Communication) protocol is part of the IPC
+	  (Inter Processor Communication) framework on Tegra. It maintains the
+	  data and the different commuication channels in SysRAM or RAM and
+	  keeps the content is synchronization between host CPU and remote
+	  processors.
+
+config TEGRA_BPMP
+	bool "Tegra BPMP driver"
+	depends on ARCH_TEGRA && TEGRA_HSP_MBOX && TEGRA_IVC
+	help
+	  BPMP (Boot and Power Management Processor) is designed to off-loading
+	  the PM functions which include clock/DVFS/thermal/power from the CPU.
+	  It needs HSP as the HW synchronization and notification module and
+	  IVC module as the message communication protocol.
+
+	  This driver manages the IPC interface between host CPU and the
+	  firmware running on BPMP.
+
+endmenu
diff --git a/drivers/firmware/tegra/Makefile b/drivers/firmware/tegra/Makefile
new file mode 100644
index 0000000..1b826dc
--- /dev/null
+++ b/drivers/firmware/tegra/Makefile
@@ -0,0 +1,4 @@
+tegra-bpmp-y			= bpmp.o
+tegra-bpmp-$(CONFIG_DEBUG_FS)	+= bpmp-debugfs.o
+obj-$(CONFIG_TEGRA_BPMP)	+= tegra-bpmp.o
+obj-$(CONFIG_TEGRA_IVC)		+= ivc.o
diff --git a/drivers/firmware/tegra/bpmp-debugfs.c b/drivers/firmware/tegra/bpmp-debugfs.c
new file mode 100644
index 0000000..f7f6a0a
--- /dev/null
+++ b/drivers/firmware/tegra/bpmp-debugfs.c
@@ -0,0 +1,444 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
+#include <linux/uaccess.h>
+
+#include <soc/tegra/bpmp.h>
+#include <soc/tegra/bpmp-abi.h>
+
+struct seqbuf {
+	char *buf;
+	size_t pos;
+	size_t size;
+};
+
+static void seqbuf_init(struct seqbuf *seqbuf, void *buf, size_t size)
+{
+	seqbuf->buf = buf;
+	seqbuf->size = size;
+	seqbuf->pos = 0;
+}
+
+static size_t seqbuf_avail(struct seqbuf *seqbuf)
+{
+	return seqbuf->pos < seqbuf->size ? seqbuf->size - seqbuf->pos : 0;
+}
+
+static size_t seqbuf_status(struct seqbuf *seqbuf)
+{
+	return seqbuf->pos <= seqbuf->size ? 0 : -EOVERFLOW;
+}
+
+static int seqbuf_eof(struct seqbuf *seqbuf)
+{
+	return seqbuf->pos >= seqbuf->size;
+}
+
+static int seqbuf_read(struct seqbuf *seqbuf, void *buf, size_t nbyte)
+{
+	nbyte = min(nbyte, seqbuf_avail(seqbuf));
+	memcpy(buf, seqbuf->buf + seqbuf->pos, nbyte);
+	seqbuf->pos += nbyte;
+	return seqbuf_status(seqbuf);
+}
+
+static int seqbuf_read_u32(struct seqbuf *seqbuf, uint32_t *v)
+{
+	int err;
+
+	err = seqbuf_read(seqbuf, v, 4);
+	*v = le32_to_cpu(*v);
+	return err;
+}
+
+static int seqbuf_read_str(struct seqbuf *seqbuf, const char **str)
+{
+	*str = seqbuf->buf + seqbuf->pos;
+	seqbuf->pos += strnlen(*str, seqbuf_avail(seqbuf));
+	seqbuf->pos++;
+	return seqbuf_status(seqbuf);
+}
+
+static void seqbuf_seek(struct seqbuf *seqbuf, ssize_t offset)
+{
+	seqbuf->pos += offset;
+}
+
+/* map filename in Linux debugfs to corresponding entry in BPMP */
+static const char *get_filename(struct tegra_bpmp *bpmp,
+				const struct file *file, char *buf, int size)
+{
+	char root_path_buf[512];
+	const char *root_path;
+	const char *filename;
+	size_t root_len;
+
+	root_path = dentry_path(bpmp->debugfs_mirror, root_path_buf,
+				sizeof(root_path_buf));
+	if (IS_ERR(root_path))
+		return NULL;
+
+	root_len = strlen(root_path);
+
+	filename = dentry_path(file->f_path.dentry, buf, size);
+	if (IS_ERR(filename))
+		return NULL;
+
+	if (strlen(filename) < root_len ||
+			strncmp(filename, root_path, root_len))
+		return NULL;
+
+	filename += root_len;
+
+	return filename;
+}
+
+static int mrq_debugfs_read(struct tegra_bpmp *bpmp,
+			    dma_addr_t name, size_t sz_name,
+			    dma_addr_t data, size_t sz_data,
+			    size_t *nbytes)
+{
+	struct mrq_debugfs_request req = {
+		.cmd = cpu_to_le32(CMD_DEBUGFS_READ),
+		.fop = {
+			.fnameaddr = cpu_to_le32((uint32_t)name),
+			.fnamelen = cpu_to_le32((uint32_t)sz_name),
+			.dataaddr = cpu_to_le32((uint32_t)data),
+			.datalen = cpu_to_le32((uint32_t)sz_data),
+		},
+	};
+	struct mrq_debugfs_response resp;
+	struct tegra_bpmp_message msg = {
+		.mrq = MRQ_DEBUGFS,
+		.tx = {
+			.data = &req,
+			.size = sizeof(req),
+		},
+		.rx = {
+			.data = &resp,
+			.size = sizeof(resp),
+		},
+	};
+	int err;
+
+	err = tegra_bpmp_transfer(bpmp, &msg);
+	if (err < 0)
+		return err;
+
+	*nbytes = (size_t)resp.fop.nbytes;
+
+	return 0;
+}
+
+static int mrq_debugfs_write(struct tegra_bpmp *bpmp,
+			     dma_addr_t name, size_t sz_name,
+			     dma_addr_t data, size_t sz_data)
+{
+	const struct mrq_debugfs_request req = {
+		.cmd = cpu_to_le32(CMD_DEBUGFS_WRITE),
+		.fop = {
+			.fnameaddr = cpu_to_le32((uint32_t)name),
+			.fnamelen = cpu_to_le32((uint32_t)sz_name),
+			.dataaddr = cpu_to_le32((uint32_t)data),
+			.datalen = cpu_to_le32((uint32_t)sz_data),
+		},
+	};
+	struct tegra_bpmp_message msg = {
+		.mrq = MRQ_DEBUGFS,
+		.tx = {
+			.data = &req,
+			.size = sizeof(req),
+		},
+	};
+
+	return tegra_bpmp_transfer(bpmp, &msg);
+}
+
+static int mrq_debugfs_dumpdir(struct tegra_bpmp *bpmp, dma_addr_t addr,
+			       size_t size, size_t *nbytes)
+{
+	const struct mrq_debugfs_request req = {
+		.cmd = cpu_to_le32(CMD_DEBUGFS_DUMPDIR),
+		.dumpdir = {
+			.dataaddr = cpu_to_le32((uint32_t)addr),
+			.datalen = cpu_to_le32((uint32_t)size),
+		},
+	};
+	struct mrq_debugfs_response resp;
+	struct tegra_bpmp_message msg = {
+		.mrq = MRQ_DEBUGFS,
+		.tx = {
+			.data = &req,
+			.size = sizeof(req),
+		},
+		.rx = {
+			.data = &resp,
+			.size = sizeof(resp),
+		},
+	};
+	int err;
+
+	err = tegra_bpmp_transfer(bpmp, &msg);
+	if (err < 0)
+		return err;
+
+	*nbytes = (size_t)resp.dumpdir.nbytes;
+
+	return 0;
+}
+
+static int debugfs_show(struct seq_file *m, void *p)
+{
+	struct file *file = m->private;
+	struct inode *inode = file_inode(file);
+	struct tegra_bpmp *bpmp = inode->i_private;
+	const size_t datasize = m->size;
+	const size_t namesize = SZ_256;
+	void *datavirt, *namevirt;
+	dma_addr_t dataphys, namephys;
+	char buf[256];
+	const char *filename;
+	size_t len, nbytes;
+	int ret;
+
+	filename = get_filename(bpmp, file, buf, sizeof(buf));
+	if (!filename)
+		return -ENOENT;
+
+	namevirt = dma_alloc_coherent(bpmp->dev, namesize, &namephys,
+				      GFP_KERNEL | GFP_DMA32);
+	if (!namevirt)
+		return -ENOMEM;
+
+	datavirt = dma_alloc_coherent(bpmp->dev, datasize, &dataphys,
+				      GFP_KERNEL | GFP_DMA32);
+	if (!datavirt) {
+		ret = -ENOMEM;
+		goto free_namebuf;
+	}
+
+	len = strlen(filename);
+	strncpy(namevirt, filename, namesize);
+
+	ret = mrq_debugfs_read(bpmp, namephys, len, dataphys, datasize,
+			       &nbytes);
+
+	if (!ret)
+		seq_write(m, datavirt, nbytes);
+
+	dma_free_coherent(bpmp->dev, datasize, datavirt, dataphys);
+free_namebuf:
+	dma_free_coherent(bpmp->dev, namesize, namevirt, namephys);
+
+	return ret;
+}
+
+static int debugfs_open(struct inode *inode, struct file *file)
+{
+	return single_open_size(file, debugfs_show, file, SZ_128K);
+}
+
+static ssize_t debugfs_store(struct file *file, const char __user *buf,
+		size_t count, loff_t *f_pos)
+{
+	struct inode *inode = file_inode(file);
+	struct tegra_bpmp *bpmp = inode->i_private;
+	const size_t datasize = count;
+	const size_t namesize = SZ_256;
+	void *datavirt, *namevirt;
+	dma_addr_t dataphys, namephys;
+	char fnamebuf[256];
+	const char *filename;
+	size_t len;
+	int ret;
+
+	filename = get_filename(bpmp, file, fnamebuf, sizeof(fnamebuf));
+	if (!filename)
+		return -ENOENT;
+
+	namevirt = dma_alloc_coherent(bpmp->dev, namesize, &namephys,
+				      GFP_KERNEL | GFP_DMA32);
+	if (!namevirt)
+		return -ENOMEM;
+
+	datavirt = dma_alloc_coherent(bpmp->dev, datasize, &dataphys,
+				      GFP_KERNEL | GFP_DMA32);
+	if (!datavirt) {
+		ret = -ENOMEM;
+		goto free_namebuf;
+	}
+
+	len = strlen(filename);
+	strncpy(namevirt, filename, namesize);
+
+	if (copy_from_user(datavirt, buf, count)) {
+		ret = -EFAULT;
+		goto free_databuf;
+	}
+
+	ret = mrq_debugfs_write(bpmp, namephys, len, dataphys,
+				count);
+
+free_databuf:
+	dma_free_coherent(bpmp->dev, datasize, datavirt, dataphys);
+free_namebuf:
+	dma_free_coherent(bpmp->dev, namesize, namevirt, namephys);
+
+	return ret ?: count;
+}
+
+static const struct file_operations debugfs_fops = {
+	.open		= debugfs_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.write		= debugfs_store,
+	.release	= single_release,
+};
+
+static int bpmp_populate_dir(struct tegra_bpmp *bpmp, struct seqbuf *seqbuf,
+			     struct dentry *parent, uint32_t depth)
+{
+	int err;
+	uint32_t d, t;
+	const char *name;
+	struct dentry *dentry;
+
+	while (!seqbuf_eof(seqbuf)) {
+		err = seqbuf_read_u32(seqbuf, &d);
+		if (err < 0)
+			return err;
+
+		if (d < depth) {
+			seqbuf_seek(seqbuf, -4);
+			/* go up a level */
+			return 0;
+		} else if (d != depth) {
+			/* malformed data received from BPMP */
+			return -EIO;
+		}
+
+		err = seqbuf_read_u32(seqbuf, &t);
+		if (err < 0)
+			return err;
+		err = seqbuf_read_str(seqbuf, &name);
+		if (err < 0)
+			return err;
+
+		if (t & DEBUGFS_S_ISDIR) {
+			dentry = debugfs_create_dir(name, parent);
+			if (!dentry)
+				return -ENOMEM;
+			err = bpmp_populate_dir(bpmp, seqbuf, dentry, depth+1);
+			if (err < 0)
+				return err;
+		} else {
+			umode_t mode;
+
+			mode = t & DEBUGFS_S_IRUSR ? S_IRUSR : 0;
+			mode |= t & DEBUGFS_S_IWUSR ? S_IWUSR : 0;
+			dentry = debugfs_create_file(name, mode,
+						     parent, bpmp,
+						     &debugfs_fops);
+			if (!dentry)
+				return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+static int create_debugfs_mirror(struct tegra_bpmp *bpmp, void *buf,
+				 size_t bufsize, struct dentry *root)
+{
+	struct seqbuf seqbuf;
+	int err;
+
+	bpmp->debugfs_mirror = debugfs_create_dir("debug", root);
+	if (!bpmp->debugfs_mirror)
+		return -ENOMEM;
+
+	seqbuf_init(&seqbuf, buf, bufsize);
+	err = bpmp_populate_dir(bpmp, &seqbuf, bpmp->debugfs_mirror, 0);
+	if (err < 0) {
+		debugfs_remove_recursive(bpmp->debugfs_mirror);
+		bpmp->debugfs_mirror = NULL;
+	}
+
+	return err;
+}
+
+static int mrq_is_supported(struct tegra_bpmp *bpmp, unsigned int mrq)
+{
+	struct mrq_query_abi_request req = { .mrq = cpu_to_le32(mrq) };
+	struct mrq_query_abi_response resp;
+	struct tegra_bpmp_message msg = {
+		.mrq = MRQ_QUERY_ABI,
+		.tx = {
+			.data = &req,
+			.size = sizeof(req),
+		},
+		.rx = {
+			.data = &resp,
+			.size = sizeof(resp),
+		},
+	};
+	int ret;
+
+	ret = tegra_bpmp_transfer(bpmp, &msg);
+	if (ret < 0) {
+		/* something went wrong; assume not supported */
+		dev_warn(bpmp->dev, "tegra_bpmp_transfer failed (%d)\n", ret);
+		return 0;
+	}
+
+	return resp.status ? 0 : 1;
+}
+
+int tegra_bpmp_init_debugfs(struct tegra_bpmp *bpmp)
+{
+	dma_addr_t phys;
+	void *virt;
+	const size_t sz = SZ_256K;
+	size_t nbytes;
+	int ret;
+	struct dentry *root;
+
+	if (!mrq_is_supported(bpmp, MRQ_DEBUGFS))
+		return 0;
+
+	root = debugfs_create_dir("bpmp", NULL);
+	if (!root)
+		return -ENOMEM;
+
+	virt = dma_alloc_coherent(bpmp->dev, sz, &phys,
+				  GFP_KERNEL | GFP_DMA32);
+	if (!virt) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = mrq_debugfs_dumpdir(bpmp, phys, sz, &nbytes);
+	if (ret < 0)
+		goto free;
+
+	ret = create_debugfs_mirror(bpmp, virt, nbytes, root);
+free:
+	dma_free_coherent(bpmp->dev, sz, virt, phys);
+out:
+	if (ret < 0)
+		debugfs_remove(root);
+
+	return ret;
+}
diff --git a/drivers/firmware/tegra/bpmp.c b/drivers/firmware/tegra/bpmp.c
new file mode 100644
index 0000000..14a456a
--- /dev/null
+++ b/drivers/firmware/tegra/bpmp.c
@@ -0,0 +1,882 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/clk/tegra.h>
+#include <linux/genalloc.h>
+#include <linux/mailbox_client.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/semaphore.h>
+#include <linux/sched/clock.h>
+
+#include <soc/tegra/bpmp.h>
+#include <soc/tegra/bpmp-abi.h>
+#include <soc/tegra/ivc.h>
+
+#define MSG_ACK		BIT(0)
+#define MSG_RING	BIT(1)
+
+static inline struct tegra_bpmp *
+mbox_client_to_bpmp(struct mbox_client *client)
+{
+	return container_of(client, struct tegra_bpmp, mbox.client);
+}
+
+struct tegra_bpmp *tegra_bpmp_get(struct device *dev)
+{
+	struct platform_device *pdev;
+	struct tegra_bpmp *bpmp;
+	struct device_node *np;
+
+	np = of_parse_phandle(dev->of_node, "nvidia,bpmp", 0);
+	if (!np)
+		return ERR_PTR(-ENOENT);
+
+	pdev = of_find_device_by_node(np);
+	if (!pdev) {
+		bpmp = ERR_PTR(-ENODEV);
+		goto put;
+	}
+
+	bpmp = platform_get_drvdata(pdev);
+	if (!bpmp) {
+		bpmp = ERR_PTR(-EPROBE_DEFER);
+		put_device(&pdev->dev);
+		goto put;
+	}
+
+put:
+	of_node_put(np);
+	return bpmp;
+}
+EXPORT_SYMBOL_GPL(tegra_bpmp_get);
+
+void tegra_bpmp_put(struct tegra_bpmp *bpmp)
+{
+	if (bpmp)
+		put_device(bpmp->dev);
+}
+EXPORT_SYMBOL_GPL(tegra_bpmp_put);
+
+static int
+tegra_bpmp_channel_get_thread_index(struct tegra_bpmp_channel *channel)
+{
+	struct tegra_bpmp *bpmp = channel->bpmp;
+	unsigned int count;
+	int index;
+
+	count = bpmp->soc->channels.thread.count;
+
+	index = channel - channel->bpmp->threaded_channels;
+	if (index < 0 || index >= count)
+		return -EINVAL;
+
+	return index;
+}
+
+static bool tegra_bpmp_message_valid(const struct tegra_bpmp_message *msg)
+{
+	return (msg->tx.size <= MSG_DATA_MIN_SZ) &&
+	       (msg->rx.size <= MSG_DATA_MIN_SZ) &&
+	       (msg->tx.size == 0 || msg->tx.data) &&
+	       (msg->rx.size == 0 || msg->rx.data);
+}
+
+static bool tegra_bpmp_master_acked(struct tegra_bpmp_channel *channel)
+{
+	void *frame;
+
+	frame = tegra_ivc_read_get_next_frame(channel->ivc);
+	if (IS_ERR(frame)) {
+		channel->ib = NULL;
+		return false;
+	}
+
+	channel->ib = frame;
+
+	return true;
+}
+
+static int tegra_bpmp_wait_ack(struct tegra_bpmp_channel *channel)
+{
+	unsigned long timeout = channel->bpmp->soc->channels.cpu_tx.timeout;
+	ktime_t end;
+
+	end = ktime_add_us(ktime_get(), timeout);
+
+	do {
+		if (tegra_bpmp_master_acked(channel))
+			return 0;
+	} while (ktime_before(ktime_get(), end));
+
+	return -ETIMEDOUT;
+}
+
+static bool tegra_bpmp_master_free(struct tegra_bpmp_channel *channel)
+{
+	void *frame;
+
+	frame = tegra_ivc_write_get_next_frame(channel->ivc);
+	if (IS_ERR(frame)) {
+		channel->ob = NULL;
+		return false;
+	}
+
+	channel->ob = frame;
+
+	return true;
+}
+
+static int tegra_bpmp_wait_master_free(struct tegra_bpmp_channel *channel)
+{
+	unsigned long timeout = channel->bpmp->soc->channels.cpu_tx.timeout;
+	ktime_t start, now;
+
+	start = ns_to_ktime(local_clock());
+
+	do {
+		if (tegra_bpmp_master_free(channel))
+			return 0;
+
+		now = ns_to_ktime(local_clock());
+	} while (ktime_us_delta(now, start) < timeout);
+
+	return -ETIMEDOUT;
+}
+
+static ssize_t __tegra_bpmp_channel_read(struct tegra_bpmp_channel *channel,
+					 void *data, size_t size, int *ret)
+{
+	int err;
+
+	if (data && size > 0)
+		memcpy(data, channel->ib->data, size);
+
+	err = tegra_ivc_read_advance(channel->ivc);
+	if (err < 0)
+		return err;
+
+	*ret = channel->ib->code;
+
+	return 0;
+}
+
+static ssize_t tegra_bpmp_channel_read(struct tegra_bpmp_channel *channel,
+				       void *data, size_t size, int *ret)
+{
+	struct tegra_bpmp *bpmp = channel->bpmp;
+	unsigned long flags;
+	ssize_t err;
+	int index;
+
+	index = tegra_bpmp_channel_get_thread_index(channel);
+	if (index < 0) {
+		err = index;
+		goto unlock;
+	}
+
+	spin_lock_irqsave(&bpmp->lock, flags);
+	err = __tegra_bpmp_channel_read(channel, data, size, ret);
+	clear_bit(index, bpmp->threaded.allocated);
+	spin_unlock_irqrestore(&bpmp->lock, flags);
+
+unlock:
+	up(&bpmp->threaded.lock);
+
+	return err;
+}
+
+static ssize_t __tegra_bpmp_channel_write(struct tegra_bpmp_channel *channel,
+					  unsigned int mrq, unsigned long flags,
+					  const void *data, size_t size)
+{
+	channel->ob->code = mrq;
+	channel->ob->flags = flags;
+
+	if (data && size > 0)
+		memcpy(channel->ob->data, data, size);
+
+	return tegra_ivc_write_advance(channel->ivc);
+}
+
+static struct tegra_bpmp_channel *
+tegra_bpmp_write_threaded(struct tegra_bpmp *bpmp, unsigned int mrq,
+			  const void *data, size_t size)
+{
+	unsigned long timeout = bpmp->soc->channels.thread.timeout;
+	unsigned int count = bpmp->soc->channels.thread.count;
+	struct tegra_bpmp_channel *channel;
+	unsigned long flags;
+	unsigned int index;
+	int err;
+
+	err = down_timeout(&bpmp->threaded.lock, usecs_to_jiffies(timeout));
+	if (err < 0)
+		return ERR_PTR(err);
+
+	spin_lock_irqsave(&bpmp->lock, flags);
+
+	index = find_first_zero_bit(bpmp->threaded.allocated, count);
+	if (index == count) {
+		err = -EBUSY;
+		goto unlock;
+	}
+
+	channel = &bpmp->threaded_channels[index];
+
+	if (!tegra_bpmp_master_free(channel)) {
+		err = -EBUSY;
+		goto unlock;
+	}
+
+	set_bit(index, bpmp->threaded.allocated);
+
+	err = __tegra_bpmp_channel_write(channel, mrq, MSG_ACK | MSG_RING,
+					 data, size);
+	if (err < 0)
+		goto clear_allocated;
+
+	set_bit(index, bpmp->threaded.busy);
+
+	spin_unlock_irqrestore(&bpmp->lock, flags);
+	return channel;
+
+clear_allocated:
+	clear_bit(index, bpmp->threaded.allocated);
+unlock:
+	spin_unlock_irqrestore(&bpmp->lock, flags);
+	up(&bpmp->threaded.lock);
+
+	return ERR_PTR(err);
+}
+
+static ssize_t tegra_bpmp_channel_write(struct tegra_bpmp_channel *channel,
+					unsigned int mrq, unsigned long flags,
+					const void *data, size_t size)
+{
+	int err;
+
+	err = tegra_bpmp_wait_master_free(channel);
+	if (err < 0)
+		return err;
+
+	return __tegra_bpmp_channel_write(channel, mrq, flags, data, size);
+}
+
+int tegra_bpmp_transfer_atomic(struct tegra_bpmp *bpmp,
+			       struct tegra_bpmp_message *msg)
+{
+	struct tegra_bpmp_channel *channel;
+	int err;
+
+	if (WARN_ON(!irqs_disabled()))
+		return -EPERM;
+
+	if (!tegra_bpmp_message_valid(msg))
+		return -EINVAL;
+
+	channel = bpmp->tx_channel;
+
+	spin_lock(&bpmp->atomic_tx_lock);
+
+	err = tegra_bpmp_channel_write(channel, msg->mrq, MSG_ACK,
+				       msg->tx.data, msg->tx.size);
+	if (err < 0) {
+		spin_unlock(&bpmp->atomic_tx_lock);
+		return err;
+	}
+
+	spin_unlock(&bpmp->atomic_tx_lock);
+
+	err = mbox_send_message(bpmp->mbox.channel, NULL);
+	if (err < 0)
+		return err;
+
+	mbox_client_txdone(bpmp->mbox.channel, 0);
+
+	err = tegra_bpmp_wait_ack(channel);
+	if (err < 0)
+		return err;
+
+	return __tegra_bpmp_channel_read(channel, msg->rx.data, msg->rx.size,
+					 &msg->rx.ret);
+}
+EXPORT_SYMBOL_GPL(tegra_bpmp_transfer_atomic);
+
+int tegra_bpmp_transfer(struct tegra_bpmp *bpmp,
+			struct tegra_bpmp_message *msg)
+{
+	struct tegra_bpmp_channel *channel;
+	unsigned long timeout;
+	int err;
+
+	if (WARN_ON(irqs_disabled()))
+		return -EPERM;
+
+	if (!tegra_bpmp_message_valid(msg))
+		return -EINVAL;
+
+	channel = tegra_bpmp_write_threaded(bpmp, msg->mrq, msg->tx.data,
+					    msg->tx.size);
+	if (IS_ERR(channel))
+		return PTR_ERR(channel);
+
+	err = mbox_send_message(bpmp->mbox.channel, NULL);
+	if (err < 0)
+		return err;
+
+	mbox_client_txdone(bpmp->mbox.channel, 0);
+
+	timeout = usecs_to_jiffies(bpmp->soc->channels.thread.timeout);
+
+	err = wait_for_completion_timeout(&channel->completion, timeout);
+	if (err == 0)
+		return -ETIMEDOUT;
+
+	return tegra_bpmp_channel_read(channel, msg->rx.data, msg->rx.size,
+				       &msg->rx.ret);
+}
+EXPORT_SYMBOL_GPL(tegra_bpmp_transfer);
+
+static struct tegra_bpmp_mrq *tegra_bpmp_find_mrq(struct tegra_bpmp *bpmp,
+						  unsigned int mrq)
+{
+	struct tegra_bpmp_mrq *entry;
+
+	list_for_each_entry(entry, &bpmp->mrqs, list)
+		if (entry->mrq == mrq)
+			return entry;
+
+	return NULL;
+}
+
+void tegra_bpmp_mrq_return(struct tegra_bpmp_channel *channel, int code,
+			   const void *data, size_t size)
+{
+	unsigned long flags = channel->ib->flags;
+	struct tegra_bpmp *bpmp = channel->bpmp;
+	struct tegra_bpmp_mb_data *frame;
+	int err;
+
+	if (WARN_ON(size > MSG_DATA_MIN_SZ))
+		return;
+
+	err = tegra_ivc_read_advance(channel->ivc);
+	if (WARN_ON(err < 0))
+		return;
+
+	if ((flags & MSG_ACK) == 0)
+		return;
+
+	frame = tegra_ivc_write_get_next_frame(channel->ivc);
+	if (WARN_ON(IS_ERR(frame)))
+		return;
+
+	frame->code = code;
+
+	if (data && size > 0)
+		memcpy(frame->data, data, size);
+
+	err = tegra_ivc_write_advance(channel->ivc);
+	if (WARN_ON(err < 0))
+		return;
+
+	if (flags & MSG_RING) {
+		err = mbox_send_message(bpmp->mbox.channel, NULL);
+		if (WARN_ON(err < 0))
+			return;
+
+		mbox_client_txdone(bpmp->mbox.channel, 0);
+	}
+}
+EXPORT_SYMBOL_GPL(tegra_bpmp_mrq_return);
+
+static void tegra_bpmp_handle_mrq(struct tegra_bpmp *bpmp,
+				  unsigned int mrq,
+				  struct tegra_bpmp_channel *channel)
+{
+	struct tegra_bpmp_mrq *entry;
+	u32 zero = 0;
+
+	spin_lock(&bpmp->lock);
+
+	entry = tegra_bpmp_find_mrq(bpmp, mrq);
+	if (!entry) {
+		spin_unlock(&bpmp->lock);
+		tegra_bpmp_mrq_return(channel, -EINVAL, &zero, sizeof(zero));
+		return;
+	}
+
+	entry->handler(mrq, channel, entry->data);
+
+	spin_unlock(&bpmp->lock);
+}
+
+int tegra_bpmp_request_mrq(struct tegra_bpmp *bpmp, unsigned int mrq,
+			   tegra_bpmp_mrq_handler_t handler, void *data)
+{
+	struct tegra_bpmp_mrq *entry;
+	unsigned long flags;
+
+	if (!handler)
+		return -EINVAL;
+
+	entry = devm_kzalloc(bpmp->dev, sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	spin_lock_irqsave(&bpmp->lock, flags);
+
+	entry->mrq = mrq;
+	entry->handler = handler;
+	entry->data = data;
+	list_add(&entry->list, &bpmp->mrqs);
+
+	spin_unlock_irqrestore(&bpmp->lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(tegra_bpmp_request_mrq);
+
+void tegra_bpmp_free_mrq(struct tegra_bpmp *bpmp, unsigned int mrq, void *data)
+{
+	struct tegra_bpmp_mrq *entry;
+	unsigned long flags;
+
+	spin_lock_irqsave(&bpmp->lock, flags);
+
+	entry = tegra_bpmp_find_mrq(bpmp, mrq);
+	if (!entry)
+		goto unlock;
+
+	list_del(&entry->list);
+	devm_kfree(bpmp->dev, entry);
+
+unlock:
+	spin_unlock_irqrestore(&bpmp->lock, flags);
+}
+EXPORT_SYMBOL_GPL(tegra_bpmp_free_mrq);
+
+static void tegra_bpmp_mrq_handle_ping(unsigned int mrq,
+				       struct tegra_bpmp_channel *channel,
+				       void *data)
+{
+	struct mrq_ping_request *request;
+	struct mrq_ping_response response;
+
+	request = (struct mrq_ping_request *)channel->ib->data;
+
+	memset(&response, 0, sizeof(response));
+	response.reply = request->challenge << 1;
+
+	tegra_bpmp_mrq_return(channel, 0, &response, sizeof(response));
+}
+
+static int tegra_bpmp_ping(struct tegra_bpmp *bpmp)
+{
+	struct mrq_ping_response response;
+	struct mrq_ping_request request;
+	struct tegra_bpmp_message msg;
+	unsigned long flags;
+	ktime_t start, end;
+	int err;
+
+	memset(&request, 0, sizeof(request));
+	request.challenge = 1;
+
+	memset(&response, 0, sizeof(response));
+
+	memset(&msg, 0, sizeof(msg));
+	msg.mrq = MRQ_PING;
+	msg.tx.data = &request;
+	msg.tx.size = sizeof(request);
+	msg.rx.data = &response;
+	msg.rx.size = sizeof(response);
+
+	local_irq_save(flags);
+	start = ktime_get();
+	err = tegra_bpmp_transfer_atomic(bpmp, &msg);
+	end = ktime_get();
+	local_irq_restore(flags);
+
+	if (!err)
+		dev_dbg(bpmp->dev,
+			"ping ok: challenge: %u, response: %u, time: %lld\n",
+			request.challenge, response.reply,
+			ktime_to_us(ktime_sub(end, start)));
+
+	return err;
+}
+
+static int tegra_bpmp_get_firmware_tag(struct tegra_bpmp *bpmp, char *tag,
+				       size_t size)
+{
+	struct mrq_query_tag_request request;
+	struct tegra_bpmp_message msg;
+	unsigned long flags;
+	dma_addr_t phys;
+	void *virt;
+	int err;
+
+	virt = dma_alloc_coherent(bpmp->dev, MSG_DATA_MIN_SZ, &phys,
+				  GFP_KERNEL | GFP_DMA32);
+	if (!virt)
+		return -ENOMEM;
+
+	memset(&request, 0, sizeof(request));
+	request.addr = phys;
+
+	memset(&msg, 0, sizeof(msg));
+	msg.mrq = MRQ_QUERY_TAG;
+	msg.tx.data = &request;
+	msg.tx.size = sizeof(request);
+
+	local_irq_save(flags);
+	err = tegra_bpmp_transfer_atomic(bpmp, &msg);
+	local_irq_restore(flags);
+
+	if (err == 0)
+		strlcpy(tag, virt, size);
+
+	dma_free_coherent(bpmp->dev, MSG_DATA_MIN_SZ, virt, phys);
+
+	return err;
+}
+
+static void tegra_bpmp_channel_signal(struct tegra_bpmp_channel *channel)
+{
+	unsigned long flags = channel->ob->flags;
+
+	if ((flags & MSG_RING) == 0)
+		return;
+
+	complete(&channel->completion);
+}
+
+static void tegra_bpmp_handle_rx(struct mbox_client *client, void *data)
+{
+	struct tegra_bpmp *bpmp = mbox_client_to_bpmp(client);
+	struct tegra_bpmp_channel *channel;
+	unsigned int i, count;
+	unsigned long *busy;
+
+	channel = bpmp->rx_channel;
+	count = bpmp->soc->channels.thread.count;
+	busy = bpmp->threaded.busy;
+
+	if (tegra_bpmp_master_acked(channel))
+		tegra_bpmp_handle_mrq(bpmp, channel->ib->code, channel);
+
+	spin_lock(&bpmp->lock);
+
+	for_each_set_bit(i, busy, count) {
+		struct tegra_bpmp_channel *channel;
+
+		channel = &bpmp->threaded_channels[i];
+
+		if (tegra_bpmp_master_acked(channel)) {
+			tegra_bpmp_channel_signal(channel);
+			clear_bit(i, busy);
+		}
+	}
+
+	spin_unlock(&bpmp->lock);
+}
+
+static void tegra_bpmp_ivc_notify(struct tegra_ivc *ivc, void *data)
+{
+	struct tegra_bpmp *bpmp = data;
+	int err;
+
+	if (WARN_ON(bpmp->mbox.channel == NULL))
+		return;
+
+	err = mbox_send_message(bpmp->mbox.channel, NULL);
+	if (err < 0)
+		return;
+
+	mbox_client_txdone(bpmp->mbox.channel, 0);
+}
+
+static int tegra_bpmp_channel_init(struct tegra_bpmp_channel *channel,
+				   struct tegra_bpmp *bpmp,
+				   unsigned int index)
+{
+	size_t message_size, queue_size;
+	unsigned int offset;
+	int err;
+
+	channel->ivc = devm_kzalloc(bpmp->dev, sizeof(*channel->ivc),
+				    GFP_KERNEL);
+	if (!channel->ivc)
+		return -ENOMEM;
+
+	message_size = tegra_ivc_align(MSG_MIN_SZ);
+	queue_size = tegra_ivc_total_queue_size(message_size);
+	offset = queue_size * index;
+
+	err = tegra_ivc_init(channel->ivc, NULL,
+			     bpmp->rx.virt + offset, bpmp->rx.phys + offset,
+			     bpmp->tx.virt + offset, bpmp->tx.phys + offset,
+			     1, message_size, tegra_bpmp_ivc_notify,
+			     bpmp);
+	if (err < 0) {
+		dev_err(bpmp->dev, "failed to setup IVC for channel %u: %d\n",
+			index, err);
+		return err;
+	}
+
+	init_completion(&channel->completion);
+	channel->bpmp = bpmp;
+
+	return 0;
+}
+
+static void tegra_bpmp_channel_reset(struct tegra_bpmp_channel *channel)
+{
+	/* reset the channel state */
+	tegra_ivc_reset(channel->ivc);
+
+	/* sync the channel state with BPMP */
+	while (tegra_ivc_notified(channel->ivc))
+		;
+}
+
+static void tegra_bpmp_channel_cleanup(struct tegra_bpmp_channel *channel)
+{
+	tegra_ivc_cleanup(channel->ivc);
+}
+
+static int tegra_bpmp_probe(struct platform_device *pdev)
+{
+	struct tegra_bpmp *bpmp;
+	unsigned int i;
+	char tag[32];
+	size_t size;
+	int err;
+
+	bpmp = devm_kzalloc(&pdev->dev, sizeof(*bpmp), GFP_KERNEL);
+	if (!bpmp)
+		return -ENOMEM;
+
+	bpmp->soc = of_device_get_match_data(&pdev->dev);
+	bpmp->dev = &pdev->dev;
+
+	bpmp->tx.pool = of_gen_pool_get(pdev->dev.of_node, "shmem", 0);
+	if (!bpmp->tx.pool) {
+		dev_err(&pdev->dev, "TX shmem pool not found\n");
+		return -ENOMEM;
+	}
+
+	bpmp->tx.virt = gen_pool_dma_alloc(bpmp->tx.pool, 4096, &bpmp->tx.phys);
+	if (!bpmp->tx.virt) {
+		dev_err(&pdev->dev, "failed to allocate from TX pool\n");
+		return -ENOMEM;
+	}
+
+	bpmp->rx.pool = of_gen_pool_get(pdev->dev.of_node, "shmem", 1);
+	if (!bpmp->rx.pool) {
+		dev_err(&pdev->dev, "RX shmem pool not found\n");
+		err = -ENOMEM;
+		goto free_tx;
+	}
+
+	bpmp->rx.virt = gen_pool_dma_alloc(bpmp->rx.pool, 4096, &bpmp->rx.phys);
+	if (!bpmp->rx.virt) {
+		dev_err(&pdev->dev, "failed to allocate from RX pool\n");
+		err = -ENOMEM;
+		goto free_tx;
+	}
+
+	INIT_LIST_HEAD(&bpmp->mrqs);
+	spin_lock_init(&bpmp->lock);
+
+	bpmp->threaded.count = bpmp->soc->channels.thread.count;
+	sema_init(&bpmp->threaded.lock, bpmp->threaded.count);
+
+	size = BITS_TO_LONGS(bpmp->threaded.count) * sizeof(long);
+
+	bpmp->threaded.allocated = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
+	if (!bpmp->threaded.allocated) {
+		err = -ENOMEM;
+		goto free_rx;
+	}
+
+	bpmp->threaded.busy = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
+	if (!bpmp->threaded.busy) {
+		err = -ENOMEM;
+		goto free_rx;
+	}
+
+	spin_lock_init(&bpmp->atomic_tx_lock);
+	bpmp->tx_channel = devm_kzalloc(&pdev->dev, sizeof(*bpmp->tx_channel),
+					GFP_KERNEL);
+	if (!bpmp->tx_channel) {
+		err = -ENOMEM;
+		goto free_rx;
+	}
+
+	bpmp->rx_channel = devm_kzalloc(&pdev->dev, sizeof(*bpmp->rx_channel),
+	                                GFP_KERNEL);
+	if (!bpmp->rx_channel) {
+		err = -ENOMEM;
+		goto free_rx;
+	}
+
+	bpmp->threaded_channels = devm_kcalloc(&pdev->dev, bpmp->threaded.count,
+					       sizeof(*bpmp->threaded_channels),
+					       GFP_KERNEL);
+	if (!bpmp->threaded_channels) {
+		err = -ENOMEM;
+		goto free_rx;
+	}
+
+	err = tegra_bpmp_channel_init(bpmp->tx_channel, bpmp,
+				      bpmp->soc->channels.cpu_tx.offset);
+	if (err < 0)
+		goto free_rx;
+
+	err = tegra_bpmp_channel_init(bpmp->rx_channel, bpmp,
+				      bpmp->soc->channels.cpu_rx.offset);
+	if (err < 0)
+		goto cleanup_tx_channel;
+
+	for (i = 0; i < bpmp->threaded.count; i++) {
+		err = tegra_bpmp_channel_init(
+			&bpmp->threaded_channels[i], bpmp,
+			bpmp->soc->channels.thread.offset + i);
+		if (err < 0)
+			goto cleanup_threaded_channels;
+	}
+
+	/* mbox registration */
+	bpmp->mbox.client.dev = &pdev->dev;
+	bpmp->mbox.client.rx_callback = tegra_bpmp_handle_rx;
+	bpmp->mbox.client.tx_block = false;
+	bpmp->mbox.client.knows_txdone = false;
+
+	bpmp->mbox.channel = mbox_request_channel(&bpmp->mbox.client, 0);
+	if (IS_ERR(bpmp->mbox.channel)) {
+		err = PTR_ERR(bpmp->mbox.channel);
+		dev_err(&pdev->dev, "failed to get HSP mailbox: %d\n", err);
+		goto cleanup_threaded_channels;
+	}
+
+	/* reset message channels */
+	tegra_bpmp_channel_reset(bpmp->tx_channel);
+	tegra_bpmp_channel_reset(bpmp->rx_channel);
+	for (i = 0; i < bpmp->threaded.count; i++)
+		tegra_bpmp_channel_reset(&bpmp->threaded_channels[i]);
+
+	err = tegra_bpmp_request_mrq(bpmp, MRQ_PING,
+				     tegra_bpmp_mrq_handle_ping, bpmp);
+	if (err < 0)
+		goto free_mbox;
+
+	err = tegra_bpmp_ping(bpmp);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to ping BPMP: %d\n", err);
+		goto free_mrq;
+	}
+
+	err = tegra_bpmp_get_firmware_tag(bpmp, tag, sizeof(tag) - 1);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to get firmware tag: %d\n", err);
+		goto free_mrq;
+	}
+
+	dev_info(&pdev->dev, "firmware: %s\n", tag);
+
+	platform_set_drvdata(pdev, bpmp);
+
+	err = of_platform_default_populate(pdev->dev.of_node, NULL, &pdev->dev);
+	if (err < 0)
+		goto free_mrq;
+
+	err = tegra_bpmp_init_clocks(bpmp);
+	if (err < 0)
+		goto free_mrq;
+
+	err = tegra_bpmp_init_resets(bpmp);
+	if (err < 0)
+		goto free_mrq;
+
+	err = tegra_bpmp_init_powergates(bpmp);
+	if (err < 0)
+		goto free_mrq;
+
+	err = tegra_bpmp_init_debugfs(bpmp);
+	if (err < 0)
+		dev_err(&pdev->dev, "debugfs initialization failed: %d\n", err);
+
+	return 0;
+
+free_mrq:
+	tegra_bpmp_free_mrq(bpmp, MRQ_PING, bpmp);
+free_mbox:
+	mbox_free_channel(bpmp->mbox.channel);
+cleanup_threaded_channels:
+	for (i = 0; i < bpmp->threaded.count; i++) {
+		if (bpmp->threaded_channels[i].bpmp)
+			tegra_bpmp_channel_cleanup(&bpmp->threaded_channels[i]);
+	}
+
+	tegra_bpmp_channel_cleanup(bpmp->rx_channel);
+cleanup_tx_channel:
+	tegra_bpmp_channel_cleanup(bpmp->tx_channel);
+free_rx:
+	gen_pool_free(bpmp->rx.pool, (unsigned long)bpmp->rx.virt, 4096);
+free_tx:
+	gen_pool_free(bpmp->tx.pool, (unsigned long)bpmp->tx.virt, 4096);
+	return err;
+}
+
+static const struct tegra_bpmp_soc tegra186_soc = {
+	.channels = {
+		.cpu_tx = {
+			.offset = 3,
+			.timeout = 60 * USEC_PER_SEC,
+		},
+		.thread = {
+			.offset = 0,
+			.count = 3,
+			.timeout = 600 * USEC_PER_SEC,
+		},
+		.cpu_rx = {
+			.offset = 13,
+			.timeout = 0,
+		},
+	},
+	.num_resets = 193,
+};
+
+static const struct of_device_id tegra_bpmp_match[] = {
+	{ .compatible = "nvidia,tegra186-bpmp", .data = &tegra186_soc },
+	{ }
+};
+
+static struct platform_driver tegra_bpmp_driver = {
+	.driver = {
+		.name = "tegra-bpmp",
+		.of_match_table = tegra_bpmp_match,
+	},
+	.probe = tegra_bpmp_probe,
+};
+
+static int __init tegra_bpmp_init(void)
+{
+	return platform_driver_register(&tegra_bpmp_driver);
+}
+core_initcall(tegra_bpmp_init);
diff --git a/drivers/firmware/tegra/ivc.c b/drivers/firmware/tegra/ivc.c
new file mode 100644
index 0000000..00de793
--- /dev/null
+++ b/drivers/firmware/tegra/ivc.c
@@ -0,0 +1,695 @@
+/*
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <soc/tegra/ivc.h>
+
+#define TEGRA_IVC_ALIGN 64
+
+/*
+ * IVC channel reset protocol.
+ *
+ * Each end uses its tx_channel.state to indicate its synchronization state.
+ */
+enum tegra_ivc_state {
+	/*
+	 * This value is zero for backwards compatibility with services that
+	 * assume channels to be initially zeroed. Such channels are in an
+	 * initially valid state, but cannot be asynchronously reset, and must
+	 * maintain a valid state at all times.
+	 *
+	 * The transmitting end can enter the established state from the sync or
+	 * ack state when it observes the receiving endpoint in the ack or
+	 * established state, indicating that has cleared the counters in our
+	 * rx_channel.
+	 */
+	TEGRA_IVC_STATE_ESTABLISHED = 0,
+
+	/*
+	 * If an endpoint is observed in the sync state, the remote endpoint is
+	 * allowed to clear the counters it owns asynchronously with respect to
+	 * the current endpoint. Therefore, the current endpoint is no longer
+	 * allowed to communicate.
+	 */
+	TEGRA_IVC_STATE_SYNC,
+
+	/*
+	 * When the transmitting end observes the receiving end in the sync
+	 * state, it can clear the w_count and r_count and transition to the ack
+	 * state. If the remote endpoint observes us in the ack state, it can
+	 * return to the established state once it has cleared its counters.
+	 */
+	TEGRA_IVC_STATE_ACK
+};
+
+/*
+ * This structure is divided into two-cache aligned parts, the first is only
+ * written through the tx.channel pointer, while the second is only written
+ * through the rx.channel pointer. This delineates ownership of the cache
+ * lines, which is critical to performance and necessary in non-cache coherent
+ * implementations.
+ */
+struct tegra_ivc_header {
+	union {
+		struct {
+			/* fields owned by the transmitting end */
+			u32 count;
+			u32 state;
+		};
+
+		u8 pad[TEGRA_IVC_ALIGN];
+	} tx;
+
+	union {
+		/* fields owned by the receiving end */
+		u32 count;
+		u8 pad[TEGRA_IVC_ALIGN];
+	} rx;
+};
+
+static inline void tegra_ivc_invalidate(struct tegra_ivc *ivc, dma_addr_t phys)
+{
+	if (!ivc->peer)
+		return;
+
+	dma_sync_single_for_cpu(ivc->peer, phys, TEGRA_IVC_ALIGN,
+				DMA_FROM_DEVICE);
+}
+
+static inline void tegra_ivc_flush(struct tegra_ivc *ivc, dma_addr_t phys)
+{
+	if (!ivc->peer)
+		return;
+
+	dma_sync_single_for_device(ivc->peer, phys, TEGRA_IVC_ALIGN,
+				   DMA_TO_DEVICE);
+}
+
+static inline bool tegra_ivc_empty(struct tegra_ivc *ivc,
+				   struct tegra_ivc_header *header)
+{
+	/*
+	 * This function performs multiple checks on the same values with
+	 * security implications, so create snapshots with READ_ONCE() to
+	 * ensure that these checks use the same values.
+	 */
+	u32 tx = READ_ONCE(header->tx.count);
+	u32 rx = READ_ONCE(header->rx.count);
+
+	/*
+	 * Perform an over-full check to prevent denial of service attacks
+	 * where a server could be easily fooled into believing that there's
+	 * an extremely large number of frames ready, since receivers are not
+	 * expected to check for full or over-full conditions.
+	 *
+	 * Although the channel isn't empty, this is an invalid case caused by
+	 * a potentially malicious peer, so returning empty is safer, because
+	 * it gives the impression that the channel has gone silent.
+	 */
+	if (tx - rx > ivc->num_frames)
+		return true;
+
+	return tx == rx;
+}
+
+static inline bool tegra_ivc_full(struct tegra_ivc *ivc,
+				  struct tegra_ivc_header *header)
+{
+	u32 tx = READ_ONCE(header->tx.count);
+	u32 rx = READ_ONCE(header->rx.count);
+
+	/*
+	 * Invalid cases where the counters indicate that the queue is over
+	 * capacity also appear full.
+	 */
+	return tx - rx >= ivc->num_frames;
+}
+
+static inline u32 tegra_ivc_available(struct tegra_ivc *ivc,
+				      struct tegra_ivc_header *header)
+{
+	u32 tx = READ_ONCE(header->tx.count);
+	u32 rx = READ_ONCE(header->rx.count);
+
+	/*
+	 * This function isn't expected to be used in scenarios where an
+	 * over-full situation can lead to denial of service attacks. See the
+	 * comment in tegra_ivc_empty() for an explanation about special
+	 * over-full considerations.
+	 */
+	return tx - rx;
+}
+
+static inline void tegra_ivc_advance_tx(struct tegra_ivc *ivc)
+{
+	WRITE_ONCE(ivc->tx.channel->tx.count,
+		   READ_ONCE(ivc->tx.channel->tx.count) + 1);
+
+	if (ivc->tx.position == ivc->num_frames - 1)
+		ivc->tx.position = 0;
+	else
+		ivc->tx.position++;
+}
+
+static inline void tegra_ivc_advance_rx(struct tegra_ivc *ivc)
+{
+	WRITE_ONCE(ivc->rx.channel->rx.count,
+		   READ_ONCE(ivc->rx.channel->rx.count) + 1);
+
+	if (ivc->rx.position == ivc->num_frames - 1)
+		ivc->rx.position = 0;
+	else
+		ivc->rx.position++;
+}
+
+static inline int tegra_ivc_check_read(struct tegra_ivc *ivc)
+{
+	unsigned int offset = offsetof(struct tegra_ivc_header, tx.count);
+
+	/*
+	 * tx.channel->state is set locally, so it is not synchronized with
+	 * state from the remote peer. The remote peer cannot reset its
+	 * transmit counters until we've acknowledged its synchronization
+	 * request, so no additional synchronization is required because an
+	 * asynchronous transition of rx.channel->state to
+	 * TEGRA_IVC_STATE_ACK is not allowed.
+	 */
+	if (ivc->tx.channel->tx.state != TEGRA_IVC_STATE_ESTABLISHED)
+		return -ECONNRESET;
+
+	/*
+	 * Avoid unnecessary invalidations when performing repeated accesses
+	 * to an IVC channel by checking the old queue pointers first.
+	 *
+	 * Synchronization is only necessary when these pointers indicate
+	 * empty or full.
+	 */
+	if (!tegra_ivc_empty(ivc, ivc->rx.channel))
+		return 0;
+
+	tegra_ivc_invalidate(ivc, ivc->rx.phys + offset);
+
+	if (tegra_ivc_empty(ivc, ivc->rx.channel))
+		return -ENOSPC;
+
+	return 0;
+}
+
+static inline int tegra_ivc_check_write(struct tegra_ivc *ivc)
+{
+	unsigned int offset = offsetof(struct tegra_ivc_header, rx.count);
+
+	if (ivc->tx.channel->tx.state != TEGRA_IVC_STATE_ESTABLISHED)
+		return -ECONNRESET;
+
+	if (!tegra_ivc_full(ivc, ivc->tx.channel))
+		return 0;
+
+	tegra_ivc_invalidate(ivc, ivc->tx.phys + offset);
+
+	if (tegra_ivc_full(ivc, ivc->tx.channel))
+		return -ENOSPC;
+
+	return 0;
+}
+
+static void *tegra_ivc_frame_virt(struct tegra_ivc *ivc,
+				  struct tegra_ivc_header *header,
+				  unsigned int frame)
+{
+	if (WARN_ON(frame >= ivc->num_frames))
+		return ERR_PTR(-EINVAL);
+
+	return (void *)(header + 1) + ivc->frame_size * frame;
+}
+
+static inline dma_addr_t tegra_ivc_frame_phys(struct tegra_ivc *ivc,
+					      dma_addr_t phys,
+					      unsigned int frame)
+{
+	unsigned long offset;
+
+	offset = sizeof(struct tegra_ivc_header) + ivc->frame_size * frame;
+
+	return phys + offset;
+}
+
+static inline void tegra_ivc_invalidate_frame(struct tegra_ivc *ivc,
+					      dma_addr_t phys,
+					      unsigned int frame,
+					      unsigned int offset,
+					      size_t size)
+{
+	if (!ivc->peer || WARN_ON(frame >= ivc->num_frames))
+		return;
+
+	phys = tegra_ivc_frame_phys(ivc, phys, frame) + offset;
+
+	dma_sync_single_for_cpu(ivc->peer, phys, size, DMA_FROM_DEVICE);
+}
+
+static inline void tegra_ivc_flush_frame(struct tegra_ivc *ivc,
+					 dma_addr_t phys,
+					 unsigned int frame,
+					 unsigned int offset,
+					 size_t size)
+{
+	if (!ivc->peer || WARN_ON(frame >= ivc->num_frames))
+		return;
+
+	phys = tegra_ivc_frame_phys(ivc, phys, frame) + offset;
+
+	dma_sync_single_for_device(ivc->peer, phys, size, DMA_TO_DEVICE);
+}
+
+/* directly peek at the next frame rx'ed */
+void *tegra_ivc_read_get_next_frame(struct tegra_ivc *ivc)
+{
+	int err;
+
+	if (WARN_ON(ivc == NULL))
+		return ERR_PTR(-EINVAL);
+
+	err = tegra_ivc_check_read(ivc);
+	if (err < 0)
+		return ERR_PTR(err);
+
+	/*
+	 * Order observation of ivc->rx.position potentially indicating new
+	 * data before data read.
+	 */
+	smp_rmb();
+
+	tegra_ivc_invalidate_frame(ivc, ivc->rx.phys, ivc->rx.position, 0,
+				   ivc->frame_size);
+
+	return tegra_ivc_frame_virt(ivc, ivc->rx.channel, ivc->rx.position);
+}
+EXPORT_SYMBOL(tegra_ivc_read_get_next_frame);
+
+int tegra_ivc_read_advance(struct tegra_ivc *ivc)
+{
+	unsigned int rx = offsetof(struct tegra_ivc_header, rx.count);
+	unsigned int tx = offsetof(struct tegra_ivc_header, tx.count);
+	int err;
+
+	/*
+	 * No read barriers or synchronization here: the caller is expected to
+	 * have already observed the channel non-empty. This check is just to
+	 * catch programming errors.
+	 */
+	err = tegra_ivc_check_read(ivc);
+	if (err < 0)
+		return err;
+
+	tegra_ivc_advance_rx(ivc);
+
+	tegra_ivc_flush(ivc, ivc->rx.phys + rx);
+
+	/*
+	 * Ensure our write to ivc->rx.position occurs before our read from
+	 * ivc->tx.position.
+	 */
+	smp_mb();
+
+	/*
+	 * Notify only upon transition from full to non-full. The available
+	 * count can only asynchronously increase, so the worst possible
+	 * side-effect will be a spurious notification.
+	 */
+	tegra_ivc_invalidate(ivc, ivc->rx.phys + tx);
+
+	if (tegra_ivc_available(ivc, ivc->rx.channel) == ivc->num_frames - 1)
+		ivc->notify(ivc, ivc->notify_data);
+
+	return 0;
+}
+EXPORT_SYMBOL(tegra_ivc_read_advance);
+
+/* directly poke at the next frame to be tx'ed */
+void *tegra_ivc_write_get_next_frame(struct tegra_ivc *ivc)
+{
+	int err;
+
+	err = tegra_ivc_check_write(ivc);
+	if (err < 0)
+		return ERR_PTR(err);
+
+	return tegra_ivc_frame_virt(ivc, ivc->tx.channel, ivc->tx.position);
+}
+EXPORT_SYMBOL(tegra_ivc_write_get_next_frame);
+
+/* advance the tx buffer */
+int tegra_ivc_write_advance(struct tegra_ivc *ivc)
+{
+	unsigned int tx = offsetof(struct tegra_ivc_header, tx.count);
+	unsigned int rx = offsetof(struct tegra_ivc_header, rx.count);
+	int err;
+
+	err = tegra_ivc_check_write(ivc);
+	if (err < 0)
+		return err;
+
+	tegra_ivc_flush_frame(ivc, ivc->tx.phys, ivc->tx.position, 0,
+			      ivc->frame_size);
+
+	/*
+	 * Order any possible stores to the frame before update of
+	 * ivc->tx.position.
+	 */
+	smp_wmb();
+
+	tegra_ivc_advance_tx(ivc);
+	tegra_ivc_flush(ivc, ivc->tx.phys + tx);
+
+	/*
+	 * Ensure our write to ivc->tx.position occurs before our read from
+	 * ivc->rx.position.
+	 */
+	smp_mb();
+
+	/*
+	 * Notify only upon transition from empty to non-empty. The available
+	 * count can only asynchronously decrease, so the worst possible
+	 * side-effect will be a spurious notification.
+	 */
+	tegra_ivc_invalidate(ivc, ivc->tx.phys + rx);
+
+	if (tegra_ivc_available(ivc, ivc->tx.channel) == 1)
+		ivc->notify(ivc, ivc->notify_data);
+
+	return 0;
+}
+EXPORT_SYMBOL(tegra_ivc_write_advance);
+
+void tegra_ivc_reset(struct tegra_ivc *ivc)
+{
+	unsigned int offset = offsetof(struct tegra_ivc_header, tx.count);
+
+	ivc->tx.channel->tx.state = TEGRA_IVC_STATE_SYNC;
+	tegra_ivc_flush(ivc, ivc->tx.phys + offset);
+	ivc->notify(ivc, ivc->notify_data);
+}
+EXPORT_SYMBOL(tegra_ivc_reset);
+
+/*
+ * =======================================================
+ *  IVC State Transition Table - see tegra_ivc_notified()
+ * =======================================================
+ *
+ *	local	remote	action
+ *	-----	------	-----------------------------------
+ *	SYNC	EST	<none>
+ *	SYNC	ACK	reset counters; move to EST; notify
+ *	SYNC	SYNC	reset counters; move to ACK; notify
+ *	ACK	EST	move to EST; notify
+ *	ACK	ACK	move to EST; notify
+ *	ACK	SYNC	reset counters; move to ACK; notify
+ *	EST	EST	<none>
+ *	EST	ACK	<none>
+ *	EST	SYNC	reset counters; move to ACK; notify
+ *
+ * ===============================================================
+ */
+
+int tegra_ivc_notified(struct tegra_ivc *ivc)
+{
+	unsigned int offset = offsetof(struct tegra_ivc_header, tx.count);
+	enum tegra_ivc_state state;
+
+	/* Copy the receiver's state out of shared memory. */
+	tegra_ivc_invalidate(ivc, ivc->rx.phys + offset);
+	state = READ_ONCE(ivc->rx.channel->tx.state);
+
+	if (state == TEGRA_IVC_STATE_SYNC) {
+		offset = offsetof(struct tegra_ivc_header, tx.count);
+
+		/*
+		 * Order observation of TEGRA_IVC_STATE_SYNC before stores
+		 * clearing tx.channel.
+		 */
+		smp_rmb();
+
+		/*
+		 * Reset tx.channel counters. The remote end is in the SYNC
+		 * state and won't make progress until we change our state,
+		 * so the counters are not in use at this time.
+		 */
+		ivc->tx.channel->tx.count = 0;
+		ivc->rx.channel->rx.count = 0;
+
+		ivc->tx.position = 0;
+		ivc->rx.position = 0;
+
+		/*
+		 * Ensure that counters appear cleared before new state can be
+		 * observed.
+		 */
+		smp_wmb();
+
+		/*
+		 * Move to ACK state. We have just cleared our counters, so it
+		 * is now safe for the remote end to start using these values.
+		 */
+		ivc->tx.channel->tx.state = TEGRA_IVC_STATE_ACK;
+		tegra_ivc_flush(ivc, ivc->tx.phys + offset);
+
+		/*
+		 * Notify remote end to observe state transition.
+		 */
+		ivc->notify(ivc, ivc->notify_data);
+
+	} else if (ivc->tx.channel->tx.state == TEGRA_IVC_STATE_SYNC &&
+		   state == TEGRA_IVC_STATE_ACK) {
+		offset = offsetof(struct tegra_ivc_header, tx.count);
+
+		/*
+		 * Order observation of ivc_state_sync before stores clearing
+		 * tx_channel.
+		 */
+		smp_rmb();
+
+		/*
+		 * Reset tx.channel counters. The remote end is in the ACK
+		 * state and won't make progress until we change our state,
+		 * so the counters are not in use at this time.
+		 */
+		ivc->tx.channel->tx.count = 0;
+		ivc->rx.channel->rx.count = 0;
+
+		ivc->tx.position = 0;
+		ivc->rx.position = 0;
+
+		/*
+		 * Ensure that counters appear cleared before new state can be
+		 * observed.
+		 */
+		smp_wmb();
+
+		/*
+		 * Move to ESTABLISHED state. We know that the remote end has
+		 * already cleared its counters, so it is safe to start
+		 * writing/reading on this channel.
+		 */
+		ivc->tx.channel->tx.state = TEGRA_IVC_STATE_ESTABLISHED;
+		tegra_ivc_flush(ivc, ivc->tx.phys + offset);
+
+		/*
+		 * Notify remote end to observe state transition.
+		 */
+		ivc->notify(ivc, ivc->notify_data);
+
+	} else if (ivc->tx.channel->tx.state == TEGRA_IVC_STATE_ACK) {
+		offset = offsetof(struct tegra_ivc_header, tx.count);
+
+		/*
+		 * At this point, we have observed the peer to be in either
+		 * the ACK or ESTABLISHED state. Next, order observation of
+		 * peer state before storing to tx.channel.
+		 */
+		smp_rmb();
+
+		/*
+		 * Move to ESTABLISHED state. We know that we have previously
+		 * cleared our counters, and we know that the remote end has
+		 * cleared its counters, so it is safe to start writing/reading
+		 * on this channel.
+		 */
+		ivc->tx.channel->tx.state = TEGRA_IVC_STATE_ESTABLISHED;
+		tegra_ivc_flush(ivc, ivc->tx.phys + offset);
+
+		/*
+		 * Notify remote end to observe state transition.
+		 */
+		ivc->notify(ivc, ivc->notify_data);
+
+	} else {
+		/*
+		 * There is no need to handle any further action. Either the
+		 * channel is already fully established, or we are waiting for
+		 * the remote end to catch up with our current state. Refer
+		 * to the diagram in "IVC State Transition Table" above.
+		 */
+	}
+
+	if (ivc->tx.channel->tx.state != TEGRA_IVC_STATE_ESTABLISHED)
+		return -EAGAIN;
+
+	return 0;
+}
+EXPORT_SYMBOL(tegra_ivc_notified);
+
+size_t tegra_ivc_align(size_t size)
+{
+	return ALIGN(size, TEGRA_IVC_ALIGN);
+}
+EXPORT_SYMBOL(tegra_ivc_align);
+
+unsigned tegra_ivc_total_queue_size(unsigned queue_size)
+{
+	if (!IS_ALIGNED(queue_size, TEGRA_IVC_ALIGN)) {
+		pr_err("%s: queue_size (%u) must be %u-byte aligned\n",
+		       __func__, queue_size, TEGRA_IVC_ALIGN);
+		return 0;
+	}
+
+	return queue_size + sizeof(struct tegra_ivc_header);
+}
+EXPORT_SYMBOL(tegra_ivc_total_queue_size);
+
+static int tegra_ivc_check_params(unsigned long rx, unsigned long tx,
+				  unsigned int num_frames, size_t frame_size)
+{
+	BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct tegra_ivc_header, tx.count),
+				 TEGRA_IVC_ALIGN));
+	BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct tegra_ivc_header, rx.count),
+				 TEGRA_IVC_ALIGN));
+	BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct tegra_ivc_header),
+				 TEGRA_IVC_ALIGN));
+
+	if ((uint64_t)num_frames * (uint64_t)frame_size >= 0x100000000UL) {
+		pr_err("num_frames * frame_size overflows\n");
+		return -EINVAL;
+	}
+
+	if (!IS_ALIGNED(frame_size, TEGRA_IVC_ALIGN)) {
+		pr_err("frame size not adequately aligned: %zu\n", frame_size);
+		return -EINVAL;
+	}
+
+	/*
+	 * The headers must at least be aligned enough for counters
+	 * to be accessed atomically.
+	 */
+	if (!IS_ALIGNED(rx, TEGRA_IVC_ALIGN)) {
+		pr_err("IVC channel start not aligned: %#lx\n", rx);
+		return -EINVAL;
+	}
+
+	if (!IS_ALIGNED(tx, TEGRA_IVC_ALIGN)) {
+		pr_err("IVC channel start not aligned: %#lx\n", tx);
+		return -EINVAL;
+	}
+
+	if (rx < tx) {
+		if (rx + frame_size * num_frames > tx) {
+			pr_err("queue regions overlap: %#lx + %zx > %#lx\n",
+			       rx, frame_size * num_frames, tx);
+			return -EINVAL;
+		}
+	} else {
+		if (tx + frame_size * num_frames > rx) {
+			pr_err("queue regions overlap: %#lx + %zx > %#lx\n",
+			       tx, frame_size * num_frames, rx);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+int tegra_ivc_init(struct tegra_ivc *ivc, struct device *peer, void *rx,
+		   dma_addr_t rx_phys, void *tx, dma_addr_t tx_phys,
+		   unsigned int num_frames, size_t frame_size,
+		   void (*notify)(struct tegra_ivc *ivc, void *data),
+		   void *data)
+{
+	size_t queue_size;
+	int err;
+
+	if (WARN_ON(!ivc || !notify))
+		return -EINVAL;
+
+	/*
+	 * All sizes that can be returned by communication functions should
+	 * fit in an int.
+	 */
+	if (frame_size > INT_MAX)
+		return -E2BIG;
+
+	err = tegra_ivc_check_params((unsigned long)rx, (unsigned long)tx,
+				     num_frames, frame_size);
+	if (err < 0)
+		return err;
+
+	queue_size = tegra_ivc_total_queue_size(num_frames * frame_size);
+
+	if (peer) {
+		ivc->rx.phys = dma_map_single(peer, rx, queue_size,
+					      DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(peer, ivc->rx.phys))
+			return -ENOMEM;
+
+		ivc->tx.phys = dma_map_single(peer, tx, queue_size,
+					      DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(peer, ivc->tx.phys)) {
+			dma_unmap_single(peer, ivc->rx.phys, queue_size,
+					 DMA_BIDIRECTIONAL);
+			return -ENOMEM;
+		}
+	} else {
+		ivc->rx.phys = rx_phys;
+		ivc->tx.phys = tx_phys;
+	}
+
+	ivc->rx.channel = rx;
+	ivc->tx.channel = tx;
+	ivc->peer = peer;
+	ivc->notify = notify;
+	ivc->notify_data = data;
+	ivc->frame_size = frame_size;
+	ivc->num_frames = num_frames;
+
+	/*
+	 * These values aren't necessarily correct until the channel has been
+	 * reset.
+	 */
+	ivc->tx.position = 0;
+	ivc->rx.position = 0;
+
+	return 0;
+}
+EXPORT_SYMBOL(tegra_ivc_init);
+
+void tegra_ivc_cleanup(struct tegra_ivc *ivc)
+{
+	if (ivc->peer) {
+		size_t size = tegra_ivc_total_queue_size(ivc->num_frames *
+							 ivc->frame_size);
+
+		dma_unmap_single(ivc->peer, ivc->rx.phys, size,
+				 DMA_BIDIRECTIONAL);
+		dma_unmap_single(ivc->peer, ivc->tx.phys, size,
+				 DMA_BIDIRECTIONAL);
+	}
+}
+EXPORT_SYMBOL(tegra_ivc_cleanup);