Update Linux to v5.4.2

Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/drivers/hwtracing/Kconfig b/drivers/hwtracing/Kconfig
index f68e025..1308583 100644
--- a/drivers/hwtracing/Kconfig
+++ b/drivers/hwtracing/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 menu "HW tracing support"
 
 source "drivers/hwtracing/stm/Kconfig"
diff --git a/drivers/hwtracing/coresight/Kconfig b/drivers/hwtracing/coresight/Kconfig
index ad34380..7a9f5fb 100644
--- a/drivers/hwtracing/coresight/Kconfig
+++ b/drivers/hwtracing/coresight/Kconfig
@@ -1,8 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Coresight configuration
 #
 menuconfig CORESIGHT
 	bool "CoreSight Tracing Support"
+	depends on OF || ACPI
 	select ARM_AMBA
 	select PERF_EVENTS
 	help
@@ -75,20 +77,13 @@
 	bool "CoreSight Embedded Trace Macrocell 4.x driver"
 	depends on ARM64
 	select CORESIGHT_LINKS_AND_SINKS
+	select PID_IN_CONTEXTIDR
 	help
 	  This driver provides support for the ETM4.x tracer module, tracing the
 	  instructions that a processor is executing. This is primarily useful
 	  for instruction level tracing. Depending on the implemented version
 	  data tracing may also be available.
 
-config CORESIGHT_DYNAMIC_REPLICATOR
-	bool "CoreSight Programmable Replicator driver"
-	depends on CORESIGHT_LINKS_AND_SINKS
-	help
-	  This enables support for dynamic CoreSight replicator link driver.
-	  The programmable ATB replicator allows independent filtering of the
-	  trace data based on the traceid.
-
 config CORESIGHT_STM
 	bool "CoreSight System Trace Macrocell driver"
 	depends on (ARM && !(CPU_32v3 || CPU_32v4 || CPU_32v4T)) || ARM64
@@ -111,7 +106,7 @@
 	  can quickly get to know program counter (PC), secure state,
 	  exception level, etc. Before use debugging functionality, platform
 	  needs to ensure the clock domain and power domain are enabled
-	  properly, please refer Documentation/trace/coresight-cpu-debug.txt
+	  properly, please refer Documentation/trace/coresight-cpu-debug.rst
 	  for detailed description and the example for usage.
 
 endif
diff --git a/drivers/hwtracing/coresight/Makefile b/drivers/hwtracing/coresight/Makefile
index 41870de..3c0ac42 100644
--- a/drivers/hwtracing/coresight/Makefile
+++ b/drivers/hwtracing/coresight/Makefile
@@ -2,8 +2,7 @@
 #
 # Makefile for CoreSight drivers.
 #
-obj-$(CONFIG_CORESIGHT) += coresight.o coresight-etm-perf.o
-obj-$(CONFIG_OF) += of_coresight.o
+obj-$(CONFIG_CORESIGHT) += coresight.o coresight-etm-perf.o coresight-platform.o
 obj-$(CONFIG_CORESIGHT_LINK_AND_SINK_TMC) += coresight-tmc.o \
 					     coresight-tmc-etf.o \
 					     coresight-tmc-etr.o
@@ -15,7 +14,6 @@
 					coresight-etm3x-sysfs.o
 obj-$(CONFIG_CORESIGHT_SOURCE_ETM4X) += coresight-etm4x.o \
 					coresight-etm4x-sysfs.o
-obj-$(CONFIG_CORESIGHT_DYNAMIC_REPLICATOR) += coresight-dynamic-replicator.o
 obj-$(CONFIG_CORESIGHT_STM) += coresight-stm.o
 obj-$(CONFIG_CORESIGHT_CPU_DEBUG) += coresight-cpu-debug.o
 obj-$(CONFIG_CORESIGHT_CATU) += coresight-catu.o
diff --git a/drivers/hwtracing/coresight/coresight-catu.c b/drivers/hwtracing/coresight/coresight-catu.c
index ff94e58..16ebf38 100644
--- a/drivers/hwtracing/coresight/coresight-catu.c
+++ b/drivers/hwtracing/coresight/coresight-catu.c
@@ -28,6 +28,8 @@
 #define catu_dbg(x, ...) do {} while (0)
 #endif
 
+DEFINE_CORESIGHT_DEVLIST(catu_devs, "catu");
+
 struct catu_etr_buf {
 	struct tmc_sg_table *catu_table;
 	dma_addr_t sladdr;
@@ -328,19 +330,18 @@
 			      struct etr_buf *etr_buf, int node, void **pages)
 {
 	struct coresight_device *csdev;
-	struct device *catu_dev;
 	struct tmc_sg_table *catu_table;
 	struct catu_etr_buf *catu_buf;
 
 	csdev = tmc_etr_get_catu_device(tmc_drvdata);
 	if (!csdev)
 		return -ENODEV;
-	catu_dev = csdev->dev.parent;
 	catu_buf = kzalloc(sizeof(*catu_buf), GFP_KERNEL);
 	if (!catu_buf)
 		return -ENOMEM;
 
-	catu_table = catu_init_sg_table(catu_dev, node, etr_buf->size, pages);
+	catu_table = catu_init_sg_table(&csdev->dev, node,
+					etr_buf->size, pages);
 	if (IS_ERR(catu_table)) {
 		kfree(catu_buf);
 		return PTR_ERR(catu_table);
@@ -406,18 +407,24 @@
 
 static int catu_enable_hw(struct catu_drvdata *drvdata, void *data)
 {
+	int rc;
 	u32 control, mode;
 	struct etr_buf *etr_buf = data;
+	struct device *dev = &drvdata->csdev->dev;
 
 	if (catu_wait_for_ready(drvdata))
-		dev_warn(drvdata->dev, "Timeout while waiting for READY\n");
+		dev_warn(dev, "Timeout while waiting for READY\n");
 
 	control = catu_read_control(drvdata);
 	if (control & BIT(CATU_CONTROL_ENABLE)) {
-		dev_warn(drvdata->dev, "CATU is already enabled\n");
+		dev_warn(dev, "CATU is already enabled\n");
 		return -EBUSY;
 	}
 
+	rc = coresight_claim_device_unlocked(drvdata->base);
+	if (rc)
+		return rc;
+
 	control |= BIT(CATU_CONTROL_ENABLE);
 
 	if (etr_buf && etr_buf->mode == ETR_MODE_CATU) {
@@ -436,7 +443,7 @@
 	catu_write_irqen(drvdata, 0);
 	catu_write_mode(drvdata, mode);
 	catu_write_control(drvdata, control);
-	dev_dbg(drvdata->dev, "Enabled in %s mode\n",
+	dev_dbg(dev, "Enabled in %s mode\n",
 		(mode == CATU_MODE_PASS_THROUGH) ?
 		"Pass through" :
 		"Translate");
@@ -457,14 +464,16 @@
 static int catu_disable_hw(struct catu_drvdata *drvdata)
 {
 	int rc = 0;
+	struct device *dev = &drvdata->csdev->dev;
 
 	catu_write_control(drvdata, 0);
+	coresight_disclaim_device_unlocked(drvdata->base);
 	if (catu_wait_for_ready(drvdata)) {
-		dev_info(drvdata->dev, "Timeout while waiting for READY\n");
+		dev_info(dev, "Timeout while waiting for READY\n");
 		rc = -EAGAIN;
 	}
 
-	dev_dbg(drvdata->dev, "Disabled\n");
+	dev_dbg(dev, "Disabled\n");
 	return rc;
 }
 
@@ -479,12 +488,12 @@
 	return rc;
 }
 
-const struct coresight_ops_helper catu_helper_ops = {
+static const struct coresight_ops_helper catu_helper_ops = {
 	.enable = catu_enable,
 	.disable = catu_disable,
 };
 
-const struct coresight_ops catu_ops = {
+static const struct coresight_ops catu_ops = {
 	.helper_ops = &catu_helper_ops,
 };
 
@@ -496,17 +505,11 @@
 	struct coresight_desc catu_desc;
 	struct coresight_platform_data *pdata = NULL;
 	struct device *dev = &adev->dev;
-	struct device_node *np = dev->of_node;
 	void __iomem *base;
 
-	if (np) {
-		pdata = of_get_coresight_platform_data(dev, np);
-		if (IS_ERR(pdata)) {
-			ret = PTR_ERR(pdata);
-			goto out;
-		}
-		dev->platform_data = pdata;
-	}
+	catu_desc.name = coresight_alloc_device_name(&catu_devs, dev);
+	if (!catu_desc.name)
+		return -ENOMEM;
 
 	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
 	if (!drvdata) {
@@ -514,7 +517,6 @@
 		goto out;
 	}
 
-	drvdata->dev = dev;
 	dev_set_drvdata(dev, drvdata);
 	base = devm_ioremap_resource(dev, &adev->res);
 	if (IS_ERR(base)) {
@@ -541,6 +543,13 @@
 	if (ret)
 		goto out;
 
+	pdata = coresight_get_platform_data(dev);
+	if (IS_ERR(pdata)) {
+		ret = PTR_ERR(pdata);
+		goto out;
+	}
+	dev->platform_data = pdata;
+
 	drvdata->base = base;
 	catu_desc.pdata = pdata;
 	catu_desc.dev = dev;
@@ -548,11 +557,13 @@
 	catu_desc.type = CORESIGHT_DEV_TYPE_HELPER;
 	catu_desc.subtype.helper_subtype = CORESIGHT_DEV_SUBTYPE_HELPER_CATU;
 	catu_desc.ops = &catu_ops;
+
 	drvdata->csdev = coresight_register(&catu_desc);
 	if (IS_ERR(drvdata->csdev))
 		ret = PTR_ERR(drvdata->csdev);
+	else
+		pm_runtime_put(&adev->dev);
 out:
-	pm_runtime_put(&adev->dev);
 	return ret;
 }
 
diff --git a/drivers/hwtracing/coresight/coresight-catu.h b/drivers/hwtracing/coresight/coresight-catu.h
index 1b281f0..80ceee3 100644
--- a/drivers/hwtracing/coresight/coresight-catu.h
+++ b/drivers/hwtracing/coresight/coresight-catu.h
@@ -61,7 +61,6 @@
 #define CATU_IRQEN_OFF		0x0
 
 struct catu_drvdata {
-	struct device *dev;
 	void __iomem *base;
 	struct coresight_device *csdev;
 	int irq;
@@ -109,11 +108,6 @@
 	return true;
 }
 
-#ifdef CONFIG_CORESIGHT_CATU
 extern const struct etr_buf_operations etr_catu_buf_ops;
-#else
-/* Dummy declaration for the CATU ops */
-static const struct etr_buf_operations etr_catu_buf_ops;
-#endif
 
 #endif
diff --git a/drivers/hwtracing/coresight/coresight-cpu-debug.c b/drivers/hwtracing/coresight/coresight-cpu-debug.c
index 45b2460..96544b3 100644
--- a/drivers/hwtracing/coresight/coresight-cpu-debug.c
+++ b/drivers/hwtracing/coresight/coresight-cpu-debug.c
@@ -525,23 +525,12 @@
 
 static int debug_func_init(void)
 {
-	struct dentry *file;
 	int ret;
 
 	/* Create debugfs node */
 	debug_debugfs_dir = debugfs_create_dir("coresight_cpu_debug", NULL);
-	if (!debug_debugfs_dir) {
-		pr_err("%s: unable to create debugfs directory\n", __func__);
-		return -ENOMEM;
-	}
-
-	file = debugfs_create_file("enable", 0644, debug_debugfs_dir, NULL,
-				   &debug_func_knob_fops);
-	if (!file) {
-		pr_err("%s: unable to create enable knob file\n", __func__);
-		ret = -ENOMEM;
-		goto err;
-	}
+	debugfs_create_file("enable", 0644, debug_debugfs_dir, NULL,
+			    &debug_func_knob_fops);
 
 	/* Register function to be called for panic */
 	ret = atomic_notifier_chain_register(&panic_notifier_list,
@@ -572,14 +561,16 @@
 	struct device *dev = &adev->dev;
 	struct debug_drvdata *drvdata;
 	struct resource *res = &adev->res;
-	struct device_node *np = adev->dev.of_node;
 	int ret;
 
 	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
 	if (!drvdata)
 		return -ENOMEM;
 
-	drvdata->cpu = np ? of_coresight_get_cpu(np) : 0;
+	drvdata->cpu = coresight_get_cpu(dev);
+	if (drvdata->cpu < 0)
+		return drvdata->cpu;
+
 	if (per_cpu(debug_drvdata, drvdata->cpu)) {
 		dev_err(dev, "CPU%d drvdata has already been initialized\n",
 			drvdata->cpu);
@@ -655,20 +646,23 @@
 	return 0;
 }
 
+static const struct amba_cs_uci_id uci_id_debug[] = {
+	{
+		/*  CPU Debug UCI data */
+		.devarch	= 0x47706a15,
+		.devarch_mask	= 0xfff0ffff,
+		.devtype	= 0x00000015,
+	}
+};
+
 static const struct amba_id debug_ids[] = {
-	{       /* Debug for Cortex-A53 */
-		.id	= 0x000bbd03,
-		.mask	= 0x000fffff,
-	},
-	{       /* Debug for Cortex-A57 */
-		.id	= 0x000bbd07,
-		.mask	= 0x000fffff,
-	},
-	{       /* Debug for Cortex-A72 */
-		.id	= 0x000bbd08,
-		.mask	= 0x000fffff,
-	},
-	{ 0, 0 },
+	CS_AMBA_ID(0x000bbd03),				/* Cortex-A53 */
+	CS_AMBA_ID(0x000bbd07),				/* Cortex-A57 */
+	CS_AMBA_ID(0x000bbd08),				/* Cortex-A72 */
+	CS_AMBA_ID(0x000bbd09),				/* Cortex-A73 */
+	CS_AMBA_UCI_ID(0x000f0205, uci_id_debug),	/* Qualcomm Kryo */
+	CS_AMBA_UCI_ID(0x000f0211, uci_id_debug),	/* Qualcomm Kryo */
+	{},
 };
 
 static struct amba_driver debug_driver = {
diff --git a/drivers/hwtracing/coresight/coresight-dynamic-replicator.c b/drivers/hwtracing/coresight/coresight-dynamic-replicator.c
deleted file mode 100644
index f6d0571..0000000
--- a/drivers/hwtracing/coresight/coresight-dynamic-replicator.c
+++ /dev/null
@@ -1,212 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2011-2015, The Linux Foundation. All rights reserved.
- */
-
-#include <linux/amba/bus.h>
-#include <linux/clk.h>
-#include <linux/coresight.h>
-#include <linux/device.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/io.h>
-#include <linux/kernel.h>
-#include <linux/of.h>
-#include <linux/pm_runtime.h>
-#include <linux/slab.h>
-
-#include "coresight-priv.h"
-
-#define REPLICATOR_IDFILTER0		0x000
-#define REPLICATOR_IDFILTER1		0x004
-
-/**
- * struct replicator_state - specifics associated to a replicator component
- * @base:	memory mapped base address for this component.
- * @dev:	the device entity associated with this component
- * @atclk:	optional clock for the core parts of the replicator.
- * @csdev:	component vitals needed by the framework
- */
-struct replicator_state {
-	void __iomem		*base;
-	struct device		*dev;
-	struct clk		*atclk;
-	struct coresight_device	*csdev;
-};
-
-static int replicator_enable(struct coresight_device *csdev, int inport,
-			      int outport)
-{
-	struct replicator_state *drvdata = dev_get_drvdata(csdev->dev.parent);
-
-	CS_UNLOCK(drvdata->base);
-
-	/*
-	 * Ensure that the other port is disabled
-	 * 0x00 - passing through the replicator unimpeded
-	 * 0xff - disable (or impede) the flow of ATB data
-	 */
-	if (outport == 0) {
-		writel_relaxed(0x00, drvdata->base + REPLICATOR_IDFILTER0);
-		writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER1);
-	} else {
-		writel_relaxed(0x00, drvdata->base + REPLICATOR_IDFILTER1);
-		writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER0);
-	}
-
-	CS_LOCK(drvdata->base);
-
-	dev_info(drvdata->dev, "REPLICATOR enabled\n");
-	return 0;
-}
-
-static void replicator_disable(struct coresight_device *csdev, int inport,
-				int outport)
-{
-	struct replicator_state *drvdata = dev_get_drvdata(csdev->dev.parent);
-
-	CS_UNLOCK(drvdata->base);
-
-	/* disable the flow of ATB data through port */
-	if (outport == 0)
-		writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER0);
-	else
-		writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER1);
-
-	CS_LOCK(drvdata->base);
-
-	dev_info(drvdata->dev, "REPLICATOR disabled\n");
-}
-
-static const struct coresight_ops_link replicator_link_ops = {
-	.enable		= replicator_enable,
-	.disable	= replicator_disable,
-};
-
-static const struct coresight_ops replicator_cs_ops = {
-	.link_ops	= &replicator_link_ops,
-};
-
-#define coresight_replicator_reg(name, offset) \
-	coresight_simple_reg32(struct replicator_state, name, offset)
-
-coresight_replicator_reg(idfilter0, REPLICATOR_IDFILTER0);
-coresight_replicator_reg(idfilter1, REPLICATOR_IDFILTER1);
-
-static struct attribute *replicator_mgmt_attrs[] = {
-	&dev_attr_idfilter0.attr,
-	&dev_attr_idfilter1.attr,
-	NULL,
-};
-
-static const struct attribute_group replicator_mgmt_group = {
-	.attrs = replicator_mgmt_attrs,
-	.name = "mgmt",
-};
-
-static const struct attribute_group *replicator_groups[] = {
-	&replicator_mgmt_group,
-	NULL,
-};
-
-static int replicator_probe(struct amba_device *adev, const struct amba_id *id)
-{
-	int ret;
-	struct device *dev = &adev->dev;
-	struct resource *res = &adev->res;
-	struct coresight_platform_data *pdata = NULL;
-	struct replicator_state *drvdata;
-	struct coresight_desc desc = { 0 };
-	struct device_node *np = adev->dev.of_node;
-	void __iomem *base;
-
-	if (np) {
-		pdata = of_get_coresight_platform_data(dev, np);
-		if (IS_ERR(pdata))
-			return PTR_ERR(pdata);
-		adev->dev.platform_data = pdata;
-	}
-
-	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
-	if (!drvdata)
-		return -ENOMEM;
-
-	drvdata->dev = &adev->dev;
-	drvdata->atclk = devm_clk_get(&adev->dev, "atclk"); /* optional */
-	if (!IS_ERR(drvdata->atclk)) {
-		ret = clk_prepare_enable(drvdata->atclk);
-		if (ret)
-			return ret;
-	}
-
-	/* Validity for the resource is already checked by the AMBA core */
-	base = devm_ioremap_resource(dev, res);
-	if (IS_ERR(base))
-		return PTR_ERR(base);
-
-	drvdata->base = base;
-	dev_set_drvdata(dev, drvdata);
-	pm_runtime_put(&adev->dev);
-
-	desc.type = CORESIGHT_DEV_TYPE_LINK;
-	desc.subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_SPLIT;
-	desc.ops = &replicator_cs_ops;
-	desc.pdata = adev->dev.platform_data;
-	desc.dev = &adev->dev;
-	desc.groups = replicator_groups;
-	drvdata->csdev = coresight_register(&desc);
-
-	return PTR_ERR_OR_ZERO(drvdata->csdev);
-}
-
-#ifdef CONFIG_PM
-static int replicator_runtime_suspend(struct device *dev)
-{
-	struct replicator_state *drvdata = dev_get_drvdata(dev);
-
-	if (drvdata && !IS_ERR(drvdata->atclk))
-		clk_disable_unprepare(drvdata->atclk);
-
-	return 0;
-}
-
-static int replicator_runtime_resume(struct device *dev)
-{
-	struct replicator_state *drvdata = dev_get_drvdata(dev);
-
-	if (drvdata && !IS_ERR(drvdata->atclk))
-		clk_prepare_enable(drvdata->atclk);
-
-	return 0;
-}
-#endif
-
-static const struct dev_pm_ops replicator_dev_pm_ops = {
-	SET_RUNTIME_PM_OPS(replicator_runtime_suspend,
-			   replicator_runtime_resume,
-			   NULL)
-};
-
-static const struct amba_id replicator_ids[] = {
-	{
-		.id     = 0x000bb909,
-		.mask   = 0x000fffff,
-	},
-	{
-		/* Coresight SoC-600 */
-		.id     = 0x000bb9ec,
-		.mask   = 0x000fffff,
-	},
-	{ 0, 0 },
-};
-
-static struct amba_driver replicator_driver = {
-	.drv = {
-		.name	= "coresight-dynamic-replicator",
-		.pm	= &replicator_dev_pm_ops,
-		.suppress_bind_attrs = true,
-	},
-	.probe		= replicator_probe,
-	.id_table	= replicator_ids,
-};
-builtin_amba_driver(replicator_driver);
diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c
index 0dad862..3810290 100644
--- a/drivers/hwtracing/coresight/coresight-etb10.c
+++ b/drivers/hwtracing/coresight/coresight-etb10.c
@@ -5,7 +5,7 @@
  * Description: CoreSight Embedded Trace Buffer driver
  */
 
-#include <asm/local.h>
+#include <linux/atomic.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/types.h>
@@ -28,6 +28,7 @@
 
 
 #include "coresight-priv.h"
+#include "coresight-etm-perf.h"
 
 #define ETB_RAM_DEPTH_REG	0x004
 #define ETB_STATUS_REG		0x00c
@@ -62,48 +63,46 @@
 #define ETB_FFSR_BIT		1
 #define ETB_FRAME_SIZE_WORDS	4
 
+DEFINE_CORESIGHT_DEVLIST(etb_devs, "etb");
+
 /**
  * struct etb_drvdata - specifics associated to an ETB component
  * @base:	memory mapped base address for this component.
- * @dev:	the device entity associated to this component.
  * @atclk:	optional clock for the core parts of the ETB.
  * @csdev:	component vitals needed by the framework.
  * @miscdev:	specifics to handle "/dev/xyz.etb" entry.
  * @spinlock:	only one at a time pls.
  * @reading:	synchronise user space access to etb buffer.
- * @mode:	this ETB is being used.
+ * @pid:	Process ID of the process being monitored by the session
+ *		that is using this component.
  * @buf:	area of memory where ETB buffer content gets sent.
+ * @mode:	this ETB is being used.
  * @buffer_depth: size of @buf.
  * @trigger_cntr: amount of words to store after a trigger.
  */
 struct etb_drvdata {
 	void __iomem		*base;
-	struct device		*dev;
 	struct clk		*atclk;
 	struct coresight_device	*csdev;
 	struct miscdevice	miscdev;
 	spinlock_t		spinlock;
 	local_t			reading;
-	local_t			mode;
+	pid_t			pid;
 	u8			*buf;
+	u32			mode;
 	u32			buffer_depth;
 	u32			trigger_cntr;
 };
 
-static unsigned int etb_get_buffer_depth(struct etb_drvdata *drvdata)
+static int etb_set_buffer(struct coresight_device *csdev,
+			  struct perf_output_handle *handle);
+
+static inline unsigned int etb_get_buffer_depth(struct etb_drvdata *drvdata)
 {
-	u32 depth = 0;
-
-	pm_runtime_get_sync(drvdata->dev);
-
-	/* RO registers don't need locking */
-	depth = readl_relaxed(drvdata->base + ETB_RAM_DEPTH_REG);
-
-	pm_runtime_put(drvdata->dev);
-	return depth;
+	return readl_relaxed(drvdata->base + ETB_RAM_DEPTH_REG);
 }
 
-static void etb_enable_hw(struct etb_drvdata *drvdata)
+static void __etb_enable_hw(struct etb_drvdata *drvdata)
 {
 	int i;
 	u32 depth;
@@ -131,42 +130,127 @@
 	CS_LOCK(drvdata->base);
 }
 
-static int etb_enable(struct coresight_device *csdev, u32 mode)
+static int etb_enable_hw(struct etb_drvdata *drvdata)
 {
-	u32 val;
-	unsigned long flags;
-	struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	int rc = coresight_claim_device(drvdata->base);
 
-	val = local_cmpxchg(&drvdata->mode,
-			    CS_MODE_DISABLED, mode);
-	/*
-	 * When accessing from Perf, a HW buffer can be handled
-	 * by a single trace entity.  In sysFS mode many tracers
-	 * can be logging to the same HW buffer.
-	 */
-	if (val == CS_MODE_PERF)
-		return -EBUSY;
+	if (rc)
+		return rc;
 
-	/* Don't let perf disturb sysFS sessions */
-	if (val == CS_MODE_SYSFS && mode == CS_MODE_PERF)
-		return -EBUSY;
-
-	/* Nothing to do, the tracer is already enabled. */
-	if (val == CS_MODE_SYSFS)
-		goto out;
-
-	spin_lock_irqsave(&drvdata->spinlock, flags);
-	etb_enable_hw(drvdata);
-	spin_unlock_irqrestore(&drvdata->spinlock, flags);
-
-out:
-	dev_info(drvdata->dev, "ETB enabled\n");
+	__etb_enable_hw(drvdata);
 	return 0;
 }
 
-static void etb_disable_hw(struct etb_drvdata *drvdata)
+static int etb_enable_sysfs(struct coresight_device *csdev)
+{
+	int ret = 0;
+	unsigned long flags;
+	struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+
+	/* Don't messup with perf sessions. */
+	if (drvdata->mode == CS_MODE_PERF) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	if (drvdata->mode == CS_MODE_DISABLED) {
+		ret = etb_enable_hw(drvdata);
+		if (ret)
+			goto out;
+
+		drvdata->mode = CS_MODE_SYSFS;
+	}
+
+	atomic_inc(csdev->refcnt);
+out:
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+	return ret;
+}
+
+static int etb_enable_perf(struct coresight_device *csdev, void *data)
+{
+	int ret = 0;
+	pid_t pid;
+	unsigned long flags;
+	struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	struct perf_output_handle *handle = data;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+
+	/* No need to continue if the component is already in used by sysFS. */
+	if (drvdata->mode == CS_MODE_SYSFS) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	/* Get a handle on the pid of the process to monitor */
+	pid = task_pid_nr(handle->event->owner);
+
+	if (drvdata->pid != -1 && drvdata->pid != pid) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	/*
+	 * No HW configuration is needed if the sink is already in
+	 * use for this session.
+	 */
+	if (drvdata->pid == pid) {
+		atomic_inc(csdev->refcnt);
+		goto out;
+	}
+
+	/*
+	 * We don't have an internal state to clean up if we fail to setup
+	 * the perf buffer. So we can perform the step before we turn the
+	 * ETB on and leave without cleaning up.
+	 */
+	ret = etb_set_buffer(csdev, handle);
+	if (ret)
+		goto out;
+
+	ret = etb_enable_hw(drvdata);
+	if (!ret) {
+		/* Associate with monitored process. */
+		drvdata->pid = pid;
+		drvdata->mode = CS_MODE_PERF;
+		atomic_inc(csdev->refcnt);
+	}
+
+out:
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+	return ret;
+}
+
+static int etb_enable(struct coresight_device *csdev, u32 mode, void *data)
+{
+	int ret;
+
+	switch (mode) {
+	case CS_MODE_SYSFS:
+		ret = etb_enable_sysfs(csdev);
+		break;
+	case CS_MODE_PERF:
+		ret = etb_enable_perf(csdev, data);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	if (ret)
+		return ret;
+
+	dev_dbg(&csdev->dev, "ETB enabled\n");
+	return 0;
+}
+
+static void __etb_disable_hw(struct etb_drvdata *drvdata)
 {
 	u32 ffcr;
+	struct device *dev = &drvdata->csdev->dev;
 
 	CS_UNLOCK(drvdata->base);
 
@@ -179,7 +263,7 @@
 	writel_relaxed(ffcr, drvdata->base + ETB_FFCR);
 
 	if (coresight_timeout(drvdata->base, ETB_FFCR, ETB_FFCR_BIT, 0)) {
-		dev_err(drvdata->dev,
+		dev_err(dev,
 		"timeout while waiting for completion of Manual Flush\n");
 	}
 
@@ -187,7 +271,7 @@
 	writel_relaxed(0x0, drvdata->base + ETB_CTL_REG);
 
 	if (coresight_timeout(drvdata->base, ETB_FFSR, ETB_FFSR_BIT, 1)) {
-		dev_err(drvdata->dev,
+		dev_err(dev,
 			"timeout while waiting for Formatter to Stop\n");
 	}
 
@@ -202,6 +286,7 @@
 	u32 read_data, depth;
 	u32 read_ptr, write_ptr;
 	u32 frame_off, frame_endoff;
+	struct device *dev = &drvdata->csdev->dev;
 
 	CS_UNLOCK(drvdata->base);
 
@@ -211,10 +296,10 @@
 	frame_off = write_ptr % ETB_FRAME_SIZE_WORDS;
 	frame_endoff = ETB_FRAME_SIZE_WORDS - frame_off;
 	if (frame_off) {
-		dev_err(drvdata->dev,
+		dev_err(dev,
 			"write_ptr: %lu not aligned to formatter frame size\n",
 			(unsigned long)write_ptr);
-		dev_err(drvdata->dev, "frameoff: %lu, frame_endoff: %lu\n",
+		dev_err(dev, "frameoff: %lu, frame_endoff: %lu\n",
 			(unsigned long)frame_off, (unsigned long)frame_endoff);
 		write_ptr += frame_endoff;
 	}
@@ -254,30 +339,45 @@
 	CS_LOCK(drvdata->base);
 }
 
-static void etb_disable(struct coresight_device *csdev)
+static void etb_disable_hw(struct etb_drvdata *drvdata)
+{
+	__etb_disable_hw(drvdata);
+	etb_dump_hw(drvdata);
+	coresight_disclaim_device(drvdata->base);
+}
+
+static int etb_disable(struct coresight_device *csdev)
 {
 	struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
 	unsigned long flags;
 
 	spin_lock_irqsave(&drvdata->spinlock, flags);
+
+	if (atomic_dec_return(csdev->refcnt)) {
+		spin_unlock_irqrestore(&drvdata->spinlock, flags);
+		return -EBUSY;
+	}
+
+	/* Complain if we (somehow) got out of sync */
+	WARN_ON_ONCE(drvdata->mode == CS_MODE_DISABLED);
 	etb_disable_hw(drvdata);
-	etb_dump_hw(drvdata);
+	/* Dissociate from monitored process. */
+	drvdata->pid = -1;
+	drvdata->mode = CS_MODE_DISABLED;
 	spin_unlock_irqrestore(&drvdata->spinlock, flags);
 
-	local_set(&drvdata->mode, CS_MODE_DISABLED);
-
-	dev_info(drvdata->dev, "ETB disabled\n");
+	dev_dbg(&csdev->dev, "ETB disabled\n");
+	return 0;
 }
 
-static void *etb_alloc_buffer(struct coresight_device *csdev, int cpu,
-			      void **pages, int nr_pages, bool overwrite)
+static void *etb_alloc_buffer(struct coresight_device *csdev,
+			      struct perf_event *event, void **pages,
+			      int nr_pages, bool overwrite)
 {
 	int node;
 	struct cs_buffers *buf;
 
-	if (cpu == -1)
-		cpu = smp_processor_id();
-	node = cpu_to_node(cpu);
+	node = (event->cpu == -1) ? NUMA_NO_NODE : cpu_to_node(event->cpu);
 
 	buf = kzalloc_node(sizeof(struct cs_buffers), GFP_KERNEL, node);
 	if (!buf)
@@ -298,12 +398,14 @@
 }
 
 static int etb_set_buffer(struct coresight_device *csdev,
-			  struct perf_output_handle *handle,
-			  void *sink_config)
+			  struct perf_output_handle *handle)
 {
 	int ret = 0;
 	unsigned long head;
-	struct cs_buffers *buf = sink_config;
+	struct cs_buffers *buf = etm_perf_sink_config(handle);
+
+	if (!buf)
+		return -EINVAL;
 
 	/* wrap head around to the amount of space we have */
 	head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1);
@@ -319,37 +421,7 @@
 	return ret;
 }
 
-static unsigned long etb_reset_buffer(struct coresight_device *csdev,
-				      struct perf_output_handle *handle,
-				      void *sink_config)
-{
-	unsigned long size = 0;
-	struct cs_buffers *buf = sink_config;
-
-	if (buf) {
-		/*
-		 * In snapshot mode ->data_size holds the new address of the
-		 * ring buffer's head.  The size itself is the whole address
-		 * range since we want the latest information.
-		 */
-		if (buf->snapshot)
-			handle->head = local_xchg(&buf->data_size,
-						  buf->nr_pages << PAGE_SHIFT);
-
-		/*
-		 * Tell the tracer PMU how much we got in this run and if
-		 * something went wrong along the way.  Nobody else can use
-		 * this cs_buffers instance until we are done.  As such
-		 * resetting parameters here and squaring off with the ring
-		 * buffer API in the tracer PMU is fine.
-		 */
-		size = local_xchg(&buf->data_size, 0);
-	}
-
-	return size;
-}
-
-static void etb_update_buffer(struct coresight_device *csdev,
+static unsigned long etb_update_buffer(struct coresight_device *csdev,
 			      struct perf_output_handle *handle,
 			      void *sink_config)
 {
@@ -358,17 +430,23 @@
 	u8 *buf_ptr;
 	const u32 *barrier;
 	u32 read_ptr, write_ptr, capacity;
-	u32 status, read_data, to_read;
-	unsigned long offset;
+	u32 status, read_data;
+	unsigned long offset, to_read = 0, flags;
 	struct cs_buffers *buf = sink_config;
 	struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
 
 	if (!buf)
-		return;
+		return 0;
 
 	capacity = drvdata->buffer_depth * ETB_FRAME_SIZE_WORDS;
 
-	etb_disable_hw(drvdata);
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+
+	/* Don't do anything if another tracer is using this sink */
+	if (atomic_read(csdev->refcnt) != 1)
+		goto out;
+
+	__etb_disable_hw(drvdata);
 	CS_UNLOCK(drvdata->base);
 
 	/* unit is in words, not bytes */
@@ -381,7 +459,7 @@
 	 * chance to fix things.
 	 */
 	if (write_ptr % ETB_FRAME_SIZE_WORDS) {
-		dev_err(drvdata->dev,
+		dev_err(&csdev->dev,
 			"write_ptr: %lu not aligned to formatter frame size\n",
 			(unsigned long)write_ptr);
 
@@ -433,7 +511,13 @@
 		lost = true;
 	}
 
-	if (lost)
+	/*
+	 * Don't set the TRUNCATED flag in snapshot mode because 1) the
+	 * captured buffer is expected to be truncated and 2) a full buffer
+	 * prevents the event from being re-enabled by the perf core,
+	 * resulting in stale data being send to user space.
+	 */
+	if (!buf->snapshot && lost)
 		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
 
 	/* finally tell HW where we want to start reading from */
@@ -469,18 +553,20 @@
 	writel_relaxed(0x0, drvdata->base + ETB_RAM_WRITE_POINTER);
 
 	/*
-	 * In snapshot mode all we have to do is communicate to
-	 * perf_aux_output_end() the address of the current head.  In full
-	 * trace mode the same function expects a size to move rb->aux_head
-	 * forward.
+	 * In snapshot mode we simply increment the head by the number of byte
+	 * that were written.  User space function  cs_etm_find_snapshot() will
+	 * figure out how many bytes to get from the AUX buffer based on the
+	 * position of the head.
 	 */
 	if (buf->snapshot)
-		local_set(&buf->data_size, (cur * PAGE_SIZE) + offset);
-	else
-		local_add(to_read, &buf->data_size);
+		handle->head += to_read;
 
-	etb_enable_hw(drvdata);
+	__etb_enable_hw(drvdata);
 	CS_LOCK(drvdata->base);
+out:
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	return to_read;
 }
 
 static const struct coresight_ops_sink etb_sink_ops = {
@@ -488,8 +574,6 @@
 	.disable	= etb_disable,
 	.alloc_buffer	= etb_alloc_buffer,
 	.free_buffer	= etb_free_buffer,
-	.set_buffer	= etb_set_buffer,
-	.reset_buffer	= etb_reset_buffer,
 	.update_buffer	= etb_update_buffer,
 };
 
@@ -502,14 +586,14 @@
 	unsigned long flags;
 
 	spin_lock_irqsave(&drvdata->spinlock, flags);
-	if (local_read(&drvdata->mode) == CS_MODE_SYSFS) {
-		etb_disable_hw(drvdata);
+	if (drvdata->mode == CS_MODE_SYSFS) {
+		__etb_disable_hw(drvdata);
 		etb_dump_hw(drvdata);
-		etb_enable_hw(drvdata);
+		__etb_enable_hw(drvdata);
 	}
 	spin_unlock_irqrestore(&drvdata->spinlock, flags);
 
-	dev_info(drvdata->dev, "ETB dumped\n");
+	dev_dbg(&drvdata->csdev->dev, "ETB dumped\n");
 }
 
 static int etb_open(struct inode *inode, struct file *file)
@@ -520,7 +604,7 @@
 	if (local_cmpxchg(&drvdata->reading, 0, 1))
 		return -EBUSY;
 
-	dev_dbg(drvdata->dev, "%s: successfully opened\n", __func__);
+	dev_dbg(&drvdata->csdev->dev, "%s: successfully opened\n", __func__);
 	return 0;
 }
 
@@ -530,6 +614,7 @@
 	u32 depth;
 	struct etb_drvdata *drvdata = container_of(file->private_data,
 						   struct etb_drvdata, miscdev);
+	struct device *dev = &drvdata->csdev->dev;
 
 	etb_dump(drvdata);
 
@@ -538,13 +623,14 @@
 		len = depth * 4 - *ppos;
 
 	if (copy_to_user(data, drvdata->buf + *ppos, len)) {
-		dev_dbg(drvdata->dev, "%s: copy_to_user failed\n", __func__);
+		dev_dbg(dev,
+			"%s: copy_to_user failed\n", __func__);
 		return -EFAULT;
 	}
 
 	*ppos += len;
 
-	dev_dbg(drvdata->dev, "%s: %zu bytes copied, %d bytes left\n",
+	dev_dbg(dev, "%s: %zu bytes copied, %d bytes left\n",
 		__func__, len, (int)(depth * 4 - *ppos));
 	return len;
 }
@@ -555,7 +641,7 @@
 						   struct etb_drvdata, miscdev);
 	local_set(&drvdata->reading, 0);
 
-	dev_dbg(drvdata->dev, "%s: released\n", __func__);
+	dev_dbg(&drvdata->csdev->dev, "%s: released\n", __func__);
 	return 0;
 }
 
@@ -646,20 +732,15 @@
 	struct etb_drvdata *drvdata;
 	struct resource *res = &adev->res;
 	struct coresight_desc desc = { 0 };
-	struct device_node *np = adev->dev.of_node;
 
-	if (np) {
-		pdata = of_get_coresight_platform_data(dev, np);
-		if (IS_ERR(pdata))
-			return PTR_ERR(pdata);
-		adev->dev.platform_data = pdata;
-	}
+	desc.name = coresight_alloc_device_name(&etb_devs, dev);
+	if (!desc.name)
+		return -ENOMEM;
 
 	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
 	if (!drvdata)
 		return -ENOMEM;
 
-	drvdata->dev = &adev->dev;
 	drvdata->atclk = devm_clk_get(&adev->dev, "atclk"); /* optional */
 	if (!IS_ERR(drvdata->atclk)) {
 		ret = clk_prepare_enable(drvdata->atclk);
@@ -678,7 +759,6 @@
 	spin_lock_init(&drvdata->spinlock);
 
 	drvdata->buffer_depth = etb_get_buffer_depth(drvdata);
-	pm_runtime_put(&adev->dev);
 
 	if (drvdata->buffer_depth & 0x80000000)
 		return -EINVAL;
@@ -688,6 +768,14 @@
 	if (!drvdata->buf)
 		return -ENOMEM;
 
+	/* This device is not associated with a session */
+	drvdata->pid = -1;
+
+	pdata = coresight_get_platform_data(dev);
+	if (IS_ERR(pdata))
+		return PTR_ERR(pdata);
+	adev->dev.platform_data = pdata;
+
 	desc.type = CORESIGHT_DEV_TYPE_SINK;
 	desc.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_BUFFER;
 	desc.ops = &etb_cs_ops;
@@ -698,13 +786,14 @@
 	if (IS_ERR(drvdata->csdev))
 		return PTR_ERR(drvdata->csdev);
 
-	drvdata->miscdev.name = pdata->name;
+	drvdata->miscdev.name = desc.name;
 	drvdata->miscdev.minor = MISC_DYNAMIC_MINOR;
 	drvdata->miscdev.fops = &etb_fops;
 	ret = misc_register(&drvdata->miscdev);
 	if (ret)
 		goto err_misc_register;
 
+	pm_runtime_put(&adev->dev);
 	return 0;
 
 err_misc_register:
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
index 6776956..84f1dcb 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -12,7 +12,9 @@
 #include <linux/mm.h>
 #include <linux/init.h>
 #include <linux/perf_event.h>
+#include <linux/percpu-defs.h>
 #include <linux/slab.h>
+#include <linux/stringhash.h>
 #include <linux/types.h>
 #include <linux/workqueue.h>
 
@@ -22,32 +24,23 @@
 static struct pmu etm_pmu;
 static bool etm_perf_up;
 
-/**
- * struct etm_event_data - Coresight specifics associated to an event
- * @work:		Handle to free allocated memory outside IRQ context.
- * @mask:		Hold the CPU(s) this event was set for.
- * @snk_config:		The sink configuration.
- * @path:		An array of path, each slot for one CPU.
- */
-struct etm_event_data {
-	struct work_struct work;
-	cpumask_t mask;
-	void *snk_config;
-	struct list_head **path;
-};
-
 static DEFINE_PER_CPU(struct perf_output_handle, ctx_handle);
 static DEFINE_PER_CPU(struct coresight_device *, csdev_src);
 
 /* ETMv3.5/PTM's ETMCR is 'config' */
 PMU_FORMAT_ATTR(cycacc,		"config:" __stringify(ETM_OPT_CYCACC));
+PMU_FORMAT_ATTR(contextid,	"config:" __stringify(ETM_OPT_CTXTID));
 PMU_FORMAT_ATTR(timestamp,	"config:" __stringify(ETM_OPT_TS));
 PMU_FORMAT_ATTR(retstack,	"config:" __stringify(ETM_OPT_RETSTK));
+/* Sink ID - same for all ETMs */
+PMU_FORMAT_ATTR(sinkid,		"config2:0-31");
 
 static struct attribute *etm_config_formats_attr[] = {
 	&format_attr_cycacc.attr,
+	&format_attr_contextid.attr,
 	&format_attr_timestamp.attr,
 	&format_attr_retstack.attr,
+	&format_attr_sinkid.attr,
 	NULL,
 };
 
@@ -56,11 +49,33 @@
 	.attrs  = etm_config_formats_attr,
 };
 
-static const struct attribute_group *etm_pmu_attr_groups[] = {
-	&etm_pmu_format_group,
+static struct attribute *etm_config_sinks_attr[] = {
 	NULL,
 };
 
+static const struct attribute_group etm_pmu_sinks_group = {
+	.name   = "sinks",
+	.attrs  = etm_config_sinks_attr,
+};
+
+static const struct attribute_group *etm_pmu_attr_groups[] = {
+	&etm_pmu_format_group,
+	&etm_pmu_sinks_group,
+	NULL,
+};
+
+static inline struct list_head **
+etm_event_cpu_path_ptr(struct etm_event_data *data, int cpu)
+{
+	return per_cpu_ptr(data->path, cpu);
+}
+
+static inline struct list_head *
+etm_event_cpu_path(struct etm_event_data *data, int cpu)
+{
+	return *etm_event_cpu_path_ptr(data, cpu);
+}
+
 static void etm_event_read(struct perf_event *event) {}
 
 static int etm_addr_filters_alloc(struct perf_event *event)
@@ -105,38 +120,50 @@
 	return ret;
 }
 
+static void free_sink_buffer(struct etm_event_data *event_data)
+{
+	int cpu;
+	cpumask_t *mask = &event_data->mask;
+	struct coresight_device *sink;
+
+	if (WARN_ON(cpumask_empty(mask)))
+		return;
+
+	if (!event_data->snk_config)
+		return;
+
+	cpu = cpumask_first(mask);
+	sink = coresight_get_sink(etm_event_cpu_path(event_data, cpu));
+	sink_ops(sink)->free_buffer(event_data->snk_config);
+}
+
 static void free_event_data(struct work_struct *work)
 {
 	int cpu;
 	cpumask_t *mask;
 	struct etm_event_data *event_data;
-	struct coresight_device *sink;
 
 	event_data = container_of(work, struct etm_event_data, work);
 	mask = &event_data->mask;
-	/*
-	 * First deal with the sink configuration.  See comment in
-	 * etm_setup_aux() about why we take the first available path.
-	 */
-	if (event_data->snk_config) {
-		cpu = cpumask_first(mask);
-		sink = coresight_get_sink(event_data->path[cpu]);
-		if (sink_ops(sink)->free_buffer)
-			sink_ops(sink)->free_buffer(event_data->snk_config);
-	}
+
+	/* Free the sink buffers, if there are any */
+	free_sink_buffer(event_data);
 
 	for_each_cpu(cpu, mask) {
-		if (!(IS_ERR_OR_NULL(event_data->path[cpu])))
-			coresight_release_path(event_data->path[cpu]);
+		struct list_head **ppath;
+
+		ppath = etm_event_cpu_path_ptr(event_data, cpu);
+		if (!(IS_ERR_OR_NULL(*ppath)))
+			coresight_release_path(*ppath);
+		*ppath = NULL;
 	}
 
-	kfree(event_data->path);
+	free_percpu(event_data->path);
 	kfree(event_data);
 }
 
 static void *alloc_event_data(int cpu)
 {
-	int size;
 	cpumask_t *mask;
 	struct etm_event_data *event_data;
 
@@ -145,16 +172,12 @@
 	if (!event_data)
 		return NULL;
 
-	/* Make sure nothing disappears under us */
-	get_online_cpus();
-	size = num_online_cpus();
 
 	mask = &event_data->mask;
 	if (cpu != -1)
 		cpumask_set_cpu(cpu, mask);
 	else
-		cpumask_copy(mask, cpu_online_mask);
-	put_online_cpus();
+		cpumask_copy(mask, cpu_present_mask);
 
 	/*
 	 * Each CPU has a single path between source and destination.  As such
@@ -164,8 +187,8 @@
 	 * unused memory when dealing with single CPU trace scenarios is small
 	 * compared to the cost of searching through an optimized array.
 	 */
-	event_data->path = kcalloc(size,
-				   sizeof(struct list_head *), GFP_KERNEL);
+	event_data->path = alloc_percpu(struct list_head *);
+
 	if (!event_data->path) {
 		kfree(event_data);
 		return NULL;
@@ -181,61 +204,80 @@
 	schedule_work(&event_data->work);
 }
 
-static void *etm_setup_aux(int event_cpu, void **pages,
+static void *etm_setup_aux(struct perf_event *event, void **pages,
 			   int nr_pages, bool overwrite)
 {
-	int cpu;
+	u32 id;
+	int cpu = event->cpu;
 	cpumask_t *mask;
 	struct coresight_device *sink;
 	struct etm_event_data *event_data = NULL;
 
-	event_data = alloc_event_data(event_cpu);
+	event_data = alloc_event_data(cpu);
 	if (!event_data)
 		return NULL;
 	INIT_WORK(&event_data->work, free_event_data);
 
-	/*
-	 * In theory nothing prevent tracers in a trace session from being
-	 * associated with different sinks, nor having a sink per tracer.  But
-	 * until we have HW with this kind of topology we need to assume tracers
-	 * in a trace session are using the same sink.  Therefore go through
-	 * the coresight bus and pick the first enabled sink.
-	 *
-	 * When operated from sysFS users are responsible to enable the sink
-	 * while from perf, the perf tools will do it based on the choice made
-	 * on the cmd line.  As such the "enable_sink" flag in sysFS is reset.
-	 */
-	sink = coresight_get_enabled_sink(true);
+	/* First get the selected sink from user space. */
+	if (event->attr.config2) {
+		id = (u32)event->attr.config2;
+		sink = coresight_get_sink_by_id(id);
+	} else {
+		sink = coresight_get_enabled_sink(true);
+	}
+
 	if (!sink)
 		goto err;
 
 	mask = &event_data->mask;
 
-	/* Setup the path for each CPU in a trace session */
+	/*
+	 * Setup the path for each CPU in a trace session. We try to build
+	 * trace path for each CPU in the mask. If we don't find an ETM
+	 * for the CPU or fail to build a path, we clear the CPU from the
+	 * mask and continue with the rest. If ever we try to trace on those
+	 * CPUs, we can handle it and fail the session.
+	 */
 	for_each_cpu(cpu, mask) {
+		struct list_head *path;
 		struct coresight_device *csdev;
 
 		csdev = per_cpu(csdev_src, cpu);
-		if (!csdev)
-			goto err;
+		/*
+		 * If there is no ETM associated with this CPU clear it from
+		 * the mask and continue with the rest. If ever we try to trace
+		 * on this CPU, we handle it accordingly.
+		 */
+		if (!csdev) {
+			cpumask_clear_cpu(cpu, mask);
+			continue;
+		}
 
 		/*
 		 * Building a path doesn't enable it, it simply builds a
 		 * list of devices from source to sink that can be
 		 * referenced later when the path is actually needed.
 		 */
-		event_data->path[cpu] = coresight_build_path(csdev, sink);
-		if (IS_ERR(event_data->path[cpu]))
-			goto err;
+		path = coresight_build_path(csdev, sink);
+		if (IS_ERR(path)) {
+			cpumask_clear_cpu(cpu, mask);
+			continue;
+		}
+
+		*etm_event_cpu_path_ptr(event_data, cpu) = path;
 	}
 
-	if (!sink_ops(sink)->alloc_buffer)
+	/* If we don't have any CPUs ready for tracing, abort */
+	cpu = cpumask_first(mask);
+	if (cpu >= nr_cpu_ids)
 		goto err;
 
-	cpu = cpumask_first(mask);
-	/* Get the AUX specific data from the sink buffer */
+	if (!sink_ops(sink)->alloc_buffer || !sink_ops(sink)->free_buffer)
+		goto err;
+
+	/* Allocate the sink buffer for this session */
 	event_data->snk_config =
-			sink_ops(sink)->alloc_buffer(sink, cpu, pages,
+			sink_ops(sink)->alloc_buffer(sink, event, pages,
 						     nr_pages, overwrite);
 	if (!event_data->snk_config)
 		goto err;
@@ -255,6 +297,7 @@
 	struct etm_event_data *event_data;
 	struct perf_output_handle *handle = this_cpu_ptr(&ctx_handle);
 	struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu);
+	struct list_head *path;
 
 	if (!csdev)
 		goto fail;
@@ -267,18 +310,14 @@
 	if (!event_data)
 		goto fail;
 
+	path = etm_event_cpu_path(event_data, cpu);
 	/* We need a sink, no need to continue without one */
-	sink = coresight_get_sink(event_data->path[cpu]);
-	if (WARN_ON_ONCE(!sink || !sink_ops(sink)->set_buffer))
-		goto fail_end_stop;
-
-	/* Configure the sink */
-	if (sink_ops(sink)->set_buffer(sink, handle,
-				       event_data->snk_config))
+	sink = coresight_get_sink(path);
+	if (WARN_ON_ONCE(!sink))
 		goto fail_end_stop;
 
 	/* Nothing will happen without a path */
-	if (coresight_enable_path(event_data->path[cpu], CS_MODE_PERF))
+	if (coresight_enable_path(path, CS_MODE_PERF, handle))
 		goto fail_end_stop;
 
 	/* Tell the perf core the event is alive */
@@ -286,11 +325,13 @@
 
 	/* Finally enable the tracer */
 	if (source_ops(csdev)->enable(csdev, event, CS_MODE_PERF))
-		goto fail_end_stop;
+		goto fail_disable_path;
 
 out:
 	return;
 
+fail_disable_path:
+	coresight_disable_path(path);
 fail_end_stop:
 	perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
 	perf_aux_output_end(handle, 0);
@@ -306,6 +347,7 @@
 	struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu);
 	struct perf_output_handle *handle = this_cpu_ptr(&ctx_handle);
 	struct etm_event_data *event_data = perf_get_aux(handle);
+	struct list_head *path;
 
 	if (event->hw.state == PERF_HES_STOPPED)
 		return;
@@ -313,7 +355,11 @@
 	if (!csdev)
 		return;
 
-	sink = coresight_get_sink(event_data->path[cpu]);
+	path = etm_event_cpu_path(event_data, cpu);
+	if (!path)
+		return;
+
+	sink = coresight_get_sink(path);
 	if (!sink)
 		return;
 
@@ -331,20 +377,13 @@
 		if (!sink_ops(sink)->update_buffer)
 			return;
 
-		sink_ops(sink)->update_buffer(sink, handle,
+		size = sink_ops(sink)->update_buffer(sink, handle,
 					      event_data->snk_config);
-
-		if (!sink_ops(sink)->reset_buffer)
-			return;
-
-		size = sink_ops(sink)->reset_buffer(sink, handle,
-						    event_data->snk_config);
-
 		perf_aux_output_end(handle, size);
 	}
 
 	/* Disabling the path make its elements available to other sessions */
-	coresight_disable_path(event_data->path[cpu]);
+	coresight_disable_path(path);
 }
 
 static int etm_event_add(struct perf_event *event, int mode)
@@ -410,15 +449,16 @@
 static void etm_addr_filters_sync(struct perf_event *event)
 {
 	struct perf_addr_filters_head *head = perf_event_addr_filters(event);
-	unsigned long start, stop, *offs = event->addr_filters_offs;
+	unsigned long start, stop;
+	struct perf_addr_filter_range *fr = event->addr_filter_ranges;
 	struct etm_filters *filters = event->hw.addr_filters;
 	struct etm_filter *etm_filter;
 	struct perf_addr_filter *filter;
 	int i = 0;
 
 	list_for_each_entry(filter, &head->list, entry) {
-		start = filter->offset + offs[i];
-		stop = start + filter->size;
+		start = fr[i].start;
+		stop = start + fr[i].size;
 		etm_filter = &filters->etm_filter[i];
 
 		switch (filter->action) {
@@ -467,11 +507,84 @@
 	return 0;
 }
 
+static ssize_t etm_perf_sink_name_show(struct device *dev,
+				       struct device_attribute *dattr,
+				       char *buf)
+{
+	struct dev_ext_attribute *ea;
+
+	ea = container_of(dattr, struct dev_ext_attribute, attr);
+	return scnprintf(buf, PAGE_SIZE, "0x%lx\n", (unsigned long)(ea->var));
+}
+
+int etm_perf_add_symlink_sink(struct coresight_device *csdev)
+{
+	int ret;
+	unsigned long hash;
+	const char *name;
+	struct device *pmu_dev = etm_pmu.dev;
+	struct device *dev = &csdev->dev;
+	struct dev_ext_attribute *ea;
+
+	if (csdev->type != CORESIGHT_DEV_TYPE_SINK &&
+	    csdev->type != CORESIGHT_DEV_TYPE_LINKSINK)
+		return -EINVAL;
+
+	if (csdev->ea != NULL)
+		return -EINVAL;
+
+	if (!etm_perf_up)
+		return -EPROBE_DEFER;
+
+	ea = devm_kzalloc(dev, sizeof(*ea), GFP_KERNEL);
+	if (!ea)
+		return -ENOMEM;
+
+	name = dev_name(dev);
+	/* See function coresight_get_sink_by_id() to know where this is used */
+	hash = hashlen_hash(hashlen_string(NULL, name));
+
+	sysfs_attr_init(&ea->attr.attr);
+	ea->attr.attr.name = devm_kstrdup(dev, name, GFP_KERNEL);
+	if (!ea->attr.attr.name)
+		return -ENOMEM;
+
+	ea->attr.attr.mode = 0444;
+	ea->attr.show = etm_perf_sink_name_show;
+	ea->var = (unsigned long *)hash;
+
+	ret = sysfs_add_file_to_group(&pmu_dev->kobj,
+				      &ea->attr.attr, "sinks");
+
+	if (!ret)
+		csdev->ea = ea;
+
+	return ret;
+}
+
+void etm_perf_del_symlink_sink(struct coresight_device *csdev)
+{
+	struct device *pmu_dev = etm_pmu.dev;
+	struct dev_ext_attribute *ea = csdev->ea;
+
+	if (csdev->type != CORESIGHT_DEV_TYPE_SINK &&
+	    csdev->type != CORESIGHT_DEV_TYPE_LINKSINK)
+		return;
+
+	if (!ea)
+		return;
+
+	sysfs_remove_file_from_group(&pmu_dev->kobj,
+				     &ea->attr.attr, "sinks");
+	csdev->ea = NULL;
+}
+
 static int __init etm_perf_init(void)
 {
 	int ret;
 
-	etm_pmu.capabilities		= PERF_PMU_CAP_EXCLUSIVE;
+	etm_pmu.capabilities		= (PERF_PMU_CAP_EXCLUSIVE |
+					   PERF_PMU_CAP_ITRACE);
 
 	etm_pmu.attr_groups		= etm_pmu_attr_groups;
 	etm_pmu.task_ctx_nr		= perf_sw_context;
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.h b/drivers/hwtracing/coresight/coresight-etm-perf.h
index 4197df4..015213a 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.h
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.h
@@ -7,6 +7,7 @@
 #ifndef _CORESIGHT_ETM_PERF_H
 #define _CORESIGHT_ETM_PERF_H
 
+#include <linux/percpu-defs.h>
 #include "coresight-priv.h"
 
 struct coresight_device;
@@ -42,13 +43,42 @@
 	bool			ssstatus;
 };
 
+/**
+ * struct etm_event_data - Coresight specifics associated to an event
+ * @work:		Handle to free allocated memory outside IRQ context.
+ * @mask:		Hold the CPU(s) this event was set for.
+ * @snk_config:		The sink configuration.
+ * @path:		An array of path, each slot for one CPU.
+ */
+struct etm_event_data {
+	struct work_struct work;
+	cpumask_t mask;
+	void *snk_config;
+	struct list_head * __percpu *path;
+};
 
 #ifdef CONFIG_CORESIGHT
 int etm_perf_symlink(struct coresight_device *csdev, bool link);
+int etm_perf_add_symlink_sink(struct coresight_device *csdev);
+void etm_perf_del_symlink_sink(struct coresight_device *csdev);
+static inline void *etm_perf_sink_config(struct perf_output_handle *handle)
+{
+	struct etm_event_data *data = perf_get_aux(handle);
 
+	if (data)
+		return data->snk_config;
+	return NULL;
+}
 #else
 static inline int etm_perf_symlink(struct coresight_device *csdev, bool link)
 { return -EINVAL; }
+int etm_perf_add_symlink_sink(struct coresight_device *csdev)
+{ return -EINVAL; }
+void etm_perf_del_symlink_sink(struct coresight_device *csdev) {}
+static inline void *etm_perf_sink_config(struct perf_output_handle *handle)
+{
+	return NULL;
+}
 
 #endif /* CONFIG_CORESIGHT */
 
diff --git a/drivers/hwtracing/coresight/coresight-etm.h b/drivers/hwtracing/coresight/coresight-etm.h
index 79e1ad8..f3ab96e 100644
--- a/drivers/hwtracing/coresight/coresight-etm.h
+++ b/drivers/hwtracing/coresight/coresight-etm.h
@@ -208,7 +208,6 @@
 /**
  * struct etm_drvdata - specifics associated to an ETM component
  * @base:	memory mapped base address for this component.
- * @dev:	the device entity associated to this component.
  * @atclk:	optional clock for the core parts of the ETM.
  * @csdev:	component vitals needed by the framework.
  * @spinlock:	only one at a time pls.
@@ -232,7 +231,6 @@
  */
 struct etm_drvdata {
 	void __iomem			*base;
-	struct device			*dev;
 	struct clk			*atclk;
 	struct coresight_device		*csdev;
 	spinlock_t			spinlock;
@@ -260,7 +258,7 @@
 {
 	if (drvdata->use_cp14) {
 		if (etm_writel_cp14(off, val)) {
-			dev_err(drvdata->dev,
+			dev_err(&drvdata->csdev->dev,
 				"invalid CP14 access to ETM reg: %#x", off);
 		}
 	} else {
@@ -274,7 +272,7 @@
 
 	if (drvdata->use_cp14) {
 		if (etm_readl_cp14(off, &val)) {
-			dev_err(drvdata->dev,
+			dev_err(&drvdata->csdev->dev,
 				"invalid CP14 access to ETM reg: %#x", off);
 		}
 	} else {
diff --git a/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c b/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c
index 75487b3..e8c7649 100644
--- a/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c
+++ b/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c
@@ -48,7 +48,7 @@
 	unsigned long flags, val;
 	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
 
-	pm_runtime_get_sync(drvdata->dev);
+	pm_runtime_get_sync(dev->parent);
 	spin_lock_irqsave(&drvdata->spinlock, flags);
 	CS_UNLOCK(drvdata->base);
 
@@ -56,7 +56,7 @@
 
 	CS_LOCK(drvdata->base);
 	spin_unlock_irqrestore(&drvdata->spinlock, flags);
-	pm_runtime_put(drvdata->dev);
+	pm_runtime_put(dev->parent);
 
 	return sprintf(buf, "%#lx\n", val);
 }
@@ -131,7 +131,7 @@
 
 	if (config->mode & ETM_MODE_STALL) {
 		if (!(drvdata->etmccr & ETMCCR_FIFOFULL)) {
-			dev_warn(drvdata->dev, "stall mode not supported\n");
+			dev_warn(dev, "stall mode not supported\n");
 			ret = -EINVAL;
 			goto err_unlock;
 		}
@@ -141,7 +141,7 @@
 
 	if (config->mode & ETM_MODE_TIMESTAMP) {
 		if (!(drvdata->etmccer & ETMCCER_TIMESTAMP)) {
-			dev_warn(drvdata->dev, "timestamp not supported\n");
+			dev_warn(dev, "timestamp not supported\n");
 			ret = -EINVAL;
 			goto err_unlock;
 		}
@@ -945,7 +945,7 @@
 		goto out;
 	}
 
-	pm_runtime_get_sync(drvdata->dev);
+	pm_runtime_get_sync(dev->parent);
 	spin_lock_irqsave(&drvdata->spinlock, flags);
 
 	CS_UNLOCK(drvdata->base);
@@ -953,7 +953,7 @@
 	CS_LOCK(drvdata->base);
 
 	spin_unlock_irqrestore(&drvdata->spinlock, flags);
-	pm_runtime_put(drvdata->dev);
+	pm_runtime_put(dev->parent);
 out:
 	return sprintf(buf, "%#lx\n", val);
 }
diff --git a/drivers/hwtracing/coresight/coresight-etm3x.c b/drivers/hwtracing/coresight/coresight-etm3x.c
index 7c74263..e2cb687 100644
--- a/drivers/hwtracing/coresight/coresight-etm3x.c
+++ b/drivers/hwtracing/coresight/coresight-etm3x.c
@@ -165,7 +165,7 @@
 	 */
 	isb();
 	if (coresight_timeout_etm(drvdata, ETMSR, ETMSR_PROG_BIT, 1)) {
-		dev_err(drvdata->dev,
+		dev_err(&drvdata->csdev->dev,
 			"%s: timeout observed when probing at offset %#x\n",
 			__func__, ETMSR);
 	}
@@ -184,7 +184,7 @@
 	 */
 	isb();
 	if (coresight_timeout_etm(drvdata, ETMSR, ETMSR_PROG_BIT, 0)) {
-		dev_err(drvdata->dev,
+		dev_err(&drvdata->csdev->dev,
 			"%s: timeout observed when probing at offset %#x\n",
 			__func__, ETMSR);
 	}
@@ -355,15 +355,18 @@
 	return 0;
 }
 
-static void etm_enable_hw(void *info)
+static int etm_enable_hw(struct etm_drvdata *drvdata)
 {
-	int i;
+	int i, rc;
 	u32 etmcr;
-	struct etm_drvdata *drvdata = info;
 	struct etm_config *config = &drvdata->config;
 
 	CS_UNLOCK(drvdata->base);
 
+	rc = coresight_claim_device_unlocked(drvdata->base);
+	if (rc)
+		goto done;
+
 	/* Turn engine on */
 	etm_clr_pwrdwn(drvdata);
 	/* Apply power to trace registers */
@@ -418,9 +421,27 @@
 	etm_writel(drvdata, 0x0, ETMVMIDCVR);
 
 	etm_clr_prog(drvdata);
+
+done:
 	CS_LOCK(drvdata->base);
 
-	dev_dbg(drvdata->dev, "cpu: %d enable smp call done\n", drvdata->cpu);
+	dev_dbg(&drvdata->csdev->dev, "cpu: %d enable smp call done: %d\n",
+		drvdata->cpu, rc);
+	return rc;
+}
+
+struct etm_enable_arg {
+	struct etm_drvdata *drvdata;
+	int rc;
+};
+
+static void etm_enable_hw_smp_call(void *info)
+{
+	struct etm_enable_arg *arg = info;
+
+	if (WARN_ON(!arg))
+		return;
+	arg->rc = etm_enable_hw(arg->drvdata);
 }
 
 static int etm_cpu_id(struct coresight_device *csdev)
@@ -434,14 +455,16 @@
 {
 	unsigned long flags;
 	int trace_id = -1;
+	struct device *etm_dev;
 
 	if (!drvdata)
 		goto out;
 
+	etm_dev = drvdata->csdev->dev.parent;
 	if (!local_read(&drvdata->mode))
 		return drvdata->traceid;
 
-	pm_runtime_get_sync(drvdata->dev);
+	pm_runtime_get_sync(etm_dev);
 
 	spin_lock_irqsave(&drvdata->spinlock, flags);
 
@@ -450,7 +473,7 @@
 	CS_LOCK(drvdata->base);
 
 	spin_unlock_irqrestore(&drvdata->spinlock, flags);
-	pm_runtime_put(drvdata->dev);
+	pm_runtime_put(etm_dev);
 
 out:
 	return trace_id;
@@ -475,14 +498,13 @@
 	/* Configure the tracer based on the session's specifics */
 	etm_parse_event_config(drvdata, event);
 	/* And enable it */
-	etm_enable_hw(drvdata);
-
-	return 0;
+	return etm_enable_hw(drvdata);
 }
 
 static int etm_enable_sysfs(struct coresight_device *csdev)
 {
 	struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	struct etm_enable_arg arg = { 0 };
 	int ret;
 
 	spin_lock(&drvdata->spinlock);
@@ -492,20 +514,21 @@
 	 * hw configuration will take place on the local CPU during bring up.
 	 */
 	if (cpu_online(drvdata->cpu)) {
+		arg.drvdata = drvdata;
 		ret = smp_call_function_single(drvdata->cpu,
-					       etm_enable_hw, drvdata, 1);
-		if (ret)
-			goto err;
+					       etm_enable_hw_smp_call, &arg, 1);
+		if (!ret)
+			ret = arg.rc;
+		if (!ret)
+			drvdata->sticky_enable = true;
+	} else {
+		ret = -ENODEV;
 	}
 
-	drvdata->sticky_enable = true;
 	spin_unlock(&drvdata->spinlock);
 
-	dev_info(drvdata->dev, "ETM tracing enabled\n");
-	return 0;
-
-err:
-	spin_unlock(&drvdata->spinlock);
+	if (!ret)
+		dev_dbg(&csdev->dev, "ETM tracing enabled\n");
 	return ret;
 }
 
@@ -556,9 +579,12 @@
 		config->cntr_val[i] = etm_readl(drvdata, ETMCNTVRn(i));
 
 	etm_set_pwrdwn(drvdata);
+	coresight_disclaim_device_unlocked(drvdata->base);
+
 	CS_LOCK(drvdata->base);
 
-	dev_dbg(drvdata->dev, "cpu: %d disable smp call done\n", drvdata->cpu);
+	dev_dbg(&drvdata->csdev->dev,
+		"cpu: %d disable smp call done\n", drvdata->cpu);
 }
 
 static void etm_disable_perf(struct coresight_device *csdev)
@@ -578,6 +604,7 @@
 	 * power down the tracer.
 	 */
 	etm_set_pwrdwn(drvdata);
+	coresight_disclaim_device_unlocked(drvdata->base);
 
 	CS_LOCK(drvdata->base);
 }
@@ -604,7 +631,7 @@
 	spin_unlock(&drvdata->spinlock);
 	cpus_read_unlock();
 
-	dev_info(drvdata->dev, "ETM tracing disabled\n");
+	dev_dbg(&csdev->dev, "ETM tracing disabled\n");
 }
 
 static void etm_disable(struct coresight_device *csdev,
@@ -764,22 +791,12 @@
 	struct etm_drvdata *drvdata;
 	struct resource *res = &adev->res;
 	struct coresight_desc desc = { 0 };
-	struct device_node *np = adev->dev.of_node;
 
 	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
 	if (!drvdata)
 		return -ENOMEM;
 
-	if (np) {
-		pdata = of_get_coresight_platform_data(dev, np);
-		if (IS_ERR(pdata))
-			return PTR_ERR(pdata);
-
-		adev->dev.platform_data = pdata;
-		drvdata->use_cp14 = of_property_read_bool(np, "arm,cp14");
-	}
-
-	drvdata->dev = &adev->dev;
+	drvdata->use_cp14 = fwnode_property_read_bool(dev->fwnode, "arm,cp14");
 	dev_set_drvdata(dev, drvdata);
 
 	/* Validity for the resource is already checked by the AMBA core */
@@ -798,7 +815,13 @@
 			return ret;
 	}
 
-	drvdata->cpu = pdata ? pdata->cpu : 0;
+	drvdata->cpu = coresight_get_cpu(dev);
+	if (drvdata->cpu < 0)
+		return drvdata->cpu;
+
+	desc.name  = devm_kasprintf(dev, GFP_KERNEL, "etm%d", drvdata->cpu);
+	if (!desc.name)
+		return -ENOMEM;
 
 	cpus_read_lock();
 	etmdrvdata[drvdata->cpu] = drvdata;
@@ -828,6 +851,13 @@
 	etm_init_trace_id(drvdata);
 	etm_set_default(&drvdata->config);
 
+	pdata = coresight_get_platform_data(dev);
+	if (IS_ERR(pdata)) {
+		ret = PTR_ERR(pdata);
+		goto err_arch_supported;
+	}
+	adev->dev.platform_data = pdata;
+
 	desc.type = CORESIGHT_DEV_TYPE_SOURCE;
 	desc.subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_PROC;
 	desc.ops = &etm_cs_ops;
@@ -847,7 +877,8 @@
 	}
 
 	pm_runtime_put(&adev->dev);
-	dev_info(dev, "%s initialized\n", (char *)id->data);
+	dev_info(&drvdata->csdev->dev,
+		 "%s initialized\n", (char *)coresight_get_uci_data(id));
 	if (boot_enable) {
 		coresight_enable(drvdata->csdev);
 		drvdata->boot_enable = true;
@@ -891,36 +922,18 @@
 };
 
 static const struct amba_id etm_ids[] = {
-	{	/* ETM 3.3 */
-		.id	= 0x000bb921,
-		.mask	= 0x000fffff,
-		.data	= "ETM 3.3",
-	},
-	{	/* ETM 3.5 - Cortex-A5 */
-		.id	= 0x000bb955,
-		.mask	= 0x000fffff,
-		.data	= "ETM 3.5",
-	},
-	{	/* ETM 3.5 */
-		.id	= 0x000bb956,
-		.mask	= 0x000fffff,
-		.data	= "ETM 3.5",
-	},
-	{	/* PTM 1.0 */
-		.id	= 0x000bb950,
-		.mask	= 0x000fffff,
-		.data	= "PTM 1.0",
-	},
-	{	/* PTM 1.1 */
-		.id	= 0x000bb95f,
-		.mask	= 0x000fffff,
-		.data	= "PTM 1.1",
-	},
-	{	/* PTM 1.1 Qualcomm */
-		.id	= 0x000b006f,
-		.mask	= 0x000fffff,
-		.data	= "PTM 1.1",
-	},
+	/* ETM 3.3 */
+	CS_AMBA_ID_DATA(0x000bb921, "ETM 3.3"),
+	/* ETM 3.5 - Cortex-A5 */
+	CS_AMBA_ID_DATA(0x000bb955, "ETM 3.5"),
+	/* ETM 3.5 */
+	CS_AMBA_ID_DATA(0x000bb956, "ETM 3.5"),
+	/* PTM 1.0 */
+	CS_AMBA_ID_DATA(0x000bb950, "PTM 1.0"),
+	/* PTM 1.1 */
+	CS_AMBA_ID_DATA(0x000bb95f, "PTM 1.1"),
+	/* PTM 1.1 Qualcomm */
+	CS_AMBA_ID_DATA(0x000b006f, "PTM 1.1"),
 	{ 0, 0},
 };
 
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
index a0365e2..219c10e 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
@@ -296,11 +296,8 @@
 
 	spin_lock(&drvdata->spinlock);
 	config->mode = val & ETMv4_MODE_ALL;
-
-	if (config->mode & ETM_MODE_EXCLUDE)
-		etm4_set_mode_exclude(drvdata, true);
-	else
-		etm4_set_mode_exclude(drvdata, false);
+	etm4_set_mode_exclude(drvdata,
+			      config->mode & ETM_MODE_EXCLUDE ? true : false);
 
 	if (drvdata->instrp0 == true) {
 		/* start by clearing instruction P0 field */
@@ -999,10 +996,8 @@
 	 * Program include or exclude control bits for vinst or vdata
 	 * whenever we change addr comparators to ETM_ADDR_TYPE_RANGE
 	 */
-	if (config->mode & ETM_MODE_EXCLUDE)
-		etm4_set_mode_exclude(drvdata, true);
-	else
-		etm4_set_mode_exclude(drvdata, false);
+	etm4_set_mode_exclude(drvdata,
+			      config->mode & ETM_MODE_EXCLUDE ? true : false);
 
 	spin_unlock(&drvdata->spinlock);
 	return size;
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c
index 1d94ebe..a128b50 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x.c
@@ -28,12 +28,14 @@
 #include <linux/pm_runtime.h>
 #include <asm/sections.h>
 #include <asm/local.h>
+#include <asm/virt.h>
 
 #include "coresight-etm4x.h"
 #include "coresight-etm-perf.h"
 
 static int boot_enable;
-module_param_named(boot_enable, boot_enable, int, S_IRUGO);
+module_param(boot_enable, int, 0444);
+MODULE_PARM_DESC(boot_enable, "Enable tracing on boot");
 
 /* The number of ETMv4 currently registered */
 static int etm4_count;
@@ -46,7 +48,7 @@
 
 static void etm4_os_unlock(struct etmv4_drvdata *drvdata)
 {
-	/* Writing any value to ETMOSLAR unlocks the trace registers */
+	/* Writing 0 to TRCOSLAR unlocks the trace registers */
 	writel_relaxed(0x0, drvdata->base + TRCOSLAR);
 	drvdata->os_unlock = true;
 	isb();
@@ -54,7 +56,8 @@
 
 static bool etm4_arch_supported(u8 arch)
 {
-	switch (arch) {
+	/* Mask out the minor version number */
+	switch (arch & 0xf0) {
 	case ETM_ARCH_V4:
 		break;
 	default:
@@ -77,22 +80,31 @@
 	return drvdata->trcid;
 }
 
-static void etm4_enable_hw(void *info)
+struct etm4_enable_arg {
+	struct etmv4_drvdata *drvdata;
+	int rc;
+};
+
+static int etm4_enable_hw(struct etmv4_drvdata *drvdata)
 {
-	int i;
-	struct etmv4_drvdata *drvdata = info;
+	int i, rc;
 	struct etmv4_config *config = &drvdata->config;
+	struct device *etm_dev = &drvdata->csdev->dev;
 
 	CS_UNLOCK(drvdata->base);
 
 	etm4_os_unlock(drvdata);
 
+	rc = coresight_claim_device_unlocked(drvdata->base);
+	if (rc)
+		goto done;
+
 	/* Disable the trace unit before programming trace registers */
 	writel_relaxed(0, drvdata->base + TRCPRGCTLR);
 
 	/* wait for TRCSTATR.IDLE to go up */
 	if (coresight_timeout(drvdata->base, TRCSTATR, TRCSTATR_IDLE_BIT, 1))
-		dev_err(drvdata->dev,
+		dev_err(etm_dev,
 			"timeout while waiting for Idle Trace Status\n");
 
 	writel_relaxed(config->pe_sel, drvdata->base + TRCPROCSELR);
@@ -128,8 +140,11 @@
 			       drvdata->base + TRCCNTVRn(i));
 	}
 
-	/* Resource selector pair 0 is always implemented and reserved */
-	for (i = 0; i < drvdata->nr_resource * 2; i++)
+	/*
+	 * Resource selector pair 0 is always implemented and reserved.  As
+	 * such start at 2.
+	 */
+	for (i = 2; i < drvdata->nr_resource * 2; i++)
 		writel_relaxed(config->res_ctrl[i],
 			       drvdata->base + TRCRSCTLRn(i));
 
@@ -171,12 +186,116 @@
 
 	/* wait for TRCSTATR.IDLE to go back down to '0' */
 	if (coresight_timeout(drvdata->base, TRCSTATR, TRCSTATR_IDLE_BIT, 0))
-		dev_err(drvdata->dev,
+		dev_err(etm_dev,
 			"timeout while waiting for Idle Trace Status\n");
 
+	/*
+	 * As recommended by section 4.3.7 ("Synchronization when using the
+	 * memory-mapped interface") of ARM IHI 0064D
+	 */
+	dsb(sy);
+	isb();
+
+done:
 	CS_LOCK(drvdata->base);
 
-	dev_dbg(drvdata->dev, "cpu: %d enable smp call done\n", drvdata->cpu);
+	dev_dbg(etm_dev, "cpu: %d enable smp call done: %d\n",
+		drvdata->cpu, rc);
+	return rc;
+}
+
+static void etm4_enable_hw_smp_call(void *info)
+{
+	struct etm4_enable_arg *arg = info;
+
+	if (WARN_ON(!arg))
+		return;
+	arg->rc = etm4_enable_hw(arg->drvdata);
+}
+
+/*
+ * The goal of function etm4_config_timestamp_event() is to configure a
+ * counter that will tell the tracer to emit a timestamp packet when it
+ * reaches zero.  This is done in order to get a more fine grained idea
+ * of when instructions are executed so that they can be correlated
+ * with execution on other CPUs.
+ *
+ * To do this the counter itself is configured to self reload and
+ * TRCRSCTLR1 (always true) used to get the counter to decrement.  From
+ * there a resource selector is configured with the counter and the
+ * timestamp control register to use the resource selector to trigger the
+ * event that will insert a timestamp packet in the stream.
+ */
+static int etm4_config_timestamp_event(struct etmv4_drvdata *drvdata)
+{
+	int ctridx, ret = -EINVAL;
+	int counter, rselector;
+	u32 val = 0;
+	struct etmv4_config *config = &drvdata->config;
+
+	/* No point in trying if we don't have at least one counter */
+	if (!drvdata->nr_cntr)
+		goto out;
+
+	/* Find a counter that hasn't been initialised */
+	for (ctridx = 0; ctridx < drvdata->nr_cntr; ctridx++)
+		if (config->cntr_val[ctridx] == 0)
+			break;
+
+	/* All the counters have been configured already, bail out */
+	if (ctridx == drvdata->nr_cntr) {
+		pr_debug("%s: no available counter found\n", __func__);
+		ret = -ENOSPC;
+		goto out;
+	}
+
+	/*
+	 * Searching for an available resource selector to use, starting at
+	 * '2' since every implementation has at least 2 resource selector.
+	 * ETMIDR4 gives the number of resource selector _pairs_,
+	 * hence multiply by 2.
+	 */
+	for (rselector = 2; rselector < drvdata->nr_resource * 2; rselector++)
+		if (!config->res_ctrl[rselector])
+			break;
+
+	if (rselector == drvdata->nr_resource * 2) {
+		pr_debug("%s: no available resource selector found\n",
+			 __func__);
+		ret = -ENOSPC;
+		goto out;
+	}
+
+	/* Remember what counter we used */
+	counter = 1 << ctridx;
+
+	/*
+	 * Initialise original and reload counter value to the smallest
+	 * possible value in order to get as much precision as we can.
+	 */
+	config->cntr_val[ctridx] = 1;
+	config->cntrldvr[ctridx] = 1;
+
+	/* Set the trace counter control register */
+	val =  0x1 << 16	|  /* Bit 16, reload counter automatically */
+	       0x0 << 7		|  /* Select single resource selector */
+	       0x1;		   /* Resource selector 1, i.e always true */
+
+	config->cntr_ctrl[ctridx] = val;
+
+	val = 0x2 << 16		| /* Group 0b0010 - Counter and sequencers */
+	      counter << 0;	  /* Counter to use */
+
+	config->res_ctrl[rselector] = val;
+
+	val = 0x0 << 7		| /* Select single resource selector */
+	      rselector;	  /* Resource selector */
+
+	config->ts_ctrl = val;
+
+	ret = 0;
+out:
+	return ret;
 }
 
 static int etm4_parse_event_config(struct etmv4_drvdata *drvdata,
@@ -214,9 +333,29 @@
 		/* TRM: Must program this for cycacc to work */
 		config->ccctlr = ETM_CYC_THRESHOLD_DEFAULT;
 	}
-	if (attr->config & BIT(ETM_OPT_TS))
+	if (attr->config & BIT(ETM_OPT_TS)) {
+		/*
+		 * Configure timestamps to be emitted at regular intervals in
+		 * order to correlate instructions executed on different CPUs
+		 * (CPU-wide trace scenarios).
+		 */
+		ret = etm4_config_timestamp_event(drvdata);
+
+		/*
+		 * No need to go further if timestamp intervals can't
+		 * be configured.
+		 */
+		if (ret)
+			goto out;
+
 		/* bit[11], Global timestamp tracing bit */
 		config->cfg |= BIT(11);
+	}
+
+	if (attr->config & BIT(ETM_OPT_CTXTID))
+		/* bit[6], Context ID tracing bit */
+		config->cfg |= BIT(ETM4_CFG_BIT_CTXTID);
+
 	/* return stack - enable if selected and supported */
 	if ((attr->config & BIT(ETM_OPT_RETSTK)) && drvdata->retstack)
 		/* bit[12], Return stack enable bit */
@@ -242,7 +381,7 @@
 	if (ret)
 		goto out;
 	/* And enable it */
-	etm4_enable_hw(drvdata);
+	ret = etm4_enable_hw(drvdata);
 
 out:
 	return ret;
@@ -251,6 +390,7 @@
 static int etm4_enable_sysfs(struct coresight_device *csdev)
 {
 	struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	struct etm4_enable_arg arg = { 0 };
 	int ret;
 
 	spin_lock(&drvdata->spinlock);
@@ -259,19 +399,17 @@
 	 * Executing etm4_enable_hw on the cpu whose ETM is being enabled
 	 * ensures that register writes occur when cpu is powered.
 	 */
+	arg.drvdata = drvdata;
 	ret = smp_call_function_single(drvdata->cpu,
-				       etm4_enable_hw, drvdata, 1);
-	if (ret)
-		goto err;
-
-	drvdata->sticky_enable = true;
+				       etm4_enable_hw_smp_call, &arg, 1);
+	if (!ret)
+		ret = arg.rc;
+	if (!ret)
+		drvdata->sticky_enable = true;
 	spin_unlock(&drvdata->spinlock);
 
-	dev_info(drvdata->dev, "ETM tracing enabled\n");
-	return 0;
-
-err:
-	spin_unlock(&drvdata->spinlock);
+	if (!ret)
+		dev_dbg(&csdev->dev, "ETM tracing enabled\n");
 	return ret;
 }
 
@@ -323,14 +461,21 @@
 	/* EN, bit[0] Trace unit enable bit */
 	control &= ~0x1;
 
-	/* make sure everything completes before disabling */
-	mb();
+	/*
+	 * Make sure everything completes before disabling, as recommended
+	 * by section 7.3.77 ("TRCVICTLR, ViewInst Main Control Register,
+	 * SSTATUS") of ARM IHI 0064D
+	 */
+	dsb(sy);
 	isb();
 	writel_relaxed(control, drvdata->base + TRCPRGCTLR);
 
+	coresight_disclaim_device_unlocked(drvdata->base);
+
 	CS_LOCK(drvdata->base);
 
-	dev_dbg(drvdata->dev, "cpu: %d disable smp call done\n", drvdata->cpu);
+	dev_dbg(&drvdata->csdev->dev,
+		"cpu: %d disable smp call done\n", drvdata->cpu);
 }
 
 static int etm4_disable_perf(struct coresight_device *csdev,
@@ -380,7 +525,7 @@
 	spin_unlock(&drvdata->spinlock);
 	cpus_read_unlock();
 
-	dev_info(drvdata->dev, "ETM tracing disabled\n");
+	dev_dbg(&csdev->dev, "ETM tracing disabled\n");
 }
 
 static void etm4_disable(struct coresight_device *csdev,
@@ -605,7 +750,7 @@
 	config->vinst_ctrl |= BIT(0);
 }
 
-static u64 etm4_get_access_type(struct etmv4_config *config)
+static u64 etm4_get_ns_access_type(struct etmv4_config *config)
 {
 	u64 access_type = 0;
 
@@ -616,17 +761,26 @@
 	 *   Bit[13] Exception level 1 - OS
 	 *   Bit[14] Exception level 2 - Hypervisor
 	 *   Bit[15] Never implemented
-	 *
-	 * Always stay away from hypervisor mode.
 	 */
-	access_type = ETM_EXLEVEL_NS_HYP;
-
-	if (config->mode & ETM_MODE_EXCL_KERN)
-		access_type |= ETM_EXLEVEL_NS_OS;
+	if (!is_kernel_in_hyp_mode()) {
+		/* Stay away from hypervisor mode for non-VHE */
+		access_type =  ETM_EXLEVEL_NS_HYP;
+		if (config->mode & ETM_MODE_EXCL_KERN)
+			access_type |= ETM_EXLEVEL_NS_OS;
+	} else if (config->mode & ETM_MODE_EXCL_KERN) {
+		access_type = ETM_EXLEVEL_NS_HYP;
+	}
 
 	if (config->mode & ETM_MODE_EXCL_USER)
 		access_type |= ETM_EXLEVEL_NS_APP;
 
+	return access_type;
+}
+
+static u64 etm4_get_access_type(struct etmv4_config *config)
+{
+	u64 access_type = etm4_get_ns_access_type(config);
+
 	/*
 	 * EXLEVEL_S, bits[11:8], don't trace anything happening
 	 * in secure state.
@@ -880,20 +1034,10 @@
 
 	addr_acc = config->addr_acc[ETM_DEFAULT_ADDR_COMP];
 	/* clear default config */
-	addr_acc &= ~(ETM_EXLEVEL_NS_APP | ETM_EXLEVEL_NS_OS);
+	addr_acc &= ~(ETM_EXLEVEL_NS_APP | ETM_EXLEVEL_NS_OS |
+		      ETM_EXLEVEL_NS_HYP);
 
-	/*
-	 * EXLEVEL_NS, bits[15:12]
-	 * The Exception levels are:
-	 *   Bit[12] Exception level 0 - Application
-	 *   Bit[13] Exception level 1 - OS
-	 *   Bit[14] Exception level 2 - Hypervisor
-	 *   Bit[15] Never implemented
-	 */
-	if (mode & ETM_MODE_EXCL_KERN)
-		addr_acc |= ETM_EXLEVEL_NS_OS;
-	else
-		addr_acc |= ETM_EXLEVEL_NS_APP;
+	addr_acc |= etm4_get_ns_access_type(config);
 
 	config->addr_acc[ETM_DEFAULT_ADDR_COMP] = addr_acc;
 	config->addr_acc[ETM_DEFAULT_ADDR_COMP + 1] = addr_acc;
@@ -915,10 +1059,8 @@
 		return 0;
 
 	spin_lock(&etmdrvdata[cpu]->spinlock);
-	if (!etmdrvdata[cpu]->os_unlock) {
+	if (!etmdrvdata[cpu]->os_unlock)
 		etm4_os_unlock(etmdrvdata[cpu]);
-		etmdrvdata[cpu]->os_unlock = true;
-	}
 
 	if (local_read(&etmdrvdata[cpu]->mode))
 		etm4_enable_hw(etmdrvdata[cpu]);
@@ -952,20 +1094,11 @@
 	struct etmv4_drvdata *drvdata;
 	struct resource *res = &adev->res;
 	struct coresight_desc desc = { 0 };
-	struct device_node *np = adev->dev.of_node;
 
 	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
 	if (!drvdata)
 		return -ENOMEM;
 
-	if (np) {
-		pdata = of_get_coresight_platform_data(dev, np);
-		if (IS_ERR(pdata))
-			return PTR_ERR(pdata);
-		adev->dev.platform_data = pdata;
-	}
-
-	drvdata->dev = &adev->dev;
 	dev_set_drvdata(dev, drvdata);
 
 	/* Validity for the resource is already checked by the AMBA core */
@@ -977,7 +1110,13 @@
 
 	spin_lock_init(&drvdata->spinlock);
 
-	drvdata->cpu = pdata ? pdata->cpu : 0;
+	drvdata->cpu = coresight_get_cpu(dev);
+	if (drvdata->cpu < 0)
+		return drvdata->cpu;
+
+	desc.name = devm_kasprintf(dev, GFP_KERNEL, "etm%d", drvdata->cpu);
+	if (!desc.name)
+		return -ENOMEM;
 
 	cpus_read_lock();
 	etmdrvdata[drvdata->cpu] = drvdata;
@@ -1008,6 +1147,13 @@
 	etm4_init_trace_id(drvdata);
 	etm4_set_default(&drvdata->config);
 
+	pdata = coresight_get_platform_data(dev);
+	if (IS_ERR(pdata)) {
+		ret = PTR_ERR(pdata);
+		goto err_arch_supported;
+	}
+	adev->dev.platform_data = pdata;
+
 	desc.type = CORESIGHT_DEV_TYPE_SOURCE;
 	desc.subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_PROC;
 	desc.ops = &etm4_cs_ops;
@@ -1027,7 +1173,7 @@
 	}
 
 	pm_runtime_put(&adev->dev);
-	dev_info(dev, "CPU%d: ETM v%d.%d initialized\n",
+	dev_info(&drvdata->csdev->dev, "CPU%d: ETM v%d.%d initialized\n",
 		 drvdata->cpu, drvdata->arch >> 4, drvdata->arch & 0xf);
 
 	if (boot_enable) {
@@ -1046,18 +1192,25 @@
 	return ret;
 }
 
-#define ETM4x_AMBA_ID(pid)			\
-	{					\
-		.id	= pid,			\
-		.mask	= 0x000fffff,		\
+static struct amba_cs_uci_id uci_id_etm4[] = {
+	{
+		/*  ETMv4 UCI data */
+		.devarch	= 0x47704a13,
+		.devarch_mask	= 0xfff0ffff,
+		.devtype	= 0x00000013,
 	}
+};
 
 static const struct amba_id etm4_ids[] = {
-	ETM4x_AMBA_ID(0x000bb95d),		/* Cortex-A53 */
-	ETM4x_AMBA_ID(0x000bb95e),		/* Cortex-A57 */
-	ETM4x_AMBA_ID(0x000bb95a),		/* Cortex-A72 */
-	ETM4x_AMBA_ID(0x000bb959),		/* Cortex-A73 */
-	ETM4x_AMBA_ID(0x000bb9da),		/* Cortex-A35 */
+	CS_AMBA_ID(0x000bb95d),			/* Cortex-A53 */
+	CS_AMBA_ID(0x000bb95e),			/* Cortex-A57 */
+	CS_AMBA_ID(0x000bb95a),			/* Cortex-A72 */
+	CS_AMBA_ID(0x000bb959),			/* Cortex-A73 */
+	CS_AMBA_UCI_ID(0x000bb9da, uci_id_etm4),/* Cortex-A35 */
+	CS_AMBA_UCI_ID(0x000f0205, uci_id_etm4),/* Qualcomm Kryo */
+	CS_AMBA_UCI_ID(0x000f0211, uci_id_etm4),/* Qualcomm Kryo */
+	CS_AMBA_ID(0x000bb802),			/* Qualcomm Kryo 385 Cortex-A55 */
+	CS_AMBA_ID(0x000bb803),			/* Qualcomm Kryo 385 Cortex-A75 */
 	{},
 };
 
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h
index 52786e9..4523f10 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x.h
+++ b/drivers/hwtracing/coresight/coresight-etm4x.h
@@ -284,7 +284,6 @@
 /**
  * struct etm4_drvdata - specifics associated to an ETM component
  * @base:       Memory mapped base address for this component.
- * @dev:        The device entity associated to this component.
  * @csdev:      Component vitals needed by the framework.
  * @spinlock:   Only one at a time pls.
  * @mode:	This tracer's mode, i.e sysFS, Perf or disabled.
@@ -340,7 +339,6 @@
  */
 struct etmv4_drvdata {
 	void __iomem			*base;
-	struct device			*dev;
 	struct coresight_device		*csdev;
 	spinlock_t			spinlock;
 	local_t				mode;
diff --git a/drivers/hwtracing/coresight/coresight-funnel.c b/drivers/hwtracing/coresight/coresight-funnel.c
index 448145a..05f7896 100644
--- a/drivers/hwtracing/coresight/coresight-funnel.c
+++ b/drivers/hwtracing/coresight/coresight-funnel.c
@@ -5,6 +5,7 @@
  * Description: CoreSight Funnel driver
  */
 
+#include <linux/acpi.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/types.h>
@@ -12,6 +13,8 @@
 #include <linux/err.h>
 #include <linux/fs.h>
 #include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/coresight.h>
 #include <linux/amba/bus.h>
@@ -25,51 +28,65 @@
 #define FUNNEL_HOLDTIME_MASK	0xf00
 #define FUNNEL_HOLDTIME_SHFT	0x8
 #define FUNNEL_HOLDTIME		(0x7 << FUNNEL_HOLDTIME_SHFT)
+#define FUNNEL_ENSx_MASK	0xff
+
+DEFINE_CORESIGHT_DEVLIST(funnel_devs, "funnel");
 
 /**
  * struct funnel_drvdata - specifics associated to a funnel component
  * @base:	memory mapped base address for this component.
- * @dev:	the device entity associated to this component.
  * @atclk:	optional clock for the core parts of the funnel.
  * @csdev:	component vitals needed by the framework.
  * @priority:	port selection order.
  */
 struct funnel_drvdata {
 	void __iomem		*base;
-	struct device		*dev;
 	struct clk		*atclk;
 	struct coresight_device	*csdev;
 	unsigned long		priority;
 };
 
-static void funnel_enable_hw(struct funnel_drvdata *drvdata, int port)
+static int dynamic_funnel_enable_hw(struct funnel_drvdata *drvdata, int port)
 {
 	u32 functl;
+	int rc = 0;
 
 	CS_UNLOCK(drvdata->base);
 
 	functl = readl_relaxed(drvdata->base + FUNNEL_FUNCTL);
+	/* Claim the device only when we enable the first slave */
+	if (!(functl & FUNNEL_ENSx_MASK)) {
+		rc = coresight_claim_device_unlocked(drvdata->base);
+		if (rc)
+			goto done;
+	}
+
 	functl &= ~FUNNEL_HOLDTIME_MASK;
 	functl |= FUNNEL_HOLDTIME;
 	functl |= (1 << port);
 	writel_relaxed(functl, drvdata->base + FUNNEL_FUNCTL);
 	writel_relaxed(drvdata->priority, drvdata->base + FUNNEL_PRICTL);
-
+done:
 	CS_LOCK(drvdata->base);
+	return rc;
 }
 
 static int funnel_enable(struct coresight_device *csdev, int inport,
 			 int outport)
 {
+	int rc = 0;
 	struct funnel_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
 
-	funnel_enable_hw(drvdata, inport);
+	if (drvdata->base)
+		rc = dynamic_funnel_enable_hw(drvdata, inport);
 
-	dev_info(drvdata->dev, "FUNNEL inport %d enabled\n", inport);
-	return 0;
+	if (!rc)
+		dev_dbg(&csdev->dev, "FUNNEL inport %d enabled\n", inport);
+	return rc;
 }
 
-static void funnel_disable_hw(struct funnel_drvdata *drvdata, int inport)
+static void dynamic_funnel_disable_hw(struct funnel_drvdata *drvdata,
+				      int inport)
 {
 	u32 functl;
 
@@ -79,6 +96,10 @@
 	functl &= ~(1 << inport);
 	writel_relaxed(functl, drvdata->base + FUNNEL_FUNCTL);
 
+	/* Disclaim the device if none of the slaves are now active */
+	if (!(functl & FUNNEL_ENSx_MASK))
+		coresight_disclaim_device_unlocked(drvdata->base);
+
 	CS_LOCK(drvdata->base);
 }
 
@@ -87,9 +108,10 @@
 {
 	struct funnel_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
 
-	funnel_disable_hw(drvdata, inport);
+	if (drvdata->base)
+		dynamic_funnel_disable_hw(drvdata, inport);
 
-	dev_info(drvdata->dev, "FUNNEL inport %d disabled\n", inport);
+	dev_dbg(&csdev->dev, "FUNNEL inport %d disabled\n", inport);
 }
 
 static const struct coresight_ops_link funnel_link_ops = {
@@ -144,11 +166,11 @@
 	u32 val;
 	struct funnel_drvdata *drvdata = dev_get_drvdata(dev->parent);
 
-	pm_runtime_get_sync(drvdata->dev);
+	pm_runtime_get_sync(dev->parent);
 
 	val = get_funnel_ctrl_hw(drvdata);
 
-	pm_runtime_put(drvdata->dev);
+	pm_runtime_put(dev->parent);
 
 	return sprintf(buf, "%#x\n", val);
 }
@@ -161,54 +183,74 @@
 };
 ATTRIBUTE_GROUPS(coresight_funnel);
 
-static int funnel_probe(struct amba_device *adev, const struct amba_id *id)
+static int funnel_probe(struct device *dev, struct resource *res)
 {
 	int ret;
 	void __iomem *base;
-	struct device *dev = &adev->dev;
 	struct coresight_platform_data *pdata = NULL;
 	struct funnel_drvdata *drvdata;
-	struct resource *res = &adev->res;
 	struct coresight_desc desc = { 0 };
-	struct device_node *np = adev->dev.of_node;
 
-	if (np) {
-		pdata = of_get_coresight_platform_data(dev, np);
-		if (IS_ERR(pdata))
-			return PTR_ERR(pdata);
-		adev->dev.platform_data = pdata;
-	}
+	if (is_of_node(dev_fwnode(dev)) &&
+	    of_device_is_compatible(dev->of_node, "arm,coresight-funnel"))
+		dev_warn_once(dev, "Uses OBSOLETE CoreSight funnel binding\n");
+
+	desc.name = coresight_alloc_device_name(&funnel_devs, dev);
+	if (!desc.name)
+		return -ENOMEM;
 
 	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
 	if (!drvdata)
 		return -ENOMEM;
 
-	drvdata->dev = &adev->dev;
-	drvdata->atclk = devm_clk_get(&adev->dev, "atclk"); /* optional */
+	drvdata->atclk = devm_clk_get(dev, "atclk"); /* optional */
 	if (!IS_ERR(drvdata->atclk)) {
 		ret = clk_prepare_enable(drvdata->atclk);
 		if (ret)
 			return ret;
 	}
+
+	/*
+	 * Map the device base for dynamic-funnel, which has been
+	 * validated by AMBA core.
+	 */
+	if (res) {
+		base = devm_ioremap_resource(dev, res);
+		if (IS_ERR(base)) {
+			ret = PTR_ERR(base);
+			goto out_disable_clk;
+		}
+		drvdata->base = base;
+		desc.groups = coresight_funnel_groups;
+	}
+
 	dev_set_drvdata(dev, drvdata);
 
-	/* Validity for the resource is already checked by the AMBA core */
-	base = devm_ioremap_resource(dev, res);
-	if (IS_ERR(base))
-		return PTR_ERR(base);
-
-	drvdata->base = base;
-	pm_runtime_put(&adev->dev);
+	pdata = coresight_get_platform_data(dev);
+	if (IS_ERR(pdata)) {
+		ret = PTR_ERR(pdata);
+		goto out_disable_clk;
+	}
+	dev->platform_data = pdata;
 
 	desc.type = CORESIGHT_DEV_TYPE_LINK;
 	desc.subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_MERG;
 	desc.ops = &funnel_cs_ops;
 	desc.pdata = pdata;
 	desc.dev = dev;
-	desc.groups = coresight_funnel_groups;
 	drvdata->csdev = coresight_register(&desc);
+	if (IS_ERR(drvdata->csdev)) {
+		ret = PTR_ERR(drvdata->csdev);
+		goto out_disable_clk;
+	}
 
-	return PTR_ERR_OR_ZERO(drvdata->csdev);
+	pm_runtime_put(dev);
+	ret = 0;
+
+out_disable_clk:
+	if (ret && !IS_ERR_OR_NULL(drvdata->atclk))
+		clk_disable_unprepare(drvdata->atclk);
+	return ret;
 }
 
 #ifdef CONFIG_PM
@@ -237,7 +279,56 @@
 	SET_RUNTIME_PM_OPS(funnel_runtime_suspend, funnel_runtime_resume, NULL)
 };
 
-static const struct amba_id funnel_ids[] = {
+static int static_funnel_probe(struct platform_device *pdev)
+{
+	int ret;
+
+	pm_runtime_get_noresume(&pdev->dev);
+	pm_runtime_set_active(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
+
+	/* Static funnel do not have programming base */
+	ret = funnel_probe(&pdev->dev, NULL);
+
+	if (ret) {
+		pm_runtime_put_noidle(&pdev->dev);
+		pm_runtime_disable(&pdev->dev);
+	}
+
+	return ret;
+}
+
+static const struct of_device_id static_funnel_match[] = {
+	{.compatible = "arm,coresight-static-funnel"},
+	{}
+};
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id static_funnel_ids[] = {
+	{"ARMHC9FE", 0},
+	{},
+};
+#endif
+
+static struct platform_driver static_funnel_driver = {
+	.probe          = static_funnel_probe,
+	.driver         = {
+		.name   = "coresight-static-funnel",
+		.of_match_table = static_funnel_match,
+		.acpi_match_table = ACPI_PTR(static_funnel_ids),
+		.pm	= &funnel_dev_pm_ops,
+		.suppress_bind_attrs = true,
+	},
+};
+builtin_platform_driver(static_funnel_driver);
+
+static int dynamic_funnel_probe(struct amba_device *adev,
+				const struct amba_id *id)
+{
+	return funnel_probe(&adev->dev, &adev->res);
+}
+
+static const struct amba_id dynamic_funnel_ids[] = {
 	{
 		.id     = 0x000bb908,
 		.mask   = 0x000fffff,
@@ -250,14 +341,14 @@
 	{ 0, 0},
 };
 
-static struct amba_driver funnel_driver = {
+static struct amba_driver dynamic_funnel_driver = {
 	.drv = {
-		.name	= "coresight-funnel",
+		.name	= "coresight-dynamic-funnel",
 		.owner	= THIS_MODULE,
 		.pm	= &funnel_dev_pm_ops,
 		.suppress_bind_attrs = true,
 	},
-	.probe		= funnel_probe,
-	.id_table	= funnel_ids,
+	.probe		= dynamic_funnel_probe,
+	.id_table	= dynamic_funnel_ids,
 };
-builtin_amba_driver(funnel_driver);
+builtin_amba_driver(dynamic_funnel_driver);
diff --git a/drivers/hwtracing/coresight/coresight-platform.c b/drivers/hwtracing/coresight/coresight-platform.c
new file mode 100644
index 0000000..3c5bee4
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-platform.c
@@ -0,0 +1,808 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2012, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/acpi.h>
+#include <linux/types.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/clk.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_graph.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/amba/bus.h>
+#include <linux/coresight.h>
+#include <linux/cpumask.h>
+#include <asm/smp_plat.h>
+
+#include "coresight-priv.h"
+/*
+ * coresight_alloc_conns: Allocate connections record for each output
+ * port from the device.
+ */
+static int coresight_alloc_conns(struct device *dev,
+				 struct coresight_platform_data *pdata)
+{
+	if (pdata->nr_outport) {
+		pdata->conns = devm_kzalloc(dev, pdata->nr_outport *
+					    sizeof(*pdata->conns),
+					    GFP_KERNEL);
+		if (!pdata->conns)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static struct device *
+coresight_find_device_by_fwnode(struct fwnode_handle *fwnode)
+{
+	struct device *dev = NULL;
+
+	/*
+	 * If we have a non-configurable replicator, it will be found on the
+	 * platform bus.
+	 */
+	dev = bus_find_device_by_fwnode(&platform_bus_type, fwnode);
+	if (dev)
+		return dev;
+
+	/*
+	 * We have a configurable component - circle through the AMBA bus
+	 * looking for the device that matches the endpoint node.
+	 */
+	return bus_find_device_by_fwnode(&amba_bustype, fwnode);
+}
+
+#ifdef CONFIG_OF
+static inline bool of_coresight_legacy_ep_is_input(struct device_node *ep)
+{
+	return of_property_read_bool(ep, "slave-mode");
+}
+
+static void of_coresight_get_ports_legacy(const struct device_node *node,
+					  int *nr_inport, int *nr_outport)
+{
+	struct device_node *ep = NULL;
+	int in = 0, out = 0;
+
+	do {
+		ep = of_graph_get_next_endpoint(node, ep);
+		if (!ep)
+			break;
+
+		if (of_coresight_legacy_ep_is_input(ep))
+			in++;
+		else
+			out++;
+
+	} while (ep);
+
+	*nr_inport = in;
+	*nr_outport = out;
+}
+
+static struct device_node *of_coresight_get_port_parent(struct device_node *ep)
+{
+	struct device_node *parent = of_graph_get_port_parent(ep);
+
+	/*
+	 * Skip one-level up to the real device node, if we
+	 * are using the new bindings.
+	 */
+	if (of_node_name_eq(parent, "in-ports") ||
+	    of_node_name_eq(parent, "out-ports"))
+		parent = of_get_next_parent(parent);
+
+	return parent;
+}
+
+static inline struct device_node *
+of_coresight_get_input_ports_node(const struct device_node *node)
+{
+	return of_get_child_by_name(node, "in-ports");
+}
+
+static inline struct device_node *
+of_coresight_get_output_ports_node(const struct device_node *node)
+{
+	return of_get_child_by_name(node, "out-ports");
+}
+
+static inline int
+of_coresight_count_ports(struct device_node *port_parent)
+{
+	int i = 0;
+	struct device_node *ep = NULL;
+
+	while ((ep = of_graph_get_next_endpoint(port_parent, ep)))
+		i++;
+	return i;
+}
+
+static void of_coresight_get_ports(const struct device_node *node,
+				   int *nr_inport, int *nr_outport)
+{
+	struct device_node *input_ports = NULL, *output_ports = NULL;
+
+	input_ports = of_coresight_get_input_ports_node(node);
+	output_ports = of_coresight_get_output_ports_node(node);
+
+	if (input_ports || output_ports) {
+		if (input_ports) {
+			*nr_inport = of_coresight_count_ports(input_ports);
+			of_node_put(input_ports);
+		}
+		if (output_ports) {
+			*nr_outport = of_coresight_count_ports(output_ports);
+			of_node_put(output_ports);
+		}
+	} else {
+		/* Fall back to legacy DT bindings parsing */
+		of_coresight_get_ports_legacy(node, nr_inport, nr_outport);
+	}
+}
+
+static int of_coresight_get_cpu(struct device *dev)
+{
+	int cpu;
+	struct device_node *dn;
+
+	if (!dev->of_node)
+		return -ENODEV;
+
+	dn = of_parse_phandle(dev->of_node, "cpu", 0);
+	if (!dn)
+		return -ENODEV;
+
+	cpu = of_cpu_node_to_id(dn);
+	of_node_put(dn);
+
+	return cpu;
+}
+
+/*
+ * of_coresight_parse_endpoint : Parse the given output endpoint @ep
+ * and fill the connection information in @conn
+ *
+ * Parses the local port, remote device name and the remote port.
+ *
+ * Returns :
+ *	 1	- If the parsing is successful and a connection record
+ *		  was created for an output connection.
+ *	 0	- If the parsing completed without any fatal errors.
+ *	-Errno	- Fatal error, abort the scanning.
+ */
+static int of_coresight_parse_endpoint(struct device *dev,
+				       struct device_node *ep,
+				       struct coresight_connection *conn)
+{
+	int ret = 0;
+	struct of_endpoint endpoint, rendpoint;
+	struct device_node *rparent = NULL;
+	struct device_node *rep = NULL;
+	struct device *rdev = NULL;
+	struct fwnode_handle *rdev_fwnode;
+
+	do {
+		/* Parse the local port details */
+		if (of_graph_parse_endpoint(ep, &endpoint))
+			break;
+		/*
+		 * Get a handle on the remote endpoint and the device it is
+		 * attached to.
+		 */
+		rep = of_graph_get_remote_endpoint(ep);
+		if (!rep)
+			break;
+		rparent = of_coresight_get_port_parent(rep);
+		if (!rparent)
+			break;
+		if (of_graph_parse_endpoint(rep, &rendpoint))
+			break;
+
+		rdev_fwnode = of_fwnode_handle(rparent);
+		/* If the remote device is not available, defer probing */
+		rdev = coresight_find_device_by_fwnode(rdev_fwnode);
+		if (!rdev) {
+			ret = -EPROBE_DEFER;
+			break;
+		}
+
+		conn->outport = endpoint.port;
+		/*
+		 * Hold the refcount to the target device. This could be
+		 * released via:
+		 * 1) coresight_release_platform_data() if the probe fails or
+		 *    this device is unregistered.
+		 * 2) While removing the target device via
+		 *    coresight_remove_match()
+		 */
+		conn->child_fwnode = fwnode_handle_get(rdev_fwnode);
+		conn->child_port = rendpoint.port;
+		/* Connection record updated */
+		ret = 1;
+	} while (0);
+
+	of_node_put(rparent);
+	of_node_put(rep);
+	put_device(rdev);
+
+	return ret;
+}
+
+static int of_get_coresight_platform_data(struct device *dev,
+					  struct coresight_platform_data *pdata)
+{
+	int ret = 0;
+	struct coresight_connection *conn;
+	struct device_node *ep = NULL;
+	const struct device_node *parent = NULL;
+	bool legacy_binding = false;
+	struct device_node *node = dev->of_node;
+
+	/* Get the number of input and output port for this component */
+	of_coresight_get_ports(node, &pdata->nr_inport, &pdata->nr_outport);
+
+	/* If there are no output connections, we are done */
+	if (!pdata->nr_outport)
+		return 0;
+
+	ret = coresight_alloc_conns(dev, pdata);
+	if (ret)
+		return ret;
+
+	parent = of_coresight_get_output_ports_node(node);
+	/*
+	 * If the DT uses obsoleted bindings, the ports are listed
+	 * under the device and we need to filter out the input
+	 * ports.
+	 */
+	if (!parent) {
+		legacy_binding = true;
+		parent = node;
+		dev_warn_once(dev, "Uses obsolete Coresight DT bindings\n");
+	}
+
+	conn = pdata->conns;
+
+	/* Iterate through each output port to discover topology */
+	while ((ep = of_graph_get_next_endpoint(parent, ep))) {
+		/*
+		 * Legacy binding mixes input/output ports under the
+		 * same parent. So, skip the input ports if we are dealing
+		 * with legacy binding, as they processed with their
+		 * connected output ports.
+		 */
+		if (legacy_binding && of_coresight_legacy_ep_is_input(ep))
+			continue;
+
+		ret = of_coresight_parse_endpoint(dev, ep, conn);
+		switch (ret) {
+		case 1:
+			conn++;		/* Fall through */
+		case 0:
+			break;
+		default:
+			return ret;
+		}
+	}
+
+	return 0;
+}
+#else
+static inline int
+of_get_coresight_platform_data(struct device *dev,
+			       struct coresight_platform_data *pdata)
+{
+	return -ENOENT;
+}
+
+static inline int of_coresight_get_cpu(struct device *dev)
+{
+	return -ENODEV;
+}
+#endif
+
+#ifdef CONFIG_ACPI
+
+#include <acpi/actypes.h>
+#include <acpi/processor.h>
+
+/* ACPI Graph _DSD UUID : "ab02a46b-74c7-45a2-bd68-f7d344ef2153" */
+static const guid_t acpi_graph_uuid = GUID_INIT(0xab02a46b, 0x74c7, 0x45a2,
+						0xbd, 0x68, 0xf7, 0xd3,
+						0x44, 0xef, 0x21, 0x53);
+/* Coresight ACPI Graph UUID : "3ecbc8b6-1d0e-4fb3-8107-e627f805c6cd" */
+static const guid_t coresight_graph_uuid = GUID_INIT(0x3ecbc8b6, 0x1d0e, 0x4fb3,
+						     0x81, 0x07, 0xe6, 0x27,
+						     0xf8, 0x05, 0xc6, 0xcd);
+#define ACPI_CORESIGHT_LINK_SLAVE	0
+#define ACPI_CORESIGHT_LINK_MASTER	1
+
+static inline bool is_acpi_guid(const union acpi_object *obj)
+{
+	return (obj->type == ACPI_TYPE_BUFFER) && (obj->buffer.length == 16);
+}
+
+/*
+ * acpi_guid_matches	- Checks if the given object is a GUID object and
+ * that it matches the supplied the GUID.
+ */
+static inline bool acpi_guid_matches(const union acpi_object *obj,
+				   const guid_t *guid)
+{
+	return is_acpi_guid(obj) &&
+	       guid_equal((guid_t *)obj->buffer.pointer, guid);
+}
+
+static inline bool is_acpi_dsd_graph_guid(const union acpi_object *obj)
+{
+	return acpi_guid_matches(obj, &acpi_graph_uuid);
+}
+
+static inline bool is_acpi_coresight_graph_guid(const union acpi_object *obj)
+{
+	return acpi_guid_matches(obj, &coresight_graph_uuid);
+}
+
+static inline bool is_acpi_coresight_graph(const union acpi_object *obj)
+{
+	const union acpi_object *graphid, *guid, *links;
+
+	if (obj->type != ACPI_TYPE_PACKAGE ||
+	    obj->package.count < 3)
+		return false;
+
+	graphid = &obj->package.elements[0];
+	guid = &obj->package.elements[1];
+	links = &obj->package.elements[2];
+
+	if (graphid->type != ACPI_TYPE_INTEGER ||
+	    links->type != ACPI_TYPE_INTEGER)
+		return false;
+
+	return is_acpi_coresight_graph_guid(guid);
+}
+
+/*
+ * acpi_validate_dsd_graph	- Make sure the given _DSD graph conforms
+ * to the ACPI _DSD Graph specification.
+ *
+ * ACPI Devices Graph property has the following format:
+ *  {
+ *	Revision	- Integer, must be 0
+ *	NumberOfGraphs	- Integer, N indicating the following list.
+ *	Graph[1],
+ *	 ...
+ *	Graph[N]
+ *  }
+ *
+ * And each Graph entry has the following format:
+ *  {
+ *	GraphID		- Integer, identifying a graph the device belongs to.
+ *	UUID		- UUID identifying the specification that governs
+ *			  this graph. (e.g, see is_acpi_coresight_graph())
+ *	NumberOfLinks	- Number "N" of connections on this node of the graph.
+ *	Links[1]
+ *	...
+ *	Links[N]
+ *  }
+ *
+ * Where each "Links" entry has the following format:
+ *
+ * {
+ *	SourcePortAddress	- Integer
+ *	DestinationPortAddress	- Integer
+ *	DestinationDeviceName	- Reference to another device
+ *	( --- CoreSight specific extensions below ---)
+ *	DirectionOfFlow		- Integer 1 for output(master)
+ *				  0 for input(slave)
+ * }
+ *
+ * e.g:
+ * For a Funnel device
+ *
+ * Device(MFUN) {
+ *   ...
+ *
+ *   Name (_DSD, Package() {
+ *	// DSD Package contains tuples of {  Proeprty_Type_UUID, Package() }
+ *	ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"), //Std. Property UUID
+ *	Package() {
+ *		Package(2) { "property-name", <property-value> }
+ *	},
+ *
+ *	ToUUID("ab02a46b-74c7-45a2-bd68-f7d344ef2153"), // ACPI Graph UUID
+ *	Package() {
+ *	  0,		// Revision
+ *	  1,		// NumberOfGraphs.
+ *	  Package() {	// Graph[0] Package
+ *	     1,		// GraphID
+ *	     // Coresight Graph UUID
+ *	     ToUUID("3ecbc8b6-1d0e-4fb3-8107-e627f805c6cd"),
+ *	     3,		// NumberOfLinks aka ports
+ *	     // Link[0]: Output_0 -> Replicator:Input_0
+ *	     Package () { 0, 0, \_SB_.RPL0, 1 },
+ *	     // Link[1]: Input_0 <- Cluster0_Funnel0:Output_0
+ *	     Package () { 0, 0, \_SB_.CLU0.FUN0, 0 },
+ *	     // Link[2]: Input_1 <- Cluster1_Funnel0:Output_0
+ *	      Package () { 1, 0, \_SB_.CLU1.FUN0, 0 },
+ *	  }	// End of Graph[0] Package
+ *
+ *	}, // End of ACPI Graph Property
+ *  })
+ */
+static inline bool acpi_validate_dsd_graph(const union acpi_object *graph)
+{
+	int i, n;
+	const union acpi_object *rev, *nr_graphs;
+
+	/* The graph must contain at least the Revision and Number of Graphs */
+	if (graph->package.count < 2)
+		return false;
+
+	rev = &graph->package.elements[0];
+	nr_graphs = &graph->package.elements[1];
+
+	if (rev->type != ACPI_TYPE_INTEGER ||
+	    nr_graphs->type != ACPI_TYPE_INTEGER)
+		return false;
+
+	/* We only support revision 0 */
+	if (rev->integer.value != 0)
+		return false;
+
+	n = nr_graphs->integer.value;
+	/* CoreSight devices are only part of a single Graph */
+	if (n != 1)
+		return false;
+
+	/* Make sure the ACPI graph package has right number of elements */
+	if (graph->package.count != (n + 2))
+		return false;
+
+	/*
+	 * Each entry must be a graph package with at least 3 members :
+	 * { GraphID, UUID, NumberOfLinks(n), Links[.],... }
+	 */
+	for (i = 2; i < n + 2; i++) {
+		const union acpi_object *obj = &graph->package.elements[i];
+
+		if (obj->type != ACPI_TYPE_PACKAGE ||
+		    obj->package.count < 3)
+			return false;
+	}
+
+	return true;
+}
+
+/* acpi_get_dsd_graph	- Find the _DSD Graph property for the given device. */
+const union acpi_object *
+acpi_get_dsd_graph(struct acpi_device *adev)
+{
+	int i;
+	struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER };
+	acpi_status status;
+	const union acpi_object *dsd;
+
+	status = acpi_evaluate_object_typed(adev->handle, "_DSD", NULL,
+					    &buf, ACPI_TYPE_PACKAGE);
+	if (ACPI_FAILURE(status))
+		return NULL;
+
+	dsd = buf.pointer;
+
+	/*
+	 * _DSD property consists tuples { Prop_UUID, Package() }
+	 * Iterate through all the packages and find the Graph.
+	 */
+	for (i = 0; i + 1 < dsd->package.count; i += 2) {
+		const union acpi_object *guid, *package;
+
+		guid = &dsd->package.elements[i];
+		package = &dsd->package.elements[i + 1];
+
+		/* All _DSD elements must have a UUID and a Package */
+		if (!is_acpi_guid(guid) || package->type != ACPI_TYPE_PACKAGE)
+			break;
+		/* Skip the non-Graph _DSD packages */
+		if (!is_acpi_dsd_graph_guid(guid))
+			continue;
+		if (acpi_validate_dsd_graph(package))
+			return package;
+		/* Invalid graph format, continue */
+		dev_warn(&adev->dev, "Invalid Graph _DSD property\n");
+	}
+
+	return NULL;
+}
+
+static inline bool
+acpi_validate_coresight_graph(const union acpi_object *cs_graph)
+{
+	int nlinks;
+
+	nlinks = cs_graph->package.elements[2].integer.value;
+	/*
+	 * Graph must have the following fields :
+	 * { GraphID, GraphUUID, NumberOfLinks, Links... }
+	 */
+	if (cs_graph->package.count != (nlinks + 3))
+		return false;
+	/* The links are validated in acpi_coresight_parse_link() */
+	return true;
+}
+
+/*
+ * acpi_get_coresight_graph	- Parse the device _DSD tables and find
+ * the Graph property matching the CoreSight Graphs.
+ *
+ * Returns the pointer to the CoreSight Graph Package when found. Otherwise
+ * returns NULL.
+ */
+const union acpi_object *
+acpi_get_coresight_graph(struct acpi_device *adev)
+{
+	const union acpi_object *graph_list, *graph;
+	int i, nr_graphs;
+
+	graph_list = acpi_get_dsd_graph(adev);
+	if (!graph_list)
+		return graph_list;
+
+	nr_graphs = graph_list->package.elements[1].integer.value;
+
+	for (i = 2; i < nr_graphs + 2; i++) {
+		graph = &graph_list->package.elements[i];
+		if (!is_acpi_coresight_graph(graph))
+			continue;
+		if (acpi_validate_coresight_graph(graph))
+			return graph;
+		/* Invalid graph format */
+		break;
+	}
+
+	return NULL;
+}
+
+/*
+ * acpi_coresight_parse_link	- Parse the given Graph connection
+ * of the device and populate the coresight_connection for an output
+ * connection.
+ *
+ * CoreSight Graph specification mandates that the direction of the data
+ * flow must be specified in the link. i.e,
+ *
+ *	SourcePortAddress,	// Integer
+ *	DestinationPortAddress,	// Integer
+ *	DestinationDeviceName,	// Reference to another device
+ *	DirectionOfFlow,	// 1 for output(master), 0 for input(slave)
+ *
+ * Returns the direction of the data flow [ Input(slave) or Output(master) ]
+ * upon success.
+ * Returns an negative error number otherwise.
+ */
+static int acpi_coresight_parse_link(struct acpi_device *adev,
+				     const union acpi_object *link,
+				     struct coresight_connection *conn)
+{
+	int rc, dir;
+	const union acpi_object *fields;
+	struct acpi_device *r_adev;
+	struct device *rdev;
+
+	if (link->type != ACPI_TYPE_PACKAGE ||
+	    link->package.count != 4)
+		return -EINVAL;
+
+	fields = link->package.elements;
+
+	if (fields[0].type != ACPI_TYPE_INTEGER ||
+	    fields[1].type != ACPI_TYPE_INTEGER ||
+	    fields[2].type != ACPI_TYPE_LOCAL_REFERENCE ||
+	    fields[3].type != ACPI_TYPE_INTEGER)
+		return -EINVAL;
+
+	rc = acpi_bus_get_device(fields[2].reference.handle, &r_adev);
+	if (rc)
+		return rc;
+
+	dir = fields[3].integer.value;
+	if (dir == ACPI_CORESIGHT_LINK_MASTER) {
+		conn->outport = fields[0].integer.value;
+		conn->child_port = fields[1].integer.value;
+		rdev = coresight_find_device_by_fwnode(&r_adev->fwnode);
+		if (!rdev)
+			return -EPROBE_DEFER;
+		/*
+		 * Hold the refcount to the target device. This could be
+		 * released via:
+		 * 1) coresight_release_platform_data() if the probe fails or
+		 *    this device is unregistered.
+		 * 2) While removing the target device via
+		 *    coresight_remove_match().
+		 */
+		conn->child_fwnode = fwnode_handle_get(&r_adev->fwnode);
+	}
+
+	return dir;
+}
+
+/*
+ * acpi_coresight_parse_graph	- Parse the _DSD CoreSight graph
+ * connection information and populate the supplied coresight_platform_data
+ * instance.
+ */
+static int acpi_coresight_parse_graph(struct acpi_device *adev,
+				      struct coresight_platform_data *pdata)
+{
+	int rc, i, nlinks;
+	const union acpi_object *graph;
+	struct coresight_connection *conns, *ptr;
+
+	pdata->nr_inport = pdata->nr_outport = 0;
+	graph = acpi_get_coresight_graph(adev);
+	if (!graph)
+		return -ENOENT;
+
+	nlinks = graph->package.elements[2].integer.value;
+	if (!nlinks)
+		return 0;
+
+	/*
+	 * To avoid scanning the table twice (once for finding the number of
+	 * output links and then later for parsing the output links),
+	 * cache the links information in one go and then later copy
+	 * it to the pdata.
+	 */
+	conns = devm_kcalloc(&adev->dev, nlinks, sizeof(*conns), GFP_KERNEL);
+	if (!conns)
+		return -ENOMEM;
+	ptr = conns;
+	for (i = 0; i < nlinks; i++) {
+		const union acpi_object *link = &graph->package.elements[3 + i];
+		int dir;
+
+		dir = acpi_coresight_parse_link(adev, link, ptr);
+		if (dir < 0)
+			return dir;
+
+		if (dir == ACPI_CORESIGHT_LINK_MASTER) {
+			pdata->nr_outport++;
+			ptr++;
+		} else {
+			pdata->nr_inport++;
+		}
+	}
+
+	rc = coresight_alloc_conns(&adev->dev, pdata);
+	if (rc)
+		return rc;
+
+	/* Copy the connection information to the final location */
+	for (i = 0; i < pdata->nr_outport; i++)
+		pdata->conns[i] = conns[i];
+
+	devm_kfree(&adev->dev, conns);
+	return 0;
+}
+
+/*
+ * acpi_handle_to_logical_cpuid - Map a given acpi_handle to the
+ * logical CPU id of the corresponding CPU device.
+ *
+ * Returns the logical CPU id when found. Otherwise returns >= nr_cpus_id.
+ */
+static int
+acpi_handle_to_logical_cpuid(acpi_handle handle)
+{
+	int i;
+	struct acpi_processor *pr;
+
+	for_each_possible_cpu(i) {
+		pr = per_cpu(processors, i);
+		if (pr && pr->handle == handle)
+			break;
+	}
+
+	return i;
+}
+
+/*
+ * acpi_coresigh_get_cpu - Find the logical CPU id of the CPU associated
+ * with this coresight device. With ACPI bindings, the CoreSight components
+ * are listed as child device of the associated CPU.
+ *
+ * Returns the logical CPU id when found. Otherwise returns 0.
+ */
+static int acpi_coresight_get_cpu(struct device *dev)
+{
+	int cpu;
+	acpi_handle cpu_handle;
+	acpi_status status;
+	struct acpi_device *adev = ACPI_COMPANION(dev);
+
+	if (!adev)
+		return -ENODEV;
+	status = acpi_get_parent(adev->handle, &cpu_handle);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	cpu = acpi_handle_to_logical_cpuid(cpu_handle);
+	if (cpu >= nr_cpu_ids)
+		return -ENODEV;
+	return cpu;
+}
+
+static int
+acpi_get_coresight_platform_data(struct device *dev,
+				 struct coresight_platform_data *pdata)
+{
+	struct acpi_device *adev;
+
+	adev = ACPI_COMPANION(dev);
+	if (!adev)
+		return -EINVAL;
+
+	return acpi_coresight_parse_graph(adev, pdata);
+}
+
+#else
+
+static inline int
+acpi_get_coresight_platform_data(struct device *dev,
+				 struct coresight_platform_data *pdata)
+{
+	return -ENOENT;
+}
+
+static inline int acpi_coresight_get_cpu(struct device *dev)
+{
+	return -ENODEV;
+}
+#endif
+
+int coresight_get_cpu(struct device *dev)
+{
+	if (is_of_node(dev->fwnode))
+		return of_coresight_get_cpu(dev);
+	else if (is_acpi_device_node(dev->fwnode))
+		return acpi_coresight_get_cpu(dev);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(coresight_get_cpu);
+
+struct coresight_platform_data *
+coresight_get_platform_data(struct device *dev)
+{
+	int ret = -ENOENT;
+	struct coresight_platform_data *pdata = NULL;
+	struct fwnode_handle *fwnode = dev_fwnode(dev);
+
+	if (IS_ERR_OR_NULL(fwnode))
+		goto error;
+
+	pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	if (is_of_node(fwnode))
+		ret = of_get_coresight_platform_data(dev, pdata);
+	else if (is_acpi_device_node(fwnode))
+		ret = acpi_get_coresight_platform_data(dev, pdata);
+
+	if (!ret)
+		return pdata;
+error:
+	if (!IS_ERR_OR_NULL(pdata))
+		/* Cleanup the connection information */
+		coresight_release_platform_data(pdata);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(coresight_get_platform_data);
diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h
index 1a6cf35..82e563c 100644
--- a/drivers/hwtracing/coresight/coresight-priv.h
+++ b/drivers/hwtracing/coresight/coresight-priv.h
@@ -6,6 +6,7 @@
 #ifndef _CORESIGHT_PRIV_H
 #define _CORESIGHT_PRIV_H
 
+#include <linux/amba/bus.h>
 #include <linux/bitops.h>
 #include <linux/io.h>
 #include <linux/coresight.h>
@@ -25,6 +26,13 @@
 #define CORESIGHT_DEVID		0xfc8
 #define CORESIGHT_DEVTYPE	0xfcc
 
+
+/*
+ * Coresight device CLAIM protocol.
+ * See PSCI - ARM DEN 0022D, Section: 6.8.1 Debug and Trace save and restore.
+ */
+#define CORESIGHT_CLAIM_SELF_HOSTED	BIT(1)
+
 #define TIMEOUT_US		100
 #define BMVAL(val, lsb, msb)	((val & GENMASK(msb, lsb)) >> lsb)
 
@@ -137,9 +145,10 @@
 }
 
 void coresight_disable_path(struct list_head *path);
-int coresight_enable_path(struct list_head *path, u32 mode);
+int coresight_enable_path(struct list_head *path, u32 mode, void *sink_data);
 struct coresight_device *coresight_get_sink(struct list_head *path);
 struct coresight_device *coresight_get_enabled_sink(bool reset);
+struct coresight_device *coresight_get_sink_by_id(u32 id);
 struct list_head *coresight_build_path(struct coresight_device *csdev,
 				       struct coresight_device *sink);
 void coresight_release_path(struct list_head *path);
@@ -152,4 +161,45 @@
 static inline int etm_writel_cp14(u32 off, u32 val) { return 0; }
 #endif
 
+/*
+ * Macros and inline functions to handle CoreSight UCI data and driver
+ * private data in AMBA ID table entries, and extract data values.
+ */
+
+/* coresight AMBA ID, no UCI, no driver data: id table entry */
+#define CS_AMBA_ID(pid)			\
+	{				\
+		.id	= pid,		\
+		.mask	= 0x000fffff,	\
+	}
+
+/* coresight AMBA ID, UCI with driver data only: id table entry. */
+#define CS_AMBA_ID_DATA(pid, dval)				\
+	{							\
+		.id	= pid,					\
+		.mask	= 0x000fffff,				\
+		.data	=  (void *)&(struct amba_cs_uci_id)	\
+			{				\
+				.data = (void *)dval,	\
+			}				\
+	}
+
+/* coresight AMBA ID, full UCI structure: id table entry. */
+#define CS_AMBA_UCI_ID(pid, uci_ptr)		\
+	{					\
+		.id	= pid,			\
+		.mask	= 0x000fffff,		\
+		.data	= (void *)uci_ptr	\
+	}
+
+/* extract the data value from a UCI structure given amba_id pointer. */
+static inline void *coresight_get_uci_data(const struct amba_id *id)
+{
+	if (id->data)
+		return ((struct amba_cs_uci_id *)(id->data))->data;
+	return 0;
+}
+
+void coresight_release_platform_data(struct coresight_platform_data *pdata);
+
 #endif
diff --git a/drivers/hwtracing/coresight/coresight-replicator.c b/drivers/hwtracing/coresight/coresight-replicator.c
index 8d2eaaa..b29ba64 100644
--- a/drivers/hwtracing/coresight/coresight-replicator.c
+++ b/drivers/hwtracing/coresight/coresight-replicator.c
@@ -1,10 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2011-2015, The Linux Foundation. All rights reserved.
  *
  * Description: CoreSight Replicator driver
  */
 
+#include <linux/acpi.h>
+#include <linux/amba/bus.h>
 #include <linux/kernel.h>
 #include <linux/device.h>
 #include <linux/platform_device.h>
@@ -18,25 +20,117 @@
 
 #include "coresight-priv.h"
 
+#define REPLICATOR_IDFILTER0		0x000
+#define REPLICATOR_IDFILTER1		0x004
+
+DEFINE_CORESIGHT_DEVLIST(replicator_devs, "replicator");
+
 /**
  * struct replicator_drvdata - specifics associated to a replicator component
- * @dev:	the device entity associated with this component
+ * @base:	memory mapped base address for this component. Also indicates
+ *		whether this one is programmable or not.
  * @atclk:	optional clock for the core parts of the replicator.
  * @csdev:	component vitals needed by the framework
  */
 struct replicator_drvdata {
-	struct device		*dev;
+	void __iomem		*base;
 	struct clk		*atclk;
 	struct coresight_device	*csdev;
 };
 
+static void dynamic_replicator_reset(struct replicator_drvdata *drvdata)
+{
+	CS_UNLOCK(drvdata->base);
+
+	if (!coresight_claim_device_unlocked(drvdata->base)) {
+		writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER0);
+		writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER1);
+		coresight_disclaim_device_unlocked(drvdata->base);
+	}
+
+	CS_LOCK(drvdata->base);
+}
+
+/*
+ * replicator_reset : Reset the replicator configuration to sane values.
+ */
+static inline void replicator_reset(struct replicator_drvdata *drvdata)
+{
+	if (drvdata->base)
+		dynamic_replicator_reset(drvdata);
+}
+
+static int dynamic_replicator_enable(struct replicator_drvdata *drvdata,
+				     int inport, int outport)
+{
+	int rc = 0;
+	u32 reg;
+
+	switch (outport) {
+	case 0:
+		reg = REPLICATOR_IDFILTER0;
+		break;
+	case 1:
+		reg = REPLICATOR_IDFILTER1;
+		break;
+	default:
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
+	CS_UNLOCK(drvdata->base);
+
+	if ((readl_relaxed(drvdata->base + REPLICATOR_IDFILTER0) == 0xff) &&
+	    (readl_relaxed(drvdata->base + REPLICATOR_IDFILTER1) == 0xff))
+		rc = coresight_claim_device_unlocked(drvdata->base);
+
+	/* Ensure that the outport is enabled. */
+	if (!rc)
+		writel_relaxed(0x00, drvdata->base + reg);
+	CS_LOCK(drvdata->base);
+
+	return rc;
+}
+
 static int replicator_enable(struct coresight_device *csdev, int inport,
 			     int outport)
 {
+	int rc = 0;
 	struct replicator_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
 
-	dev_info(drvdata->dev, "REPLICATOR enabled\n");
-	return 0;
+	if (drvdata->base)
+		rc = dynamic_replicator_enable(drvdata, inport, outport);
+	if (!rc)
+		dev_dbg(&csdev->dev, "REPLICATOR enabled\n");
+	return rc;
+}
+
+static void dynamic_replicator_disable(struct replicator_drvdata *drvdata,
+				       int inport, int outport)
+{
+	u32 reg;
+
+	switch (outport) {
+	case 0:
+		reg = REPLICATOR_IDFILTER0;
+		break;
+	case 1:
+		reg = REPLICATOR_IDFILTER1;
+		break;
+	default:
+		WARN_ON(1);
+		return;
+	}
+
+	CS_UNLOCK(drvdata->base);
+
+	/* disable the flow of ATB data through port */
+	writel_relaxed(0xff, drvdata->base + reg);
+
+	if ((readl_relaxed(drvdata->base + REPLICATOR_IDFILTER0) == 0xff) &&
+	    (readl_relaxed(drvdata->base + REPLICATOR_IDFILTER1) == 0xff))
+		coresight_disclaim_device_unlocked(drvdata->base);
+	CS_LOCK(drvdata->base);
 }
 
 static void replicator_disable(struct coresight_device *csdev, int inport,
@@ -44,7 +138,9 @@
 {
 	struct replicator_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
 
-	dev_info(drvdata->dev, "REPLICATOR disabled\n");
+	if (drvdata->base)
+		dynamic_replicator_disable(drvdata, inport, outport);
+	dev_dbg(&csdev->dev, "REPLICATOR disabled\n");
 }
 
 static const struct coresight_ops_link replicator_link_ops = {
@@ -56,58 +152,115 @@
 	.link_ops	= &replicator_link_ops,
 };
 
-static int replicator_probe(struct platform_device *pdev)
+#define coresight_replicator_reg(name, offset) \
+	coresight_simple_reg32(struct replicator_drvdata, name, offset)
+
+coresight_replicator_reg(idfilter0, REPLICATOR_IDFILTER0);
+coresight_replicator_reg(idfilter1, REPLICATOR_IDFILTER1);
+
+static struct attribute *replicator_mgmt_attrs[] = {
+	&dev_attr_idfilter0.attr,
+	&dev_attr_idfilter1.attr,
+	NULL,
+};
+
+static const struct attribute_group replicator_mgmt_group = {
+	.attrs = replicator_mgmt_attrs,
+	.name = "mgmt",
+};
+
+static const struct attribute_group *replicator_groups[] = {
+	&replicator_mgmt_group,
+	NULL,
+};
+
+static int replicator_probe(struct device *dev, struct resource *res)
 {
-	int ret;
-	struct device *dev = &pdev->dev;
+	int ret = 0;
 	struct coresight_platform_data *pdata = NULL;
 	struct replicator_drvdata *drvdata;
 	struct coresight_desc desc = { 0 };
-	struct device_node *np = pdev->dev.of_node;
+	void __iomem *base;
 
-	if (np) {
-		pdata = of_get_coresight_platform_data(dev, np);
-		if (IS_ERR(pdata))
-			return PTR_ERR(pdata);
-		pdev->dev.platform_data = pdata;
-	}
+	if (is_of_node(dev_fwnode(dev)) &&
+	    of_device_is_compatible(dev->of_node, "arm,coresight-replicator"))
+		dev_warn_once(dev,
+			      "Uses OBSOLETE CoreSight replicator binding\n");
+
+	desc.name = coresight_alloc_device_name(&replicator_devs, dev);
+	if (!desc.name)
+		return -ENOMEM;
 
 	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
 	if (!drvdata)
 		return -ENOMEM;
 
-	drvdata->dev = &pdev->dev;
-	drvdata->atclk = devm_clk_get(&pdev->dev, "atclk"); /* optional */
+	drvdata->atclk = devm_clk_get(dev, "atclk"); /* optional */
 	if (!IS_ERR(drvdata->atclk)) {
 		ret = clk_prepare_enable(drvdata->atclk);
 		if (ret)
 			return ret;
 	}
-	pm_runtime_get_noresume(&pdev->dev);
-	pm_runtime_set_active(&pdev->dev);
-	pm_runtime_enable(&pdev->dev);
-	platform_set_drvdata(pdev, drvdata);
+
+	/*
+	 * Map the device base for dynamic-replicator, which has been
+	 * validated by AMBA core
+	 */
+	if (res) {
+		base = devm_ioremap_resource(dev, res);
+		if (IS_ERR(base)) {
+			ret = PTR_ERR(base);
+			goto out_disable_clk;
+		}
+		drvdata->base = base;
+		desc.groups = replicator_groups;
+	}
+
+	dev_set_drvdata(dev, drvdata);
+
+	pdata = coresight_get_platform_data(dev);
+	if (IS_ERR(pdata)) {
+		ret = PTR_ERR(pdata);
+		goto out_disable_clk;
+	}
+	dev->platform_data = pdata;
 
 	desc.type = CORESIGHT_DEV_TYPE_LINK;
 	desc.subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_SPLIT;
 	desc.ops = &replicator_cs_ops;
-	desc.pdata = pdev->dev.platform_data;
-	desc.dev = &pdev->dev;
+	desc.pdata = dev->platform_data;
+	desc.dev = dev;
+
 	drvdata->csdev = coresight_register(&desc);
 	if (IS_ERR(drvdata->csdev)) {
 		ret = PTR_ERR(drvdata->csdev);
-		goto out_disable_pm;
+		goto out_disable_clk;
 	}
 
-	pm_runtime_put(&pdev->dev);
+	replicator_reset(drvdata);
+	pm_runtime_put(dev);
 
-	return 0;
-
-out_disable_pm:
-	if (!IS_ERR(drvdata->atclk))
+out_disable_clk:
+	if (ret && !IS_ERR_OR_NULL(drvdata->atclk))
 		clk_disable_unprepare(drvdata->atclk);
-	pm_runtime_put_noidle(&pdev->dev);
-	pm_runtime_disable(&pdev->dev);
+	return ret;
+}
+
+static int static_replicator_probe(struct platform_device *pdev)
+{
+	int ret;
+
+	pm_runtime_get_noresume(&pdev->dev);
+	pm_runtime_set_active(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
+
+	/* Static replicators do not have programming base */
+	ret = replicator_probe(&pdev->dev, NULL);
+
+	if (ret) {
+		pm_runtime_put_noidle(&pdev->dev);
+		pm_runtime_disable(&pdev->dev);
+	}
 
 	return ret;
 }
@@ -139,18 +292,57 @@
 			   replicator_runtime_resume, NULL)
 };
 
-static const struct of_device_id replicator_match[] = {
+static const struct of_device_id static_replicator_match[] = {
 	{.compatible = "arm,coresight-replicator"},
+	{.compatible = "arm,coresight-static-replicator"},
 	{}
 };
 
-static struct platform_driver replicator_driver = {
-	.probe          = replicator_probe,
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id static_replicator_acpi_ids[] = {
+	{"ARMHC985", 0}, /* ARM CoreSight Static Replicator */
+	{}
+};
+#endif
+
+static struct platform_driver static_replicator_driver = {
+	.probe          = static_replicator_probe,
 	.driver         = {
-		.name   = "coresight-replicator",
-		.of_match_table = replicator_match,
+		.name   = "coresight-static-replicator",
+		.of_match_table = of_match_ptr(static_replicator_match),
+		.acpi_match_table = ACPI_PTR(static_replicator_acpi_ids),
 		.pm	= &replicator_dev_pm_ops,
 		.suppress_bind_attrs = true,
 	},
 };
-builtin_platform_driver(replicator_driver);
+builtin_platform_driver(static_replicator_driver);
+
+static int dynamic_replicator_probe(struct amba_device *adev,
+				    const struct amba_id *id)
+{
+	return replicator_probe(&adev->dev, &adev->res);
+}
+
+static const struct amba_id dynamic_replicator_ids[] = {
+	{
+		.id     = 0x000bb909,
+		.mask   = 0x000fffff,
+	},
+	{
+		/* Coresight SoC-600 */
+		.id     = 0x000bb9ec,
+		.mask   = 0x000fffff,
+	},
+	{ 0, 0 },
+};
+
+static struct amba_driver dynamic_replicator_driver = {
+	.drv = {
+		.name	= "coresight-dynamic-replicator",
+		.pm	= &replicator_dev_pm_ops,
+		.suppress_bind_attrs = true,
+	},
+	.probe		= dynamic_replicator_probe,
+	.id_table	= dynamic_replicator_ids,
+};
+builtin_amba_driver(dynamic_replicator_driver);
diff --git a/drivers/hwtracing/coresight/coresight-stm.c b/drivers/hwtracing/coresight/coresight-stm.c
index c46c70a..b908ca1 100644
--- a/drivers/hwtracing/coresight/coresight-stm.c
+++ b/drivers/hwtracing/coresight/coresight-stm.c
@@ -16,6 +16,7 @@
  * (C) 2015-2016 Chunyan Zhang <zhang.chunyan@linaro.org>
  */
 #include <asm/local.h>
+#include <linux/acpi.h>
 #include <linux/amba/bus.h>
 #include <linux/bitmap.h>
 #include <linux/clk.h>
@@ -107,10 +108,11 @@
 	unsigned long		*guaranteed;
 };
 
+DEFINE_CORESIGHT_DEVLIST(stm_devs, "stm");
+
 /**
  * struct stm_drvdata - specifics associated to an STM component
  * @base:		memory mapped base address for this component.
- * @dev:		the device entity associated to this component.
  * @atclk:		optional clock for the core parts of the STM.
  * @csdev:		component vitals needed by the framework.
  * @spinlock:		only one at a time pls.
@@ -128,7 +130,6 @@
  */
 struct stm_drvdata {
 	void __iomem		*base;
-	struct device		*dev;
 	struct clk		*atclk;
 	struct coresight_device	*csdev;
 	spinlock_t		spinlock;
@@ -205,13 +206,13 @@
 	if (val)
 		return -EBUSY;
 
-	pm_runtime_get_sync(drvdata->dev);
+	pm_runtime_get_sync(csdev->dev.parent);
 
 	spin_lock(&drvdata->spinlock);
 	stm_enable_hw(drvdata);
 	spin_unlock(&drvdata->spinlock);
 
-	dev_info(drvdata->dev, "STM tracing enabled\n");
+	dev_dbg(&csdev->dev, "STM tracing enabled\n");
 	return 0;
 }
 
@@ -271,10 +272,10 @@
 		/* Wait until the engine has completely stopped */
 		coresight_timeout(drvdata->base, STMTCSR, STMTCSR_BUSY_BIT, 0);
 
-		pm_runtime_put(drvdata->dev);
+		pm_runtime_put(csdev->dev.parent);
 
 		local_set(&drvdata->mode, CS_MODE_DISABLED);
-		dev_info(drvdata->dev, "STM tracing disabled\n");
+		dev_dbg(&csdev->dev, "STM tracing disabled\n");
 	}
 }
 
@@ -685,14 +686,15 @@
 	NULL,
 };
 
-static int stm_get_resource_byname(struct device_node *np,
-				   char *ch_base, struct resource *res)
+#ifdef CONFIG_OF
+static int of_stm_get_stimulus_area(struct device *dev, struct resource *res)
 {
 	const char *name = NULL;
 	int index = 0, found = 0;
+	struct device_node *np = dev->of_node;
 
 	while (!of_property_read_string_index(np, "reg-names", index, &name)) {
-		if (strcmp(ch_base, name)) {
+		if (strcmp("stm-stimulus-base", name)) {
 			index++;
 			continue;
 		}
@@ -707,6 +709,70 @@
 
 	return of_address_to_resource(np, index, res);
 }
+#else
+static inline int of_stm_get_stimulus_area(struct device *dev,
+					   struct resource *res)
+{
+	return -ENOENT;
+}
+#endif
+
+#ifdef CONFIG_ACPI
+static int acpi_stm_get_stimulus_area(struct device *dev, struct resource *res)
+{
+	int rc;
+	bool found_base = false;
+	struct resource_entry *rent;
+	LIST_HEAD(res_list);
+
+	struct acpi_device *adev = ACPI_COMPANION(dev);
+
+	if (!adev)
+		return -ENODEV;
+	rc = acpi_dev_get_resources(adev, &res_list, NULL, NULL);
+	if (rc < 0)
+		return rc;
+
+	/*
+	 * The stimulus base for STM device must be listed as the second memory
+	 * resource, followed by the programming base address as described in
+	 * "Section 2.3 Resources" in ACPI for CoreSightTM 1.0 Platform Design
+	 * document (DEN0067).
+	 */
+	rc = -ENOENT;
+	list_for_each_entry(rent, &res_list, node) {
+		if (resource_type(rent->res) != IORESOURCE_MEM)
+			continue;
+		if (found_base) {
+			*res = *rent->res;
+			rc = 0;
+			break;
+		}
+
+		found_base = true;
+	}
+
+	acpi_dev_free_resource_list(&res_list);
+	return rc;
+}
+#else
+static inline int acpi_stm_get_stimulus_area(struct device *dev,
+					     struct resource *res)
+{
+	return -ENOENT;
+}
+#endif
+
+static int stm_get_stimulus_area(struct device *dev, struct resource *res)
+{
+	struct fwnode_handle *fwnode = dev_fwnode(dev);
+
+	if (is_of_node(fwnode))
+		return of_stm_get_stimulus_area(dev, res);
+	else if (is_acpi_node(fwnode))
+		return acpi_stm_get_stimulus_area(dev, res);
+	return -ENOENT;
+}
 
 static u32 stm_fundamental_data_size(struct stm_drvdata *drvdata)
 {
@@ -763,9 +829,10 @@
 	bitmap_clear(drvdata->chs.guaranteed, 0, drvdata->numsp);
 }
 
-static void stm_init_generic_data(struct stm_drvdata *drvdata)
+static void stm_init_generic_data(struct stm_drvdata *drvdata,
+				  const char *name)
 {
-	drvdata->stm.name = dev_name(drvdata->dev);
+	drvdata->stm.name = name;
 
 	/*
 	 * MasterIDs are assigned at HW design phase. As such the core is
@@ -793,21 +860,17 @@
 	struct stm_drvdata *drvdata;
 	struct resource *res = &adev->res;
 	struct resource ch_res;
-	size_t res_size, bitmap_size;
+	size_t bitmap_size;
 	struct coresight_desc desc = { 0 };
-	struct device_node *np = adev->dev.of_node;
 
-	if (np) {
-		pdata = of_get_coresight_platform_data(dev, np);
-		if (IS_ERR(pdata))
-			return PTR_ERR(pdata);
-		adev->dev.platform_data = pdata;
-	}
+	desc.name = coresight_alloc_device_name(&stm_devs, dev);
+	if (!desc.name)
+		return -ENOMEM;
+
 	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
 	if (!drvdata)
 		return -ENOMEM;
 
-	drvdata->dev = &adev->dev;
 	drvdata->atclk = devm_clk_get(&adev->dev, "atclk"); /* optional */
 	if (!IS_ERR(drvdata->atclk)) {
 		ret = clk_prepare_enable(drvdata->atclk);
@@ -821,7 +884,7 @@
 		return PTR_ERR(base);
 	drvdata->base = base;
 
-	ret = stm_get_resource_byname(np, "stm-stimulus-base", &ch_res);
+	ret = stm_get_stimulus_area(dev, &ch_res);
 	if (ret)
 		return ret;
 	drvdata->chs.phys = ch_res.start;
@@ -833,15 +896,11 @@
 
 	drvdata->write_bytes = stm_fundamental_data_size(drvdata);
 
-	if (boot_nr_channel) {
+	if (boot_nr_channel)
 		drvdata->numsp = boot_nr_channel;
-		res_size = min((resource_size_t)(boot_nr_channel *
-				  BYTES_PER_CHANNEL), resource_size(res));
-	} else {
+	else
 		drvdata->numsp = stm_num_stimulus_port(drvdata);
-		res_size = min((resource_size_t)(drvdata->numsp *
-				 BYTES_PER_CHANNEL), resource_size(res));
-	}
+
 	bitmap_size = BITS_TO_LONGS(drvdata->numsp) * sizeof(long);
 
 	guaranteed = devm_kzalloc(dev, bitmap_size, GFP_KERNEL);
@@ -852,14 +911,22 @@
 	spin_lock_init(&drvdata->spinlock);
 
 	stm_init_default_data(drvdata);
-	stm_init_generic_data(drvdata);
+	stm_init_generic_data(drvdata, desc.name);
 
 	if (stm_register_device(dev, &drvdata->stm, THIS_MODULE)) {
 		dev_info(dev,
-			 "stm_register_device failed, probing deffered\n");
+			 "%s : stm_register_device failed, probing deferred\n",
+			 desc.name);
 		return -EPROBE_DEFER;
 	}
 
+	pdata = coresight_get_platform_data(dev);
+	if (IS_ERR(pdata)) {
+		ret = PTR_ERR(pdata);
+		goto stm_unregister;
+	}
+	adev->dev.platform_data = pdata;
+
 	desc.type = CORESIGHT_DEV_TYPE_SOURCE;
 	desc.subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_SOFTWARE;
 	desc.ops = &stm_cs_ops;
@@ -874,7 +941,8 @@
 
 	pm_runtime_put(&adev->dev);
 
-	dev_info(dev, "%s initialized\n", (char *)id->data);
+	dev_info(&drvdata->csdev->dev, "%s initialized\n",
+		 (char *)coresight_get_uci_data(id));
 	return 0;
 
 stm_unregister:
@@ -909,16 +977,8 @@
 };
 
 static const struct amba_id stm_ids[] = {
-	{
-		.id     = 0x000bb962,
-		.mask   = 0x000fffff,
-		.data	= "STM32",
-	},
-	{
-		.id	= 0x000bb963,
-		.mask	= 0x000fffff,
-		.data	= "STM500",
-	},
+	CS_AMBA_ID_DATA(0x000bb962, "STM32"),
+	CS_AMBA_ID_DATA(0x000bb963, "STM500"),
 	{ 0, 0},
 };
 
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c
index 0549249..807416b 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etf.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c
@@ -4,14 +4,19 @@
  * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
  */
 
+#include <linux/atomic.h>
 #include <linux/circ_buf.h>
 #include <linux/coresight.h>
 #include <linux/perf_event.h>
 #include <linux/slab.h>
 #include "coresight-priv.h"
 #include "coresight-tmc.h"
+#include "coresight-etm-perf.h"
 
-static void tmc_etb_enable_hw(struct tmc_drvdata *drvdata)
+static int tmc_set_etf_buffer(struct coresight_device *csdev,
+			      struct perf_output_handle *handle);
+
+static void __tmc_etb_enable_hw(struct tmc_drvdata *drvdata)
 {
 	CS_UNLOCK(drvdata->base);
 
@@ -30,33 +35,41 @@
 	CS_LOCK(drvdata->base);
 }
 
+static int tmc_etb_enable_hw(struct tmc_drvdata *drvdata)
+{
+	int rc = coresight_claim_device(drvdata->base);
+
+	if (rc)
+		return rc;
+
+	__tmc_etb_enable_hw(drvdata);
+	return 0;
+}
+
 static void tmc_etb_dump_hw(struct tmc_drvdata *drvdata)
 {
 	char *bufp;
 	u32 read_data, lost;
-	int i;
 
 	/* Check if the buffer wrapped around. */
 	lost = readl_relaxed(drvdata->base + TMC_STS) & TMC_STS_FULL;
 	bufp = drvdata->buf;
 	drvdata->len = 0;
 	while (1) {
-		for (i = 0; i < drvdata->memwidth; i++) {
-			read_data = readl_relaxed(drvdata->base + TMC_RRD);
-			if (read_data == 0xFFFFFFFF)
-				goto done;
-			memcpy(bufp, &read_data, 4);
-			bufp += 4;
-			drvdata->len += 4;
-		}
+		read_data = readl_relaxed(drvdata->base + TMC_RRD);
+		if (read_data == 0xFFFFFFFF)
+			break;
+		memcpy(bufp, &read_data, 4);
+		bufp += 4;
+		drvdata->len += 4;
 	}
-done:
+
 	if (lost)
 		coresight_insert_barrier_packet(drvdata->buf);
 	return;
 }
 
-static void tmc_etb_disable_hw(struct tmc_drvdata *drvdata)
+static void __tmc_etb_disable_hw(struct tmc_drvdata *drvdata)
 {
 	CS_UNLOCK(drvdata->base);
 
@@ -72,7 +85,13 @@
 	CS_LOCK(drvdata->base);
 }
 
-static void tmc_etf_enable_hw(struct tmc_drvdata *drvdata)
+static void tmc_etb_disable_hw(struct tmc_drvdata *drvdata)
+{
+	__tmc_etb_disable_hw(drvdata);
+	coresight_disclaim_device(drvdata->base);
+}
+
+static void __tmc_etf_enable_hw(struct tmc_drvdata *drvdata)
 {
 	CS_UNLOCK(drvdata->base);
 
@@ -88,13 +107,24 @@
 	CS_LOCK(drvdata->base);
 }
 
+static int tmc_etf_enable_hw(struct tmc_drvdata *drvdata)
+{
+	int rc = coresight_claim_device(drvdata->base);
+
+	if (rc)
+		return rc;
+
+	__tmc_etf_enable_hw(drvdata);
+	return 0;
+}
+
 static void tmc_etf_disable_hw(struct tmc_drvdata *drvdata)
 {
 	CS_UNLOCK(drvdata->base);
 
 	tmc_flush_and_stop(drvdata);
 	tmc_disable_hw(drvdata);
-
+	coresight_disclaim_device_unlocked(drvdata->base);
 	CS_LOCK(drvdata->base);
 }
 
@@ -151,8 +181,10 @@
 	 * sink is already enabled no memory is needed and the HW need not be
 	 * touched.
 	 */
-	if (drvdata->mode == CS_MODE_SYSFS)
+	if (drvdata->mode == CS_MODE_SYSFS) {
+		atomic_inc(csdev->refcnt);
 		goto out;
+	}
 
 	/*
 	 * If drvdata::buf isn't NULL, memory was allocated for a previous
@@ -170,8 +202,14 @@
 		drvdata->buf = buf;
 	}
 
-	drvdata->mode = CS_MODE_SYSFS;
-	tmc_etb_enable_hw(drvdata);
+	ret = tmc_etb_enable_hw(drvdata);
+	if (!ret) {
+		drvdata->mode = CS_MODE_SYSFS;
+		atomic_inc(csdev->refcnt);
+	} else {
+		/* Free up the buffer if we failed to enable */
+		used = false;
+	}
 out:
 	spin_unlock_irqrestore(&drvdata->spinlock, flags);
 
@@ -182,47 +220,73 @@
 	return ret;
 }
 
-static int tmc_enable_etf_sink_perf(struct coresight_device *csdev)
+static int tmc_enable_etf_sink_perf(struct coresight_device *csdev, void *data)
 {
 	int ret = 0;
+	pid_t pid;
 	unsigned long flags;
 	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	struct perf_output_handle *handle = data;
 
 	spin_lock_irqsave(&drvdata->spinlock, flags);
-	if (drvdata->reading) {
+	do {
 		ret = -EINVAL;
-		goto out;
-	}
+		if (drvdata->reading)
+			break;
+		/*
+		 * No need to continue if the ETB/ETF is already operated
+		 * from sysFS.
+		 */
+		if (drvdata->mode == CS_MODE_SYSFS) {
+			ret = -EBUSY;
+			break;
+		}
 
-	/*
-	 * In Perf mode there can be only one writer per sink.  There
-	 * is also no need to continue if the ETB/ETR is already operated
-	 * from sysFS.
-	 */
-	if (drvdata->mode != CS_MODE_DISABLED) {
-		ret = -EINVAL;
-		goto out;
-	}
+		/* Get a handle on the pid of the process to monitor */
+		pid = task_pid_nr(handle->event->owner);
 
-	drvdata->mode = CS_MODE_PERF;
-	tmc_etb_enable_hw(drvdata);
-out:
+		if (drvdata->pid != -1 && drvdata->pid != pid) {
+			ret = -EBUSY;
+			break;
+		}
+
+		ret = tmc_set_etf_buffer(csdev, handle);
+		if (ret)
+			break;
+
+		/*
+		 * No HW configuration is needed if the sink is already in
+		 * use for this session.
+		 */
+		if (drvdata->pid == pid) {
+			atomic_inc(csdev->refcnt);
+			break;
+		}
+
+		ret  = tmc_etb_enable_hw(drvdata);
+		if (!ret) {
+			/* Associate with monitored process. */
+			drvdata->pid = pid;
+			drvdata->mode = CS_MODE_PERF;
+			atomic_inc(csdev->refcnt);
+		}
+	} while (0);
 	spin_unlock_irqrestore(&drvdata->spinlock, flags);
 
 	return ret;
 }
 
-static int tmc_enable_etf_sink(struct coresight_device *csdev, u32 mode)
+static int tmc_enable_etf_sink(struct coresight_device *csdev,
+			       u32 mode, void *data)
 {
 	int ret;
-	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
 
 	switch (mode) {
 	case CS_MODE_SYSFS:
 		ret = tmc_enable_etf_sink_sysfs(csdev);
 		break;
 	case CS_MODE_PERF:
-		ret = tmc_enable_etf_sink_perf(csdev);
+		ret = tmc_enable_etf_sink_perf(csdev, data);
 		break;
 	/* We shouldn't be here */
 	default:
@@ -233,35 +297,44 @@
 	if (ret)
 		return ret;
 
-	dev_info(drvdata->dev, "TMC-ETB/ETF enabled\n");
+	dev_dbg(&csdev->dev, "TMC-ETB/ETF enabled\n");
 	return 0;
 }
 
-static void tmc_disable_etf_sink(struct coresight_device *csdev)
+static int tmc_disable_etf_sink(struct coresight_device *csdev)
 {
 	unsigned long flags;
 	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
 
 	spin_lock_irqsave(&drvdata->spinlock, flags);
+
 	if (drvdata->reading) {
 		spin_unlock_irqrestore(&drvdata->spinlock, flags);
-		return;
+		return -EBUSY;
 	}
 
-	/* Disable the TMC only if it needs to */
-	if (drvdata->mode != CS_MODE_DISABLED) {
-		tmc_etb_disable_hw(drvdata);
-		drvdata->mode = CS_MODE_DISABLED;
+	if (atomic_dec_return(csdev->refcnt)) {
+		spin_unlock_irqrestore(&drvdata->spinlock, flags);
+		return -EBUSY;
 	}
 
+	/* Complain if we (somehow) got out of sync */
+	WARN_ON_ONCE(drvdata->mode == CS_MODE_DISABLED);
+	tmc_etb_disable_hw(drvdata);
+	/* Dissociate from monitored process. */
+	drvdata->pid = -1;
+	drvdata->mode = CS_MODE_DISABLED;
+
 	spin_unlock_irqrestore(&drvdata->spinlock, flags);
 
-	dev_info(drvdata->dev, "TMC-ETB/ETF disabled\n");
+	dev_dbg(&csdev->dev, "TMC-ETB/ETF disabled\n");
+	return 0;
 }
 
 static int tmc_enable_etf_link(struct coresight_device *csdev,
 			       int inport, int outport)
 {
+	int ret;
 	unsigned long flags;
 	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
 
@@ -271,12 +344,14 @@
 		return -EBUSY;
 	}
 
-	tmc_etf_enable_hw(drvdata);
-	drvdata->mode = CS_MODE_SYSFS;
+	ret = tmc_etf_enable_hw(drvdata);
+	if (!ret)
+		drvdata->mode = CS_MODE_SYSFS;
 	spin_unlock_irqrestore(&drvdata->spinlock, flags);
 
-	dev_info(drvdata->dev, "TMC-ETF enabled\n");
-	return 0;
+	if (!ret)
+		dev_dbg(&csdev->dev, "TMC-ETF enabled\n");
+	return ret;
 }
 
 static void tmc_disable_etf_link(struct coresight_device *csdev,
@@ -295,18 +370,17 @@
 	drvdata->mode = CS_MODE_DISABLED;
 	spin_unlock_irqrestore(&drvdata->spinlock, flags);
 
-	dev_info(drvdata->dev, "TMC-ETF disabled\n");
+	dev_dbg(&csdev->dev, "TMC-ETF disabled\n");
 }
 
-static void *tmc_alloc_etf_buffer(struct coresight_device *csdev, int cpu,
-				  void **pages, int nr_pages, bool overwrite)
+static void *tmc_alloc_etf_buffer(struct coresight_device *csdev,
+				  struct perf_event *event, void **pages,
+				  int nr_pages, bool overwrite)
 {
 	int node;
 	struct cs_buffers *buf;
 
-	if (cpu == -1)
-		cpu = smp_processor_id();
-	node = cpu_to_node(cpu);
+	node = (event->cpu == -1) ? NUMA_NO_NODE : cpu_to_node(event->cpu);
 
 	/* Allocate memory structure for interaction with Perf */
 	buf = kzalloc_node(sizeof(struct cs_buffers), GFP_KERNEL, node);
@@ -328,12 +402,14 @@
 }
 
 static int tmc_set_etf_buffer(struct coresight_device *csdev,
-			      struct perf_output_handle *handle,
-			      void *sink_config)
+			      struct perf_output_handle *handle)
 {
 	int ret = 0;
 	unsigned long head;
-	struct cs_buffers *buf = sink_config;
+	struct cs_buffers *buf = etm_perf_sink_config(handle);
+
+	if (!buf)
+		return -EINVAL;
 
 	/* wrap head around to the amount of space we have */
 	head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1);
@@ -349,36 +425,7 @@
 	return ret;
 }
 
-static unsigned long tmc_reset_etf_buffer(struct coresight_device *csdev,
-					  struct perf_output_handle *handle,
-					  void *sink_config)
-{
-	long size = 0;
-	struct cs_buffers *buf = sink_config;
-
-	if (buf) {
-		/*
-		 * In snapshot mode ->data_size holds the new address of the
-		 * ring buffer's head.  The size itself is the whole address
-		 * range since we want the latest information.
-		 */
-		if (buf->snapshot)
-			handle->head = local_xchg(&buf->data_size,
-						  buf->nr_pages << PAGE_SHIFT);
-		/*
-		 * Tell the tracer PMU how much we got in this run and if
-		 * something went wrong along the way.  Nobody else can use
-		 * this cs_buffers instance until we are done.  As such
-		 * resetting parameters here and squaring off with the ring
-		 * buffer API in the tracer PMU is fine.
-		 */
-		size = local_xchg(&buf->data_size, 0);
-	}
-
-	return size;
-}
-
-static void tmc_update_etf_buffer(struct coresight_device *csdev,
+static unsigned long tmc_update_etf_buffer(struct coresight_device *csdev,
 				  struct perf_output_handle *handle,
 				  void *sink_config)
 {
@@ -387,17 +434,23 @@
 	const u32 *barrier;
 	u32 *buf_ptr;
 	u64 read_ptr, write_ptr;
-	u32 status, to_read;
-	unsigned long offset;
+	u32 status;
+	unsigned long offset, to_read = 0, flags;
 	struct cs_buffers *buf = sink_config;
 	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
 
 	if (!buf)
-		return;
+		return 0;
 
 	/* This shouldn't happen */
 	if (WARN_ON_ONCE(drvdata->mode != CS_MODE_PERF))
-		return;
+		return 0;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+
+	/* Don't do anything if another tracer is using this sink */
+	if (atomic_read(csdev->refcnt) != 1)
+		goto out;
 
 	CS_UNLOCK(drvdata->base);
 
@@ -421,33 +474,16 @@
 	/*
 	 * The TMC RAM buffer may be bigger than the space available in the
 	 * perf ring buffer (handle->size).  If so advance the RRP so that we
-	 * get the latest trace data.
+	 * get the latest trace data.  In snapshot mode none of that matters
+	 * since we are expected to clobber stale data in favour of the latest
+	 * traces.
 	 */
-	if (to_read > handle->size) {
-		u32 mask = 0;
-
-		/*
-		 * The value written to RRP must be byte-address aligned to
-		 * the width of the trace memory databus _and_ to a frame
-		 * boundary (16 byte), whichever is the biggest. For example,
-		 * for 32-bit, 64-bit and 128-bit wide trace memory, the four
-		 * LSBs must be 0s. For 256-bit wide trace memory, the five
-		 * LSBs must be 0s.
-		 */
-		switch (drvdata->memwidth) {
-		case TMC_MEM_INTF_WIDTH_32BITS:
-		case TMC_MEM_INTF_WIDTH_64BITS:
-		case TMC_MEM_INTF_WIDTH_128BITS:
-			mask = GENMASK(31, 5);
-			break;
-		case TMC_MEM_INTF_WIDTH_256BITS:
-			mask = GENMASK(31, 6);
-			break;
-		}
+	if (!buf->snapshot && to_read > handle->size) {
+		u32 mask = tmc_get_memwidth_mask(drvdata);
 
 		/*
 		 * Make sure the new size is aligned in accordance with the
-		 * requirement explained above.
+		 * requirement explained in function tmc_get_memwidth_mask().
 		 */
 		to_read = handle->size & mask;
 		/* Move the RAM read pointer up */
@@ -460,7 +496,13 @@
 		lost = true;
 	}
 
-	if (lost)
+	/*
+	 * Don't set the TRUNCATED flag in snapshot mode because 1) the
+	 * captured buffer is expected to be truncated and 2) a full buffer
+	 * prevents the event from being re-enabled by the perf core,
+	 * resulting in stale data being send to user space.
+	 */
+	if (!buf->snapshot && lost)
 		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
 
 	cur = buf->cur;
@@ -487,17 +529,19 @@
 	}
 
 	/*
-	 * In snapshot mode all we have to do is communicate to
-	 * perf_aux_output_end() the address of the current head.  In full
-	 * trace mode the same function expects a size to move rb->aux_head
-	 * forward.
+	 * In snapshot mode we simply increment the head by the number of byte
+	 * that were written.  User space function  cs_etm_find_snapshot() will
+	 * figure out how many bytes to get from the AUX buffer based on the
+	 * position of the head.
 	 */
 	if (buf->snapshot)
-		local_set(&buf->data_size, (cur * PAGE_SIZE) + offset);
-	else
-		local_add(to_read, &buf->data_size);
+		handle->head += to_read;
 
 	CS_LOCK(drvdata->base);
+out:
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	return to_read;
 }
 
 static const struct coresight_ops_sink tmc_etf_sink_ops = {
@@ -505,8 +549,6 @@
 	.disable	= tmc_disable_etf_sink,
 	.alloc_buffer	= tmc_alloc_etf_buffer,
 	.free_buffer	= tmc_free_etf_buffer,
-	.set_buffer	= tmc_set_etf_buffer,
-	.reset_buffer	= tmc_reset_etf_buffer,
 	.update_buffer	= tmc_update_etf_buffer,
 };
 
@@ -563,7 +605,7 @@
 
 	/* Disable the TMC if need be */
 	if (drvdata->mode == CS_MODE_SYSFS)
-		tmc_etb_disable_hw(drvdata);
+		__tmc_etb_disable_hw(drvdata);
 
 	drvdata->reading = true;
 out:
@@ -603,7 +645,7 @@
 		 * can't be NULL.
 		 */
 		memset(drvdata->buf, 0, drvdata->size);
-		tmc_etb_enable_hw(drvdata);
+		__tmc_etb_enable_hw(drvdata);
 	} else {
 		/*
 		 * The ETB/ETF is not tracing and the buffer was just read.
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
index 2eda5de..625882b 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
@@ -4,12 +4,18 @@
  * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
  */
 
+#include <linux/atomic.h>
 #include <linux/coresight.h>
 #include <linux/dma-mapping.h>
 #include <linux/iommu.h>
+#include <linux/idr.h>
+#include <linux/mutex.h>
+#include <linux/refcount.h>
 #include <linux/slab.h>
+#include <linux/types.h>
 #include <linux/vmalloc.h>
 #include "coresight-catu.h"
+#include "coresight-etm-perf.h"
 #include "coresight-priv.h"
 #include "coresight-tmc.h"
 
@@ -21,6 +27,32 @@
 };
 
 /*
+ * etr_perf_buffer - Perf buffer used for ETR
+ * @drvdata		- The ETR drvdaga this buffer has been allocated for.
+ * @etr_buf		- Actual buffer used by the ETR
+ * @pid			- The PID this etr_perf_buffer belongs to.
+ * @snaphost		- Perf session mode
+ * @head		- handle->head at the beginning of the session.
+ * @nr_pages		- Number of pages in the ring buffer.
+ * @pages		- Array of Pages in the ring buffer.
+ */
+struct etr_perf_buffer {
+	struct tmc_drvdata	*drvdata;
+	struct etr_buf		*etr_buf;
+	pid_t			pid;
+	bool			snapshot;
+	unsigned long		head;
+	int			nr_pages;
+	void			**pages;
+};
+
+/* Convert the perf index to an offset within the ETR buffer */
+#define PERF_IDX2OFF(idx, buf)	((idx) % ((buf)->nr_pages << PAGE_SHIFT))
+
+/* Lower limit for ETR hardware buffer */
+#define TMC_ETR_PERF_MIN_BUF_SIZE	SZ_1M
+
+/*
  * The TMC ETR SG has a page size of 4K. The SG table contains pointers
  * to 4KB buffers. However, the OS may use a PAGE_SIZE different from
  * 4K (i.e, 16KB or 64KB). This implies that a single OS page could
@@ -130,10 +162,11 @@
 			   struct device *dev, enum dma_data_direction dir)
 {
 	int i;
+	struct device *real_dev = dev->parent;
 
 	for (i = 0; i < tmc_pages->nr_pages; i++) {
 		if (tmc_pages->daddrs && tmc_pages->daddrs[i])
-			dma_unmap_page(dev, tmc_pages->daddrs[i],
+			dma_unmap_page(real_dev, tmc_pages->daddrs[i],
 					 PAGE_SIZE, dir);
 		if (tmc_pages->pages && tmc_pages->pages[i])
 			__free_page(tmc_pages->pages[i]);
@@ -161,6 +194,7 @@
 	int i, nr_pages;
 	dma_addr_t paddr;
 	struct page *page;
+	struct device *real_dev = dev->parent;
 
 	nr_pages = tmc_pages->nr_pages;
 	tmc_pages->daddrs = kcalloc(nr_pages, sizeof(*tmc_pages->daddrs),
@@ -184,8 +218,8 @@
 			page = alloc_pages_node(node,
 						GFP_KERNEL | __GFP_ZERO, 0);
 		}
-		paddr = dma_map_page(dev, page, 0, PAGE_SIZE, dir);
-		if (dma_mapping_error(dev, paddr))
+		paddr = dma_map_page(real_dev, page, 0, PAGE_SIZE, dir);
+		if (dma_mapping_error(real_dev, paddr))
 			goto err;
 		tmc_pages->daddrs[i] = paddr;
 		tmc_pages->pages[i] = page;
@@ -272,7 +306,7 @@
  * and data buffers. TMC writes to the data buffers and reads from the SG
  * Table pages.
  *
- * @dev		- Device to which page should be DMA mapped.
+ * @dev		- Coresight device to which page should be DMA mapped.
  * @node	- Numa node for mem allocations
  * @nr_tpages	- Number of pages for the table entries.
  * @nr_dpages	- Number of pages for Data buffer.
@@ -316,13 +350,13 @@
 {
 	int i, index, start;
 	int npages = DIV_ROUND_UP(size, PAGE_SIZE);
-	struct device *dev = table->dev;
+	struct device *real_dev = table->dev->parent;
 	struct tmc_pages *data = &table->data_pages;
 
 	start = offset >> PAGE_SHIFT;
 	for (i = start; i < (start + npages); i++) {
 		index = i % data->nr_pages;
-		dma_sync_single_for_cpu(dev, data->daddrs[index],
+		dma_sync_single_for_cpu(real_dev, data->daddrs[index],
 					PAGE_SIZE, DMA_FROM_DEVICE);
 	}
 }
@@ -331,11 +365,11 @@
 void tmc_sg_table_sync_table(struct tmc_sg_table *sg_table)
 {
 	int i;
-	struct device *dev = sg_table->dev;
+	struct device *real_dev = sg_table->dev->parent;
 	struct tmc_pages *table_pages = &sg_table->table_pages;
 
 	for (i = 0; i < table_pages->nr_pages; i++)
-		dma_sync_single_for_device(dev, table_pages->daddrs[i],
+		dma_sync_single_for_device(real_dev, table_pages->daddrs[i],
 					   PAGE_SIZE, DMA_TO_DEVICE);
 }
 
@@ -536,7 +570,7 @@
 	sg_table = tmc_alloc_sg_table(dev, node, nr_tpages, nr_dpages, pages);
 	if (IS_ERR(sg_table)) {
 		kfree(etr_table);
-		return ERR_PTR(PTR_ERR(sg_table));
+		return ERR_CAST(sg_table);
 	}
 
 	etr_table->sg_table = sg_table;
@@ -558,6 +592,7 @@
 				  void **pages)
 {
 	struct etr_flat_buf *flat_buf;
+	struct device *real_dev = drvdata->csdev->dev.parent;
 
 	/* We cannot reuse existing pages for flat buf */
 	if (pages)
@@ -567,7 +602,7 @@
 	if (!flat_buf)
 		return -ENOMEM;
 
-	flat_buf->vaddr = dma_alloc_coherent(drvdata->dev, etr_buf->size,
+	flat_buf->vaddr = dma_alloc_coherent(real_dev, etr_buf->size,
 					     &flat_buf->daddr, GFP_KERNEL);
 	if (!flat_buf->vaddr) {
 		kfree(flat_buf);
@@ -575,7 +610,7 @@
 	}
 
 	flat_buf->size = etr_buf->size;
-	flat_buf->dev = drvdata->dev;
+	flat_buf->dev = &drvdata->csdev->dev;
 	etr_buf->hwaddr = flat_buf->daddr;
 	etr_buf->mode = ETR_MODE_FLAT;
 	etr_buf->private = flat_buf;
@@ -586,9 +621,12 @@
 {
 	struct etr_flat_buf *flat_buf = etr_buf->private;
 
-	if (flat_buf && flat_buf->daddr)
-		dma_free_coherent(flat_buf->dev, flat_buf->size,
+	if (flat_buf && flat_buf->daddr) {
+		struct device *real_dev = flat_buf->dev->parent;
+
+		dma_free_coherent(real_dev, flat_buf->size,
 				  flat_buf->vaddr, flat_buf->daddr);
+	}
 	kfree(flat_buf);
 }
 
@@ -634,8 +672,9 @@
 				void **pages)
 {
 	struct etr_sg_table *etr_table;
+	struct device *dev = &drvdata->csdev->dev;
 
-	etr_table = tmc_init_etr_sg_table(drvdata->dev, node,
+	etr_table = tmc_init_etr_sg_table(dev, node,
 					  etr_buf->size, pages);
 	if (IS_ERR(etr_table))
 		return -ENOMEM;
@@ -719,8 +758,8 @@
 	if (!IS_ENABLED(CONFIG_CORESIGHT_CATU))
 		return NULL;
 
-	for (i = 0; i < etr->nr_outport; i++) {
-		tmp = etr->conns[i].child_dev;
+	for (i = 0; i < etr->pdata->nr_outport; i++) {
+		tmp = etr->pdata->conns[i].child_dev;
 		if (tmp && coresight_is_catu_device(tmp))
 			return tmp;
 	}
@@ -728,12 +767,14 @@
 	return NULL;
 }
 
-static inline void tmc_etr_enable_catu(struct tmc_drvdata *drvdata)
+static inline int tmc_etr_enable_catu(struct tmc_drvdata *drvdata,
+				      struct etr_buf *etr_buf)
 {
 	struct coresight_device *catu = tmc_etr_get_catu_device(drvdata);
 
 	if (catu && helper_ops(catu)->enable)
-		helper_ops(catu)->enable(catu, drvdata->etr_buf);
+		return helper_ops(catu)->enable(catu, etr_buf);
+	return 0;
 }
 
 static inline void tmc_etr_disable_catu(struct tmc_drvdata *drvdata)
@@ -747,7 +788,8 @@
 static const struct etr_buf_operations *etr_buf_ops[] = {
 	[ETR_MODE_FLAT] = &etr_flat_buf_ops,
 	[ETR_MODE_ETR_SG] = &etr_sg_buf_ops,
-	[ETR_MODE_CATU] = &etr_catu_buf_ops,
+	[ETR_MODE_CATU] = IS_ENABLED(CONFIG_CORESIGHT_CATU)
+						? &etr_catu_buf_ops : NULL,
 };
 
 static inline int tmc_etr_mode_alloc_buf(int mode,
@@ -761,7 +803,7 @@
 	case ETR_MODE_FLAT:
 	case ETR_MODE_ETR_SG:
 	case ETR_MODE_CATU:
-		if (etr_buf_ops[mode]->alloc)
+		if (etr_buf_ops[mode] && etr_buf_ops[mode]->alloc)
 			rc = etr_buf_ops[mode]->alloc(drvdata, etr_buf,
 						      node, pages);
 		if (!rc)
@@ -788,9 +830,10 @@
 	bool has_etr_sg, has_iommu;
 	bool has_sg, has_catu;
 	struct etr_buf *etr_buf;
+	struct device *dev = &drvdata->csdev->dev;
 
 	has_etr_sg = tmc_etr_has_cap(drvdata, TMC_ETR_SG);
-	has_iommu = iommu_get_domain_for_dev(drvdata->dev);
+	has_iommu = iommu_get_domain_for_dev(dev->parent);
 	has_catu = !!tmc_etr_get_catu_device(drvdata);
 
 	has_sg = has_catu || has_etr_sg;
@@ -828,7 +871,8 @@
 		return ERR_PTR(rc);
 	}
 
-	dev_dbg(drvdata->dev, "allocated buffer of size %ldKB in mode %d\n",
+	refcount_set(&etr_buf->refcount, 1);
+	dev_dbg(dev, "allocated buffer of size %ldKB in mode %d\n",
 		(unsigned long)size >> 10, etr_buf->mode);
 	return etr_buf;
 }
@@ -884,28 +928,31 @@
 	rrp = tmc_read_rrp(drvdata);
 	rwp = tmc_read_rwp(drvdata);
 	status = readl_relaxed(drvdata->base + TMC_STS);
+
+	/*
+	 * If there were memory errors in the session, truncate the
+	 * buffer.
+	 */
+	if (WARN_ON_ONCE(status & TMC_STS_MEMERR)) {
+		dev_dbg(&drvdata->csdev->dev,
+			"tmc memory error detected, truncating buffer\n");
+		etr_buf->len = 0;
+		etr_buf->full = 0;
+		return;
+	}
+
 	etr_buf->full = status & TMC_STS_FULL;
 
 	WARN_ON(!etr_buf->ops || !etr_buf->ops->sync);
 
 	etr_buf->ops->sync(etr_buf, rrp, rwp);
-
-	/* Insert barrier packets at the beginning, if there was an overflow */
-	if (etr_buf->full)
-		tmc_etr_buf_insert_barrier_packet(etr_buf, etr_buf->offset);
 }
 
-static void tmc_etr_enable_hw(struct tmc_drvdata *drvdata)
+static void __tmc_etr_enable_hw(struct tmc_drvdata *drvdata)
 {
 	u32 axictl, sts;
 	struct etr_buf *etr_buf = drvdata->etr_buf;
 
-	/*
-	 * If this ETR is connected to a CATU, enable it before we turn
-	 * this on
-	 */
-	tmc_etr_enable_catu(drvdata);
-
 	CS_UNLOCK(drvdata->base);
 
 	/* Wait for TMCSReady bit to be set */
@@ -924,11 +971,8 @@
 		axictl |= TMC_AXICTL_ARCACHE_OS;
 	}
 
-	if (etr_buf->mode == ETR_MODE_ETR_SG) {
-		if (WARN_ON(!tmc_etr_has_cap(drvdata, TMC_ETR_SG)))
-			return;
+	if (etr_buf->mode == ETR_MODE_ETR_SG)
 		axictl |= TMC_AXICTL_SCT_GAT_MODE;
-	}
 
 	writel_relaxed(axictl, drvdata->base + TMC_AXICTL);
 	tmc_write_dba(drvdata, etr_buf->hwaddr);
@@ -954,19 +998,54 @@
 	CS_LOCK(drvdata->base);
 }
 
+static int tmc_etr_enable_hw(struct tmc_drvdata *drvdata,
+			     struct etr_buf *etr_buf)
+{
+	int rc;
+
+	/* Callers should provide an appropriate buffer for use */
+	if (WARN_ON(!etr_buf))
+		return -EINVAL;
+
+	if ((etr_buf->mode == ETR_MODE_ETR_SG) &&
+	    WARN_ON(!tmc_etr_has_cap(drvdata, TMC_ETR_SG)))
+		return -EINVAL;
+
+	if (WARN_ON(drvdata->etr_buf))
+		return -EBUSY;
+
+	/*
+	 * If this ETR is connected to a CATU, enable it before we turn
+	 * this on.
+	 */
+	rc = tmc_etr_enable_catu(drvdata, etr_buf);
+	if (rc)
+		return rc;
+	rc = coresight_claim_device(drvdata->base);
+	if (!rc) {
+		drvdata->etr_buf = etr_buf;
+		__tmc_etr_enable_hw(drvdata);
+	}
+
+	return rc;
+}
+
 /*
  * Return the available trace data in the buffer (starts at etr_buf->offset,
  * limited by etr_buf->len) from @pos, with a maximum limit of @len,
  * also updating the @bufpp on where to find it. Since the trace data
  * starts at anywhere in the buffer, depending on the RRP, we adjust the
  * @len returned to handle buffer wrapping around.
+ *
+ * We are protected here by drvdata->reading != 0, which ensures the
+ * sysfs_buf stays alive.
  */
 ssize_t tmc_etr_get_sysfs_trace(struct tmc_drvdata *drvdata,
 				loff_t pos, size_t len, char **bufpp)
 {
 	s64 offset;
 	ssize_t actual = len;
-	struct etr_buf *etr_buf = drvdata->etr_buf;
+	struct etr_buf *etr_buf = drvdata->sysfs_buf;
 
 	if (pos + actual > etr_buf->len)
 		actual = etr_buf->len - pos;
@@ -996,10 +1075,24 @@
 
 static void tmc_etr_sync_sysfs_buf(struct tmc_drvdata *drvdata)
 {
-	tmc_sync_etr_buf(drvdata);
+	struct etr_buf *etr_buf = drvdata->etr_buf;
+
+	if (WARN_ON(drvdata->sysfs_buf != etr_buf)) {
+		tmc_etr_free_sysfs_buf(drvdata->sysfs_buf);
+		drvdata->sysfs_buf = NULL;
+	} else {
+		tmc_sync_etr_buf(drvdata);
+		/*
+		 * Insert barrier packets at the beginning, if there was
+		 * an overflow.
+		 */
+		if (etr_buf->full)
+			tmc_etr_buf_insert_barrier_packet(etr_buf,
+							  etr_buf->offset);
+	}
 }
 
-static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata)
+static void __tmc_etr_disable_hw(struct tmc_drvdata *drvdata)
 {
 	CS_UNLOCK(drvdata->base);
 
@@ -1015,8 +1108,16 @@
 
 	CS_LOCK(drvdata->base);
 
+}
+
+static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata)
+{
+	__tmc_etr_disable_hw(drvdata);
 	/* Disable CATU device if this ETR is connected to one */
 	tmc_etr_disable_catu(drvdata);
+	coresight_disclaim_device(drvdata->base);
+	/* Reset the ETR buf used by hardware */
+	drvdata->etr_buf = NULL;
 }
 
 static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev)
@@ -1024,7 +1125,7 @@
 	int ret = 0;
 	unsigned long flags;
 	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
-	struct etr_buf *new_buf = NULL, *free_buf = NULL;
+	struct etr_buf *sysfs_buf = NULL, *new_buf = NULL, *free_buf = NULL;
 
 	/*
 	 * If we are enabling the ETR from disabled state, we need to make
@@ -1035,7 +1136,8 @@
 	 * with the lock released.
 	 */
 	spin_lock_irqsave(&drvdata->spinlock, flags);
-	if (!drvdata->etr_buf || (drvdata->etr_buf->size != drvdata->size)) {
+	sysfs_buf = READ_ONCE(drvdata->sysfs_buf);
+	if (!sysfs_buf || (sysfs_buf->size != drvdata->size)) {
 		spin_unlock_irqrestore(&drvdata->spinlock, flags);
 
 		/* Allocate memory with the locks released */
@@ -1057,21 +1159,26 @@
 	 * sink is already enabled no memory is needed and the HW need not be
 	 * touched, even if the buffer size has changed.
 	 */
-	if (drvdata->mode == CS_MODE_SYSFS)
+	if (drvdata->mode == CS_MODE_SYSFS) {
+		atomic_inc(csdev->refcnt);
 		goto out;
+	}
 
 	/*
 	 * If we don't have a buffer or it doesn't match the requested size,
 	 * use the buffer allocated above. Otherwise reuse the existing buffer.
 	 */
-	if (!drvdata->etr_buf ||
-	    (new_buf && drvdata->etr_buf->size != new_buf->size)) {
-		free_buf = drvdata->etr_buf;
-		drvdata->etr_buf = new_buf;
+	sysfs_buf = READ_ONCE(drvdata->sysfs_buf);
+	if (!sysfs_buf || (new_buf && sysfs_buf->size != new_buf->size)) {
+		free_buf = sysfs_buf;
+		drvdata->sysfs_buf = new_buf;
 	}
 
-	drvdata->mode = CS_MODE_SYSFS;
-	tmc_etr_enable_hw(drvdata);
+	ret = tmc_etr_enable_hw(drvdata, drvdata->sysfs_buf);
+	if (!ret) {
+		drvdata->mode = CS_MODE_SYSFS;
+		atomic_inc(csdev->refcnt);
+	}
 out:
 	spin_unlock_irqrestore(&drvdata->spinlock, flags);
 
@@ -1080,55 +1187,482 @@
 		tmc_etr_free_sysfs_buf(free_buf);
 
 	if (!ret)
-		dev_info(drvdata->dev, "TMC-ETR enabled\n");
+		dev_dbg(&csdev->dev, "TMC-ETR enabled\n");
 
 	return ret;
 }
 
-static int tmc_enable_etr_sink_perf(struct coresight_device *csdev)
+/*
+ * alloc_etr_buf: Allocate ETR buffer for use by perf.
+ * The size of the hardware buffer is dependent on the size configured
+ * via sysfs and the perf ring buffer size. We prefer to allocate the
+ * largest possible size, scaling down the size by half until it
+ * reaches a minimum limit (1M), beyond which we give up.
+ */
+static struct etr_buf *
+alloc_etr_buf(struct tmc_drvdata *drvdata, struct perf_event *event,
+	      int nr_pages, void **pages, bool snapshot)
 {
-	/* We don't support perf mode yet ! */
-	return -EINVAL;
+	int node;
+	struct etr_buf *etr_buf;
+	unsigned long size;
+
+	node = (event->cpu == -1) ? NUMA_NO_NODE : cpu_to_node(event->cpu);
+	/*
+	 * Try to match the perf ring buffer size if it is larger
+	 * than the size requested via sysfs.
+	 */
+	if ((nr_pages << PAGE_SHIFT) > drvdata->size) {
+		etr_buf = tmc_alloc_etr_buf(drvdata, (nr_pages << PAGE_SHIFT),
+					    0, node, NULL);
+		if (!IS_ERR(etr_buf))
+			goto done;
+	}
+
+	/*
+	 * Else switch to configured size for this ETR
+	 * and scale down until we hit the minimum limit.
+	 */
+	size = drvdata->size;
+	do {
+		etr_buf = tmc_alloc_etr_buf(drvdata, size, 0, node, NULL);
+		if (!IS_ERR(etr_buf))
+			goto done;
+		size /= 2;
+	} while (size >= TMC_ETR_PERF_MIN_BUF_SIZE);
+
+	return ERR_PTR(-ENOMEM);
+
+done:
+	return etr_buf;
 }
 
-static int tmc_enable_etr_sink(struct coresight_device *csdev, u32 mode)
+static struct etr_buf *
+get_perf_etr_buf_cpu_wide(struct tmc_drvdata *drvdata,
+			  struct perf_event *event, int nr_pages,
+			  void **pages, bool snapshot)
+{
+	int ret;
+	pid_t pid = task_pid_nr(event->owner);
+	struct etr_buf *etr_buf;
+
+retry:
+	/*
+	 * An etr_perf_buffer is associated with an event and holds a reference
+	 * to the AUX ring buffer that was created for that event.  In CPU-wide
+	 * N:1 mode multiple events (one per CPU), each with its own AUX ring
+	 * buffer, share a sink.  As such an etr_perf_buffer is created for each
+	 * event but a single etr_buf associated with the ETR is shared between
+	 * them.  The last event in a trace session will copy the content of the
+	 * etr_buf to its AUX ring buffer.  Ring buffer associated to other
+	 * events are simply not used an freed as events are destoyed.  We still
+	 * need to allocate a ring buffer for each event since we don't know
+	 * which event will be last.
+	 */
+
+	/*
+	 * The first thing to do here is check if an etr_buf has already been
+	 * allocated for this session.  If so it is shared with this event,
+	 * otherwise it is created.
+	 */
+	mutex_lock(&drvdata->idr_mutex);
+	etr_buf = idr_find(&drvdata->idr, pid);
+	if (etr_buf) {
+		refcount_inc(&etr_buf->refcount);
+		mutex_unlock(&drvdata->idr_mutex);
+		return etr_buf;
+	}
+
+	/* If we made it here no buffer has been allocated, do so now. */
+	mutex_unlock(&drvdata->idr_mutex);
+
+	etr_buf = alloc_etr_buf(drvdata, event, nr_pages, pages, snapshot);
+	if (IS_ERR(etr_buf))
+		return etr_buf;
+
+	/* Now that we have a buffer, add it to the IDR. */
+	mutex_lock(&drvdata->idr_mutex);
+	ret = idr_alloc(&drvdata->idr, etr_buf, pid, pid + 1, GFP_KERNEL);
+	mutex_unlock(&drvdata->idr_mutex);
+
+	/* Another event with this session ID has allocated this buffer. */
+	if (ret == -ENOSPC) {
+		tmc_free_etr_buf(etr_buf);
+		goto retry;
+	}
+
+	/* The IDR can't allocate room for a new session, abandon ship. */
+	if (ret == -ENOMEM) {
+		tmc_free_etr_buf(etr_buf);
+		return ERR_PTR(ret);
+	}
+
+
+	return etr_buf;
+}
+
+static struct etr_buf *
+get_perf_etr_buf_per_thread(struct tmc_drvdata *drvdata,
+			    struct perf_event *event, int nr_pages,
+			    void **pages, bool snapshot)
+{
+	/*
+	 * In per-thread mode the etr_buf isn't shared, so just go ahead
+	 * with memory allocation.
+	 */
+	return alloc_etr_buf(drvdata, event, nr_pages, pages, snapshot);
+}
+
+static struct etr_buf *
+get_perf_etr_buf(struct tmc_drvdata *drvdata, struct perf_event *event,
+		 int nr_pages, void **pages, bool snapshot)
+{
+	if (event->cpu == -1)
+		return get_perf_etr_buf_per_thread(drvdata, event, nr_pages,
+						   pages, snapshot);
+
+	return get_perf_etr_buf_cpu_wide(drvdata, event, nr_pages,
+					 pages, snapshot);
+}
+
+static struct etr_perf_buffer *
+tmc_etr_setup_perf_buf(struct tmc_drvdata *drvdata, struct perf_event *event,
+		       int nr_pages, void **pages, bool snapshot)
+{
+	int node;
+	struct etr_buf *etr_buf;
+	struct etr_perf_buffer *etr_perf;
+
+	node = (event->cpu == -1) ? NUMA_NO_NODE : cpu_to_node(event->cpu);
+
+	etr_perf = kzalloc_node(sizeof(*etr_perf), GFP_KERNEL, node);
+	if (!etr_perf)
+		return ERR_PTR(-ENOMEM);
+
+	etr_buf = get_perf_etr_buf(drvdata, event, nr_pages, pages, snapshot);
+	if (!IS_ERR(etr_buf))
+		goto done;
+
+	kfree(etr_perf);
+	return ERR_PTR(-ENOMEM);
+
+done:
+	/*
+	 * Keep a reference to the ETR this buffer has been allocated for
+	 * in order to have access to the IDR in tmc_free_etr_buffer().
+	 */
+	etr_perf->drvdata = drvdata;
+	etr_perf->etr_buf = etr_buf;
+
+	return etr_perf;
+}
+
+
+static void *tmc_alloc_etr_buffer(struct coresight_device *csdev,
+				  struct perf_event *event, void **pages,
+				  int nr_pages, bool snapshot)
+{
+	struct etr_perf_buffer *etr_perf;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	etr_perf = tmc_etr_setup_perf_buf(drvdata, event,
+					  nr_pages, pages, snapshot);
+	if (IS_ERR(etr_perf)) {
+		dev_dbg(&csdev->dev, "Unable to allocate ETR buffer\n");
+		return NULL;
+	}
+
+	etr_perf->pid = task_pid_nr(event->owner);
+	etr_perf->snapshot = snapshot;
+	etr_perf->nr_pages = nr_pages;
+	etr_perf->pages = pages;
+
+	return etr_perf;
+}
+
+static void tmc_free_etr_buffer(void *config)
+{
+	struct etr_perf_buffer *etr_perf = config;
+	struct tmc_drvdata *drvdata = etr_perf->drvdata;
+	struct etr_buf *buf, *etr_buf = etr_perf->etr_buf;
+
+	if (!etr_buf)
+		goto free_etr_perf_buffer;
+
+	mutex_lock(&drvdata->idr_mutex);
+	/* If we are not the last one to use the buffer, don't touch it. */
+	if (!refcount_dec_and_test(&etr_buf->refcount)) {
+		mutex_unlock(&drvdata->idr_mutex);
+		goto free_etr_perf_buffer;
+	}
+
+	/* We are the last one, remove from the IDR and free the buffer. */
+	buf = idr_remove(&drvdata->idr, etr_perf->pid);
+	mutex_unlock(&drvdata->idr_mutex);
+
+	/*
+	 * Something went very wrong if the buffer associated with this ID
+	 * is not the same in the IDR.  Leak to avoid use after free.
+	 */
+	if (buf && WARN_ON(buf != etr_buf))
+		goto free_etr_perf_buffer;
+
+	tmc_free_etr_buf(etr_perf->etr_buf);
+
+free_etr_perf_buffer:
+	kfree(etr_perf);
+}
+
+/*
+ * tmc_etr_sync_perf_buffer: Copy the actual trace data from the hardware
+ * buffer to the perf ring buffer.
+ */
+static void tmc_etr_sync_perf_buffer(struct etr_perf_buffer *etr_perf,
+				     unsigned long src_offset,
+				     unsigned long to_copy)
+{
+	long bytes;
+	long pg_idx, pg_offset;
+	unsigned long head = etr_perf->head;
+	char **dst_pages, *src_buf;
+	struct etr_buf *etr_buf = etr_perf->etr_buf;
+
+	head = etr_perf->head;
+	pg_idx = head >> PAGE_SHIFT;
+	pg_offset = head & (PAGE_SIZE - 1);
+	dst_pages = (char **)etr_perf->pages;
+
+	while (to_copy > 0) {
+		/*
+		 * In one iteration, we can copy minimum of :
+		 *  1) what is available in the source buffer,
+		 *  2) what is available in the source buffer, before it
+		 *     wraps around.
+		 *  3) what is available in the destination page.
+		 * in one iteration.
+		 */
+		if (src_offset >= etr_buf->size)
+			src_offset -= etr_buf->size;
+		bytes = tmc_etr_buf_get_data(etr_buf, src_offset, to_copy,
+					     &src_buf);
+		if (WARN_ON_ONCE(bytes <= 0))
+			break;
+		bytes = min(bytes, (long)(PAGE_SIZE - pg_offset));
+
+		memcpy(dst_pages[pg_idx] + pg_offset, src_buf, bytes);
+
+		to_copy -= bytes;
+
+		/* Move destination pointers */
+		pg_offset += bytes;
+		if (pg_offset == PAGE_SIZE) {
+			pg_offset = 0;
+			if (++pg_idx == etr_perf->nr_pages)
+				pg_idx = 0;
+		}
+
+		/* Move source pointers */
+		src_offset += bytes;
+	}
+}
+
+/*
+ * tmc_update_etr_buffer : Update the perf ring buffer with the
+ * available trace data. We use software double buffering at the moment.
+ *
+ * TODO: Add support for reusing the perf ring buffer.
+ */
+static unsigned long
+tmc_update_etr_buffer(struct coresight_device *csdev,
+		      struct perf_output_handle *handle,
+		      void *config)
+{
+	bool lost = false;
+	unsigned long flags, offset, size = 0;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	struct etr_perf_buffer *etr_perf = config;
+	struct etr_buf *etr_buf = etr_perf->etr_buf;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+
+	/* Don't do anything if another tracer is using this sink */
+	if (atomic_read(csdev->refcnt) != 1) {
+		spin_unlock_irqrestore(&drvdata->spinlock, flags);
+		goto out;
+	}
+
+	if (WARN_ON(drvdata->perf_buf != etr_buf)) {
+		lost = true;
+		spin_unlock_irqrestore(&drvdata->spinlock, flags);
+		goto out;
+	}
+
+	CS_UNLOCK(drvdata->base);
+
+	tmc_flush_and_stop(drvdata);
+	tmc_sync_etr_buf(drvdata);
+
+	CS_LOCK(drvdata->base);
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	lost = etr_buf->full;
+	offset = etr_buf->offset;
+	size = etr_buf->len;
+
+	/*
+	 * The ETR buffer may be bigger than the space available in the
+	 * perf ring buffer (handle->size).  If so advance the offset so that we
+	 * get the latest trace data.  In snapshot mode none of that matters
+	 * since we are expected to clobber stale data in favour of the latest
+	 * traces.
+	 */
+	if (!etr_perf->snapshot && size > handle->size) {
+		u32 mask = tmc_get_memwidth_mask(drvdata);
+
+		/*
+		 * Make sure the new size is aligned in accordance with the
+		 * requirement explained in function tmc_get_memwidth_mask().
+		 */
+		size = handle->size & mask;
+		offset = etr_buf->offset + etr_buf->len - size;
+
+		if (offset >= etr_buf->size)
+			offset -= etr_buf->size;
+		lost = true;
+	}
+
+	/* Insert barrier packets at the beginning, if there was an overflow */
+	if (lost)
+		tmc_etr_buf_insert_barrier_packet(etr_buf, etr_buf->offset);
+	tmc_etr_sync_perf_buffer(etr_perf, offset, size);
+
+	/*
+	 * In snapshot mode we simply increment the head by the number of byte
+	 * that were written.  User space function  cs_etm_find_snapshot() will
+	 * figure out how many bytes to get from the AUX buffer based on the
+	 * position of the head.
+	 */
+	if (etr_perf->snapshot)
+		handle->head += size;
+out:
+	/*
+	 * Don't set the TRUNCATED flag in snapshot mode because 1) the
+	 * captured buffer is expected to be truncated and 2) a full buffer
+	 * prevents the event from being re-enabled by the perf core,
+	 * resulting in stale data being send to user space.
+	 */
+	if (!etr_perf->snapshot && lost)
+		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
+	return size;
+}
+
+static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data)
+{
+	int rc = 0;
+	pid_t pid;
+	unsigned long flags;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	struct perf_output_handle *handle = data;
+	struct etr_perf_buffer *etr_perf = etm_perf_sink_config(handle);
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	 /* Don't use this sink if it is already claimed by sysFS */
+	if (drvdata->mode == CS_MODE_SYSFS) {
+		rc = -EBUSY;
+		goto unlock_out;
+	}
+
+	if (WARN_ON(!etr_perf || !etr_perf->etr_buf)) {
+		rc = -EINVAL;
+		goto unlock_out;
+	}
+
+	/* Get a handle on the pid of the process to monitor */
+	pid = etr_perf->pid;
+
+	/* Do not proceed if this device is associated with another session */
+	if (drvdata->pid != -1 && drvdata->pid != pid) {
+		rc = -EBUSY;
+		goto unlock_out;
+	}
+
+	etr_perf->head = PERF_IDX2OFF(handle->head, etr_perf);
+
+	/*
+	 * No HW configuration is needed if the sink is already in
+	 * use for this session.
+	 */
+	if (drvdata->pid == pid) {
+		atomic_inc(csdev->refcnt);
+		goto unlock_out;
+	}
+
+	rc = tmc_etr_enable_hw(drvdata, etr_perf->etr_buf);
+	if (!rc) {
+		/* Associate with monitored process. */
+		drvdata->pid = pid;
+		drvdata->mode = CS_MODE_PERF;
+		drvdata->perf_buf = etr_perf->etr_buf;
+		atomic_inc(csdev->refcnt);
+	}
+
+unlock_out:
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+	return rc;
+}
+
+static int tmc_enable_etr_sink(struct coresight_device *csdev,
+			       u32 mode, void *data)
 {
 	switch (mode) {
 	case CS_MODE_SYSFS:
 		return tmc_enable_etr_sink_sysfs(csdev);
 	case CS_MODE_PERF:
-		return tmc_enable_etr_sink_perf(csdev);
+		return tmc_enable_etr_sink_perf(csdev, data);
 	}
 
 	/* We shouldn't be here */
 	return -EINVAL;
 }
 
-static void tmc_disable_etr_sink(struct coresight_device *csdev)
+static int tmc_disable_etr_sink(struct coresight_device *csdev)
 {
 	unsigned long flags;
 	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
 
 	spin_lock_irqsave(&drvdata->spinlock, flags);
+
 	if (drvdata->reading) {
 		spin_unlock_irqrestore(&drvdata->spinlock, flags);
-		return;
+		return -EBUSY;
 	}
 
-	/* Disable the TMC only if it needs to */
-	if (drvdata->mode != CS_MODE_DISABLED) {
-		tmc_etr_disable_hw(drvdata);
-		drvdata->mode = CS_MODE_DISABLED;
+	if (atomic_dec_return(csdev->refcnt)) {
+		spin_unlock_irqrestore(&drvdata->spinlock, flags);
+		return -EBUSY;
 	}
 
+	/* Complain if we (somehow) got out of sync */
+	WARN_ON_ONCE(drvdata->mode == CS_MODE_DISABLED);
+	tmc_etr_disable_hw(drvdata);
+	/* Dissociate from monitored process. */
+	drvdata->pid = -1;
+	drvdata->mode = CS_MODE_DISABLED;
+	/* Reset perf specific data */
+	drvdata->perf_buf = NULL;
+
 	spin_unlock_irqrestore(&drvdata->spinlock, flags);
 
-	dev_info(drvdata->dev, "TMC-ETR disabled\n");
+	dev_dbg(&csdev->dev, "TMC-ETR disabled\n");
+	return 0;
 }
 
 static const struct coresight_ops_sink tmc_etr_sink_ops = {
 	.enable		= tmc_enable_etr_sink,
 	.disable	= tmc_disable_etr_sink,
+	.alloc_buffer	= tmc_alloc_etr_buffer,
+	.update_buffer	= tmc_update_etr_buffer,
+	.free_buffer	= tmc_free_etr_buffer,
 };
 
 const struct coresight_ops tmc_etr_cs_ops = {
@@ -1150,21 +1684,19 @@
 		goto out;
 	}
 
-	/* Don't interfere if operated from Perf */
-	if (drvdata->mode == CS_MODE_PERF) {
+	/*
+	 * We can safely allow reads even if the ETR is operating in PERF mode,
+	 * since the sysfs session is captured in mode specific data.
+	 * If drvdata::sysfs_data is NULL the trace data has been read already.
+	 */
+	if (!drvdata->sysfs_buf) {
 		ret = -EINVAL;
 		goto out;
 	}
 
-	/* If drvdata::etr_buf is NULL the trace data has been read already */
-	if (drvdata->etr_buf == NULL) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	/* Disable the TMC if need be */
+	/* Disable the TMC if we are trying to read from a running session. */
 	if (drvdata->mode == CS_MODE_SYSFS)
-		tmc_etr_disable_hw(drvdata);
+		__tmc_etr_disable_hw(drvdata);
 
 	drvdata->reading = true;
 out:
@@ -1176,7 +1708,7 @@
 int tmc_read_unprepare_etr(struct tmc_drvdata *drvdata)
 {
 	unsigned long flags;
-	struct etr_buf *etr_buf = NULL;
+	struct etr_buf *sysfs_buf = NULL;
 
 	/* config types are set a boot time and never change */
 	if (WARN_ON_ONCE(drvdata->config_type != TMC_CONFIG_TYPE_ETR))
@@ -1191,22 +1723,22 @@
 		 * buffer. Since the tracer is still enabled drvdata::buf can't
 		 * be NULL.
 		 */
-		tmc_etr_enable_hw(drvdata);
+		__tmc_etr_enable_hw(drvdata);
 	} else {
 		/*
 		 * The ETR is not tracing and the buffer was just read.
 		 * As such prepare to free the trace buffer.
 		 */
-		etr_buf =  drvdata->etr_buf;
-		drvdata->etr_buf = NULL;
+		sysfs_buf = drvdata->sysfs_buf;
+		drvdata->sysfs_buf = NULL;
 	}
 
 	drvdata->reading = false;
 	spin_unlock_irqrestore(&drvdata->spinlock, flags);
 
 	/* Free allocated memory out side of the spinlock */
-	if (etr_buf)
-		tmc_free_etr_buf(etr_buf);
+	if (sysfs_buf)
+		tmc_etr_free_sysfs_buf(sysfs_buf);
 
 	return 0;
 }
diff --git a/drivers/hwtracing/coresight/coresight-tmc.c b/drivers/hwtracing/coresight/coresight-tmc.c
index 1b817ec..1cf82fa 100644
--- a/drivers/hwtracing/coresight/coresight-tmc.c
+++ b/drivers/hwtracing/coresight/coresight-tmc.c
@@ -8,10 +8,12 @@
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/device.h>
+#include <linux/idr.h>
 #include <linux/io.h>
 #include <linux/err.h>
 #include <linux/fs.h>
 #include <linux/miscdevice.h>
+#include <linux/mutex.h>
 #include <linux/property.h>
 #include <linux/uaccess.h>
 #include <linux/slab.h>
@@ -25,12 +27,16 @@
 #include "coresight-priv.h"
 #include "coresight-tmc.h"
 
+DEFINE_CORESIGHT_DEVLIST(etb_devs, "tmc_etb");
+DEFINE_CORESIGHT_DEVLIST(etf_devs, "tmc_etf");
+DEFINE_CORESIGHT_DEVLIST(etr_devs, "tmc_etr");
+
 void tmc_wait_for_tmcready(struct tmc_drvdata *drvdata)
 {
 	/* Ensure formatter, unformatter and hardware fifo are empty */
 	if (coresight_timeout(drvdata->base,
 			      TMC_STS, TMC_STS_TMCREADY_BIT, 1)) {
-		dev_err(drvdata->dev,
+		dev_err(&drvdata->csdev->dev,
 			"timeout while waiting for TMC to be Ready\n");
 	}
 }
@@ -47,7 +53,7 @@
 	/* Ensure flush completes */
 	if (coresight_timeout(drvdata->base,
 			      TMC_FFCR, TMC_FFCR_FLUSHMAN_BIT, 0)) {
-		dev_err(drvdata->dev,
+		dev_err(&drvdata->csdev->dev,
 		"timeout while waiting for completion of Manual Flush\n");
 	}
 
@@ -64,6 +70,34 @@
 	writel_relaxed(0x0, drvdata->base + TMC_CTL);
 }
 
+u32 tmc_get_memwidth_mask(struct tmc_drvdata *drvdata)
+{
+	u32 mask = 0;
+
+	/*
+	 * When moving RRP or an offset address forward, the new values must
+	 * be byte-address aligned to the width of the trace memory databus
+	 * _and_ to a frame boundary (16 byte), whichever is the biggest. For
+	 * example, for 32-bit, 64-bit and 128-bit wide trace memory, the four
+	 * LSBs must be 0s. For 256-bit wide trace memory, the five LSBs must
+	 * be 0s.
+	 */
+	switch (drvdata->memwidth) {
+	case TMC_MEM_INTF_WIDTH_32BITS:
+	/* fallthrough */
+	case TMC_MEM_INTF_WIDTH_64BITS:
+	/* fallthrough */
+	case TMC_MEM_INTF_WIDTH_128BITS:
+		mask = GENMASK(31, 4);
+		break;
+	case TMC_MEM_INTF_WIDTH_256BITS:
+		mask = GENMASK(31, 5);
+		break;
+	}
+
+	return mask;
+}
+
 static int tmc_read_prepare(struct tmc_drvdata *drvdata)
 {
 	int ret = 0;
@@ -81,7 +115,7 @@
 	}
 
 	if (!ret)
-		dev_info(drvdata->dev, "TMC read start\n");
+		dev_dbg(&drvdata->csdev->dev, "TMC read start\n");
 
 	return ret;
 }
@@ -103,7 +137,7 @@
 	}
 
 	if (!ret)
-		dev_info(drvdata->dev, "TMC read end\n");
+		dev_dbg(&drvdata->csdev->dev, "TMC read end\n");
 
 	return ret;
 }
@@ -120,7 +154,7 @@
 
 	nonseekable_open(inode, file);
 
-	dev_dbg(drvdata->dev, "%s: successfully opened\n", __func__);
+	dev_dbg(&drvdata->csdev->dev, "%s: successfully opened\n", __func__);
 	return 0;
 }
 
@@ -150,12 +184,13 @@
 		return 0;
 
 	if (copy_to_user(data, bufp, actual)) {
-		dev_dbg(drvdata->dev, "%s: copy_to_user failed\n", __func__);
+		dev_dbg(&drvdata->csdev->dev,
+			"%s: copy_to_user failed\n", __func__);
 		return -EFAULT;
 	}
 
 	*ppos += actual;
-	dev_dbg(drvdata->dev, "%zu bytes copied\n", actual);
+	dev_dbg(&drvdata->csdev->dev, "%zu bytes copied\n", actual);
 
 	return actual;
 }
@@ -170,7 +205,7 @@
 	if (ret)
 		return ret;
 
-	dev_dbg(drvdata->dev, "%s: released\n", __func__);
+	dev_dbg(&drvdata->csdev->dev, "%s: released\n", __func__);
 	return 0;
 }
 
@@ -229,6 +264,7 @@
 coresight_tmc_reg(mode, TMC_MODE);
 coresight_tmc_reg(pscr, TMC_PSCR);
 coresight_tmc_reg(axictl, TMC_AXICTL);
+coresight_tmc_reg(authstatus, TMC_AUTHSTATUS);
 coresight_tmc_reg(devid, CORESIGHT_DEVID);
 coresight_tmc_reg64(rrp, TMC_RRP, TMC_RRPHI);
 coresight_tmc_reg64(rwp, TMC_RWP, TMC_RWPHI);
@@ -248,6 +284,7 @@
 	&dev_attr_devid.attr,
 	&dev_attr_dba.attr,
 	&dev_attr_axictl.attr,
+	&dev_attr_authstatus.attr,
 	NULL,
 };
 
@@ -330,22 +367,32 @@
 	NULL,
 };
 
-static inline bool tmc_etr_can_use_sg(struct tmc_drvdata *drvdata)
+static inline bool tmc_etr_can_use_sg(struct device *dev)
 {
-	return fwnode_property_present(drvdata->dev->fwnode,
-				       "arm,scatter-gather");
+	return fwnode_property_present(dev->fwnode, "arm,scatter-gather");
+}
+
+static inline bool tmc_etr_has_non_secure_access(struct tmc_drvdata *drvdata)
+{
+	u32 auth = readl_relaxed(drvdata->base + TMC_AUTHSTATUS);
+
+	return (auth & TMC_AUTH_NSID_MASK) == 0x3;
 }
 
 /* Detect and initialise the capabilities of a TMC ETR */
-static int tmc_etr_setup_caps(struct tmc_drvdata *drvdata,
-			     u32 devid, void *dev_caps)
+static int tmc_etr_setup_caps(struct device *parent, u32 devid, void *dev_caps)
 {
+	int rc;
 	u32 dma_mask = 0;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(parent);
+
+	if (!tmc_etr_has_non_secure_access(drvdata))
+		return -EACCES;
 
 	/* Set the unadvertised capabilities */
 	tmc_etr_init_caps(drvdata, (u32)(unsigned long)dev_caps);
 
-	if (!(devid & TMC_DEVID_NOSCAT) && tmc_etr_can_use_sg(drvdata))
+	if (!(devid & TMC_DEVID_NOSCAT) && tmc_etr_can_use_sg(parent))
 		tmc_etr_set_cap(drvdata, TMC_ETR_SG);
 
 	/* Check if the AXI address width is available */
@@ -363,13 +410,25 @@
 	case 44:
 	case 48:
 	case 52:
-		dev_info(drvdata->dev, "Detected dma mask %dbits\n", dma_mask);
+		dev_info(parent, "Detected dma mask %dbits\n", dma_mask);
 		break;
 	default:
 		dma_mask = 40;
 	}
 
-	return dma_set_mask_and_coherent(drvdata->dev, DMA_BIT_MASK(dma_mask));
+	rc = dma_set_mask_and_coherent(parent, DMA_BIT_MASK(dma_mask));
+	if (rc)
+		dev_err(parent, "Failed to setup DMA mask: %d\n", rc);
+	return rc;
+}
+
+static u32 tmc_etr_get_default_buffer_size(struct device *dev)
+{
+	u32 size;
+
+	if (fwnode_property_read_u32(dev->fwnode, "arm,buffer-size", &size))
+		size = SZ_1M;
+	return size;
 }
 
 static int tmc_probe(struct amba_device *adev, const struct amba_id *id)
@@ -382,23 +441,13 @@
 	struct tmc_drvdata *drvdata;
 	struct resource *res = &adev->res;
 	struct coresight_desc desc = { 0 };
-	struct device_node *np = adev->dev.of_node;
-
-	if (np) {
-		pdata = of_get_coresight_platform_data(dev, np);
-		if (IS_ERR(pdata)) {
-			ret = PTR_ERR(pdata);
-			goto out;
-		}
-		adev->dev.platform_data = pdata;
-	}
+	struct coresight_dev_list *dev_list = NULL;
 
 	ret = -ENOMEM;
 	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
 	if (!drvdata)
 		goto out;
 
-	drvdata->dev = &adev->dev;
 	dev_set_drvdata(dev, drvdata);
 
 	/* Validity for the resource is already checked by the AMBA core */
@@ -415,21 +464,14 @@
 	devid = readl_relaxed(drvdata->base + CORESIGHT_DEVID);
 	drvdata->config_type = BMVAL(devid, 6, 7);
 	drvdata->memwidth = tmc_get_memwidth(devid);
+	/* This device is not associated with a session */
+	drvdata->pid = -1;
 
-	if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) {
-		if (np)
-			ret = of_property_read_u32(np,
-						   "arm,buffer-size",
-						   &drvdata->size);
-		if (ret)
-			drvdata->size = SZ_1M;
-	} else {
+	if (drvdata->config_type == TMC_CONFIG_TYPE_ETR)
+		drvdata->size = tmc_etr_get_default_buffer_size(dev);
+	else
 		drvdata->size = readl_relaxed(drvdata->base + TMC_RSZ) * 4;
-	}
 
-	pm_runtime_put(&adev->dev);
-
-	desc.pdata = pdata;
 	desc.dev = dev;
 	desc.groups = coresight_tmc_groups;
 
@@ -438,63 +480,72 @@
 		desc.type = CORESIGHT_DEV_TYPE_SINK;
 		desc.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_BUFFER;
 		desc.ops = &tmc_etb_cs_ops;
+		dev_list = &etb_devs;
 		break;
 	case TMC_CONFIG_TYPE_ETR:
 		desc.type = CORESIGHT_DEV_TYPE_SINK;
 		desc.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_BUFFER;
 		desc.ops = &tmc_etr_cs_ops;
-		ret = tmc_etr_setup_caps(drvdata, devid, id->data);
+		ret = tmc_etr_setup_caps(dev, devid,
+					 coresight_get_uci_data(id));
 		if (ret)
 			goto out;
+		idr_init(&drvdata->idr);
+		mutex_init(&drvdata->idr_mutex);
+		dev_list = &etr_devs;
 		break;
 	case TMC_CONFIG_TYPE_ETF:
 		desc.type = CORESIGHT_DEV_TYPE_LINKSINK;
 		desc.subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_FIFO;
 		desc.ops = &tmc_etf_cs_ops;
+		dev_list = &etf_devs;
 		break;
 	default:
-		pr_err("%s: Unsupported TMC config\n", pdata->name);
+		pr_err("%s: Unsupported TMC config\n", desc.name);
 		ret = -EINVAL;
 		goto out;
 	}
 
+	desc.name = coresight_alloc_device_name(dev_list, dev);
+	if (!desc.name) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	pdata = coresight_get_platform_data(dev);
+	if (IS_ERR(pdata)) {
+		ret = PTR_ERR(pdata);
+		goto out;
+	}
+	adev->dev.platform_data = pdata;
+	desc.pdata = pdata;
+
 	drvdata->csdev = coresight_register(&desc);
 	if (IS_ERR(drvdata->csdev)) {
 		ret = PTR_ERR(drvdata->csdev);
 		goto out;
 	}
 
-	drvdata->miscdev.name = pdata->name;
+	drvdata->miscdev.name = desc.name;
 	drvdata->miscdev.minor = MISC_DYNAMIC_MINOR;
 	drvdata->miscdev.fops = &tmc_fops;
 	ret = misc_register(&drvdata->miscdev);
 	if (ret)
 		coresight_unregister(drvdata->csdev);
+	else
+		pm_runtime_put(&adev->dev);
 out:
 	return ret;
 }
 
 static const struct amba_id tmc_ids[] = {
-	{
-		.id     = 0x000bb961,
-		.mask   = 0x000fffff,
-	},
-	{
-		/* Coresight SoC 600 TMC-ETR/ETS */
-		.id	= 0x000bb9e8,
-		.mask	= 0x000fffff,
-		.data	= (void *)(unsigned long)CORESIGHT_SOC_600_ETR_CAPS,
-	},
-	{
-		/* Coresight SoC 600 TMC-ETB */
-		.id	= 0x000bb9e9,
-		.mask	= 0x000fffff,
-	},
-	{
-		/* Coresight SoC 600 TMC-ETF */
-		.id	= 0x000bb9ea,
-		.mask	= 0x000fffff,
-	},
+	CS_AMBA_ID(0x000bb961),
+	/* Coresight SoC 600 TMC-ETR/ETS */
+	CS_AMBA_ID_DATA(0x000bb9e8, (unsigned long)CORESIGHT_SOC_600_ETR_CAPS),
+	/* Coresight SoC 600 TMC-ETB */
+	CS_AMBA_ID(0x000bb9e9),
+	/* Coresight SoC 600 TMC-ETF */
+	CS_AMBA_ID(0x000bb9ea),
 	{ 0, 0},
 };
 
diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h
index 7027bd6..71de978 100644
--- a/drivers/hwtracing/coresight/coresight-tmc.h
+++ b/drivers/hwtracing/coresight/coresight-tmc.h
@@ -8,7 +8,10 @@
 #define _CORESIGHT_TMC_H
 
 #include <linux/dma-mapping.h>
+#include <linux/idr.h>
 #include <linux/miscdevice.h>
+#include <linux/mutex.h>
+#include <linux/refcount.h>
 
 #define TMC_RSZ			0x004
 #define TMC_STS			0x00c
@@ -36,6 +39,7 @@
 #define TMC_ITATBCTR2		0xef0
 #define TMC_ITATBCTR1		0xef4
 #define TMC_ITATBCTR0		0xef8
+#define TMC_AUTHSTATUS		0xfb8
 
 /* register description */
 /* TMC_CTL - 0x020 */
@@ -44,6 +48,7 @@
 #define TMC_STS_TMCREADY_BIT	2
 #define TMC_STS_FULL		BIT(0)
 #define TMC_STS_TRIGGERED	BIT(1)
+#define TMC_STS_MEMERR		BIT(5)
 /*
  * TMC_AXICTL - 0x110
  *
@@ -86,6 +91,8 @@
 #define TMC_DEVID_AXIAW_SHIFT	17
 #define TMC_DEVID_AXIAW_MASK	0x7f
 
+#define TMC_AUTH_NSID_MASK	GENMASK(1, 0)
+
 enum tmc_config_type {
 	TMC_CONFIG_TYPE_ETB,
 	TMC_CONFIG_TYPE_ETR,
@@ -133,6 +140,7 @@
 
 /**
  * struct etr_buf - Details of the buffer used by ETR
+ * refcount	; Number of sources currently using this etr_buf.
  * @mode	: Mode of the ETR buffer, contiguous, Scatter Gather etc.
  * @full	: Trace data overflow
  * @size	: Size of the buffer.
@@ -143,6 +151,7 @@
  * @private	: Backend specific information for the buf
  */
 struct etr_buf {
+	refcount_t			refcount;
 	enum etr_mode			mode;
 	bool				full;
 	ssize_t				size;
@@ -156,10 +165,11 @@
 /**
  * struct tmc_drvdata - specifics associated to an TMC component
  * @base:	memory mapped base address for this component.
- * @dev:	the device entity associated to this component.
  * @csdev:	component vitals needed by the framework.
  * @miscdev:	specifics to handle "/dev/xyz.tmc" entry.
  * @spinlock:	only one at a time pls.
+ * @pid:	Process ID of the process being monitored by the session
+ *		that is using this component.
  * @buf:	Snapshot of the trace data for ETF/ETB.
  * @etr_buf:	details of buffer used in TMC-ETR
  * @len:	size of the available trace for ETF/ETB.
@@ -170,13 +180,17 @@
  * @trigger_cntr: amount of words to store after a trigger.
  * @etr_caps:	Bitmask of capabilities of the TMC ETR, inferred from the
  *		device configuration register (DEVID)
+ * @idr:	Holds etr_bufs allocated for this ETR.
+ * @idr_mutex:	Access serialisation for idr.
+ * @sysfs_buf:	SYSFS buffer for ETR.
+ * @perf_buf:	PERF buffer for ETR.
  */
 struct tmc_drvdata {
 	void __iomem		*base;
-	struct device		*dev;
 	struct coresight_device	*csdev;
 	struct miscdevice	miscdev;
 	spinlock_t		spinlock;
+	pid_t			pid;
 	bool			reading;
 	union {
 		char		*buf;		/* TMC ETB */
@@ -189,6 +203,10 @@
 	enum tmc_mem_intf_width	memwidth;
 	u32			trigger_cntr;
 	u32			etr_caps;
+	struct idr		idr;
+	struct mutex		idr_mutex;
+	struct etr_buf		*sysfs_buf;
+	struct etr_buf		*perf_buf;
 };
 
 struct etr_buf_operations {
@@ -237,6 +255,7 @@
 void tmc_flush_and_stop(struct tmc_drvdata *drvdata);
 void tmc_enable_hw(struct tmc_drvdata *drvdata);
 void tmc_disable_hw(struct tmc_drvdata *drvdata);
+u32 tmc_get_memwidth_mask(struct tmc_drvdata *drvdata);
 
 /* ETB/ETF functions */
 int tmc_read_prepare_etb(struct tmc_drvdata *drvdata);
diff --git a/drivers/hwtracing/coresight/coresight-tpiu.c b/drivers/hwtracing/coresight/coresight-tpiu.c
index 459ef93..f8583e4 100644
--- a/drivers/hwtracing/coresight/coresight-tpiu.c
+++ b/drivers/hwtracing/coresight/coresight-tpiu.c
@@ -5,6 +5,7 @@
  * Description: CoreSight Trace Port Interface Unit driver
  */
 
+#include <linux/atomic.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/device.h>
@@ -46,15 +47,15 @@
 #define FFCR_FON_MAN		BIT(6)
 #define FFCR_STOP_FI		BIT(12)
 
+DEFINE_CORESIGHT_DEVLIST(tpiu_devs, "tpiu");
+
 /**
  * @base:	memory mapped base address for this component.
- * @dev:	the device entity associated to this component.
  * @atclk:	optional clock for the core parts of the TPIU.
  * @csdev:	component vitals needed by the framework.
  */
 struct tpiu_drvdata {
 	void __iomem		*base;
-	struct device		*dev;
 	struct clk		*atclk;
 	struct coresight_device	*csdev;
 };
@@ -68,13 +69,13 @@
 	CS_LOCK(drvdata->base);
 }
 
-static int tpiu_enable(struct coresight_device *csdev, u32 mode)
+static int tpiu_enable(struct coresight_device *csdev, u32 mode, void *__unused)
 {
 	struct tpiu_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
 
 	tpiu_enable_hw(drvdata);
-
-	dev_info(drvdata->dev, "TPIU enabled\n");
+	atomic_inc(csdev->refcnt);
+	dev_dbg(&csdev->dev, "TPIU enabled\n");
 	return 0;
 }
 
@@ -94,13 +95,17 @@
 	CS_LOCK(drvdata->base);
 }
 
-static void tpiu_disable(struct coresight_device *csdev)
+static int tpiu_disable(struct coresight_device *csdev)
 {
 	struct tpiu_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
 
+	if (atomic_dec_return(csdev->refcnt))
+		return -EBUSY;
+
 	tpiu_disable_hw(drvdata);
 
-	dev_info(drvdata->dev, "TPIU disabled\n");
+	dev_dbg(&csdev->dev, "TPIU disabled\n");
+	return 0;
 }
 
 static const struct coresight_ops_sink tpiu_sink_ops = {
@@ -121,20 +126,15 @@
 	struct tpiu_drvdata *drvdata;
 	struct resource *res = &adev->res;
 	struct coresight_desc desc = { 0 };
-	struct device_node *np = adev->dev.of_node;
 
-	if (np) {
-		pdata = of_get_coresight_platform_data(dev, np);
-		if (IS_ERR(pdata))
-			return PTR_ERR(pdata);
-		adev->dev.platform_data = pdata;
-	}
+	desc.name = coresight_alloc_device_name(&tpiu_devs, dev);
+	if (!desc.name)
+		return -ENOMEM;
 
 	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
 	if (!drvdata)
 		return -ENOMEM;
 
-	drvdata->dev = &adev->dev;
 	drvdata->atclk = devm_clk_get(&adev->dev, "atclk"); /* optional */
 	if (!IS_ERR(drvdata->atclk)) {
 		ret = clk_prepare_enable(drvdata->atclk);
@@ -153,7 +153,10 @@
 	/* Disable tpiu to support older devices */
 	tpiu_disable_hw(drvdata);
 
-	pm_runtime_put(&adev->dev);
+	pdata = coresight_get_platform_data(dev);
+	if (IS_ERR(pdata))
+		return PTR_ERR(pdata);
+	dev->platform_data = pdata;
 
 	desc.type = CORESIGHT_DEV_TYPE_SINK;
 	desc.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_PORT;
@@ -162,7 +165,12 @@
 	desc.dev = dev;
 	drvdata->csdev = coresight_register(&desc);
 
-	return PTR_ERR_OR_ZERO(drvdata->csdev);
+	if (!IS_ERR(drvdata->csdev)) {
+		pm_runtime_put(&adev->dev);
+		return 0;
+	}
+
+	return PTR_ERR(drvdata->csdev);
 }
 
 #ifdef CONFIG_PM
diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
index 3e07fd3..6453c67 100644
--- a/drivers/hwtracing/coresight/coresight.c
+++ b/drivers/hwtracing/coresight/coresight.c
@@ -11,6 +11,7 @@
 #include <linux/err.h>
 #include <linux/export.h>
 #include <linux/slab.h>
+#include <linux/stringhash.h>
 #include <linux/mutex.h>
 #include <linux/clk.h>
 #include <linux/coresight.h>
@@ -18,6 +19,7 @@
 #include <linux/delay.h>
 #include <linux/pm_runtime.h>
 
+#include "coresight-etm-perf.h"
 #include "coresight-priv.h"
 
 static DEFINE_MUTEX(coresight_mutex);
@@ -98,8 +100,8 @@
 	int i;
 	struct coresight_connection *conn;
 
-	for (i = 0; i < parent->nr_outport; i++) {
-		conn = &parent->conns[i];
+	for (i = 0; i < parent->pdata->nr_outport; i++) {
+		conn = &parent->pdata->conns[i];
 		if (conn->child_dev == csdev)
 			return conn->child_port;
 	}
@@ -116,8 +118,8 @@
 	int i;
 	struct coresight_connection *conn;
 
-	for (i = 0; i < csdev->nr_outport; i++) {
-		conn = &csdev->conns[i];
+	for (i = 0; i < csdev->pdata->nr_outport; i++) {
+		conn = &csdev->pdata->conns[i];
 		if (conn->child_dev == child)
 			return conn->outport;
 	}
@@ -128,32 +130,123 @@
 	return -ENODEV;
 }
 
-static int coresight_enable_sink(struct coresight_device *csdev, u32 mode)
+static inline u32 coresight_read_claim_tags(void __iomem *base)
+{
+	return readl_relaxed(base + CORESIGHT_CLAIMCLR);
+}
+
+static inline bool coresight_is_claimed_self_hosted(void __iomem *base)
+{
+	return coresight_read_claim_tags(base) == CORESIGHT_CLAIM_SELF_HOSTED;
+}
+
+static inline bool coresight_is_claimed_any(void __iomem *base)
+{
+	return coresight_read_claim_tags(base) != 0;
+}
+
+static inline void coresight_set_claim_tags(void __iomem *base)
+{
+	writel_relaxed(CORESIGHT_CLAIM_SELF_HOSTED, base + CORESIGHT_CLAIMSET);
+	isb();
+}
+
+static inline void coresight_clear_claim_tags(void __iomem *base)
+{
+	writel_relaxed(CORESIGHT_CLAIM_SELF_HOSTED, base + CORESIGHT_CLAIMCLR);
+	isb();
+}
+
+/*
+ * coresight_claim_device_unlocked : Claim the device for self-hosted usage
+ * to prevent an external tool from touching this device. As per PSCI
+ * standards, section "Preserving the execution context" => "Debug and Trace
+ * save and Restore", DBGCLAIM[1] is reserved for Self-hosted debug/trace and
+ * DBGCLAIM[0] is reserved for external tools.
+ *
+ * Called with CS_UNLOCKed for the component.
+ * Returns : 0 on success
+ */
+int coresight_claim_device_unlocked(void __iomem *base)
+{
+	if (coresight_is_claimed_any(base))
+		return -EBUSY;
+
+	coresight_set_claim_tags(base);
+	if (coresight_is_claimed_self_hosted(base))
+		return 0;
+	/* There was a race setting the tags, clean up and fail */
+	coresight_clear_claim_tags(base);
+	return -EBUSY;
+}
+
+int coresight_claim_device(void __iomem *base)
+{
+	int rc;
+
+	CS_UNLOCK(base);
+	rc = coresight_claim_device_unlocked(base);
+	CS_LOCK(base);
+
+	return rc;
+}
+
+/*
+ * coresight_disclaim_device_unlocked : Clear the claim tags for the device.
+ * Called with CS_UNLOCKed for the component.
+ */
+void coresight_disclaim_device_unlocked(void __iomem *base)
+{
+
+	if (coresight_is_claimed_self_hosted(base))
+		coresight_clear_claim_tags(base);
+	else
+		/*
+		 * The external agent may have not honoured our claim
+		 * and has manipulated it. Or something else has seriously
+		 * gone wrong in our driver.
+		 */
+		WARN_ON_ONCE(1);
+}
+
+void coresight_disclaim_device(void __iomem *base)
+{
+	CS_UNLOCK(base);
+	coresight_disclaim_device_unlocked(base);
+	CS_LOCK(base);
+}
+
+static int coresight_enable_sink(struct coresight_device *csdev,
+				 u32 mode, void *data)
 {
 	int ret;
 
-	if (!csdev->enable) {
-		if (sink_ops(csdev)->enable) {
-			ret = sink_ops(csdev)->enable(csdev, mode);
-			if (ret)
-				return ret;
-		}
-		csdev->enable = true;
-	}
+	/*
+	 * We need to make sure the "new" session is compatible with the
+	 * existing "mode" of operation.
+	 */
+	if (!sink_ops(csdev)->enable)
+		return -EINVAL;
 
-	atomic_inc(csdev->refcnt);
+	ret = sink_ops(csdev)->enable(csdev, mode, data);
+	if (ret)
+		return ret;
+	csdev->enable = true;
 
 	return 0;
 }
 
 static void coresight_disable_sink(struct coresight_device *csdev)
 {
-	if (atomic_dec_return(csdev->refcnt) == 0) {
-		if (sink_ops(csdev)->disable) {
-			sink_ops(csdev)->disable(csdev);
-			csdev->enable = false;
-		}
-	}
+	int ret;
+
+	if (!sink_ops(csdev)->disable)
+		return;
+
+	ret = sink_ops(csdev)->disable(csdev);
+	if (ret)
+		return;
+	csdev->enable = false;
 }
 
 static int coresight_enable_link(struct coresight_device *csdev,
@@ -184,8 +277,10 @@
 	if (atomic_inc_return(&csdev->refcnt[refport]) == 1) {
 		if (link_ops(csdev)->enable) {
 			ret = link_ops(csdev)->enable(csdev, inport, outport);
-			if (ret)
+			if (ret) {
+				atomic_dec(&csdev->refcnt[refport]);
 				return ret;
+			}
 		}
 	}
 
@@ -211,10 +306,10 @@
 
 	if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_MERG) {
 		refport = inport;
-		nr_conns = csdev->nr_inport;
+		nr_conns = csdev->pdata->nr_inport;
 	} else if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_SPLIT) {
 		refport = outport;
-		nr_conns = csdev->nr_outport;
+		nr_conns = csdev->pdata->nr_outport;
 	} else {
 		refport = 0;
 		nr_conns = 1;
@@ -274,13 +369,21 @@
 	return !csdev->enable;
 }
 
-void coresight_disable_path(struct list_head *path)
+/*
+ * coresight_disable_path_from : Disable components in the given path beyond
+ * @nd in the list. If @nd is NULL, all the components, except the SOURCE are
+ * disabled.
+ */
+static void coresight_disable_path_from(struct list_head *path,
+					struct coresight_node *nd)
 {
 	u32 type;
-	struct coresight_node *nd;
 	struct coresight_device *csdev, *parent, *child;
 
-	list_for_each_entry(nd, path, link) {
+	if (!nd)
+		nd = list_first_entry(path, struct coresight_node, link);
+
+	list_for_each_entry_continue(nd, path, link) {
 		csdev = nd->csdev;
 		type = csdev->type;
 
@@ -300,7 +403,12 @@
 			coresight_disable_sink(csdev);
 			break;
 		case CORESIGHT_DEV_TYPE_SOURCE:
-			/* sources are disabled from either sysFS or Perf */
+			/*
+			 * We skip the first node in the path assuming that it
+			 * is the source. So we don't expect a source device in
+			 * the middle of a path.
+			 */
+			WARN_ON(1);
 			break;
 		case CORESIGHT_DEV_TYPE_LINK:
 			parent = list_prev_entry(nd, link)->csdev;
@@ -313,7 +421,12 @@
 	}
 }
 
-int coresight_enable_path(struct list_head *path, u32 mode)
+void coresight_disable_path(struct list_head *path)
+{
+	coresight_disable_path_from(path, NULL);
+}
+
+int coresight_enable_path(struct list_head *path, u32 mode, void *sink_data)
 {
 
 	int ret = 0;
@@ -338,9 +451,15 @@
 
 		switch (type) {
 		case CORESIGHT_DEV_TYPE_SINK:
-			ret = coresight_enable_sink(csdev, mode);
+			ret = coresight_enable_sink(csdev, mode, sink_data);
+			/*
+			 * Sink is the first component turned on. If we
+			 * failed to enable the sink, there are no components
+			 * that need disabling. Disabling the path here
+			 * would mean we could disrupt an existing session.
+			 */
 			if (ret)
-				goto err;
+				goto out;
 			break;
 		case CORESIGHT_DEV_TYPE_SOURCE:
 			/* sources are enabled from either sysFS or Perf */
@@ -360,7 +479,7 @@
 out:
 	return ret;
 err:
-	coresight_disable_path(path);
+	coresight_disable_path_from(path, nd);
 	goto out;
 }
 
@@ -379,9 +498,9 @@
 	return csdev;
 }
 
-static int coresight_enabled_sink(struct device *dev, void *data)
+static int coresight_enabled_sink(struct device *dev, const void *data)
 {
-	bool *reset = data;
+	const bool *reset = data;
 	struct coresight_device *csdev = to_coresight_device(dev);
 
 	if ((csdev->type == CORESIGHT_DEV_TYPE_SINK ||
@@ -425,6 +544,47 @@
 	return dev ? to_coresight_device(dev) : NULL;
 }
 
+static int coresight_sink_by_id(struct device *dev, const void *data)
+{
+	struct coresight_device *csdev = to_coresight_device(dev);
+	unsigned long hash;
+
+	if (csdev->type == CORESIGHT_DEV_TYPE_SINK ||
+	     csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) {
+
+		if (!csdev->ea)
+			return 0;
+		/*
+		 * See function etm_perf_add_symlink_sink() to know where
+		 * this comes from.
+		 */
+		hash = (unsigned long)csdev->ea->var;
+
+		if ((u32)hash == *(u32 *)data)
+			return 1;
+	}
+
+	return 0;
+}
+
+/**
+ * coresight_get_sink_by_id - returns the sink that matches the id
+ * @id: Id of the sink to match
+ *
+ * The name of a sink is unique, whether it is found on the AMBA bus or
+ * otherwise.  As such the hash of that name can easily be used to identify
+ * a sink.
+ */
+struct coresight_device *coresight_get_sink_by_id(u32 id)
+{
+	struct device *dev = NULL;
+
+	dev = bus_find_device(&coresight_bustype, NULL, &id,
+			      coresight_sink_by_id);
+
+	return dev ? to_coresight_device(dev) : NULL;
+}
+
 /*
  * coresight_grab_device - Power up this device and any of the helper
  * devices connected to it for trace operation. Since the helper devices
@@ -435,9 +595,10 @@
 {
 	int i;
 
-	for (i = 0; i < csdev->nr_outport; i++) {
-		struct coresight_device *child = csdev->conns[i].child_dev;
+	for (i = 0; i < csdev->pdata->nr_outport; i++) {
+		struct coresight_device *child;
 
+		child  = csdev->pdata->conns[i].child_dev;
 		if (child && child->type == CORESIGHT_DEV_TYPE_HELPER)
 			pm_runtime_get_sync(child->dev.parent);
 	}
@@ -453,9 +614,10 @@
 	int i;
 
 	pm_runtime_put(csdev->dev.parent);
-	for (i = 0; i < csdev->nr_outport; i++) {
-		struct coresight_device *child = csdev->conns[i].child_dev;
+	for (i = 0; i < csdev->pdata->nr_outport; i++) {
+		struct coresight_device *child;
 
+		child  = csdev->pdata->conns[i].child_dev;
 		if (child && child->type == CORESIGHT_DEV_TYPE_HELPER)
 			pm_runtime_put(child->dev.parent);
 	}
@@ -485,9 +647,10 @@
 		goto out;
 
 	/* Not a sink - recursively explore each port found on this element */
-	for (i = 0; i < csdev->nr_outport; i++) {
-		struct coresight_device *child_dev = csdev->conns[i].child_dev;
+	for (i = 0; i < csdev->pdata->nr_outport; i++) {
+		struct coresight_device *child_dev;
 
+		child_dev = csdev->pdata->conns[i].child_dev;
 		if (child_dev &&
 		    _coresight_build_path(child_dev, sink, path) == 0) {
 			found = true;
@@ -635,7 +798,7 @@
 		goto out;
 	}
 
-	ret = coresight_enable_path(path, CS_MODE_SYSFS);
+	ret = coresight_enable_path(path, CS_MODE_SYSFS, NULL);
 	if (ret)
 		goto err_path;
 
@@ -815,7 +978,7 @@
 {
 	struct coresight_device *csdev = to_coresight_device(dev);
 
-	kfree(csdev->conns);
+	fwnode_handle_put(csdev->dev.fwnode);
 	kfree(csdev->refcnt);
 	kfree(csdev);
 }
@@ -841,19 +1004,17 @@
 	 * Circle throuch all the connection of that component.  If we find
 	 * an orphan connection whose name matches @csdev, link it.
 	 */
-	for (i = 0; i < i_csdev->nr_outport; i++) {
-		conn = &i_csdev->conns[i];
+	for (i = 0; i < i_csdev->pdata->nr_outport; i++) {
+		conn = &i_csdev->pdata->conns[i];
 
 		/* We have found at least one orphan connection */
 		if (conn->child_dev == NULL) {
 			/* Does it match this newly added device? */
-			if (conn->child_name &&
-			    !strcmp(dev_name(&csdev->dev), conn->child_name)) {
+			if (conn->child_fwnode == csdev->dev.fwnode)
 				conn->child_dev = csdev;
-			} else {
+			else
 				/* This component still has an orphan */
 				still_orphan = true;
-			}
 		}
 	}
 
@@ -881,13 +1042,11 @@
 {
 	int i;
 
-	for (i = 0; i < csdev->nr_outport; i++) {
-		struct coresight_connection *conn = &csdev->conns[i];
+	for (i = 0; i < csdev->pdata->nr_outport; i++) {
+		struct coresight_connection *conn = &csdev->pdata->conns[i];
 		struct device *dev = NULL;
 
-		if (conn->child_name)
-			dev = bus_find_device_by_name(&coresight_bustype, NULL,
-						      conn->child_name);
+		dev = bus_find_device_by_fwnode(&coresight_bustype, conn->child_fwnode);
 		if (dev) {
 			conn->child_dev = to_coresight_device(dev);
 			/* and put reference from 'bus_find_device()' */
@@ -916,15 +1075,21 @@
 	 * Circle throuch all the connection of that component.  If we find
 	 * a connection whose name matches @csdev, remove it.
 	 */
-	for (i = 0; i < iterator->nr_outport; i++) {
-		conn = &iterator->conns[i];
+	for (i = 0; i < iterator->pdata->nr_outport; i++) {
+		conn = &iterator->pdata->conns[i];
 
 		if (conn->child_dev == NULL)
 			continue;
 
-		if (!strcmp(dev_name(&csdev->dev), conn->child_name)) {
+		if (csdev->dev.fwnode == conn->child_fwnode) {
 			iterator->orphan = true;
 			conn->child_dev = NULL;
+			/*
+			 * Drop the reference to the handle for the remote
+			 * device acquired in parsing the connections from
+			 * platform data.
+			 */
+			fwnode_handle_put(conn->child_fwnode);
 			/* No need to continue */
 			break;
 		}
@@ -937,10 +1102,21 @@
 	return 0;
 }
 
+/*
+ * coresight_remove_conns - Remove references to this given devices
+ * from the connections of other devices.
+ */
 static void coresight_remove_conns(struct coresight_device *csdev)
 {
-	bus_for_each_dev(&coresight_bustype, NULL,
-			 csdev, coresight_remove_match);
+	/*
+	 * Another device will point to this device only if there is
+	 * an output port connected to this one. i.e, if the device
+	 * doesn't have at least one input port, there is no point
+	 * in searching all the devices.
+	 */
+	if (csdev->pdata->nr_inport)
+		bus_for_each_dev(&coresight_bustype, NULL,
+				 csdev, coresight_remove_match);
 }
 
 /**
@@ -993,20 +1169,34 @@
 }
 postcore_initcall(coresight_init);
 
-struct coresight_device *coresight_register(struct coresight_desc *desc)
+/*
+ * coresight_release_platform_data: Release references to the devices connected
+ * to the output port of this device.
+ */
+void coresight_release_platform_data(struct coresight_platform_data *pdata)
 {
 	int i;
+
+	for (i = 0; i < pdata->nr_outport; i++) {
+		if (pdata->conns[i].child_fwnode) {
+			fwnode_handle_put(pdata->conns[i].child_fwnode);
+			pdata->conns[i].child_fwnode = NULL;
+		}
+	}
+}
+
+struct coresight_device *coresight_register(struct coresight_desc *desc)
+{
 	int ret;
 	int link_subtype;
 	int nr_refcnts = 1;
 	atomic_t *refcnts = NULL;
 	struct coresight_device *csdev;
-	struct coresight_connection *conns = NULL;
 
 	csdev = kzalloc(sizeof(*csdev), GFP_KERNEL);
 	if (!csdev) {
 		ret = -ENOMEM;
-		goto err_kzalloc_csdev;
+		goto err_out;
 	}
 
 	if (desc->type == CORESIGHT_DEV_TYPE_LINK ||
@@ -1022,30 +1212,12 @@
 	refcnts = kcalloc(nr_refcnts, sizeof(*refcnts), GFP_KERNEL);
 	if (!refcnts) {
 		ret = -ENOMEM;
-		goto err_kzalloc_refcnts;
+		goto err_free_csdev;
 	}
 
 	csdev->refcnt = refcnts;
 
-	csdev->nr_inport = desc->pdata->nr_inport;
-	csdev->nr_outport = desc->pdata->nr_outport;
-
-	/* Initialise connections if there is at least one outport */
-	if (csdev->nr_outport) {
-		conns = kcalloc(csdev->nr_outport, sizeof(*conns), GFP_KERNEL);
-		if (!conns) {
-			ret = -ENOMEM;
-			goto err_kzalloc_conns;
-		}
-
-		for (i = 0; i < csdev->nr_outport; i++) {
-			conns[i].outport = desc->pdata->outports[i];
-			conns[i].child_name = desc->pdata->child_names[i];
-			conns[i].child_port = desc->pdata->child_ports[i];
-		}
-	}
-
-	csdev->conns = conns;
+	csdev->pdata = desc->pdata;
 
 	csdev->type = desc->type;
 	csdev->subtype = desc->subtype;
@@ -1057,12 +1229,37 @@
 	csdev->dev.parent = desc->dev;
 	csdev->dev.release = coresight_device_release;
 	csdev->dev.bus = &coresight_bustype;
-	dev_set_name(&csdev->dev, "%s", desc->pdata->name);
+	/*
+	 * Hold the reference to our parent device. This will be
+	 * dropped only in coresight_device_release().
+	 */
+	csdev->dev.fwnode = fwnode_handle_get(dev_fwnode(desc->dev));
+	dev_set_name(&csdev->dev, "%s", desc->name);
 
 	ret = device_register(&csdev->dev);
 	if (ret) {
 		put_device(&csdev->dev);
-		goto err_kzalloc_csdev;
+		/*
+		 * All resources are free'd explicitly via
+		 * coresight_device_release(), triggered from put_device().
+		 */
+		goto err_out;
+	}
+
+	if (csdev->type == CORESIGHT_DEV_TYPE_SINK ||
+	    csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) {
+		ret = etm_perf_add_symlink_sink(csdev);
+
+		if (ret) {
+			device_unregister(&csdev->dev);
+			/*
+			 * As with the above, all resources are free'd
+			 * explicitly via coresight_device_release() triggered
+			 * from put_device(), which is in turn called from
+			 * function device_unregister().
+			 */
+			goto err_out;
+		}
 	}
 
 	mutex_lock(&coresight_mutex);
@@ -1074,19 +1271,79 @@
 
 	return csdev;
 
-err_kzalloc_conns:
-	kfree(refcnts);
-err_kzalloc_refcnts:
+err_free_csdev:
 	kfree(csdev);
-err_kzalloc_csdev:
+err_out:
+	/* Cleanup the connection information */
+	coresight_release_platform_data(desc->pdata);
 	return ERR_PTR(ret);
 }
 EXPORT_SYMBOL_GPL(coresight_register);
 
 void coresight_unregister(struct coresight_device *csdev)
 {
+	etm_perf_del_symlink_sink(csdev);
 	/* Remove references of that device in the topology */
 	coresight_remove_conns(csdev);
+	coresight_release_platform_data(csdev->pdata);
 	device_unregister(&csdev->dev);
 }
 EXPORT_SYMBOL_GPL(coresight_unregister);
+
+
+/*
+ * coresight_search_device_idx - Search the fwnode handle of a device
+ * in the given dev_idx list. Must be called with the coresight_mutex held.
+ *
+ * Returns the index of the entry, when found. Otherwise, -ENOENT.
+ */
+static inline int coresight_search_device_idx(struct coresight_dev_list *dict,
+					      struct fwnode_handle *fwnode)
+{
+	int i;
+
+	for (i = 0; i < dict->nr_idx; i++)
+		if (dict->fwnode_list[i] == fwnode)
+			return i;
+	return -ENOENT;
+}
+
+/*
+ * coresight_alloc_device_name - Get an index for a given device in the
+ * device index list specific to a driver. An index is allocated for a
+ * device and is tracked with the fwnode_handle to prevent allocating
+ * duplicate indices for the same device (e.g, if we defer probing of
+ * a device due to dependencies), in case the index is requested again.
+ */
+char *coresight_alloc_device_name(struct coresight_dev_list *dict,
+				  struct device *dev)
+{
+	int idx;
+	char *name = NULL;
+	struct fwnode_handle **list;
+
+	mutex_lock(&coresight_mutex);
+
+	idx = coresight_search_device_idx(dict, dev_fwnode(dev));
+	if (idx < 0) {
+		/* Make space for the new entry */
+		idx = dict->nr_idx;
+		list = krealloc(dict->fwnode_list,
+				(idx + 1) * sizeof(*dict->fwnode_list),
+				GFP_KERNEL);
+		if (ZERO_OR_NULL_PTR(list)) {
+			idx = -ENOMEM;
+			goto done;
+		}
+
+		list[idx] = dev_fwnode(dev);
+		dict->fwnode_list = list;
+		dict->nr_idx = idx + 1;
+	}
+
+	name = devm_kasprintf(dev, GFP_KERNEL, "%s%d", dict->pfx, idx);
+done:
+	mutex_unlock(&coresight_mutex);
+	return name;
+}
+EXPORT_SYMBOL_GPL(coresight_alloc_device_name);
diff --git a/drivers/hwtracing/coresight/of_coresight.c b/drivers/hwtracing/coresight/of_coresight.c
deleted file mode 100644
index 6880bee..0000000
--- a/drivers/hwtracing/coresight/of_coresight.c
+++ /dev/null
@@ -1,195 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2012, The Linux Foundation. All rights reserved.
- */
-
-#include <linux/types.h>
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/clk.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_graph.h>
-#include <linux/of_platform.h>
-#include <linux/platform_device.h>
-#include <linux/amba/bus.h>
-#include <linux/coresight.h>
-#include <linux/cpumask.h>
-#include <asm/smp_plat.h>
-
-
-static int of_dev_node_match(struct device *dev, void *data)
-{
-	return dev->of_node == data;
-}
-
-static struct device *
-of_coresight_get_endpoint_device(struct device_node *endpoint)
-{
-	struct device *dev = NULL;
-
-	/*
-	 * If we have a non-configurable replicator, it will be found on the
-	 * platform bus.
-	 */
-	dev = bus_find_device(&platform_bus_type, NULL,
-			      endpoint, of_dev_node_match);
-	if (dev)
-		return dev;
-
-	/*
-	 * We have a configurable component - circle through the AMBA bus
-	 * looking for the device that matches the endpoint node.
-	 */
-	return bus_find_device(&amba_bustype, NULL,
-			       endpoint, of_dev_node_match);
-}
-
-static void of_coresight_get_ports(const struct device_node *node,
-				   int *nr_inport, int *nr_outport)
-{
-	struct device_node *ep = NULL;
-	int in = 0, out = 0;
-
-	do {
-		ep = of_graph_get_next_endpoint(node, ep);
-		if (!ep)
-			break;
-
-		if (of_property_read_bool(ep, "slave-mode"))
-			in++;
-		else
-			out++;
-
-	} while (ep);
-
-	*nr_inport = in;
-	*nr_outport = out;
-}
-
-static int of_coresight_alloc_memory(struct device *dev,
-			struct coresight_platform_data *pdata)
-{
-	/* List of output port on this component */
-	pdata->outports = devm_kcalloc(dev,
-				       pdata->nr_outport,
-				       sizeof(*pdata->outports),
-				       GFP_KERNEL);
-	if (!pdata->outports)
-		return -ENOMEM;
-
-	/* Children connected to this component via @outports */
-	pdata->child_names = devm_kcalloc(dev,
-					  pdata->nr_outport,
-					  sizeof(*pdata->child_names),
-					  GFP_KERNEL);
-	if (!pdata->child_names)
-		return -ENOMEM;
-
-	/* Port number on the child this component is connected to */
-	pdata->child_ports = devm_kcalloc(dev,
-					  pdata->nr_outport,
-					  sizeof(*pdata->child_ports),
-					  GFP_KERNEL);
-	if (!pdata->child_ports)
-		return -ENOMEM;
-
-	return 0;
-}
-
-int of_coresight_get_cpu(const struct device_node *node)
-{
-	int cpu;
-	struct device_node *dn;
-
-	dn = of_parse_phandle(node, "cpu", 0);
-	/* Affinity defaults to CPU0 */
-	if (!dn)
-		return 0;
-	cpu = of_cpu_node_to_id(dn);
-	of_node_put(dn);
-
-	/* Affinity to CPU0 if no cpu nodes are found */
-	return (cpu < 0) ? 0 : cpu;
-}
-EXPORT_SYMBOL_GPL(of_coresight_get_cpu);
-
-struct coresight_platform_data *
-of_get_coresight_platform_data(struct device *dev,
-			       const struct device_node *node)
-{
-	int i = 0, ret = 0;
-	struct coresight_platform_data *pdata;
-	struct of_endpoint endpoint, rendpoint;
-	struct device *rdev;
-	struct device_node *ep = NULL;
-	struct device_node *rparent = NULL;
-	struct device_node *rport = NULL;
-
-	pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL);
-	if (!pdata)
-		return ERR_PTR(-ENOMEM);
-
-	/* Use device name as sysfs handle */
-	pdata->name = dev_name(dev);
-
-	/* Get the number of input and output port for this component */
-	of_coresight_get_ports(node, &pdata->nr_inport, &pdata->nr_outport);
-
-	if (pdata->nr_outport) {
-		ret = of_coresight_alloc_memory(dev, pdata);
-		if (ret)
-			return ERR_PTR(ret);
-
-		/* Iterate through each port to discover topology */
-		do {
-			/* Get a handle on a port */
-			ep = of_graph_get_next_endpoint(node, ep);
-			if (!ep)
-				break;
-
-			/*
-			 * No need to deal with input ports, processing for as
-			 * processing for output ports will deal with them.
-			 */
-			if (of_find_property(ep, "slave-mode", NULL))
-				continue;
-
-			/* Get a handle on the local endpoint */
-			ret = of_graph_parse_endpoint(ep, &endpoint);
-
-			if (ret)
-				continue;
-
-			/* The local out port number */
-			pdata->outports[i] = endpoint.port;
-
-			/*
-			 * Get a handle on the remote port and parent
-			 * attached to it.
-			 */
-			rparent = of_graph_get_remote_port_parent(ep);
-			rport = of_graph_get_remote_port(ep);
-
-			if (!rparent || !rport)
-				continue;
-
-			if (of_graph_parse_endpoint(rport, &rendpoint))
-				continue;
-
-			rdev = of_coresight_get_endpoint_device(rparent);
-			if (!rdev)
-				return ERR_PTR(-EPROBE_DEFER);
-
-			pdata->child_names[i] = dev_name(rdev);
-			pdata->child_ports[i] = rendpoint.id;
-
-			i++;
-		} while (ep);
-	}
-
-	pdata->cpu = of_coresight_get_cpu(node);
-
-	return pdata;
-}
-EXPORT_SYMBOL_GPL(of_get_coresight_platform_data);
diff --git a/drivers/hwtracing/intel_th/Kconfig b/drivers/hwtracing/intel_th/Kconfig
index ca0527d..4b63593 100644
--- a/drivers/hwtracing/intel_th/Kconfig
+++ b/drivers/hwtracing/intel_th/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config INTEL_TH
 	tristate "Intel(R) Trace Hub controller"
 	depends on HAS_DMA && HAS_IOMEM
diff --git a/drivers/hwtracing/intel_th/Makefile b/drivers/hwtracing/intel_th/Makefile
index d9252fa..b63eb8f 100644
--- a/drivers/hwtracing/intel_th/Makefile
+++ b/drivers/hwtracing/intel_th/Makefile
@@ -20,3 +20,6 @@
 
 obj-$(CONFIG_INTEL_TH_PTI)	+= intel_th_pti.o
 intel_th_pti-y			:= pti.o
+
+obj-$(CONFIG_INTEL_TH_MSU)	+= intel_th_msu_sink.o
+intel_th_msu_sink-y		:= msu-sink.o
diff --git a/drivers/hwtracing/intel_th/acpi.c b/drivers/hwtracing/intel_th/acpi.c
index 87bc374..87f9024 100644
--- a/drivers/hwtracing/intel_th/acpi.c
+++ b/drivers/hwtracing/intel_th/acpi.c
@@ -37,15 +37,21 @@
 static int intel_th_acpi_probe(struct platform_device *pdev)
 {
 	struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
+	struct resource resource[TH_MMIO_END];
 	const struct acpi_device_id *id;
 	struct intel_th *th;
+	int i, r;
 
 	id = acpi_match_device(intel_th_acpi_ids, &pdev->dev);
 	if (!id)
 		return -ENODEV;
 
-	th = intel_th_alloc(&pdev->dev, (void *)id->driver_data,
-			    pdev->resource, pdev->num_resources, -1);
+	for (i = 0, r = 0; i < pdev->num_resources && r < TH_MMIO_END; i++)
+		if (pdev->resource[i].flags &
+		    (IORESOURCE_IRQ | IORESOURCE_MEM))
+			resource[r++] = pdev->resource[i];
+
+	th = intel_th_alloc(&pdev->dev, (void *)id->driver_data, resource, r);
 	if (IS_ERR(th))
 		return PTR_ERR(th);
 
diff --git a/drivers/hwtracing/intel_th/core.c b/drivers/hwtracing/intel_th/core.c
index fc6b7f8..d5c1821 100644
--- a/drivers/hwtracing/intel_th/core.c
+++ b/drivers/hwtracing/intel_th/core.c
@@ -422,6 +422,7 @@
 	unsigned		nres;
 	unsigned		type;
 	unsigned		otype;
+	bool			mknode;
 	unsigned		scrpd;
 	int			id;
 } intel_th_subdevices[] = {
@@ -429,9 +430,9 @@
 		.nres	= 1,
 		.res	= {
 			{
-				/* Handle TSCU from GTH driver */
+				/* Handle TSCU and CTS from GTH driver */
 				.start	= REG_GTH_OFFSET,
-				.end	= REG_TSCU_OFFSET + REG_TSCU_LENGTH - 1,
+				.end	= REG_CTS_OFFSET + REG_CTS_LENGTH - 1,
 				.flags	= IORESOURCE_MEM,
 			},
 		},
@@ -456,6 +457,7 @@
 		.name	= "msc",
 		.id	= 0,
 		.type	= INTEL_TH_OUTPUT,
+		.mknode	= true,
 		.otype	= GTH_MSU,
 		.scrpd	= SCRPD_MEM_IS_PRIM_DEST | SCRPD_MSC0_IS_ENABLED,
 	},
@@ -476,6 +478,7 @@
 		.name	= "msc",
 		.id	= 1,
 		.type	= INTEL_TH_OUTPUT,
+		.mknode	= true,
 		.otype	= GTH_MSU,
 		.scrpd	= SCRPD_MEM_IS_PRIM_DEST | SCRPD_MSC1_IS_ENABLED,
 	},
@@ -488,7 +491,7 @@
 				.flags	= IORESOURCE_MEM,
 			},
 			{
-				.start	= 1, /* use resource[1] */
+				.start	= TH_MMIO_SW,
 				.end	= 0,
 				.flags	= IORESOURCE_MEM,
 			},
@@ -498,6 +501,24 @@
 		.type	= INTEL_TH_SOURCE,
 	},
 	{
+		.nres	= 2,
+		.res	= {
+			{
+				.start	= REG_STH_OFFSET,
+				.end	= REG_STH_OFFSET + REG_STH_LENGTH - 1,
+				.flags	= IORESOURCE_MEM,
+			},
+			{
+				.start	= TH_MMIO_RTIT,
+				.end	= 0,
+				.flags	= IORESOURCE_MEM,
+			},
+		},
+		.id	= -1,
+		.name	= "rtit",
+		.type	= INTEL_TH_SOURCE,
+	},
+	{
 		.nres	= 1,
 		.res	= {
 			{
@@ -581,7 +602,6 @@
 	struct intel_th_device *thdev;
 	struct resource res[3];
 	unsigned int req = 0;
-	bool is64bit = false;
 	int r, err;
 
 	thdev = intel_th_device_alloc(th, subdev->type, subdev->name,
@@ -591,18 +611,12 @@
 
 	thdev->drvdata = th->drvdata;
 
-	for (r = 0; r < th->num_resources; r++)
-		if (th->resource[r].flags & IORESOURCE_MEM_64) {
-			is64bit = true;
-			break;
-		}
-
 	memcpy(res, subdev->res,
 	       sizeof(struct resource) * subdev->nres);
 
 	for (r = 0; r < subdev->nres; r++) {
 		struct resource *devres = th->resource;
-		int bar = 0; /* cut subdevices' MMIO from resource[0] */
+		int bar = TH_MMIO_CONFIG;
 
 		/*
 		 * Take .end == 0 to mean 'take the whole bar',
@@ -611,8 +625,9 @@
 		 */
 		if (!res[r].end && res[r].flags == IORESOURCE_MEM) {
 			bar = res[r].start;
-			if (is64bit)
-				bar *= 2;
+			err = -ENODEV;
+			if (bar >= th->num_resources)
+				goto fail_put_device;
 			res[r].start = 0;
 			res[r].end = resource_size(&devres[bar]) - 1;
 		}
@@ -624,7 +639,12 @@
 			dev_dbg(th->dev, "%s:%d @ %pR\n",
 				subdev->name, r, &res[r]);
 		} else if (res[r].flags & IORESOURCE_IRQ) {
-			res[r].start	= th->irq;
+			/*
+			 * Only pass on the IRQ if we have useful interrupts:
+			 * the ones that can be configured via MINTCTL.
+			 */
+			if (INTEL_TH_CAP(th, has_mintctl) && th->irq != -1)
+				res[r].start = th->irq;
 		}
 	}
 
@@ -635,7 +655,8 @@
 	}
 
 	if (subdev->type == INTEL_TH_OUTPUT) {
-		thdev->dev.devt = MKDEV(th->major, th->num_thdevs);
+		if (subdev->mknode)
+			thdev->dev.devt = MKDEV(th->major, th->num_thdevs);
 		thdev->output.type = subdev->otype;
 		thdev->output.port = -1;
 		thdev->output.scratchpad = subdev->scrpd;
@@ -754,8 +775,13 @@
 
 		thdev = intel_th_subdevice_alloc(th, subdev);
 		/* note: caller should free subdevices from th::thdev[] */
-		if (IS_ERR(thdev))
+		if (IS_ERR(thdev)) {
+			/* ENODEV for individual subdevices is allowed */
+			if (PTR_ERR(thdev) == -ENODEV)
+				continue;
+
 			return PTR_ERR(thdev);
+		}
 
 		th->thdev[th->num_thdevs++] = thdev;
 	}
@@ -763,13 +789,6 @@
 	return 0;
 }
 
-static int match_devt(struct device *dev, void *data)
-{
-	dev_t devt = (dev_t)(unsigned long)data;
-
-	return dev->devt == devt;
-}
-
 static int intel_th_output_open(struct inode *inode, struct file *file)
 {
 	const struct file_operations *fops;
@@ -777,9 +796,7 @@
 	struct device *dev;
 	int err;
 
-	dev = bus_find_device(&intel_th_bus, NULL,
-			      (void *)(unsigned long)inode->i_rdev,
-			      match_devt);
+	dev = bus_find_device_by_devt(&intel_th_bus, inode->i_rdev);
 	if (!dev || !dev->driver)
 		return -ENODEV;
 
@@ -805,26 +822,40 @@
 	.llseek	= noop_llseek,
 };
 
+static irqreturn_t intel_th_irq(int irq, void *data)
+{
+	struct intel_th *th = data;
+	irqreturn_t ret = IRQ_NONE;
+	struct intel_th_driver *d;
+	int i;
+
+	for (i = 0; i < th->num_thdevs; i++) {
+		if (th->thdev[i]->type != INTEL_TH_OUTPUT)
+			continue;
+
+		d = to_intel_th_driver(th->thdev[i]->dev.driver);
+		if (d && d->irq)
+			ret |= d->irq(th->thdev[i]);
+	}
+
+	if (ret == IRQ_NONE)
+		pr_warn_ratelimited("nobody cared for irq\n");
+
+	return ret;
+}
+
 /**
  * intel_th_alloc() - allocate a new Intel TH device and its subdevices
  * @dev:	parent device
- * @devres:	parent's resources
- * @ndevres:	number of resources
+ * @devres:	resources indexed by th_mmio_idx
  * @irq:	irq number
  */
 struct intel_th *
 intel_th_alloc(struct device *dev, struct intel_th_drvdata *drvdata,
-	       struct resource *devres, unsigned int ndevres, int irq)
+	       struct resource *devres, unsigned int ndevres)
 {
+	int err, r, nr_mmios = 0;
 	struct intel_th *th;
-	int err, r;
-
-	if (irq == -1)
-		for (r = 0; r < ndevres; r++)
-			if (devres[r].flags & IORESOURCE_IRQ) {
-				irq = devres[r].start;
-				break;
-			}
 
 	th = kzalloc(sizeof(*th), GFP_KERNEL);
 	if (!th)
@@ -842,12 +873,32 @@
 		err = th->major;
 		goto err_ida;
 	}
+	th->irq = -1;
 	th->dev = dev;
 	th->drvdata = drvdata;
 
-	th->resource = devres;
-	th->num_resources = ndevres;
-	th->irq = irq;
+	for (r = 0; r < ndevres; r++)
+		switch (devres[r].flags & IORESOURCE_TYPE_BITS) {
+		case IORESOURCE_MEM:
+			th->resource[nr_mmios++] = devres[r];
+			break;
+		case IORESOURCE_IRQ:
+			err = devm_request_irq(dev, devres[r].start,
+					       intel_th_irq, IRQF_SHARED,
+					       dev_name(dev), th);
+			if (err)
+				goto err_chrdev;
+
+			if (th->irq == -1)
+				th->irq = devres[r].start;
+			break;
+		default:
+			dev_warn(dev, "Unknown resource type %lx\n",
+				 devres[r].flags);
+			break;
+		}
+
+	th->num_resources = nr_mmios;
 
 	dev_set_drvdata(dev, th);
 
@@ -864,6 +915,10 @@
 
 	return th;
 
+err_chrdev:
+	__unregister_chrdev(th->major, 0, TH_POSSIBLE_OUTPUTS,
+			    "intel_th/output");
+
 err_ida:
 	ida_simple_remove(&intel_th_ida, th->id);
 
@@ -924,6 +979,27 @@
 EXPORT_SYMBOL_GPL(intel_th_trace_enable);
 
 /**
+ * intel_th_trace_switch() - execute a switch sequence
+ * @thdev:	output device that requests tracing switch
+ */
+int intel_th_trace_switch(struct intel_th_device *thdev)
+{
+	struct intel_th_device *hub = to_intel_th_device(thdev->dev.parent);
+	struct intel_th_driver *hubdrv = to_intel_th_driver(hub->dev.driver);
+
+	if (WARN_ON_ONCE(hub->type != INTEL_TH_SWITCH))
+		return -EINVAL;
+
+	if (WARN_ON_ONCE(thdev->type != INTEL_TH_OUTPUT))
+		return -EINVAL;
+
+	hubdrv->trig_switch(hub, &thdev->output);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(intel_th_trace_switch);
+
+/**
  * intel_th_trace_disable() - disable tracing for an output device
  * @thdev:	output device that requests tracing be disabled
  */
diff --git a/drivers/hwtracing/intel_th/gth.c b/drivers/hwtracing/intel_th/gth.c
index 8426b79..f72803a 100644
--- a/drivers/hwtracing/intel_th/gth.c
+++ b/drivers/hwtracing/intel_th/gth.c
@@ -308,6 +308,11 @@
 	iowrite32(0, gth->base + REG_GTH_SCR);
 	iowrite32(0xfc, gth->base + REG_GTH_SCR2);
 
+	/* setup CTS for single trigger */
+	iowrite32(CTS_EVENT_ENABLE_IF_ANYTHING, gth->base + REG_CTS_C0S0_EN);
+	iowrite32(CTS_ACTION_CONTROL_SET_STATE(CTS_STATE_IDLE) |
+		  CTS_ACTION_CONTROL_TRIGGER, gth->base + REG_CTS_C0S0_ACT);
+
 	return 0;
 }
 
@@ -457,6 +462,68 @@
 }
 
 /**
+ * intel_th_gth_stop() - stop tracing to an output device
+ * @gth:		GTH device
+ * @output:		output device's descriptor
+ * @capture_done:	set when no more traces will be captured
+ *
+ * This will stop tracing using force storeEn off signal and wait for the
+ * pipelines to be empty for the corresponding output port.
+ */
+static void intel_th_gth_stop(struct gth_device *gth,
+			      struct intel_th_output *output,
+			      bool capture_done)
+{
+	struct intel_th_device *outdev =
+		container_of(output, struct intel_th_device, output);
+	struct intel_th_driver *outdrv =
+		to_intel_th_driver(outdev->dev.driver);
+	unsigned long count;
+	u32 reg;
+	u32 scr2 = 0xfc | (capture_done ? 1 : 0);
+
+	iowrite32(0, gth->base + REG_GTH_SCR);
+	iowrite32(scr2, gth->base + REG_GTH_SCR2);
+
+	/* wait on pipeline empty for the given port */
+	for (reg = 0, count = GTH_PLE_WAITLOOP_DEPTH;
+	     count && !(reg & BIT(output->port)); count--) {
+		reg = ioread32(gth->base + REG_GTH_STAT);
+		cpu_relax();
+	}
+
+	if (!count)
+		dev_dbg(gth->dev, "timeout waiting for GTH[%d] PLE\n",
+			output->port);
+
+	/* wait on output piepline empty */
+	if (outdrv->wait_empty)
+		outdrv->wait_empty(outdev);
+
+	/* clear force capture done for next captures */
+	iowrite32(0xfc, gth->base + REG_GTH_SCR2);
+}
+
+/**
+ * intel_th_gth_start() - start tracing to an output device
+ * @gth:	GTH device
+ * @output:	output device's descriptor
+ *
+ * This will start tracing using force storeEn signal.
+ */
+static void intel_th_gth_start(struct gth_device *gth,
+			       struct intel_th_output *output)
+{
+	u32 scr = 0xfc0000;
+
+	if (output->multiblock)
+		scr |= 0xff;
+
+	iowrite32(scr, gth->base + REG_GTH_SCR);
+	iowrite32(0, gth->base + REG_GTH_SCR2);
+}
+
+/**
  * intel_th_gth_disable() - disable tracing to an output device
  * @thdev:	GTH device
  * @output:	output device's descriptor
@@ -469,7 +536,6 @@
 				 struct intel_th_output *output)
 {
 	struct gth_device *gth = dev_get_drvdata(&thdev->dev);
-	unsigned long count;
 	int master;
 	u32 reg;
 
@@ -482,22 +548,7 @@
 	}
 	spin_unlock(&gth->gth_lock);
 
-	iowrite32(0, gth->base + REG_GTH_SCR);
-	iowrite32(0xfd, gth->base + REG_GTH_SCR2);
-
-	/* wait on pipeline empty for the given port */
-	for (reg = 0, count = GTH_PLE_WAITLOOP_DEPTH;
-	     count && !(reg & BIT(output->port)); count--) {
-		reg = ioread32(gth->base + REG_GTH_STAT);
-		cpu_relax();
-	}
-
-	/* clear force capture done for next captures */
-	iowrite32(0xfc, gth->base + REG_GTH_SCR2);
-
-	if (!count)
-		dev_dbg(&thdev->dev, "timeout waiting for GTH[%d] PLE\n",
-			output->port);
+	intel_th_gth_stop(gth, output, true);
 
 	reg = ioread32(gth->base + REG_GTH_SCRPD0);
 	reg &= ~output->scratchpad;
@@ -526,8 +577,8 @@
 {
 	struct gth_device *gth = dev_get_drvdata(&thdev->dev);
 	struct intel_th *th = to_intel_th(thdev);
-	u32 scr = 0xfc0000, scrpd;
 	int master;
+	u32 scrpd;
 
 	spin_lock(&gth->gth_lock);
 	for_each_set_bit(master, gth->output[output->port].master,
@@ -535,9 +586,6 @@
 		gth_master_set(gth, master, output->port);
 	}
 
-	if (output->multiblock)
-		scr |= 0xff;
-
 	output->active = true;
 	spin_unlock(&gth->gth_lock);
 
@@ -548,8 +596,41 @@
 	scrpd |= output->scratchpad;
 	iowrite32(scrpd, gth->base + REG_GTH_SCRPD0);
 
-	iowrite32(scr, gth->base + REG_GTH_SCR);
-	iowrite32(0, gth->base + REG_GTH_SCR2);
+	intel_th_gth_start(gth, output);
+}
+
+/**
+ * intel_th_gth_switch() - execute a switch sequence
+ * @thdev:	GTH device
+ * @output:	output device's descriptor
+ *
+ * This will execute a switch sequence that will trigger a switch window
+ * when tracing to MSC in multi-block mode.
+ */
+static void intel_th_gth_switch(struct intel_th_device *thdev,
+				struct intel_th_output *output)
+{
+	struct gth_device *gth = dev_get_drvdata(&thdev->dev);
+	unsigned long count;
+	u32 reg;
+
+	/* trigger */
+	iowrite32(0, gth->base + REG_CTS_CTL);
+	iowrite32(CTS_CTL_SEQUENCER_ENABLE, gth->base + REG_CTS_CTL);
+	/* wait on trigger status */
+	for (reg = 0, count = CTS_TRIG_WAITLOOP_DEPTH;
+	     count && !(reg & BIT(4)); count--) {
+		reg = ioread32(gth->base + REG_CTS_STAT);
+		cpu_relax();
+	}
+	if (!count)
+		dev_dbg(&thdev->dev, "timeout waiting for CTS Trigger\n");
+
+	/* De-assert the trigger */
+	iowrite32(0, gth->base + REG_CTS_CTL);
+
+	intel_th_gth_stop(gth, output, false);
+	intel_th_gth_start(gth, output);
 }
 
 /**
@@ -607,6 +688,7 @@
 {
 	struct gth_device *gth = dev_get_drvdata(&thdev->dev);
 	int port = othdev->output.port;
+	int master;
 
 	if (thdev->host_mode)
 		return;
@@ -615,6 +697,9 @@
 	othdev->output.port = -1;
 	othdev->output.active = false;
 	gth->output[port].output = NULL;
+	for (master = 0; master <= TH_CONFIGURABLE_MASTERS; master++)
+		if (gth->master[master] == port)
+			gth->master[master] = -1;
 	spin_unlock(&gth->gth_lock);
 }
 
@@ -731,6 +816,7 @@
 	.unassign	= intel_th_gth_unassign,
 	.set_output	= intel_th_gth_set_output,
 	.enable		= intel_th_gth_enable,
+	.trig_switch	= intel_th_gth_switch,
 	.disable	= intel_th_gth_disable,
 	.driver	= {
 		.name	= "gth",
diff --git a/drivers/hwtracing/intel_th/gth.h b/drivers/hwtracing/intel_th/gth.h
index 6f2b0b9..bfcc0fd 100644
--- a/drivers/hwtracing/intel_th/gth.h
+++ b/drivers/hwtracing/intel_th/gth.h
@@ -49,6 +49,12 @@
 	REG_GTH_SCRPD3		= 0xec, /* ScratchPad[3] */
 	REG_TSCU_TSUCTRL	= 0x2000, /* TSCU control register */
 	REG_TSCU_TSCUSTAT	= 0x2004, /* TSCU status register */
+
+	/* Common Capture Sequencer (CTS) registers */
+	REG_CTS_C0S0_EN		= 0x30c0, /* clause_event_enable_c0s0 */
+	REG_CTS_C0S0_ACT	= 0x3180, /* clause_action_control_c0s0 */
+	REG_CTS_STAT		= 0x32a0, /* cts_status */
+	REG_CTS_CTL		= 0x32a4, /* cts_control */
 };
 
 /* waiting for Pipeline Empty bit(s) to assert for GTH */
@@ -57,4 +63,17 @@
 #define TSUCTRL_CTCRESYNC	BIT(0)
 #define TSCUSTAT_CTCSYNCING	BIT(1)
 
+/* waiting for Trigger status to assert for CTS */
+#define CTS_TRIG_WAITLOOP_DEPTH	10000
+
+#define CTS_EVENT_ENABLE_IF_ANYTHING	BIT(31)
+#define CTS_ACTION_CONTROL_STATE_OFF	27
+#define CTS_ACTION_CONTROL_SET_STATE(x)	\
+	(((x) & 0x1f) << CTS_ACTION_CONTROL_STATE_OFF)
+#define CTS_ACTION_CONTROL_TRIGGER	BIT(4)
+
+#define CTS_STATE_IDLE			0x10u
+
+#define CTS_CTL_SEQUENCER_ENABLE	BIT(0)
+
 #endif /* __INTEL_TH_GTH_H__ */
diff --git a/drivers/hwtracing/intel_th/intel_th.h b/drivers/hwtracing/intel_th/intel_th.h
index 780206d..0df4800 100644
--- a/drivers/hwtracing/intel_th/intel_th.h
+++ b/drivers/hwtracing/intel_th/intel_th.h
@@ -8,6 +8,8 @@
 #ifndef __INTEL_TH_H__
 #define __INTEL_TH_H__
 
+#include <linux/irqreturn.h>
+
 /* intel_th_device device types */
 enum {
 	/* Devices that generate trace data */
@@ -18,6 +20,8 @@
 	INTEL_TH_SWITCH,
 };
 
+struct intel_th_device;
+
 /**
  * struct intel_th_output - descriptor INTEL_TH_OUTPUT type devices
  * @port:	output port number, assigned by the switch
@@ -25,6 +29,7 @@
  * @scratchpad:	scratchpad bits to flag when this output is enabled
  * @multiblock:	true for multiblock output configuration
  * @active:	true when this output is enabled
+ * @wait_empty:	wait for device pipeline to be empty
  *
  * Output port descriptor, used by switch driver to tell which output
  * port this output device corresponds to. Filled in at output device's
@@ -42,10 +47,12 @@
 /**
  * struct intel_th_drvdata - describes hardware capabilities and quirks
  * @tscu_enable:	device needs SW to enable time stamping unit
+ * @has_mintctl:	device has interrupt control (MINTCTL) register
  * @host_mode_only:	device can only operate in 'host debugger' mode
  */
 struct intel_th_drvdata {
 	unsigned int	tscu_enable        : 1,
+			has_mintctl        : 1,
 			host_mode_only     : 1;
 };
 
@@ -157,10 +164,13 @@
 					    struct intel_th_device *othdev);
 	void			(*enable)(struct intel_th_device *thdev,
 					  struct intel_th_output *output);
+	void			(*trig_switch)(struct intel_th_device *thdev,
+					       struct intel_th_output *output);
 	void			(*disable)(struct intel_th_device *thdev,
 					   struct intel_th_output *output);
 	/* output ops */
-	void			(*irq)(struct intel_th_device *thdev);
+	irqreturn_t		(*irq)(struct intel_th_device *thdev);
+	void			(*wait_empty)(struct intel_th_device *thdev);
 	int			(*activate)(struct intel_th_device *thdev);
 	void			(*deactivate)(struct intel_th_device *thdev);
 	/* file_operations for those who want a device node */
@@ -213,21 +223,23 @@
 
 struct intel_th *
 intel_th_alloc(struct device *dev, struct intel_th_drvdata *drvdata,
-	       struct resource *devres, unsigned int ndevres, int irq);
+	       struct resource *devres, unsigned int ndevres);
 void intel_th_free(struct intel_th *th);
 
 int intel_th_driver_register(struct intel_th_driver *thdrv);
 void intel_th_driver_unregister(struct intel_th_driver *thdrv);
 
 int intel_th_trace_enable(struct intel_th_device *thdev);
+int intel_th_trace_switch(struct intel_th_device *thdev);
 int intel_th_trace_disable(struct intel_th_device *thdev);
 int intel_th_set_output(struct intel_th_device *thdev,
 			unsigned int master);
 int intel_th_output_enable(struct intel_th *th, unsigned int otype);
 
-enum {
+enum th_mmio_idx {
 	TH_MMIO_CONFIG = 0,
-	TH_MMIO_SW = 2,
+	TH_MMIO_SW = 1,
+	TH_MMIO_RTIT = 2,
 	TH_MMIO_END,
 };
 
@@ -237,6 +249,9 @@
 #define TH_CONFIGURABLE_MASTERS 256
 #define TH_MSC_MAX		2
 
+/* Maximum IRQ vectors */
+#define TH_NVEC_MAX		8
+
 /**
  * struct intel_th - Intel TH controller
  * @dev:	driver core's device
@@ -244,7 +259,7 @@
  * @hub:	"switch" subdevice (GTH)
  * @resource:	resources of the entire controller
  * @num_thdevs:	number of devices in the @thdev array
- * @num_resources:	number or resources in the @resource array
+ * @num_resources:	number of resources in the @resource array
  * @irq:	irq number
  * @id:		this Intel TH controller's device ID in the system
  * @major:	device node major for output devices
@@ -256,7 +271,7 @@
 	struct intel_th_device	*hub;
 	struct intel_th_drvdata	*drvdata;
 
-	struct resource		*resource;
+	struct resource		resource[TH_MMIO_END];
 	int			(*activate)(struct intel_th *);
 	void			(*deactivate)(struct intel_th *);
 	unsigned int		num_thdevs;
@@ -296,6 +311,9 @@
 	REG_TSCU_OFFSET		= 0x2000,
 	REG_TSCU_LENGTH		= 0x1000,
 
+	REG_CTS_OFFSET		= 0x3000,
+	REG_CTS_LENGTH		= 0x1000,
+
 	/* Software Trace Hub (STH) [0x4000..0x4fff] */
 	REG_STH_OFFSET		= 0x4000,
 	REG_STH_LENGTH		= 0x2000,
diff --git a/drivers/hwtracing/intel_th/msu-sink.c b/drivers/hwtracing/intel_th/msu-sink.c
new file mode 100644
index 0000000..2c7f511
--- /dev/null
+++ b/drivers/hwtracing/intel_th/msu-sink.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * An example software sink buffer for Intel TH MSU.
+ *
+ * Copyright (C) 2019 Intel Corporation.
+ */
+
+#include <linux/intel_th.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+
+#define MAX_SGTS 16
+
+struct msu_sink_private {
+	struct device	*dev;
+	struct sg_table **sgts;
+	unsigned int	nr_sgts;
+};
+
+static void *msu_sink_assign(struct device *dev, int *mode)
+{
+	struct msu_sink_private *priv;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return NULL;
+
+	priv->sgts = kcalloc(MAX_SGTS, sizeof(void *), GFP_KERNEL);
+	if (!priv->sgts) {
+		kfree(priv);
+		return NULL;
+	}
+
+	priv->dev = dev;
+	*mode = MSC_MODE_MULTI;
+
+	return priv;
+}
+
+static void msu_sink_unassign(void *data)
+{
+	struct msu_sink_private *priv = data;
+
+	kfree(priv->sgts);
+	kfree(priv);
+}
+
+/* See also: msc.c: __msc_buffer_win_alloc() */
+static int msu_sink_alloc_window(void *data, struct sg_table **sgt, size_t size)
+{
+	struct msu_sink_private *priv = data;
+	unsigned int nents;
+	struct scatterlist *sg_ptr;
+	void *block;
+	int ret, i;
+
+	if (priv->nr_sgts == MAX_SGTS)
+		return -ENOMEM;
+
+	nents = DIV_ROUND_UP(size, PAGE_SIZE);
+
+	ret = sg_alloc_table(*sgt, nents, GFP_KERNEL);
+	if (ret)
+		return -ENOMEM;
+
+	priv->sgts[priv->nr_sgts++] = *sgt;
+
+	for_each_sg((*sgt)->sgl, sg_ptr, nents, i) {
+		block = dma_alloc_coherent(priv->dev->parent->parent,
+					   PAGE_SIZE, &sg_dma_address(sg_ptr),
+					   GFP_KERNEL);
+		sg_set_buf(sg_ptr, block, PAGE_SIZE);
+	}
+
+	return nents;
+}
+
+/* See also: msc.c: __msc_buffer_win_free() */
+static void msu_sink_free_window(void *data, struct sg_table *sgt)
+{
+	struct msu_sink_private *priv = data;
+	struct scatterlist *sg_ptr;
+	int i;
+
+	for_each_sg(sgt->sgl, sg_ptr, sgt->nents, i) {
+		dma_free_coherent(priv->dev->parent->parent, PAGE_SIZE,
+				  sg_virt(sg_ptr), sg_dma_address(sg_ptr));
+	}
+
+	sg_free_table(sgt);
+	priv->nr_sgts--;
+}
+
+static int msu_sink_ready(void *data, struct sg_table *sgt, size_t bytes)
+{
+	struct msu_sink_private *priv = data;
+
+	intel_th_msc_window_unlock(priv->dev, sgt);
+
+	return 0;
+}
+
+static const struct msu_buffer sink_mbuf = {
+	.name		= "sink",
+	.assign		= msu_sink_assign,
+	.unassign	= msu_sink_unassign,
+	.alloc_window	= msu_sink_alloc_window,
+	.free_window	= msu_sink_free_window,
+	.ready		= msu_sink_ready,
+};
+
+module_intel_th_msu_buffer(sink_mbuf);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c
index d293e55..6d240df 100644
--- a/drivers/hwtracing/intel_th/msu.c
+++ b/drivers/hwtracing/intel_th/msu.c
@@ -17,40 +17,63 @@
 #include <linux/mm.h>
 #include <linux/fs.h>
 #include <linux/io.h>
+#include <linux/workqueue.h>
 #include <linux/dma-mapping.h>
 
 #ifdef CONFIG_X86
 #include <asm/set_memory.h>
 #endif
 
+#include <linux/intel_th.h>
 #include "intel_th.h"
 #include "msu.h"
 
 #define msc_dev(x) (&(x)->thdev->dev)
 
-/**
- * struct msc_block - multiblock mode block descriptor
- * @bdesc:	pointer to hardware descriptor (beginning of the block)
- * @addr:	physical address of the block
+/*
+ * Lockout state transitions:
+ *   READY -> INUSE -+-> LOCKED -+-> READY -> etc.
+ *                   \-----------/
+ * WIN_READY:	window can be used by HW
+ * WIN_INUSE:	window is in use
+ * WIN_LOCKED:	window is filled up and is being processed by the buffer
+ * handling code
+ *
+ * All state transitions happen automatically, except for the LOCKED->READY,
+ * which needs to be signalled by the buffer code by calling
+ * intel_th_msc_window_unlock().
+ *
+ * When the interrupt handler has to switch to the next window, it checks
+ * whether it's READY, and if it is, it performs the switch and tracing
+ * continues. If it's LOCKED, it stops the trace.
  */
-struct msc_block {
-	struct msc_block_desc	*bdesc;
-	dma_addr_t		addr;
+enum lockout_state {
+	WIN_READY = 0,
+	WIN_INUSE,
+	WIN_LOCKED
 };
 
 /**
  * struct msc_window - multiblock mode window descriptor
  * @entry:	window list linkage (msc::win_list)
  * @pgoff:	page offset into the buffer that this window starts at
+ * @lockout:	lockout state, see comment below
+ * @lo_lock:	lockout state serialization
  * @nr_blocks:	number of blocks (pages) in this window
- * @block:	array of block descriptors
+ * @nr_segs:	number of segments in this window (<= @nr_blocks)
+ * @_sgt:	array of block descriptors
+ * @sgt:	array of block descriptors
  */
 struct msc_window {
 	struct list_head	entry;
 	unsigned long		pgoff;
+	enum lockout_state	lockout;
+	spinlock_t		lo_lock;
 	unsigned int		nr_blocks;
+	unsigned int		nr_segs;
 	struct msc		*msc;
-	struct msc_block	block[0];
+	struct sg_table		_sgt;
+	struct sg_table		*sgt;
 };
 
 /**
@@ -72,8 +95,8 @@
 	struct msc_window	*start_win;
 	struct msc_window	*win;
 	unsigned long		offset;
-	int			start_block;
-	int			block;
+	struct scatterlist	*start_block;
+	struct scatterlist	*block;
 	unsigned int		block_off;
 	unsigned int		wrap_count;
 	unsigned int		eof;
@@ -83,7 +106,11 @@
  * struct msc - MSC device representation
  * @reg_base:		register window base address
  * @thdev:		intel_th_device pointer
+ * @mbuf:		MSU buffer, if assigned
+ * @mbuf_priv		MSU buffer's private data, if @mbuf
  * @win_list:		list of windows in multiblock mode
+ * @single_sgt:		single mode buffer
+ * @cur_win:		current window
  * @nr_pages:		total number of pages allocated for this buffer
  * @single_sz:		amount of data in single mode
  * @single_wrap:	single mode wrap occurred
@@ -101,14 +128,23 @@
  */
 struct msc {
 	void __iomem		*reg_base;
+	void __iomem		*msu_base;
 	struct intel_th_device	*thdev;
 
+	const struct msu_buffer	*mbuf;
+	void			*mbuf_priv;
+
+	struct work_struct	work;
 	struct list_head	win_list;
+	struct sg_table		single_sgt;
+	struct msc_window	*cur_win;
 	unsigned long		nr_pages;
 	unsigned long		single_sz;
 	unsigned int		single_wrap : 1;
 	void			*base;
 	dma_addr_t		base_addr;
+	u32			orig_addr;
+	u32			orig_sz;
 
 	/* <0: no buffer, 0: no users, >0: active users */
 	atomic_t		user_count;
@@ -120,12 +156,108 @@
 
 	/* config */
 	unsigned int		enabled : 1,
-				wrap	: 1;
+				wrap	: 1,
+				do_irq	: 1;
 	unsigned int		mode;
 	unsigned int		burst_len;
 	unsigned int		index;
 };
 
+static LIST_HEAD(msu_buffer_list);
+static DEFINE_MUTEX(msu_buffer_mutex);
+
+/**
+ * struct msu_buffer_entry - internal MSU buffer bookkeeping
+ * @entry:	link to msu_buffer_list
+ * @mbuf:	MSU buffer object
+ * @owner:	module that provides this MSU buffer
+ */
+struct msu_buffer_entry {
+	struct list_head	entry;
+	const struct msu_buffer	*mbuf;
+	struct module		*owner;
+};
+
+static struct msu_buffer_entry *__msu_buffer_entry_find(const char *name)
+{
+	struct msu_buffer_entry *mbe;
+
+	lockdep_assert_held(&msu_buffer_mutex);
+
+	list_for_each_entry(mbe, &msu_buffer_list, entry) {
+		if (!strcmp(mbe->mbuf->name, name))
+			return mbe;
+	}
+
+	return NULL;
+}
+
+static const struct msu_buffer *
+msu_buffer_get(const char *name)
+{
+	struct msu_buffer_entry *mbe;
+
+	mutex_lock(&msu_buffer_mutex);
+	mbe = __msu_buffer_entry_find(name);
+	if (mbe && !try_module_get(mbe->owner))
+		mbe = NULL;
+	mutex_unlock(&msu_buffer_mutex);
+
+	return mbe ? mbe->mbuf : NULL;
+}
+
+static void msu_buffer_put(const struct msu_buffer *mbuf)
+{
+	struct msu_buffer_entry *mbe;
+
+	mutex_lock(&msu_buffer_mutex);
+	mbe = __msu_buffer_entry_find(mbuf->name);
+	if (mbe)
+		module_put(mbe->owner);
+	mutex_unlock(&msu_buffer_mutex);
+}
+
+int intel_th_msu_buffer_register(const struct msu_buffer *mbuf,
+				 struct module *owner)
+{
+	struct msu_buffer_entry *mbe;
+	int ret = 0;
+
+	mbe = kzalloc(sizeof(*mbe), GFP_KERNEL);
+	if (!mbe)
+		return -ENOMEM;
+
+	mutex_lock(&msu_buffer_mutex);
+	if (__msu_buffer_entry_find(mbuf->name)) {
+		ret = -EEXIST;
+		kfree(mbe);
+		goto unlock;
+	}
+
+	mbe->mbuf = mbuf;
+	mbe->owner = owner;
+	list_add_tail(&mbe->entry, &msu_buffer_list);
+unlock:
+	mutex_unlock(&msu_buffer_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(intel_th_msu_buffer_register);
+
+void intel_th_msu_buffer_unregister(const struct msu_buffer *mbuf)
+{
+	struct msu_buffer_entry *mbe;
+
+	mutex_lock(&msu_buffer_mutex);
+	mbe = __msu_buffer_entry_find(mbuf->name);
+	if (mbe) {
+		list_del(&mbe->entry);
+		kfree(mbe);
+	}
+	mutex_unlock(&msu_buffer_mutex);
+}
+EXPORT_SYMBOL_GPL(intel_th_msu_buffer_unregister);
+
 static inline bool msc_block_is_empty(struct msc_block_desc *bdesc)
 {
 	/* header hasn't been written */
@@ -139,72 +271,25 @@
 	return false;
 }
 
-/**
- * msc_oldest_window() - locate the window with oldest data
- * @msc:	MSC device
- *
- * This should only be used in multiblock mode. Caller should hold the
- * msc::user_count reference.
- *
- * Return:	the oldest window with valid data
- */
-static struct msc_window *msc_oldest_window(struct msc *msc)
+static inline struct scatterlist *msc_win_base_sg(struct msc_window *win)
 {
-	struct msc_window *win;
-	u32 reg = ioread32(msc->reg_base + REG_MSU_MSC0NWSA);
-	unsigned long win_addr = (unsigned long)reg << PAGE_SHIFT;
-	unsigned int found = 0;
-
-	if (list_empty(&msc->win_list))
-		return NULL;
-
-	/*
-	 * we might need a radix tree for this, depending on how
-	 * many windows a typical user would allocate; ideally it's
-	 * something like 2, in which case we're good
-	 */
-	list_for_each_entry(win, &msc->win_list, entry) {
-		if (win->block[0].addr == win_addr)
-			found++;
-
-		/* skip the empty ones */
-		if (msc_block_is_empty(win->block[0].bdesc))
-			continue;
-
-		if (found)
-			return win;
-	}
-
-	return list_entry(msc->win_list.next, struct msc_window, entry);
+	return win->sgt->sgl;
 }
 
-/**
- * msc_win_oldest_block() - locate the oldest block in a given window
- * @win:	window to look at
- *
- * Return:	index of the block with the oldest data
- */
-static unsigned int msc_win_oldest_block(struct msc_window *win)
+static inline struct msc_block_desc *msc_win_base(struct msc_window *win)
 {
-	unsigned int blk;
-	struct msc_block_desc *bdesc = win->block[0].bdesc;
+	return sg_virt(msc_win_base_sg(win));
+}
 
-	/* without wrapping, first block is the oldest */
-	if (!msc_block_wrapped(bdesc))
-		return 0;
+static inline dma_addr_t msc_win_base_dma(struct msc_window *win)
+{
+	return sg_dma_address(msc_win_base_sg(win));
+}
 
-	/*
-	 * with wrapping, last written block contains both the newest and the
-	 * oldest data for this window.
-	 */
-	for (blk = 0; blk < win->nr_blocks; blk++) {
-		bdesc = win->block[blk].bdesc;
-
-		if (msc_block_last_written(bdesc))
-			return blk;
-	}
-
-	return 0;
+static inline unsigned long
+msc_win_base_pfn(struct msc_window *win)
+{
+	return PFN_DOWN(msc_win_base_dma(win));
 }
 
 /**
@@ -226,22 +311,126 @@
 static struct msc_window *msc_next_window(struct msc_window *win)
 {
 	if (msc_is_last_win(win))
-		return list_entry(win->msc->win_list.next, struct msc_window,
-				  entry);
+		return list_first_entry(&win->msc->win_list, struct msc_window,
+					entry);
 
-	return list_entry(win->entry.next, struct msc_window, entry);
+	return list_next_entry(win, entry);
+}
+
+static size_t msc_win_total_sz(struct msc_window *win)
+{
+	struct scatterlist *sg;
+	unsigned int blk;
+	size_t size = 0;
+
+	for_each_sg(win->sgt->sgl, sg, win->nr_segs, blk) {
+		struct msc_block_desc *bdesc = sg_virt(sg);
+
+		if (msc_block_wrapped(bdesc))
+			return (size_t)win->nr_blocks << PAGE_SHIFT;
+
+		size += msc_total_sz(bdesc);
+		if (msc_block_last_written(bdesc))
+			break;
+	}
+
+	return size;
+}
+
+/**
+ * msc_find_window() - find a window matching a given sg_table
+ * @msc:	MSC device
+ * @sgt:	SG table of the window
+ * @nonempty:	skip over empty windows
+ *
+ * Return:	MSC window structure pointer or NULL if the window
+ *		could not be found.
+ */
+static struct msc_window *
+msc_find_window(struct msc *msc, struct sg_table *sgt, bool nonempty)
+{
+	struct msc_window *win;
+	unsigned int found = 0;
+
+	if (list_empty(&msc->win_list))
+		return NULL;
+
+	/*
+	 * we might need a radix tree for this, depending on how
+	 * many windows a typical user would allocate; ideally it's
+	 * something like 2, in which case we're good
+	 */
+	list_for_each_entry(win, &msc->win_list, entry) {
+		if (win->sgt == sgt)
+			found++;
+
+		/* skip the empty ones */
+		if (nonempty && msc_block_is_empty(msc_win_base(win)))
+			continue;
+
+		if (found)
+			return win;
+	}
+
+	return NULL;
+}
+
+/**
+ * msc_oldest_window() - locate the window with oldest data
+ * @msc:	MSC device
+ *
+ * This should only be used in multiblock mode. Caller should hold the
+ * msc::user_count reference.
+ *
+ * Return:	the oldest window with valid data
+ */
+static struct msc_window *msc_oldest_window(struct msc *msc)
+{
+	struct msc_window *win;
+
+	if (list_empty(&msc->win_list))
+		return NULL;
+
+	win = msc_find_window(msc, msc_next_window(msc->cur_win)->sgt, true);
+	if (win)
+		return win;
+
+	return list_first_entry(&msc->win_list, struct msc_window, entry);
+}
+
+/**
+ * msc_win_oldest_sg() - locate the oldest block in a given window
+ * @win:	window to look at
+ *
+ * Return:	index of the block with the oldest data
+ */
+static struct scatterlist *msc_win_oldest_sg(struct msc_window *win)
+{
+	unsigned int blk;
+	struct scatterlist *sg;
+	struct msc_block_desc *bdesc = msc_win_base(win);
+
+	/* without wrapping, first block is the oldest */
+	if (!msc_block_wrapped(bdesc))
+		return msc_win_base_sg(win);
+
+	/*
+	 * with wrapping, last written block contains both the newest and the
+	 * oldest data for this window.
+	 */
+	for_each_sg(win->sgt->sgl, sg, win->nr_segs, blk) {
+		struct msc_block_desc *bdesc = sg_virt(sg);
+
+		if (msc_block_last_written(bdesc))
+			return sg;
+	}
+
+	return msc_win_base_sg(win);
 }
 
 static struct msc_block_desc *msc_iter_bdesc(struct msc_iter *iter)
 {
-	return iter->win->block[iter->block].bdesc;
-}
-
-static void msc_iter_init(struct msc_iter *iter)
-{
-	memset(iter, 0, sizeof(*iter));
-	iter->start_block = -1;
-	iter->block = -1;
+	return sg_virt(iter->block);
 }
 
 static struct msc_iter *msc_iter_install(struct msc *msc)
@@ -266,7 +455,6 @@
 		goto unlock;
 	}
 
-	msc_iter_init(iter);
 	iter->msc = msc;
 
 	list_add_tail(&iter->entry, &msc->iter_list);
@@ -287,10 +475,10 @@
 
 static void msc_iter_block_start(struct msc_iter *iter)
 {
-	if (iter->start_block != -1)
+	if (iter->start_block)
 		return;
 
-	iter->start_block = msc_win_oldest_block(iter->win);
+	iter->start_block = msc_win_oldest_sg(iter->win);
 	iter->block = iter->start_block;
 	iter->wrap_count = 0;
 
@@ -314,7 +502,7 @@
 		return -EINVAL;
 
 	iter->win = iter->start_win;
-	iter->start_block = -1;
+	iter->start_block = NULL;
 
 	msc_iter_block_start(iter);
 
@@ -324,7 +512,7 @@
 static int msc_iter_win_advance(struct msc_iter *iter)
 {
 	iter->win = msc_next_window(iter->win);
-	iter->start_block = -1;
+	iter->start_block = NULL;
 
 	if (iter->win == iter->start_win) {
 		iter->eof++;
@@ -354,8 +542,10 @@
 		return msc_iter_win_advance(iter);
 
 	/* block advance */
-	if (++iter->block == iter->win->nr_blocks)
-		iter->block = 0;
+	if (sg_is_last(iter->block))
+		iter->block = msc_win_base_sg(iter->win);
+	else
+		iter->block = sg_next(iter->block);
 
 	/* no wrapping, sanity check in case there is no last written block */
 	if (!iter->wrap_count && iter->block == iter->start_block)
@@ -460,20 +650,101 @@
 static void msc_buffer_clear_hw_header(struct msc *msc)
 {
 	struct msc_window *win;
+	struct scatterlist *sg;
 
 	list_for_each_entry(win, &msc->win_list, entry) {
 		unsigned int blk;
 		size_t hw_sz = sizeof(struct msc_block_desc) -
 			offsetof(struct msc_block_desc, hw_tag);
 
-		for (blk = 0; blk < win->nr_blocks; blk++) {
-			struct msc_block_desc *bdesc = win->block[blk].bdesc;
+		for_each_sg(win->sgt->sgl, sg, win->nr_segs, blk) {
+			struct msc_block_desc *bdesc = sg_virt(sg);
 
 			memset(&bdesc->hw_tag, 0, hw_sz);
 		}
 	}
 }
 
+static int intel_th_msu_init(struct msc *msc)
+{
+	u32 mintctl, msusts;
+
+	if (!msc->do_irq)
+		return 0;
+
+	if (!msc->mbuf)
+		return 0;
+
+	mintctl = ioread32(msc->msu_base + REG_MSU_MINTCTL);
+	mintctl |= msc->index ? M1BLIE : M0BLIE;
+	iowrite32(mintctl, msc->msu_base + REG_MSU_MINTCTL);
+	if (mintctl != ioread32(msc->msu_base + REG_MSU_MINTCTL)) {
+		dev_info(msc_dev(msc), "MINTCTL ignores writes: no usable interrupts\n");
+		msc->do_irq = 0;
+		return 0;
+	}
+
+	msusts = ioread32(msc->msu_base + REG_MSU_MSUSTS);
+	iowrite32(msusts, msc->msu_base + REG_MSU_MSUSTS);
+
+	return 0;
+}
+
+static void intel_th_msu_deinit(struct msc *msc)
+{
+	u32 mintctl;
+
+	if (!msc->do_irq)
+		return;
+
+	mintctl = ioread32(msc->msu_base + REG_MSU_MINTCTL);
+	mintctl &= msc->index ? ~M1BLIE : ~M0BLIE;
+	iowrite32(mintctl, msc->msu_base + REG_MSU_MINTCTL);
+}
+
+static int msc_win_set_lockout(struct msc_window *win,
+			       enum lockout_state expect,
+			       enum lockout_state new)
+{
+	enum lockout_state old;
+	unsigned long flags;
+	int ret = 0;
+
+	if (!win->msc->mbuf)
+		return 0;
+
+	spin_lock_irqsave(&win->lo_lock, flags);
+	old = win->lockout;
+
+	if (old != expect) {
+		ret = -EINVAL;
+		dev_warn_ratelimited(msc_dev(win->msc),
+				     "expected lockout state %d, got %d\n",
+				     expect, old);
+		goto unlock;
+	}
+
+	win->lockout = new;
+
+	if (old == expect && new == WIN_LOCKED)
+		atomic_inc(&win->msc->user_count);
+	else if (old == expect && old == WIN_LOCKED)
+		atomic_dec(&win->msc->user_count);
+
+unlock:
+	spin_unlock_irqrestore(&win->lo_lock, flags);
+
+	if (ret) {
+		if (expect == WIN_READY && old == WIN_LOCKED)
+			return -EBUSY;
+
+		/* from intel_th_msc_window_unlock(), don't warn if not locked */
+		if (expect == WIN_LOCKED && old == new)
+			return 0;
+	}
+
+	return ret;
+}
 /**
  * msc_configure() - set up MSC hardware
  * @msc:	the MSC device to configure
@@ -491,8 +762,15 @@
 	if (msc->mode > MSC_MODE_MULTI)
 		return -ENOTSUPP;
 
-	if (msc->mode == MSC_MODE_MULTI)
+	if (msc->mode == MSC_MODE_MULTI) {
+		if (msc_win_set_lockout(msc->cur_win, WIN_READY, WIN_INUSE))
+			return -EBUSY;
+
 		msc_buffer_clear_hw_header(msc);
+	}
+
+	msc->orig_addr = ioread32(msc->reg_base + REG_MSU_MSC0BAR);
+	msc->orig_sz   = ioread32(msc->reg_base + REG_MSU_MSC0SIZE);
 
 	reg = msc->base_addr >> PAGE_SHIFT;
 	iowrite32(reg, msc->reg_base + REG_MSU_MSC0BAR);
@@ -514,10 +792,14 @@
 
 	iowrite32(reg, msc->reg_base + REG_MSU_MSC0CTL);
 
+	intel_th_msu_init(msc);
+
 	msc->thdev->output.multiblock = msc->mode == MSC_MODE_MULTI;
 	intel_th_trace_enable(msc->thdev);
 	msc->enabled = 1;
 
+	if (msc->mbuf && msc->mbuf->activate)
+		msc->mbuf->activate(msc->mbuf_priv);
 
 	return 0;
 }
@@ -531,23 +813,21 @@
  */
 static void msc_disable(struct msc *msc)
 {
-	unsigned long count;
+	struct msc_window *win = msc->cur_win;
 	u32 reg;
 
 	lockdep_assert_held(&msc->buf_mutex);
 
+	if (msc->mode == MSC_MODE_MULTI)
+		msc_win_set_lockout(win, WIN_INUSE, WIN_LOCKED);
+
+	if (msc->mbuf && msc->mbuf->deactivate)
+		msc->mbuf->deactivate(msc->mbuf_priv);
+	intel_th_msu_deinit(msc);
 	intel_th_trace_disable(msc->thdev);
 
-	for (reg = 0, count = MSC_PLE_WAITLOOP_DEPTH;
-	     count && !(reg & MSCSTS_PLE); count--) {
-		reg = ioread32(msc->reg_base + REG_MSU_MSC0STS);
-		cpu_relax();
-	}
-
-	if (!count)
-		dev_dbg(msc_dev(msc), "timeout waiting for MSC0 PLE\n");
-
 	if (msc->mode == MSC_MODE_SINGLE) {
+		reg = ioread32(msc->reg_base + REG_MSU_MSC0STS);
 		msc->single_wrap = !!(reg & MSCSTS_WRAPSTAT);
 
 		reg = ioread32(msc->reg_base + REG_MSU_MSC0MWP);
@@ -559,16 +839,25 @@
 	reg = ioread32(msc->reg_base + REG_MSU_MSC0CTL);
 	reg &= ~MSC_EN;
 	iowrite32(reg, msc->reg_base + REG_MSU_MSC0CTL);
+
+	if (msc->mbuf && msc->mbuf->ready)
+		msc->mbuf->ready(msc->mbuf_priv, win->sgt,
+				 msc_win_total_sz(win));
+
 	msc->enabled = 0;
 
-	iowrite32(0, msc->reg_base + REG_MSU_MSC0BAR);
-	iowrite32(0, msc->reg_base + REG_MSU_MSC0SIZE);
+	iowrite32(msc->orig_addr, msc->reg_base + REG_MSU_MSC0BAR);
+	iowrite32(msc->orig_sz, msc->reg_base + REG_MSU_MSC0SIZE);
 
 	dev_dbg(msc_dev(msc), "MSCnNWSA: %08x\n",
 		ioread32(msc->reg_base + REG_MSU_MSC0NWSA));
 
 	reg = ioread32(msc->reg_base + REG_MSU_MSC0STS);
 	dev_dbg(msc_dev(msc), "MSCnSTS: %08x\n", reg);
+
+	reg = ioread32(msc->reg_base + REG_MSU_MSUSTS);
+	reg &= msc->index ? MSUSTS_MSC1BLAST : MSUSTS_MSC0BLAST;
+	iowrite32(reg, msc->reg_base + REG_MSU_MSUSTS);
 }
 
 static int intel_th_msc_activate(struct intel_th_device *thdev)
@@ -617,22 +906,45 @@
  */
 static int msc_buffer_contig_alloc(struct msc *msc, unsigned long size)
 {
+	unsigned long nr_pages = size >> PAGE_SHIFT;
 	unsigned int order = get_order(size);
 	struct page *page;
+	int ret;
 
 	if (!size)
 		return 0;
 
-	page = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
+	ret = sg_alloc_table(&msc->single_sgt, 1, GFP_KERNEL);
+	if (ret)
+		goto err_out;
+
+	ret = -ENOMEM;
+	page = alloc_pages(GFP_KERNEL | __GFP_ZERO | GFP_DMA32, order);
 	if (!page)
-		return -ENOMEM;
+		goto err_free_sgt;
 
 	split_page(page, order);
-	msc->nr_pages = size >> PAGE_SHIFT;
+	sg_set_buf(msc->single_sgt.sgl, page_address(page), size);
+
+	ret = dma_map_sg(msc_dev(msc)->parent->parent, msc->single_sgt.sgl, 1,
+			 DMA_FROM_DEVICE);
+	if (ret < 0)
+		goto err_free_pages;
+
+	msc->nr_pages = nr_pages;
 	msc->base = page_address(page);
-	msc->base_addr = page_to_phys(page);
+	msc->base_addr = sg_dma_address(msc->single_sgt.sgl);
 
 	return 0;
+
+err_free_pages:
+	__free_pages(page, order);
+
+err_free_sgt:
+	sg_free_table(&msc->single_sgt);
+
+err_out:
+	return ret;
 }
 
 /**
@@ -643,6 +955,10 @@
 {
 	unsigned long off;
 
+	dma_unmap_sg(msc_dev(msc)->parent->parent, msc->single_sgt.sgl,
+		     1, DMA_FROM_DEVICE);
+	sg_free_table(&msc->single_sgt);
+
 	for (off = 0; off < msc->nr_pages << PAGE_SHIFT; off += PAGE_SIZE) {
 		struct page *page = virt_to_page(msc->base + off);
 
@@ -669,6 +985,69 @@
 	return virt_to_page(msc->base + (pgoff << PAGE_SHIFT));
 }
 
+static int __msc_buffer_win_alloc(struct msc_window *win,
+				  unsigned int nr_segs)
+{
+	struct scatterlist *sg_ptr;
+	void *block;
+	int i, ret;
+
+	ret = sg_alloc_table(win->sgt, nr_segs, GFP_KERNEL);
+	if (ret)
+		return -ENOMEM;
+
+	for_each_sg(win->sgt->sgl, sg_ptr, nr_segs, i) {
+		block = dma_alloc_coherent(msc_dev(win->msc)->parent->parent,
+					  PAGE_SIZE, &sg_dma_address(sg_ptr),
+					  GFP_KERNEL);
+		if (!block)
+			goto err_nomem;
+
+		sg_set_buf(sg_ptr, block, PAGE_SIZE);
+	}
+
+	return nr_segs;
+
+err_nomem:
+	for_each_sg(win->sgt->sgl, sg_ptr, i, ret)
+		dma_free_coherent(msc_dev(win->msc)->parent->parent, PAGE_SIZE,
+				  sg_virt(sg_ptr), sg_dma_address(sg_ptr));
+
+	sg_free_table(win->sgt);
+
+	return -ENOMEM;
+}
+
+#ifdef CONFIG_X86
+static void msc_buffer_set_uc(struct msc_window *win, unsigned int nr_segs)
+{
+	struct scatterlist *sg_ptr;
+	int i;
+
+	for_each_sg(win->sgt->sgl, sg_ptr, nr_segs, i) {
+		/* Set the page as uncached */
+		set_memory_uc((unsigned long)sg_virt(sg_ptr),
+			      PFN_DOWN(sg_ptr->length));
+	}
+}
+
+static void msc_buffer_set_wb(struct msc_window *win)
+{
+	struct scatterlist *sg_ptr;
+	int i;
+
+	for_each_sg(win->sgt->sgl, sg_ptr, win->nr_segs, i) {
+		/* Reset the page to write-back */
+		set_memory_wb((unsigned long)sg_virt(sg_ptr),
+			      PFN_DOWN(sg_ptr->length));
+	}
+}
+#else /* !X86 */
+static inline void
+msc_buffer_set_uc(struct msc_window *win, unsigned int nr_segs) {}
+static inline void msc_buffer_set_wb(struct msc_window *win) {}
+#endif /* CONFIG_X86 */
+
 /**
  * msc_buffer_win_alloc() - alloc a window for a multiblock mode
  * @msc:	MSC device
@@ -682,44 +1061,46 @@
 static int msc_buffer_win_alloc(struct msc *msc, unsigned int nr_blocks)
 {
 	struct msc_window *win;
-	unsigned long size = PAGE_SIZE;
-	int i, ret = -ENOMEM;
+	int ret = -ENOMEM;
 
 	if (!nr_blocks)
 		return 0;
 
-	win = kzalloc(offsetof(struct msc_window, block[nr_blocks]),
-		      GFP_KERNEL);
+	win = kzalloc(sizeof(*win), GFP_KERNEL);
 	if (!win)
 		return -ENOMEM;
 
+	win->msc = msc;
+	win->sgt = &win->_sgt;
+	win->lockout = WIN_READY;
+	spin_lock_init(&win->lo_lock);
+
 	if (!list_empty(&msc->win_list)) {
-		struct msc_window *prev = list_entry(msc->win_list.prev,
-						     struct msc_window, entry);
+		struct msc_window *prev = list_last_entry(&msc->win_list,
+							  struct msc_window,
+							  entry);
 
 		win->pgoff = prev->pgoff + prev->nr_blocks;
 	}
 
-	for (i = 0; i < nr_blocks; i++) {
-		win->block[i].bdesc =
-			dma_alloc_coherent(msc_dev(msc)->parent->parent, size,
-					   &win->block[i].addr, GFP_KERNEL);
+	if (msc->mbuf && msc->mbuf->alloc_window)
+		ret = msc->mbuf->alloc_window(msc->mbuf_priv, &win->sgt,
+					      nr_blocks << PAGE_SHIFT);
+	else
+		ret = __msc_buffer_win_alloc(win, nr_blocks);
 
-		if (!win->block[i].bdesc)
-			goto err_nomem;
+	if (ret <= 0)
+		goto err_nomem;
 
-#ifdef CONFIG_X86
-		/* Set the page as uncached */
-		set_memory_uc((unsigned long)win->block[i].bdesc, 1);
-#endif
-	}
+	msc_buffer_set_uc(win, ret);
 
-	win->msc = msc;
+	win->nr_segs = ret;
 	win->nr_blocks = nr_blocks;
 
 	if (list_empty(&msc->win_list)) {
-		msc->base = win->block[0].bdesc;
-		msc->base_addr = win->block[0].addr;
+		msc->base = msc_win_base(win);
+		msc->base_addr = msc_win_base_dma(win);
+		msc->cur_win = win;
 	}
 
 	list_add_tail(&win->entry, &msc->win_list);
@@ -728,19 +1109,26 @@
 	return 0;
 
 err_nomem:
-	for (i--; i >= 0; i--) {
-#ifdef CONFIG_X86
-		/* Reset the page to write-back before releasing */
-		set_memory_wb((unsigned long)win->block[i].bdesc, 1);
-#endif
-		dma_free_coherent(msc_dev(msc)->parent->parent, size,
-				  win->block[i].bdesc, win->block[i].addr);
-	}
 	kfree(win);
 
 	return ret;
 }
 
+static void __msc_buffer_win_free(struct msc *msc, struct msc_window *win)
+{
+	struct scatterlist *sg;
+	int i;
+
+	for_each_sg(win->sgt->sgl, sg, win->nr_segs, i) {
+		struct page *page = sg_page(sg);
+
+		page->mapping = NULL;
+		dma_free_coherent(msc_dev(win->msc)->parent->parent, PAGE_SIZE,
+				  sg_virt(sg), sg_dma_address(sg));
+	}
+	sg_free_table(win->sgt);
+}
+
 /**
  * msc_buffer_win_free() - free a window from MSC's window list
  * @msc:	MSC device
@@ -751,8 +1139,6 @@
  */
 static void msc_buffer_win_free(struct msc *msc, struct msc_window *win)
 {
-	int i;
-
 	msc->nr_pages -= win->nr_blocks;
 
 	list_del(&win->entry);
@@ -761,17 +1147,12 @@
 		msc->base_addr = 0;
 	}
 
-	for (i = 0; i < win->nr_blocks; i++) {
-		struct page *page = virt_to_page(win->block[i].bdesc);
+	msc_buffer_set_wb(win);
 
-		page->mapping = NULL;
-#ifdef CONFIG_X86
-		/* Reset the page to write-back before releasing */
-		set_memory_wb((unsigned long)win->block[i].bdesc, 1);
-#endif
-		dma_free_coherent(msc_dev(win->msc)->parent->parent, PAGE_SIZE,
-				  win->block[i].bdesc, win->block[i].addr);
-	}
+	if (msc->mbuf && msc->mbuf->free_window)
+		msc->mbuf->free_window(msc->mbuf_priv, win->sgt);
+	else
+		__msc_buffer_win_free(msc, win);
 
 	kfree(win);
 }
@@ -789,6 +1170,7 @@
 
 	/* call with msc::mutex locked */
 	list_for_each_entry(win, &msc->win_list, entry) {
+		struct scatterlist *sg;
 		unsigned int blk;
 		u32 sw_tag = 0;
 
@@ -798,35 +1180,34 @@
 		 */
 		if (msc_is_last_win(win)) {
 			sw_tag |= MSC_SW_TAG_LASTWIN;
-			next_win = list_entry(msc->win_list.next,
-					      struct msc_window, entry);
+			next_win = list_first_entry(&msc->win_list,
+						    struct msc_window, entry);
 		} else {
-			next_win = list_entry(win->entry.next,
-					      struct msc_window, entry);
+			next_win = list_next_entry(win, entry);
 		}
 
-		for (blk = 0; blk < win->nr_blocks; blk++) {
-			struct msc_block_desc *bdesc = win->block[blk].bdesc;
+		for_each_sg(win->sgt->sgl, sg, win->nr_segs, blk) {
+			struct msc_block_desc *bdesc = sg_virt(sg);
 
 			memset(bdesc, 0, sizeof(*bdesc));
 
-			bdesc->next_win = next_win->block[0].addr >> PAGE_SHIFT;
+			bdesc->next_win = msc_win_base_pfn(next_win);
 
 			/*
 			 * Similarly to last window, last block should point
 			 * to the first one.
 			 */
-			if (blk == win->nr_blocks - 1) {
+			if (blk == win->nr_segs - 1) {
 				sw_tag |= MSC_SW_TAG_LASTBLK;
-				bdesc->next_blk =
-					win->block[0].addr >> PAGE_SHIFT;
+				bdesc->next_blk = msc_win_base_pfn(win);
 			} else {
-				bdesc->next_blk =
-					win->block[blk + 1].addr >> PAGE_SHIFT;
+				dma_addr_t addr = sg_dma_address(sg_next(sg));
+
+				bdesc->next_blk = PFN_DOWN(addr);
 			}
 
 			bdesc->sw_tag = sw_tag;
-			bdesc->block_sz = PAGE_SIZE / 64;
+			bdesc->block_sz = sg->length / 64;
 		}
 	}
 
@@ -985,6 +1366,8 @@
 static struct page *msc_buffer_get_page(struct msc *msc, unsigned long pgoff)
 {
 	struct msc_window *win;
+	struct scatterlist *sg;
+	unsigned int blk;
 
 	if (msc->mode == MSC_MODE_SINGLE)
 		return msc_buffer_contig_get_page(msc, pgoff);
@@ -997,7 +1380,18 @@
 
 found:
 	pgoff -= win->pgoff;
-	return virt_to_page(win->block[pgoff].bdesc);
+
+	for_each_sg(win->sgt->sgl, sg, win->nr_segs, blk) {
+		struct page *page = sg_page(sg);
+		size_t pgsz = PFN_DOWN(sg->length);
+
+		if (pgoff < pgsz)
+			return page + pgoff;
+
+		pgoff -= pgsz;
+	}
+
+	return NULL;
 }
 
 /**
@@ -1250,6 +1644,22 @@
 	.owner		= THIS_MODULE,
 };
 
+static void intel_th_msc_wait_empty(struct intel_th_device *thdev)
+{
+	struct msc *msc = dev_get_drvdata(&thdev->dev);
+	unsigned long count;
+	u32 reg;
+
+	for (reg = 0, count = MSC_PLE_WAITLOOP_DEPTH;
+	     count && !(reg & MSCSTS_PLE); count--) {
+		reg = __raw_readl(msc->reg_base + REG_MSU_MSC0STS);
+		cpu_relax();
+	}
+
+	if (!count)
+		dev_dbg(msc_dev(msc), "timeout waiting for MSC0 PLE\n");
+}
+
 static int intel_th_msc_init(struct msc *msc)
 {
 	atomic_set(&msc->user_count, -1);
@@ -1266,6 +1676,97 @@
 	return 0;
 }
 
+static void msc_win_switch(struct msc *msc)
+{
+	struct msc_window *first;
+
+	first = list_first_entry(&msc->win_list, struct msc_window, entry);
+
+	if (msc_is_last_win(msc->cur_win))
+		msc->cur_win = first;
+	else
+		msc->cur_win = list_next_entry(msc->cur_win, entry);
+
+	msc->base = msc_win_base(msc->cur_win);
+	msc->base_addr = msc_win_base_dma(msc->cur_win);
+
+	intel_th_trace_switch(msc->thdev);
+}
+
+/**
+ * intel_th_msc_window_unlock - put the window back in rotation
+ * @dev:	MSC device to which this relates
+ * @sgt:	buffer's sg_table for the window, does nothing if NULL
+ */
+void intel_th_msc_window_unlock(struct device *dev, struct sg_table *sgt)
+{
+	struct msc *msc = dev_get_drvdata(dev);
+	struct msc_window *win;
+
+	if (!sgt)
+		return;
+
+	win = msc_find_window(msc, sgt, false);
+	if (!win)
+		return;
+
+	msc_win_set_lockout(win, WIN_LOCKED, WIN_READY);
+}
+EXPORT_SYMBOL_GPL(intel_th_msc_window_unlock);
+
+static void msc_work(struct work_struct *work)
+{
+	struct msc *msc = container_of(work, struct msc, work);
+
+	intel_th_msc_deactivate(msc->thdev);
+}
+
+static irqreturn_t intel_th_msc_interrupt(struct intel_th_device *thdev)
+{
+	struct msc *msc = dev_get_drvdata(&thdev->dev);
+	u32 msusts = ioread32(msc->msu_base + REG_MSU_MSUSTS);
+	u32 mask = msc->index ? MSUSTS_MSC1BLAST : MSUSTS_MSC0BLAST;
+	struct msc_window *win, *next_win;
+
+	if (!msc->do_irq || !msc->mbuf)
+		return IRQ_NONE;
+
+	msusts &= mask;
+
+	if (!msusts)
+		return msc->enabled ? IRQ_HANDLED : IRQ_NONE;
+
+	iowrite32(msusts, msc->msu_base + REG_MSU_MSUSTS);
+
+	if (!msc->enabled)
+		return IRQ_NONE;
+
+	/* grab the window before we do the switch */
+	win = msc->cur_win;
+	if (!win)
+		return IRQ_HANDLED;
+	next_win = msc_next_window(win);
+	if (!next_win)
+		return IRQ_HANDLED;
+
+	/* next window: if READY, proceed, if LOCKED, stop the trace */
+	if (msc_win_set_lockout(next_win, WIN_READY, WIN_INUSE)) {
+		schedule_work(&msc->work);
+		return IRQ_HANDLED;
+	}
+
+	/* current window: INUSE -> LOCKED */
+	msc_win_set_lockout(win, WIN_INUSE, WIN_LOCKED);
+
+	msc_win_switch(msc);
+
+	if (msc->mbuf && msc->mbuf->ready)
+		msc->mbuf->ready(msc->mbuf_priv, win->sgt,
+				 msc_win_total_sz(win));
+
+	return IRQ_HANDLED;
+}
+
 static const char * const msc_mode[] = {
 	[MSC_MODE_SINGLE]	= "single",
 	[MSC_MODE_MULTI]	= "multi",
@@ -1300,21 +1801,43 @@
 
 static DEVICE_ATTR_RW(wrap);
 
+static void msc_buffer_unassign(struct msc *msc)
+{
+	lockdep_assert_held(&msc->buf_mutex);
+
+	if (!msc->mbuf)
+		return;
+
+	msc->mbuf->unassign(msc->mbuf_priv);
+	msu_buffer_put(msc->mbuf);
+	msc->mbuf_priv = NULL;
+	msc->mbuf = NULL;
+}
+
 static ssize_t
 mode_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
 	struct msc *msc = dev_get_drvdata(dev);
+	const char *mode = msc_mode[msc->mode];
+	ssize_t ret;
 
-	return scnprintf(buf, PAGE_SIZE, "%s\n", msc_mode[msc->mode]);
+	mutex_lock(&msc->buf_mutex);
+	if (msc->mbuf)
+		mode = msc->mbuf->name;
+	ret = scnprintf(buf, PAGE_SIZE, "%s\n", mode);
+	mutex_unlock(&msc->buf_mutex);
+
+	return ret;
 }
 
 static ssize_t
 mode_store(struct device *dev, struct device_attribute *attr, const char *buf,
 	   size_t size)
 {
+	const struct msu_buffer *mbuf = NULL;
 	struct msc *msc = dev_get_drvdata(dev);
 	size_t len = size;
-	char *cp;
+	char *cp, *mode;
 	int i, ret;
 
 	if (!capable(CAP_SYS_RAWIO))
@@ -1324,17 +1847,64 @@
 	if (cp)
 		len = cp - buf;
 
-	for (i = 0; i < ARRAY_SIZE(msc_mode); i++)
-		if (!strncmp(msc_mode[i], buf, len))
-			goto found;
+	mode = kstrndup(buf, len, GFP_KERNEL);
+	if (!mode)
+		return -ENOMEM;
+
+	i = match_string(msc_mode, ARRAY_SIZE(msc_mode), mode);
+	if (i >= 0) {
+		kfree(mode);
+		goto found;
+	}
+
+	/* Buffer sinks only work with a usable IRQ */
+	if (!msc->do_irq) {
+		kfree(mode);
+		return -EINVAL;
+	}
+
+	mbuf = msu_buffer_get(mode);
+	kfree(mode);
+	if (mbuf)
+		goto found;
 
 	return -EINVAL;
 
 found:
 	mutex_lock(&msc->buf_mutex);
+	ret = 0;
+
+	/* Same buffer: do nothing */
+	if (mbuf && mbuf == msc->mbuf) {
+		/* put the extra reference we just got */
+		msu_buffer_put(mbuf);
+		goto unlock;
+	}
+
 	ret = msc_buffer_unlocked_free_unless_used(msc);
-	if (!ret)
-		msc->mode = i;
+	if (ret)
+		goto unlock;
+
+	if (mbuf) {
+		void *mbuf_priv = mbuf->assign(dev, &i);
+
+		if (!mbuf_priv) {
+			ret = -ENOMEM;
+			goto unlock;
+		}
+
+		msc_buffer_unassign(msc);
+		msc->mbuf_priv = mbuf_priv;
+		msc->mbuf = mbuf;
+	} else {
+		msc_buffer_unassign(msc);
+	}
+
+	msc->mode = i;
+
+unlock:
+	if (ret && mbuf)
+		msu_buffer_put(mbuf);
 	mutex_unlock(&msc->buf_mutex);
 
 	return ret ? ret : size;
@@ -1423,7 +1993,8 @@
 		if (!end)
 			break;
 
-		len -= end - p;
+		/* consume the number and the following comma, hence +1 */
+		len -= end - p + 1;
 		p = end + 1;
 	} while (len);
 
@@ -1439,10 +2010,43 @@
 
 static DEVICE_ATTR_RW(nr_pages);
 
+static ssize_t
+win_switch_store(struct device *dev, struct device_attribute *attr,
+		 const char *buf, size_t size)
+{
+	struct msc *msc = dev_get_drvdata(dev);
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	if (val != 1)
+		return -EINVAL;
+
+	mutex_lock(&msc->buf_mutex);
+	/*
+	 * Window switch can only happen in the "multi" mode.
+	 * If a external buffer is engaged, they have the full
+	 * control over window switching.
+	 */
+	if (msc->mode != MSC_MODE_MULTI || msc->mbuf)
+		ret = -ENOTSUPP;
+	else
+		msc_win_switch(msc);
+	mutex_unlock(&msc->buf_mutex);
+
+	return ret ? ret : size;
+}
+
+static DEVICE_ATTR_WO(win_switch);
+
 static struct attribute *msc_output_attrs[] = {
 	&dev_attr_wrap.attr,
 	&dev_attr_mode.attr,
 	&dev_attr_nr_pages.attr,
+	&dev_attr_win_switch.attr,
 	NULL,
 };
 
@@ -1470,11 +2074,17 @@
 	if (!msc)
 		return -ENOMEM;
 
+	res = intel_th_device_get_resource(thdev, IORESOURCE_IRQ, 1);
+	if (!res)
+		msc->do_irq = 1;
+
 	msc->index = thdev->id;
 
 	msc->thdev = thdev;
 	msc->reg_base = base + msc->index * 0x100;
+	msc->msu_base = base;
 
+	INIT_WORK(&msc->work, msc_work);
 	err = intel_th_msc_init(msc);
 	if (err)
 		return err;
@@ -1503,6 +2113,8 @@
 static struct intel_th_driver intel_th_msc_driver = {
 	.probe	= intel_th_msc_probe,
 	.remove	= intel_th_msc_remove,
+	.irq		= intel_th_msc_interrupt,
+	.wait_empty	= intel_th_msc_wait_empty,
 	.activate	= intel_th_msc_activate,
 	.deactivate	= intel_th_msc_deactivate,
 	.fops	= &intel_th_msc_fops,
diff --git a/drivers/hwtracing/intel_th/msu.h b/drivers/hwtracing/intel_th/msu.h
index 9cc8ace..e771f50 100644
--- a/drivers/hwtracing/intel_th/msu.h
+++ b/drivers/hwtracing/intel_th/msu.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Intel(R) Trace Hub Memory Storage Unit (MSU) data structures
  *
@@ -11,6 +11,7 @@
 enum {
 	REG_MSU_MSUPARAMS	= 0x0000,
 	REG_MSU_MSUSTS		= 0x0008,
+	REG_MSU_MINTCTL		= 0x0004, /* MSU-global interrupt control */
 	REG_MSU_MSC0CTL		= 0x0100, /* MSC0 control */
 	REG_MSU_MSC0STS		= 0x0104, /* MSC0 status */
 	REG_MSU_MSC0BAR		= 0x0108, /* MSC0 output base address */
@@ -28,6 +29,8 @@
 
 /* MSUSTS bits */
 #define MSUSTS_MSU_INT	BIT(0)
+#define MSUSTS_MSC0BLAST	BIT(16)
+#define MSUSTS_MSC1BLAST	BIT(24)
 
 /* MSCnCTL bits */
 #define MSC_EN		BIT(0)
@@ -36,13 +39,10 @@
 #define MSC_MODE	(BIT(4) | BIT(5))
 #define MSC_LEN		(BIT(8) | BIT(9) | BIT(10))
 
-/* MSC operating modes (MSC_MODE) */
-enum {
-	MSC_MODE_SINGLE	= 0,
-	MSC_MODE_MULTI,
-	MSC_MODE_EXI,
-	MSC_MODE_DEBUG,
-};
+/* MINTCTL bits */
+#define MICDE		BIT(0)
+#define M0BLIE		BIT(16)
+#define M1BLIE		BIT(24)
 
 /* MSCnSTS bits */
 #define MSCSTS_WRAPSTAT	BIT(1)	/* Wrap occurred */
@@ -85,9 +85,19 @@
 	return bdesc->valid_dw * 4 - MSC_BDESC;
 }
 
+static inline unsigned long msc_total_sz(struct msc_block_desc *bdesc)
+{
+	return bdesc->valid_dw * 4;
+}
+
+static inline unsigned long msc_block_sz(struct msc_block_desc *bdesc)
+{
+	return bdesc->block_sz * 64 - MSC_BDESC;
+}
+
 static inline bool msc_block_wrapped(struct msc_block_desc *bdesc)
 {
-	if (bdesc->hw_tag & MSC_HW_TAG_BLOCKWRAP)
+	if (bdesc->hw_tag & (MSC_HW_TAG_BLOCKWRAP | MSC_HW_TAG_WINWRAP))
 		return true;
 
 	return false;
@@ -96,7 +106,7 @@
 static inline bool msc_block_last_written(struct msc_block_desc *bdesc)
 {
 	if ((bdesc->hw_tag & MSC_HW_TAG_ENDBIT) ||
-	    (msc_data_sz(bdesc) != DATA_IN_PAGE))
+	    (msc_data_sz(bdesc) != msc_block_sz(bdesc)))
 		return true;
 
 	return false;
diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c
index 1cf6290..03ca5b1 100644
--- a/drivers/hwtracing/intel_th/pci.c
+++ b/drivers/hwtracing/intel_th/pci.c
@@ -17,7 +17,13 @@
 
 #define DRIVER_NAME "intel_th_pci"
 
-#define BAR_MASK (BIT(TH_MMIO_CONFIG) | BIT(TH_MMIO_SW))
+enum {
+	TH_PCI_CONFIG_BAR	= 0,
+	TH_PCI_STH_SW_BAR	= 2,
+	TH_PCI_RTIT_BAR		= 4,
+};
+
+#define BAR_MASK (BIT(TH_PCI_CONFIG_BAR) | BIT(TH_PCI_STH_SW_BAR))
 
 #define PCI_REG_NPKDSC	0x80
 #define NPKDSC_TSACT	BIT(5)
@@ -66,8 +72,12 @@
 			      const struct pci_device_id *id)
 {
 	struct intel_th_drvdata *drvdata = (void *)id->driver_data;
+	struct resource resource[TH_MMIO_END + TH_NVEC_MAX] = {
+		[TH_MMIO_CONFIG]	= pdev->resource[TH_PCI_CONFIG_BAR],
+		[TH_MMIO_SW]		= pdev->resource[TH_PCI_STH_SW_BAR],
+	};
+	int err, r = TH_MMIO_SW + 1, i;
 	struct intel_th *th;
-	int err;
 
 	err = pcim_enable_device(pdev);
 	if (err)
@@ -77,8 +87,19 @@
 	if (err)
 		return err;
 
-	th = intel_th_alloc(&pdev->dev, drvdata, pdev->resource,
-			    DEVICE_COUNT_RESOURCE, pdev->irq);
+	if (pdev->resource[TH_PCI_RTIT_BAR].start) {
+		resource[TH_MMIO_RTIT] = pdev->resource[TH_PCI_RTIT_BAR];
+		r++;
+	}
+
+	err = pci_alloc_irq_vectors(pdev, 1, 8, PCI_IRQ_ALL_TYPES);
+	if (err > 0)
+		for (i = 0; i < err; i++, r++) {
+			resource[r].flags = IORESOURCE_IRQ;
+			resource[r].start = pci_irq_vector(pdev, i);
+		}
+
+	th = intel_th_alloc(&pdev->dev, drvdata, resource, r);
 	if (IS_ERR(th))
 		return PTR_ERR(th);
 
@@ -95,10 +116,13 @@
 	struct intel_th *th = pci_get_drvdata(pdev);
 
 	intel_th_free(th);
+
+	pci_free_irq_vectors(pdev);
 }
 
 static const struct intel_th_drvdata intel_th_2x = {
 	.tscu_enable	= 1,
+	.has_mintctl	= 1,
 };
 
 static const struct pci_device_id intel_th_pci_id_table[] = {
@@ -141,6 +165,11 @@
 		.driver_data = (kernel_ulong_t)0,
 	},
 	{
+		/* Lewisburg PCH */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa226),
+		.driver_data = (kernel_ulong_t)0,
+	},
+	{
 		/* Gemini Lake */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x318e),
 		.driver_data = (kernel_ulong_t)&intel_th_2x,
@@ -165,6 +194,31 @@
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x34a6),
 		.driver_data = (kernel_ulong_t)&intel_th_2x,
 	},
+	{
+		/* Comet Lake */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x02a6),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
+	{
+		/* Comet Lake PCH */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x06a6),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
+	{
+		/* Ice Lake NNPI */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x45c5),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
+	{
+		/* Tiger Lake PCH */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa0a6),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
+	{
+		/* Jasper Lake PCH */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4da6),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
 	{ 0 },
 };
 
diff --git a/drivers/hwtracing/intel_th/pti.c b/drivers/hwtracing/intel_th/pti.c
index 5669433..0da6b78 100644
--- a/drivers/hwtracing/intel_th/pti.c
+++ b/drivers/hwtracing/intel_th/pti.c
@@ -272,19 +272,17 @@
 			      const char *buf, size_t size)
 {
 	struct pti_device *pti = dev_get_drvdata(dev);
-	ssize_t ret = -EINVAL;
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(lpp_dest_str); i++)
-		if (sysfs_streq(buf, lpp_dest_str[i]))
-			break;
+	i = sysfs_match_string(lpp_dest_str, buf);
+	if (i < 0)
+		return i;
 
-	if (i < ARRAY_SIZE(lpp_dest_str) && pti->lpp_dest_mask & BIT(i)) {
-		pti->lpp_dest = i;
-		ret = size;
-	}
+	if (!(pti->lpp_dest_mask & BIT(i)))
+		return -EINVAL;
 
-	return ret;
+	pti->lpp_dest = i;
+	return size;
 }
 
 static DEVICE_ATTR_RW(lpp_dest);
diff --git a/drivers/hwtracing/intel_th/pti.h b/drivers/hwtracing/intel_th/pti.h
index e9381ba..7dfc043 100644
--- a/drivers/hwtracing/intel_th/pti.h
+++ b/drivers/hwtracing/intel_th/pti.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Intel(R) Trace Hub PTI output data structures
  *
diff --git a/drivers/hwtracing/intel_th/sth.c b/drivers/hwtracing/intel_th/sth.c
index 4b7ae47..3a1f4e6 100644
--- a/drivers/hwtracing/intel_th/sth.c
+++ b/drivers/hwtracing/intel_th/sth.c
@@ -84,8 +84,12 @@
 	/* Global packets (GERR, XSYNC, TRIG) are sent with register writes */
 	case STP_PACKET_GERR:
 		reg += 4;
+		/* fall through */
+
 	case STP_PACKET_XSYNC:
 		reg += 8;
+		/* fall through */
+
 	case STP_PACKET_TRIG:
 		if (flags & STP_PACKET_TIMESTAMPED)
 			reg += 4;
diff --git a/drivers/hwtracing/stm/Kconfig b/drivers/hwtracing/stm/Kconfig
index 723e2d9..d0e92a8 100644
--- a/drivers/hwtracing/stm/Kconfig
+++ b/drivers/hwtracing/stm/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config STM
 	tristate "System Trace Module devices"
 	select CONFIGFS_FS
@@ -11,6 +12,35 @@
 
 if STM
 
+config STM_PROTO_BASIC
+	tristate "Basic STM framing protocol driver"
+	default CONFIG_STM
+	help
+	  This is a simple framing protocol for sending data over STM
+	  devices. This was the protocol that the STM framework used
+	  exclusively until the MIPI SyS-T support was added. Use this
+	  driver for compatibility with your existing STM setup.
+
+	  The receiving side only needs to be able to decode the MIPI
+	  STP protocol in order to extract the data.
+
+	  If you want to be able to use the basic protocol or want the
+	  backwards compatibility for your existing setup, say Y.
+
+config STM_PROTO_SYS_T
+	tristate "MIPI SyS-T STM framing protocol driver"
+	default CONFIG_STM
+	help
+	  This is an implementation of MIPI SyS-T protocol to be used
+	  over the STP transport. In addition to the data payload, it
+	  also carries additional metadata for time correlation, better
+	  means of trace source identification, etc.
+
+	  The receiving side must be able to decode this protocol in
+	  addition to the MIPI STP, in order to extract the data.
+
+	  If you don't know what this is, say N.
+
 config STM_DUMMY
 	tristate "Dummy STM driver"
 	help
diff --git a/drivers/hwtracing/stm/Makefile b/drivers/hwtracing/stm/Makefile
index effc19e..1692fcd 100644
--- a/drivers/hwtracing/stm/Makefile
+++ b/drivers/hwtracing/stm/Makefile
@@ -3,6 +3,12 @@
 
 stm_core-y		:= core.o policy.o
 
+obj-$(CONFIG_STM_PROTO_BASIC) += stm_p_basic.o
+obj-$(CONFIG_STM_PROTO_SYS_T) += stm_p_sys-t.o
+
+stm_p_basic-y		:= p_basic.o
+stm_p_sys-t-y		:= p_sys-t.o
+
 obj-$(CONFIG_STM_DUMMY)	+= dummy_stm.o
 
 obj-$(CONFIG_STM_SOURCE_CONSOLE)	+= stm_console.o
diff --git a/drivers/hwtracing/stm/core.c b/drivers/hwtracing/stm/core.c
index 10bcb5d..603b83a 100644
--- a/drivers/hwtracing/stm/core.c
+++ b/drivers/hwtracing/stm/core.c
@@ -89,13 +89,6 @@
 	.dev_groups	= stm_groups,
 };
 
-static int stm_dev_match(struct device *dev, const void *data)
-{
-	const char *name = data;
-
-	return sysfs_streq(name, dev_name(dev));
-}
-
 /**
  * stm_find_device() - find stm device by name
  * @buf:	character buffer containing the name
@@ -116,7 +109,7 @@
 	if (!stm_core_up)
 		return NULL;
 
-	dev = class_find_device(&stm_class, NULL, buf, stm_dev_match);
+	dev = class_find_device_by_name(&stm_class, buf);
 	if (!dev)
 		return NULL;
 
@@ -166,11 +159,10 @@
 static int stp_master_alloc(struct stm_device *stm, unsigned int idx)
 {
 	struct stp_master *master;
-	size_t size;
 
-	size = ALIGN(stm->data->sw_nchannels, 8) / 8;
-	size += sizeof(struct stp_master);
-	master = kzalloc(size, GFP_ATOMIC);
+	master = kzalloc(struct_size(master, chan_map,
+				     BITS_TO_LONGS(stm->data->sw_nchannels)),
+			 GFP_ATOMIC);
 	if (!master)
 		return -ENOMEM;
 
@@ -218,8 +210,8 @@
 	bitmap_release_region(&master->chan_map[0], output->channel,
 			      ilog2(output->nr_chans));
 
-	output->nr_chans = 0;
 	master->nr_free += output->nr_chans;
+	output->nr_chans = 0;
 }
 
 /*
@@ -244,6 +236,9 @@
 			;
 		if (i == width)
 			return pos;
+
+		/* step over [pos..pos+i) to continue search */
+		pos += i;
 	}
 
 	return -1;
@@ -293,15 +288,15 @@
 	if (width > stm->data->sw_nchannels)
 		return -EINVAL;
 
-	if (policy_node) {
-		stp_policy_node_get_ranges(policy_node,
-					   &midx, &mend, &cidx, &cend);
-	} else {
-		midx = stm->data->sw_start;
-		cidx = 0;
-		mend = stm->data->sw_end;
-		cend = stm->data->sw_nchannels - 1;
-	}
+	/* We no longer accept policy_node==NULL here */
+	if (WARN_ON_ONCE(!policy_node))
+		return -EINVAL;
+
+	/*
+	 * Also, the caller holds reference to policy_node, so it won't
+	 * disappear on us.
+	 */
+	stp_policy_node_get_ranges(policy_node, &midx, &mend, &cidx, &cend);
 
 	spin_lock(&stm->mc_lock);
 	spin_lock(&output->lock);
@@ -316,11 +311,26 @@
 	output->master = midx;
 	output->channel = cidx;
 	output->nr_chans = width;
+	if (stm->pdrv->output_open) {
+		void *priv = stp_policy_node_priv(policy_node);
+
+		if (WARN_ON_ONCE(!priv))
+			goto unlock;
+
+		/* configfs subsys mutex is held by the caller */
+		ret = stm->pdrv->output_open(priv, output);
+		if (ret)
+			goto unlock;
+	}
+
 	stm_output_claim(stm, output);
 	dev_dbg(&stm->dev, "assigned %u:%u (+%u)\n", midx, cidx, width);
 
 	ret = 0;
 unlock:
+	if (ret)
+		output->nr_chans = 0;
+
 	spin_unlock(&output->lock);
 	spin_unlock(&stm->mc_lock);
 
@@ -333,6 +343,8 @@
 	spin_lock(&output->lock);
 	if (output->nr_chans)
 		stm_output_disclaim(stm, output);
+	if (stm->pdrv && stm->pdrv->output_close)
+		stm->pdrv->output_close(output);
 	spin_unlock(&output->lock);
 	spin_unlock(&stm->mc_lock);
 }
@@ -349,6 +361,127 @@
 	return MAJOR(dev->devt) == major;
 }
 
+/*
+ * Framing protocol management
+ * Modules can implement STM protocol drivers and (un-)register them
+ * with the STM class framework.
+ */
+static struct list_head stm_pdrv_head;
+static struct mutex stm_pdrv_mutex;
+
+struct stm_pdrv_entry {
+	struct list_head			entry;
+	const struct stm_protocol_driver	*pdrv;
+	const struct config_item_type		*node_type;
+};
+
+static const struct stm_pdrv_entry *
+__stm_lookup_protocol(const char *name)
+{
+	struct stm_pdrv_entry *pe;
+
+	/*
+	 * If no name is given (NULL or ""), fall back to "p_basic".
+	 */
+	if (!name || !*name)
+		name = "p_basic";
+
+	list_for_each_entry(pe, &stm_pdrv_head, entry) {
+		if (!strcmp(name, pe->pdrv->name))
+			return pe;
+	}
+
+	return NULL;
+}
+
+int stm_register_protocol(const struct stm_protocol_driver *pdrv)
+{
+	struct stm_pdrv_entry *pe = NULL;
+	int ret = -ENOMEM;
+
+	mutex_lock(&stm_pdrv_mutex);
+
+	if (__stm_lookup_protocol(pdrv->name)) {
+		ret = -EEXIST;
+		goto unlock;
+	}
+
+	pe = kzalloc(sizeof(*pe), GFP_KERNEL);
+	if (!pe)
+		goto unlock;
+
+	if (pdrv->policy_attr) {
+		pe->node_type = get_policy_node_type(pdrv->policy_attr);
+		if (!pe->node_type)
+			goto unlock;
+	}
+
+	list_add_tail(&pe->entry, &stm_pdrv_head);
+	pe->pdrv = pdrv;
+
+	ret = 0;
+unlock:
+	mutex_unlock(&stm_pdrv_mutex);
+
+	if (ret)
+		kfree(pe);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(stm_register_protocol);
+
+void stm_unregister_protocol(const struct stm_protocol_driver *pdrv)
+{
+	struct stm_pdrv_entry *pe, *iter;
+
+	mutex_lock(&stm_pdrv_mutex);
+
+	list_for_each_entry_safe(pe, iter, &stm_pdrv_head, entry) {
+		if (pe->pdrv == pdrv) {
+			list_del(&pe->entry);
+
+			if (pe->node_type) {
+				kfree(pe->node_type->ct_attrs);
+				kfree(pe->node_type);
+			}
+			kfree(pe);
+			break;
+		}
+	}
+
+	mutex_unlock(&stm_pdrv_mutex);
+}
+EXPORT_SYMBOL_GPL(stm_unregister_protocol);
+
+static bool stm_get_protocol(const struct stm_protocol_driver *pdrv)
+{
+	return try_module_get(pdrv->owner);
+}
+
+void stm_put_protocol(const struct stm_protocol_driver *pdrv)
+{
+	module_put(pdrv->owner);
+}
+
+int stm_lookup_protocol(const char *name,
+			const struct stm_protocol_driver **pdrv,
+			const struct config_item_type **node_type)
+{
+	const struct stm_pdrv_entry *pe;
+
+	mutex_lock(&stm_pdrv_mutex);
+
+	pe = __stm_lookup_protocol(name);
+	if (pe && pe->pdrv && stm_get_protocol(pe->pdrv)) {
+		*pdrv = pe->pdrv;
+		*node_type = pe->node_type;
+	}
+
+	mutex_unlock(&stm_pdrv_mutex);
+
+	return pe ? 0 : -ENOENT;
+}
+
 static int stm_char_open(struct inode *inode, struct file *file)
 {
 	struct stm_file *stmf;
@@ -405,42 +538,81 @@
 	return 0;
 }
 
-static int stm_file_assign(struct stm_file *stmf, char *id, unsigned int width)
+static int
+stm_assign_first_policy(struct stm_device *stm, struct stm_output *output,
+			char **ids, unsigned int width)
 {
-	struct stm_device *stm = stmf->stm;
-	int ret;
+	struct stp_policy_node *pn;
+	int err, n;
 
-	stmf->policy_node = stp_policy_node_lookup(stm, id);
+	/*
+	 * On success, stp_policy_node_lookup() will return holding the
+	 * configfs subsystem mutex, which is then released in
+	 * stp_policy_node_put(). This allows the pdrv->output_open() in
+	 * stm_output_assign() to serialize against the attribute accessors.
+	 */
+	for (n = 0, pn = NULL; ids[n] && !pn; n++)
+		pn = stp_policy_node_lookup(stm, ids[n]);
 
-	ret = stm_output_assign(stm, width, stmf->policy_node, &stmf->output);
+	if (!pn)
+		return -EINVAL;
 
-	if (stmf->policy_node)
-		stp_policy_node_put(stmf->policy_node);
+	err = stm_output_assign(stm, width, pn, output);
 
-	return ret;
+	stp_policy_node_put(pn);
+
+	return err;
 }
 
-static ssize_t notrace stm_write(struct stm_data *data, unsigned int master,
-			  unsigned int channel, const char *buf, size_t count)
+/**
+ * stm_data_write() - send the given payload as data packets
+ * @data:	stm driver's data
+ * @m:		STP master
+ * @c:		STP channel
+ * @ts_first:	timestamp the first packet
+ * @buf:	data payload buffer
+ * @count:	data payload size
+ */
+ssize_t notrace stm_data_write(struct stm_data *data, unsigned int m,
+			       unsigned int c, bool ts_first, const void *buf,
+			       size_t count)
 {
-	unsigned int flags = STP_PACKET_TIMESTAMPED;
-	const unsigned char *p = buf, nil = 0;
-	size_t pos;
+	unsigned int flags = ts_first ? STP_PACKET_TIMESTAMPED : 0;
 	ssize_t sz;
+	size_t pos;
 
-	for (pos = 0, p = buf; count > pos; pos += sz, p += sz) {
+	for (pos = 0, sz = 0; pos < count; pos += sz) {
 		sz = min_t(unsigned int, count - pos, 8);
-		sz = data->packet(data, master, channel, STP_PACKET_DATA, flags,
-				  sz, p);
-		flags = 0;
-
-		if (sz < 0)
+		sz = data->packet(data, m, c, STP_PACKET_DATA, flags, sz,
+				  &((u8 *)buf)[pos]);
+		if (sz <= 0)
 			break;
+
+		if (ts_first) {
+			flags = 0;
+			ts_first = false;
+		}
 	}
 
-	data->packet(data, master, channel, STP_PACKET_FLAG, 0, 0, &nil);
+	return sz < 0 ? sz : pos;
+}
+EXPORT_SYMBOL_GPL(stm_data_write);
 
-	return pos;
+static ssize_t notrace
+stm_write(struct stm_device *stm, struct stm_output *output,
+	  unsigned int chan, const char *buf, size_t count)
+{
+	int err;
+
+	/* stm->pdrv is serialized against policy_mutex */
+	if (!stm->pdrv)
+		return -ENODEV;
+
+	err = stm->pdrv->write(stm->data, output, chan, buf, count);
+	if (err < 0)
+		return err;
+
+	return err;
 }
 
 static ssize_t stm_char_write(struct file *file, const char __user *buf,
@@ -455,16 +627,21 @@
 		count = PAGE_SIZE - 1;
 
 	/*
-	 * if no m/c have been assigned to this writer up to this
-	 * point, use "default" policy entry
+	 * If no m/c have been assigned to this writer up to this
+	 * point, try to use the task name and "default" policy entries.
 	 */
 	if (!stmf->output.nr_chans) {
-		err = stm_file_assign(stmf, "default", 1);
+		char comm[sizeof(current->comm)];
+		char *ids[] = { comm, "default", NULL };
+
+		get_task_comm(comm, current);
+
+		err = stm_assign_first_policy(stmf->stm, &stmf->output, ids, 1);
 		/*
 		 * EBUSY means that somebody else just assigned this
 		 * output, which is just fine for write()
 		 */
-		if (err && err != -EBUSY)
+		if (err)
 			return err;
 	}
 
@@ -480,8 +657,7 @@
 
 	pm_runtime_get_sync(&stm->dev);
 
-	count = stm_write(stm->data, stmf->output.master, stmf->output.channel,
-			  kbuf, count);
+	count = stm_write(stm, &stmf->output, 0, kbuf, count);
 
 	pm_runtime_mark_last_busy(&stm->dev);
 	pm_runtime_put_autosuspend(&stm->dev);
@@ -550,7 +726,8 @@
 {
 	struct stm_device *stm = stmf->stm;
 	struct stp_policy_id *id;
-	int ret = -EINVAL;
+	char *ids[] = { NULL, NULL };
+	int ret = -EINVAL, wlimit = 1;
 	u32 size;
 
 	if (stmf->output.nr_chans)
@@ -578,11 +755,15 @@
 	if (id->__reserved_0 || id->__reserved_1)
 		goto err_free;
 
-	if (id->width < 1 ||
-	    id->width > PAGE_SIZE / stm->data->sw_mmiosz)
+	if (stm->data->sw_mmiosz)
+		wlimit = PAGE_SIZE / stm->data->sw_mmiosz;
+
+	if (id->width < 1 || id->width > wlimit)
 		goto err_free;
 
-	ret = stm_file_assign(stmf, id->id, id->width);
+	ids[0] = id->id;
+	ret = stm_assign_first_policy(stmf->stm, &stmf->output, ids,
+				      id->width);
 	if (ret)
 		goto err_free;
 
@@ -818,8 +999,8 @@
 static int stm_source_link_add(struct stm_source_device *src,
 			       struct stm_device *stm)
 {
-	char *id;
-	int err;
+	char *ids[] = { NULL, "default", NULL };
+	int err = -ENOMEM;
 
 	mutex_lock(&stm->link_mutex);
 	spin_lock(&stm->link_lock);
@@ -833,19 +1014,13 @@
 	spin_unlock(&stm->link_lock);
 	mutex_unlock(&stm->link_mutex);
 
-	id = kstrdup(src->data->name, GFP_KERNEL);
-	if (id) {
-		src->policy_node =
-			stp_policy_node_lookup(stm, id);
+	ids[0] = kstrdup(src->data->name, GFP_KERNEL);
+	if (!ids[0])
+		goto fail_detach;
 
-		kfree(id);
-	}
-
-	err = stm_output_assign(stm, src->data->nr_chans,
-				src->policy_node, &src->output);
-
-	if (src->policy_node)
-		stp_policy_node_put(src->policy_node);
+	err = stm_assign_first_policy(stm, &src->output, ids,
+				      src->data->nr_chans);
+	kfree(ids[0]);
 
 	if (err)
 		goto fail_detach;
@@ -1094,7 +1269,6 @@
 
 err:
 	put_device(&src->dev);
-	kfree(src);
 
 	return err;
 }
@@ -1134,9 +1308,7 @@
 
 	stm = srcu_dereference(src->link, &stm_source_srcu);
 	if (stm)
-		count = stm_write(stm->data, src->output.master,
-				  src->output.channel + chan,
-				  buf, count);
+		count = stm_write(stm, &src->output, chan, buf, count);
 	else
 		count = -ENODEV;
 
@@ -1163,7 +1335,15 @@
 		goto err_src;
 
 	init_srcu_struct(&stm_source_srcu);
+	INIT_LIST_HEAD(&stm_pdrv_head);
+	mutex_init(&stm_pdrv_mutex);
 
+	/*
+	 * So as to not confuse existing users with a requirement
+	 * to load yet another module, do it here.
+	 */
+	if (IS_ENABLED(CONFIG_STM_PROTO_BASIC))
+		(void)request_module_nowait("stm_p_basic");
 	stm_core_up++;
 
 	return 0;
diff --git a/drivers/hwtracing/stm/heartbeat.c b/drivers/hwtracing/stm/heartbeat.c
index 7db4239..3e7df1c 100644
--- a/drivers/hwtracing/stm/heartbeat.c
+++ b/drivers/hwtracing/stm/heartbeat.c
@@ -76,7 +76,7 @@
 			goto fail_unregister;
 
 		stm_heartbeat[i].data.nr_chans	= 1;
-		stm_heartbeat[i].data.link		= stm_heartbeat_link;
+		stm_heartbeat[i].data.link	= stm_heartbeat_link;
 		stm_heartbeat[i].data.unlink	= stm_heartbeat_unlink;
 		hrtimer_init(&stm_heartbeat[i].hrtimer, CLOCK_MONOTONIC,
 			     HRTIMER_MODE_ABS);
diff --git a/drivers/hwtracing/stm/p_basic.c b/drivers/hwtracing/stm/p_basic.c
new file mode 100644
index 0000000..8980a6a
--- /dev/null
+++ b/drivers/hwtracing/stm/p_basic.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Basic framing protocol for STM devices.
+ * Copyright (c) 2018, Intel Corporation.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/stm.h>
+#include "stm.h"
+
+static ssize_t basic_write(struct stm_data *data, struct stm_output *output,
+			   unsigned int chan, const char *buf, size_t count)
+{
+	unsigned int c = output->channel + chan;
+	unsigned int m = output->master;
+	const unsigned char nil = 0;
+	ssize_t sz;
+
+	sz = stm_data_write(data, m, c, true, buf, count);
+	if (sz > 0)
+		data->packet(data, m, c, STP_PACKET_FLAG, 0, 0, &nil);
+
+	return sz;
+}
+
+static const struct stm_protocol_driver basic_pdrv = {
+	.owner	= THIS_MODULE,
+	.name	= "p_basic",
+	.write	= basic_write,
+};
+
+static int basic_stm_init(void)
+{
+	return stm_register_protocol(&basic_pdrv);
+}
+
+static void basic_stm_exit(void)
+{
+	stm_unregister_protocol(&basic_pdrv);
+}
+
+module_init(basic_stm_init);
+module_exit(basic_stm_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Basic STM framing protocol driver");
+MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@linux.intel.com>");
diff --git a/drivers/hwtracing/stm/p_sys-t.c b/drivers/hwtracing/stm/p_sys-t.c
new file mode 100644
index 0000000..b178a54
--- /dev/null
+++ b/drivers/hwtracing/stm/p_sys-t.c
@@ -0,0 +1,382 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * MIPI SyS-T framing protocol for STM devices.
+ * Copyright (c) 2018, Intel Corporation.
+ */
+
+#include <linux/configfs.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/uuid.h>
+#include <linux/stm.h>
+#include "stm.h"
+
+enum sys_t_message_type {
+	MIPI_SYST_TYPE_BUILD	= 0,
+	MIPI_SYST_TYPE_SHORT32,
+	MIPI_SYST_TYPE_STRING,
+	MIPI_SYST_TYPE_CATALOG,
+	MIPI_SYST_TYPE_RAW	= 6,
+	MIPI_SYST_TYPE_SHORT64,
+	MIPI_SYST_TYPE_CLOCK,
+};
+
+enum sys_t_message_severity {
+	MIPI_SYST_SEVERITY_MAX	= 0,
+	MIPI_SYST_SEVERITY_FATAL,
+	MIPI_SYST_SEVERITY_ERROR,
+	MIPI_SYST_SEVERITY_WARNING,
+	MIPI_SYST_SEVERITY_INFO,
+	MIPI_SYST_SEVERITY_USER1,
+	MIPI_SYST_SEVERITY_USER2,
+	MIPI_SYST_SEVERITY_DEBUG,
+};
+
+enum sys_t_message_build_subtype {
+	MIPI_SYST_BUILD_ID_COMPACT32 = 0,
+	MIPI_SYST_BUILD_ID_COMPACT64,
+	MIPI_SYST_BUILD_ID_LONG,
+};
+
+enum sys_t_message_clock_subtype {
+	MIPI_SYST_CLOCK_TRANSPORT_SYNC = 1,
+};
+
+enum sys_t_message_string_subtype {
+	MIPI_SYST_STRING_GENERIC	= 1,
+	MIPI_SYST_STRING_FUNCTIONENTER,
+	MIPI_SYST_STRING_FUNCTIONEXIT,
+	MIPI_SYST_STRING_INVALIDPARAM	= 5,
+	MIPI_SYST_STRING_ASSERT		= 7,
+	MIPI_SYST_STRING_PRINTF_32	= 11,
+	MIPI_SYST_STRING_PRINTF_64	= 12,
+};
+
+#define MIPI_SYST_TYPE(t)		((u32)(MIPI_SYST_TYPE_ ## t))
+#define MIPI_SYST_SEVERITY(s)		((u32)(MIPI_SYST_SEVERITY_ ## s) << 4)
+#define MIPI_SYST_OPT_LOC		BIT(8)
+#define MIPI_SYST_OPT_LEN		BIT(9)
+#define MIPI_SYST_OPT_CHK		BIT(10)
+#define MIPI_SYST_OPT_TS		BIT(11)
+#define MIPI_SYST_UNIT(u)		((u32)(u) << 12)
+#define MIPI_SYST_ORIGIN(o)		((u32)(o) << 16)
+#define MIPI_SYST_OPT_GUID		BIT(23)
+#define MIPI_SYST_SUBTYPE(s)		((u32)(MIPI_SYST_ ## s) << 24)
+#define MIPI_SYST_UNITLARGE(u)		(MIPI_SYST_UNIT(u & 0xf) | \
+					 MIPI_SYST_ORIGIN(u >> 4))
+#define MIPI_SYST_TYPES(t, s)		(MIPI_SYST_TYPE(t) | \
+					 MIPI_SYST_SUBTYPE(t ## _ ## s))
+
+#define DATA_HEADER	(MIPI_SYST_TYPES(STRING, GENERIC)	| \
+			 MIPI_SYST_SEVERITY(INFO)		| \
+			 MIPI_SYST_OPT_GUID)
+
+#define CLOCK_SYNC_HEADER	(MIPI_SYST_TYPES(CLOCK, TRANSPORT_SYNC)	| \
+				 MIPI_SYST_SEVERITY(MAX))
+
+struct sys_t_policy_node {
+	uuid_t		uuid;
+	bool		do_len;
+	unsigned long	ts_interval;
+	unsigned long	clocksync_interval;
+};
+
+struct sys_t_output {
+	struct sys_t_policy_node	node;
+	unsigned long	ts_jiffies;
+	unsigned long	clocksync_jiffies;
+};
+
+static void sys_t_policy_node_init(void *priv)
+{
+	struct sys_t_policy_node *pn = priv;
+
+	generate_random_uuid(pn->uuid.b);
+}
+
+static int sys_t_output_open(void *priv, struct stm_output *output)
+{
+	struct sys_t_policy_node *pn = priv;
+	struct sys_t_output *opriv;
+
+	opriv = kzalloc(sizeof(*opriv), GFP_ATOMIC);
+	if (!opriv)
+		return -ENOMEM;
+
+	memcpy(&opriv->node, pn, sizeof(opriv->node));
+	output->pdrv_private = opriv;
+
+	return 0;
+}
+
+static void sys_t_output_close(struct stm_output *output)
+{
+	kfree(output->pdrv_private);
+}
+
+static ssize_t sys_t_policy_uuid_show(struct config_item *item,
+				      char *page)
+{
+	struct sys_t_policy_node *pn = to_pdrv_policy_node(item);
+
+	return sprintf(page, "%pU\n", &pn->uuid);
+}
+
+static ssize_t
+sys_t_policy_uuid_store(struct config_item *item, const char *page,
+			size_t count)
+{
+	struct mutex *mutexp = &item->ci_group->cg_subsys->su_mutex;
+	struct sys_t_policy_node *pn = to_pdrv_policy_node(item);
+	int ret;
+
+	mutex_lock(mutexp);
+	ret = uuid_parse(page, &pn->uuid);
+	mutex_unlock(mutexp);
+
+	return ret < 0 ? ret : count;
+}
+
+CONFIGFS_ATTR(sys_t_policy_, uuid);
+
+static ssize_t sys_t_policy_do_len_show(struct config_item *item,
+				      char *page)
+{
+	struct sys_t_policy_node *pn = to_pdrv_policy_node(item);
+
+	return sprintf(page, "%d\n", pn->do_len);
+}
+
+static ssize_t
+sys_t_policy_do_len_store(struct config_item *item, const char *page,
+			size_t count)
+{
+	struct mutex *mutexp = &item->ci_group->cg_subsys->su_mutex;
+	struct sys_t_policy_node *pn = to_pdrv_policy_node(item);
+	int ret;
+
+	mutex_lock(mutexp);
+	ret = kstrtobool(page, &pn->do_len);
+	mutex_unlock(mutexp);
+
+	return ret ? ret : count;
+}
+
+CONFIGFS_ATTR(sys_t_policy_, do_len);
+
+static ssize_t sys_t_policy_ts_interval_show(struct config_item *item,
+					     char *page)
+{
+	struct sys_t_policy_node *pn = to_pdrv_policy_node(item);
+
+	return sprintf(page, "%u\n", jiffies_to_msecs(pn->ts_interval));
+}
+
+static ssize_t
+sys_t_policy_ts_interval_store(struct config_item *item, const char *page,
+			       size_t count)
+{
+	struct mutex *mutexp = &item->ci_group->cg_subsys->su_mutex;
+	struct sys_t_policy_node *pn = to_pdrv_policy_node(item);
+	unsigned int ms;
+	int ret;
+
+	mutex_lock(mutexp);
+	ret = kstrtouint(page, 10, &ms);
+	mutex_unlock(mutexp);
+
+	if (!ret) {
+		pn->ts_interval = msecs_to_jiffies(ms);
+		return count;
+	}
+
+	return ret;
+}
+
+CONFIGFS_ATTR(sys_t_policy_, ts_interval);
+
+static ssize_t sys_t_policy_clocksync_interval_show(struct config_item *item,
+						    char *page)
+{
+	struct sys_t_policy_node *pn = to_pdrv_policy_node(item);
+
+	return sprintf(page, "%u\n", jiffies_to_msecs(pn->clocksync_interval));
+}
+
+static ssize_t
+sys_t_policy_clocksync_interval_store(struct config_item *item,
+				      const char *page, size_t count)
+{
+	struct mutex *mutexp = &item->ci_group->cg_subsys->su_mutex;
+	struct sys_t_policy_node *pn = to_pdrv_policy_node(item);
+	unsigned int ms;
+	int ret;
+
+	mutex_lock(mutexp);
+	ret = kstrtouint(page, 10, &ms);
+	mutex_unlock(mutexp);
+
+	if (!ret) {
+		pn->clocksync_interval = msecs_to_jiffies(ms);
+		return count;
+	}
+
+	return ret;
+}
+
+CONFIGFS_ATTR(sys_t_policy_, clocksync_interval);
+
+static struct configfs_attribute *sys_t_policy_attrs[] = {
+	&sys_t_policy_attr_uuid,
+	&sys_t_policy_attr_do_len,
+	&sys_t_policy_attr_ts_interval,
+	&sys_t_policy_attr_clocksync_interval,
+	NULL,
+};
+
+static inline bool sys_t_need_ts(struct sys_t_output *op)
+{
+	if (op->node.ts_interval &&
+	    time_after(op->ts_jiffies + op->node.ts_interval, jiffies)) {
+		op->ts_jiffies = jiffies;
+
+		return true;
+	}
+
+	return false;
+}
+
+static bool sys_t_need_clock_sync(struct sys_t_output *op)
+{
+	if (op->node.clocksync_interval &&
+	    time_after(op->clocksync_jiffies + op->node.clocksync_interval,
+		       jiffies)) {
+		op->clocksync_jiffies = jiffies;
+
+		return true;
+	}
+
+	return false;
+}
+
+static ssize_t
+sys_t_clock_sync(struct stm_data *data, unsigned int m, unsigned int c)
+{
+	u32 header = CLOCK_SYNC_HEADER;
+	const unsigned char nil = 0;
+	u64 payload[2]; /* Clock value and frequency */
+	ssize_t sz;
+
+	sz = data->packet(data, m, c, STP_PACKET_DATA, STP_PACKET_TIMESTAMPED,
+			  4, (u8 *)&header);
+	if (sz <= 0)
+		return sz;
+
+	payload[0] = ktime_get_real_ns();
+	payload[1] = NSEC_PER_SEC;
+	sz = stm_data_write(data, m, c, false, &payload, sizeof(payload));
+	if (sz <= 0)
+		return sz;
+
+	data->packet(data, m, c, STP_PACKET_FLAG, 0, 0, &nil);
+
+	return sizeof(header) + sizeof(payload);
+}
+
+static ssize_t sys_t_write(struct stm_data *data, struct stm_output *output,
+			   unsigned int chan, const char *buf, size_t count)
+{
+	struct sys_t_output *op = output->pdrv_private;
+	unsigned int c = output->channel + chan;
+	unsigned int m = output->master;
+	const unsigned char nil = 0;
+	u32 header = DATA_HEADER;
+	ssize_t sz;
+
+	/* We require an existing policy node to proceed */
+	if (!op)
+		return -EINVAL;
+
+	if (sys_t_need_clock_sync(op)) {
+		sz = sys_t_clock_sync(data, m, c);
+		if (sz <= 0)
+			return sz;
+	}
+
+	if (op->node.do_len)
+		header |= MIPI_SYST_OPT_LEN;
+	if (sys_t_need_ts(op))
+		header |= MIPI_SYST_OPT_TS;
+
+	/*
+	 * STP framing rules for SyS-T frames:
+	 *   * the first packet of the SyS-T frame is timestamped;
+	 *   * the last packet is a FLAG.
+	 */
+	/* Message layout: HEADER / GUID / [LENGTH /][TIMESTAMP /] DATA */
+	/* HEADER */
+	sz = data->packet(data, m, c, STP_PACKET_DATA, STP_PACKET_TIMESTAMPED,
+			  4, (u8 *)&header);
+	if (sz <= 0)
+		return sz;
+
+	/* GUID */
+	sz = stm_data_write(data, m, c, false, op->node.uuid.b, UUID_SIZE);
+	if (sz <= 0)
+		return sz;
+
+	/* [LENGTH] */
+	if (op->node.do_len) {
+		u16 length = count;
+
+		sz = data->packet(data, m, c, STP_PACKET_DATA, 0, 2,
+				  (u8 *)&length);
+		if (sz <= 0)
+			return sz;
+	}
+
+	/* [TIMESTAMP] */
+	if (header & MIPI_SYST_OPT_TS) {
+		u64 ts = ktime_get_real_ns();
+
+		sz = stm_data_write(data, m, c, false, &ts, sizeof(ts));
+		if (sz <= 0)
+			return sz;
+	}
+
+	/* DATA */
+	sz = stm_data_write(data, m, c, false, buf, count);
+	if (sz > 0)
+		data->packet(data, m, c, STP_PACKET_FLAG, 0, 0, &nil);
+
+	return sz;
+}
+
+static const struct stm_protocol_driver sys_t_pdrv = {
+	.owner			= THIS_MODULE,
+	.name			= "p_sys-t",
+	.priv_sz		= sizeof(struct sys_t_policy_node),
+	.write			= sys_t_write,
+	.policy_attr		= sys_t_policy_attrs,
+	.policy_node_init	= sys_t_policy_node_init,
+	.output_open		= sys_t_output_open,
+	.output_close		= sys_t_output_close,
+};
+
+static int sys_t_stm_init(void)
+{
+	return stm_register_protocol(&sys_t_pdrv);
+}
+
+static void sys_t_stm_exit(void)
+{
+	stm_unregister_protocol(&sys_t_pdrv);
+}
+
+module_init(sys_t_stm_init);
+module_exit(sys_t_stm_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("MIPI SyS-T STM framing protocol driver");
+MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@linux.intel.com>");
diff --git a/drivers/hwtracing/stm/policy.c b/drivers/hwtracing/stm/policy.c
index 3fd07e2..4b9e44b 100644
--- a/drivers/hwtracing/stm/policy.c
+++ b/drivers/hwtracing/stm/policy.c
@@ -33,8 +33,18 @@
 	unsigned int		last_master;
 	unsigned int		first_channel;
 	unsigned int		last_channel;
+	/* this is the one that's exposed to the attributes */
+	unsigned char		priv[0];
 };
 
+void *stp_policy_node_priv(struct stp_policy_node *pn)
+{
+	if (!pn)
+		return NULL;
+
+	return pn->priv;
+}
+
 static struct configfs_subsystem stp_policy_subsys;
 
 void stp_policy_node_get_ranges(struct stp_policy_node *policy_node,
@@ -68,6 +78,14 @@
 		NULL;
 }
 
+void *to_pdrv_policy_node(struct config_item *item)
+{
+	struct stp_policy_node *node = to_stp_policy_node(item);
+
+	return stp_policy_node_priv(node);
+}
+EXPORT_SYMBOL_GPL(to_pdrv_policy_node);
+
 static ssize_t
 stp_policy_node_masters_show(struct config_item *item, char *page)
 {
@@ -163,7 +181,9 @@
 
 static void stp_policy_node_release(struct config_item *item)
 {
-	kfree(to_stp_policy_node(item));
+	struct stp_policy_node *node = to_stp_policy_node(item);
+
+	kfree(node);
 }
 
 static struct configfs_item_operations stp_policy_node_item_ops = {
@@ -182,10 +202,34 @@
 static const struct config_item_type stp_policy_type;
 static const struct config_item_type stp_policy_node_type;
 
+const struct config_item_type *
+get_policy_node_type(struct configfs_attribute **attrs)
+{
+	struct config_item_type *type;
+	struct configfs_attribute **merged;
+
+	type = kmemdup(&stp_policy_node_type, sizeof(stp_policy_node_type),
+		       GFP_KERNEL);
+	if (!type)
+		return NULL;
+
+	merged = memcat_p(stp_policy_node_attrs, attrs);
+	if (!merged) {
+		kfree(type);
+		return NULL;
+	}
+
+	type->ct_attrs = merged;
+
+	return type;
+}
+
 static struct config_group *
 stp_policy_node_make(struct config_group *group, const char *name)
 {
+	const struct config_item_type *type = &stp_policy_node_type;
 	struct stp_policy_node *policy_node, *parent_node;
+	const struct stm_protocol_driver *pdrv;
 	struct stp_policy *policy;
 
 	if (group->cg_item.ci_type == &stp_policy_type) {
@@ -199,12 +243,20 @@
 	if (!policy->stm)
 		return ERR_PTR(-ENODEV);
 
-	policy_node = kzalloc(sizeof(struct stp_policy_node), GFP_KERNEL);
+	pdrv = policy->stm->pdrv;
+	policy_node =
+		kzalloc(offsetof(struct stp_policy_node, priv[pdrv->priv_sz]),
+			GFP_KERNEL);
 	if (!policy_node)
 		return ERR_PTR(-ENOMEM);
 
-	config_group_init_type_name(&policy_node->group, name,
-				    &stp_policy_node_type);
+	if (pdrv->policy_node_init)
+		pdrv->policy_node_init((void *)policy_node->priv);
+
+	if (policy->stm->pdrv_node_type)
+		type = policy->stm->pdrv_node_type;
+
+	config_group_init_type_name(&policy_node->group, name, type);
 
 	policy_node->policy = policy;
 
@@ -254,8 +306,25 @@
 
 CONFIGFS_ATTR_RO(stp_policy_, device);
 
+static ssize_t stp_policy_protocol_show(struct config_item *item,
+					char *page)
+{
+	struct stp_policy *policy = to_stp_policy(item);
+	ssize_t count;
+
+	count = sprintf(page, "%s\n",
+			(policy && policy->stm) ?
+			policy->stm->pdrv->name :
+			"<none>");
+
+	return count;
+}
+
+CONFIGFS_ATTR_RO(stp_policy_, protocol);
+
 static struct configfs_attribute *stp_policy_attrs[] = {
 	&stp_policy_attr_device,
+	&stp_policy_attr_protocol,
 	NULL,
 };
 
@@ -276,6 +345,7 @@
 	stm->policy = NULL;
 	policy->stm = NULL;
 
+	stm_put_protocol(stm->pdrv);
 	stm_put_device(stm);
 }
 
@@ -311,11 +381,14 @@
 };
 
 static struct config_group *
-stp_policies_make(struct config_group *group, const char *name)
+stp_policy_make(struct config_group *group, const char *name)
 {
+	const struct config_item_type *pdrv_node_type;
+	const struct stm_protocol_driver *pdrv;
+	char *devname, *proto, *p;
 	struct config_group *ret;
 	struct stm_device *stm;
-	char *devname, *p;
+	int err;
 
 	devname = kasprintf(GFP_KERNEL, "%s", name);
 	if (!devname)
@@ -326,6 +399,7 @@
 	 * <device_name> is the name of an existing stm device; may
 	 *               contain dots;
 	 * <policy_name> is an arbitrary string; may not contain dots
+	 * <device_name>:<protocol_name>.<policy_name>
 	 */
 	p = strrchr(devname, '.');
 	if (!p) {
@@ -335,11 +409,28 @@
 
 	*p = '\0';
 
+	/*
+	 * look for ":<protocol_name>":
+	 *  + no protocol suffix: fall back to whatever is available;
+	 *  + unknown protocol: fail the whole thing
+	 */
+	proto = strrchr(devname, ':');
+	if (proto)
+		*proto++ = '\0';
+
 	stm = stm_find_device(devname);
+	if (!stm) {
+		kfree(devname);
+		return ERR_PTR(-ENODEV);
+	}
+
+	err = stm_lookup_protocol(proto, &pdrv, &pdrv_node_type);
 	kfree(devname);
 
-	if (!stm)
+	if (err) {
+		stm_put_device(stm);
 		return ERR_PTR(-ENODEV);
+	}
 
 	mutex_lock(&stm->policy_mutex);
 	if (stm->policy) {
@@ -355,25 +446,33 @@
 
 	config_group_init_type_name(&stm->policy->group, name,
 				    &stp_policy_type);
-	stm->policy->stm = stm;
 
+	stm->pdrv = pdrv;
+	stm->pdrv_node_type = pdrv_node_type;
+	stm->policy->stm = stm;
 	ret = &stm->policy->group;
 
 unlock_policy:
 	mutex_unlock(&stm->policy_mutex);
 
-	if (IS_ERR(ret))
+	if (IS_ERR(ret)) {
+		/*
+		 * pdrv and stm->pdrv at this point can be quite different,
+		 * and only one of them needs to be 'put'
+		 */
+		stm_put_protocol(pdrv);
 		stm_put_device(stm);
+	}
 
 	return ret;
 }
 
-static struct configfs_group_operations stp_policies_group_ops = {
-	.make_group	= stp_policies_make,
+static struct configfs_group_operations stp_policy_root_group_ops = {
+	.make_group	= stp_policy_make,
 };
 
-static const struct config_item_type stp_policies_type = {
-	.ct_group_ops	= &stp_policies_group_ops,
+static const struct config_item_type stp_policy_root_type = {
+	.ct_group_ops	= &stp_policy_root_group_ops,
 	.ct_owner	= THIS_MODULE,
 };
 
@@ -381,7 +480,7 @@
 	.su_group = {
 		.cg_item = {
 			.ci_namebuf	= "stp-policy",
-			.ci_type	= &stp_policies_type,
+			.ci_type	= &stp_policy_root_type,
 		},
 	},
 };
@@ -392,7 +491,7 @@
 static struct stp_policy_node *
 __stp_policy_node_lookup(struct stp_policy *policy, char *s)
 {
-	struct stp_policy_node *policy_node, *ret;
+	struct stp_policy_node *policy_node, *ret = NULL;
 	struct list_head *head = &policy->group.cg_children;
 	struct config_item *item;
 	char *start, *end = s;
@@ -400,10 +499,6 @@
 	if (list_empty(head))
 		return NULL;
 
-	/* return the first entry if everything else fails */
-	item = list_entry(head->next, struct config_item, ci_entry);
-	ret = to_stp_policy_node(item);
-
 next:
 	for (;;) {
 		start = strsep(&end, "/");
@@ -449,25 +544,25 @@
 
 	if (policy_node)
 		config_item_get(&policy_node->group.cg_item);
-	mutex_unlock(&stp_policy_subsys.su_mutex);
+	else
+		mutex_unlock(&stp_policy_subsys.su_mutex);
 
 	return policy_node;
 }
 
 void stp_policy_node_put(struct stp_policy_node *policy_node)
 {
+	lockdep_assert_held(&stp_policy_subsys.su_mutex);
+
+	mutex_unlock(&stp_policy_subsys.su_mutex);
 	config_item_put(&policy_node->group.cg_item);
 }
 
 int __init stp_configfs_init(void)
 {
-	int err;
-
 	config_group_init(&stp_policy_subsys.su_group);
 	mutex_init(&stp_policy_subsys.su_mutex);
-	err = configfs_register_subsystem(&stp_policy_subsys);
-
-	return err;
+	return configfs_register_subsystem(&stp_policy_subsys);
 }
 
 void __exit stp_configfs_exit(void)
diff --git a/drivers/hwtracing/stm/stm.h b/drivers/hwtracing/stm/stm.h
index 923571a..3569439 100644
--- a/drivers/hwtracing/stm/stm.h
+++ b/drivers/hwtracing/stm/stm.h
@@ -10,20 +10,17 @@
 #ifndef _STM_STM_H_
 #define _STM_STM_H_
 
+#include <linux/configfs.h>
+
 struct stp_policy;
 struct stp_policy_node;
+struct stm_protocol_driver;
 
-struct stp_policy_node *
-stp_policy_node_lookup(struct stm_device *stm, char *s);
-void stp_policy_node_put(struct stp_policy_node *policy_node);
-void stp_policy_unbind(struct stp_policy *policy);
-
-void stp_policy_node_get_ranges(struct stp_policy_node *policy_node,
-				unsigned int *mstart, unsigned int *mend,
-				unsigned int *cstart, unsigned int *cend);
 int stp_configfs_init(void);
 void stp_configfs_exit(void);
 
+void *stp_policy_node_priv(struct stp_policy_node *pn);
+
 struct stp_master {
 	unsigned int	nr_free;
 	unsigned long	chan_map[0];
@@ -40,6 +37,9 @@
 	struct mutex		link_mutex;
 	spinlock_t		link_lock;
 	struct list_head	link_list;
+	/* framing protocol in use */
+	const struct stm_protocol_driver	*pdrv;
+	const struct config_item_type		*pdrv_node_type;
 	/* master allocation */
 	spinlock_t		mc_lock;
 	struct stp_master	*masters[0];
@@ -48,16 +48,28 @@
 #define to_stm_device(_d)				\
 	container_of((_d), struct stm_device, dev)
 
+struct stp_policy_node *
+stp_policy_node_lookup(struct stm_device *stm, char *s);
+void stp_policy_node_put(struct stp_policy_node *policy_node);
+void stp_policy_unbind(struct stp_policy *policy);
+
+void stp_policy_node_get_ranges(struct stp_policy_node *policy_node,
+				unsigned int *mstart, unsigned int *mend,
+				unsigned int *cstart, unsigned int *cend);
+
+const struct config_item_type *
+get_policy_node_type(struct configfs_attribute **attrs);
+
 struct stm_output {
 	spinlock_t		lock;
 	unsigned int		master;
 	unsigned int		channel;
 	unsigned int		nr_chans;
+	void			*pdrv_private;
 };
 
 struct stm_file {
 	struct stm_device	*stm;
-	struct stp_policy_node	*policy_node;
 	struct stm_output	output;
 };
 
@@ -71,11 +83,35 @@
 	struct stm_device __rcu	*link;
 	struct list_head	link_entry;
 	/* one output per stm_source device */
-	struct stp_policy_node	*policy_node;
 	struct stm_output	output;
 };
 
 #define to_stm_source_device(_d)				\
 	container_of((_d), struct stm_source_device, dev)
 
+void *to_pdrv_policy_node(struct config_item *item);
+
+struct stm_protocol_driver {
+	struct module	*owner;
+	const char	*name;
+	ssize_t		(*write)(struct stm_data *data,
+				 struct stm_output *output, unsigned int chan,
+				 const char *buf, size_t count);
+	void		(*policy_node_init)(void *arg);
+	int		(*output_open)(void *priv, struct stm_output *output);
+	void		(*output_close)(struct stm_output *output);
+	ssize_t		priv_sz;
+	struct configfs_attribute	**policy_attr;
+};
+
+int stm_register_protocol(const struct stm_protocol_driver *pdrv);
+void stm_unregister_protocol(const struct stm_protocol_driver *pdrv);
+int stm_lookup_protocol(const char *name,
+			const struct stm_protocol_driver **pdrv,
+			const struct config_item_type **type);
+void stm_put_protocol(const struct stm_protocol_driver *pdrv);
+ssize_t stm_data_write(struct stm_data *data, unsigned int m,
+		       unsigned int c, bool ts_first, const void *buf,
+		       size_t count);
+
 #endif /* _STM_STM_H_ */