v4.19.13 snapshot.
diff --git a/drivers/hwtracing/Kconfig b/drivers/hwtracing/Kconfig
new file mode 100644
index 0000000..f68e025
--- /dev/null
+++ b/drivers/hwtracing/Kconfig
@@ -0,0 +1,7 @@
+menu "HW tracing support"
+
+source "drivers/hwtracing/stm/Kconfig"
+
+source "drivers/hwtracing/intel_th/Kconfig"
+
+endmenu
diff --git a/drivers/hwtracing/coresight/Kconfig b/drivers/hwtracing/coresight/Kconfig
new file mode 100644
index 0000000..ad34380
--- /dev/null
+++ b/drivers/hwtracing/coresight/Kconfig
@@ -0,0 +1,117 @@
+#
+# Coresight configuration
+#
+menuconfig CORESIGHT
+	bool "CoreSight Tracing Support"
+	select ARM_AMBA
+	select PERF_EVENTS
+	help
+	  This framework provides a kernel interface for the CoreSight debug
+	  and trace drivers to register themselves with. It's intended to build
+	  a topological view of the CoreSight components based on a DT
+	  specification and configure the right series of components when a
+	  trace source gets enabled.
+
+if CORESIGHT
+config CORESIGHT_LINKS_AND_SINKS
+	bool "CoreSight Link and Sink drivers"
+	help
+	  This enables support for CoreSight link and sink drivers that are
+	  responsible for transporting and collecting the trace data
+	  respectively.  Links and sinks are dynamically aggregated with a trace
+	  entity at run time to form a complete trace path.
+
+config CORESIGHT_LINK_AND_SINK_TMC
+	bool "Coresight generic TMC driver"
+	depends on CORESIGHT_LINKS_AND_SINKS
+	help
+	  This enables support for the Trace Memory Controller driver.
+	  Depending on its configuration the device can act as a link (embedded
+	  trace FIFO - ETF) or a sink (embedded trace router - ETR).  The driver
+	  complies with the generic implementation of the component without
+	  special enhancement or added features.
+
+config CORESIGHT_CATU
+	bool "Coresight Address Translation Unit (CATU) driver"
+	depends on CORESIGHT_LINK_AND_SINK_TMC
+	help
+	   Enable support for the Coresight Address Translation Unit (CATU).
+	   CATU supports a scatter gather table of 4K pages, with forward/backward
+	   lookup. CATU helps TMC ETR to use a large physically non-contiguous trace
+	   buffer by translating the addresses used by the ETR to physical addresses
+	   by looking up the provided table. CATU can also be used in pass-through
+	   mode where the address is not translated.
+
+config CORESIGHT_SINK_TPIU
+	bool "Coresight generic TPIU driver"
+	depends on CORESIGHT_LINKS_AND_SINKS
+	help
+	  This enables support for the Trace Port Interface Unit driver,
+	  responsible for bridging the gap between the on-chip coresight
+	  components and a trace port collection engine, typically connected
+	  to an external host for use cases capturing more traces than the
+	  on-board coresight memory can handle.
+
+config CORESIGHT_SINK_ETBV10
+	bool "Coresight ETBv1.0 driver"
+	depends on CORESIGHT_LINKS_AND_SINKS
+	help
+	  This enables support for the Embedded Trace Buffer version 1.0 driver
+	  that complies with the generic implementation of the component without
+	  special enhancement or added features.
+
+config CORESIGHT_SOURCE_ETM3X
+	bool "CoreSight Embedded Trace Macrocell 3.x driver"
+	depends on !ARM64
+	select CORESIGHT_LINKS_AND_SINKS
+	help
+	  This driver provides support for processor ETM3.x and PTM1.x modules,
+	  allowing tracing of the instructions that a processor is executing.
+	  This is primarily useful for instruction level tracing.  Depending on
+	  the ETM version, data tracing may also be available.
+
+config CORESIGHT_SOURCE_ETM4X
+	bool "CoreSight Embedded Trace Macrocell 4.x driver"
+	depends on ARM64
+	select CORESIGHT_LINKS_AND_SINKS
+	help
+	  This driver provides support for the ETM4.x tracer module, tracing the
+	  instructions that a processor is executing. This is primarily useful
+	  for instruction level tracing. Depending on the implemented version
+	  data tracing may also be available.
+
+config CORESIGHT_DYNAMIC_REPLICATOR
+	bool "CoreSight Programmable Replicator driver"
+	depends on CORESIGHT_LINKS_AND_SINKS
+	help
+	  This enables support for the dynamic CoreSight replicator link driver.
+	  The programmable ATB replicator allows independent filtering of the
+	  trace data based on the traceid.
+
+config CORESIGHT_STM
+	bool "CoreSight System Trace Macrocell driver"
+	depends on (ARM && !(CPU_32v3 || CPU_32v4 || CPU_32v4T)) || ARM64
+	select CORESIGHT_LINKS_AND_SINKS
+	select STM
+	help
+	  This driver provides support for hardware assisted software
+	  instrumentation based tracing. This is primarily used for
+	  logging useful software events or data coming from various entities
+	  in the system, possibly running different OSs.
+
+config CORESIGHT_CPU_DEBUG
+	tristate "CoreSight CPU Debug driver"
+	depends on ARM || ARM64
+	depends on DEBUG_FS
+	help
+	  This driver provides support for the CoreSight CPU debug module. It
+	  is primarily used to dump the sample-based profiling registers when
+	  the system triggers a panic: the driver parses the context registers
+	  so the program counter (PC), secure state, exception level, etc. can
+	  be determined quickly. Before using the debug functionality, the
+	  platform needs to ensure the clock domain and power domain are
+	  enabled properly; please refer to
+	  Documentation/trace/coresight-cpu-debug.txt for a detailed
+	  description and usage examples.
+
+endif
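+
+# Illustrative example: a minimal configuration for ETR tracing with CATU on
+# an arm64 system could enable the following symbols, assuming the SoC
+# provides the corresponding CoreSight components:
+#   CONFIG_CORESIGHT=y
+#   CONFIG_CORESIGHT_LINKS_AND_SINKS=y
+#   CONFIG_CORESIGHT_LINK_AND_SINK_TMC=y
+#   CONFIG_CORESIGHT_CATU=y
+#   CONFIG_CORESIGHT_SOURCE_ETM4X=y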
diff --git a/drivers/hwtracing/coresight/Makefile b/drivers/hwtracing/coresight/Makefile
new file mode 100644
index 0000000..41870de
--- /dev/null
+++ b/drivers/hwtracing/coresight/Makefile
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for CoreSight drivers.
+#
+obj-$(CONFIG_CORESIGHT) += coresight.o coresight-etm-perf.o
+obj-$(CONFIG_OF) += of_coresight.o
+obj-$(CONFIG_CORESIGHT_LINK_AND_SINK_TMC) += coresight-tmc.o \
+					     coresight-tmc-etf.o \
+					     coresight-tmc-etr.o
+obj-$(CONFIG_CORESIGHT_SINK_TPIU) += coresight-tpiu.o
+obj-$(CONFIG_CORESIGHT_SINK_ETBV10) += coresight-etb10.o
+obj-$(CONFIG_CORESIGHT_LINKS_AND_SINKS) += coresight-funnel.o \
+					   coresight-replicator.o
+obj-$(CONFIG_CORESIGHT_SOURCE_ETM3X) += coresight-etm3x.o coresight-etm-cp14.o \
+					coresight-etm3x-sysfs.o
+obj-$(CONFIG_CORESIGHT_SOURCE_ETM4X) += coresight-etm4x.o \
+					coresight-etm4x-sysfs.o
+obj-$(CONFIG_CORESIGHT_DYNAMIC_REPLICATOR) += coresight-dynamic-replicator.o
+obj-$(CONFIG_CORESIGHT_STM) += coresight-stm.o
+obj-$(CONFIG_CORESIGHT_CPU_DEBUG) += coresight-cpu-debug.o
+obj-$(CONFIG_CORESIGHT_CATU) += coresight-catu.o
diff --git a/drivers/hwtracing/coresight/coresight-catu.c b/drivers/hwtracing/coresight/coresight-catu.c
new file mode 100644
index 0000000..ff94e58
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-catu.c
@@ -0,0 +1,577 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018 Arm Limited. All rights reserved.
+ *
+ * Coresight Address Translation Unit support
+ *
+ * Author: Suzuki K Poulose <suzuki.poulose@arm.com>
+ */
+
+#include <linux/amba/bus.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+
+#include "coresight-catu.h"
+#include "coresight-priv.h"
+#include "coresight-tmc.h"
+
+#define csdev_to_catu_drvdata(csdev)	\
+	dev_get_drvdata(csdev->dev.parent)
+
+/* Verbose output for CATU table contents */
+#ifdef CATU_DEBUG
+#define catu_dbg(x, ...) dev_dbg(x, __VA_ARGS__)
+#else
+#define catu_dbg(x, ...) do {} while (0)
+#endif
+
+struct catu_etr_buf {
+	struct tmc_sg_table *catu_table;
+	dma_addr_t sladdr;
+};
+
+/*
+ * CATU uses a page size of 4KB for page tables as well as data pages.
+ * Each 64bit entry in the table has the following format.
+ *
+ *	63			12	1  0
+ *	------------------------------------
+ *	|	 Address [63-12] | SBZ	| V|
+ *	------------------------------------
+ *
+ * Where bit[0] V indicates if the address is valid or not.
+ * Each 4K table page has up to 256 data page pointers, taking up to 2K
+ * of space. There are two link pointers at the end of the 4K page,
+ * pointing to the previous and next table pages respectively (i.e.,
+ * entries 510 and 511).
+ *  E.g., a table of two pages could look like:
+ *
+ *                 Table Page 0               Table Page 1
+ * SLADDR ===> x------------------x  x--> x-----------------x
+ * INADDR    ->|  Page 0      | V |  |    | Page 256    | V | <- INADDR+1M
+ *             |------------------|  |    |-----------------|
+ * INADDR+4K ->|  Page 1      | V |  |    |                 |
+ *             |------------------|  |    |-----------------|
+ *             |  Page 2      | V |  |    |                 |
+ *             |------------------|  |    |-----------------|
+ *             |   ...        | V |  |    |    ...          |
+ *             |------------------|  |    |-----------------|
+ * INADDR+1020K|  Page 255    | V |  |    |   Page 511  | V |
+ * SLADDR+2K==>|------------------|  |    |-----------------|
+ *             |  UNUSED      |   |  |    |                 |
+ *             |------------------|  |    |                 |
+ *             |  UNUSED      |   |  |    |                 |
+ *             |------------------|  |    |                 |
+ *             |    ...       |   |  |    |                 |
+ *             |------------------|  |    |-----------------|
+ *             |   IGNORED    | 0 |  |    | Table Page 0| 1 |
+ *             |------------------|  |    |-----------------|
+ *             |  Table Page 1| 1 |--x    | IGNORED     | 0 |
+ *             x------------------x       x-----------------x
+ * SLADDR+4K==>
+ *
+ * The base input address (used by the ETR, programmed in INADDR_{LO,HI})
+ * must be aligned to 1MB (the size addressable by a single page table).
+ * The CATU maps INADDR{LO:HI} to the first page in the table pointed
+ * to by SLADDR{LO:HI} and so on.
+ *
+ */
+typedef u64 cate_t;
+
+#define CATU_PAGE_SHIFT		12
+#define CATU_PAGE_SIZE		(1UL << CATU_PAGE_SHIFT)
+#define CATU_PAGES_PER_SYSPAGE	(PAGE_SIZE / CATU_PAGE_SIZE)
+
+/* Page pointers are only allocated in the first 2K half */
+#define CATU_PTRS_PER_PAGE	((CATU_PAGE_SIZE >> 1) / sizeof(cate_t))
+#define CATU_PTRS_PER_SYSPAGE	(CATU_PAGES_PER_SYSPAGE * CATU_PTRS_PER_PAGE)
+#define CATU_LINK_PREV		((CATU_PAGE_SIZE / sizeof(cate_t)) - 2)
+#define CATU_LINK_NEXT		((CATU_PAGE_SIZE / sizeof(cate_t)) - 1)
+
+#define CATU_ADDR_SHIFT		12
+#define CATU_ADDR_MASK		~(((cate_t)1 << CATU_ADDR_SHIFT) - 1)
+#define CATU_ENTRY_VALID	((cate_t)0x1)
+#define CATU_VALID_ENTRY(addr) \
+	(((cate_t)(addr) & CATU_ADDR_MASK) | CATU_ENTRY_VALID)
+#define CATU_ENTRY_ADDR(entry)	((cate_t)(entry) & ~((cate_t)CATU_ENTRY_VALID))
+
+/* CATU expects the INADDR to be aligned to 1M. */
+#define CATU_DEFAULT_INADDR	(1ULL << 20)
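+
+/*
+ * As an illustration (hypothetical values): with the default INADDR of 1MB,
+ * an ETR address of 0x00234000 corresponds to a trace buffer offset of
+ * 0x00234000 - 0x00100000 = 0x134000. The lookup then uses table number
+ * (0x134000 >> 20) = 1 and data page entry (0x134000 >> CATU_PAGE_SHIFT) %
+ * CATU_PTRS_PER_PAGE = 0x34 within that table.
+ */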
+
+/*
+ * catu_get_table : Retrieve the table pointers for the given @offset
+ * within the buffer. The buffer is wrapped around to a valid offset.
+ *
+ * Returns : The CPU virtual address for the beginning of the table
+ * containing the data page pointer for @offset. If @daddrp is not NULL,
+ * @daddrp points to the DMA address of the beginning of the table.
+ */
+static inline cate_t *catu_get_table(struct tmc_sg_table *catu_table,
+				     unsigned long offset,
+				     dma_addr_t *daddrp)
+{
+	unsigned long buf_size = tmc_sg_table_buf_size(catu_table);
+	unsigned int table_nr, pg_idx, pg_offset;
+	struct tmc_pages *table_pages = &catu_table->table_pages;
+	void *ptr;
+
+	/* Make sure offset is within the range */
+	offset %= buf_size;
+
+	/*
+	 * Each table can address 1MB and a single kernel page can
+	 * contain "CATU_PAGES_PER_SYSPAGE" CATU tables.
+	 */
+	table_nr = offset >> 20;
+	/* Find the table page where the table_nr lies in */
+	pg_idx = table_nr / CATU_PAGES_PER_SYSPAGE;
+	pg_offset = (table_nr % CATU_PAGES_PER_SYSPAGE) * CATU_PAGE_SIZE;
+	if (daddrp)
+		*daddrp = table_pages->daddrs[pg_idx] + pg_offset;
+	ptr = page_address(table_pages->pages[pg_idx]);
+	return (cate_t *)((unsigned long)ptr + pg_offset);
+}
+
+#ifdef CATU_DEBUG
+static void catu_dump_table(struct tmc_sg_table *catu_table)
+{
+	int i;
+	cate_t *table;
+	unsigned long table_end, buf_size, offset = 0;
+
+	buf_size = tmc_sg_table_buf_size(catu_table);
+	dev_dbg(catu_table->dev,
+		"Dump table %p, tdaddr: %llx\n",
+		catu_table, catu_table->table_daddr);
+
+	while (offset < buf_size) {
+		table_end = offset + SZ_1M < buf_size ?
+			    offset + SZ_1M : buf_size;
+		table = catu_get_table(catu_table, offset, NULL);
+		for (i = 0; offset < table_end; i++, offset += CATU_PAGE_SIZE)
+			dev_dbg(catu_table->dev, "%d: %llx\n", i, table[i]);
+		dev_dbg(catu_table->dev, "Prev : %llx, Next: %llx\n",
+			table[CATU_LINK_PREV], table[CATU_LINK_NEXT]);
+		dev_dbg(catu_table->dev, "== End of sub-table ===");
+	}
+	dev_dbg(catu_table->dev, "== End of Table ===");
+}
+
+#else
+static inline void catu_dump_table(struct tmc_sg_table *catu_table)
+{
+}
+#endif
+
+static inline cate_t catu_make_entry(dma_addr_t addr)
+{
+	return addr ? CATU_VALID_ENTRY(addr) : 0;
+}
+
+/*
+ * catu_populate_table : Populate the given CATU table.
+ * The table is always populated as a circular table.
+ * i.e, the "prev" link of the "first" table points to the "last"
+ * table and the "next" link of the "last" table points to the
+ * "first" table. The buffer should be made linear by calling
+ * catu_set_table().
+ */
+static void
+catu_populate_table(struct tmc_sg_table *catu_table)
+{
+	int i;
+	int sys_pidx;	/* Index to current system data page */
+	int catu_pidx;	/* Index of CATU page within the system data page */
+	unsigned long offset, buf_size, table_end;
+	dma_addr_t data_daddr;
+	dma_addr_t prev_taddr, next_taddr, cur_taddr;
+	cate_t *table_ptr, *next_table;
+
+	buf_size = tmc_sg_table_buf_size(catu_table);
+	sys_pidx = catu_pidx = 0;
+	offset = 0;
+
+	table_ptr = catu_get_table(catu_table, 0, &cur_taddr);
+	prev_taddr = 0;	/* Prev link for the first table */
+
+	while (offset < buf_size) {
+		/*
+		 * The @offset is always 1M aligned here and we have an
+		 * empty table @table_ptr to fill. Each table can address
+		 * upto 1MB data buffer. The last table may have fewer
+		 * entries if the buffer size is not aligned.
+		 */
+		table_end = (offset + SZ_1M) < buf_size ?
+			    (offset + SZ_1M) : buf_size;
+		for (i = 0; offset < table_end;
+		     i++, offset += CATU_PAGE_SIZE) {
+
+			data_daddr = catu_table->data_pages.daddrs[sys_pidx] +
+				     catu_pidx * CATU_PAGE_SIZE;
+			catu_dbg(catu_table->dev,
+				"[table %5ld:%03d] 0x%llx\n",
+				(offset >> 20), i, data_daddr);
+			table_ptr[i] = catu_make_entry(data_daddr);
+			/* Move the pointers for data pages */
+			catu_pidx = (catu_pidx + 1) % CATU_PAGES_PER_SYSPAGE;
+			if (catu_pidx == 0)
+				sys_pidx++;
+		}
+
+		/*
+		 * If we have finished all the valid entries, fill the rest of
+		 * the table (i.e., the last table page) with invalid entries,
+		 * to fail the lookups.
+		 */
+		if (offset == buf_size) {
+			memset(&table_ptr[i], 0,
+			       sizeof(cate_t) * (CATU_PTRS_PER_PAGE - i));
+			next_taddr = 0;
+		} else {
+			next_table = catu_get_table(catu_table,
+						    offset, &next_taddr);
+		}
+
+		table_ptr[CATU_LINK_PREV] = catu_make_entry(prev_taddr);
+		table_ptr[CATU_LINK_NEXT] = catu_make_entry(next_taddr);
+
+		catu_dbg(catu_table->dev,
+			"[table%5ld]: Cur: 0x%llx Prev: 0x%llx, Next: 0x%llx\n",
+			(offset >> 20) - 1,  cur_taddr, prev_taddr, next_taddr);
+
+		/* Update the prev/next addresses */
+		if (next_taddr) {
+			prev_taddr = cur_taddr;
+			cur_taddr = next_taddr;
+			table_ptr = next_table;
+		}
+	}
+
+	/* Sync the table for device */
+	tmc_sg_table_sync_table(catu_table);
+}
+
+static struct tmc_sg_table *
+catu_init_sg_table(struct device *catu_dev, int node,
+		   ssize_t size, void **pages)
+{
+	int nr_tpages;
+	struct tmc_sg_table *catu_table;
+
+	/*
+	 * Each table can address up to 1MB and we can have
+	 * CATU_PAGES_PER_SYSPAGE tables in a system page.
+	 */
+	nr_tpages = DIV_ROUND_UP(size, SZ_1M) / CATU_PAGES_PER_SYSPAGE;
+	catu_table = tmc_alloc_sg_table(catu_dev, node, nr_tpages,
+					size >> PAGE_SHIFT, pages);
+	if (IS_ERR(catu_table))
+		return catu_table;
+
+	catu_populate_table(catu_table);
+	dev_dbg(catu_dev,
+		"Setup table %p, size %ldKB, %d table pages\n",
+		catu_table, (unsigned long)size >> 10,  nr_tpages);
+	catu_dump_table(catu_table);
+	return catu_table;
+}
+
+static void catu_free_etr_buf(struct etr_buf *etr_buf)
+{
+	struct catu_etr_buf *catu_buf;
+
+	if (!etr_buf || etr_buf->mode != ETR_MODE_CATU || !etr_buf->private)
+		return;
+
+	catu_buf = etr_buf->private;
+	tmc_free_sg_table(catu_buf->catu_table);
+	kfree(catu_buf);
+}
+
+static ssize_t catu_get_data_etr_buf(struct etr_buf *etr_buf, u64 offset,
+				     size_t len, char **bufpp)
+{
+	struct catu_etr_buf *catu_buf = etr_buf->private;
+
+	return tmc_sg_table_get_data(catu_buf->catu_table, offset, len, bufpp);
+}
+
+static void catu_sync_etr_buf(struct etr_buf *etr_buf, u64 rrp, u64 rwp)
+{
+	struct catu_etr_buf *catu_buf = etr_buf->private;
+	struct tmc_sg_table *catu_table = catu_buf->catu_table;
+	u64 r_offset, w_offset;
+
+	/*
+	 * ETR started off at etr_buf->hwaddr. Convert the RRP/RWP to
+	 * offsets within the trace buffer.
+	 */
+	r_offset = rrp - etr_buf->hwaddr;
+	w_offset = rwp - etr_buf->hwaddr;
+
+	if (!etr_buf->full) {
+		etr_buf->len = w_offset - r_offset;
+		if (w_offset < r_offset)
+			etr_buf->len += etr_buf->size;
+	} else {
+		etr_buf->len = etr_buf->size;
+	}
+
+	etr_buf->offset = r_offset;
+	tmc_sg_table_sync_data_range(catu_table, r_offset, etr_buf->len);
+}
+
+static int catu_alloc_etr_buf(struct tmc_drvdata *tmc_drvdata,
+			      struct etr_buf *etr_buf, int node, void **pages)
+{
+	struct coresight_device *csdev;
+	struct device *catu_dev;
+	struct tmc_sg_table *catu_table;
+	struct catu_etr_buf *catu_buf;
+
+	csdev = tmc_etr_get_catu_device(tmc_drvdata);
+	if (!csdev)
+		return -ENODEV;
+	catu_dev = csdev->dev.parent;
+	catu_buf = kzalloc(sizeof(*catu_buf), GFP_KERNEL);
+	if (!catu_buf)
+		return -ENOMEM;
+
+	catu_table = catu_init_sg_table(catu_dev, node, etr_buf->size, pages);
+	if (IS_ERR(catu_table)) {
+		kfree(catu_buf);
+		return PTR_ERR(catu_table);
+	}
+
+	etr_buf->mode = ETR_MODE_CATU;
+	etr_buf->private = catu_buf;
+	etr_buf->hwaddr = CATU_DEFAULT_INADDR;
+
+	catu_buf->catu_table = catu_table;
+	/* Get the table base address */
+	catu_buf->sladdr = catu_table->table_daddr;
+
+	return 0;
+}
+
+const struct etr_buf_operations etr_catu_buf_ops = {
+	.alloc = catu_alloc_etr_buf,
+	.free = catu_free_etr_buf,
+	.sync = catu_sync_etr_buf,
+	.get_data = catu_get_data_etr_buf,
+};
+
+coresight_simple_reg32(struct catu_drvdata, devid, CORESIGHT_DEVID);
+coresight_simple_reg32(struct catu_drvdata, control, CATU_CONTROL);
+coresight_simple_reg32(struct catu_drvdata, status, CATU_STATUS);
+coresight_simple_reg32(struct catu_drvdata, mode, CATU_MODE);
+coresight_simple_reg32(struct catu_drvdata, axictrl, CATU_AXICTRL);
+coresight_simple_reg32(struct catu_drvdata, irqen, CATU_IRQEN);
+coresight_simple_reg64(struct catu_drvdata, sladdr,
+		       CATU_SLADDRLO, CATU_SLADDRHI);
+coresight_simple_reg64(struct catu_drvdata, inaddr,
+		       CATU_INADDRLO, CATU_INADDRHI);
+
+static struct attribute *catu_mgmt_attrs[] = {
+	&dev_attr_devid.attr,
+	&dev_attr_control.attr,
+	&dev_attr_status.attr,
+	&dev_attr_mode.attr,
+	&dev_attr_axictrl.attr,
+	&dev_attr_irqen.attr,
+	&dev_attr_sladdr.attr,
+	&dev_attr_inaddr.attr,
+	NULL,
+};
+
+static const struct attribute_group catu_mgmt_group = {
+	.attrs = catu_mgmt_attrs,
+	.name = "mgmt",
+};
+
+static const struct attribute_group *catu_groups[] = {
+	&catu_mgmt_group,
+	NULL,
+};
+
+
+static inline int catu_wait_for_ready(struct catu_drvdata *drvdata)
+{
+	return coresight_timeout(drvdata->base,
+				 CATU_STATUS, CATU_STATUS_READY, 1);
+}
+
+static int catu_enable_hw(struct catu_drvdata *drvdata, void *data)
+{
+	u32 control, mode;
+	struct etr_buf *etr_buf = data;
+
+	if (catu_wait_for_ready(drvdata))
+		dev_warn(drvdata->dev, "Timeout while waiting for READY\n");
+
+	control = catu_read_control(drvdata);
+	if (control & BIT(CATU_CONTROL_ENABLE)) {
+		dev_warn(drvdata->dev, "CATU is already enabled\n");
+		return -EBUSY;
+	}
+
+	control |= BIT(CATU_CONTROL_ENABLE);
+
+	if (etr_buf && etr_buf->mode == ETR_MODE_CATU) {
+		struct catu_etr_buf *catu_buf = etr_buf->private;
+
+		mode = CATU_MODE_TRANSLATE;
+		catu_write_axictrl(drvdata, CATU_OS_AXICTRL);
+		catu_write_sladdr(drvdata, catu_buf->sladdr);
+		catu_write_inaddr(drvdata, CATU_DEFAULT_INADDR);
+	} else {
+		mode = CATU_MODE_PASS_THROUGH;
+		catu_write_sladdr(drvdata, 0);
+		catu_write_inaddr(drvdata, 0);
+	}
+
+	catu_write_irqen(drvdata, 0);
+	catu_write_mode(drvdata, mode);
+	catu_write_control(drvdata, control);
+	dev_dbg(drvdata->dev, "Enabled in %s mode\n",
+		(mode == CATU_MODE_PASS_THROUGH) ?
+		"Pass through" :
+		"Translate");
+	return 0;
+}
+
+static int catu_enable(struct coresight_device *csdev, void *data)
+{
+	int rc;
+	struct catu_drvdata *catu_drvdata = csdev_to_catu_drvdata(csdev);
+
+	CS_UNLOCK(catu_drvdata->base);
+	rc = catu_enable_hw(catu_drvdata, data);
+	CS_LOCK(catu_drvdata->base);
+	return rc;
+}
+
+static int catu_disable_hw(struct catu_drvdata *drvdata)
+{
+	int rc = 0;
+
+	catu_write_control(drvdata, 0);
+	if (catu_wait_for_ready(drvdata)) {
+		dev_info(drvdata->dev, "Timeout while waiting for READY\n");
+		rc = -EAGAIN;
+	}
+
+	dev_dbg(drvdata->dev, "Disabled\n");
+	return rc;
+}
+
+static int catu_disable(struct coresight_device *csdev, void *__unused)
+{
+	int rc;
+	struct catu_drvdata *catu_drvdata = csdev_to_catu_drvdata(csdev);
+
+	CS_UNLOCK(catu_drvdata->base);
+	rc = catu_disable_hw(catu_drvdata);
+	CS_LOCK(catu_drvdata->base);
+	return rc;
+}
+
+const struct coresight_ops_helper catu_helper_ops = {
+	.enable = catu_enable,
+	.disable = catu_disable,
+};
+
+const struct coresight_ops catu_ops = {
+	.helper_ops = &catu_helper_ops,
+};
+
+static int catu_probe(struct amba_device *adev, const struct amba_id *id)
+{
+	int ret = 0;
+	u32 dma_mask;
+	struct catu_drvdata *drvdata;
+	struct coresight_desc catu_desc;
+	struct coresight_platform_data *pdata = NULL;
+	struct device *dev = &adev->dev;
+	struct device_node *np = dev->of_node;
+	void __iomem *base;
+
+	if (np) {
+		pdata = of_get_coresight_platform_data(dev, np);
+		if (IS_ERR(pdata)) {
+			ret = PTR_ERR(pdata);
+			goto out;
+		}
+		dev->platform_data = pdata;
+	}
+
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	drvdata->dev = dev;
+	dev_set_drvdata(dev, drvdata);
+	base = devm_ioremap_resource(dev, &adev->res);
+	if (IS_ERR(base)) {
+		ret = PTR_ERR(base);
+		goto out;
+	}
+
+	/* Setup dma mask for the device */
+	dma_mask = readl_relaxed(base + CORESIGHT_DEVID) & 0x3f;
+	switch (dma_mask) {
+	case 32:
+	case 40:
+	case 44:
+	case 48:
+	case 52:
+	case 56:
+	case 64:
+		break;
+	default:
+		/* Default to the 40bits as supported by TMC-ETR */
+		dma_mask = 40;
+	}
+	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(dma_mask));
+	if (ret)
+		goto out;
+
+	drvdata->base = base;
+	catu_desc.pdata = pdata;
+	catu_desc.dev = dev;
+	catu_desc.groups = catu_groups;
+	catu_desc.type = CORESIGHT_DEV_TYPE_HELPER;
+	catu_desc.subtype.helper_subtype = CORESIGHT_DEV_SUBTYPE_HELPER_CATU;
+	catu_desc.ops = &catu_ops;
+	drvdata->csdev = coresight_register(&catu_desc);
+	if (IS_ERR(drvdata->csdev))
+		ret = PTR_ERR(drvdata->csdev);
+out:
+	pm_runtime_put(&adev->dev);
+	return ret;
+}
+
+static struct amba_id catu_ids[] = {
+	{
+		.id	= 0x000bb9ee,
+		.mask	= 0x000fffff,
+	},
+	{},
+};
+
+static struct amba_driver catu_driver = {
+	.drv = {
+		.name			= "coresight-catu",
+		.owner			= THIS_MODULE,
+		.suppress_bind_attrs	= true,
+	},
+	.probe				= catu_probe,
+	.id_table			= catu_ids,
+};
+
+builtin_amba_driver(catu_driver);
diff --git a/drivers/hwtracing/coresight/coresight-catu.h b/drivers/hwtracing/coresight/coresight-catu.h
new file mode 100644
index 0000000..1b281f0
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-catu.h
@@ -0,0 +1,119 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 Arm Limited. All rights reserved.
+ *
+ * Author: Suzuki K Poulose <suzuki.poulose@arm.com>
+ */
+
+#ifndef _CORESIGHT_CATU_H
+#define _CORESIGHT_CATU_H
+
+#include "coresight-priv.h"
+
+/* Register offset from base */
+#define CATU_CONTROL		0x000
+#define CATU_MODE		0x004
+#define CATU_AXICTRL		0x008
+#define CATU_IRQEN		0x00c
+#define CATU_SLADDRLO		0x020
+#define CATU_SLADDRHI		0x024
+#define CATU_INADDRLO		0x028
+#define CATU_INADDRHI		0x02c
+#define CATU_STATUS		0x100
+#define CATU_DEVARCH		0xfbc
+
+#define CATU_CONTROL_ENABLE	0
+
+#define CATU_MODE_PASS_THROUGH	0U
+#define CATU_MODE_TRANSLATE	1U
+
+#define CATU_AXICTRL_ARCACHE_SHIFT	4
+#define CATU_AXICTRL_ARCACHE_MASK	0xf
+#define CATU_AXICTRL_ARPROT_MASK	0x3
+#define CATU_AXICTRL_ARCACHE(arcache)		\
+	(((arcache) & CATU_AXICTRL_ARCACHE_MASK) << CATU_AXICTRL_ARCACHE_SHIFT)
+
+#define CATU_AXICTRL_VAL(arcache, arprot)	\
+	(CATU_AXICTRL_ARCACHE(arcache) | ((arprot) & CATU_AXICTRL_ARPROT_MASK))
+
+#define AXI3_AxCACHE_WB_READ_ALLOC	0x7
+/*
+ * AXI - ARPROT bits:
+ * See AMBA AXI & ACE Protocol specification (ARM IHI 0022E)
+ * section A4.7 Access Permissions.
+ *
+ * Bit 0: 0 - Unprivileged access, 1 - Privileged access
+ * Bit 1: 0 - Secure access, 1 - Non-secure access.
+ * Bit 2: 0 - Data access, 1 - instruction access.
+ *
+ * CATU AXICTRL:ARPROT[2] is res0 as we always access data.
+ */
+#define CATU_OS_ARPROT			0x2
+
+#define CATU_OS_AXICTRL		\
+	CATU_AXICTRL_VAL(AXI3_AxCACHE_WB_READ_ALLOC, CATU_OS_ARPROT)
+
+#define CATU_STATUS_READY	8
+#define CATU_STATUS_ADRERR	0
+#define CATU_STATUS_AXIERR	4
+
+#define CATU_IRQEN_ON		0x1
+#define CATU_IRQEN_OFF		0x0
+
+struct catu_drvdata {
+	struct device *dev;
+	void __iomem *base;
+	struct coresight_device *csdev;
+	int irq;
+};
+
+#define CATU_REG32(name, offset)					\
+static inline u32							\
+catu_read_##name(struct catu_drvdata *drvdata)				\
+{									\
+	return coresight_read_reg_pair(drvdata->base, offset, -1);	\
+}									\
+static inline void							\
+catu_write_##name(struct catu_drvdata *drvdata, u32 val)		\
+{									\
+	coresight_write_reg_pair(drvdata->base, val, offset, -1);	\
+}
+
+#define CATU_REG_PAIR(name, lo_off, hi_off)				\
+static inline u64							\
+catu_read_##name(struct catu_drvdata *drvdata)				\
+{									\
+	return coresight_read_reg_pair(drvdata->base, lo_off, hi_off);	\
+}									\
+static inline void							\
+catu_write_##name(struct catu_drvdata *drvdata, u64 val)		\
+{									\
+	coresight_write_reg_pair(drvdata->base, val, lo_off, hi_off);	\
+}
+
+CATU_REG32(control, CATU_CONTROL);
+CATU_REG32(mode, CATU_MODE);
+CATU_REG32(irqen, CATU_IRQEN);
+CATU_REG32(axictrl, CATU_AXICTRL);
+CATU_REG_PAIR(sladdr, CATU_SLADDRLO, CATU_SLADDRHI)
+CATU_REG_PAIR(inaddr, CATU_INADDRLO, CATU_INADDRHI)
+
+static inline bool coresight_is_catu_device(struct coresight_device *csdev)
+{
+	if (!IS_ENABLED(CONFIG_CORESIGHT_CATU))
+		return false;
+	if (csdev->type != CORESIGHT_DEV_TYPE_HELPER)
+		return false;
+	if (csdev->subtype.helper_subtype != CORESIGHT_DEV_SUBTYPE_HELPER_CATU)
+		return false;
+	return true;
+}
+
+#ifdef CONFIG_CORESIGHT_CATU
+extern const struct etr_buf_operations etr_catu_buf_ops;
+#else
+/* Dummy declaration for the CATU ops */
+static const struct etr_buf_operations etr_catu_buf_ops;
+#endif
+
+#endif
diff --git a/drivers/hwtracing/coresight/coresight-cpu-debug.c b/drivers/hwtracing/coresight/coresight-cpu-debug.c
new file mode 100644
index 0000000..45b2460
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-cpu-debug.c
@@ -0,0 +1,688 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2017 Linaro Limited. All rights reserved.
+ *
+ * Author: Leo Yan <leo.yan@linaro.org>
+ */
+#include <linux/amba/bus.h>
+#include <linux/coresight.h>
+#include <linux/cpu.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pm_qos.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+
+#include "coresight-priv.h"
+
+#define EDPCSR				0x0A0
+#define EDCIDSR				0x0A4
+#define EDVIDSR				0x0A8
+#define EDPCSR_HI			0x0AC
+#define EDOSLAR				0x300
+#define EDPRCR				0x310
+#define EDPRSR				0x314
+#define EDDEVID1			0xFC4
+#define EDDEVID				0xFC8
+
+#define EDPCSR_PROHIBITED		0xFFFFFFFF
+
+/* bits definition for EDPCSR */
+#define EDPCSR_THUMB			BIT(0)
+#define EDPCSR_ARM_INST_MASK		GENMASK(31, 2)
+#define EDPCSR_THUMB_INST_MASK		GENMASK(31, 1)
+
+/* bits definition for EDPRCR */
+#define EDPRCR_COREPURQ			BIT(3)
+#define EDPRCR_CORENPDRQ		BIT(0)
+
+/* bits definition for EDPRSR */
+#define EDPRSR_DLK			BIT(6)
+#define EDPRSR_PU			BIT(0)
+
+/* bits definition for EDVIDSR */
+#define EDVIDSR_NS			BIT(31)
+#define EDVIDSR_E2			BIT(30)
+#define EDVIDSR_E3			BIT(29)
+#define EDVIDSR_HV			BIT(28)
+#define EDVIDSR_VMID			GENMASK(7, 0)
+
+/*
+ * bits definition for EDDEVID1:PCSROffset
+ *
+ * NOTE: ARMv8 and ARMv7 define this register differently, so the
+ * bit definitions are consolidated as below:
+ *
+ * 0b0000 - Sample offset applies based on the instruction state; we
+ *          rely on EDDEVID to check whether EDPCSR is implemented.
+ * 0b0001 - No offset applies.
+ * 0b0010 - No offset applies, but do not use in AArch32 mode.
+ *
+ */
+#define EDDEVID1_PCSR_OFFSET_MASK	GENMASK(3, 0)
+#define EDDEVID1_PCSR_OFFSET_INS_SET	(0x0)
+#define EDDEVID1_PCSR_NO_OFFSET_DIS_AARCH32	(0x2)
+
+/* bits definition for EDDEVID */
+#define EDDEVID_PCSAMPLE_MODE		GENMASK(3, 0)
+#define EDDEVID_IMPL_EDPCSR		(0x1)
+#define EDDEVID_IMPL_EDPCSR_EDCIDSR	(0x2)
+#define EDDEVID_IMPL_FULL		(0x3)
+
+#define DEBUG_WAIT_SLEEP		1000
+#define DEBUG_WAIT_TIMEOUT		32000
+
+struct debug_drvdata {
+	void __iomem	*base;
+	struct device	*dev;
+	int		cpu;
+
+	bool		edpcsr_present;
+	bool		edcidsr_present;
+	bool		edvidsr_present;
+	bool		pc_has_offset;
+
+	u32		edpcsr;
+	u32		edpcsr_hi;
+	u32		edprsr;
+	u32		edvidsr;
+	u32		edcidsr;
+};
+
+static DEFINE_MUTEX(debug_lock);
+static DEFINE_PER_CPU(struct debug_drvdata *, debug_drvdata);
+static int debug_count;
+static struct dentry *debug_debugfs_dir;
+
+static bool debug_enable;
+module_param_named(enable, debug_enable, bool, 0600);
+MODULE_PARM_DESC(enable, "Control to enable coresight CPU debug functionality");
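+
+/*
+ * The functionality is typically enabled either at boot time, by passing
+ * "coresight_cpu_debug.enable=1" on the kernel command line, or at run time
+ * through the "enable" debugfs knob created below (usually exposed as
+ * /sys/kernel/debug/coresight_cpu_debug/enable, depending on where debugfs
+ * is mounted).
+ */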
+
+static void debug_os_unlock(struct debug_drvdata *drvdata)
+{
+	/* Unlocks the debug registers */
+	writel_relaxed(0x0, drvdata->base + EDOSLAR);
+
+	/* Make sure the registers are unlocked before accessing */
+	wmb();
+}
+
+/*
+ * According to ARM DDI 0487A.k, the access permission must be
+ * checked before accessing the external debug registers; if either
+ * of the conditions below is met, the debug registers cannot be
+ * accessed without risking a lockup:
+ *
+ * - the CPU power domain is powered off;
+ * - the OS Double Lock is locked.
+ *
+ * Both conditions can be detected by reading EDPRSR.
+ */
+static bool debug_access_permitted(struct debug_drvdata *drvdata)
+{
+	/* CPU is powered off */
+	if (!(drvdata->edprsr & EDPRSR_PU))
+		return false;
+
+	/* The OS Double Lock is locked */
+	if (drvdata->edprsr & EDPRSR_DLK)
+		return false;
+
+	return true;
+}
+
+static void debug_force_cpu_powered_up(struct debug_drvdata *drvdata)
+{
+	u32 edprcr;
+
+try_again:
+
+	/*
+	 * Send request to power management controller and assert
+	 * DBGPWRUPREQ signal; if power management controller has
+	 * sane implementation, it should enable CPU power domain
+	 * in case CPU is in low power state.
+	 */
+	edprcr = readl_relaxed(drvdata->base + EDPRCR);
+	edprcr |= EDPRCR_COREPURQ;
+	writel_relaxed(edprcr, drvdata->base + EDPRCR);
+
+	/* Wait for CPU to be powered up (timeout~=32ms) */
+	if (readx_poll_timeout_atomic(readl_relaxed, drvdata->base + EDPRSR,
+			drvdata->edprsr, (drvdata->edprsr & EDPRSR_PU),
+			DEBUG_WAIT_SLEEP, DEBUG_WAIT_TIMEOUT)) {
+		/*
+		 * Unfortunately the CPU cannot be powered up, so bail out;
+		 * later accesses to the other registers will not be
+		 * permitted. In this case, CPU low power states should be
+		 * disabled to ensure the CPU power domain stays enabled!
+		 */
+		dev_err(drvdata->dev, "%s: power up request for CPU%d failed\n",
+			__func__, drvdata->cpu);
+		return;
+	}
+
+	/*
+	 * At this point the CPU is powered up, so set the no-powerdown
+	 * request bit so that power is not lost and power down is
+	 * emulated instead.
+	 */
+	edprcr = readl_relaxed(drvdata->base + EDPRCR);
+	edprcr |= EDPRCR_COREPURQ | EDPRCR_CORENPDRQ;
+	writel_relaxed(edprcr, drvdata->base + EDPRCR);
+
+	drvdata->edprsr = readl_relaxed(drvdata->base + EDPRSR);
+
+	/* The core power domain got switched off on use, try again */
+	if (unlikely(!(drvdata->edprsr & EDPRSR_PU)))
+		goto try_again;
+}
+
+static void debug_read_regs(struct debug_drvdata *drvdata)
+{
+	u32 save_edprcr;
+
+	CS_UNLOCK(drvdata->base);
+
+	/* Unlock os lock */
+	debug_os_unlock(drvdata);
+
+	/* Save EDPRCR register */
+	save_edprcr = readl_relaxed(drvdata->base + EDPRCR);
+
+	/*
+	 * Ensure the CPU power domain is enabled so that the
+	 * registers are accessible.
+	 */
+	debug_force_cpu_powered_up(drvdata);
+
+	if (!debug_access_permitted(drvdata))
+		goto out;
+
+	drvdata->edpcsr = readl_relaxed(drvdata->base + EDPCSR);
+
+	/*
+	 * As described in ARM DDI 0487A.k, if the processing
+	 * element (PE) is in debug state, or sample-based
+	 * profiling is prohibited, EDPCSR reads as 0xFFFFFFFF;
+	 * EDCIDSR, EDVIDSR and EDPCSR_HI registers also become
+	 * UNKNOWN state. So directly bail out for this case.
+	 */
+	if (drvdata->edpcsr == EDPCSR_PROHIBITED)
+		goto out;
+
+	/*
+	 * A read of the EDPCSR normally has the side-effect of
+	 * indirectly writing to EDCIDSR, EDVIDSR and EDPCSR_HI;
+	 * at this point it's safe to read their values.
+	 */
+	if (IS_ENABLED(CONFIG_64BIT))
+		drvdata->edpcsr_hi = readl_relaxed(drvdata->base + EDPCSR_HI);
+
+	if (drvdata->edcidsr_present)
+		drvdata->edcidsr = readl_relaxed(drvdata->base + EDCIDSR);
+
+	if (drvdata->edvidsr_present)
+		drvdata->edvidsr = readl_relaxed(drvdata->base + EDVIDSR);
+
+out:
+	/* Restore EDPRCR register */
+	writel_relaxed(save_edprcr, drvdata->base + EDPRCR);
+
+	CS_LOCK(drvdata->base);
+}
+
+#ifdef CONFIG_64BIT
+static unsigned long debug_adjust_pc(struct debug_drvdata *drvdata)
+{
+	return (unsigned long)drvdata->edpcsr_hi << 32 |
+	       (unsigned long)drvdata->edpcsr;
+}
+#else
+static unsigned long debug_adjust_pc(struct debug_drvdata *drvdata)
+{
+	unsigned long arm_inst_offset = 0, thumb_inst_offset = 0;
+	unsigned long pc;
+
+	pc = (unsigned long)drvdata->edpcsr;
+
+	if (drvdata->pc_has_offset) {
+		arm_inst_offset = 8;
+		thumb_inst_offset = 4;
+	}
+
+	/* Handle thumb instruction */
+	if (pc & EDPCSR_THUMB) {
+		pc = (pc & EDPCSR_THUMB_INST_MASK) - thumb_inst_offset;
+		return pc;
+	}
+
+	/*
+	 * Handle the ARM instruction offset. If the sampled address is
+	 * not 4-byte aligned, the offset is implementation defined;
+	 * keep the original value in that case and print a notice.
+	 */
+	if (pc & BIT(1))
+		dev_emerg(drvdata->dev,
+			  "Instruction offset is implementation defined\n");
+	else
+		pc = (pc & EDPCSR_ARM_INST_MASK) - arm_inst_offset;
+
+	return pc;
+}
+#endif
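+
+/*
+ * Illustration (hypothetical values): on a 32-bit CPU where
+ * EDDEVID1.PCSROffset indicates an instruction-set based offset, an
+ * ARM-state sample of 0x80001008 is adjusted to PC 0x80001000 (minus 8),
+ * while a Thumb-state sample of 0x80001009 (bit 0 set) is adjusted to
+ * PC 0x80001004 (minus 4).
+ */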
+
+static void debug_dump_regs(struct debug_drvdata *drvdata)
+{
+	struct device *dev = drvdata->dev;
+	unsigned long pc;
+
+	dev_emerg(dev, " EDPRSR:  %08x (Power:%s DLK:%s)\n",
+		  drvdata->edprsr,
+		  drvdata->edprsr & EDPRSR_PU ? "On" : "Off",
+		  drvdata->edprsr & EDPRSR_DLK ? "Lock" : "Unlock");
+
+	if (!debug_access_permitted(drvdata)) {
+		dev_emerg(dev, "No permission to access debug registers!\n");
+		return;
+	}
+
+	if (drvdata->edpcsr == EDPCSR_PROHIBITED) {
+		dev_emerg(dev, "CPU is in Debug state or profiling is prohibited!\n");
+		return;
+	}
+
+	pc = debug_adjust_pc(drvdata);
+	dev_emerg(dev, " EDPCSR:  %pS\n", (void *)pc);
+
+	if (drvdata->edcidsr_present)
+		dev_emerg(dev, " EDCIDSR: %08x\n", drvdata->edcidsr);
+
+	if (drvdata->edvidsr_present)
+		dev_emerg(dev, " EDVIDSR: %08x (State:%s Mode:%s Width:%dbits VMID:%x)\n",
+			  drvdata->edvidsr,
+			  drvdata->edvidsr & EDVIDSR_NS ?
+			  "Non-secure" : "Secure",
+			  drvdata->edvidsr & EDVIDSR_E3 ? "EL3" :
+				(drvdata->edvidsr & EDVIDSR_E2 ?
+				 "EL2" : "EL1/0"),
+			  drvdata->edvidsr & EDVIDSR_HV ? 64 : 32,
+			  drvdata->edvidsr & (u32)EDVIDSR_VMID);
+}
+
+static void debug_init_arch_data(void *info)
+{
+	struct debug_drvdata *drvdata = info;
+	u32 mode, pcsr_offset;
+	u32 eddevid, eddevid1;
+
+	CS_UNLOCK(drvdata->base);
+
+	/* Read device info */
+	eddevid  = readl_relaxed(drvdata->base + EDDEVID);
+	eddevid1 = readl_relaxed(drvdata->base + EDDEVID1);
+
+	CS_LOCK(drvdata->base);
+
+	/* Parse implementation feature */
+	mode = eddevid & EDDEVID_PCSAMPLE_MODE;
+	pcsr_offset = eddevid1 & EDDEVID1_PCSR_OFFSET_MASK;
+
+	drvdata->edpcsr_present  = false;
+	drvdata->edcidsr_present = false;
+	drvdata->edvidsr_present = false;
+	drvdata->pc_has_offset   = false;
+
+	switch (mode) {
+	case EDDEVID_IMPL_FULL:
+		drvdata->edvidsr_present = true;
+		/* Fall through */
+	case EDDEVID_IMPL_EDPCSR_EDCIDSR:
+		drvdata->edcidsr_present = true;
+		/* Fall through */
+	case EDDEVID_IMPL_EDPCSR:
+		/*
+		 * In ARM DDI 0487A.k, EDDEVID1.PCSROffset defines whether
+		 * an offset applies to the PC sample value; if
+		 * EDDEVID1.PCSROffset reads back as 0x2, the debug module
+		 * does not sample the instruction set state when an ARMv8
+		 * CPU is in AArch32 state.
+		 */
+		drvdata->edpcsr_present =
+			((IS_ENABLED(CONFIG_64BIT) && pcsr_offset != 0) ||
+			 (pcsr_offset != EDDEVID1_PCSR_NO_OFFSET_DIS_AARCH32));
+
+		drvdata->pc_has_offset =
+			(pcsr_offset == EDDEVID1_PCSR_OFFSET_INS_SET);
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Dump out information on panic.
+ */
+static int debug_notifier_call(struct notifier_block *self,
+			       unsigned long v, void *p)
+{
+	int cpu;
+	struct debug_drvdata *drvdata;
+
+	mutex_lock(&debug_lock);
+
+	/* Bail out if the functionality is disabled */
+	if (!debug_enable)
+		goto skip_dump;
+
+	pr_emerg("ARM external debug module:\n");
+
+	for_each_possible_cpu(cpu) {
+		drvdata = per_cpu(debug_drvdata, cpu);
+		if (!drvdata)
+			continue;
+
+		dev_emerg(drvdata->dev, "CPU[%d]:\n", drvdata->cpu);
+
+		debug_read_regs(drvdata);
+		debug_dump_regs(drvdata);
+	}
+
+skip_dump:
+	mutex_unlock(&debug_lock);
+	return 0;
+}
+
+static struct notifier_block debug_notifier = {
+	.notifier_call = debug_notifier_call,
+};
+
+static int debug_enable_func(void)
+{
+	struct debug_drvdata *drvdata;
+	int cpu, ret = 0;
+	cpumask_t mask;
+
+	/*
+	 * Use cpumask to track which debug power domains have
+	 * been powered on and use it to handle the failure case.
+	 */
+	cpumask_clear(&mask);
+
+	for_each_possible_cpu(cpu) {
+		drvdata = per_cpu(debug_drvdata, cpu);
+		if (!drvdata)
+			continue;
+
+		ret = pm_runtime_get_sync(drvdata->dev);
+		if (ret < 0)
+			goto err;
+		else
+			cpumask_set_cpu(cpu, &mask);
+	}
+
+	return 0;
+
+err:
+	/*
+	 * If pm_runtime_get_sync() failed, roll back all the other
+	 * CPUs that were enabled before the failure.
+	 */
+	for_each_cpu(cpu, &mask) {
+		drvdata = per_cpu(debug_drvdata, cpu);
+		pm_runtime_put_noidle(drvdata->dev);
+	}
+
+	return ret;
+}
+
+static int debug_disable_func(void)
+{
+	struct debug_drvdata *drvdata;
+	int cpu, ret, err = 0;
+
+	/*
+	 * Disable the debug power domains; record any error and keep
+	 * iterating through the remaining CPUs even when an error has
+	 * been encountered.
+	 */
+	for_each_possible_cpu(cpu) {
+		drvdata = per_cpu(debug_drvdata, cpu);
+		if (!drvdata)
+			continue;
+
+		ret = pm_runtime_put(drvdata->dev);
+		if (ret < 0)
+			err = ret;
+	}
+
+	return err;
+}
+
+static ssize_t debug_func_knob_write(struct file *f,
+		const char __user *buf, size_t count, loff_t *ppos)
+{
+	u8 val;
+	int ret;
+
+	ret = kstrtou8_from_user(buf, count, 2, &val);
+	if (ret)
+		return ret;
+
+	mutex_lock(&debug_lock);
+
+	if (val == debug_enable)
+		goto out;
+
+	if (val)
+		ret = debug_enable_func();
+	else
+		ret = debug_disable_func();
+
+	if (ret) {
+		pr_err("%s: unable to %s debug function: %d\n",
+		       __func__, val ? "enable" : "disable", ret);
+		goto err;
+	}
+
+	debug_enable = val;
+out:
+	ret = count;
+err:
+	mutex_unlock(&debug_lock);
+	return ret;
+}
+
+static ssize_t debug_func_knob_read(struct file *f,
+		char __user *ubuf, size_t count, loff_t *ppos)
+{
+	ssize_t ret;
+	char buf[3];
+
+	mutex_lock(&debug_lock);
+	snprintf(buf, sizeof(buf), "%d\n", debug_enable);
+	mutex_unlock(&debug_lock);
+
+	ret = simple_read_from_buffer(ubuf, count, ppos, buf, sizeof(buf));
+	return ret;
+}
+
+static const struct file_operations debug_func_knob_fops = {
+	.open	= simple_open,
+	.read	= debug_func_knob_read,
+	.write	= debug_func_knob_write,
+};
+
+static int debug_func_init(void)
+{
+	struct dentry *file;
+	int ret;
+
+	/* Create debugfs node */
+	debug_debugfs_dir = debugfs_create_dir("coresight_cpu_debug", NULL);
+	if (!debug_debugfs_dir) {
+		pr_err("%s: unable to create debugfs directory\n", __func__);
+		return -ENOMEM;
+	}
+
+	file = debugfs_create_file("enable", 0644, debug_debugfs_dir, NULL,
+				   &debug_func_knob_fops);
+	if (!file) {
+		pr_err("%s: unable to create enable knob file\n", __func__);
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	/* Register function to be called for panic */
+	ret = atomic_notifier_chain_register(&panic_notifier_list,
+					     &debug_notifier);
+	if (ret) {
+		pr_err("%s: unable to register notifier: %d\n",
+		       __func__, ret);
+		goto err;
+	}
+
+	return 0;
+
+err:
+	debugfs_remove_recursive(debug_debugfs_dir);
+	return ret;
+}
+
+static void debug_func_exit(void)
+{
+	atomic_notifier_chain_unregister(&panic_notifier_list,
+					 &debug_notifier);
+	debugfs_remove_recursive(debug_debugfs_dir);
+}
+
+static int debug_probe(struct amba_device *adev, const struct amba_id *id)
+{
+	void __iomem *base;
+	struct device *dev = &adev->dev;
+	struct debug_drvdata *drvdata;
+	struct resource *res = &adev->res;
+	struct device_node *np = adev->dev.of_node;
+	int ret;
+
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+
+	drvdata->cpu = np ? of_coresight_get_cpu(np) : 0;
+	if (per_cpu(debug_drvdata, drvdata->cpu)) {
+		dev_err(dev, "CPU%d drvdata has already been initialized\n",
+			drvdata->cpu);
+		return -EBUSY;
+	}
+
+	drvdata->dev = &adev->dev;
+	amba_set_drvdata(adev, drvdata);
+
+	/* Validity for the resource is already checked by the AMBA core */
+	base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	drvdata->base = base;
+
+	get_online_cpus();
+	per_cpu(debug_drvdata, drvdata->cpu) = drvdata;
+	ret = smp_call_function_single(drvdata->cpu, debug_init_arch_data,
+				       drvdata, 1);
+	put_online_cpus();
+
+	if (ret) {
+		dev_err(dev, "CPU%d debug arch init failed\n", drvdata->cpu);
+		goto err;
+	}
+
+	if (!drvdata->edpcsr_present) {
+		dev_err(dev, "CPU%d sample-based profiling isn't implemented\n",
+			drvdata->cpu);
+		ret = -ENXIO;
+		goto err;
+	}
+
+	if (!debug_count++) {
+		ret = debug_func_init();
+		if (ret)
+			goto err_func_init;
+	}
+
+	mutex_lock(&debug_lock);
+	/* Turn off debug power domain if debugging is disabled */
+	if (!debug_enable)
+		pm_runtime_put(dev);
+	mutex_unlock(&debug_lock);
+
+	dev_info(dev, "Coresight debug-CPU%d initialized\n", drvdata->cpu);
+	return 0;
+
+err_func_init:
+	debug_count--;
+err:
+	per_cpu(debug_drvdata, drvdata->cpu) = NULL;
+	return ret;
+}
+
+static int debug_remove(struct amba_device *adev)
+{
+	struct device *dev = &adev->dev;
+	struct debug_drvdata *drvdata = amba_get_drvdata(adev);
+
+	per_cpu(debug_drvdata, drvdata->cpu) = NULL;
+
+	mutex_lock(&debug_lock);
+	/* Turn off debug power domain before rmmod the module */
+	if (debug_enable)
+		pm_runtime_put(dev);
+	mutex_unlock(&debug_lock);
+
+	if (!--debug_count)
+		debug_func_exit();
+
+	return 0;
+}
+
+static const struct amba_id debug_ids[] = {
+	{       /* Debug for Cortex-A53 */
+		.id	= 0x000bbd03,
+		.mask	= 0x000fffff,
+	},
+	{       /* Debug for Cortex-A57 */
+		.id	= 0x000bbd07,
+		.mask	= 0x000fffff,
+	},
+	{       /* Debug for Cortex-A72 */
+		.id	= 0x000bbd08,
+		.mask	= 0x000fffff,
+	},
+	{ 0, 0 },
+};
+
+static struct amba_driver debug_driver = {
+	.drv = {
+		.name   = "coresight-cpu-debug",
+		.suppress_bind_attrs = true,
+	},
+	.probe		= debug_probe,
+	.remove		= debug_remove,
+	.id_table	= debug_ids,
+};
+
+module_amba_driver(debug_driver);
+
+MODULE_AUTHOR("Leo Yan <leo.yan@linaro.org>");
+MODULE_DESCRIPTION("ARM Coresight CPU Debug Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/hwtracing/coresight/coresight-dynamic-replicator.c b/drivers/hwtracing/coresight/coresight-dynamic-replicator.c
new file mode 100644
index 0000000..f6d0571
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-dynamic-replicator.c
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2011-2015, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/amba/bus.h>
+#include <linux/clk.h>
+#include <linux/coresight.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+
+#include "coresight-priv.h"
+
+#define REPLICATOR_IDFILTER0		0x000
+#define REPLICATOR_IDFILTER1		0x004
+
+/**
+ * struct replicator_state - specifics associated to a replicator component
+ * @base:	memory mapped base address for this component.
+ * @dev:	the device entity associated with this component
+ * @atclk:	optional clock for the core parts of the replicator.
+ * @csdev:	component vitals needed by the framework
+ */
+struct replicator_state {
+	void __iomem		*base;
+	struct device		*dev;
+	struct clk		*atclk;
+	struct coresight_device	*csdev;
+};
+
+static int replicator_enable(struct coresight_device *csdev, int inport,
+			      int outport)
+{
+	struct replicator_state *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	CS_UNLOCK(drvdata->base);
+
+	/*
+	 * Ensure that the other port is disabled
+	 * 0x00 - passing through the replicator unimpeded
+	 * 0xff - disable (or impede) the flow of ATB data
+	 */
+	if (outport == 0) {
+		writel_relaxed(0x00, drvdata->base + REPLICATOR_IDFILTER0);
+		writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER1);
+	} else {
+		writel_relaxed(0x00, drvdata->base + REPLICATOR_IDFILTER1);
+		writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER0);
+	}
+
+	CS_LOCK(drvdata->base);
+
+	dev_info(drvdata->dev, "REPLICATOR enabled\n");
+	return 0;
+}
+
+static void replicator_disable(struct coresight_device *csdev, int inport,
+				int outport)
+{
+	struct replicator_state *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	CS_UNLOCK(drvdata->base);
+
+	/* disable the flow of ATB data through port */
+	if (outport == 0)
+		writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER0);
+	else
+		writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER1);
+
+	CS_LOCK(drvdata->base);
+
+	dev_info(drvdata->dev, "REPLICATOR disabled\n");
+}
+
+static const struct coresight_ops_link replicator_link_ops = {
+	.enable		= replicator_enable,
+	.disable	= replicator_disable,
+};
+
+static const struct coresight_ops replicator_cs_ops = {
+	.link_ops	= &replicator_link_ops,
+};
+
+#define coresight_replicator_reg(name, offset) \
+	coresight_simple_reg32(struct replicator_state, name, offset)
+
+coresight_replicator_reg(idfilter0, REPLICATOR_IDFILTER0);
+coresight_replicator_reg(idfilter1, REPLICATOR_IDFILTER1);
+
+static struct attribute *replicator_mgmt_attrs[] = {
+	&dev_attr_idfilter0.attr,
+	&dev_attr_idfilter1.attr,
+	NULL,
+};
+
+static const struct attribute_group replicator_mgmt_group = {
+	.attrs = replicator_mgmt_attrs,
+	.name = "mgmt",
+};
+
+static const struct attribute_group *replicator_groups[] = {
+	&replicator_mgmt_group,
+	NULL,
+};
+
+static int replicator_probe(struct amba_device *adev, const struct amba_id *id)
+{
+	int ret;
+	struct device *dev = &adev->dev;
+	struct resource *res = &adev->res;
+	struct coresight_platform_data *pdata = NULL;
+	struct replicator_state *drvdata;
+	struct coresight_desc desc = { 0 };
+	struct device_node *np = adev->dev.of_node;
+	void __iomem *base;
+
+	if (np) {
+		pdata = of_get_coresight_platform_data(dev, np);
+		if (IS_ERR(pdata))
+			return PTR_ERR(pdata);
+		adev->dev.platform_data = pdata;
+	}
+
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+
+	drvdata->dev = &adev->dev;
+	drvdata->atclk = devm_clk_get(&adev->dev, "atclk"); /* optional */
+	if (!IS_ERR(drvdata->atclk)) {
+		ret = clk_prepare_enable(drvdata->atclk);
+		if (ret)
+			return ret;
+	}
+
+	/* Validity for the resource is already checked by the AMBA core */
+	base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	drvdata->base = base;
+	dev_set_drvdata(dev, drvdata);
+	pm_runtime_put(&adev->dev);
+
+	desc.type = CORESIGHT_DEV_TYPE_LINK;
+	desc.subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_SPLIT;
+	desc.ops = &replicator_cs_ops;
+	desc.pdata = adev->dev.platform_data;
+	desc.dev = &adev->dev;
+	desc.groups = replicator_groups;
+	drvdata->csdev = coresight_register(&desc);
+
+	return PTR_ERR_OR_ZERO(drvdata->csdev);
+}
+
+#ifdef CONFIG_PM
+static int replicator_runtime_suspend(struct device *dev)
+{
+	struct replicator_state *drvdata = dev_get_drvdata(dev);
+
+	if (drvdata && !IS_ERR(drvdata->atclk))
+		clk_disable_unprepare(drvdata->atclk);
+
+	return 0;
+}
+
+static int replicator_runtime_resume(struct device *dev)
+{
+	struct replicator_state *drvdata = dev_get_drvdata(dev);
+
+	if (drvdata && !IS_ERR(drvdata->atclk))
+		clk_prepare_enable(drvdata->atclk);
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops replicator_dev_pm_ops = {
+	SET_RUNTIME_PM_OPS(replicator_runtime_suspend,
+			   replicator_runtime_resume,
+			   NULL)
+};
+
+static const struct amba_id replicator_ids[] = {
+	{
+		.id     = 0x000bb909,
+		.mask   = 0x000fffff,
+	},
+	{
+		/* Coresight SoC-600 */
+		.id     = 0x000bb9ec,
+		.mask   = 0x000fffff,
+	},
+	{ 0, 0 },
+};
+
+static struct amba_driver replicator_driver = {
+	.drv = {
+		.name	= "coresight-dynamic-replicator",
+		.pm	= &replicator_dev_pm_ops,
+		.suppress_bind_attrs = true,
+	},
+	.probe		= replicator_probe,
+	.id_table	= replicator_ids,
+};
+builtin_amba_driver(replicator_driver);
diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c
new file mode 100644
index 0000000..0dad862
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-etb10.c
@@ -0,0 +1,760 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
+ *
+ * Description: CoreSight Embedded Trace Buffer driver
+ */
+
+#include <asm/local.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/pm_runtime.h>
+#include <linux/seq_file.h>
+#include <linux/coresight.h>
+#include <linux/amba/bus.h>
+#include <linux/clk.h>
+#include <linux/circ_buf.h>
+#include <linux/mm.h>
+#include <linux/perf_event.h>
+
+
+#include "coresight-priv.h"
+
+#define ETB_RAM_DEPTH_REG	0x004
+#define ETB_STATUS_REG		0x00c
+#define ETB_RAM_READ_DATA_REG	0x010
+#define ETB_RAM_READ_POINTER	0x014
+#define ETB_RAM_WRITE_POINTER	0x018
+#define ETB_TRG			0x01c
+#define ETB_CTL_REG		0x020
+#define ETB_RWD_REG		0x024
+#define ETB_FFSR		0x300
+#define ETB_FFCR		0x304
+#define ETB_ITMISCOP0		0xee0
+#define ETB_ITTRFLINACK		0xee4
+#define ETB_ITTRFLIN		0xee8
+#define ETB_ITATBDATA0		0xeeC
+#define ETB_ITATBCTR2		0xef0
+#define ETB_ITATBCTR1		0xef4
+#define ETB_ITATBCTR0		0xef8
+
+/* register description */
+/* STS - 0x00C */
+#define ETB_STATUS_RAM_FULL	BIT(0)
+/* CTL - 0x020 */
+#define ETB_CTL_CAPT_EN		BIT(0)
+/* FFCR - 0x304 */
+#define ETB_FFCR_EN_FTC		BIT(0)
+#define ETB_FFCR_FON_MAN	BIT(6)
+#define ETB_FFCR_STOP_FI	BIT(12)
+#define ETB_FFCR_STOP_TRIGGER	BIT(13)
+
+#define ETB_FFCR_BIT		6
+#define ETB_FFSR_BIT		1
+#define ETB_FRAME_SIZE_WORDS	4
+
+/**
+ * struct etb_drvdata - specifics associated to an ETB component
+ * @base:	memory mapped base address for this component.
+ * @dev:	the device entity associated to this component.
+ * @atclk:	optional clock for the core parts of the ETB.
+ * @csdev:	component vitals needed by the framework.
+ * @miscdev:	specifics to handle "/dev/xyz.etb" entry.
+ * @spinlock:	only one at a time pls.
+ * @reading:	synchronise user space access to etb buffer.
+ * @mode:	how this ETB is being used: disabled, sysFS or perf mode.
+ * @buf:	area of memory where ETB buffer content gets sent.
+ * @buffer_depth: size of @buf.
+ * @trigger_cntr: amount of words to store after a trigger.
+ */
+struct etb_drvdata {
+	void __iomem		*base;
+	struct device		*dev;
+	struct clk		*atclk;
+	struct coresight_device	*csdev;
+	struct miscdevice	miscdev;
+	spinlock_t		spinlock;
+	local_t			reading;
+	local_t			mode;
+	u8			*buf;
+	u32			buffer_depth;
+	u32			trigger_cntr;
+};
+
+static unsigned int etb_get_buffer_depth(struct etb_drvdata *drvdata)
+{
+	u32 depth = 0;
+
+	pm_runtime_get_sync(drvdata->dev);
+
+	/* RO registers don't need locking */
+	depth = readl_relaxed(drvdata->base + ETB_RAM_DEPTH_REG);
+
+	pm_runtime_put(drvdata->dev);
+	return depth;
+}
+
+static void etb_enable_hw(struct etb_drvdata *drvdata)
+{
+	int i;
+	u32 depth;
+
+	CS_UNLOCK(drvdata->base);
+
+	depth = drvdata->buffer_depth;
+	/* reset write RAM pointer address */
+	writel_relaxed(0x0, drvdata->base + ETB_RAM_WRITE_POINTER);
+	/* clear entire RAM buffer */
+	for (i = 0; i < depth; i++)
+		writel_relaxed(0x0, drvdata->base + ETB_RWD_REG);
+
+	/* reset write RAM pointer address */
+	writel_relaxed(0x0, drvdata->base + ETB_RAM_WRITE_POINTER);
+	/* reset read RAM pointer address */
+	writel_relaxed(0x0, drvdata->base + ETB_RAM_READ_POINTER);
+
+	writel_relaxed(drvdata->trigger_cntr, drvdata->base + ETB_TRG);
+	writel_relaxed(ETB_FFCR_EN_FTC | ETB_FFCR_STOP_TRIGGER,
+		       drvdata->base + ETB_FFCR);
+	/* ETB trace capture enable */
+	writel_relaxed(ETB_CTL_CAPT_EN, drvdata->base + ETB_CTL_REG);
+
+	CS_LOCK(drvdata->base);
+}
+
+static int etb_enable(struct coresight_device *csdev, u32 mode)
+{
+	u32 val;
+	unsigned long flags;
+	struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	val = local_cmpxchg(&drvdata->mode,
+			    CS_MODE_DISABLED, mode);
+	/*
+	 * When accessing from Perf, a HW buffer can be handled
+	 * by a single trace entity.  In sysFS mode many tracers
+	 * can be logging to the same HW buffer.
+	 */
+	if (val == CS_MODE_PERF)
+		return -EBUSY;
+
+	/* Don't let perf disturb sysFS sessions */
+	if (val == CS_MODE_SYSFS && mode == CS_MODE_PERF)
+		return -EBUSY;
+
+	/* Nothing to do, the ETB is already enabled. */
+	if (val == CS_MODE_SYSFS)
+		goto out;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	etb_enable_hw(drvdata);
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+out:
+	dev_info(drvdata->dev, "ETB enabled\n");
+	return 0;
+}
+
+static void etb_disable_hw(struct etb_drvdata *drvdata)
+{
+	u32 ffcr;
+
+	CS_UNLOCK(drvdata->base);
+
+	ffcr = readl_relaxed(drvdata->base + ETB_FFCR);
+	/* stop formatter when a stop has completed */
+	ffcr |= ETB_FFCR_STOP_FI;
+	writel_relaxed(ffcr, drvdata->base + ETB_FFCR);
+	/* manually generate a flush of the system */
+	ffcr |= ETB_FFCR_FON_MAN;
+	writel_relaxed(ffcr, drvdata->base + ETB_FFCR);
+
+	if (coresight_timeout(drvdata->base, ETB_FFCR, ETB_FFCR_BIT, 0)) {
+		dev_err(drvdata->dev,
+		"timeout while waiting for completion of Manual Flush\n");
+	}
+
+	/* disable trace capture */
+	writel_relaxed(0x0, drvdata->base + ETB_CTL_REG);
+
+	if (coresight_timeout(drvdata->base, ETB_FFSR, ETB_FFSR_BIT, 1)) {
+		dev_err(drvdata->dev,
+			"timeout while waiting for Formatter to Stop\n");
+	}
+
+	CS_LOCK(drvdata->base);
+}
+
+static void etb_dump_hw(struct etb_drvdata *drvdata)
+{
+	bool lost = false;
+	int i;
+	u8 *buf_ptr;
+	u32 read_data, depth;
+	u32 read_ptr, write_ptr;
+	u32 frame_off, frame_endoff;
+
+	CS_UNLOCK(drvdata->base);
+
+	read_ptr = readl_relaxed(drvdata->base + ETB_RAM_READ_POINTER);
+	write_ptr = readl_relaxed(drvdata->base + ETB_RAM_WRITE_POINTER);
+
+	frame_off = write_ptr % ETB_FRAME_SIZE_WORDS;
+	frame_endoff = ETB_FRAME_SIZE_WORDS - frame_off;
+	if (frame_off) {
+		dev_err(drvdata->dev,
+			"write_ptr: %lu not aligned to formatter frame size\n",
+			(unsigned long)write_ptr);
+		dev_err(drvdata->dev, "frameoff: %lu, frame_endoff: %lu\n",
+			(unsigned long)frame_off, (unsigned long)frame_endoff);
+		write_ptr += frame_endoff;
+	}
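+	/*
+	 * Illustrative example: with a frame size of four words
+	 * (ETB_FRAME_SIZE_WORDS) and write_ptr == 10, frame_off is 2 and
+	 * frame_endoff is 2; write_ptr is rounded up to 12 and the two
+	 * padding words are zeroed further down once the RAM has been
+	 * drained.
+	 */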
+
+	if ((readl_relaxed(drvdata->base + ETB_STATUS_REG)
+		      & ETB_STATUS_RAM_FULL) == 0) {
+		writel_relaxed(0x0, drvdata->base + ETB_RAM_READ_POINTER);
+	} else {
+		writel_relaxed(write_ptr, drvdata->base + ETB_RAM_READ_POINTER);
+		lost = true;
+	}
+
+	depth = drvdata->buffer_depth;
+	buf_ptr = drvdata->buf;
+	for (i = 0; i < depth; i++) {
+		read_data = readl_relaxed(drvdata->base +
+					  ETB_RAM_READ_DATA_REG);
+		*(u32 *)buf_ptr = read_data;
+		buf_ptr += 4;
+	}
+
+	if (lost)
+		coresight_insert_barrier_packet(drvdata->buf);
+
+	if (frame_off) {
+		buf_ptr -= (frame_endoff * 4);
+		for (i = 0; i < frame_endoff; i++) {
+			*buf_ptr++ = 0x0;
+			*buf_ptr++ = 0x0;
+			*buf_ptr++ = 0x0;
+			*buf_ptr++ = 0x0;
+		}
+	}
+
+	writel_relaxed(read_ptr, drvdata->base + ETB_RAM_READ_POINTER);
+
+	CS_LOCK(drvdata->base);
+}
+
+static void etb_disable(struct coresight_device *csdev)
+{
+	struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	unsigned long flags;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	etb_disable_hw(drvdata);
+	etb_dump_hw(drvdata);
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	local_set(&drvdata->mode, CS_MODE_DISABLED);
+
+	dev_info(drvdata->dev, "ETB disabled\n");
+}
+
+static void *etb_alloc_buffer(struct coresight_device *csdev, int cpu,
+			      void **pages, int nr_pages, bool overwrite)
+{
+	int node;
+	struct cs_buffers *buf;
+
+	if (cpu == -1)
+		cpu = smp_processor_id();
+	node = cpu_to_node(cpu);
+
+	buf = kzalloc_node(sizeof(struct cs_buffers), GFP_KERNEL, node);
+	if (!buf)
+		return NULL;
+
+	buf->snapshot = overwrite;
+	buf->nr_pages = nr_pages;
+	buf->data_pages = pages;
+
+	return buf;
+}
+
+static void etb_free_buffer(void *config)
+{
+	struct cs_buffers *buf = config;
+
+	kfree(buf);
+}
+
+static int etb_set_buffer(struct coresight_device *csdev,
+			  struct perf_output_handle *handle,
+			  void *sink_config)
+{
+	int ret = 0;
+	unsigned long head;
+	struct cs_buffers *buf = sink_config;
+
+	/* wrap head around to the amount of space we have */
+	head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1);
+
+	/* find the page to write to */
+	buf->cur = head / PAGE_SIZE;
+
+	/* and offset within that page */
+	buf->offset = head % PAGE_SIZE;
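+	/*
+	 * Example, assuming 4K pages: with nr_pages == 4 the head is masked
+	 * to 0..16383; a head of 9000 maps to buf->cur == 2 and
+	 * buf->offset == 808.
+	 */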
+
+	local_set(&buf->data_size, 0);
+
+	return ret;
+}
+
+static unsigned long etb_reset_buffer(struct coresight_device *csdev,
+				      struct perf_output_handle *handle,
+				      void *sink_config)
+{
+	unsigned long size = 0;
+	struct cs_buffers *buf = sink_config;
+
+	if (buf) {
+		/*
+		 * In snapshot mode ->data_size holds the new address of the
+		 * ring buffer's head.  The size itself is the whole address
+		 * range since we want the latest information.
+		 */
+		if (buf->snapshot)
+			handle->head = local_xchg(&buf->data_size,
+						  buf->nr_pages << PAGE_SHIFT);
+
+		/*
+		 * Tell the tracer PMU how much we got in this run and if
+		 * something went wrong along the way.  Nobody else can use
+		 * this cs_buffers instance until we are done.  As such
+		 * resetting parameters here and squaring off with the ring
+		 * buffer API in the tracer PMU is fine.
+		 */
+		size = local_xchg(&buf->data_size, 0);
+	}
+
+	return size;
+}
+
+static void etb_update_buffer(struct coresight_device *csdev,
+			      struct perf_output_handle *handle,
+			      void *sink_config)
+{
+	bool lost = false;
+	int i, cur;
+	u8 *buf_ptr;
+	const u32 *barrier;
+	u32 read_ptr, write_ptr, capacity;
+	u32 status, read_data, to_read;
+	unsigned long offset;
+	struct cs_buffers *buf = sink_config;
+	struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	if (!buf)
+		return;
+
+	capacity = drvdata->buffer_depth * ETB_FRAME_SIZE_WORDS;
+
+	etb_disable_hw(drvdata);
+	CS_UNLOCK(drvdata->base);
+
+	/* unit is in words, not bytes */
+	read_ptr = readl_relaxed(drvdata->base + ETB_RAM_READ_POINTER);
+	write_ptr = readl_relaxed(drvdata->base + ETB_RAM_WRITE_POINTER);
+
+	/*
+	 * Entries should be aligned to the frame size.  If they are not
+	 * go back to the last alignment point to give decoding tools a
+	 * chance to fix things.
+	 */
+	if (write_ptr % ETB_FRAME_SIZE_WORDS) {
+		dev_err(drvdata->dev,
+			"write_ptr: %lu not aligned to formatter frame size\n",
+			(unsigned long)write_ptr);
+
+		write_ptr &= ~(ETB_FRAME_SIZE_WORDS - 1);
+		lost = true;
+	}
+
+	/*
+	 * Get a hold of the status register and see if a wrap around
+	 * has occurred.  If so adjust things accordingly.  Otherwise
+	 * start at the beginning and go until the write pointer has
+	 * been reached.
+	 */
+	status = readl_relaxed(drvdata->base + ETB_STATUS_REG);
+	if (status & ETB_STATUS_RAM_FULL) {
+		lost = true;
+		to_read = capacity;
+		read_ptr = write_ptr;
+	} else {
+		to_read = CIRC_CNT(write_ptr, read_ptr, drvdata->buffer_depth);
+		to_read *= ETB_FRAME_SIZE_WORDS;
+	}
+
+	/*
+	 * Make sure we don't overwrite data that hasn't been consumed yet.
+	 * It is entirely possible that the HW buffer has more data than the
+	 * ring buffer can currently handle.  If so adjust the start address
+	 * to take only the last traces.
+	 *
+	 * In snapshot mode we are looking to get the latest traces only and as
+	 * such, we don't care about not overwriting data that hasn't been
+	 * processed by user space.
+	 */
+	if (!buf->snapshot && to_read > handle->size) {
+		u32 mask = ~(ETB_FRAME_SIZE_WORDS - 1);
+
+		/* The new read pointer must be frame size aligned */
+		to_read = handle->size & mask;
+		/*
+		 * Move the RAM read pointer up, keeping in mind that
+		 * everything is in frame size units.
+		 */
+		read_ptr = (write_ptr + drvdata->buffer_depth) -
+					to_read / ETB_FRAME_SIZE_WORDS;
+		/* Wrap around if need be */
+		if (read_ptr > (drvdata->buffer_depth - 1))
+			read_ptr -= drvdata->buffer_depth;
+		/* let the decoder know we've skipped ahead */
+		lost = true;
+	}
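+	/*
+	 * Worked example with illustrative numbers: for a buffer depth of
+	 * 1024 words, write_ptr == 8 and to_read trimmed to 64 bytes
+	 * (16 words), read_ptr becomes (8 + 1024) - 64/4 = 1016, i.e. the
+	 * read pointer ends up 16 words behind the write pointer once the
+	 * wrap around is accounted for.
+	 */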
+
+	if (lost)
+		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
+
+	/* finally tell HW where we want to start reading from */
+	writel_relaxed(read_ptr, drvdata->base + ETB_RAM_READ_POINTER);
+
+	cur = buf->cur;
+	offset = buf->offset;
+	barrier = barrier_pkt;
+
+	for (i = 0; i < to_read; i += 4) {
+		buf_ptr = buf->data_pages[cur] + offset;
+		read_data = readl_relaxed(drvdata->base +
+					  ETB_RAM_READ_DATA_REG);
+		if (lost && i < CORESIGHT_BARRIER_PKT_SIZE) {
+			read_data = *barrier;
+			barrier++;
+		}
+
+		*(u32 *)buf_ptr = read_data;
+		buf_ptr += 4;
+
+		offset += 4;
+		if (offset >= PAGE_SIZE) {
+			offset = 0;
+			cur++;
+			/* wrap around at the end of the buffer */
+			cur &= buf->nr_pages - 1;
+		}
+	}
+
+	/* reset ETB buffer for next run */
+	writel_relaxed(0x0, drvdata->base + ETB_RAM_READ_POINTER);
+	writel_relaxed(0x0, drvdata->base + ETB_RAM_WRITE_POINTER);
+
+	/*
+	 * In snapshot mode all we have to do is communicate to
+	 * perf_aux_output_end() the address of the current head.  In full
+	 * trace mode the same function expects a size to move rb->aux_head
+	 * forward.
+	 */
+	if (buf->snapshot)
+		local_set(&buf->data_size, (cur * PAGE_SIZE) + offset);
+	else
+		local_add(to_read, &buf->data_size);
+
+	etb_enable_hw(drvdata);
+	CS_LOCK(drvdata->base);
+}
+
+static const struct coresight_ops_sink etb_sink_ops = {
+	.enable		= etb_enable,
+	.disable	= etb_disable,
+	.alloc_buffer	= etb_alloc_buffer,
+	.free_buffer	= etb_free_buffer,
+	.set_buffer	= etb_set_buffer,
+	.reset_buffer	= etb_reset_buffer,
+	.update_buffer	= etb_update_buffer,
+};
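+
+/*
+ * Note that enable/disable serve both sysFS and perf sessions, while the
+ * buffer callbacks above are only exercised from the perf AUX path (see
+ * coresight-etm-perf.c).
+ */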
+
+static const struct coresight_ops etb_cs_ops = {
+	.sink_ops	= &etb_sink_ops,
+};
+
+static void etb_dump(struct etb_drvdata *drvdata)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	if (local_read(&drvdata->mode) == CS_MODE_SYSFS) {
+		etb_disable_hw(drvdata);
+		etb_dump_hw(drvdata);
+		etb_enable_hw(drvdata);
+	}
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	dev_info(drvdata->dev, "ETB dumped\n");
+}
+
+static int etb_open(struct inode *inode, struct file *file)
+{
+	struct etb_drvdata *drvdata = container_of(file->private_data,
+						   struct etb_drvdata, miscdev);
+
+	if (local_cmpxchg(&drvdata->reading, 0, 1))
+		return -EBUSY;
+
+	dev_dbg(drvdata->dev, "%s: successfully opened\n", __func__);
+	return 0;
+}
+
+static ssize_t etb_read(struct file *file, char __user *data,
+				size_t len, loff_t *ppos)
+{
+	u32 depth;
+	struct etb_drvdata *drvdata = container_of(file->private_data,
+						   struct etb_drvdata, miscdev);
+
+	etb_dump(drvdata);
+
+	depth = drvdata->buffer_depth;
+	if (*ppos + len > depth * 4)
+		len = depth * 4 - *ppos;
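+	/*
+	 * e.g. for a 2048-word ETB (8192 bytes of trace data), a 4096 byte
+	 * read at *ppos == 6144 is trimmed to the 2048 bytes that remain.
+	 */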
+
+	if (copy_to_user(data, drvdata->buf + *ppos, len)) {
+		dev_dbg(drvdata->dev, "%s: copy_to_user failed\n", __func__);
+		return -EFAULT;
+	}
+
+	*ppos += len;
+
+	dev_dbg(drvdata->dev, "%s: %zu bytes copied, %d bytes left\n",
+		__func__, len, (int)(depth * 4 - *ppos));
+	return len;
+}
+
+static int etb_release(struct inode *inode, struct file *file)
+{
+	struct etb_drvdata *drvdata = container_of(file->private_data,
+						   struct etb_drvdata, miscdev);
+	local_set(&drvdata->reading, 0);
+
+	dev_dbg(drvdata->dev, "%s: released\n", __func__);
+	return 0;
+}
+
+static const struct file_operations etb_fops = {
+	.owner		= THIS_MODULE,
+	.open		= etb_open,
+	.read		= etb_read,
+	.release	= etb_release,
+	.llseek		= no_llseek,
+};
+
+#define coresight_etb10_reg(name, offset)		\
+	coresight_simple_reg32(struct etb_drvdata, name, offset)
+
+coresight_etb10_reg(rdp, ETB_RAM_DEPTH_REG);
+coresight_etb10_reg(sts, ETB_STATUS_REG);
+coresight_etb10_reg(rrp, ETB_RAM_READ_POINTER);
+coresight_etb10_reg(rwp, ETB_RAM_WRITE_POINTER);
+coresight_etb10_reg(trg, ETB_TRG);
+coresight_etb10_reg(ctl, ETB_CTL_REG);
+coresight_etb10_reg(ffsr, ETB_FFSR);
+coresight_etb10_reg(ffcr, ETB_FFCR);
+
+static struct attribute *coresight_etb_mgmt_attrs[] = {
+	&dev_attr_rdp.attr,
+	&dev_attr_sts.attr,
+	&dev_attr_rrp.attr,
+	&dev_attr_rwp.attr,
+	&dev_attr_trg.attr,
+	&dev_attr_ctl.attr,
+	&dev_attr_ffsr.attr,
+	&dev_attr_ffcr.attr,
+	NULL,
+};
+
+static ssize_t trigger_cntr_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct etb_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	unsigned long val = drvdata->trigger_cntr;
+
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t trigger_cntr_store(struct device *dev,
+			     struct device_attribute *attr,
+			     const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etb_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	drvdata->trigger_cntr = val;
+	return size;
+}
+static DEVICE_ATTR_RW(trigger_cntr);
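+
+/*
+ * Typical usage from user space (the device name under the coresight bus is
+ * platform dependent, so the path below is only an illustration):
+ *
+ *   echo 0x40 > /sys/bus/coresight/devices/<etb>/trigger_cntr
+ *   cat /sys/bus/coresight/devices/<etb>/trigger_cntr   -> 0x40
+ */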
+
+static struct attribute *coresight_etb_attrs[] = {
+	&dev_attr_trigger_cntr.attr,
+	NULL,
+};
+
+static const struct attribute_group coresight_etb_group = {
+	.attrs = coresight_etb_attrs,
+};
+
+static const struct attribute_group coresight_etb_mgmt_group = {
+	.attrs = coresight_etb_mgmt_attrs,
+	.name = "mgmt",
+};
+
+const struct attribute_group *coresight_etb_groups[] = {
+	&coresight_etb_group,
+	&coresight_etb_mgmt_group,
+	NULL,
+};
+
+static int etb_probe(struct amba_device *adev, const struct amba_id *id)
+{
+	int ret;
+	void __iomem *base;
+	struct device *dev = &adev->dev;
+	struct coresight_platform_data *pdata = NULL;
+	struct etb_drvdata *drvdata;
+	struct resource *res = &adev->res;
+	struct coresight_desc desc = { 0 };
+	struct device_node *np = adev->dev.of_node;
+
+	if (np) {
+		pdata = of_get_coresight_platform_data(dev, np);
+		if (IS_ERR(pdata))
+			return PTR_ERR(pdata);
+		adev->dev.platform_data = pdata;
+	}
+
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+
+	drvdata->dev = &adev->dev;
+	drvdata->atclk = devm_clk_get(&adev->dev, "atclk"); /* optional */
+	if (!IS_ERR(drvdata->atclk)) {
+		ret = clk_prepare_enable(drvdata->atclk);
+		if (ret)
+			return ret;
+	}
+	dev_set_drvdata(dev, drvdata);
+
+	/* validity for the resource is already checked by the AMBA core */
+	base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	drvdata->base = base;
+
+	spin_lock_init(&drvdata->spinlock);
+
+	drvdata->buffer_depth = etb_get_buffer_depth(drvdata);
+	pm_runtime_put(&adev->dev);
+
+	if (drvdata->buffer_depth & 0x80000000)
+		return -EINVAL;
+
+	drvdata->buf = devm_kcalloc(dev,
+				    drvdata->buffer_depth, 4, GFP_KERNEL);
+	if (!drvdata->buf)
+		return -ENOMEM;
+
+	desc.type = CORESIGHT_DEV_TYPE_SINK;
+	desc.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_BUFFER;
+	desc.ops = &etb_cs_ops;
+	desc.pdata = pdata;
+	desc.dev = dev;
+	desc.groups = coresight_etb_groups;
+	drvdata->csdev = coresight_register(&desc);
+	if (IS_ERR(drvdata->csdev))
+		return PTR_ERR(drvdata->csdev);
+
+	drvdata->miscdev.name = pdata->name;
+	drvdata->miscdev.minor = MISC_DYNAMIC_MINOR;
+	drvdata->miscdev.fops = &etb_fops;
+	ret = misc_register(&drvdata->miscdev);
+	if (ret)
+		goto err_misc_register;
+
+	return 0;
+
+err_misc_register:
+	coresight_unregister(drvdata->csdev);
+	return ret;
+}
+
+#ifdef CONFIG_PM
+static int etb_runtime_suspend(struct device *dev)
+{
+	struct etb_drvdata *drvdata = dev_get_drvdata(dev);
+
+	if (drvdata && !IS_ERR(drvdata->atclk))
+		clk_disable_unprepare(drvdata->atclk);
+
+	return 0;
+}
+
+static int etb_runtime_resume(struct device *dev)
+{
+	struct etb_drvdata *drvdata = dev_get_drvdata(dev);
+
+	if (drvdata && !IS_ERR(drvdata->atclk))
+		clk_prepare_enable(drvdata->atclk);
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops etb_dev_pm_ops = {
+	SET_RUNTIME_PM_OPS(etb_runtime_suspend, etb_runtime_resume, NULL)
+};
+
+static const struct amba_id etb_ids[] = {
+	{
+		.id	= 0x000bb907,
+		.mask	= 0x000fffff,
+	},
+	{ 0, 0},
+};
+
+static struct amba_driver etb_driver = {
+	.drv = {
+		.name	= "coresight-etb10",
+		.owner	= THIS_MODULE,
+		.pm	= &etb_dev_pm_ops,
+		.suppress_bind_attrs = true,
+	},
+	.probe		= etb_probe,
+	.id_table	= etb_ids,
+};
+builtin_amba_driver(etb_driver);
diff --git a/drivers/hwtracing/coresight/coresight-etm-cp14.c b/drivers/hwtracing/coresight/coresight-etm-cp14.c
new file mode 100644
index 0000000..4174a8d
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-etm-cp14.c
@@ -0,0 +1,584 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2012, The Linux Foundation. All rights reserved.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/bug.h>
+#include <asm/hardware/cp14.h>
+
+#include "coresight-etm.h"
+
+int etm_readl_cp14(u32 reg, unsigned int *val)
+{
+	switch (reg) {
+	case ETMCR:
+		*val = etm_read(ETMCR);
+		return 0;
+	case ETMCCR:
+		*val = etm_read(ETMCCR);
+		return 0;
+	case ETMTRIGGER:
+		*val = etm_read(ETMTRIGGER);
+		return 0;
+	case ETMSR:
+		*val = etm_read(ETMSR);
+		return 0;
+	case ETMSCR:
+		*val = etm_read(ETMSCR);
+		return 0;
+	case ETMTSSCR:
+		*val = etm_read(ETMTSSCR);
+		return 0;
+	case ETMTEEVR:
+		*val = etm_read(ETMTEEVR);
+		return 0;
+	case ETMTECR1:
+		*val = etm_read(ETMTECR1);
+		return 0;
+	case ETMFFLR:
+		*val = etm_read(ETMFFLR);
+		return 0;
+	case ETMACVRn(0):
+		*val = etm_read(ETMACVR0);
+		return 0;
+	case ETMACVRn(1):
+		*val = etm_read(ETMACVR1);
+		return 0;
+	case ETMACVRn(2):
+		*val = etm_read(ETMACVR2);
+		return 0;
+	case ETMACVRn(3):
+		*val = etm_read(ETMACVR3);
+		return 0;
+	case ETMACVRn(4):
+		*val = etm_read(ETMACVR4);
+		return 0;
+	case ETMACVRn(5):
+		*val = etm_read(ETMACVR5);
+		return 0;
+	case ETMACVRn(6):
+		*val = etm_read(ETMACVR6);
+		return 0;
+	case ETMACVRn(7):
+		*val = etm_read(ETMACVR7);
+		return 0;
+	case ETMACVRn(8):
+		*val = etm_read(ETMACVR8);
+		return 0;
+	case ETMACVRn(9):
+		*val = etm_read(ETMACVR9);
+		return 0;
+	case ETMACVRn(10):
+		*val = etm_read(ETMACVR10);
+		return 0;
+	case ETMACVRn(11):
+		*val = etm_read(ETMACVR11);
+		return 0;
+	case ETMACVRn(12):
+		*val = etm_read(ETMACVR12);
+		return 0;
+	case ETMACVRn(13):
+		*val = etm_read(ETMACVR13);
+		return 0;
+	case ETMACVRn(14):
+		*val = etm_read(ETMACVR14);
+		return 0;
+	case ETMACVRn(15):
+		*val = etm_read(ETMACVR15);
+		return 0;
+	case ETMACTRn(0):
+		*val = etm_read(ETMACTR0);
+		return 0;
+	case ETMACTRn(1):
+		*val = etm_read(ETMACTR1);
+		return 0;
+	case ETMACTRn(2):
+		*val = etm_read(ETMACTR2);
+		return 0;
+	case ETMACTRn(3):
+		*val = etm_read(ETMACTR3);
+		return 0;
+	case ETMACTRn(4):
+		*val = etm_read(ETMACTR4);
+		return 0;
+	case ETMACTRn(5):
+		*val = etm_read(ETMACTR5);
+		return 0;
+	case ETMACTRn(6):
+		*val = etm_read(ETMACTR6);
+		return 0;
+	case ETMACTRn(7):
+		*val = etm_read(ETMACTR7);
+		return 0;
+	case ETMACTRn(8):
+		*val = etm_read(ETMACTR8);
+		return 0;
+	case ETMACTRn(9):
+		*val = etm_read(ETMACTR9);
+		return 0;
+	case ETMACTRn(10):
+		*val = etm_read(ETMACTR10);
+		return 0;
+	case ETMACTRn(11):
+		*val = etm_read(ETMACTR11);
+		return 0;
+	case ETMACTRn(12):
+		*val = etm_read(ETMACTR12);
+		return 0;
+	case ETMACTRn(13):
+		*val = etm_read(ETMACTR13);
+		return 0;
+	case ETMACTRn(14):
+		*val = etm_read(ETMACTR14);
+		return 0;
+	case ETMACTRn(15):
+		*val = etm_read(ETMACTR15);
+		return 0;
+	case ETMCNTRLDVRn(0):
+		*val = etm_read(ETMCNTRLDVR0);
+		return 0;
+	case ETMCNTRLDVRn(1):
+		*val = etm_read(ETMCNTRLDVR1);
+		return 0;
+	case ETMCNTRLDVRn(2):
+		*val = etm_read(ETMCNTRLDVR2);
+		return 0;
+	case ETMCNTRLDVRn(3):
+		*val = etm_read(ETMCNTRLDVR3);
+		return 0;
+	case ETMCNTENRn(0):
+		*val = etm_read(ETMCNTENR0);
+		return 0;
+	case ETMCNTENRn(1):
+		*val = etm_read(ETMCNTENR1);
+		return 0;
+	case ETMCNTENRn(2):
+		*val = etm_read(ETMCNTENR2);
+		return 0;
+	case ETMCNTENRn(3):
+		*val = etm_read(ETMCNTENR3);
+		return 0;
+	case ETMCNTRLDEVRn(0):
+		*val = etm_read(ETMCNTRLDEVR0);
+		return 0;
+	case ETMCNTRLDEVRn(1):
+		*val = etm_read(ETMCNTRLDEVR1);
+		return 0;
+	case ETMCNTRLDEVRn(2):
+		*val = etm_read(ETMCNTRLDEVR2);
+		return 0;
+	case ETMCNTRLDEVRn(3):
+		*val = etm_read(ETMCNTRLDEVR3);
+		return 0;
+	case ETMCNTVRn(0):
+		*val = etm_read(ETMCNTVR0);
+		return 0;
+	case ETMCNTVRn(1):
+		*val = etm_read(ETMCNTVR1);
+		return 0;
+	case ETMCNTVRn(2):
+		*val = etm_read(ETMCNTVR2);
+		return 0;
+	case ETMCNTVRn(3):
+		*val = etm_read(ETMCNTVR3);
+		return 0;
+	case ETMSQ12EVR:
+		*val = etm_read(ETMSQ12EVR);
+		return 0;
+	case ETMSQ21EVR:
+		*val = etm_read(ETMSQ21EVR);
+		return 0;
+	case ETMSQ23EVR:
+		*val = etm_read(ETMSQ23EVR);
+		return 0;
+	case ETMSQ31EVR:
+		*val = etm_read(ETMSQ31EVR);
+		return 0;
+	case ETMSQ32EVR:
+		*val = etm_read(ETMSQ32EVR);
+		return 0;
+	case ETMSQ13EVR:
+		*val = etm_read(ETMSQ13EVR);
+		return 0;
+	case ETMSQR:
+		*val = etm_read(ETMSQR);
+		return 0;
+	case ETMEXTOUTEVRn(0):
+		*val = etm_read(ETMEXTOUTEVR0);
+		return 0;
+	case ETMEXTOUTEVRn(1):
+		*val = etm_read(ETMEXTOUTEVR1);
+		return 0;
+	case ETMEXTOUTEVRn(2):
+		*val = etm_read(ETMEXTOUTEVR2);
+		return 0;
+	case ETMEXTOUTEVRn(3):
+		*val = etm_read(ETMEXTOUTEVR3);
+		return 0;
+	case ETMCIDCVRn(0):
+		*val = etm_read(ETMCIDCVR0);
+		return 0;
+	case ETMCIDCVRn(1):
+		*val = etm_read(ETMCIDCVR1);
+		return 0;
+	case ETMCIDCVRn(2):
+		*val = etm_read(ETMCIDCVR2);
+		return 0;
+	case ETMCIDCMR:
+		*val = etm_read(ETMCIDCMR);
+		return 0;
+	case ETMIMPSPEC0:
+		*val = etm_read(ETMIMPSPEC0);
+		return 0;
+	case ETMIMPSPEC1:
+		*val = etm_read(ETMIMPSPEC1);
+		return 0;
+	case ETMIMPSPEC2:
+		*val = etm_read(ETMIMPSPEC2);
+		return 0;
+	case ETMIMPSPEC3:
+		*val = etm_read(ETMIMPSPEC3);
+		return 0;
+	case ETMIMPSPEC4:
+		*val = etm_read(ETMIMPSPEC4);
+		return 0;
+	case ETMIMPSPEC5:
+		*val = etm_read(ETMIMPSPEC5);
+		return 0;
+	case ETMIMPSPEC6:
+		*val = etm_read(ETMIMPSPEC6);
+		return 0;
+	case ETMIMPSPEC7:
+		*val = etm_read(ETMIMPSPEC7);
+		return 0;
+	case ETMSYNCFR:
+		*val = etm_read(ETMSYNCFR);
+		return 0;
+	case ETMIDR:
+		*val = etm_read(ETMIDR);
+		return 0;
+	case ETMCCER:
+		*val = etm_read(ETMCCER);
+		return 0;
+	case ETMEXTINSELR:
+		*val = etm_read(ETMEXTINSELR);
+		return 0;
+	case ETMTESSEICR:
+		*val = etm_read(ETMTESSEICR);
+		return 0;
+	case ETMEIBCR:
+		*val = etm_read(ETMEIBCR);
+		return 0;
+	case ETMTSEVR:
+		*val = etm_read(ETMTSEVR);
+		return 0;
+	case ETMAUXCR:
+		*val = etm_read(ETMAUXCR);
+		return 0;
+	case ETMTRACEIDR:
+		*val = etm_read(ETMTRACEIDR);
+		return 0;
+	case ETMVMIDCVR:
+		*val = etm_read(ETMVMIDCVR);
+		return 0;
+	case ETMOSLSR:
+		*val = etm_read(ETMOSLSR);
+		return 0;
+	case ETMOSSRR:
+		*val = etm_read(ETMOSSRR);
+		return 0;
+	case ETMPDCR:
+		*val = etm_read(ETMPDCR);
+		return 0;
+	case ETMPDSR:
+		*val = etm_read(ETMPDSR);
+		return 0;
+	default:
+		*val = 0;
+		return -EINVAL;
+	}
+}
+
+int etm_writel_cp14(u32 reg, u32 val)
+{
+	switch (reg) {
+	case ETMCR:
+		etm_write(val, ETMCR);
+		break;
+	case ETMTRIGGER:
+		etm_write(val, ETMTRIGGER);
+		break;
+	case ETMSR:
+		etm_write(val, ETMSR);
+		break;
+	case ETMTSSCR:
+		etm_write(val, ETMTSSCR);
+		break;
+	case ETMTEEVR:
+		etm_write(val, ETMTEEVR);
+		break;
+	case ETMTECR1:
+		etm_write(val, ETMTECR1);
+		break;
+	case ETMFFLR:
+		etm_write(val, ETMFFLR);
+		break;
+	case ETMACVRn(0):
+		etm_write(val, ETMACVR0);
+		break;
+	case ETMACVRn(1):
+		etm_write(val, ETMACVR1);
+		break;
+	case ETMACVRn(2):
+		etm_write(val, ETMACVR2);
+		break;
+	case ETMACVRn(3):
+		etm_write(val, ETMACVR3);
+		break;
+	case ETMACVRn(4):
+		etm_write(val, ETMACVR4);
+		break;
+	case ETMACVRn(5):
+		etm_write(val, ETMACVR5);
+		break;
+	case ETMACVRn(6):
+		etm_write(val, ETMACVR6);
+		break;
+	case ETMACVRn(7):
+		etm_write(val, ETMACVR7);
+		break;
+	case ETMACVRn(8):
+		etm_write(val, ETMACVR8);
+		break;
+	case ETMACVRn(9):
+		etm_write(val, ETMACVR9);
+		break;
+	case ETMACVRn(10):
+		etm_write(val, ETMACVR10);
+		break;
+	case ETMACVRn(11):
+		etm_write(val, ETMACVR11);
+		break;
+	case ETMACVRn(12):
+		etm_write(val, ETMACVR12);
+		break;
+	case ETMACVRn(13):
+		etm_write(val, ETMACVR13);
+		break;
+	case ETMACVRn(14):
+		etm_write(val, ETMACVR14);
+		break;
+	case ETMACVRn(15):
+		etm_write(val, ETMACVR15);
+		break;
+	case ETMACTRn(0):
+		etm_write(val, ETMACTR0);
+		break;
+	case ETMACTRn(1):
+		etm_write(val, ETMACTR1);
+		break;
+	case ETMACTRn(2):
+		etm_write(val, ETMACTR2);
+		break;
+	case ETMACTRn(3):
+		etm_write(val, ETMACTR3);
+		break;
+	case ETMACTRn(4):
+		etm_write(val, ETMACTR4);
+		break;
+	case ETMACTRn(5):
+		etm_write(val, ETMACTR5);
+		break;
+	case ETMACTRn(6):
+		etm_write(val, ETMACTR6);
+		break;
+	case ETMACTRn(7):
+		etm_write(val, ETMACTR7);
+		break;
+	case ETMACTRn(8):
+		etm_write(val, ETMACTR8);
+		break;
+	case ETMACTRn(9):
+		etm_write(val, ETMACTR9);
+		break;
+	case ETMACTRn(10):
+		etm_write(val, ETMACTR10);
+		break;
+	case ETMACTRn(11):
+		etm_write(val, ETMACTR11);
+		break;
+	case ETMACTRn(12):
+		etm_write(val, ETMACTR12);
+		break;
+	case ETMACTRn(13):
+		etm_write(val, ETMACTR13);
+		break;
+	case ETMACTRn(14):
+		etm_write(val, ETMACTR14);
+		break;
+	case ETMACTRn(15):
+		etm_write(val, ETMACTR15);
+		break;
+	case ETMCNTRLDVRn(0):
+		etm_write(val, ETMCNTRLDVR0);
+		break;
+	case ETMCNTRLDVRn(1):
+		etm_write(val, ETMCNTRLDVR1);
+		break;
+	case ETMCNTRLDVRn(2):
+		etm_write(val, ETMCNTRLDVR2);
+		break;
+	case ETMCNTRLDVRn(3):
+		etm_write(val, ETMCNTRLDVR3);
+		break;
+	case ETMCNTENRn(0):
+		etm_write(val, ETMCNTENR0);
+		break;
+	case ETMCNTENRn(1):
+		etm_write(val, ETMCNTENR1);
+		break;
+	case ETMCNTENRn(2):
+		etm_write(val, ETMCNTENR2);
+		break;
+	case ETMCNTENRn(3):
+		etm_write(val, ETMCNTENR3);
+		break;
+	case ETMCNTRLDEVRn(0):
+		etm_write(val, ETMCNTRLDEVR0);
+		break;
+	case ETMCNTRLDEVRn(1):
+		etm_write(val, ETMCNTRLDEVR1);
+		break;
+	case ETMCNTRLDEVRn(2):
+		etm_write(val, ETMCNTRLDEVR2);
+		break;
+	case ETMCNTRLDEVRn(3):
+		etm_write(val, ETMCNTRLDEVR3);
+		break;
+	case ETMCNTVRn(0):
+		etm_write(val, ETMCNTVR0);
+		break;
+	case ETMCNTVRn(1):
+		etm_write(val, ETMCNTVR1);
+		break;
+	case ETMCNTVRn(2):
+		etm_write(val, ETMCNTVR2);
+		break;
+	case ETMCNTVRn(3):
+		etm_write(val, ETMCNTVR3);
+		break;
+	case ETMSQ12EVR:
+		etm_write(val, ETMSQ12EVR);
+		break;
+	case ETMSQ21EVR:
+		etm_write(val, ETMSQ21EVR);
+		break;
+	case ETMSQ23EVR:
+		etm_write(val, ETMSQ23EVR);
+		break;
+	case ETMSQ31EVR:
+		etm_write(val, ETMSQ31EVR);
+		break;
+	case ETMSQ32EVR:
+		etm_write(val, ETMSQ32EVR);
+		break;
+	case ETMSQ13EVR:
+		etm_write(val, ETMSQ13EVR);
+		break;
+	case ETMSQR:
+		etm_write(val, ETMSQR);
+		break;
+	case ETMEXTOUTEVRn(0):
+		etm_write(val, ETMEXTOUTEVR0);
+		break;
+	case ETMEXTOUTEVRn(1):
+		etm_write(val, ETMEXTOUTEVR1);
+		break;
+	case ETMEXTOUTEVRn(2):
+		etm_write(val, ETMEXTOUTEVR2);
+		break;
+	case ETMEXTOUTEVRn(3):
+		etm_write(val, ETMEXTOUTEVR3);
+		break;
+	case ETMCIDCVRn(0):
+		etm_write(val, ETMCIDCVR0);
+		break;
+	case ETMCIDCVRn(1):
+		etm_write(val, ETMCIDCVR1);
+		break;
+	case ETMCIDCVRn(2):
+		etm_write(val, ETMCIDCVR2);
+		break;
+	case ETMCIDCMR:
+		etm_write(val, ETMCIDCMR);
+		break;
+	case ETMIMPSPEC0:
+		etm_write(val, ETMIMPSPEC0);
+		break;
+	case ETMIMPSPEC1:
+		etm_write(val, ETMIMPSPEC1);
+		break;
+	case ETMIMPSPEC2:
+		etm_write(val, ETMIMPSPEC2);
+		break;
+	case ETMIMPSPEC3:
+		etm_write(val, ETMIMPSPEC3);
+		break;
+	case ETMIMPSPEC4:
+		etm_write(val, ETMIMPSPEC4);
+		break;
+	case ETMIMPSPEC5:
+		etm_write(val, ETMIMPSPEC5);
+		break;
+	case ETMIMPSPEC6:
+		etm_write(val, ETMIMPSPEC6);
+		break;
+	case ETMIMPSPEC7:
+		etm_write(val, ETMIMPSPEC7);
+		break;
+	case ETMSYNCFR:
+		etm_write(val, ETMSYNCFR);
+		break;
+	case ETMEXTINSELR:
+		etm_write(val, ETMEXTINSELR);
+		break;
+	case ETMTESSEICR:
+		etm_write(val, ETMTESSEICR);
+		break;
+	case ETMEIBCR:
+		etm_write(val, ETMEIBCR);
+		break;
+	case ETMTSEVR:
+		etm_write(val, ETMTSEVR);
+		break;
+	case ETMAUXCR:
+		etm_write(val, ETMAUXCR);
+		break;
+	case ETMTRACEIDR:
+		etm_write(val, ETMTRACEIDR);
+		break;
+	case ETMVMIDCVR:
+		etm_write(val, ETMVMIDCVR);
+		break;
+	case ETMOSLAR:
+		etm_write(val, ETMOSLAR);
+		break;
+	case ETMOSSRR:
+		etm_write(val, ETMOSSRR);
+		break;
+	case ETMPDCR:
+		etm_write(val, ETMPDCR);
+		break;
+	case ETMPDSR:
+		etm_write(val, ETMPDSR);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
new file mode 100644
index 0000000..6776956
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -0,0 +1,496 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#include <linux/coresight.h>
+#include <linux/coresight-pmu.h>
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
+#include "coresight-etm-perf.h"
+#include "coresight-priv.h"
+
+static struct pmu etm_pmu;
+static bool etm_perf_up;
+
+/**
+ * struct etm_event_data - Coresight specifics associated with an event
+ * @work:		Handle to free allocated memory outside IRQ context.
+ * @mask:		Holds the CPU(s) this event was set for.
+ * @snk_config:		The sink configuration.
+ * @path:		An array of paths, one slot per CPU.
+ */
+struct etm_event_data {
+	struct work_struct work;
+	cpumask_t mask;
+	void *snk_config;
+	struct list_head **path;
+};
+
+static DEFINE_PER_CPU(struct perf_output_handle, ctx_handle);
+static DEFINE_PER_CPU(struct coresight_device *, csdev_src);
+
+/* ETMv3.5/PTM's ETMCR is 'config' */
+PMU_FORMAT_ATTR(cycacc,		"config:" __stringify(ETM_OPT_CYCACC));
+PMU_FORMAT_ATTR(timestamp,	"config:" __stringify(ETM_OPT_TS));
+PMU_FORMAT_ATTR(retstack,	"config:" __stringify(ETM_OPT_RETSTK));
+
+static struct attribute *etm_config_formats_attr[] = {
+	&format_attr_cycacc.attr,
+	&format_attr_timestamp.attr,
+	&format_attr_retstack.attr,
+	NULL,
+};
+
+static const struct attribute_group etm_pmu_format_group = {
+	.name   = "format",
+	.attrs  = etm_config_formats_attr,
+};
+
+static const struct attribute_group *etm_pmu_attr_groups[] = {
+	&etm_pmu_format_group,
+	NULL,
+};
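+
+/*
+ * These format attributes show up under the PMU's sysfs "format" directory,
+ * so a trace session can be requested with something along the lines of
+ * "perf record -e cs_etm/cycacc,timestamp/ -- <workload>" (the exact
+ * invocation depends on the perf tool version and on an enabled sink).
+ */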
+
+static void etm_event_read(struct perf_event *event) {}
+
+static int etm_addr_filters_alloc(struct perf_event *event)
+{
+	struct etm_filters *filters;
+	int node = event->cpu == -1 ? -1 : cpu_to_node(event->cpu);
+
+	filters = kzalloc_node(sizeof(struct etm_filters), GFP_KERNEL, node);
+	if (!filters)
+		return -ENOMEM;
+
+	if (event->parent)
+		memcpy(filters, event->parent->hw.addr_filters,
+		       sizeof(*filters));
+
+	event->hw.addr_filters = filters;
+
+	return 0;
+}
+
+static void etm_event_destroy(struct perf_event *event)
+{
+	kfree(event->hw.addr_filters);
+	event->hw.addr_filters = NULL;
+}
+
+static int etm_event_init(struct perf_event *event)
+{
+	int ret = 0;
+
+	if (event->attr.type != etm_pmu.type) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	ret = etm_addr_filters_alloc(event);
+	if (ret)
+		goto out;
+
+	event->destroy = etm_event_destroy;
+out:
+	return ret;
+}
+
+static void free_event_data(struct work_struct *work)
+{
+	int cpu;
+	cpumask_t *mask;
+	struct etm_event_data *event_data;
+	struct coresight_device *sink;
+
+	event_data = container_of(work, struct etm_event_data, work);
+	mask = &event_data->mask;
+	/*
+	 * First deal with the sink configuration.  See comment in
+	 * etm_setup_aux() about why we take the first available path.
+	 */
+	if (event_data->snk_config) {
+		cpu = cpumask_first(mask);
+		sink = coresight_get_sink(event_data->path[cpu]);
+		if (sink_ops(sink)->free_buffer)
+			sink_ops(sink)->free_buffer(event_data->snk_config);
+	}
+
+	for_each_cpu(cpu, mask) {
+		if (!(IS_ERR_OR_NULL(event_data->path[cpu])))
+			coresight_release_path(event_data->path[cpu]);
+	}
+
+	kfree(event_data->path);
+	kfree(event_data);
+}
+
+static void *alloc_event_data(int cpu)
+{
+	int size;
+	cpumask_t *mask;
+	struct etm_event_data *event_data;
+
+	/* First get memory for the session's data */
+	event_data = kzalloc(sizeof(struct etm_event_data), GFP_KERNEL);
+	if (!event_data)
+		return NULL;
+
+	/* Make sure nothing disappears under us */
+	get_online_cpus();
+	size = num_online_cpus();
+
+	mask = &event_data->mask;
+	if (cpu != -1)
+		cpumask_set_cpu(cpu, mask);
+	else
+		cpumask_copy(mask, cpu_online_mask);
+	put_online_cpus();
+
+	/*
+	 * Each CPU has a single path between source and destination.  As such
+	 * allocate an array using CPU numbers as indexes.  That way a path
+	 * for any CPU can easily be accessed at any given time.  We proceed
+	 * the same way for sessions involving a single CPU.  The cost of
+	 * unused memory when dealing with single CPU trace scenarios is small
+	 * compared to the cost of searching through an optimized array.
+	 */
+	event_data->path = kcalloc(size,
+				   sizeof(struct list_head *), GFP_KERNEL);
+	if (!event_data->path) {
+		kfree(event_data);
+		return NULL;
+	}
+
+	return event_data;
+}
+
+static void etm_free_aux(void *data)
+{
+	struct etm_event_data *event_data = data;
+
+	schedule_work(&event_data->work);
+}
+
+static void *etm_setup_aux(int event_cpu, void **pages,
+			   int nr_pages, bool overwrite)
+{
+	int cpu;
+	cpumask_t *mask;
+	struct coresight_device *sink;
+	struct etm_event_data *event_data = NULL;
+
+	event_data = alloc_event_data(event_cpu);
+	if (!event_data)
+		return NULL;
+	INIT_WORK(&event_data->work, free_event_data);
+
+	/*
+	 * In theory nothing prevents tracers in a trace session from being
+	 * associated with different sinks, nor having a sink per tracer.  But
+	 * until we have HW with this kind of topology we need to assume tracers
+	 * in a trace session are using the same sink.  Therefore go through
+	 * the coresight bus and pick the first enabled sink.
+	 *
+	 * When operated from sysFS, users are responsible for enabling the
+	 * sink; from perf, the perf tools will do it based on the choice made
+	 * on the cmd line.  As such the "enable_sink" flag in sysFS is reset.
+	 */
+	sink = coresight_get_enabled_sink(true);
+	if (!sink)
+		goto err;
+
+	mask = &event_data->mask;
+
+	/* Setup the path for each CPU in a trace session */
+	for_each_cpu(cpu, mask) {
+		struct coresight_device *csdev;
+
+		csdev = per_cpu(csdev_src, cpu);
+		if (!csdev)
+			goto err;
+
+		/*
+		 * Building a path doesn't enable it, it simply builds a
+		 * list of devices from source to sink that can be
+		 * referenced later when the path is actually needed.
+		 */
+		event_data->path[cpu] = coresight_build_path(csdev, sink);
+		if (IS_ERR(event_data->path[cpu]))
+			goto err;
+	}
+
+	if (!sink_ops(sink)->alloc_buffer)
+		goto err;
+
+	cpu = cpumask_first(mask);
+	/* Get the AUX specific data from the sink buffer */
+	event_data->snk_config =
+			sink_ops(sink)->alloc_buffer(sink, cpu, pages,
+						     nr_pages, overwrite);
+	if (!event_data->snk_config)
+		goto err;
+
+out:
+	return event_data;
+
+err:
+	etm_free_aux(event_data);
+	event_data = NULL;
+	goto out;
+}
+
+static void etm_event_start(struct perf_event *event, int flags)
+{
+	int cpu = smp_processor_id();
+	struct etm_event_data *event_data;
+	struct perf_output_handle *handle = this_cpu_ptr(&ctx_handle);
+	struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu);
+
+	if (!csdev)
+		goto fail;
+
+	/*
+	 * Deal with the ring buffer API and get a handle on the
+	 * session's information.
+	 */
+	event_data = perf_aux_output_begin(handle, event);
+	if (!event_data)
+		goto fail;
+
+	/* We need a sink, no need to continue without one */
+	sink = coresight_get_sink(event_data->path[cpu]);
+	if (WARN_ON_ONCE(!sink || !sink_ops(sink)->set_buffer))
+		goto fail_end_stop;
+
+	/* Configure the sink */
+	if (sink_ops(sink)->set_buffer(sink, handle,
+				       event_data->snk_config))
+		goto fail_end_stop;
+
+	/* Nothing will happen without a path */
+	if (coresight_enable_path(event_data->path[cpu], CS_MODE_PERF))
+		goto fail_end_stop;
+
+	/* Tell the perf core the event is alive */
+	event->hw.state = 0;
+
+	/* Finally enable the tracer */
+	if (source_ops(csdev)->enable(csdev, event, CS_MODE_PERF))
+		goto fail_end_stop;
+
+out:
+	return;
+
+fail_end_stop:
+	perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
+	perf_aux_output_end(handle, 0);
+fail:
+	event->hw.state = PERF_HES_STOPPED;
+	goto out;
+}
+
+static void etm_event_stop(struct perf_event *event, int mode)
+{
+	int cpu = smp_processor_id();
+	unsigned long size;
+	struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu);
+	struct perf_output_handle *handle = this_cpu_ptr(&ctx_handle);
+	struct etm_event_data *event_data = perf_get_aux(handle);
+
+	if (event->hw.state == PERF_HES_STOPPED)
+		return;
+
+	if (!csdev)
+		return;
+
+	sink = coresight_get_sink(event_data->path[cpu]);
+	if (!sink)
+		return;
+
+	/* stop tracer */
+	source_ops(csdev)->disable(csdev, event);
+
+	/* tell the core */
+	event->hw.state = PERF_HES_STOPPED;
+
+	if (mode & PERF_EF_UPDATE) {
+		if (WARN_ON_ONCE(handle->event != event))
+			return;
+
+		/* update trace information */
+		if (!sink_ops(sink)->update_buffer)
+			return;
+
+		sink_ops(sink)->update_buffer(sink, handle,
+					      event_data->snk_config);
+
+		if (!sink_ops(sink)->reset_buffer)
+			return;
+
+		size = sink_ops(sink)->reset_buffer(sink, handle,
+						    event_data->snk_config);
+
+		perf_aux_output_end(handle, size);
+	}
+
+	/* Disabling the path makes its elements available to other sessions */
+	coresight_disable_path(event_data->path[cpu]);
+}
+
+static int etm_event_add(struct perf_event *event, int mode)
+{
+	int ret = 0;
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (mode & PERF_EF_START) {
+		etm_event_start(event, 0);
+		if (hwc->state & PERF_HES_STOPPED)
+			ret = -EINVAL;
+	} else {
+		hwc->state = PERF_HES_STOPPED;
+	}
+
+	return ret;
+}
+
+static void etm_event_del(struct perf_event *event, int mode)
+{
+	etm_event_stop(event, PERF_EF_UPDATE);
+}
+
+static int etm_addr_filters_validate(struct list_head *filters)
+{
+	bool range = false, address = false;
+	int index = 0;
+	struct perf_addr_filter *filter;
+
+	list_for_each_entry(filter, filters, entry) {
+		/*
+		 * No need to go further if there's no more
+		 * room for filters.
+		 */
+		if (++index > ETM_ADDR_CMP_MAX)
+			return -EOPNOTSUPP;
+
+		/* filter::size==0 means single address trigger */
+		if (filter->size) {
+			/*
+			 * The existing code relies on START/STOP filters
+			 * being address filters.
+			 */
+			if (filter->action == PERF_ADDR_FILTER_ACTION_START ||
+			    filter->action == PERF_ADDR_FILTER_ACTION_STOP)
+				return -EOPNOTSUPP;
+
+			range = true;
+		} else
+			address = true;
+
+		/*
+		 * At this time we don't allow range and start/stop filtering
+		 * to cohabitate, they have to be mutually exclusive.
+		 */
+		if (range && address)
+			return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static void etm_addr_filters_sync(struct perf_event *event)
+{
+	struct perf_addr_filters_head *head = perf_event_addr_filters(event);
+	unsigned long start, stop, *offs = event->addr_filters_offs;
+	struct etm_filters *filters = event->hw.addr_filters;
+	struct etm_filter *etm_filter;
+	struct perf_addr_filter *filter;
+	int i = 0;
+
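+	/*
+	 * Each perf address filter (e.g. something along the lines of
+	 * --filter 'filter 0x4000/0x200@/bin/ls' on the perf cmd line) is
+	 * turned into one etm_filter entry below, with the start/stop
+	 * addresses already adjusted by the offset at which the object was
+	 * mapped.
+	 */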
+	list_for_each_entry(filter, &head->list, entry) {
+		start = filter->offset + offs[i];
+		stop = start + filter->size;
+		etm_filter = &filters->etm_filter[i];
+
+		switch (filter->action) {
+		case PERF_ADDR_FILTER_ACTION_FILTER:
+			etm_filter->start_addr = start;
+			etm_filter->stop_addr = stop;
+			etm_filter->type = ETM_ADDR_TYPE_RANGE;
+			break;
+		case PERF_ADDR_FILTER_ACTION_START:
+			etm_filter->start_addr = start;
+			etm_filter->type = ETM_ADDR_TYPE_START;
+			break;
+		case PERF_ADDR_FILTER_ACTION_STOP:
+			etm_filter->stop_addr = stop;
+			etm_filter->type = ETM_ADDR_TYPE_STOP;
+			break;
+		}
+		i++;
+	}
+
+	filters->nr_filters = i;
+}
+
+int etm_perf_symlink(struct coresight_device *csdev, bool link)
+{
+	char entry[sizeof("cpu9999999")];
+	int ret = 0, cpu = source_ops(csdev)->cpu_id(csdev);
+	struct device *pmu_dev = etm_pmu.dev;
+	struct device *cs_dev = &csdev->dev;
+
+	sprintf(entry, "cpu%d", cpu);
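+	/*
+	 * The resulting "cpuN" symlink typically shows up under the PMU's
+	 * sysfs directory, e.g. /sys/bus/event_source/devices/cs_etm/cpu0
+	 * (the exact path depends on the name the PMU was registered with).
+	 */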
+
+	if (!etm_perf_up)
+		return -EPROBE_DEFER;
+
+	if (link) {
+		ret = sysfs_create_link(&pmu_dev->kobj, &cs_dev->kobj, entry);
+		if (ret)
+			return ret;
+		per_cpu(csdev_src, cpu) = csdev;
+	} else {
+		sysfs_remove_link(&pmu_dev->kobj, entry);
+		per_cpu(csdev_src, cpu) = NULL;
+	}
+
+	return 0;
+}
+
+static int __init etm_perf_init(void)
+{
+	int ret;
+
+	etm_pmu.capabilities		= PERF_PMU_CAP_EXCLUSIVE;
+
+	etm_pmu.attr_groups		= etm_pmu_attr_groups;
+	etm_pmu.task_ctx_nr		= perf_sw_context;
+	etm_pmu.read			= etm_event_read;
+	etm_pmu.event_init		= etm_event_init;
+	etm_pmu.setup_aux		= etm_setup_aux;
+	etm_pmu.free_aux		= etm_free_aux;
+	etm_pmu.start			= etm_event_start;
+	etm_pmu.stop			= etm_event_stop;
+	etm_pmu.add			= etm_event_add;
+	etm_pmu.del			= etm_event_del;
+	etm_pmu.addr_filters_sync	= etm_addr_filters_sync;
+	etm_pmu.addr_filters_validate	= etm_addr_filters_validate;
+	etm_pmu.nr_addr_filters		= ETM_ADDR_CMP_MAX;
+
+	ret = perf_pmu_register(&etm_pmu, CORESIGHT_ETM_PMU_NAME, -1);
+	if (ret == 0)
+		etm_perf_up = true;
+
+	return ret;
+}
+device_initcall(etm_perf_init);
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.h b/drivers/hwtracing/coresight/coresight-etm-perf.h
new file mode 100644
index 0000000..4197df4
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#ifndef _CORESIGHT_ETM_PERF_H
+#define _CORESIGHT_ETM_PERF_H
+
+#include "coresight-priv.h"
+
+struct coresight_device;
+
+/*
+ * In both ETMv3 and v4 the maximum number of address comparators that can
+ * be implemented is 8.  The actual number is implementation specific and
+ * will be checked when filters are applied.
+ */
+#define ETM_ADDR_CMP_MAX	8
+
+/**
+ * struct etm_filter - single instruction range or start/stop configuration.
+ * @start_addr:	The address to start tracing on.
+ * @stop_addr:	The address to stop tracing on.
+ * @type:	Is this a range or start/stop filter.
+ */
+struct etm_filter {
+	unsigned long start_addr;
+	unsigned long stop_addr;
+	enum etm_addr_type type;
+};
+
+/**
+ * struct etm_filters - set of filters for a session
+ * @etm_filter:	All the filters for this session.
+ * @nr_filters:	Number of filters
+ * @ssstatus:	Status of the start/stop logic.
+ */
+struct etm_filters {
+	struct etm_filter	etm_filter[ETM_ADDR_CMP_MAX];
+	unsigned int		nr_filters;
+	bool			ssstatus;
+};
+
+
+#ifdef CONFIG_CORESIGHT
+int etm_perf_symlink(struct coresight_device *csdev, bool link);
+
+#else
+static inline int etm_perf_symlink(struct coresight_device *csdev, bool link)
+{ return -EINVAL; }
+
+#endif /* CONFIG_CORESIGHT */
+
+#endif
diff --git a/drivers/hwtracing/coresight/coresight-etm.h b/drivers/hwtracing/coresight/coresight-etm.h
new file mode 100644
index 0000000..79e1ad8
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-etm.h
@@ -0,0 +1,292 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2014-2015, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef _CORESIGHT_CORESIGHT_ETM_H
+#define _CORESIGHT_CORESIGHT_ETM_H
+
+#include <asm/local.h>
+#include <linux/spinlock.h>
+#include "coresight-priv.h"
+
+/*
+ * Device registers:
+ * 0x000 - 0x2FC: Trace         registers
+ * 0x300 - 0x314: Management    registers
+ * 0x318 - 0xEFC: Trace         registers
+ *
+ * Coresight registers
+ * 0xF00 - 0xF9C: Management    registers
+ * 0xFA0 - 0xFA4: Management    registers in PFTv1.0
+ *                Trace         registers in PFTv1.1
+ * 0xFA8 - 0xFFC: Management    registers
+ */
+
+/* Trace registers (0x000-0x2FC) */
+#define ETMCR			0x000
+#define ETMCCR			0x004
+#define ETMTRIGGER		0x008
+#define ETMSR			0x010
+#define ETMSCR			0x014
+#define ETMTSSCR		0x018
+#define ETMTECR2		0x01c
+#define ETMTEEVR		0x020
+#define ETMTECR1		0x024
+#define ETMFFLR			0x02c
+#define ETMACVRn(n)		(0x040 + (n * 4))
+#define ETMACTRn(n)		(0x080 + (n * 4))
+#define ETMCNTRLDVRn(n)		(0x140 + (n * 4))
+#define ETMCNTENRn(n)		(0x150 + (n * 4))
+#define ETMCNTRLDEVRn(n)	(0x160 + (n * 4))
+#define ETMCNTVRn(n)		(0x170 + (n * 4))
+#define ETMSQ12EVR		0x180
+#define ETMSQ21EVR		0x184
+#define ETMSQ23EVR		0x188
+#define ETMSQ31EVR		0x18c
+#define ETMSQ32EVR		0x190
+#define ETMSQ13EVR		0x194
+#define ETMSQR			0x19c
+#define ETMEXTOUTEVRn(n)	(0x1a0 + (n * 4))
+#define ETMCIDCVRn(n)		(0x1b0 + (n * 4))
+#define ETMCIDCMR		0x1bc
+#define ETMIMPSPEC0		0x1c0
+#define ETMIMPSPEC1		0x1c4
+#define ETMIMPSPEC2		0x1c8
+#define ETMIMPSPEC3		0x1cc
+#define ETMIMPSPEC4		0x1d0
+#define ETMIMPSPEC5		0x1d4
+#define ETMIMPSPEC6		0x1d8
+#define ETMIMPSPEC7		0x1dc
+#define ETMSYNCFR		0x1e0
+#define ETMIDR			0x1e4
+#define ETMCCER			0x1e8
+#define ETMEXTINSELR		0x1ec
+#define ETMTESSEICR		0x1f0
+#define ETMEIBCR		0x1f4
+#define ETMTSEVR		0x1f8
+#define ETMAUXCR		0x1fc
+#define ETMTRACEIDR		0x200
+#define ETMVMIDCVR		0x240
+/* Management registers (0x300-0x314) */
+#define ETMOSLAR		0x300
+#define ETMOSLSR		0x304
+#define ETMOSSRR		0x308
+#define ETMPDCR			0x310
+#define ETMPDSR			0x314
+#define ETM_MAX_ADDR_CMP	16
+#define ETM_MAX_CNTR		4
+#define ETM_MAX_CTXID_CMP	3
+
+/* Register definition */
+/* ETMCR - 0x00 */
+#define ETMCR_PWD_DWN		BIT(0)
+#define ETMCR_STALL_MODE	BIT(7)
+#define ETMCR_BRANCH_BROADCAST	BIT(8)
+#define ETMCR_ETM_PRG		BIT(10)
+#define ETMCR_ETM_EN		BIT(11)
+#define ETMCR_CYC_ACC		BIT(12)
+#define ETMCR_CTXID_SIZE	(BIT(14)|BIT(15))
+#define ETMCR_TIMESTAMP_EN	BIT(28)
+#define ETMCR_RETURN_STACK	BIT(29)
+/* ETMCCR - 0x04 */
+#define ETMCCR_FIFOFULL		BIT(23)
+/* ETMPDCR - 0x310 */
+#define ETMPDCR_PWD_UP		BIT(3)
+/* ETMTECR1 - 0x024 */
+#define ETMTECR1_ADDR_COMP_1	BIT(0)
+#define ETMTECR1_INC_EXC	BIT(24)
+#define ETMTECR1_START_STOP	BIT(25)
+/* ETMCCER - 0x1E8 */
+#define ETMCCER_TIMESTAMP	BIT(22)
+#define ETMCCER_RETSTACK	BIT(23)
+
+#define ETM_MODE_EXCLUDE	BIT(0)
+#define ETM_MODE_CYCACC		BIT(1)
+#define ETM_MODE_STALL		BIT(2)
+#define ETM_MODE_TIMESTAMP	BIT(3)
+#define ETM_MODE_CTXID		BIT(4)
+#define ETM_MODE_BBROAD		BIT(5)
+#define ETM_MODE_RET_STACK	BIT(6)
+#define ETM_MODE_ALL		(ETM_MODE_EXCLUDE | ETM_MODE_CYCACC | \
+				 ETM_MODE_STALL | ETM_MODE_TIMESTAMP | \
+				 ETM_MODE_BBROAD | ETM_MODE_RET_STACK | \
+				 ETM_MODE_CTXID | ETM_MODE_EXCL_KERN | \
+				 ETM_MODE_EXCL_USER)
+
+#define ETM_SQR_MASK		0x3
+#define ETM_TRACEID_MASK	0x3f
+#define ETM_EVENT_MASK		0x1ffff
+#define ETM_SYNC_MASK		0xfff
+#define ETM_ALL_MASK		0xffffffff
+
+#define ETMSR_PROG_BIT		1
+#define ETM_SEQ_STATE_MAX_VAL	(0x2)
+#define PORT_SIZE_MASK		(GENMASK(21, 21) | GENMASK(6, 4))
+
+#define ETM_HARD_WIRE_RES_A	/* Hard wired, always true */	\
+				((0x0f << 0)	|		\
+				/* Resource index A */		\
+				(0x06 << 4))
+
+#define ETM_ADD_COMP_0		/* Single addr comparator 1 */	\
+				((0x00 << 7)	|		\
+				/* Resource index B */		\
+				(0x00 << 11))
+
+#define ETM_EVENT_NOT_A		BIT(14) /* NOT(A) */
+
+#define ETM_DEFAULT_EVENT_VAL	(ETM_HARD_WIRE_RES_A	|	\
+				 ETM_ADD_COMP_0		|	\
+				 ETM_EVENT_NOT_A)
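+
+/*
+ * With the values above ETM_DEFAULT_EVENT_VAL works out to 0x406f: resource
+ * A selects the hard wired "always true" input, resource B is left unused
+ * and bit 14 negates A, so the default event never fires.
+ */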
+
+/**
+ * struct etm_config - configuration information related to an ETM
+ * @mode:	controls various modes supported by this ETM/PTM.
+ * @ctrl:	used in conjunction with @mode.
+ * @trigger_event: setting for register ETMTRIGGER.
+ * @startstop_ctrl: setting for register ETMTSSCR.
+ * @enable_event: setting for register ETMTEEVR.
+ * @enable_ctrl1: setting for register ETMTECR1.
+ * @enable_ctrl2: setting for register ETMTECR2.
+ * @fifofull_level: setting for register ETMFFLR.
+ * @addr_idx:	index for the address comparator selection.
+ * @addr_val:	value for address comparator register.
+ * @addr_acctype: access type for address comparator register.
+ * @addr_type:	current status of the comparator register.
+ * @cntr_idx:	index for the counter register selection.
+ * @cntr_rld_val: reload value of a counter register.
+ * @cntr_event:	control for counter enable register.
+ * @cntr_rld_event: value for counter reload event register.
+ * @cntr_val:	counter value register.
+ * @seq_12_event: event causing the transition from 1 to 2.
+ * @seq_21_event: event causing the transition from 2 to 1.
+ * @seq_23_event: event causing the transition from 2 to 3.
+ * @seq_31_event: event causing the transition from 3 to 1.
+ * @seq_32_event: event causing the transition from 3 to 2.
+ * @seq_13_event: event causing the transition from 1 to 3.
+ * @seq_curr_state: current value of the sequencer register.
+ * @ctxid_idx: index for the context ID registers.
+ * @ctxid_pid: value for the context ID to trigger on.
+ * @ctxid_mask: mask applicable to all the context IDs.
+ * @sync_freq:	Synchronisation frequency.
+ * @timestamp_event: Defines an event that requests the insertion
+ *		     of a timestamp into the trace stream.
+ */
+struct etm_config {
+	u32				mode;
+	u32				ctrl;
+	u32				trigger_event;
+	u32				startstop_ctrl;
+	u32				enable_event;
+	u32				enable_ctrl1;
+	u32				enable_ctrl2;
+	u32				fifofull_level;
+	u8				addr_idx;
+	u32				addr_val[ETM_MAX_ADDR_CMP];
+	u32				addr_acctype[ETM_MAX_ADDR_CMP];
+	u32				addr_type[ETM_MAX_ADDR_CMP];
+	u8				cntr_idx;
+	u32				cntr_rld_val[ETM_MAX_CNTR];
+	u32				cntr_event[ETM_MAX_CNTR];
+	u32				cntr_rld_event[ETM_MAX_CNTR];
+	u32				cntr_val[ETM_MAX_CNTR];
+	u32				seq_12_event;
+	u32				seq_21_event;
+	u32				seq_23_event;
+	u32				seq_31_event;
+	u32				seq_32_event;
+	u32				seq_13_event;
+	u32				seq_curr_state;
+	u8				ctxid_idx;
+	u32				ctxid_pid[ETM_MAX_CTXID_CMP];
+	u32				ctxid_mask;
+	u32				sync_freq;
+	u32				timestamp_event;
+};
+
+/**
+ * struct etm_drvdata - specifics associated with an ETM component
+ * @base:	memory mapped base address for this component.
+ * @dev:	the device entity associated to this component.
+ * @atclk:	optional clock for the core parts of the ETM.
+ * @csdev:	component vitals needed by the framework.
+ * @spinlock:	only one at a time pls.
+ * @cpu:	the cpu this component is affined to.
+ * @port_size:	port size as reported by ETMCR bit 4-6 and 21.
+ * @arch:	ETM/PTM version number.
+ * @use_cp14:	true if management registers need to be accessed via CP14.
+ * @mode:	this tracer's mode, i.e. sysFS, Perf or disabled.
+ * @sticky_enable: true if ETM base configuration has been done.
+ * @boot_enable: true if we should start tracing at boot time.
+ * @os_unlock:	true if access to management registers is allowed.
+ * @nr_addr_cmp: Number of pairs of address comparators as found in ETMCCR.
+ * @nr_cntr:	Number of counters as found in ETMCCR bit 13-15.
+ * @nr_ext_inp:	Number of external input as found in ETMCCR bit 17-19.
+ * @nr_ext_out:	Number of external output as found in ETMCCR bit 20-22.
+ * @nr_ctxid_cmp: Number of contextID comparators as found in ETMCCR bit 24-25.
+ * @etmccr:	value of register ETMCCR.
+ * @etmccer:	value of register ETMCCER.
+ * @traceid:	value of the current ID for this component.
+ * @config:	structure holding configuration parameters.
+ */
+struct etm_drvdata {
+	void __iomem			*base;
+	struct device			*dev;
+	struct clk			*atclk;
+	struct coresight_device		*csdev;
+	spinlock_t			spinlock;
+	int				cpu;
+	int				port_size;
+	u8				arch;
+	bool				use_cp14;
+	local_t				mode;
+	bool				sticky_enable;
+	bool				boot_enable;
+	bool				os_unlock;
+	u8				nr_addr_cmp;
+	u8				nr_cntr;
+	u8				nr_ext_inp;
+	u8				nr_ext_out;
+	u8				nr_ctxid_cmp;
+	u32				etmccr;
+	u32				etmccer;
+	u32				traceid;
+	struct etm_config		config;
+};
+
+static inline void etm_writel(struct etm_drvdata *drvdata,
+			      u32 val, u32 off)
+{
+	if (drvdata->use_cp14) {
+		if (etm_writel_cp14(off, val)) {
+			dev_err(drvdata->dev,
+				"invalid CP14 access to ETM reg: %#x", off);
+		}
+	} else {
+		writel_relaxed(val, drvdata->base + off);
+	}
+}
+
+static inline unsigned int etm_readl(struct etm_drvdata *drvdata, u32 off)
+{
+	u32 val;
+
+	if (drvdata->use_cp14) {
+		if (etm_readl_cp14(off, &val)) {
+			dev_err(drvdata->dev,
+				"invalid CP14 access to ETM reg: %#x", off);
+		}
+	} else {
+		val = readl_relaxed(drvdata->base + off);
+	}
+
+	return val;
+}
+
+extern const struct attribute_group *coresight_etm_groups[];
+int etm_get_trace_id(struct etm_drvdata *drvdata);
+void etm_set_default(struct etm_config *config);
+void etm_config_trace_mode(struct etm_config *config);
+struct etm_config *get_etm_config(struct etm_drvdata *drvdata);
+#endif
diff --git a/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c b/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c
new file mode 100644
index 0000000..75487b3
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c
@@ -0,0 +1,1296 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#include <linux/pid_namespace.h>
+#include <linux/pm_runtime.h>
+#include <linux/sysfs.h>
+#include "coresight-etm.h"
+#include "coresight-priv.h"
+
+static ssize_t nr_addr_cmp_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->nr_addr_cmp;
+	return sprintf(buf, "%#lx\n", val);
+}
+static DEVICE_ATTR_RO(nr_addr_cmp);
+
+static ssize_t nr_cntr_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->nr_cntr;
+	return sprintf(buf, "%#lx\n", val);
+}
+static DEVICE_ATTR_RO(nr_cntr);
+
+static ssize_t nr_ctxid_cmp_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->nr_ctxid_cmp;
+	return sprintf(buf, "%#lx\n", val);
+}
+static DEVICE_ATTR_RO(nr_ctxid_cmp);
+
+static ssize_t etmsr_show(struct device *dev,
+			  struct device_attribute *attr, char *buf)
+{
+	unsigned long flags, val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	pm_runtime_get_sync(drvdata->dev);
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	CS_UNLOCK(drvdata->base);
+
+	val = etm_readl(drvdata, ETMSR);
+
+	CS_LOCK(drvdata->base);
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+	pm_runtime_put(drvdata->dev);
+
+	return sprintf(buf, "%#lx\n", val);
+}
+static DEVICE_ATTR_RO(etmsr);
+
+static ssize_t reset_store(struct device *dev,
+			   struct device_attribute *attr,
+			   const char *buf, size_t size)
+{
+	int i, ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	if (val) {
+		spin_lock(&drvdata->spinlock);
+		memset(config, 0, sizeof(struct etm_config));
+		config->mode = ETM_MODE_EXCLUDE;
+		config->trigger_event = ETM_DEFAULT_EVENT_VAL;
+		for (i = 0; i < drvdata->nr_addr_cmp; i++) {
+			config->addr_type[i] = ETM_ADDR_TYPE_NONE;
+		}
+
+		etm_set_default(config);
+		spin_unlock(&drvdata->spinlock);
+	}
+
+	return size;
+}
+static DEVICE_ATTR_WO(reset);
+
+static ssize_t mode_show(struct device *dev,
+			 struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	val = config->mode;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t mode_store(struct device *dev,
+			  struct device_attribute *attr,
+			  const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	spin_lock(&drvdata->spinlock);
+	config->mode = val & ETM_MODE_ALL;
+
+	if (config->mode & ETM_MODE_EXCLUDE)
+		config->enable_ctrl1 |= ETMTECR1_INC_EXC;
+	else
+		config->enable_ctrl1 &= ~ETMTECR1_INC_EXC;
+
+	if (config->mode & ETM_MODE_CYCACC)
+		config->ctrl |= ETMCR_CYC_ACC;
+	else
+		config->ctrl &= ~ETMCR_CYC_ACC;
+
+	if (config->mode & ETM_MODE_STALL) {
+		if (!(drvdata->etmccr & ETMCCR_FIFOFULL)) {
+			dev_warn(drvdata->dev, "stall mode not supported\n");
+			ret = -EINVAL;
+			goto err_unlock;
+		}
+		config->ctrl |= ETMCR_STALL_MODE;
+	} else
+		config->ctrl &= ~ETMCR_STALL_MODE;
+
+	if (config->mode & ETM_MODE_TIMESTAMP) {
+		if (!(drvdata->etmccer & ETMCCER_TIMESTAMP)) {
+			dev_warn(drvdata->dev, "timestamp not supported\n");
+			ret = -EINVAL;
+			goto err_unlock;
+		}
+		config->ctrl |= ETMCR_TIMESTAMP_EN;
+	} else
+		config->ctrl &= ~ETMCR_TIMESTAMP_EN;
+
+	if (config->mode & ETM_MODE_CTXID)
+		config->ctrl |= ETMCR_CTXID_SIZE;
+	else
+		config->ctrl &= ~ETMCR_CTXID_SIZE;
+
+	if (config->mode & ETM_MODE_BBROAD)
+		config->ctrl |= ETMCR_BRANCH_BROADCAST;
+	else
+		config->ctrl &= ~ETMCR_BRANCH_BROADCAST;
+
+	if (config->mode & ETM_MODE_RET_STACK)
+		config->ctrl |= ETMCR_RETURN_STACK;
+	else
+		config->ctrl &= ~ETMCR_RETURN_STACK;
+
+	if (config->mode & (ETM_MODE_EXCL_KERN | ETM_MODE_EXCL_USER))
+		etm_config_trace_mode(config);
+
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+
+err_unlock:
+	spin_unlock(&drvdata->spinlock);
+	return ret;
+}
+static DEVICE_ATTR_RW(mode);
+
+static ssize_t trigger_event_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	val = config->trigger_event;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t trigger_event_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	config->trigger_event = val & ETM_EVENT_MASK;
+
+	return size;
+}
+static DEVICE_ATTR_RW(trigger_event);
+
+static ssize_t enable_event_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	val = config->enable_event;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t enable_event_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	config->enable_event = val & ETM_EVENT_MASK;
+
+	return size;
+}
+static DEVICE_ATTR_RW(enable_event);
+
+static ssize_t fifofull_level_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	val = config->fifofull_level;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t fifofull_level_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	config->fifofull_level = val;
+
+	return size;
+}
+static DEVICE_ATTR_RW(fifofull_level);
+
+static ssize_t addr_idx_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	val = config->addr_idx;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t addr_idx_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	if (val >= drvdata->nr_addr_cmp)
+		return -EINVAL;
+
+	/*
+	 * Use spinlock to ensure index doesn't change while it gets
+	 * dereferenced multiple times within a spinlock block elsewhere.
+	 */
+	spin_lock(&drvdata->spinlock);
+	config->addr_idx = val;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(addr_idx);
+
+static ssize_t addr_single_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	u8 idx;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	if (!(config->addr_type[idx] == ETM_ADDR_TYPE_NONE ||
+	      config->addr_type[idx] == ETM_ADDR_TYPE_SINGLE)) {
+		spin_unlock(&drvdata->spinlock);
+		return -EINVAL;
+	}
+
+	val = config->addr_val[idx];
+	spin_unlock(&drvdata->spinlock);
+
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t addr_single_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t size)
+{
+	u8 idx;
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	if (!(config->addr_type[idx] == ETM_ADDR_TYPE_NONE ||
+	      config->addr_type[idx] == ETM_ADDR_TYPE_SINGLE)) {
+		spin_unlock(&drvdata->spinlock);
+		return -EINVAL;
+	}
+
+	config->addr_val[idx] = val;
+	config->addr_type[idx] = ETM_ADDR_TYPE_SINGLE;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(addr_single);
+
+static ssize_t addr_range_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	u8 idx;
+	unsigned long val1, val2;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	if (idx % 2 != 0) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+	if (!((config->addr_type[idx] == ETM_ADDR_TYPE_NONE &&
+	       config->addr_type[idx + 1] == ETM_ADDR_TYPE_NONE) ||
+	      (config->addr_type[idx] == ETM_ADDR_TYPE_RANGE &&
+	       config->addr_type[idx + 1] == ETM_ADDR_TYPE_RANGE))) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+
+	val1 = config->addr_val[idx];
+	val2 = config->addr_val[idx + 1];
+	spin_unlock(&drvdata->spinlock);
+
+	return sprintf(buf, "%#lx %#lx\n", val1, val2);
+}
+
+static ssize_t addr_range_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	u8 idx;
+	unsigned long val1, val2;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	if (sscanf(buf, "%lx %lx", &val1, &val2) != 2)
+		return -EINVAL;
+	/* Lower address comparator cannot have a higher address value */
+	if (val1 > val2)
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
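+	/* Range comparators work on even/odd pairs: idx must be even */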
+	if (idx % 2 != 0) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+	if (!((config->addr_type[idx] == ETM_ADDR_TYPE_NONE &&
+	       config->addr_type[idx + 1] == ETM_ADDR_TYPE_NONE) ||
+	      (config->addr_type[idx] == ETM_ADDR_TYPE_RANGE &&
+	       config->addr_type[idx + 1] == ETM_ADDR_TYPE_RANGE))) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+
+	config->addr_val[idx] = val1;
+	config->addr_type[idx] = ETM_ADDR_TYPE_RANGE;
+	config->addr_val[idx + 1] = val2;
+	config->addr_type[idx + 1] = ETM_ADDR_TYPE_RANGE;
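+	/* Include this address range comparator pair in the trace region */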
+	config->enable_ctrl1 |= (1 << (idx/2));
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(addr_range);
+
+static ssize_t addr_start_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	u8 idx;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	if (!(config->addr_type[idx] == ETM_ADDR_TYPE_NONE ||
+	      config->addr_type[idx] == ETM_ADDR_TYPE_START)) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+
+	val = config->addr_val[idx];
+	spin_unlock(&drvdata->spinlock);
+
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t addr_start_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t size)
+{
+	u8 idx;
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	if (!(config->addr_type[idx] == ETM_ADDR_TYPE_NONE ||
+	      config->addr_type[idx] == ETM_ADDR_TYPE_START)) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+
+	config->addr_val[idx] = val;
+	config->addr_type[idx] = ETM_ADDR_TYPE_START;
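+	/* ETMTSSCR bits [15:0] select the start address comparators */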
+	config->startstop_ctrl |= (1 << idx);
+	config->enable_ctrl1 |= ETMTECR1_START_STOP;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(addr_start);
+
+static ssize_t addr_stop_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	u8 idx;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	if (!(config->addr_type[idx] == ETM_ADDR_TYPE_NONE ||
+	      config->addr_type[idx] == ETM_ADDR_TYPE_STOP)) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+
+	val = config->addr_val[idx];
+	spin_unlock(&drvdata->spinlock);
+
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t addr_stop_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t size)
+{
+	u8 idx;
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	if (!(config->addr_type[idx] == ETM_ADDR_TYPE_NONE ||
+	      config->addr_type[idx] == ETM_ADDR_TYPE_STOP)) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+
+	config->addr_val[idx] = val;
+	config->addr_type[idx] = ETM_ADDR_TYPE_STOP;
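+	/* ETMTSSCR bits [31:16] select the stop address comparators */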
+	config->startstop_ctrl |= (1 << (idx + 16));
+	config->enable_ctrl1 |= ETMTECR1_START_STOP;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(addr_stop);
+
+static ssize_t addr_acctype_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	val = config->addr_acctype[config->addr_idx];
+	spin_unlock(&drvdata->spinlock);
+
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t addr_acctype_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	spin_lock(&drvdata->spinlock);
+	config->addr_acctype[config->addr_idx] = val;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(addr_acctype);
+
+static ssize_t cntr_idx_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	val = config->cntr_idx;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t cntr_idx_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	if (val >= drvdata->nr_cntr)
+		return -EINVAL;
+	/*
+	 * Use spinlock to ensure index doesn't change while it gets
+	 * dereferenced multiple times within a spinlock block elsewhere.
+	 */
+	spin_lock(&drvdata->spinlock);
+	config->cntr_idx = val;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(cntr_idx);
+
+static ssize_t cntr_rld_val_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	val = config->cntr_rld_val[config->cntr_idx];
+	spin_unlock(&drvdata->spinlock);
+
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t cntr_rld_val_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	spin_lock(&drvdata->spinlock);
+	config->cntr_rld_val[config->cntr_idx] = val;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(cntr_rld_val);
+
+static ssize_t cntr_event_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	val = config->cntr_event[config->cntr_idx];
+	spin_unlock(&drvdata->spinlock);
+
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t cntr_event_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	spin_lock(&drvdata->spinlock);
+	config->cntr_event[config->cntr_idx] = val & ETM_EVENT_MASK;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(cntr_event);
+
+static ssize_t cntr_rld_event_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	val = config->cntr_rld_event[config->cntr_idx];
+	spin_unlock(&drvdata->spinlock);
+
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t cntr_rld_event_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	spin_lock(&drvdata->spinlock);
+	config->cntr_rld_event[config->cntr_idx] = val & ETM_EVENT_MASK;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(cntr_rld_event);
+
+static ssize_t cntr_val_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	int i, ret = 0;
+	u32 val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
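+	/*
+	 * If the tracer is disabled report the saved counter values,
+	 * otherwise read the live counters from the hardware.
+	 */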
+	if (!local_read(&drvdata->mode)) {
+		spin_lock(&drvdata->spinlock);
+		for (i = 0; i < drvdata->nr_cntr; i++)
+			ret += sprintf(buf + ret, "counter %d: %x\n",
+				       i, config->cntr_val[i]);
+		spin_unlock(&drvdata->spinlock);
+		return ret;
+	}
+
+	for (i = 0; i < drvdata->nr_cntr; i++) {
+		val = etm_readl(drvdata, ETMCNTVRn(i));
+		ret += sprintf(buf + ret, "counter %d: %x\n", i, val);
+	}
+
+	return ret;
+}
+
+static ssize_t cntr_val_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	spin_lock(&drvdata->spinlock);
+	config->cntr_val[config->cntr_idx] = val;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(cntr_val);
+
+static ssize_t seq_12_event_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	val = config->seq_12_event;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t seq_12_event_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	config->seq_12_event = val & ETM_EVENT_MASK;
+	return size;
+}
+static DEVICE_ATTR_RW(seq_12_event);
+
+static ssize_t seq_21_event_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	val = config->seq_21_event;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t seq_21_event_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	config->seq_21_event = val & ETM_EVENT_MASK;
+	return size;
+}
+static DEVICE_ATTR_RW(seq_21_event);
+
+static ssize_t seq_23_event_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	val = config->seq_23_event;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t seq_23_event_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	config->seq_23_event = val & ETM_EVENT_MASK;
+	return size;
+}
+static DEVICE_ATTR_RW(seq_23_event);
+
+static ssize_t seq_31_event_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	val = config->seq_31_event;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t seq_31_event_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	config->seq_31_event = val & ETM_EVENT_MASK;
+	return size;
+}
+static DEVICE_ATTR_RW(seq_31_event);
+
+static ssize_t seq_32_event_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	val = config->seq_32_event;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t seq_32_event_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	config->seq_32_event = val & ETM_EVENT_MASK;
+	return size;
+}
+static DEVICE_ATTR_RW(seq_32_event);
+
+static ssize_t seq_13_event_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	val = config->seq_13_event;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t seq_13_event_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	config->seq_13_event = val & ETM_EVENT_MASK;
+	return size;
+}
+static DEVICE_ATTR_RW(seq_13_event);
+
+static ssize_t seq_curr_state_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	unsigned long val, flags;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
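+	/*
+	 * If the tracer is disabled report the saved sequencer state,
+	 * otherwise read it from the hardware.
+	 */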
+	if (!local_read(&drvdata->mode)) {
+		val = config->seq_curr_state;
+		goto out;
+	}
+
+	pm_runtime_get_sync(drvdata->dev);
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+
+	CS_UNLOCK(drvdata->base);
+	val = (etm_readl(drvdata, ETMSQR) & ETM_SQR_MASK);
+	CS_LOCK(drvdata->base);
+
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+	pm_runtime_put(drvdata->dev);
+out:
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t seq_curr_state_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	if (val > ETM_SEQ_STATE_MAX_VAL)
+		return -EINVAL;
+
+	config->seq_curr_state = val;
+
+	return size;
+}
+static DEVICE_ATTR_RW(seq_curr_state);
+
+static ssize_t ctxid_idx_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	val = config->ctxid_idx;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t ctxid_idx_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	if (val >= drvdata->nr_ctxid_cmp)
+		return -EINVAL;
+
+	/*
+	 * Use spinlock to ensure index doesn't change while it gets
+	 * dereferenced multiple times within a spinlock block elsewhere.
+	 */
+	spin_lock(&drvdata->spinlock);
+	config->ctxid_idx = val;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(ctxid_idx);
+
+static ssize_t ctxid_pid_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	/*
+	 * Don't use contextID tracing if coming from a PID namespace.  See
+	 * comment in ctxid_pid_store().
+	 */
+	if (task_active_pid_ns(current) != &init_pid_ns)
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	val = config->ctxid_pid[config->ctxid_idx];
+	spin_unlock(&drvdata->spinlock);
+
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t ctxid_pid_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t size)
+{
+	int ret;
+	unsigned long pid;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	/*
+	 * When contextID tracing is enabled the tracers will insert the
+	 * value found in the contextID register in the trace stream.  But if
+	 * a process is in a namespace the PID of that process as seen from the
+	 * namespace won't be what the kernel sees, something that makes the
+	 * feature confusing and can potentially leak kernel only information.
+	 * As such refuse to use the feature if @current is not in the initial
+	 * PID namespace.
+	 */
+	if (task_active_pid_ns(current) != &init_pid_ns)
+		return -EINVAL;
+
+	ret = kstrtoul(buf, 16, &pid);
+	if (ret)
+		return ret;
+
+	spin_lock(&drvdata->spinlock);
+	config->ctxid_pid[config->ctxid_idx] = pid;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(ctxid_pid);
+
+static ssize_t ctxid_mask_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	/*
+	 * Don't use contextID tracing if coming from a PID namespace.  See
+	 * comment in ctxid_pid_store().
+	 */
+	if (task_active_pid_ns(current) != &init_pid_ns)
+		return -EINVAL;
+
+	val = config->ctxid_mask;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t ctxid_mask_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	/*
+	 * Don't use contextID tracing if coming from a PID namespace.  See
+	 * comment in ctxid_pid_store().
+	 */
+	if (task_active_pid_ns(current) != &init_pid_ns)
+		return -EINVAL;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	config->ctxid_mask = val;
+	return size;
+}
+static DEVICE_ATTR_RW(ctxid_mask);
+
+static ssize_t sync_freq_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	val = config->sync_freq;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t sync_freq_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	config->sync_freq = val & ETM_SYNC_MASK;
+	return size;
+}
+static DEVICE_ATTR_RW(sync_freq);
+
+static ssize_t timestamp_event_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	val = config->timestamp_event;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t timestamp_event_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	config->timestamp_event = val & ETM_EVENT_MASK;
+	return size;
+}
+static DEVICE_ATTR_RW(timestamp_event);
+
+static ssize_t cpu_show(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	int val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->cpu;
+	return scnprintf(buf, PAGE_SIZE, "%d\n", val);
+}
+static DEVICE_ATTR_RO(cpu);
+
+static ssize_t traceid_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = etm_get_trace_id(drvdata);
+
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t traceid_store(struct device *dev,
+			     struct device_attribute *attr,
+			     const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	drvdata->traceid = val & ETM_TRACEID_MASK;
+	return size;
+}
+static DEVICE_ATTR_RW(traceid);
+
+static struct attribute *coresight_etm_attrs[] = {
+	&dev_attr_nr_addr_cmp.attr,
+	&dev_attr_nr_cntr.attr,
+	&dev_attr_nr_ctxid_cmp.attr,
+	&dev_attr_etmsr.attr,
+	&dev_attr_reset.attr,
+	&dev_attr_mode.attr,
+	&dev_attr_trigger_event.attr,
+	&dev_attr_enable_event.attr,
+	&dev_attr_fifofull_level.attr,
+	&dev_attr_addr_idx.attr,
+	&dev_attr_addr_single.attr,
+	&dev_attr_addr_range.attr,
+	&dev_attr_addr_start.attr,
+	&dev_attr_addr_stop.attr,
+	&dev_attr_addr_acctype.attr,
+	&dev_attr_cntr_idx.attr,
+	&dev_attr_cntr_rld_val.attr,
+	&dev_attr_cntr_event.attr,
+	&dev_attr_cntr_rld_event.attr,
+	&dev_attr_cntr_val.attr,
+	&dev_attr_seq_12_event.attr,
+	&dev_attr_seq_21_event.attr,
+	&dev_attr_seq_23_event.attr,
+	&dev_attr_seq_31_event.attr,
+	&dev_attr_seq_32_event.attr,
+	&dev_attr_seq_13_event.attr,
+	&dev_attr_seq_curr_state.attr,
+	&dev_attr_ctxid_idx.attr,
+	&dev_attr_ctxid_pid.attr,
+	&dev_attr_ctxid_mask.attr,
+	&dev_attr_sync_freq.attr,
+	&dev_attr_timestamp_event.attr,
+	&dev_attr_traceid.attr,
+	&dev_attr_cpu.attr,
+	NULL,
+};
+
+#define coresight_etm3x_reg(name, offset)			\
+	coresight_simple_reg32(struct etm_drvdata, name, offset)
+
+coresight_etm3x_reg(etmccr, ETMCCR);
+coresight_etm3x_reg(etmccer, ETMCCER);
+coresight_etm3x_reg(etmscr, ETMSCR);
+coresight_etm3x_reg(etmidr, ETMIDR);
+coresight_etm3x_reg(etmcr, ETMCR);
+coresight_etm3x_reg(etmtraceidr, ETMTRACEIDR);
+coresight_etm3x_reg(etmteevr, ETMTEEVR);
+coresight_etm3x_reg(etmtssvr, ETMTSSCR);
+coresight_etm3x_reg(etmtecr1, ETMTECR1);
+coresight_etm3x_reg(etmtecr2, ETMTECR2);
+
+static struct attribute *coresight_etm_mgmt_attrs[] = {
+	&dev_attr_etmccr.attr,
+	&dev_attr_etmccer.attr,
+	&dev_attr_etmscr.attr,
+	&dev_attr_etmidr.attr,
+	&dev_attr_etmcr.attr,
+	&dev_attr_etmtraceidr.attr,
+	&dev_attr_etmteevr.attr,
+	&dev_attr_etmtssvr.attr,
+	&dev_attr_etmtecr1.attr,
+	&dev_attr_etmtecr2.attr,
+	NULL,
+};
+
+static const struct attribute_group coresight_etm_group = {
+	.attrs = coresight_etm_attrs,
+};
+
+static const struct attribute_group coresight_etm_mgmt_group = {
+	.attrs = coresight_etm_mgmt_attrs,
+	.name = "mgmt",
+};
+
+const struct attribute_group *coresight_etm_groups[] = {
+	&coresight_etm_group,
+	&coresight_etm_mgmt_group,
+	NULL,
+};
diff --git a/drivers/hwtracing/coresight/coresight-etm3x.c b/drivers/hwtracing/coresight/coresight-etm3x.c
new file mode 100644
index 0000000..7c74263
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-etm3x.c
@@ -0,0 +1,937 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
+ *
+ * Description: CoreSight Program Flow Trace driver
+ */
+
+#include <linux/kernel.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/smp.h>
+#include <linux/sysfs.h>
+#include <linux/stat.h>
+#include <linux/pm_runtime.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/coresight.h>
+#include <linux/coresight-pmu.h>
+#include <linux/amba/bus.h>
+#include <linux/seq_file.h>
+#include <linux/uaccess.h>
+#include <linux/clk.h>
+#include <linux/perf_event.h>
+#include <asm/sections.h>
+
+#include "coresight-etm.h"
+#include "coresight-etm-perf.h"
+
+/*
+ * Not really modular but using module_param is the easiest way to
+ * remain consistent with existing use cases for now.
+ */
+static int boot_enable;
+module_param_named(boot_enable, boot_enable, int, S_IRUGO);
+
+/* The number of ETM/PTM devices currently registered */
+static int etm_count;
+static struct etm_drvdata *etmdrvdata[NR_CPUS];
+
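+/*
+ * Dynamic CPU hotplug state returned by cpuhp_setup_state_nocalls_cpuslocked()
+ * in etm_probe(), kept so the callbacks can be removed again on error.
+ */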
+static enum cpuhp_state hp_online;
+
+/*
+ * Memory mapped writes to clear os lock are not supported on some processors
+ * and OS lock must be unlocked before any memory mapped access on such
+ * processors, otherwise memory mapped reads/writes will be invalid.
+ */
+static void etm_os_unlock(struct etm_drvdata *drvdata)
+{
+	/* Writing any value to ETMOSLAR unlocks the trace registers */
+	etm_writel(drvdata, 0x0, ETMOSLAR);
+	drvdata->os_unlock = true;
+	isb();
+}
+
+static void etm_set_pwrdwn(struct etm_drvdata *drvdata)
+{
+	u32 etmcr;
+
+	/* Ensure pending cp14 accesses complete before setting pwrdwn */
+	mb();
+	isb();
+	etmcr = etm_readl(drvdata, ETMCR);
+	etmcr |= ETMCR_PWD_DWN;
+	etm_writel(drvdata, etmcr, ETMCR);
+}
+
+static void etm_clr_pwrdwn(struct etm_drvdata *drvdata)
+{
+	u32 etmcr;
+
+	etmcr = etm_readl(drvdata, ETMCR);
+	etmcr &= ~ETMCR_PWD_DWN;
+	etm_writel(drvdata, etmcr, ETMCR);
+	/* Ensure pwrup completes before subsequent cp14 accesses */
+	mb();
+	isb();
+}
+
+static void etm_set_pwrup(struct etm_drvdata *drvdata)
+{
+	u32 etmpdcr;
+
+	etmpdcr = readl_relaxed(drvdata->base + ETMPDCR);
+	etmpdcr |= ETMPDCR_PWD_UP;
+	writel_relaxed(etmpdcr, drvdata->base + ETMPDCR);
+	/* Ensure pwrup completes before subsequent cp14 accesses */
+	mb();
+	isb();
+}
+
+static void etm_clr_pwrup(struct etm_drvdata *drvdata)
+{
+	u32 etmpdcr;
+
+	/* Ensure pending cp14 accesses complete before clearing pwrup */
+	mb();
+	isb();
+	etmpdcr = readl_relaxed(drvdata->base + ETMPDCR);
+	etmpdcr &= ~ETMPDCR_PWD_UP;
+	writel_relaxed(etmpdcr, drvdata->base + ETMPDCR);
+}
+
+/**
+ * coresight_timeout_etm - loop until a bit has changed to a specific state.
+ * @drvdata: etm's private data structure.
+ * @offset: offset of the register to poll.
+ * @position: the position of the bit of interest.
+ * @value: the value the bit should have.
+ *
+ * Basically the same as coresight_timeout() except for the register access
+ * method, where we have to account for CP14 configurations.
+ *
+ * Return: 0 as soon as the bit has taken the desired state or -EAGAIN if
+ * TIMEOUT_US has elapsed, whichever happens first.
+ */
+
+static int coresight_timeout_etm(struct etm_drvdata *drvdata, u32 offset,
+				  int position, int value)
+{
+	int i;
+	u32 val;
+
+	for (i = TIMEOUT_US; i > 0; i--) {
+		val = etm_readl(drvdata, offset);
+		/* Waiting on the bit to go from 0 to 1 */
+		if (value) {
+			if (val & BIT(position))
+				return 0;
+		/* Waiting on the bit to go from 1 to 0 */
+		} else {
+			if (!(val & BIT(position)))
+				return 0;
+		}
+
+		/*
+		 * Delay is arbitrary - the specification doesn't say how long
+		 * we are expected to wait.  Extra check required to make sure
+		 * we don't wait needlessly on the last iteration.
+		 */
+		if (i - 1)
+			udelay(1);
+	}
+
+	return -EAGAIN;
+}
+
+static void etm_set_prog(struct etm_drvdata *drvdata)
+{
+	u32 etmcr;
+
+	etmcr = etm_readl(drvdata, ETMCR);
+	etmcr |= ETMCR_ETM_PRG;
+	etm_writel(drvdata, etmcr, ETMCR);
+	/*
+	 * Recommended by spec for cp14 accesses to ensure etmcr write is
+	 * complete before polling etmsr
+	 */
+	isb();
+	if (coresight_timeout_etm(drvdata, ETMSR, ETMSR_PROG_BIT, 1)) {
+		dev_err(drvdata->dev,
+			"%s: timeout observed when probing at offset %#x\n",
+			__func__, ETMSR);
+	}
+}
+
+static void etm_clr_prog(struct etm_drvdata *drvdata)
+{
+	u32 etmcr;
+
+	etmcr = etm_readl(drvdata, ETMCR);
+	etmcr &= ~ETMCR_ETM_PRG;
+	etm_writel(drvdata, etmcr, ETMCR);
+	/*
+	 * Recommended by spec for cp14 accesses to ensure etmcr write is
+	 * complete before polling etmsr
+	 */
+	isb();
+	if (coresight_timeout_etm(drvdata, ETMSR, ETMSR_PROG_BIT, 0)) {
+		dev_err(drvdata->dev,
+			"%s: timeout observed when probing at offset %#x\n",
+			__func__, ETMSR);
+	}
+}
+
+void etm_set_default(struct etm_config *config)
+{
+	int i;
+
+	if (WARN_ON_ONCE(!config))
+		return;
+
+	/*
+	 * Taken verbatim from the TRM:
+	 *
+	 * To trace all memory:
+	 *  set bit [24] in register 0x009, the ETMTECR1, to 1
+	 *  set all other bits in register 0x009, the ETMTECR1, to 0
+	 *  set all bits in register 0x007, the ETMTECR2, to 0
+	 *  set register 0x008, the ETMTEEVR, to 0x6F (TRUE).
+	 */
+	config->enable_ctrl1 = BIT(24);
+	config->enable_ctrl2 = 0x0;
+	config->enable_event = ETM_HARD_WIRE_RES_A;
+
+	config->trigger_event = ETM_DEFAULT_EVENT_VAL;
+
+	config->seq_12_event = ETM_DEFAULT_EVENT_VAL;
+	config->seq_21_event = ETM_DEFAULT_EVENT_VAL;
+	config->seq_23_event = ETM_DEFAULT_EVENT_VAL;
+	config->seq_31_event = ETM_DEFAULT_EVENT_VAL;
+	config->seq_32_event = ETM_DEFAULT_EVENT_VAL;
+	config->seq_13_event = ETM_DEFAULT_EVENT_VAL;
+	config->timestamp_event = ETM_DEFAULT_EVENT_VAL;
+
+	for (i = 0; i < ETM_MAX_CNTR; i++) {
+		config->cntr_rld_val[i] = 0x0;
+		config->cntr_event[i] = ETM_DEFAULT_EVENT_VAL;
+		config->cntr_rld_event[i] = ETM_DEFAULT_EVENT_VAL;
+		config->cntr_val[i] = 0x0;
+	}
+
+	config->seq_curr_state = 0x0;
+	config->ctxid_idx = 0x0;
+	for (i = 0; i < ETM_MAX_CTXID_CMP; i++)
+		config->ctxid_pid[i] = 0x0;
+
+	config->ctxid_mask = 0x0;
+	/* Setting default to 1024 as per TRM recommendation */
+	config->sync_freq = 0x400;
+}
+
+void etm_config_trace_mode(struct etm_config *config)
+{
+	u32 flags, mode;
+
+	mode = config->mode;
+
+	mode &= (ETM_MODE_EXCL_KERN | ETM_MODE_EXCL_USER);
+
+	/* excluding kernel AND user space doesn't make sense */
+	if (mode == (ETM_MODE_EXCL_KERN | ETM_MODE_EXCL_USER))
+		return;
+
+	/* nothing to do if neither flags are set */
+	if (!(mode & ETM_MODE_EXCL_KERN) && !(mode & ETM_MODE_EXCL_USER))
+		return;
+
+	flags = (1 << 0 |	/* instruction execute */
+		 3 << 3 |	/* ARM instruction */
+		 0 << 5 |	/* No data value comparison */
+		 0 << 7 |	/* No exact match */
+		 0 << 8);	/* Ignore context ID */
+
+	/* No need to worry about single address comparators. */
+	config->enable_ctrl2 = 0x0;
+
+	/* Bit 0 is address range comparator 1 */
+	config->enable_ctrl1 = ETMTECR1_ADDR_COMP_1;
+
+	/*
+	 * On ETMv3.5:
+	 * ETMACTRn[13,11] == Non-secure state comparison control
+	 * ETMACTRn[12,10] == Secure state comparison control
+	 *
+	 * b00 == Match in all modes in this state
+	 * b01 == Do not match in any mode in this state
+	 * b10 == Match in all modes except user mode in this state
+	 * b11 == Match only in user mode in this state
+	 */
+
+	/* Tracing in secure mode is not supported at this time */
+	flags |= (0 << 12 | 1 << 10);
+
+	if (mode & ETM_MODE_EXCL_USER) {
+		/* exclude user, match all modes except user mode */
+		flags |= (1 << 13 | 0 << 11);
+	} else {
+		/* exclude kernel, match only in user mode */
+		flags |= (1 << 13 | 1 << 11);
+	}
+
+	/*
+	 * The ETMTEEVR register is already set to "hard wire A".  As such
+	 * all there is to do is setup an address comparator that spans
+	 * the entire address range and configure the state and mode bits.
+	 */
+	config->addr_val[0] = (u32) 0x0;
+	config->addr_val[1] = (u32) ~0x0;
+	config->addr_acctype[0] = flags;
+	config->addr_acctype[1] = flags;
+	config->addr_type[0] = ETM_ADDR_TYPE_RANGE;
+	config->addr_type[1] = ETM_ADDR_TYPE_RANGE;
+}
+
+#define ETM3X_SUPPORTED_OPTIONS (ETMCR_CYC_ACC | \
+				 ETMCR_TIMESTAMP_EN | \
+				 ETMCR_RETURN_STACK)
+
+static int etm_parse_event_config(struct etm_drvdata *drvdata,
+				  struct perf_event *event)
+{
+	struct etm_config *config = &drvdata->config;
+	struct perf_event_attr *attr = &event->attr;
+
+	if (!attr)
+		return -EINVAL;
+
+	/* Clear configuration from previous run */
+	memset(config, 0, sizeof(struct etm_config));
+
+	if (attr->exclude_kernel)
+		config->mode = ETM_MODE_EXCL_KERN;
+
+	if (attr->exclude_user)
+		config->mode = ETM_MODE_EXCL_USER;
+
+	/* Always start from the default config */
+	etm_set_default(config);
+
+	/*
+	 * By default the tracers are configured to trace the whole address
+	 * range.  Narrow the field only if requested by user space.
+	 */
+	if (config->mode)
+		etm_config_trace_mode(config);
+
+	/*
+	 * At this time only cycle accurate, return stack and timestamp
+	 * options are available.
+	 */
+	if (attr->config & ~ETM3X_SUPPORTED_OPTIONS)
+		return -EINVAL;
+
+	config->ctrl = attr->config;
+
+	/*
+	 * Possible to have cores with PTM (supports ret stack) and ETM
+	 * (never has ret stack) on the same SoC. So if we have a request
+	 * for return stack that can't be honoured on this core then
+	 * clear the bit - trace will still continue normally
+	 */
+	if ((config->ctrl & ETMCR_RETURN_STACK) &&
+	    !(drvdata->etmccer & ETMCCER_RETSTACK))
+		config->ctrl &= ~ETMCR_RETURN_STACK;
+
+	return 0;
+}
+
+static void etm_enable_hw(void *info)
+{
+	int i;
+	u32 etmcr;
+	struct etm_drvdata *drvdata = info;
+	struct etm_config *config = &drvdata->config;
+
+	CS_UNLOCK(drvdata->base);
+
+	/* Turn engine on */
+	etm_clr_pwrdwn(drvdata);
+	/* Apply power to trace registers */
+	etm_set_pwrup(drvdata);
+	/* Make sure all registers are accessible */
+	etm_os_unlock(drvdata);
+
+	etm_set_prog(drvdata);
+
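+	/*
+	 * With the programming bit set tracing is stopped, so the
+	 * configuration registers below can be written safely.
+	 */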
+	etmcr = etm_readl(drvdata, ETMCR);
+	/* Clear setting from a previous run if need be */
+	etmcr &= ~ETM3X_SUPPORTED_OPTIONS;
+	etmcr |= drvdata->port_size;
+	etmcr |= ETMCR_ETM_EN;
+	etm_writel(drvdata, config->ctrl | etmcr, ETMCR);
+	etm_writel(drvdata, config->trigger_event, ETMTRIGGER);
+	etm_writel(drvdata, config->startstop_ctrl, ETMTSSCR);
+	etm_writel(drvdata, config->enable_event, ETMTEEVR);
+	etm_writel(drvdata, config->enable_ctrl1, ETMTECR1);
+	etm_writel(drvdata, config->fifofull_level, ETMFFLR);
+	for (i = 0; i < drvdata->nr_addr_cmp; i++) {
+		etm_writel(drvdata, config->addr_val[i], ETMACVRn(i));
+		etm_writel(drvdata, config->addr_acctype[i], ETMACTRn(i));
+	}
+	for (i = 0; i < drvdata->nr_cntr; i++) {
+		etm_writel(drvdata, config->cntr_rld_val[i], ETMCNTRLDVRn(i));
+		etm_writel(drvdata, config->cntr_event[i], ETMCNTENRn(i));
+		etm_writel(drvdata, config->cntr_rld_event[i],
+			   ETMCNTRLDEVRn(i));
+		etm_writel(drvdata, config->cntr_val[i], ETMCNTVRn(i));
+	}
+	etm_writel(drvdata, config->seq_12_event, ETMSQ12EVR);
+	etm_writel(drvdata, config->seq_21_event, ETMSQ21EVR);
+	etm_writel(drvdata, config->seq_23_event, ETMSQ23EVR);
+	etm_writel(drvdata, config->seq_31_event, ETMSQ31EVR);
+	etm_writel(drvdata, config->seq_32_event, ETMSQ32EVR);
+	etm_writel(drvdata, config->seq_13_event, ETMSQ13EVR);
+	etm_writel(drvdata, config->seq_curr_state, ETMSQR);
+	for (i = 0; i < drvdata->nr_ext_out; i++)
+		etm_writel(drvdata, ETM_DEFAULT_EVENT_VAL, ETMEXTOUTEVRn(i));
+	for (i = 0; i < drvdata->nr_ctxid_cmp; i++)
+		etm_writel(drvdata, config->ctxid_pid[i], ETMCIDCVRn(i));
+	etm_writel(drvdata, config->ctxid_mask, ETMCIDCMR);
+	etm_writel(drvdata, config->sync_freq, ETMSYNCFR);
+	/* No external input selected */
+	etm_writel(drvdata, 0x0, ETMEXTINSELR);
+	etm_writel(drvdata, config->timestamp_event, ETMTSEVR);
+	/* No auxiliary control selected */
+	etm_writel(drvdata, 0x0, ETMAUXCR);
+	etm_writel(drvdata, drvdata->traceid, ETMTRACEIDR);
+	/* No VMID comparator value selected */
+	etm_writel(drvdata, 0x0, ETMVMIDCVR);
+
+	etm_clr_prog(drvdata);
+	CS_LOCK(drvdata->base);
+
+	dev_dbg(drvdata->dev, "cpu: %d enable smp call done\n", drvdata->cpu);
+}
+
+static int etm_cpu_id(struct coresight_device *csdev)
+{
+	struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	return drvdata->cpu;
+}
+
+int etm_get_trace_id(struct etm_drvdata *drvdata)
+{
+	unsigned long flags;
+	int trace_id = -1;
+
+	if (!drvdata)
+		goto out;
+
+	if (!local_read(&drvdata->mode))
+		return drvdata->traceid;
+
+	pm_runtime_get_sync(drvdata->dev);
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+
+	CS_UNLOCK(drvdata->base);
+	trace_id = (etm_readl(drvdata, ETMTRACEIDR) & ETM_TRACEID_MASK);
+	CS_LOCK(drvdata->base);
+
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+	pm_runtime_put(drvdata->dev);
+
+out:
+	return trace_id;
+}
+
+static int etm_trace_id(struct coresight_device *csdev)
+{
+	struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	return etm_get_trace_id(drvdata);
+}
+
+static int etm_enable_perf(struct coresight_device *csdev,
+			   struct perf_event *event)
+{
+	struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	if (WARN_ON_ONCE(drvdata->cpu != smp_processor_id()))
+		return -EINVAL;
+
+	/* Configure the tracer based on the session's specifics */
+	etm_parse_event_config(drvdata, event);
+	/* And enable it */
+	etm_enable_hw(drvdata);
+
+	return 0;
+}
+
+static int etm_enable_sysfs(struct coresight_device *csdev)
+{
+	struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	int ret;
+
+	spin_lock(&drvdata->spinlock);
+
+	/*
+	 * Configure the ETM only if the CPU is online.  If it isn't online
+	 * hw configuration will take place on the local CPU during bring up.
+	 */
+	if (cpu_online(drvdata->cpu)) {
+		ret = smp_call_function_single(drvdata->cpu,
+					       etm_enable_hw, drvdata, 1);
+		if (ret)
+			goto err;
+	}
+
+	drvdata->sticky_enable = true;
+	spin_unlock(&drvdata->spinlock);
+
+	dev_info(drvdata->dev, "ETM tracing enabled\n");
+	return 0;
+
+err:
+	spin_unlock(&drvdata->spinlock);
+	return ret;
+}
+
+static int etm_enable(struct coresight_device *csdev,
+		      struct perf_event *event, u32 mode)
+{
+	int ret;
+	u32 val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
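+	/* Atomically claim the tracer: only a transition from DISABLED succeeds */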
+	val = local_cmpxchg(&drvdata->mode, CS_MODE_DISABLED, mode);
+
+	/* Someone is already using the tracer */
+	if (val)
+		return -EBUSY;
+
+	switch (mode) {
+	case CS_MODE_SYSFS:
+		ret = etm_enable_sysfs(csdev);
+		break;
+	case CS_MODE_PERF:
+		ret = etm_enable_perf(csdev, event);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	/* The tracer didn't start */
+	if (ret)
+		local_set(&drvdata->mode, CS_MODE_DISABLED);
+
+	return ret;
+}
+
+static void etm_disable_hw(void *info)
+{
+	int i;
+	struct etm_drvdata *drvdata = info;
+	struct etm_config *config = &drvdata->config;
+
+	CS_UNLOCK(drvdata->base);
+	etm_set_prog(drvdata);
+
+	/* Read back sequencer and counters for post trace analysis */
+	config->seq_curr_state = (etm_readl(drvdata, ETMSQR) & ETM_SQR_MASK);
+
+	for (i = 0; i < drvdata->nr_cntr; i++)
+		config->cntr_val[i] = etm_readl(drvdata, ETMCNTVRn(i));
+
+	etm_set_pwrdwn(drvdata);
+	CS_LOCK(drvdata->base);
+
+	dev_dbg(drvdata->dev, "cpu: %d disable smp call done\n", drvdata->cpu);
+}
+
+static void etm_disable_perf(struct coresight_device *csdev)
+{
+	struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	if (WARN_ON_ONCE(drvdata->cpu != smp_processor_id()))
+		return;
+
+	CS_UNLOCK(drvdata->base);
+
+	/* Setting the prog bit disables tracing immediately */
+	etm_set_prog(drvdata);
+
+	/*
+	 * There is no way to know when the tracer will be used again so
+	 * power down the tracer.
+	 */
+	etm_set_pwrdwn(drvdata);
+
+	CS_LOCK(drvdata->base);
+}
+
+static void etm_disable_sysfs(struct coresight_device *csdev)
+{
+	struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	/*
+	 * Taking hotplug lock here protects from clocks getting disabled
+	 * with tracing being left on (crash scenario) if user disable occurs
+	 * after cpu online mask indicates the cpu is offline but before the
+	 * DYING hotplug callback is serviced by the ETM driver.
+	 */
+	cpus_read_lock();
+	spin_lock(&drvdata->spinlock);
+
+	/*
+	 * Executing etm_disable_hw on the cpu whose ETM is being disabled
+	 * ensures that register writes occur when cpu is powered.
+	 */
+	smp_call_function_single(drvdata->cpu, etm_disable_hw, drvdata, 1);
+
+	spin_unlock(&drvdata->spinlock);
+	cpus_read_unlock();
+
+	dev_info(drvdata->dev, "ETM tracing disabled\n");
+}
+
+static void etm_disable(struct coresight_device *csdev,
+			struct perf_event *event)
+{
+	u32 mode;
+	struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	/*
+	 * For as long as the tracer isn't disabled another entity can't
+	 * change its status.  As such we can read the status here without
+	 * fearing it will change under us.
+	 */
+	mode = local_read(&drvdata->mode);
+
+	switch (mode) {
+	case CS_MODE_DISABLED:
+		break;
+	case CS_MODE_SYSFS:
+		etm_disable_sysfs(csdev);
+		break;
+	case CS_MODE_PERF:
+		etm_disable_perf(csdev);
+		break;
+	default:
+		WARN_ON_ONCE(mode);
+		return;
+	}
+
+	if (mode)
+		local_set(&drvdata->mode, CS_MODE_DISABLED);
+}
+
+static const struct coresight_ops_source etm_source_ops = {
+	.cpu_id		= etm_cpu_id,
+	.trace_id	= etm_trace_id,
+	.enable		= etm_enable,
+	.disable	= etm_disable,
+};
+
+static const struct coresight_ops etm_cs_ops = {
+	.source_ops	= &etm_source_ops,
+};
+
+static int etm_online_cpu(unsigned int cpu)
+{
+	if (!etmdrvdata[cpu])
+		return 0;
+
+	if (etmdrvdata[cpu]->boot_enable && !etmdrvdata[cpu]->sticky_enable)
+		coresight_enable(etmdrvdata[cpu]->csdev);
+	return 0;
+}
+
+static int etm_starting_cpu(unsigned int cpu)
+{
+	if (!etmdrvdata[cpu])
+		return 0;
+
+	spin_lock(&etmdrvdata[cpu]->spinlock);
+	if (!etmdrvdata[cpu]->os_unlock) {
+		etm_os_unlock(etmdrvdata[cpu]);
+		etmdrvdata[cpu]->os_unlock = true;
+	}
+
+	if (local_read(&etmdrvdata[cpu]->mode))
+		etm_enable_hw(etmdrvdata[cpu]);
+	spin_unlock(&etmdrvdata[cpu]->spinlock);
+	return 0;
+}
+
+static int etm_dying_cpu(unsigned int cpu)
+{
+	if (!etmdrvdata[cpu])
+		return 0;
+
+	spin_lock(&etmdrvdata[cpu]->spinlock);
+	if (local_read(&etmdrvdata[cpu]->mode))
+		etm_disable_hw(etmdrvdata[cpu]);
+	spin_unlock(&etmdrvdata[cpu]->spinlock);
+	return 0;
+}
+
+static bool etm_arch_supported(u8 arch)
+{
+	switch (arch) {
+	case ETM_ARCH_V3_3:
+		break;
+	case ETM_ARCH_V3_5:
+		break;
+	case PFT_ARCH_V1_0:
+		break;
+	case PFT_ARCH_V1_1:
+		break;
+	default:
+		return false;
+	}
+	return true;
+}
+
+static void etm_init_arch_data(void *info)
+{
+	u32 etmidr;
+	u32 etmccr;
+	struct etm_drvdata *drvdata = info;
+
+	/* Make sure all registers are accessible */
+	etm_os_unlock(drvdata);
+
+	CS_UNLOCK(drvdata->base);
+
+	/* First dummy read */
+	(void)etm_readl(drvdata, ETMPDSR);
+	/* Provide power to ETM: ETMPDCR[3] == 1 */
+	etm_set_pwrup(drvdata);
+	/*
+	 * Clear power down bit since when this bit is set writes to
+	 * certain registers might be ignored.
+	 */
+	etm_clr_pwrdwn(drvdata);
+	/*
+	 * Set prog bit. It will be set from reset but this is included to
+	 * ensure it is set
+	 */
+	etm_set_prog(drvdata);
+
+	/* Find all capabilities */
+	etmidr = etm_readl(drvdata, ETMIDR);
+	drvdata->arch = BMVAL(etmidr, 4, 11);
+	drvdata->port_size = etm_readl(drvdata, ETMCR) & PORT_SIZE_MASK;
+
+	drvdata->etmccer = etm_readl(drvdata, ETMCCER);
+	etmccr = etm_readl(drvdata, ETMCCR);
+	drvdata->etmccr = etmccr;
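+	/*
+	 * ETMCCR describes the implemented resources: address comparator
+	 * pairs (two single comparators each), counters, external
+	 * inputs/outputs and context ID comparators.
+	 */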
+	drvdata->nr_addr_cmp = BMVAL(etmccr, 0, 3) * 2;
+	drvdata->nr_cntr = BMVAL(etmccr, 13, 15);
+	drvdata->nr_ext_inp = BMVAL(etmccr, 17, 19);
+	drvdata->nr_ext_out = BMVAL(etmccr, 20, 22);
+	drvdata->nr_ctxid_cmp = BMVAL(etmccr, 24, 25);
+
+	etm_set_pwrdwn(drvdata);
+	etm_clr_pwrup(drvdata);
+	CS_LOCK(drvdata->base);
+}
+
+static void etm_init_trace_id(struct etm_drvdata *drvdata)
+{
+	drvdata->traceid = coresight_get_trace_id(drvdata->cpu);
+}
+
+static int etm_probe(struct amba_device *adev, const struct amba_id *id)
+{
+	int ret;
+	void __iomem *base;
+	struct device *dev = &adev->dev;
+	struct coresight_platform_data *pdata = NULL;
+	struct etm_drvdata *drvdata;
+	struct resource *res = &adev->res;
+	struct coresight_desc desc = { 0 };
+	struct device_node *np = adev->dev.of_node;
+
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+
+	if (np) {
+		pdata = of_get_coresight_platform_data(dev, np);
+		if (IS_ERR(pdata))
+			return PTR_ERR(pdata);
+
+		adev->dev.platform_data = pdata;
+		drvdata->use_cp14 = of_property_read_bool(np, "arm,cp14");
+	}
+
+	drvdata->dev = &adev->dev;
+	dev_set_drvdata(dev, drvdata);
+
+	/* Validity for the resource is already checked by the AMBA core */
+	base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	drvdata->base = base;
+
+	spin_lock_init(&drvdata->spinlock);
+
+	drvdata->atclk = devm_clk_get(&adev->dev, "atclk"); /* optional */
+	if (!IS_ERR(drvdata->atclk)) {
+		ret = clk_prepare_enable(drvdata->atclk);
+		if (ret)
+			return ret;
+	}
+
+	drvdata->cpu = pdata ? pdata->cpu : 0;
+
+	cpus_read_lock();
+	etmdrvdata[drvdata->cpu] = drvdata;
+
+	if (smp_call_function_single(drvdata->cpu,
+				     etm_init_arch_data,  drvdata, 1))
+		dev_err(dev, "ETM arch init failed\n");
+
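+	/* The hotplug callbacks are registered only once, with the first ETM/PTM */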
+	if (!etm_count++) {
+		cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ARM_CORESIGHT_STARTING,
+						     "arm/coresight:starting",
+						     etm_starting_cpu, etm_dying_cpu);
+		ret = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN,
+							   "arm/coresight:online",
+							   etm_online_cpu, NULL);
+		if (ret < 0)
+			goto err_arch_supported;
+		hp_online = ret;
+	}
+	cpus_read_unlock();
+
+	if (etm_arch_supported(drvdata->arch) == false) {
+		ret = -EINVAL;
+		goto err_arch_supported;
+	}
+
+	etm_init_trace_id(drvdata);
+	etm_set_default(&drvdata->config);
+
+	desc.type = CORESIGHT_DEV_TYPE_SOURCE;
+	desc.subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_PROC;
+	desc.ops = &etm_cs_ops;
+	desc.pdata = pdata;
+	desc.dev = dev;
+	desc.groups = coresight_etm_groups;
+	drvdata->csdev = coresight_register(&desc);
+	if (IS_ERR(drvdata->csdev)) {
+		ret = PTR_ERR(drvdata->csdev);
+		goto err_arch_supported;
+	}
+
+	ret = etm_perf_symlink(drvdata->csdev, true);
+	if (ret) {
+		coresight_unregister(drvdata->csdev);
+		goto err_arch_supported;
+	}
+
+	pm_runtime_put(&adev->dev);
+	dev_info(dev, "%s initialized\n", (char *)id->data);
+	if (boot_enable) {
+		coresight_enable(drvdata->csdev);
+		drvdata->boot_enable = true;
+	}
+
+	return 0;
+
+err_arch_supported:
+	if (--etm_count == 0) {
+		cpuhp_remove_state_nocalls(CPUHP_AP_ARM_CORESIGHT_STARTING);
+		if (hp_online)
+			cpuhp_remove_state_nocalls(hp_online);
+	}
+	return ret;
+}
+
+#ifdef CONFIG_PM
+static int etm_runtime_suspend(struct device *dev)
+{
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev);
+
+	if (drvdata && !IS_ERR(drvdata->atclk))
+		clk_disable_unprepare(drvdata->atclk);
+
+	return 0;
+}
+
+static int etm_runtime_resume(struct device *dev)
+{
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev);
+
+	if (drvdata && !IS_ERR(drvdata->atclk))
+		clk_prepare_enable(drvdata->atclk);
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops etm_dev_pm_ops = {
+	SET_RUNTIME_PM_OPS(etm_runtime_suspend, etm_runtime_resume, NULL)
+};
+
+static const struct amba_id etm_ids[] = {
+	{	/* ETM 3.3 */
+		.id	= 0x000bb921,
+		.mask	= 0x000fffff,
+		.data	= "ETM 3.3",
+	},
+	{	/* ETM 3.5 - Cortex-A5 */
+		.id	= 0x000bb955,
+		.mask	= 0x000fffff,
+		.data	= "ETM 3.5",
+	},
+	{	/* ETM 3.5 */
+		.id	= 0x000bb956,
+		.mask	= 0x000fffff,
+		.data	= "ETM 3.5",
+	},
+	{	/* PTM 1.0 */
+		.id	= 0x000bb950,
+		.mask	= 0x000fffff,
+		.data	= "PTM 1.0",
+	},
+	{	/* PTM 1.1 */
+		.id	= 0x000bb95f,
+		.mask	= 0x000fffff,
+		.data	= "PTM 1.1",
+	},
+	{	/* PTM 1.1 Qualcomm */
+		.id	= 0x000b006f,
+		.mask	= 0x000fffff,
+		.data	= "PTM 1.1",
+	},
+	{ 0, 0},
+};
+
+static struct amba_driver etm_driver = {
+	.drv = {
+		.name	= "coresight-etm3x",
+		.owner	= THIS_MODULE,
+		.pm	= &etm_dev_pm_ops,
+		.suppress_bind_attrs = true,
+	},
+	.probe		= etm_probe,
+	.id_table	= etm_ids,
+};
+builtin_amba_driver(etm_driver);
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
new file mode 100644
index 0000000..a0365e2
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
@@ -0,0 +1,2175 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#include <linux/pid_namespace.h>
+#include <linux/pm_runtime.h>
+#include <linux/sysfs.h>
+#include "coresight-etm4x.h"
+#include "coresight-priv.h"
+
+static int etm4_set_mode_exclude(struct etmv4_drvdata *drvdata, bool exclude)
+{
+	u8 idx;
+	struct etmv4_config *config = &drvdata->config;
+
+	idx = config->addr_idx;
+
+	/*
+	 * TRCACATRn.TYPE bit[1:0]: type of comparison
+	 * the trace unit performs
+	 */
+	if (BMVAL(config->addr_acc[idx], 0, 1) == ETM_INSTR_ADDR) {
+		if (idx % 2 != 0)
+			return -EINVAL;
+
+		/*
+		 * We are performing instruction address comparison. Set the
+		 * relevant bit of ViewInst Include/Exclude Control register
+		 * for corresponding address comparator pair.
+		 */
+		if (config->addr_type[idx] != ETM_ADDR_TYPE_RANGE ||
+		    config->addr_type[idx + 1] != ETM_ADDR_TYPE_RANGE)
+			return -EINVAL;
+
+		if (exclude == true) {
+			/*
+			 * Set exclude bit and unset the include bit
+			 * corresponding to comparator pair
+			 */
+			config->viiectlr |= BIT(idx / 2 + 16);
+			config->viiectlr &= ~BIT(idx / 2);
+		} else {
+			/*
+			 * Set include bit and unset exclude bit
+			 * corresponding to comparator pair
+			 */
+			config->viiectlr |= BIT(idx / 2);
+			config->viiectlr &= ~BIT(idx / 2 + 16);
+		}
+	}
+	return 0;
+}
+
+static ssize_t nr_pe_cmp_show(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->nr_pe_cmp;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+static DEVICE_ATTR_RO(nr_pe_cmp);
+
+static ssize_t nr_addr_cmp_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->nr_addr_cmp;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+static DEVICE_ATTR_RO(nr_addr_cmp);
+
+static ssize_t nr_cntr_show(struct device *dev,
+			    struct device_attribute *attr,
+			    char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->nr_cntr;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+static DEVICE_ATTR_RO(nr_cntr);
+
+static ssize_t nr_ext_inp_show(struct device *dev,
+			       struct device_attribute *attr,
+			       char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->nr_ext_inp;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+static DEVICE_ATTR_RO(nr_ext_inp);
+
+static ssize_t numcidc_show(struct device *dev,
+			    struct device_attribute *attr,
+			    char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->numcidc;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+static DEVICE_ATTR_RO(numcidc);
+
+static ssize_t numvmidc_show(struct device *dev,
+			     struct device_attribute *attr,
+			     char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->numvmidc;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+static DEVICE_ATTR_RO(numvmidc);
+
+static ssize_t nrseqstate_show(struct device *dev,
+			       struct device_attribute *attr,
+			       char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->nrseqstate;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+static DEVICE_ATTR_RO(nrseqstate);
+
+static ssize_t nr_resource_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->nr_resource;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+static DEVICE_ATTR_RO(nr_resource);
+
+static ssize_t nr_ss_cmp_show(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->nr_ss_cmp;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+static DEVICE_ATTR_RO(nr_ss_cmp);
+
+static ssize_t reset_store(struct device *dev,
+			   struct device_attribute *attr,
+			   const char *buf, size_t size)
+{
+	int i;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	if (val)
+		config->mode = 0x0;
+
+	/* Disable data tracing: do not trace load and store data transfers */
+	config->mode &= ~(ETM_MODE_LOAD | ETM_MODE_STORE);
+	config->cfg &= ~(BIT(1) | BIT(2));
+
+	/* Disable data value and data address tracing */
+	config->mode &= ~(ETM_MODE_DATA_TRACE_ADDR |
+			   ETM_MODE_DATA_TRACE_VAL);
+	config->cfg &= ~(BIT(16) | BIT(17));
+
+	/* Disable all events tracing */
+	config->eventctrl0 = 0x0;
+	config->eventctrl1 = 0x0;
+
+	/* Disable timestamp event */
+	config->ts_ctrl = 0x0;
+
+	/* Disable stalling */
+	config->stall_ctrl = 0x0;
+
+	/* Reset trace synchronization period to 2^8 = 256 bytes */
+	if (drvdata->syncpr == false)
+		config->syncfreq = 0x8;
+
+	/*
+	 * Enable ViewInst to trace everything with start-stop logic in
+	 * started state. ARM recommends start-stop logic is set before
+	 * each trace run.
+	 */
+	config->vinst_ctrl |= BIT(0);
+	if (drvdata->nr_addr_cmp > 0) {
+		config->mode |= ETM_MODE_VIEWINST_STARTSTOP;
+		/* SSSTATUS, bit[9] */
+		config->vinst_ctrl |= BIT(9);
+	}
+
+	/* No address range filtering for ViewInst */
+	config->viiectlr = 0x0;
+
+	/* No start-stop filtering for ViewInst */
+	config->vissctlr = 0x0;
+
+	/* Disable seq events */
+	for (i = 0; i < drvdata->nrseqstate - 1; i++)
+		config->seq_ctrl[i] = 0x0;
+	config->seq_rst = 0x0;
+	config->seq_state = 0x0;
+
+	/* Disable external input events */
+	config->ext_inp = 0x0;
+
+	config->cntr_idx = 0x0;
+	for (i = 0; i < drvdata->nr_cntr; i++) {
+		config->cntrldvr[i] = 0x0;
+		config->cntr_ctrl[i] = 0x0;
+		config->cntr_val[i] = 0x0;
+	}
+
+	config->res_idx = 0x0;
+	for (i = 0; i < drvdata->nr_resource; i++)
+		config->res_ctrl[i] = 0x0;
+
+	for (i = 0; i < drvdata->nr_ss_cmp; i++) {
+		config->ss_ctrl[i] = 0x0;
+		config->ss_pe_cmp[i] = 0x0;
+	}
+
+	config->addr_idx = 0x0;
+	for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
+		config->addr_val[i] = 0x0;
+		config->addr_acc[i] = 0x0;
+		config->addr_type[i] = ETM_ADDR_TYPE_NONE;
+	}
+
+	config->ctxid_idx = 0x0;
+	for (i = 0; i < drvdata->numcidc; i++)
+		config->ctxid_pid[i] = 0x0;
+
+	config->ctxid_mask0 = 0x0;
+	config->ctxid_mask1 = 0x0;
+
+	config->vmid_idx = 0x0;
+	for (i = 0; i < drvdata->numvmidc; i++)
+		config->vmid_val[i] = 0x0;
+	config->vmid_mask0 = 0x0;
+	config->vmid_mask1 = 0x0;
+
+	drvdata->trcid = drvdata->cpu + 1;
+
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_WO(reset);
+
+static ssize_t mode_show(struct device *dev,
+			 struct device_attribute *attr,
+			 char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = config->mode;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t mode_store(struct device *dev,
+			  struct device_attribute *attr,
+			  const char *buf, size_t size)
+{
+	unsigned long val, mode;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	config->mode = val & ETMv4_MODE_ALL;
+
+	if (config->mode & ETM_MODE_EXCLUDE)
+		etm4_set_mode_exclude(drvdata, true);
+	else
+		etm4_set_mode_exclude(drvdata, false);
+
+	if (drvdata->instrp0 == true) {
+		/* start by clearing instruction P0 field */
+		config->cfg  &= ~(BIT(1) | BIT(2));
+		if (config->mode & ETM_MODE_LOAD)
+			/* 0b01 Trace load instructions as P0 instructions */
+			config->cfg  |= BIT(1);
+		if (config->mode & ETM_MODE_STORE)
+			/* 0b10 Trace store instructions as P0 instructions */
+			config->cfg  |= BIT(2);
+		if (config->mode & ETM_MODE_LOAD_STORE)
+			/*
+			 * 0b11 Trace load and store instructions
+			 * as P0 instructions
+			 */
+			config->cfg  |= BIT(1) | BIT(2);
+	}
+
+	/* bit[3], Branch broadcast mode */
+	if ((config->mode & ETM_MODE_BB) && (drvdata->trcbb == true))
+		config->cfg |= BIT(3);
+	else
+		config->cfg &= ~BIT(3);
+
+	/* bit[4], Cycle counting instruction trace bit */
+	if ((config->mode & ETMv4_MODE_CYCACC) &&
+		(drvdata->trccci == true))
+		config->cfg |= BIT(4);
+	else
+		config->cfg &= ~BIT(4);
+
+	/* bit[6], Context ID tracing bit */
+	if ((config->mode & ETMv4_MODE_CTXID) && (drvdata->ctxid_size))
+		config->cfg |= BIT(6);
+	else
+		config->cfg &= ~BIT(6);
+
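+	/* bit[7], Virtual context identifier tracing bit */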
+	if ((config->mode & ETM_MODE_VMID) && (drvdata->vmid_size))
+		config->cfg |= BIT(7);
+	else
+		config->cfg &= ~BIT(7);
+
+	/* bits[10:8], Conditional instruction tracing field */
+	mode = ETM_MODE_COND(config->mode);
+	if (drvdata->trccond == true) {
+		config->cfg &= ~(BIT(8) | BIT(9) | BIT(10));
+		config->cfg |= mode << 8;
+	}
+
+	/* bit[11], Global timestamp tracing bit */
+	if ((config->mode & ETMv4_MODE_TIMESTAMP) && (drvdata->ts_size))
+		config->cfg |= BIT(11);
+	else
+		config->cfg &= ~BIT(11);
+
+	/* bit[12], Return stack enable bit */
+	if ((config->mode & ETM_MODE_RETURNSTACK) &&
+					(drvdata->retstack == true))
+		config->cfg |= BIT(12);
+	else
+		config->cfg &= ~BIT(12);
+
+	/* bits[14:13], Q element enable field */
+	mode = ETM_MODE_QELEM(config->mode);
+	/* start by clearing QE bits */
+	config->cfg &= ~(BIT(13) | BIT(14));
+	/* if supported, Q elements with instruction counts are enabled */
+	if ((mode & BIT(0)) && (drvdata->q_support & BIT(0)))
+		config->cfg |= BIT(13);
+	/*
+	 * if supported, Q elements with and without instruction
+	 * counts are enabled
+	 */
+	if ((mode & BIT(1)) && (drvdata->q_support & BIT(1)))
+		config->cfg |= BIT(14);
+
+	/* bit[11], AMBA Trace Bus (ATB) trigger enable bit */
+	if ((config->mode & ETM_MODE_ATB_TRIGGER) &&
+	    (drvdata->atbtrig == true))
+		config->eventctrl1 |= BIT(11);
+	else
+		config->eventctrl1 &= ~BIT(11);
+
+	/* bit[12], Low-power state behavior override bit */
+	if ((config->mode & ETM_MODE_LPOVERRIDE) &&
+	    (drvdata->lpoverride == true))
+		config->eventctrl1 |= BIT(12);
+	else
+		config->eventctrl1 &= ~BIT(12);
+
+	/* bit[8], Instruction stall bit */
+	if (config->mode & ETM_MODE_ISTALL_EN)
+		config->stall_ctrl |= BIT(8);
+	else
+		config->stall_ctrl &= ~BIT(8);
+
+	/* bit[10], Prioritize instruction trace bit */
+	if (config->mode & ETM_MODE_INSTPRIO)
+		config->stall_ctrl |= BIT(10);
+	else
+		config->stall_ctrl &= ~BIT(10);
+
+	/* bit[13], Trace overflow prevention bit */
+	if ((config->mode & ETM_MODE_NOOVERFLOW) &&
+		(drvdata->nooverflow == true))
+		config->stall_ctrl |= BIT(13);
+	else
+		config->stall_ctrl &= ~BIT(13);
+
+	/* bit[9] Start/stop logic control bit */
+	if (config->mode & ETM_MODE_VIEWINST_STARTSTOP)
+		config->vinst_ctrl |= BIT(9);
+	else
+		config->vinst_ctrl &= ~BIT(9);
+
+	/* bit[10], Whether a trace unit must trace a Reset exception */
+	if (config->mode & ETM_MODE_TRACE_RESET)
+		config->vinst_ctrl |= BIT(10);
+	else
+		config->vinst_ctrl &= ~BIT(10);
+
+	/* bit[11], Whether a trace unit must trace a system error exception */
+	if ((config->mode & ETM_MODE_TRACE_ERR) &&
+		(drvdata->trc_error == true))
+		config->vinst_ctrl |= BIT(11);
+	else
+		config->vinst_ctrl &= ~BIT(11);
+
+	if (config->mode & (ETM_MODE_EXCL_KERN | ETM_MODE_EXCL_USER))
+		etm4_config_trace_mode(config);
+
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(mode);
+
+static ssize_t pe_show(struct device *dev,
+		       struct device_attribute *attr,
+		       char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = config->pe_sel;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t pe_store(struct device *dev,
+			struct device_attribute *attr,
+			const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	if (val > drvdata->nr_pe) {
+		spin_unlock(&drvdata->spinlock);
+		return -EINVAL;
+	}
+
+	config->pe_sel = val;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(pe);
+
+static ssize_t event_show(struct device *dev,
+			  struct device_attribute *attr,
+			  char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = config->eventctrl0;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t event_store(struct device *dev,
+			   struct device_attribute *attr,
+			   const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	switch (drvdata->nr_event) {
+	case 0x0:
+		/* EVENT0, bits[7:0] */
+		config->eventctrl0 = val & 0xFF;
+		break;
+	case 0x1:
+		 /* EVENT1, bits[15:8] */
+		config->eventctrl0 = val & 0xFFFF;
+		break;
+	case 0x2:
+		/* EVENT2, bits[23:16] */
+		config->eventctrl0 = val & 0xFFFFFF;
+		break;
+	case 0x3:
+		/* EVENT3, bits[31:24] */
+		config->eventctrl0 = val;
+		break;
+	default:
+		break;
+	}
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(event);
+
+static ssize_t event_instren_show(struct device *dev,
+				  struct device_attribute *attr,
+				  char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = BMVAL(config->eventctrl1, 0, 3);
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t event_instren_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	/* start by clearing all instruction event enable bits */
+	config->eventctrl1 &= ~(BIT(0) | BIT(1) | BIT(2) | BIT(3));
+	switch (drvdata->nr_event) {
+	case 0x0:
+		/* generate Event element for event 1 */
+		config->eventctrl1 |= val & BIT(1);
+		break;
+	case 0x1:
+		/* generate Event element for event 1 and 2 */
+		config->eventctrl1 |= val & (BIT(0) | BIT(1));
+		break;
+	case 0x2:
+		/* generate Event element for event 1, 2 and 3 */
+		config->eventctrl1 |= val & (BIT(0) | BIT(1) | BIT(2));
+		break;
+	case 0x3:
+		/* generate Event element for all 4 events */
+		config->eventctrl1 |= val & 0xF;
+		break;
+	default:
+		break;
+	}
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(event_instren);
+
+static ssize_t event_ts_show(struct device *dev,
+			     struct device_attribute *attr,
+			     char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = config->ts_ctrl;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t event_ts_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+	if (!drvdata->ts_size)
+		return -EINVAL;
+
+	config->ts_ctrl = val & ETMv4_EVENT_MASK;
+	return size;
+}
+static DEVICE_ATTR_RW(event_ts);
+
+static ssize_t syncfreq_show(struct device *dev,
+			     struct device_attribute *attr,
+			     char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = config->syncfreq;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t syncfreq_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+	if (drvdata->syncpr == true)
+		return -EINVAL;
+
+	config->syncfreq = val & ETMv4_SYNC_MASK;
+	return size;
+}
+static DEVICE_ATTR_RW(syncfreq);
+
+static ssize_t cyc_threshold_show(struct device *dev,
+				  struct device_attribute *attr,
+				  char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = config->ccctlr;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t cyc_threshold_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+	if (val < drvdata->ccitmin)
+		return -EINVAL;
+
+	config->ccctlr = val & ETM_CYC_THRESHOLD_MASK;
+	return size;
+}
+static DEVICE_ATTR_RW(cyc_threshold);
+
+static ssize_t bb_ctrl_show(struct device *dev,
+			    struct device_attribute *attr,
+			    char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = config->bb_ctrl;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t bb_ctrl_store(struct device *dev,
+			     struct device_attribute *attr,
+			     const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+	if (drvdata->trcbb == false)
+		return -EINVAL;
+	if (!drvdata->nr_addr_cmp)
+		return -EINVAL;
+	/*
+	 * Bit[7:0] selects which address range comparator is used for
+	 * branch broadcast control.
+	 */
+	if (BMVAL(val, 0, 7) > drvdata->nr_addr_cmp)
+		return -EINVAL;
+
+	config->bb_ctrl = val;
+	return size;
+}
+static DEVICE_ATTR_RW(bb_ctrl);
+
+static ssize_t event_vinst_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = config->vinst_ctrl & ETMv4_EVENT_MASK;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t event_vinst_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	val &= ETMv4_EVENT_MASK;
+	config->vinst_ctrl &= ~ETMv4_EVENT_MASK;
+	config->vinst_ctrl |= val;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(event_vinst);
+
+static ssize_t s_exlevel_vinst_show(struct device *dev,
+				    struct device_attribute *attr,
+				    char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = BMVAL(config->vinst_ctrl, 16, 19);
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t s_exlevel_vinst_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	/* clear all EXLEVEL_S bits (bit[18] is never implemented) */
+	config->vinst_ctrl &= ~(BIT(16) | BIT(17) | BIT(19));
+	/* enable instruction tracing for corresponding exception level */
+	val &= drvdata->s_ex_level;
+	config->vinst_ctrl |= (val << 16);
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(s_exlevel_vinst);
+
+static ssize_t ns_exlevel_vinst_show(struct device *dev,
+				     struct device_attribute *attr,
+				     char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	/* EXLEVEL_NS, bits[23:20] */
+	val = BMVAL(config->vinst_ctrl, 20, 23);
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t ns_exlevel_vinst_store(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	/* clear EXLEVEL_NS bits (bit[23] is never implemented) */
+	config->vinst_ctrl &= ~(BIT(20) | BIT(21) | BIT(22));
+	/* enable instruction tracing for corresponding exception level */
+	val &= drvdata->ns_ex_level;
+	config->vinst_ctrl |= (val << 20);
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(ns_exlevel_vinst);
+
+static ssize_t addr_idx_show(struct device *dev,
+			     struct device_attribute *attr,
+			     char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = config->addr_idx;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t addr_idx_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+	if (val >= drvdata->nr_addr_cmp * 2)
+		return -EINVAL;
+
+	/*
+	 * Use spinlock to ensure index doesn't change while it gets
+	 * dereferenced multiple times within a spinlock block elsewhere.
+	 */
+	spin_lock(&drvdata->spinlock);
+	config->addr_idx = val;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(addr_idx);
+
+static ssize_t addr_instdatatype_show(struct device *dev,
+				      struct device_attribute *attr,
+				      char *buf)
+{
+	ssize_t len;
+	u8 val, idx;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	val = BMVAL(config->addr_acc[idx], 0, 1);
+	len = scnprintf(buf, PAGE_SIZE, "%s\n",
+			val == ETM_INSTR_ADDR ? "instr" :
+			(val == ETM_DATA_LOAD_ADDR ? "data_load" :
+			(val == ETM_DATA_STORE_ADDR ? "data_store" :
+			"data_load_store")));
+	spin_unlock(&drvdata->spinlock);
+	return len;
+}
+
+static ssize_t addr_instdatatype_store(struct device *dev,
+				       struct device_attribute *attr,
+				       const char *buf, size_t size)
+{
+	u8 idx;
+	char str[20] = "";
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (strlen(buf) >= 20)
+		return -EINVAL;
+	if (sscanf(buf, "%s", str) != 1)
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
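+	/* only the "instr" type is handled; other values leave TYPE unchanged */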
+	if (!strcmp(str, "instr"))
+		/* TYPE, bits[1:0] */
+		config->addr_acc[idx] &= ~(BIT(0) | BIT(1));
+
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(addr_instdatatype);
+
+static ssize_t addr_single_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	if (!(config->addr_type[idx] == ETM_ADDR_TYPE_NONE ||
+	      config->addr_type[idx] == ETM_ADDR_TYPE_SINGLE)) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+	val = (unsigned long)config->addr_val[idx];
+	spin_unlock(&drvdata->spinlock);
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t addr_single_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t size)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	if (!(config->addr_type[idx] == ETM_ADDR_TYPE_NONE ||
+	      config->addr_type[idx] == ETM_ADDR_TYPE_SINGLE)) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+
+	config->addr_val[idx] = (u64)val;
+	config->addr_type[idx] = ETM_ADDR_TYPE_SINGLE;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(addr_single);
+
+static ssize_t addr_range_show(struct device *dev,
+			       struct device_attribute *attr,
+			       char *buf)
+{
+	u8 idx;
+	unsigned long val1, val2;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	if (idx % 2 != 0) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+	if (!((config->addr_type[idx] == ETM_ADDR_TYPE_NONE &&
+	       config->addr_type[idx + 1] == ETM_ADDR_TYPE_NONE) ||
+	      (config->addr_type[idx] == ETM_ADDR_TYPE_RANGE &&
+	       config->addr_type[idx + 1] == ETM_ADDR_TYPE_RANGE))) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+
+	val1 = (unsigned long)config->addr_val[idx];
+	val2 = (unsigned long)config->addr_val[idx + 1];
+	spin_unlock(&drvdata->spinlock);
+	return scnprintf(buf, PAGE_SIZE, "%#lx %#lx\n", val1, val2);
+}
+
+static ssize_t addr_range_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t size)
+{
+	u8 idx;
+	unsigned long val1, val2;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (sscanf(buf, "%lx %lx", &val1, &val2) != 2)
+		return -EINVAL;
+	/* lower address comparator cannot have a higher address value */
+	if (val1 > val2)
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	if (idx % 2 != 0) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+
+	if (!((config->addr_type[idx] == ETM_ADDR_TYPE_NONE &&
+	       config->addr_type[idx + 1] == ETM_ADDR_TYPE_NONE) ||
+	      (config->addr_type[idx] == ETM_ADDR_TYPE_RANGE &&
+	       config->addr_type[idx + 1] == ETM_ADDR_TYPE_RANGE))) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+
+	config->addr_val[idx] = (u64)val1;
+	config->addr_type[idx] = ETM_ADDR_TYPE_RANGE;
+	config->addr_val[idx + 1] = (u64)val2;
+	config->addr_type[idx + 1] = ETM_ADDR_TYPE_RANGE;
+	/*
+	 * Program include or exclude control bits for vinst or vdata
+	 * whenever we change addr comparators to ETM_ADDR_TYPE_RANGE
+	 */
+	if (config->mode & ETM_MODE_EXCLUDE)
+		etm4_set_mode_exclude(drvdata, true);
+	else
+		etm4_set_mode_exclude(drvdata, false);
+
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(addr_range);
+
+static ssize_t addr_start_show(struct device *dev,
+			       struct device_attribute *attr,
+			       char *buf)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+
+	if (!(config->addr_type[idx] == ETM_ADDR_TYPE_NONE ||
+	      config->addr_type[idx] == ETM_ADDR_TYPE_START)) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+
+	val = (unsigned long)config->addr_val[idx];
+	spin_unlock(&drvdata->spinlock);
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t addr_start_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t size)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	if (!drvdata->nr_addr_cmp) {
+		spin_unlock(&drvdata->spinlock);
+		return -EINVAL;
+	}
+	if (!(config->addr_type[idx] == ETM_ADDR_TYPE_NONE ||
+	      config->addr_type[idx] == ETM_ADDR_TYPE_START)) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+
+	config->addr_val[idx] = (u64)val;
+	config->addr_type[idx] = ETM_ADDR_TYPE_START;
+	config->vissctlr |= BIT(idx);
+	/* SSSTATUS, bit[9] - turn on start/stop logic */
+	config->vinst_ctrl |= BIT(9);
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(addr_start);
+
+static ssize_t addr_stop_show(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+
+	if (!(config->addr_type[idx] == ETM_ADDR_TYPE_NONE ||
+	      config->addr_type[idx] == ETM_ADDR_TYPE_STOP)) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+
+	val = (unsigned long)config->addr_val[idx];
+	spin_unlock(&drvdata->spinlock);
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t addr_stop_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t size)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	if (!drvdata->nr_addr_cmp) {
+		spin_unlock(&drvdata->spinlock);
+		return -EINVAL;
+	}
+	if (!(config->addr_type[idx] == ETM_ADDR_TYPE_NONE ||
+	       config->addr_type[idx] == ETM_ADDR_TYPE_STOP)) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+
+	config->addr_val[idx] = (u64)val;
+	config->addr_type[idx] = ETM_ADDR_TYPE_STOP;
+	config->vissctlr |= BIT(idx + 16);
+	/* SSSTATUS, bit[9] - turn on start/stop logic */
+	config->vinst_ctrl |= BIT(9);
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(addr_stop);
+
+static ssize_t addr_ctxtype_show(struct device *dev,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	ssize_t len;
+	u8 idx, val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	/* CONTEXTTYPE, bits[3:2] */
+	val = BMVAL(config->addr_acc[idx], 2, 3);
+	len = scnprintf(buf, PAGE_SIZE, "%s\n", val == ETM_CTX_NONE ? "none" :
+			(val == ETM_CTX_CTXID ? "ctxid" :
+			(val == ETM_CTX_VMID ? "vmid" : "all")));
+	spin_unlock(&drvdata->spinlock);
+	return len;
+}
+
+static ssize_t addr_ctxtype_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	u8 idx;
+	char str[10] = "";
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (strlen(buf) >= 10)
+		return -EINVAL;
+	if (sscanf(buf, "%s", str) != 1)
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	if (!strcmp(str, "none"))
+		/* start by clearing context type bits */
+		config->addr_acc[idx] &= ~(BIT(2) | BIT(3));
+	else if (!strcmp(str, "ctxid")) {
+		/* 0b01 The trace unit performs a Context ID comparison */
+		if (drvdata->numcidc) {
+			config->addr_acc[idx] |= BIT(2);
+			config->addr_acc[idx] &= ~BIT(3);
+		}
+	} else if (!strcmp(str, "vmid")) {
+		/* 0b10 The trace unit performs a VMID comparison */
+		if (drvdata->numvmidc) {
+			config->addr_acc[idx] &= ~BIT(2);
+			config->addr_acc[idx] |= BIT(3);
+		}
+	} else if (!strcmp(str, "all")) {
+		/*
+		 * 0b11 The trace unit performs both a Context ID and a
+		 * VMID comparison
+		 */
+		if (drvdata->numcidc)
+			config->addr_acc[idx] |= BIT(2);
+		if (drvdata->numvmidc)
+			config->addr_acc[idx] |= BIT(3);
+	}
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(addr_ctxtype);
+
+static ssize_t addr_context_show(struct device *dev,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	/* context ID comparator bits[6:4] */
+	val = BMVAL(config->addr_acc[idx], 4, 6);
+	spin_unlock(&drvdata->spinlock);
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t addr_context_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+	if ((drvdata->numcidc <= 1) && (drvdata->numvmidc <= 1))
+		return -EINVAL;
+	if (val >= (drvdata->numcidc >= drvdata->numvmidc ?
+		    drvdata->numcidc : drvdata->numvmidc))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	/* clear context ID comparator bits[6:4] */
+	config->addr_acc[idx] &= ~(BIT(4) | BIT(5) | BIT(6));
+	config->addr_acc[idx] |= (val << 4);
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(addr_context);
+
+static ssize_t seq_idx_show(struct device *dev,
+			    struct device_attribute *attr,
+			    char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = config->seq_idx;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t seq_idx_store(struct device *dev,
+			     struct device_attribute *attr,
+			     const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+	if (val >= drvdata->nrseqstate - 1)
+		return -EINVAL;
+
+	/*
+	 * Use spinlock to ensure index doesn't change while it gets
+	 * dereferenced multiple times within a spinlock block elsewhere.
+	 */
+	spin_lock(&drvdata->spinlock);
+	config->seq_idx = val;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(seq_idx);
+
+static ssize_t seq_state_show(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = config->seq_state;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t seq_state_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+	if (val >= drvdata->nrseqstate)
+		return -EINVAL;
+
+	config->seq_state = val;
+	return size;
+}
+static DEVICE_ATTR_RW(seq_state);
+
+static ssize_t seq_event_show(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->seq_idx;
+	val = config->seq_ctrl[idx];
+	spin_unlock(&drvdata->spinlock);
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t seq_event_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t size)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->seq_idx;
+	/* RST, bits[7:0] */
+	config->seq_ctrl[idx] = val & 0xFF;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(seq_event);
+
+static ssize_t seq_reset_event_show(struct device *dev,
+				    struct device_attribute *attr,
+				    char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = config->seq_rst;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t seq_reset_event_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+	if (!(drvdata->nrseqstate))
+		return -EINVAL;
+
+	config->seq_rst = val & ETMv4_EVENT_MASK;
+	return size;
+}
+static DEVICE_ATTR_RW(seq_reset_event);
+
+static ssize_t cntr_idx_show(struct device *dev,
+			     struct device_attribute *attr,
+			     char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = config->cntr_idx;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t cntr_idx_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+	if (val >= drvdata->nr_cntr)
+		return -EINVAL;
+
+	/*
+	 * Use spinlock to ensure index doesn't change while it gets
+	 * dereferenced multiple times within a spinlock block elsewhere.
+	 */
+	spin_lock(&drvdata->spinlock);
+	config->cntr_idx = val;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(cntr_idx);
+
+static ssize_t cntrldvr_show(struct device *dev,
+			     struct device_attribute *attr,
+			     char *buf)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->cntr_idx;
+	val = config->cntrldvr[idx];
+	spin_unlock(&drvdata->spinlock);
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t cntrldvr_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+	if (val > ETM_CNTR_MAX_VAL)
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->cntr_idx;
+	config->cntrldvr[idx] = val;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(cntrldvr);
+
+static ssize_t cntr_val_show(struct device *dev,
+			     struct device_attribute *attr,
+			     char *buf)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->cntr_idx;
+	val = config->cntr_val[idx];
+	spin_unlock(&drvdata->spinlock);
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t cntr_val_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+	if (val > ETM_CNTR_MAX_VAL)
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->cntr_idx;
+	config->cntr_val[idx] = val;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(cntr_val);
+
+static ssize_t cntr_ctrl_show(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->cntr_idx;
+	val = config->cntr_ctrl[idx];
+	spin_unlock(&drvdata->spinlock);
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t cntr_ctrl_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t size)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->cntr_idx;
+	config->cntr_ctrl[idx] = val;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(cntr_ctrl);
+
+static ssize_t res_idx_show(struct device *dev,
+			    struct device_attribute *attr,
+			    char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = config->res_idx;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t res_idx_store(struct device *dev,
+			     struct device_attribute *attr,
+			     const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+	/* Resource selector pair 0 is always implemented and reserved */
+	if ((val == 0) || (val >= drvdata->nr_resource))
+		return -EINVAL;
+
+	/*
+	 * Use spinlock to ensure index doesn't change while it gets
+	 * dereferenced multiple times within a spinlock block elsewhere.
+	 */
+	spin_lock(&drvdata->spinlock);
+	config->res_idx = val;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(res_idx);
+
+static ssize_t res_ctrl_show(struct device *dev,
+			     struct device_attribute *attr,
+			     char *buf)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->res_idx;
+	val = config->res_ctrl[idx];
+	spin_unlock(&drvdata->spinlock);
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t res_ctrl_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->res_idx;
+	/* For odd idx the pair inversion bit is RES0 */
+	if (idx % 2 != 0)
+		/* PAIRINV, bit[21] */
+		val &= ~BIT(21);
+	config->res_ctrl[idx] = val;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(res_ctrl);
+
+static ssize_t ctxid_idx_show(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = config->ctxid_idx;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t ctxid_idx_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+	if (val >= drvdata->numcidc)
+		return -EINVAL;
+
+	/*
+	 * Use spinlock to ensure index doesn't change while it gets
+	 * dereferenced multiple times within a spinlock block elsewhere.
+	 */
+	spin_lock(&drvdata->spinlock);
+	config->ctxid_idx = val;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(ctxid_idx);
+
+static ssize_t ctxid_pid_show(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	/*
+	 * Don't use contextID tracing if coming from a non-initial PID
+	 * namespace.  See comment in ctxid_pid_store().
+	 */
+	if (task_active_pid_ns(current) != &init_pid_ns)
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->ctxid_idx;
+	val = (unsigned long)config->ctxid_pid[idx];
+	spin_unlock(&drvdata->spinlock);
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t ctxid_pid_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t size)
+{
+	u8 idx;
+	unsigned long pid;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	/*
+	 * When contextID tracing is enabled the tracers will insert the
+	 * value found in the contextID register in the trace stream.  But if
+	 * a process is in a namespace the PID of that process as seen from the
+	 * namespace won't be what the kernel sees, something that makes the
+	 * feature confusing and can potentially leak kernel-only information.
+	 * As such refuse to use the feature if @current is not in the initial
+	 * PID namespace.
+	 */
+	if (task_active_pid_ns(current) != &init_pid_ns)
+		return -EINVAL;
+
+	/*
+	 * only implemented when ctxid tracing is enabled, i.e. at least one
+	 * ctxid comparator is implemented and ctxid is greater than 0 bits
+	 * in length
+	 */
+	if (!drvdata->ctxid_size || !drvdata->numcidc)
+		return -EINVAL;
+	if (kstrtoul(buf, 16, &pid))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->ctxid_idx;
+	config->ctxid_pid[idx] = (u64)pid;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(ctxid_pid);
+
+static ssize_t ctxid_masks_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	unsigned long val1, val2;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	/*
+	 * Don't use contextID tracing if coming from a non-initial PID
+	 * namespace.  See comment in ctxid_pid_store().
+	 */
+	if (task_active_pid_ns(current) != &init_pid_ns)
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	val1 = config->ctxid_mask0;
+	val2 = config->ctxid_mask1;
+	spin_unlock(&drvdata->spinlock);
+	return scnprintf(buf, PAGE_SIZE, "%#lx %#lx\n", val1, val2);
+}
+
+static ssize_t ctxid_masks_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t size)
+{
+	u8 i, j, maskbyte;
+	unsigned long val1, val2, mask;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	/*
+	 * Don't use contextID tracing if coming from a non-initial PID
+	 * namespace.  See comment in ctxid_pid_store().
+	 */
+	if (task_active_pid_ns(current) != &init_pid_ns)
+		return -EINVAL;
+
+	/*
+	 * only implemented when ctxid tracing is enabled, i.e. at least one
+	 * ctxid comparator is implemented and ctxid is greater than 0 bits
+	 * in length
+	 */
+	if (!drvdata->ctxid_size || !drvdata->numcidc)
+		return -EINVAL;
+	if (sscanf(buf, "%lx %lx", &val1, &val2) != 2)
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	/*
+	 * each byte[0..3] controls mask value applied to ctxid
+	 * comparator[0..3]
+	 */
+	switch (drvdata->numcidc) {
+	case 0x1:
+		/* COMP0, bits[7:0] */
+		config->ctxid_mask0 = val1 & 0xFF;
+		break;
+	case 0x2:
+		/* COMP1, bits[15:8] */
+		config->ctxid_mask0 = val1 & 0xFFFF;
+		break;
+	case 0x3:
+		/* COMP2, bits[23:16] */
+		config->ctxid_mask0 = val1 & 0xFFFFFF;
+		break;
+	case 0x4:
+		 /* COMP3, bits[31:24] */
+		config->ctxid_mask0 = val1;
+		break;
+	case 0x5:
+		/* COMP4, bits[7:0] */
+		config->ctxid_mask0 = val1;
+		config->ctxid_mask1 = val2 & 0xFF;
+		break;
+	case 0x6:
+		/* COMP5, bits[15:8] */
+		config->ctxid_mask0 = val1;
+		config->ctxid_mask1 = val2 & 0xFFFF;
+		break;
+	case 0x7:
+		/* COMP6, bits[23:16] */
+		config->ctxid_mask0 = val1;
+		config->ctxid_mask1 = val2 & 0xFFFFFF;
+		break;
+	case 0x8:
+		/* COMP7, bits[31:24] */
+		config->ctxid_mask0 = val1;
+		config->ctxid_mask1 = val2;
+		break;
+	default:
+		break;
+	}
+	/*
+	 * If software sets a mask bit to 1, it must program the relevant byte
+	 * of the ctxid comparator value to 0x0, otherwise the behavior is
+	 * unpredictable.  For example, if bit[3] of ctxid_mask0 is 1, we must
+	 * clear bits[31:24] (byte 3) of the ctxid comparator0 value register.
+	 */
+	mask = config->ctxid_mask0;
+	for (i = 0; i < drvdata->numcidc; i++) {
+		/* mask value of corresponding ctxid comparator */
+		maskbyte = mask & ETMv4_EVENT_MASK;
+		/*
+		 * each bit corresponds to a byte of respective ctxid comparator
+		 * value register
+		 */
+		for (j = 0; j < 8; j++) {
+			if (maskbyte & 1)
+				config->ctxid_pid[i] &= ~(0xFFUL << (j * 8));
+			maskbyte >>= 1;
+		}
+		/* Select the next ctxid comparator mask value */
+		if (i == 3)
+			/* ctxid comparators[4-7] */
+			mask = config->ctxid_mask1;
+		else
+			mask >>= 0x8;
+	}
+
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(ctxid_masks);
+
+static ssize_t vmid_idx_show(struct device *dev,
+			     struct device_attribute *attr,
+			     char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = config->vmid_idx;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t vmid_idx_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+	if (val >= drvdata->numvmidc)
+		return -EINVAL;
+
+	/*
+	 * Use spinlock to ensure index doesn't change while it gets
+	 * dereferenced multiple times within a spinlock block elsewhere.
+	 */
+	spin_lock(&drvdata->spinlock);
+	config->vmid_idx = val;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(vmid_idx);
+
+static ssize_t vmid_val_show(struct device *dev,
+			     struct device_attribute *attr,
+			     char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	val = (unsigned long)config->vmid_val[config->vmid_idx];
+	spin_unlock(&drvdata->spinlock);
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t vmid_val_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	/*
+	 * only implemented when vmid tracing is enabled, i.e. at least one
+	 * vmid comparator is implemented and at least 8 bit vmid size
+	 */
+	if (!drvdata->vmid_size || !drvdata->numvmidc)
+		return -EINVAL;
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	config->vmid_val[config->vmid_idx] = (u64)val;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(vmid_val);
+
+static ssize_t vmid_masks_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	unsigned long val1, val2;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	val1 = config->vmid_mask0;
+	val2 = config->vmid_mask1;
+	spin_unlock(&drvdata->spinlock);
+	return scnprintf(buf, PAGE_SIZE, "%#lx %#lx\n", val1, val2);
+}
+
+static ssize_t vmid_masks_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t size)
+{
+	u8 i, j, maskbyte;
+	unsigned long val1, val2, mask;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	/*
+	 * only implemented when vmid tracing is enabled, i.e. at least one
+	 * vmid comparator is implemented and at least 8 bit vmid size
+	 */
+	if (!drvdata->vmid_size || !drvdata->numvmidc)
+		return -EINVAL;
+	if (sscanf(buf, "%lx %lx", &val1, &val2) != 2)
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+
+	/*
+	 * each byte[0..3] controls mask value applied to vmid
+	 * comparator[0..3]
+	 */
+	switch (drvdata->numvmidc) {
+	case 0x1:
+		/* COMP0, bits[7:0] */
+		config->vmid_mask0 = val1 & 0xFF;
+		break;
+	case 0x2:
+		/* COMP1, bits[15:8] */
+		config->vmid_mask0 = val1 & 0xFFFF;
+		break;
+	case 0x3:
+		/* COMP2, bits[23:16] */
+		config->vmid_mask0 = val1 & 0xFFFFFF;
+		break;
+	case 0x4:
+		/* COMP3, bits[31:24] */
+		config->vmid_mask0 = val1;
+		break;
+	case 0x5:
+		/* COMP4, bits[7:0] */
+		config->vmid_mask0 = val1;
+		config->vmid_mask1 = val2 & 0xFF;
+		break;
+	case 0x6:
+		/* COMP5, bits[15:8] */
+		config->vmid_mask0 = val1;
+		config->vmid_mask1 = val2 & 0xFFFF;
+		break;
+	case 0x7:
+		/* COMP6, bits[23:16] */
+		config->vmid_mask0 = val1;
+		config->vmid_mask1 = val2 & 0xFFFFFF;
+		break;
+	case 0x8:
+		/* COMP7, bits[31:24] */
+		config->vmid_mask0 = val1;
+		config->vmid_mask1 = val2;
+		break;
+	default:
+		break;
+	}
+
+	/*
+	 * If software sets a mask bit to 1, it must program the relevant byte
+	 * of the vmid comparator value to 0x0, otherwise the behavior is
+	 * unpredictable.  For example, if bit[3] of vmid_mask0 is 1, we must
+	 * clear bits[31:24] (byte 3) of the vmid comparator0 value register.
+	 */
+	mask = config->vmid_mask0;
+	for (i = 0; i < drvdata->numvmidc; i++) {
+		/* mask value of corresponding vmid comparator */
+		maskbyte = mask & ETMv4_EVENT_MASK;
+		/*
+		 * each bit corresponds to a byte of respective vmid comparator
+		 * value register
+		 */
+		for (j = 0; j < 8; j++) {
+			if (maskbyte & 1)
+				config->vmid_val[i] &= ~(0xFFUL << (j * 8));
+			maskbyte >>= 1;
+		}
+		/* Select the next vmid comparator mask value */
+		if (i == 3)
+			/* vmid comparators[4-7] */
+			mask = config->vmid_mask1;
+		else
+			mask >>= 0x8;
+	}
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(vmid_masks);
+
+static ssize_t cpu_show(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	int val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->cpu;
+	return scnprintf(buf, PAGE_SIZE, "%d\n", val);
+}
+static DEVICE_ATTR_RO(cpu);
+
+static struct attribute *coresight_etmv4_attrs[] = {
+	&dev_attr_nr_pe_cmp.attr,
+	&dev_attr_nr_addr_cmp.attr,
+	&dev_attr_nr_cntr.attr,
+	&dev_attr_nr_ext_inp.attr,
+	&dev_attr_numcidc.attr,
+	&dev_attr_numvmidc.attr,
+	&dev_attr_nrseqstate.attr,
+	&dev_attr_nr_resource.attr,
+	&dev_attr_nr_ss_cmp.attr,
+	&dev_attr_reset.attr,
+	&dev_attr_mode.attr,
+	&dev_attr_pe.attr,
+	&dev_attr_event.attr,
+	&dev_attr_event_instren.attr,
+	&dev_attr_event_ts.attr,
+	&dev_attr_syncfreq.attr,
+	&dev_attr_cyc_threshold.attr,
+	&dev_attr_bb_ctrl.attr,
+	&dev_attr_event_vinst.attr,
+	&dev_attr_s_exlevel_vinst.attr,
+	&dev_attr_ns_exlevel_vinst.attr,
+	&dev_attr_addr_idx.attr,
+	&dev_attr_addr_instdatatype.attr,
+	&dev_attr_addr_single.attr,
+	&dev_attr_addr_range.attr,
+	&dev_attr_addr_start.attr,
+	&dev_attr_addr_stop.attr,
+	&dev_attr_addr_ctxtype.attr,
+	&dev_attr_addr_context.attr,
+	&dev_attr_seq_idx.attr,
+	&dev_attr_seq_state.attr,
+	&dev_attr_seq_event.attr,
+	&dev_attr_seq_reset_event.attr,
+	&dev_attr_cntr_idx.attr,
+	&dev_attr_cntrldvr.attr,
+	&dev_attr_cntr_val.attr,
+	&dev_attr_cntr_ctrl.attr,
+	&dev_attr_res_idx.attr,
+	&dev_attr_res_ctrl.attr,
+	&dev_attr_ctxid_idx.attr,
+	&dev_attr_ctxid_pid.attr,
+	&dev_attr_ctxid_masks.attr,
+	&dev_attr_vmid_idx.attr,
+	&dev_attr_vmid_val.attr,
+	&dev_attr_vmid_masks.attr,
+	&dev_attr_cpu.attr,
+	NULL,
+};
+
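+/* register address/value pair used for cross-CPU register reads */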
+struct etmv4_reg {
+	void __iomem *addr;
+	u32 data;
+};
+
+static void do_smp_cross_read(void *data)
+{
+	struct etmv4_reg *reg = data;
+
+	reg->data = readl_relaxed(reg->addr);
+}
+
+static u32 etmv4_cross_read(const struct device *dev, u32 offset)
+{
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev);
+	struct etmv4_reg reg;
+
+	reg.addr = drvdata->base + offset;
+	/*
+	 * smp cross call ensures the CPU will be powered up before
+	 * accessing the ETMv4 trace core registers
+	 */
+	smp_call_function_single(drvdata->cpu, do_smp_cross_read, &reg, 1);
+	return reg.data;
+}
+
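+/* Read-only sysfs attributes exposing raw ETMv4 register values */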
+#define coresight_etm4x_reg(name, offset)			\
+	coresight_simple_reg32(struct etmv4_drvdata, name, offset)
+
+#define coresight_etm4x_cross_read(name, offset)			\
+	coresight_simple_func(struct etmv4_drvdata, etmv4_cross_read,	\
+			      name, offset)
+
+coresight_etm4x_reg(trcpdcr, TRCPDCR);
+coresight_etm4x_reg(trcpdsr, TRCPDSR);
+coresight_etm4x_reg(trclsr, TRCLSR);
+coresight_etm4x_reg(trcauthstatus, TRCAUTHSTATUS);
+coresight_etm4x_reg(trcdevid, TRCDEVID);
+coresight_etm4x_reg(trcdevtype, TRCDEVTYPE);
+coresight_etm4x_reg(trcpidr0, TRCPIDR0);
+coresight_etm4x_reg(trcpidr1, TRCPIDR1);
+coresight_etm4x_reg(trcpidr2, TRCPIDR2);
+coresight_etm4x_reg(trcpidr3, TRCPIDR3);
+coresight_etm4x_cross_read(trcoslsr, TRCOSLSR);
+coresight_etm4x_cross_read(trcconfig, TRCCONFIGR);
+coresight_etm4x_cross_read(trctraceid, TRCTRACEIDR);
+
+static struct attribute *coresight_etmv4_mgmt_attrs[] = {
+	&dev_attr_trcoslsr.attr,
+	&dev_attr_trcpdcr.attr,
+	&dev_attr_trcpdsr.attr,
+	&dev_attr_trclsr.attr,
+	&dev_attr_trcconfig.attr,
+	&dev_attr_trctraceid.attr,
+	&dev_attr_trcauthstatus.attr,
+	&dev_attr_trcdevid.attr,
+	&dev_attr_trcdevtype.attr,
+	&dev_attr_trcpidr0.attr,
+	&dev_attr_trcpidr1.attr,
+	&dev_attr_trcpidr2.attr,
+	&dev_attr_trcpidr3.attr,
+	NULL,
+};
+
+coresight_etm4x_cross_read(trcidr0, TRCIDR0);
+coresight_etm4x_cross_read(trcidr1, TRCIDR1);
+coresight_etm4x_cross_read(trcidr2, TRCIDR2);
+coresight_etm4x_cross_read(trcidr3, TRCIDR3);
+coresight_etm4x_cross_read(trcidr4, TRCIDR4);
+coresight_etm4x_cross_read(trcidr5, TRCIDR5);
+/* trcidr[6,7] are reserved */
+coresight_etm4x_cross_read(trcidr8, TRCIDR8);
+coresight_etm4x_cross_read(trcidr9, TRCIDR9);
+coresight_etm4x_cross_read(trcidr10, TRCIDR10);
+coresight_etm4x_cross_read(trcidr11, TRCIDR11);
+coresight_etm4x_cross_read(trcidr12, TRCIDR12);
+coresight_etm4x_cross_read(trcidr13, TRCIDR13);
+
+static struct attribute *coresight_etmv4_trcidr_attrs[] = {
+	&dev_attr_trcidr0.attr,
+	&dev_attr_trcidr1.attr,
+	&dev_attr_trcidr2.attr,
+	&dev_attr_trcidr3.attr,
+	&dev_attr_trcidr4.attr,
+	&dev_attr_trcidr5.attr,
+	/* trcidr[6,7] are reserved */
+	&dev_attr_trcidr8.attr,
+	&dev_attr_trcidr9.attr,
+	&dev_attr_trcidr10.attr,
+	&dev_attr_trcidr11.attr,
+	&dev_attr_trcidr12.attr,
+	&dev_attr_trcidr13.attr,
+	NULL,
+};
+
+static const struct attribute_group coresight_etmv4_group = {
+	.attrs = coresight_etmv4_attrs,
+};
+
+static const struct attribute_group coresight_etmv4_mgmt_group = {
+	.attrs = coresight_etmv4_mgmt_attrs,
+	.name = "mgmt",
+};
+
+static const struct attribute_group coresight_etmv4_trcidr_group = {
+	.attrs = coresight_etmv4_trcidr_attrs,
+	.name = "trcidr",
+};
+
+const struct attribute_group *coresight_etmv4_groups[] = {
+	&coresight_etmv4_group,
+	&coresight_etmv4_mgmt_group,
+	&coresight_etmv4_trcidr_group,
+	NULL,
+};
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c
new file mode 100644
index 0000000..1d94ebe
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-etm4x.c
@@ -0,0 +1,1072 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2014, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/kernel.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/smp.h>
+#include <linux/sysfs.h>
+#include <linux/stat.h>
+#include <linux/clk.h>
+#include <linux/cpu.h>
+#include <linux/coresight.h>
+#include <linux/coresight-pmu.h>
+#include <linux/pm_wakeup.h>
+#include <linux/amba/bus.h>
+#include <linux/seq_file.h>
+#include <linux/uaccess.h>
+#include <linux/perf_event.h>
+#include <linux/pm_runtime.h>
+#include <asm/sections.h>
+#include <asm/local.h>
+
+#include "coresight-etm4x.h"
+#include "coresight-etm-perf.h"
+
+static int boot_enable;
+module_param_named(boot_enable, boot_enable, int, S_IRUGO);
+
+/* The number of ETMv4 currently registered */
+static int etm4_count;
+static struct etmv4_drvdata *etmdrvdata[NR_CPUS];
+static void etm4_set_default_config(struct etmv4_config *config);
+static int etm4_set_event_filters(struct etmv4_drvdata *drvdata,
+				  struct perf_event *event);
+
+static enum cpuhp_state hp_online;
+
+static void etm4_os_unlock(struct etmv4_drvdata *drvdata)
+{
+	/* Writing any value to TRCOSLAR unlocks the trace registers */
+	writel_relaxed(0x0, drvdata->base + TRCOSLAR);
+	drvdata->os_unlock = true;
+	isb();
+}
+
+static bool etm4_arch_supported(u8 arch)
+{
+	switch (arch) {
+	case ETM_ARCH_V4:
+		break;
+	default:
+		return false;
+	}
+	return true;
+}
+
+static int etm4_cpu_id(struct coresight_device *csdev)
+{
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	return drvdata->cpu;
+}
+
+static int etm4_trace_id(struct coresight_device *csdev)
+{
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	return drvdata->trcid;
+}
+
+static void etm4_enable_hw(void *info)
+{
+	int i;
+	struct etmv4_drvdata *drvdata = info;
+	struct etmv4_config *config = &drvdata->config;
+
+	CS_UNLOCK(drvdata->base);
+
+	etm4_os_unlock(drvdata);
+
+	/* Disable the trace unit before programming trace registers */
+	writel_relaxed(0, drvdata->base + TRCPRGCTLR);
+
+	/* wait for TRCSTATR.IDLE to go up */
+	if (coresight_timeout(drvdata->base, TRCSTATR, TRCSTATR_IDLE_BIT, 1))
+		dev_err(drvdata->dev,
+			"timeout while waiting for Idle Trace Status\n");
+
+	writel_relaxed(config->pe_sel, drvdata->base + TRCPROCSELR);
+	writel_relaxed(config->cfg, drvdata->base + TRCCONFIGR);
+	/* nothing specific implemented */
+	writel_relaxed(0x0, drvdata->base + TRCAUXCTLR);
+	writel_relaxed(config->eventctrl0, drvdata->base + TRCEVENTCTL0R);
+	writel_relaxed(config->eventctrl1, drvdata->base + TRCEVENTCTL1R);
+	writel_relaxed(config->stall_ctrl, drvdata->base + TRCSTALLCTLR);
+	writel_relaxed(config->ts_ctrl, drvdata->base + TRCTSCTLR);
+	writel_relaxed(config->syncfreq, drvdata->base + TRCSYNCPR);
+	writel_relaxed(config->ccctlr, drvdata->base + TRCCCCTLR);
+	writel_relaxed(config->bb_ctrl, drvdata->base + TRCBBCTLR);
+	writel_relaxed(drvdata->trcid, drvdata->base + TRCTRACEIDR);
+	writel_relaxed(config->vinst_ctrl, drvdata->base + TRCVICTLR);
+	writel_relaxed(config->viiectlr, drvdata->base + TRCVIIECTLR);
+	writel_relaxed(config->vissctlr,
+		       drvdata->base + TRCVISSCTLR);
+	writel_relaxed(config->vipcssctlr,
+		       drvdata->base + TRCVIPCSSCTLR);
+	for (i = 0; i < drvdata->nrseqstate - 1; i++)
+		writel_relaxed(config->seq_ctrl[i],
+			       drvdata->base + TRCSEQEVRn(i));
+	writel_relaxed(config->seq_rst, drvdata->base + TRCSEQRSTEVR);
+	writel_relaxed(config->seq_state, drvdata->base + TRCSEQSTR);
+	writel_relaxed(config->ext_inp, drvdata->base + TRCEXTINSELR);
+	for (i = 0; i < drvdata->nr_cntr; i++) {
+		writel_relaxed(config->cntrldvr[i],
+			       drvdata->base + TRCCNTRLDVRn(i));
+		writel_relaxed(config->cntr_ctrl[i],
+			       drvdata->base + TRCCNTCTLRn(i));
+		writel_relaxed(config->cntr_val[i],
+			       drvdata->base + TRCCNTVRn(i));
+	}
+
+	/* Resource selector pair 0 is always implemented and reserved */
+	for (i = 0; i < drvdata->nr_resource * 2; i++)
+		writel_relaxed(config->res_ctrl[i],
+			       drvdata->base + TRCRSCTLRn(i));
+
+	for (i = 0; i < drvdata->nr_ss_cmp; i++) {
+		writel_relaxed(config->ss_ctrl[i],
+			       drvdata->base + TRCSSCCRn(i));
+		writel_relaxed(config->ss_status[i],
+			       drvdata->base + TRCSSCSRn(i));
+		writel_relaxed(config->ss_pe_cmp[i],
+			       drvdata->base + TRCSSPCICRn(i));
+	}
+	for (i = 0; i < drvdata->nr_addr_cmp; i++) {
+		writeq_relaxed(config->addr_val[i],
+			       drvdata->base + TRCACVRn(i));
+		writeq_relaxed(config->addr_acc[i],
+			       drvdata->base + TRCACATRn(i));
+	}
+	for (i = 0; i < drvdata->numcidc; i++)
+		writeq_relaxed(config->ctxid_pid[i],
+			       drvdata->base + TRCCIDCVRn(i));
+	writel_relaxed(config->ctxid_mask0, drvdata->base + TRCCIDCCTLR0);
+	writel_relaxed(config->ctxid_mask1, drvdata->base + TRCCIDCCTLR1);
+
+	for (i = 0; i < drvdata->numvmidc; i++)
+		writeq_relaxed(config->vmid_val[i],
+			       drvdata->base + TRCVMIDCVRn(i));
+	writel_relaxed(config->vmid_mask0, drvdata->base + TRCVMIDCCTLR0);
+	writel_relaxed(config->vmid_mask1, drvdata->base + TRCVMIDCCTLR1);
+
+	/*
+	 * Request to keep the trace unit powered and also
+	 * emulation of powerdown
+	 */
+	writel_relaxed(readl_relaxed(drvdata->base + TRCPDCR) | TRCPDCR_PU,
+		       drvdata->base + TRCPDCR);
+
+	/* Enable the trace unit */
+	writel_relaxed(1, drvdata->base + TRCPRGCTLR);
+
+	/* wait for TRCSTATR.IDLE to go back down to '0' */
+	if (coresight_timeout(drvdata->base, TRCSTATR, TRCSTATR_IDLE_BIT, 0))
+		dev_err(drvdata->dev,
+			"timeout while waiting for Idle Trace Status\n");
+
+	CS_LOCK(drvdata->base);
+
+	dev_dbg(drvdata->dev, "cpu: %d enable smp call done\n", drvdata->cpu);
+}
+
+static int etm4_parse_event_config(struct etmv4_drvdata *drvdata,
+				   struct perf_event *event)
+{
+	int ret = 0;
+	struct etmv4_config *config = &drvdata->config;
+	struct perf_event_attr *attr = &event->attr;
+
+	if (!attr) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* Clear configuration from previous run */
+	memset(config, 0, sizeof(struct etmv4_config));
+
+	if (attr->exclude_kernel)
+		config->mode = ETM_MODE_EXCL_KERN;
+
+	if (attr->exclude_user)
+		config->mode = ETM_MODE_EXCL_USER;
+
+	/* Always start from the default config */
+	etm4_set_default_config(config);
+
+	/* Configure filters specified on the perf cmd line, if any. */
+	ret = etm4_set_event_filters(drvdata, event);
+	if (ret)
+		goto out;
+
+	/* Go from generic option to ETMv4 specifics */
+	if (attr->config & BIT(ETM_OPT_CYCACC)) {
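+		/* bit[4], Cycle counting instruction trace bit */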
+		config->cfg |= BIT(4);
+		/* TRM: Must program this for cycacc to work */
+		config->ccctlr = ETM_CYC_THRESHOLD_DEFAULT;
+	}
+	if (attr->config & BIT(ETM_OPT_TS))
+		/* bit[11], Global timestamp tracing bit */
+		config->cfg |= BIT(11);
+	/* return stack - enable if selected and supported */
+	if ((attr->config & BIT(ETM_OPT_RETSTK)) && drvdata->retstack)
+		/* bit[12], Return stack enable bit */
+		config->cfg |= BIT(12);
+
+out:
+	return ret;
+}
+
+static int etm4_enable_perf(struct coresight_device *csdev,
+			    struct perf_event *event)
+{
+	int ret = 0;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	if (WARN_ON_ONCE(drvdata->cpu != smp_processor_id())) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* Configure the tracer based on the session's specifics */
+	ret = etm4_parse_event_config(drvdata, event);
+	if (ret)
+		goto out;
+	/* And enable it */
+	etm4_enable_hw(drvdata);
+
+out:
+	return ret;
+}
+
+static int etm4_enable_sysfs(struct coresight_device *csdev)
+{
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	int ret;
+
+	spin_lock(&drvdata->spinlock);
+
+	/*
+	 * Executing etm4_enable_hw on the cpu whose ETM is being enabled
+	 * ensures that register writes occur when cpu is powered.
+	 */
+	ret = smp_call_function_single(drvdata->cpu,
+				       etm4_enable_hw, drvdata, 1);
+	if (ret)
+		goto err;
+
+	drvdata->sticky_enable = true;
+	spin_unlock(&drvdata->spinlock);
+
+	dev_info(drvdata->dev, "ETM tracing enabled\n");
+	return 0;
+
+err:
+	spin_unlock(&drvdata->spinlock);
+	return ret;
+}
+
+static int etm4_enable(struct coresight_device *csdev,
+		       struct perf_event *event, u32 mode)
+{
+	int ret;
+	u32 val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	val = local_cmpxchg(&drvdata->mode, CS_MODE_DISABLED, mode);
+
+	/* Someone is already using the tracer */
+	if (val)
+		return -EBUSY;
+
+	switch (mode) {
+	case CS_MODE_SYSFS:
+		ret = etm4_enable_sysfs(csdev);
+		break;
+	case CS_MODE_PERF:
+		ret = etm4_enable_perf(csdev, event);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	/* The tracer didn't start */
+	if (ret)
+		local_set(&drvdata->mode, CS_MODE_DISABLED);
+
+	return ret;
+}
+
+static void etm4_disable_hw(void *info)
+{
+	u32 control;
+	struct etmv4_drvdata *drvdata = info;
+
+	CS_UNLOCK(drvdata->base);
+
+	/* power can be removed from the trace unit now */
+	control = readl_relaxed(drvdata->base + TRCPDCR);
+	control &= ~TRCPDCR_PU;
+	writel_relaxed(control, drvdata->base + TRCPDCR);
+
+	control = readl_relaxed(drvdata->base + TRCPRGCTLR);
+
+	/* EN, bit[0] Trace unit enable bit */
+	control &= ~0x1;
+
+	/* make sure everything completes before disabling */
+	mb();
+	isb();
+	writel_relaxed(control, drvdata->base + TRCPRGCTLR);
+
+	CS_LOCK(drvdata->base);
+
+	dev_dbg(drvdata->dev, "cpu: %d disable smp call done\n", drvdata->cpu);
+}
+
+static int etm4_disable_perf(struct coresight_device *csdev,
+			     struct perf_event *event)
+{
+	u32 control;
+	struct etm_filters *filters = event->hw.addr_filters;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	if (WARN_ON_ONCE(drvdata->cpu != smp_processor_id()))
+		return -EINVAL;
+
+	etm4_disable_hw(drvdata);
+
+	/*
+	 * Check if the start/stop logic was active when the unit was stopped.
+	 * That way we can re-enable the start/stop logic when the process is
+	 * scheduled again.  Configuration of the start/stop logic happens in
+	 * function etm4_set_event_filters().
+	 */
+	control = readl_relaxed(drvdata->base + TRCVICTLR);
+	/* TRCVICTLR::SSSTATUS, bit[9] */
+	filters->ssstatus = (control & BIT(9));
+
+	return 0;
+}
+
+static void etm4_disable_sysfs(struct coresight_device *csdev)
+{
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	/*
+	 * Taking the hotplug lock here protects against the clocks being
+	 * disabled while tracing is left on (crash scenario): this can happen
+	 * if a user disable occurs after the cpu online mask indicates the
+	 * cpu is offline but before the DYING hotplug callback is serviced
+	 * by the ETM driver.
+	 */
+	cpus_read_lock();
+	spin_lock(&drvdata->spinlock);
+
+	/*
+	 * Executing etm4_disable_hw on the cpu whose ETM is being disabled
+	 * ensures that register writes occur when cpu is powered.
+	 */
+	smp_call_function_single(drvdata->cpu, etm4_disable_hw, drvdata, 1);
+
+	spin_unlock(&drvdata->spinlock);
+	cpus_read_unlock();
+
+	dev_info(drvdata->dev, "ETM tracing disabled\n");
+}
+
+static void etm4_disable(struct coresight_device *csdev,
+			 struct perf_event *event)
+{
+	u32 mode;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	/*
+	 * For as long as the tracer isn't disabled another entity can't
+	 * change its status.  As such we can read the status here without
+	 * fearing it will change under us.
+	 */
+	mode = local_read(&drvdata->mode);
+
+	switch (mode) {
+	case CS_MODE_DISABLED:
+		break;
+	case CS_MODE_SYSFS:
+		etm4_disable_sysfs(csdev);
+		break;
+	case CS_MODE_PERF:
+		etm4_disable_perf(csdev, event);
+		break;
+	}
+
+	if (mode)
+		local_set(&drvdata->mode, CS_MODE_DISABLED);
+}
+
+static const struct coresight_ops_source etm4_source_ops = {
+	.cpu_id		= etm4_cpu_id,
+	.trace_id	= etm4_trace_id,
+	.enable		= etm4_enable,
+	.disable	= etm4_disable,
+};
+
+static const struct coresight_ops etm4_cs_ops = {
+	.source_ops	= &etm4_source_ops,
+};
+
+static void etm4_init_arch_data(void *info)
+{
+	u32 etmidr0;
+	u32 etmidr1;
+	u32 etmidr2;
+	u32 etmidr3;
+	u32 etmidr4;
+	u32 etmidr5;
+	struct etmv4_drvdata *drvdata = info;
+
+	/* Make sure all registers are accessible */
+	etm4_os_unlock(drvdata);
+
+	CS_UNLOCK(drvdata->base);
+
+	/* find all capabilities of the tracing unit */
+	etmidr0 = readl_relaxed(drvdata->base + TRCIDR0);
+
+	/* INSTP0, bits[2:1] P0 tracing support field */
+	if (BMVAL(etmidr0, 1, 1) && BMVAL(etmidr0, 2, 2))
+		drvdata->instrp0 = true;
+	else
+		drvdata->instrp0 = false;
+
+	/* TRCBB, bit[5] Branch broadcast tracing support bit */
+	if (BMVAL(etmidr0, 5, 5))
+		drvdata->trcbb = true;
+	else
+		drvdata->trcbb = false;
+
+	/* TRCCOND, bit[6] Conditional instruction tracing support bit */
+	if (BMVAL(etmidr0, 6, 6))
+		drvdata->trccond = true;
+	else
+		drvdata->trccond = false;
+
+	/* TRCCCI, bit[7] Cycle counting instruction bit */
+	if (BMVAL(etmidr0, 7, 7))
+		drvdata->trccci = true;
+	else
+		drvdata->trccci = false;
+
+	/* RETSTACK, bit[9] Return stack bit */
+	if (BMVAL(etmidr0, 9, 9))
+		drvdata->retstack = true;
+	else
+		drvdata->retstack = false;
+
+	/* NUMEVENT, bits[11:10] Number of events field */
+	drvdata->nr_event = BMVAL(etmidr0, 10, 11);
+	/* QSUPP, bits[16:15] Q element support field */
+	drvdata->q_support = BMVAL(etmidr0, 15, 16);
+	/* TSSIZE, bits[28:24] Global timestamp size field */
+	drvdata->ts_size = BMVAL(etmidr0, 24, 28);
+
+	/* base architecture of trace unit */
+	etmidr1 = readl_relaxed(drvdata->base + TRCIDR1);
+	/*
+	 * TRCARCHMIN, bits[7:4] the architecture minor version number
+	 * TRCARCHMAJ, bits[11:8] the architecture major version number
+	 */
+	drvdata->arch = BMVAL(etmidr1, 4, 11);
+
+	/* maximum size of resources */
+	etmidr2 = readl_relaxed(drvdata->base + TRCIDR2);
+	/* CIDSIZE, bits[9:5] Indicates the Context ID size */
+	drvdata->ctxid_size = BMVAL(etmidr2, 5, 9);
+	/* VMIDSIZE, bits[14:10] Indicates the VMID size */
+	drvdata->vmid_size = BMVAL(etmidr2, 10, 14);
+	/* CCSIZE, bits[28:25] size of the cycle counter in bits minus 12 */
+	drvdata->ccsize = BMVAL(etmidr2, 25, 28);
+
+	etmidr3 = readl_relaxed(drvdata->base + TRCIDR3);
+	/* CCITMIN, bits[11:0] minimum threshold value that can be programmed */
+	drvdata->ccitmin = BMVAL(etmidr3, 0, 11);
+	/* EXLEVEL_S, bits[19:16] Secure state instruction tracing */
+	drvdata->s_ex_level = BMVAL(etmidr3, 16, 19);
+	/* EXLEVEL_NS, bits[23:20] Non-secure state instruction tracing */
+	drvdata->ns_ex_level = BMVAL(etmidr3, 20, 23);
+
+	/*
+	 * TRCERR, bit[24] whether a trace unit can trace a
+	 * system error exception.
+	 */
+	if (BMVAL(etmidr3, 24, 24))
+		drvdata->trc_error = true;
+	else
+		drvdata->trc_error = false;
+
+	/* SYNCPR, bit[25] implementation has a fixed synchronization period? */
+	if (BMVAL(etmidr3, 25, 25))
+		drvdata->syncpr = true;
+	else
+		drvdata->syncpr = false;
+
+	/* STALLCTL, bit[26] is stall control implemented? */
+	if (BMVAL(etmidr3, 26, 26))
+		drvdata->stallctl = true;
+	else
+		drvdata->stallctl = false;
+
+	/* SYSSTALL, bit[27] implementation can support stall control? */
+	if (BMVAL(etmidr3, 27, 27))
+		drvdata->sysstall = true;
+	else
+		drvdata->sysstall = false;
+
+	/* NUMPROC, bits[30:28] the number of PEs available for tracing */
+	drvdata->nr_pe = BMVAL(etmidr3, 28, 30);
+
+	/* NOOVERFLOW, bit[31] is trace overflow prevention supported */
+	if (BMVAL(etmidr3, 31, 31))
+		drvdata->nooverflow = true;
+	else
+		drvdata->nooverflow = false;
+
+	/* number of resources trace unit supports */
+	etmidr4 = readl_relaxed(drvdata->base + TRCIDR4);
+	/* NUMACPAIRS, bits[3:0] number of addr comparator pairs for tracing */
+	drvdata->nr_addr_cmp = BMVAL(etmidr4, 0, 3);
+	/* NUMPC, bits[15:12] number of PE comparator inputs for tracing */
+	drvdata->nr_pe_cmp = BMVAL(etmidr4, 12, 15);
+	/*
+	 * NUMRSPAIR, bits[19:16]
+	 * The number of resource pairs conveyed by the HW starts at 0, i.e. a
+	 * value of 0x0 indicates 1 resource pair, 0x1 indicates two and so on.
+	 * As such, add 1 to the value of NUMRSPAIR for a better representation.
+	 */
+	drvdata->nr_resource = BMVAL(etmidr4, 16, 19) + 1;
+	/*
+	 * NUMSSCC, bits[23:20] the number of single-shot
+	 * comparator control for tracing
+	 */
+	drvdata->nr_ss_cmp = BMVAL(etmidr4, 20, 23);
+	/* NUMCIDC, bits[27:24] number of Context ID comparators for tracing */
+	drvdata->numcidc = BMVAL(etmidr4, 24, 27);
+	/* NUMVMIDC, bits[31:28] number of VMID comparators for tracing */
+	drvdata->numvmidc = BMVAL(etmidr4, 28, 31);
+
+	etmidr5 = readl_relaxed(drvdata->base + TRCIDR5);
+	/* NUMEXTIN, bits[8:0] number of external inputs implemented */
+	drvdata->nr_ext_inp = BMVAL(etmidr5, 0, 8);
+	/* TRACEIDSIZE, bits[21:16] indicates the trace ID width */
+	drvdata->trcid_size = BMVAL(etmidr5, 16, 21);
+	/* ATBTRIG, bit[22] implementation can support ATB triggers? */
+	if (BMVAL(etmidr5, 22, 22))
+		drvdata->atbtrig = true;
+	else
+		drvdata->atbtrig = false;
+	/*
+	 * LPOVERRIDE, bit[23] implementation supports
+	 * low-power state override
+	 */
+	if (BMVAL(etmidr5, 23, 23))
+		drvdata->lpoverride = true;
+	else
+		drvdata->lpoverride = false;
+	/* NUMSEQSTATE, bits[27:25] number of sequencer states implemented */
+	drvdata->nrseqstate = BMVAL(etmidr5, 25, 27);
+	/* NUMCNTR, bits[30:28] number of counters available for tracing */
+	drvdata->nr_cntr = BMVAL(etmidr5, 28, 30);
+	CS_LOCK(drvdata->base);
+}
+
+static void etm4_set_default_config(struct etmv4_config *config)
+{
+	/* disable all events tracing */
+	config->eventctrl0 = 0x0;
+	config->eventctrl1 = 0x0;
+
+	/* disable stalling */
+	config->stall_ctrl = 0x0;
+
+	/* enable trace synchronization every 4096 bytes, if available */
+	config->syncfreq = 0xC;
+
+	/* disable timestamp event */
+	config->ts_ctrl = 0x0;
+
+	/* TRCVICTLR::EVENT = 0x01, select the always on logic */
+	config->vinst_ctrl |= BIT(0);
+}
+
+static u64 etm4_get_access_type(struct etmv4_config *config)
+{
+	u64 access_type = 0;
+
+	/*
+	 * EXLEVEL_NS, bits[15:12]
+	 * The Exception levels are:
+	 *   Bit[12] Exception level 0 - Application
+	 *   Bit[13] Exception level 1 - OS
+	 *   Bit[14] Exception level 2 - Hypervisor
+	 *   Bit[15] Never implemented
+	 *
+	 * Always stay away from hypervisor mode.
+	 */
+	access_type = ETM_EXLEVEL_NS_HYP;
+
+	if (config->mode & ETM_MODE_EXCL_KERN)
+		access_type |= ETM_EXLEVEL_NS_OS;
+
+	if (config->mode & ETM_MODE_EXCL_USER)
+		access_type |= ETM_EXLEVEL_NS_APP;
+
+	/*
+	 * EXLEVEL_S, bits[11:8], don't trace anything happening
+	 * in secure state.
+	 */
+	access_type |= (ETM_EXLEVEL_S_APP	|
+			ETM_EXLEVEL_S_OS	|
+			ETM_EXLEVEL_S_HYP);
+
+	return access_type;
+}
+
+static void etm4_set_comparator_filter(struct etmv4_config *config,
+				       u64 start, u64 stop, int comparator)
+{
+	u64 access_type = etm4_get_access_type(config);
+
+	/* First half of default address comparator */
+	config->addr_val[comparator] = start;
+	config->addr_acc[comparator] = access_type;
+	config->addr_type[comparator] = ETM_ADDR_TYPE_RANGE;
+
+	/* Second half of default address comparator */
+	config->addr_val[comparator + 1] = stop;
+	config->addr_acc[comparator + 1] = access_type;
+	config->addr_type[comparator + 1] = ETM_ADDR_TYPE_RANGE;
+
+	/*
+	 * Configure the ViewInst function to include this address range
+	 * comparator.
+	 *
+	 * @comparator is divided by two since it is the index in the
+	 * etmv4_config::addr_val array but register TRCVIIECTLR deals with
+	 * address range comparator _pairs_.
+	 *
+	 * Therefore:
+	 *	index 0 -> comparator pair 0
+	 *	index 2 -> comparator pair 1
+	 *	index 4 -> comparator pair 2
+	 *	...
+	 *	index 14 -> comparator pair 7
+	 */
+	config->viiectlr |= BIT(comparator / 2);
+}
+
+static void etm4_set_start_stop_filter(struct etmv4_config *config,
+				       u64 address, int comparator,
+				       enum etm_addr_type type)
+{
+	int shift;
+	u64 access_type = etm4_get_access_type(config);
+
+	/* Configure the comparator */
+	config->addr_val[comparator] = address;
+	config->addr_acc[comparator] = access_type;
+	config->addr_type[comparator] = type;
+
+	/*
+	 * Configure ViewInst Start-Stop control register.
+	 * Addresses configured to start tracing occupy bits 0 to n-1,
+	 * while those configured to stop tracing occupy bits 16 to 16 + n-1.
+	 */
+	shift = (type == ETM_ADDR_TYPE_START ? 0 : 16);
+	config->vissctlr |= BIT(shift + comparator);
+}
+
+static void etm4_set_default_filter(struct etmv4_config *config)
+{
+	u64 start, stop;
+
+	/*
+	 * Configure address range comparator '0' to encompass all
+	 * possible addresses.
+	 */
+	start = 0x0;
+	stop = ~0x0;
+
+	etm4_set_comparator_filter(config, start, stop,
+				   ETM_DEFAULT_ADDR_COMP);
+
+	/*
+	 * TRCVICTLR::SSSTATUS == 1, the start-stop logic is
+	 * in the started state
+	 */
+	config->vinst_ctrl |= BIT(9);
+
+	/* No start-stop filtering for ViewInst */
+	config->vissctlr = 0x0;
+}
+
+static void etm4_set_default(struct etmv4_config *config)
+{
+	if (WARN_ON_ONCE(!config))
+		return;
+
+	/*
+	 * Make default initialisation trace everything
+	 *
+	 * Select the "always true" resource selector on the
+	 * "Enabling Event" line and configure address range comparator
+	 * '0' to cover the entire address range.  From there
+	 * configure the "include/exclude" engine to include address
+	 * range comparator '0'.
+	 */
+	etm4_set_default_config(config);
+	etm4_set_default_filter(config);
+}
+
+static int etm4_get_next_comparator(struct etmv4_drvdata *drvdata, u32 type)
+{
+	int nr_comparator, index = 0;
+	struct etmv4_config *config = &drvdata->config;
+
+	/*
+	 * nr_addr_cmp holds the number of comparator _pairs_, so multiply
+	 * by 2 for the total number of comparators.
+	 */
+	nr_comparator = drvdata->nr_addr_cmp * 2;
+
+	/* Go through the tally of comparators looking for a free one. */
+	while (index < nr_comparator) {
+		switch (type) {
+		case ETM_ADDR_TYPE_RANGE:
+			if (config->addr_type[index] == ETM_ADDR_TYPE_NONE &&
+			    config->addr_type[index + 1] == ETM_ADDR_TYPE_NONE)
+				return index;
+
+			/* Address range comparators go in pairs */
+			index += 2;
+			break;
+		case ETM_ADDR_TYPE_START:
+		case ETM_ADDR_TYPE_STOP:
+			if (config->addr_type[index] == ETM_ADDR_TYPE_NONE)
+				return index;
+
+			/* Start/stop address can have odd indexes */
+			index += 1;
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	/* If we are here all the comparators have been used. */
+	return -ENOSPC;
+}
+
+static int etm4_set_event_filters(struct etmv4_drvdata *drvdata,
+				  struct perf_event *event)
+{
+	int i, comparator, ret = 0;
+	u64 address;
+	struct etmv4_config *config = &drvdata->config;
+	struct etm_filters *filters = event->hw.addr_filters;
+
+	if (!filters)
+		goto default_filter;
+
+	/* Sync events with what Perf got */
+	perf_event_addr_filters_sync(event);
+
+	/*
+	 * If there are no filters to deal with simply go ahead with
+	 * the default filter, i.e the entire address range.
+	 */
+	if (!filters->nr_filters)
+		goto default_filter;
+
+	for (i = 0; i < filters->nr_filters; i++) {
+		struct etm_filter *filter = &filters->etm_filter[i];
+		enum etm_addr_type type = filter->type;
+
+		/* See if a comparator is free. */
+		comparator = etm4_get_next_comparator(drvdata, type);
+		if (comparator < 0) {
+			ret = comparator;
+			goto out;
+		}
+
+		switch (type) {
+		case ETM_ADDR_TYPE_RANGE:
+			etm4_set_comparator_filter(config,
+						   filter->start_addr,
+						   filter->stop_addr,
+						   comparator);
+			/*
+			 * TRCVICTLR::SSSTATUS == 1, the start-stop logic is
+			 * in the started state
+			 */
+			config->vinst_ctrl |= BIT(9);
+
+			/* No start-stop filtering for ViewInst */
+			config->vissctlr = 0x0;
+			break;
+		case ETM_ADDR_TYPE_START:
+		case ETM_ADDR_TYPE_STOP:
+			/* Get the right start or stop address */
+			address = (type == ETM_ADDR_TYPE_START ?
+				   filter->start_addr :
+				   filter->stop_addr);
+
+			/* Configure comparator */
+			etm4_set_start_stop_filter(config, address,
+						   comparator, type);
+
+			/*
+			 * If filters::ssstatus == 1, trace acquisition was
+			 * started but the process was yanked away before the
+			 * stop address was hit.  As such the start/stop
+			 * logic needs to be re-started so that tracing can
+			 * resume where it left off.
+			 *
+			 * The start/stop logic status when a process is
+			 * scheduled out is checked in function
+			 * etm4_disable_perf().
+			 */
+			if (filters->ssstatus)
+				config->vinst_ctrl |= BIT(9);
+
+			/* No include/exclude filtering for ViewInst */
+			config->viiectlr = 0x0;
+			break;
+		default:
+			ret = -EINVAL;
+			goto out;
+		}
+	}
+
+	goto out;
+
+default_filter:
+	etm4_set_default_filter(config);
+
+out:
+	return ret;
+}
+
+void etm4_config_trace_mode(struct etmv4_config *config)
+{
+	u32 addr_acc, mode;
+
+	mode = config->mode;
+	mode &= (ETM_MODE_EXCL_KERN | ETM_MODE_EXCL_USER);
+
+	/* excluding kernel AND user space doesn't make sense */
+	WARN_ON_ONCE(mode == (ETM_MODE_EXCL_KERN | ETM_MODE_EXCL_USER));
+
+	/* nothing to do if neither flags are set */
+	if (!(mode & ETM_MODE_EXCL_KERN) && !(mode & ETM_MODE_EXCL_USER))
+		return;
+
+	addr_acc = config->addr_acc[ETM_DEFAULT_ADDR_COMP];
+	/* clear default config */
+	addr_acc &= ~(ETM_EXLEVEL_NS_APP | ETM_EXLEVEL_NS_OS);
+
+	/*
+	 * EXLEVEL_NS, bits[15:12]
+	 * The Exception levels are:
+	 *   Bit[12] Exception level 0 - Application
+	 *   Bit[13] Exception level 1 - OS
+	 *   Bit[14] Exception level 2 - Hypervisor
+	 *   Bit[15] Never implemented
+	 */
+	if (mode & ETM_MODE_EXCL_KERN)
+		addr_acc |= ETM_EXLEVEL_NS_OS;
+	else
+		addr_acc |= ETM_EXLEVEL_NS_APP;
+
+	config->addr_acc[ETM_DEFAULT_ADDR_COMP] = addr_acc;
+	config->addr_acc[ETM_DEFAULT_ADDR_COMP + 1] = addr_acc;
+}
+
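+/*
+ * CPU hotplug callbacks: "starting"/"dying" run on the hotplugged CPU and
+ * re-program or quiesce its ETM across the transition, while "online"
+ * re-enables tracers that were requested at boot time (boot_enable).
+ */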
+static int etm4_online_cpu(unsigned int cpu)
+{
+	if (!etmdrvdata[cpu])
+		return 0;
+
+	if (etmdrvdata[cpu]->boot_enable && !etmdrvdata[cpu]->sticky_enable)
+		coresight_enable(etmdrvdata[cpu]->csdev);
+	return 0;
+}
+
+static int etm4_starting_cpu(unsigned int cpu)
+{
+	if (!etmdrvdata[cpu])
+		return 0;
+
+	spin_lock(&etmdrvdata[cpu]->spinlock);
+	if (!etmdrvdata[cpu]->os_unlock) {
+		etm4_os_unlock(etmdrvdata[cpu]);
+		etmdrvdata[cpu]->os_unlock = true;
+	}
+
+	if (local_read(&etmdrvdata[cpu]->mode))
+		etm4_enable_hw(etmdrvdata[cpu]);
+	spin_unlock(&etmdrvdata[cpu]->spinlock);
+	return 0;
+}
+
+static int etm4_dying_cpu(unsigned int cpu)
+{
+	if (!etmdrvdata[cpu])
+		return 0;
+
+	spin_lock(&etmdrvdata[cpu]->spinlock);
+	if (local_read(&etmdrvdata[cpu]->mode))
+		etm4_disable_hw(etmdrvdata[cpu]);
+	spin_unlock(&etmdrvdata[cpu]->spinlock);
+	return 0;
+}
+
+static void etm4_init_trace_id(struct etmv4_drvdata *drvdata)
+{
+	drvdata->trcid = coresight_get_trace_id(drvdata->cpu);
+}
+
+static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
+{
+	int ret;
+	void __iomem *base;
+	struct device *dev = &adev->dev;
+	struct coresight_platform_data *pdata = NULL;
+	struct etmv4_drvdata *drvdata;
+	struct resource *res = &adev->res;
+	struct coresight_desc desc = { 0 };
+	struct device_node *np = adev->dev.of_node;
+
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+
+	if (np) {
+		pdata = of_get_coresight_platform_data(dev, np);
+		if (IS_ERR(pdata))
+			return PTR_ERR(pdata);
+		adev->dev.platform_data = pdata;
+	}
+
+	drvdata->dev = &adev->dev;
+	dev_set_drvdata(dev, drvdata);
+
+	/* Validity for the resource is already checked by the AMBA core */
+	base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	drvdata->base = base;
+
+	spin_lock_init(&drvdata->spinlock);
+
+	drvdata->cpu = pdata ? pdata->cpu : 0;
+
+	cpus_read_lock();
+	etmdrvdata[drvdata->cpu] = drvdata;
+
+	if (smp_call_function_single(drvdata->cpu,
+				etm4_init_arch_data,  drvdata, 1))
+		dev_err(dev, "ETM arch init failed\n");
+
+	if (!etm4_count++) {
+		cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ARM_CORESIGHT_STARTING,
+						     "arm/coresight4:starting",
+						     etm4_starting_cpu, etm4_dying_cpu);
+		ret = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN,
+							   "arm/coresight4:online",
+							   etm4_online_cpu, NULL);
+		if (ret < 0)
+			goto err_arch_supported;
+		hp_online = ret;
+	}
+
+	cpus_read_unlock();
+
+	if (etm4_arch_supported(drvdata->arch) == false) {
+		ret = -EINVAL;
+		goto err_arch_supported;
+	}
+
+	etm4_init_trace_id(drvdata);
+	etm4_set_default(&drvdata->config);
+
+	desc.type = CORESIGHT_DEV_TYPE_SOURCE;
+	desc.subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_PROC;
+	desc.ops = &etm4_cs_ops;
+	desc.pdata = pdata;
+	desc.dev = dev;
+	desc.groups = coresight_etmv4_groups;
+	drvdata->csdev = coresight_register(&desc);
+	if (IS_ERR(drvdata->csdev)) {
+		ret = PTR_ERR(drvdata->csdev);
+		goto err_arch_supported;
+	}
+
+	ret = etm_perf_symlink(drvdata->csdev, true);
+	if (ret) {
+		coresight_unregister(drvdata->csdev);
+		goto err_arch_supported;
+	}
+
+	pm_runtime_put(&adev->dev);
+	dev_info(dev, "CPU%d: ETM v%d.%d initialized\n",
+		 drvdata->cpu, drvdata->arch >> 4, drvdata->arch & 0xf);
+
+	if (boot_enable) {
+		coresight_enable(drvdata->csdev);
+		drvdata->boot_enable = true;
+	}
+
+	return 0;
+
+err_arch_supported:
+	if (--etm4_count == 0) {
+		cpuhp_remove_state_nocalls(CPUHP_AP_ARM_CORESIGHT_STARTING);
+		if (hp_online)
+			cpuhp_remove_state_nocalls(hp_online);
+	}
+	return ret;
+}
+
+#define ETM4x_AMBA_ID(pid)			\
+	{					\
+		.id	= pid,			\
+		.mask	= 0x000fffff,		\
+	}
+
+static const struct amba_id etm4_ids[] = {
+	ETM4x_AMBA_ID(0x000bb95d),		/* Cortex-A53 */
+	ETM4x_AMBA_ID(0x000bb95e),		/* Cortex-A57 */
+	ETM4x_AMBA_ID(0x000bb95a),		/* Cortex-A72 */
+	ETM4x_AMBA_ID(0x000bb959),		/* Cortex-A73 */
+	ETM4x_AMBA_ID(0x000bb9da),		/* Cortex-A35 */
+	{},
+};
+
+static struct amba_driver etm4x_driver = {
+	.drv = {
+		.name   = "coresight-etm4x",
+		.suppress_bind_attrs = true,
+	},
+	.probe		= etm4_probe,
+	.id_table	= etm4_ids,
+};
+builtin_amba_driver(etm4x_driver);
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h
new file mode 100644
index 0000000..52786e9
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-etm4x.h
@@ -0,0 +1,406 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2014-2015, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef _CORESIGHT_CORESIGHT_ETM_H
+#define _CORESIGHT_CORESIGHT_ETM_H
+
+#include <asm/local.h>
+#include <linux/spinlock.h>
+#include "coresight-priv.h"
+
+/*
+ * Device registers:
+ * 0x000 - 0x2FC: Trace		registers
+ * 0x300 - 0x314: Management	registers
+ * 0x318 - 0xEFC: Trace		registers
+ * 0xF00: Management		registers
+ * 0xFA0 - 0xFA4: Trace		registers
+ * 0xFA8 - 0xFFC: Management	registers
+ */
+/* Trace registers (0x000-0x2FC) */
+/* Main control and configuration registers */
+#define TRCPRGCTLR			0x004
+#define TRCPROCSELR			0x008
+#define TRCSTATR			0x00C
+#define TRCCONFIGR			0x010
+#define TRCAUXCTLR			0x018
+#define TRCEVENTCTL0R			0x020
+#define TRCEVENTCTL1R			0x024
+#define TRCSTALLCTLR			0x02C
+#define TRCTSCTLR			0x030
+#define TRCSYNCPR			0x034
+#define TRCCCCTLR			0x038
+#define TRCBBCTLR			0x03C
+#define TRCTRACEIDR			0x040
+#define TRCQCTLR			0x044
+/* Filtering control registers */
+#define TRCVICTLR			0x080
+#define TRCVIIECTLR			0x084
+#define TRCVISSCTLR			0x088
+#define TRCVIPCSSCTLR			0x08C
+#define TRCVDCTLR			0x0A0
+#define TRCVDSACCTLR			0x0A4
+#define TRCVDARCCTLR			0x0A8
+/* Derived resources registers */
+#define TRCSEQEVRn(n)			(0x100 + (n * 4))
+#define TRCSEQRSTEVR			0x118
+#define TRCSEQSTR			0x11C
+#define TRCEXTINSELR			0x120
+#define TRCCNTRLDVRn(n)			(0x140 + (n * 4))
+#define TRCCNTCTLRn(n)			(0x150 + (n * 4))
+#define TRCCNTVRn(n)			(0x160 + (n * 4))
+/* ID registers */
+#define TRCIDR8				0x180
+#define TRCIDR9				0x184
+#define TRCIDR10			0x188
+#define TRCIDR11			0x18C
+#define TRCIDR12			0x190
+#define TRCIDR13			0x194
+#define TRCIMSPEC0			0x1C0
+#define TRCIMSPECn(n)			(0x1C0 + (n * 4))
+#define TRCIDR0				0x1E0
+#define TRCIDR1				0x1E4
+#define TRCIDR2				0x1E8
+#define TRCIDR3				0x1EC
+#define TRCIDR4				0x1F0
+#define TRCIDR5				0x1F4
+#define TRCIDR6				0x1F8
+#define TRCIDR7				0x1FC
+/* Resource selection registers */
+#define TRCRSCTLRn(n)			(0x200 + (n * 4))
+/* Single-shot comparator registers */
+#define TRCSSCCRn(n)			(0x280 + (n * 4))
+#define TRCSSCSRn(n)			(0x2A0 + (n * 4))
+#define TRCSSPCICRn(n)			(0x2C0 + (n * 4))
+/* Management registers (0x300-0x314) */
+#define TRCOSLAR			0x300
+#define TRCOSLSR			0x304
+#define TRCPDCR				0x310
+#define TRCPDSR				0x314
+/* Trace registers (0x318-0xEFC) */
+/* Comparator registers */
+#define TRCACVRn(n)			(0x400 + (n * 8))
+#define TRCACATRn(n)			(0x480 + (n * 8))
+#define TRCDVCVRn(n)			(0x500 + (n * 16))
+#define TRCDVCMRn(n)			(0x580 + (n * 16))
+#define TRCCIDCVRn(n)			(0x600 + (n * 8))
+#define TRCVMIDCVRn(n)			(0x640 + (n * 8))
+#define TRCCIDCCTLR0			0x680
+#define TRCCIDCCTLR1			0x684
+#define TRCVMIDCCTLR0			0x688
+#define TRCVMIDCCTLR1			0x68C
+/* Management register (0xF00) */
+/* Integration control registers */
+#define TRCITCTRL			0xF00
+/* Trace registers (0xFA0-0xFA4) */
+/* Claim tag registers */
+#define TRCCLAIMSET			0xFA0
+#define TRCCLAIMCLR			0xFA4
+/* Management registers (0xFA8-0xFFC) */
+#define TRCDEVAFF0			0xFA8
+#define TRCDEVAFF1			0xFAC
+#define TRCLAR				0xFB0
+#define TRCLSR				0xFB4
+#define TRCAUTHSTATUS			0xFB8
+#define TRCDEVARCH			0xFBC
+#define TRCDEVID			0xFC8
+#define TRCDEVTYPE			0xFCC
+#define TRCPIDR4			0xFD0
+#define TRCPIDR5			0xFD4
+#define TRCPIDR6			0xFD8
+#define TRCPIDR7			0xFDC
+#define TRCPIDR0			0xFE0
+#define TRCPIDR1			0xFE4
+#define TRCPIDR2			0xFE8
+#define TRCPIDR3			0xFEC
+#define TRCCIDR0			0xFF0
+#define TRCCIDR1			0xFF4
+#define TRCCIDR2			0xFF8
+#define TRCCIDR3			0xFFC
+
+/* ETMv4 resources */
+#define ETM_MAX_NR_PE			8
+#define ETMv4_MAX_CNTR			4
+#define ETM_MAX_SEQ_STATES		4
+#define ETM_MAX_EXT_INP_SEL		4
+#define ETM_MAX_EXT_INP			256
+#define ETM_MAX_EXT_OUT			4
+#define ETM_MAX_SINGLE_ADDR_CMP		16
+#define ETM_MAX_ADDR_RANGE_CMP		(ETM_MAX_SINGLE_ADDR_CMP / 2)
+#define ETM_MAX_DATA_VAL_CMP		8
+#define ETMv4_MAX_CTXID_CMP		8
+#define ETM_MAX_VMID_CMP		8
+#define ETM_MAX_PE_CMP			8
+#define ETM_MAX_RES_SEL			16
+#define ETM_MAX_SS_CMP			8
+
+#define ETM_ARCH_V4			0x40
+#define ETMv4_SYNC_MASK			0x1F
+#define ETM_CYC_THRESHOLD_MASK		0xFFF
+#define ETM_CYC_THRESHOLD_DEFAULT       0x100
+#define ETMv4_EVENT_MASK		0xFF
+#define ETM_CNTR_MAX_VAL		0xFFFF
+#define ETM_TRACEID_MASK		0x3f
+
+/* ETMv4 programming modes */
+#define ETM_MODE_EXCLUDE		BIT(0)
+#define ETM_MODE_LOAD			BIT(1)
+#define ETM_MODE_STORE			BIT(2)
+#define ETM_MODE_LOAD_STORE		BIT(3)
+#define ETM_MODE_BB			BIT(4)
+#define ETMv4_MODE_CYCACC		BIT(5)
+#define ETMv4_MODE_CTXID		BIT(6)
+#define ETM_MODE_VMID			BIT(7)
+#define ETM_MODE_COND(val)		BMVAL(val, 8, 10)
+#define ETMv4_MODE_TIMESTAMP		BIT(11)
+#define ETM_MODE_RETURNSTACK		BIT(12)
+#define ETM_MODE_QELEM(val)		BMVAL(val, 13, 14)
+#define ETM_MODE_DATA_TRACE_ADDR	BIT(15)
+#define ETM_MODE_DATA_TRACE_VAL		BIT(16)
+#define ETM_MODE_ISTALL			BIT(17)
+#define ETM_MODE_DSTALL			BIT(18)
+#define ETM_MODE_ATB_TRIGGER		BIT(19)
+#define ETM_MODE_LPOVERRIDE		BIT(20)
+#define ETM_MODE_ISTALL_EN		BIT(21)
+#define ETM_MODE_DSTALL_EN		BIT(22)
+#define ETM_MODE_INSTPRIO		BIT(23)
+#define ETM_MODE_NOOVERFLOW		BIT(24)
+#define ETM_MODE_TRACE_RESET		BIT(25)
+#define ETM_MODE_TRACE_ERR		BIT(26)
+#define ETM_MODE_VIEWINST_STARTSTOP	BIT(27)
+#define ETMv4_MODE_ALL			(GENMASK(27, 0) | \
+					 ETM_MODE_EXCL_KERN | \
+					 ETM_MODE_EXCL_USER)
+
+#define TRCSTATR_IDLE_BIT		0
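+/* address comparator pair 0/1 is used for the default "trace everything" filter */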
+#define ETM_DEFAULT_ADDR_COMP		0
+
+/* PowerDown Control Register bits */
+#define TRCPDCR_PU			BIT(3)
+
+/* secure state access levels */
+#define ETM_EXLEVEL_S_APP		BIT(8)
+#define ETM_EXLEVEL_S_OS		BIT(9)
+#define ETM_EXLEVEL_S_NA		BIT(10)
+#define ETM_EXLEVEL_S_HYP		BIT(11)
+/* non-secure state access levels */
+#define ETM_EXLEVEL_NS_APP		BIT(12)
+#define ETM_EXLEVEL_NS_OS		BIT(13)
+#define ETM_EXLEVEL_NS_HYP		BIT(14)
+#define ETM_EXLEVEL_NS_NA		BIT(15)
+
+/**
+ * struct etmv4_config - configuration information related to an ETMv4
+ * @mode:	Controls various modes supported by this ETM.
+ * @pe_sel:	Controls which PE to trace.
+ * @cfg:	Controls the tracing options.
+ * @eventctrl0: Controls the tracing of arbitrary events.
 + * @eventctrl1: Controls the behavior of the events that @eventctrl0 selects.
+ * @stall_ctrl:	Enables trace unit functionality that prevents trace
+ *		unit buffer overflows.
+ * @ts_ctrl:	Controls the insertion of global timestamps in the
+ *		trace streams.
+ * @syncfreq:	Controls how often trace synchronization requests occur.
+ * @ccctlr:	Sets the threshold value for cycle counting, as programmed
+ *		in the TRCCCCTLR register.
+ * @vinst_ctrl:	Controls instruction trace filtering.
+ * @viiectlr:	Set, or read, the address range comparators.
+ * @vissctlr:	Set, or read, the single address comparators that control the
+ *		ViewInst start-stop logic.
+ * @vipcssctlr:	Set, or read, which PE comparator inputs can control the
+ *		ViewInst start-stop logic.
+ * @seq_idx:	Sequencer index selector.
+ * @seq_ctrl:	Control for the sequencer state transition control register.
+ * @seq_rst:	Moves the sequencer to state 0 when a programmed event occurs.
+ * @seq_state:	Set, or read the sequencer state.
+ * @cntr_idx:	Counter index selector.
+ * @cntrldvr:	Sets or returns the reload count value for a counter.
+ * @cntr_ctrl:	Controls the operation of a counter.
+ * @cntr_val:	Sets or returns the value for a counter.
+ * @res_idx:	Resource index selector.
+ * @res_ctrl:	Controls the selection of the resources in the trace unit.
+ * @ss_ctrl:	Controls the corresponding single-shot comparator resource.
+ * @ss_status:	The status of the corresponding single-shot comparator.
+ * @ss_pe_cmp:	Selects the PE comparator inputs for Single-shot control.
+ * @addr_idx:	Address comparator index selector.
+ * @addr_val:	Value for address comparator.
+ * @addr_acc:	Address comparator access type.
+ * @addr_type:	Current status of the comparator register.
+ * @ctxid_idx:	Context ID index selector.
+ * @ctxid_pid:	Value of the context ID comparator.
+ * @ctxid_mask0:Context ID comparator mask for comparator 0-3.
+ * @ctxid_mask1:Context ID comparator mask for comparator 4-7.
+ * @vmid_idx:	VM ID index selector.
+ * @vmid_val:	Value of the VM ID comparator.
+ * @vmid_mask0:	VM ID comparator mask for comparator 0-3.
+ * @vmid_mask1:	VM ID comparator mask for comparator 4-7.
+ * @ext_inp:	External input selection.
+ */
+struct etmv4_config {
+	u32				mode;
+	u32				pe_sel;
+	u32				cfg;
+	u32				eventctrl0;
+	u32				eventctrl1;
+	u32				stall_ctrl;
+	u32				ts_ctrl;
+	u32				syncfreq;
+	u32				ccctlr;
+	u32				bb_ctrl;
+	u32				vinst_ctrl;
+	u32				viiectlr;
+	u32				vissctlr;
+	u32				vipcssctlr;
+	u8				seq_idx;
+	u32				seq_ctrl[ETM_MAX_SEQ_STATES];
+	u32				seq_rst;
+	u32				seq_state;
+	u8				cntr_idx;
+	u32				cntrldvr[ETMv4_MAX_CNTR];
+	u32				cntr_ctrl[ETMv4_MAX_CNTR];
+	u32				cntr_val[ETMv4_MAX_CNTR];
+	u8				res_idx;
+	u32				res_ctrl[ETM_MAX_RES_SEL];
+	u32				ss_ctrl[ETM_MAX_SS_CMP];
+	u32				ss_status[ETM_MAX_SS_CMP];
+	u32				ss_pe_cmp[ETM_MAX_SS_CMP];
+	u8				addr_idx;
+	u64				addr_val[ETM_MAX_SINGLE_ADDR_CMP];
+	u64				addr_acc[ETM_MAX_SINGLE_ADDR_CMP];
+	u8				addr_type[ETM_MAX_SINGLE_ADDR_CMP];
+	u8				ctxid_idx;
+	u64				ctxid_pid[ETMv4_MAX_CTXID_CMP];
+	u32				ctxid_mask0;
+	u32				ctxid_mask1;
+	u8				vmid_idx;
+	u64				vmid_val[ETM_MAX_VMID_CMP];
+	u32				vmid_mask0;
+	u32				vmid_mask1;
+	u32				ext_inp;
+};
+
+/**
+ * struct etmv4_drvdata - specifics associated to an ETM component
+ * @base:       Memory mapped base address for this component.
+ * @dev:        The device entity associated to this component.
+ * @csdev:      Component vitals needed by the framework.
+ * @spinlock:   Only one at a time pls.
+ * @mode:	This tracer's mode, i.e. sysFS, Perf or disabled.
+ * @cpu:        The cpu this component is affined to.
+ * @arch:       ETM version number.
+ * @nr_pe:	The number of processing entity available for tracing.
+ * @nr_pe_cmp:	The number of processing entity comparator inputs that are
+ *		available for tracing.
+ * @nr_addr_cmp:Number of pairs of address comparators available
+ *		as found in ETMIDR4 0-3.
+ * @nr_cntr:    Number of counters as found in ETMIDR5 bit 28-30.
+ * @nr_ext_inp: Number of external inputs.
+ * @numcidc:	Number of contextID comparators.
+ * @numvmidc:	Number of VMID comparators.
+ * @nrseqstate: The number of sequencer states that are implemented.
+ * @nr_event:	Indicates how many events the trace unit supports.
+ * @nr_resource:The number of resource selection pairs available for tracing.
+ * @nr_ss_cmp:	Number of single-shot comparator controls that are available.
+ * @trcid:	value of the current ID for this component.
+ * @trcid_size: Indicates the trace ID width.
+ * @ts_size:	Global timestamp size field.
+ * @ctxid_size:	Size of the context ID field to consider.
+ * @vmid_size:	Size of the VM ID comparator to consider.
+ * @ccsize:	Indicates the size of the cycle counter in bits.
+ * @ccitmin:	Minimum value that can be programmed in
+ *		the TRCCCCTLR register (cycle count threshold).
+ * @s_ex_level:	In secure state, indicates whether instruction tracing is
+ *		supported for the corresponding Exception level.
+ * @ns_ex_level:In non-secure state, indicates whether instruction tracing is
+ *		supported for the corresponding Exception level.
+ * @sticky_enable: true if ETM base configuration has been done.
+ * @boot_enable:True if we should start tracing at boot time.
+ * @os_unlock:  True if access to management registers is allowed.
+ * @instrp0:	Tracing of load and store instructions
+ *		as P0 elements is supported.
+ * @trcbb:	Indicates if the trace unit supports branch broadcast tracing.
+ * @trccond:	If the trace unit supports conditional
+ *		instruction tracing.
+ * @retstack:	Indicates if the implementation supports a return stack.
+ * @trccci:	Indicates if the trace unit supports cycle counting
+ *		for instructions.
+ * @q_support:	Q element support characteristics.
+ * @trc_error:	Whether a trace unit can trace a system
+ *		error exception.
+ * @syncpr:	Indicates if an implementation has a fixed
+ *		synchronization period.
+ * @stallctl:	Indicates if functionality that prevents trace unit
+ *		buffer overflows is available.
+ * @sysstall:	Does the system support stall control of the PE?
+ * @nooverflow:	Indicates if overflow prevention is supported.
+ * @atbtrig:	If the implementation can support ATB triggers.
+ * @lpoverride:	If the implementation can support low-power state override.
+ * @config:	structure holding configuration parameters.
+ */
+struct etmv4_drvdata {
+	void __iomem			*base;
+	struct device			*dev;
+	struct coresight_device		*csdev;
+	spinlock_t			spinlock;
+	local_t				mode;
+	int				cpu;
+	u8				arch;
+	u8				nr_pe;
+	u8				nr_pe_cmp;
+	u8				nr_addr_cmp;
+	u8				nr_cntr;
+	u8				nr_ext_inp;
+	u8				numcidc;
+	u8				numvmidc;
+	u8				nrseqstate;
+	u8				nr_event;
+	u8				nr_resource;
+	u8				nr_ss_cmp;
+	u8				trcid;
+	u8				trcid_size;
+	u8				ts_size;
+	u8				ctxid_size;
+	u8				vmid_size;
+	u8				ccsize;
+	u8				ccitmin;
+	u8				s_ex_level;
+	u8				ns_ex_level;
+	u8				q_support;
+	bool				sticky_enable;
+	bool				boot_enable;
+	bool				os_unlock;
+	bool				instrp0;
+	bool				trcbb;
+	bool				trccond;
+	bool				retstack;
+	bool				trccci;
+	bool				trc_error;
+	bool				syncpr;
+	bool				stallctl;
+	bool				sysstall;
+	bool				nooverflow;
+	bool				atbtrig;
+	bool				lpoverride;
+	struct etmv4_config		config;
+};
+
+/* Address comparator access types */
+enum etm_addr_acctype {
+	ETM_INSTR_ADDR,
+	ETM_DATA_LOAD_ADDR,
+	ETM_DATA_STORE_ADDR,
+	ETM_DATA_LOAD_STORE_ADDR,
+};
+
+/* Address comparator context types */
+enum etm_addr_ctxtype {
+	ETM_CTX_NONE,
+	ETM_CTX_CTXID,
+	ETM_CTX_VMID,
+	ETM_CTX_CTXID_VMID,
+};
+
+extern const struct attribute_group *coresight_etmv4_groups[];
+void etm4_config_trace_mode(struct etmv4_config *config);
+#endif
diff --git a/drivers/hwtracing/coresight/coresight-funnel.c b/drivers/hwtracing/coresight/coresight-funnel.c
new file mode 100644
index 0000000..448145a
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-funnel.c
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
+ *
+ * Description: CoreSight Funnel driver
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/pm_runtime.h>
+#include <linux/coresight.h>
+#include <linux/amba/bus.h>
+#include <linux/clk.h>
+
+#include "coresight-priv.h"
+
+#define FUNNEL_FUNCTL		0x000
+#define FUNNEL_PRICTL		0x004
+
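+/* FUNCTL hold time field, bits[11:8]; the driver programs a value of 0x7 */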
+#define FUNNEL_HOLDTIME_MASK	0xf00
+#define FUNNEL_HOLDTIME_SHFT	0x8
+#define FUNNEL_HOLDTIME		(0x7 << FUNNEL_HOLDTIME_SHFT)
+
+/**
+ * struct funnel_drvdata - specifics associated to a funnel component
+ * @base:	memory mapped base address for this component.
+ * @dev:	the device entity associated to this component.
+ * @atclk:	optional clock for the core parts of the funnel.
+ * @csdev:	component vitals needed by the framework.
+ * @priority:	port selection order.
+ */
+struct funnel_drvdata {
+	void __iomem		*base;
+	struct device		*dev;
+	struct clk		*atclk;
+	struct coresight_device	*csdev;
+	unsigned long		priority;
+};
+
+static void funnel_enable_hw(struct funnel_drvdata *drvdata, int port)
+{
+	u32 functl;
+
+	CS_UNLOCK(drvdata->base);
+
+	functl = readl_relaxed(drvdata->base + FUNNEL_FUNCTL);
+	functl &= ~FUNNEL_HOLDTIME_MASK;
+	functl |= FUNNEL_HOLDTIME;
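+	/* input port 'port' is enabled by setting the corresponding FUNCTL bit */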
+	functl |= (1 << port);
+	writel_relaxed(functl, drvdata->base + FUNNEL_FUNCTL);
+	writel_relaxed(drvdata->priority, drvdata->base + FUNNEL_PRICTL);
+
+	CS_LOCK(drvdata->base);
+}
+
+static int funnel_enable(struct coresight_device *csdev, int inport,
+			 int outport)
+{
+	struct funnel_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	funnel_enable_hw(drvdata, inport);
+
+	dev_info(drvdata->dev, "FUNNEL inport %d enabled\n", inport);
+	return 0;
+}
+
+static void funnel_disable_hw(struct funnel_drvdata *drvdata, int inport)
+{
+	u32 functl;
+
+	CS_UNLOCK(drvdata->base);
+
+	functl = readl_relaxed(drvdata->base + FUNNEL_FUNCTL);
+	functl &= ~(1 << inport);
+	writel_relaxed(functl, drvdata->base + FUNNEL_FUNCTL);
+
+	CS_LOCK(drvdata->base);
+}
+
+static void funnel_disable(struct coresight_device *csdev, int inport,
+			   int outport)
+{
+	struct funnel_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	funnel_disable_hw(drvdata, inport);
+
+	dev_info(drvdata->dev, "FUNNEL inport %d disabled\n", inport);
+}
+
+static const struct coresight_ops_link funnel_link_ops = {
+	.enable		= funnel_enable,
+	.disable	= funnel_disable,
+};
+
+static const struct coresight_ops funnel_cs_ops = {
+	.link_ops	= &funnel_link_ops,
+};
+
+static ssize_t priority_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct funnel_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	unsigned long val = drvdata->priority;
+
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t priority_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct funnel_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	drvdata->priority = val;
+	return size;
+}
+static DEVICE_ATTR_RW(priority);
+
+static u32 get_funnel_ctrl_hw(struct funnel_drvdata *drvdata)
+{
+	u32 functl;
+
+	CS_UNLOCK(drvdata->base);
+	functl = readl_relaxed(drvdata->base + FUNNEL_FUNCTL);
+	CS_LOCK(drvdata->base);
+
+	return functl;
+}
+
+static ssize_t funnel_ctrl_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	u32 val;
+	struct funnel_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	pm_runtime_get_sync(drvdata->dev);
+
+	val = get_funnel_ctrl_hw(drvdata);
+
+	pm_runtime_put(drvdata->dev);
+
+	return sprintf(buf, "%#x\n", val);
+}
+static DEVICE_ATTR_RO(funnel_ctrl);
+
+static struct attribute *coresight_funnel_attrs[] = {
+	&dev_attr_funnel_ctrl.attr,
+	&dev_attr_priority.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(coresight_funnel);
+
+static int funnel_probe(struct amba_device *adev, const struct amba_id *id)
+{
+	int ret;
+	void __iomem *base;
+	struct device *dev = &adev->dev;
+	struct coresight_platform_data *pdata = NULL;
+	struct funnel_drvdata *drvdata;
+	struct resource *res = &adev->res;
+	struct coresight_desc desc = { 0 };
+	struct device_node *np = adev->dev.of_node;
+
+	if (np) {
+		pdata = of_get_coresight_platform_data(dev, np);
+		if (IS_ERR(pdata))
+			return PTR_ERR(pdata);
+		adev->dev.platform_data = pdata;
+	}
+
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+
+	drvdata->dev = &adev->dev;
+	drvdata->atclk = devm_clk_get(&adev->dev, "atclk"); /* optional */
+	if (!IS_ERR(drvdata->atclk)) {
+		ret = clk_prepare_enable(drvdata->atclk);
+		if (ret)
+			return ret;
+	}
+	dev_set_drvdata(dev, drvdata);
+
+	/* Validity for the resource is already checked by the AMBA core */
+	base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	drvdata->base = base;
+	pm_runtime_put(&adev->dev);
+
+	desc.type = CORESIGHT_DEV_TYPE_LINK;
+	desc.subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_MERG;
+	desc.ops = &funnel_cs_ops;
+	desc.pdata = pdata;
+	desc.dev = dev;
+	desc.groups = coresight_funnel_groups;
+	drvdata->csdev = coresight_register(&desc);
+
+	return PTR_ERR_OR_ZERO(drvdata->csdev);
+}
+
+#ifdef CONFIG_PM
+static int funnel_runtime_suspend(struct device *dev)
+{
+	struct funnel_drvdata *drvdata = dev_get_drvdata(dev);
+
+	if (drvdata && !IS_ERR(drvdata->atclk))
+		clk_disable_unprepare(drvdata->atclk);
+
+	return 0;
+}
+
+static int funnel_runtime_resume(struct device *dev)
+{
+	struct funnel_drvdata *drvdata = dev_get_drvdata(dev);
+
+	if (drvdata && !IS_ERR(drvdata->atclk))
+		clk_prepare_enable(drvdata->atclk);
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops funnel_dev_pm_ops = {
+	SET_RUNTIME_PM_OPS(funnel_runtime_suspend, funnel_runtime_resume, NULL)
+};
+
+static const struct amba_id funnel_ids[] = {
+	{
+		.id     = 0x000bb908,
+		.mask   = 0x000fffff,
+	},
+	{
+		/* Coresight SoC-600 */
+		.id     = 0x000bb9eb,
+		.mask   = 0x000fffff,
+	},
+	{ 0, 0},
+};
+
+static struct amba_driver funnel_driver = {
+	.drv = {
+		.name	= "coresight-funnel",
+		.owner	= THIS_MODULE,
+		.pm	= &funnel_dev_pm_ops,
+		.suppress_bind_attrs = true,
+	},
+	.probe		= funnel_probe,
+	.id_table	= funnel_ids,
+};
+builtin_amba_driver(funnel_driver);
diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h
new file mode 100644
index 0000000..1a6cf35
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-priv.h
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef _CORESIGHT_PRIV_H
+#define _CORESIGHT_PRIV_H
+
+#include <linux/bitops.h>
+#include <linux/io.h>
+#include <linux/coresight.h>
+#include <linux/pm_runtime.h>
+
+/*
+ * Coresight management registers (0xf00-0xfcc)
+ * 0xfa0 - 0xfa4: Management	registers in PFTv1.0
+ *		  Trace		registers in PFTv1.1
+ */
+#define CORESIGHT_ITCTRL	0xf00
+#define CORESIGHT_CLAIMSET	0xfa0
+#define CORESIGHT_CLAIMCLR	0xfa4
+#define CORESIGHT_LAR		0xfb0
+#define CORESIGHT_LSR		0xfb4
+#define CORESIGHT_AUTHSTATUS	0xfb8
+#define CORESIGHT_DEVID		0xfc8
+#define CORESIGHT_DEVTYPE	0xfcc
+
+#define TIMEOUT_US		100
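+/* Extract bits [msb:lsb] of @val, e.g. BMVAL(0xABCD, 4, 11) == 0xBC */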
+#define BMVAL(val, lsb, msb)	((val & GENMASK(msb, lsb)) >> lsb)
+
+#define ETM_MODE_EXCL_KERN	BIT(30)
+#define ETM_MODE_EXCL_USER	BIT(31)
+
+typedef u32 (*coresight_read_fn)(const struct device *, u32 offset);
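+/*
+ * Generate a read-only sysfs attribute showing a device register.  When a
+ * @func callback is provided the register is read through it (for example
+ * to perform the access from a specific CPU), otherwise the value is read
+ * directly from the component's memory mapped lo/hi register pair.
+ */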
+#define __coresight_simple_func(type, func, name, lo_off, hi_off)	\
+static ssize_t name##_show(struct device *_dev,				\
+			   struct device_attribute *attr, char *buf)	\
+{									\
+	type *drvdata = dev_get_drvdata(_dev->parent);			\
+	coresight_read_fn fn = func;					\
+	u64 val;							\
+	pm_runtime_get_sync(_dev->parent);				\
+	if (fn)								\
+		val = (u64)fn(_dev->parent, lo_off);			\
+	else								\
+		val = coresight_read_reg_pair(drvdata->base,		\
+						 lo_off, hi_off);	\
+	pm_runtime_put_sync(_dev->parent);				\
+	return scnprintf(buf, PAGE_SIZE, "0x%llx\n", val);		\
+}									\
+static DEVICE_ATTR_RO(name)
+
+#define coresight_simple_func(type, func, name, offset)			\
+	__coresight_simple_func(type, func, name, offset, -1)
+#define coresight_simple_reg32(type, name, offset)			\
+	__coresight_simple_func(type, NULL, name, offset, -1)
+#define coresight_simple_reg64(type, name, lo_off, hi_off)		\
+	__coresight_simple_func(type, NULL, name, lo_off, hi_off)
+
+extern const u32 barrier_pkt[4];
+#define CORESIGHT_BARRIER_PKT_SIZE (sizeof(barrier_pkt))
+
+enum etm_addr_type {
+	ETM_ADDR_TYPE_NONE,
+	ETM_ADDR_TYPE_SINGLE,
+	ETM_ADDR_TYPE_RANGE,
+	ETM_ADDR_TYPE_START,
+	ETM_ADDR_TYPE_STOP,
+};
+
+enum cs_mode {
+	CS_MODE_DISABLED,
+	CS_MODE_SYSFS,
+	CS_MODE_PERF,
+};
+
+/**
+ * struct cs_buffers - keep track of a recording session's specifics
+ * @cur:	index of the current buffer
+ * @nr_pages:	max number of pages granted to us
+ * @offset:	offset within the current buffer
+ * @data_size:	how much we collected in this run
+ * @snapshot:	is this run in snapshot mode
+ * @data_pages:	a handle to the ring buffer
+ */
+struct cs_buffers {
+	unsigned int		cur;
+	unsigned int		nr_pages;
+	unsigned long		offset;
+	local_t			data_size;
+	bool			snapshot;
+	void			**data_pages;
+};
+
+static inline void coresight_insert_barrier_packet(void *buf)
+{
+	if (buf)
+		memcpy(buf, barrier_pkt, CORESIGHT_BARRIER_PKT_SIZE);
+}
+
+
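+/*
+ * CoreSight components implement a software lock: register writes are
+ * ignored until the unlock key is written to the Lock Access Register
+ * (LAR).  Drivers bracket every programming sequence with CS_UNLOCK()
+ * and CS_LOCK().
+ */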
+static inline void CS_LOCK(void __iomem *addr)
+{
+	do {
+		/* Wait for things to settle */
+		mb();
+		writel_relaxed(0x0, addr + CORESIGHT_LAR);
+	} while (0);
+}
+
+static inline void CS_UNLOCK(void __iomem *addr)
+{
+	do {
+		writel_relaxed(CORESIGHT_UNLOCK, addr + CORESIGHT_LAR);
+		/* Make sure everyone has seen this */
+		mb();
+	} while (0);
+}
+
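+/*
+ * Read a 64-bit value split across a lo/hi pair of 32-bit registers.  A
+ * negative @hi_offset means the register is only 32 bits wide and only
+ * the low part is read.
+ */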
+static inline u64
+coresight_read_reg_pair(void __iomem *addr, s32 lo_offset, s32 hi_offset)
+{
+	u64 val;
+
+	val = readl_relaxed(addr + lo_offset);
+	val |= (hi_offset < 0) ? 0 :
+	       (u64)readl_relaxed(addr + hi_offset) << 32;
+	return val;
+}
+
+static inline void coresight_write_reg_pair(void __iomem *addr, u64 val,
+						 s32 lo_offset, s32 hi_offset)
+{
+	writel_relaxed((u32)val, addr + lo_offset);
+	if (hi_offset >= 0)
+		writel_relaxed((u32)(val >> 32), addr + hi_offset);
+}
+
+void coresight_disable_path(struct list_head *path);
+int coresight_enable_path(struct list_head *path, u32 mode);
+struct coresight_device *coresight_get_sink(struct list_head *path);
+struct coresight_device *coresight_get_enabled_sink(bool reset);
+struct list_head *coresight_build_path(struct coresight_device *csdev,
+				       struct coresight_device *sink);
+void coresight_release_path(struct list_head *path);
+
+#ifdef CONFIG_CORESIGHT_SOURCE_ETM3X
+extern int etm_readl_cp14(u32 off, unsigned int *val);
+extern int etm_writel_cp14(u32 off, u32 val);
+#else
+static inline int etm_readl_cp14(u32 off, unsigned int *val) { return 0; }
+static inline int etm_writel_cp14(u32 off, u32 val) { return 0; }
+#endif
+
+#endif
diff --git a/drivers/hwtracing/coresight/coresight-replicator.c b/drivers/hwtracing/coresight/coresight-replicator.c
new file mode 100644
index 0000000..8d2eaaa
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-replicator.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
+ *
+ * Description: CoreSight Replicator driver
+ */
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/pm_runtime.h>
+#include <linux/clk.h>
+#include <linux/of.h>
+#include <linux/coresight.h>
+
+#include "coresight-priv.h"
+
+/**
+ * struct replicator_drvdata - specifics associated to a replicator component
+ * @dev:	the device entity associated with this component
+ * @atclk:	optional clock for the core parts of the replicator.
+ * @csdev:	component vitals needed by the framework
+ */
+struct replicator_drvdata {
+	struct device		*dev;
+	struct clk		*atclk;
+	struct coresight_device	*csdev;
+};
+
+static int replicator_enable(struct coresight_device *csdev, int inport,
+			     int outport)
+{
+	struct replicator_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	dev_info(drvdata->dev, "REPLICATOR enabled\n");
+	return 0;
+}
+
+static void replicator_disable(struct coresight_device *csdev, int inport,
+			       int outport)
+{
+	struct replicator_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	dev_info(drvdata->dev, "REPLICATOR disabled\n");
+}
+
+static const struct coresight_ops_link replicator_link_ops = {
+	.enable		= replicator_enable,
+	.disable	= replicator_disable,
+};
+
+static const struct coresight_ops replicator_cs_ops = {
+	.link_ops	= &replicator_link_ops,
+};
+
+static int replicator_probe(struct platform_device *pdev)
+{
+	int ret;
+	struct device *dev = &pdev->dev;
+	struct coresight_platform_data *pdata = NULL;
+	struct replicator_drvdata *drvdata;
+	struct coresight_desc desc = { 0 };
+	struct device_node *np = pdev->dev.of_node;
+
+	if (np) {
+		pdata = of_get_coresight_platform_data(dev, np);
+		if (IS_ERR(pdata))
+			return PTR_ERR(pdata);
+		pdev->dev.platform_data = pdata;
+	}
+
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+
+	drvdata->dev = &pdev->dev;
+	drvdata->atclk = devm_clk_get(&pdev->dev, "atclk"); /* optional */
+	if (!IS_ERR(drvdata->atclk)) {
+		ret = clk_prepare_enable(drvdata->atclk);
+		if (ret)
+			return ret;
+	}
+	pm_runtime_get_noresume(&pdev->dev);
+	pm_runtime_set_active(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
+	platform_set_drvdata(pdev, drvdata);
+
+	desc.type = CORESIGHT_DEV_TYPE_LINK;
+	desc.subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_SPLIT;
+	desc.ops = &replicator_cs_ops;
+	desc.pdata = pdev->dev.platform_data;
+	desc.dev = &pdev->dev;
+	drvdata->csdev = coresight_register(&desc);
+	if (IS_ERR(drvdata->csdev)) {
+		ret = PTR_ERR(drvdata->csdev);
+		goto out_disable_pm;
+	}
+
+	pm_runtime_put(&pdev->dev);
+
+	return 0;
+
+out_disable_pm:
+	if (!IS_ERR(drvdata->atclk))
+		clk_disable_unprepare(drvdata->atclk);
+	pm_runtime_put_noidle(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+
+	return ret;
+}
+
+#ifdef CONFIG_PM
+static int replicator_runtime_suspend(struct device *dev)
+{
+	struct replicator_drvdata *drvdata = dev_get_drvdata(dev);
+
+	if (drvdata && !IS_ERR(drvdata->atclk))
+		clk_disable_unprepare(drvdata->atclk);
+
+	return 0;
+}
+
+static int replicator_runtime_resume(struct device *dev)
+{
+	struct replicator_drvdata *drvdata = dev_get_drvdata(dev);
+
+	if (drvdata && !IS_ERR(drvdata->atclk))
+		clk_prepare_enable(drvdata->atclk);
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops replicator_dev_pm_ops = {
+	SET_RUNTIME_PM_OPS(replicator_runtime_suspend,
+			   replicator_runtime_resume, NULL)
+};
+
+static const struct of_device_id replicator_match[] = {
+	{.compatible = "arm,coresight-replicator"},
+	{}
+};
+
+static struct platform_driver replicator_driver = {
+	.probe          = replicator_probe,
+	.driver         = {
+		.name   = "coresight-replicator",
+		.of_match_table = replicator_match,
+		.pm	= &replicator_dev_pm_ops,
+		.suppress_bind_attrs = true,
+	},
+};
+builtin_platform_driver(replicator_driver);
diff --git a/drivers/hwtracing/coresight/coresight-stm.c b/drivers/hwtracing/coresight/coresight-stm.c
new file mode 100644
index 0000000..c46c70a
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-stm.c
@@ -0,0 +1,936 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * Description: CoreSight System Trace Macrocell driver
+ *
+ * Initial implementation by Pratik Patel
+ * (C) 2014-2015 Pratik Patel <pratikp@codeaurora.org>
+ *
+ * Serious refactoring, code cleanup and upgrading to the Coresight upstream
+ * framework by Mathieu Poirier
+ * (C) 2015-2016 Mathieu Poirier <mathieu.poirier@linaro.org>
+ *
+ * Guaranteed timing and support for various packet types coming from the
+ * generic STM API by Chunyan Zhang
+ * (C) 2015-2016 Chunyan Zhang <zhang.chunyan@linaro.org>
+ */
+#include <asm/local.h>
+#include <linux/amba/bus.h>
+#include <linux/bitmap.h>
+#include <linux/clk.h>
+#include <linux/coresight.h>
+#include <linux/coresight-stm.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/moduleparam.h>
+#include <linux/of_address.h>
+#include <linux/perf_event.h>
+#include <linux/pm_runtime.h>
+#include <linux/stm.h>
+
+#include "coresight-priv.h"
+
+#define STMDMASTARTR			0xc04
+#define STMDMASTOPR			0xc08
+#define STMDMASTATR			0xc0c
+#define STMDMACTLR			0xc10
+#define STMDMAIDR			0xcfc
+#define STMHEER				0xd00
+#define STMHETER			0xd20
+#define STMHEBSR			0xd60
+#define STMHEMCR			0xd64
+#define STMHEMASTR			0xdf4
+#define STMHEFEAT1R			0xdf8
+#define STMHEIDR			0xdfc
+#define STMSPER				0xe00
+#define STMSPTER			0xe20
+#define STMPRIVMASKR			0xe40
+#define STMSPSCR			0xe60
+#define STMSPMSCR			0xe64
+#define STMSPOVERRIDER			0xe68
+#define STMSPMOVERRIDER			0xe6c
+#define STMSPTRIGCSR			0xe70
+#define STMTCSR				0xe80
+#define STMTSSTIMR			0xe84
+#define STMTSFREQR			0xe8c
+#define STMSYNCR			0xe90
+#define STMAUXCR			0xe94
+#define STMSPFEAT1R			0xea0
+#define STMSPFEAT2R			0xea4
+#define STMSPFEAT3R			0xea8
+#define STMITTRIGGER			0xee8
+#define STMITATBDATA0			0xeec
+#define STMITATBCTR2			0xef0
+#define STMITATBID			0xef4
+#define STMITATBCTR0			0xef8
+
+#define STM_32_CHANNEL			32
+#define BYTES_PER_CHANNEL		256
+#define STM_TRACE_BUF_SIZE		4096
+#define STM_SW_MASTER_END		127
+
+/* Register bit definition */
+#define STMTCSR_BUSY_BIT		23
+/* Reserve the first 10 channels for kernel usage */
+#define STM_CHANNEL_OFFSET		0
+
+enum stm_pkt_type {
+	STM_PKT_TYPE_DATA	= 0x98,
+	STM_PKT_TYPE_FLAG	= 0xE8,
+	STM_PKT_TYPE_TRIG	= 0xF8,
+};
+
+#define stm_channel_addr(drvdata, ch)	(drvdata->chs.base +	\
+					(ch * BYTES_PER_CHANNEL))
+#define stm_channel_off(type, opts)	(type & ~opts)
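
A small worked example of the two macros above, assuming STM_FLAG_TIMESTAMPED
is BIT(3) as defined in the uapi coresight-stm header: every channel owns a
BYTES_PER_CHANNEL (256-byte) window, and the packet type constant minus the
flag bits selects the offset inside that window.

	stm_channel_addr(drvdata, 3)
		= drvdata->chs.base + 3 * 256 = chs.base + 0x300
	stm_channel_off(STM_PKT_TYPE_DATA, STM_FLAG_TIMESTAMPED)
		= 0x98 & ~0x08 = 0x90

so a timestamped data packet on channel 3 is written at chs.base + 0x390.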
+
+static int boot_nr_channel;
+
+/*
+ * Not really modular but using module_param is the easiest way to
+ * remain consistent with existing use cases for now.
+ */
+module_param_named(
+	boot_nr_channel, boot_nr_channel, int, S_IRUGO
+);
+
+/**
+ * struct channel_space - central management entity for extended ports
+ * @base:		memory mapped base address where channels start.
+ * @phys:		physical base address of channel region.
+ * @guaranteed:		is the channel delivery guaranteed.
+ */
+struct channel_space {
+	void __iomem		*base;
+	phys_addr_t		phys;
+	unsigned long		*guaranteed;
+};
+
+/**
+ * struct stm_drvdata - specifics associated to an STM component
+ * @base:		memory mapped base address for this component.
+ * @dev:		the device entity associated with this component.
+ * @atclk:		optional clock for the core parts of the STM.
+ * @csdev:		component vitals needed by the framework.
+ * @spinlock:		only one at a time pls.
+ * @chs:		the channels associated with this STM.
+ * @stm:		structure associated with the generic STM interface.
+ * @mode:		this tracer's mode, i.e. sysFS or disabled.
+ * @traceid:		value of the current ID for this component.
+ * @write_bytes:	maximum number of bytes this STM can write at a time.
+ * @stmsper:		settings for register STMSPER.
+ * @stmspscr:		settings for register STMSPSCR.
+ * @numsp:		the total number of stimulus ports supported by this STM.
+ * @stmheer:		settings for register STMHEER.
+ * @stmheter:		settings for register STMHETER.
+ * @stmhebsr:		settings for register STMHEBSR.
+ */
+struct stm_drvdata {
+	void __iomem		*base;
+	struct device		*dev;
+	struct clk		*atclk;
+	struct coresight_device	*csdev;
+	spinlock_t		spinlock;
+	struct channel_space	chs;
+	struct stm_data		stm;
+	local_t			mode;
+	u8			traceid;
+	u32			write_bytes;
+	u32			stmsper;
+	u32			stmspscr;
+	u32			numsp;
+	u32			stmheer;
+	u32			stmheter;
+	u32			stmhebsr;
+};
+
+static void stm_hwevent_enable_hw(struct stm_drvdata *drvdata)
+{
+	CS_UNLOCK(drvdata->base);
+
+	writel_relaxed(drvdata->stmhebsr, drvdata->base + STMHEBSR);
+	writel_relaxed(drvdata->stmheter, drvdata->base + STMHETER);
+	writel_relaxed(drvdata->stmheer, drvdata->base + STMHEER);
+	writel_relaxed(0x01 |	/* Enable HW event tracing */
+		       0x04,	/* Error detection on event tracing */
+		       drvdata->base + STMHEMCR);
+
+	CS_LOCK(drvdata->base);
+}
+
+static void stm_port_enable_hw(struct stm_drvdata *drvdata)
+{
+	CS_UNLOCK(drvdata->base);
+	/* ATB trigger enable on direct writes to TRIG locations */
+	writel_relaxed(0x10,
+		       drvdata->base + STMSPTRIGCSR);
+	writel_relaxed(drvdata->stmspscr, drvdata->base + STMSPSCR);
+	writel_relaxed(drvdata->stmsper, drvdata->base + STMSPER);
+
+	CS_LOCK(drvdata->base);
+}
+
+static void stm_enable_hw(struct stm_drvdata *drvdata)
+{
+	if (drvdata->stmheer)
+		stm_hwevent_enable_hw(drvdata);
+
+	stm_port_enable_hw(drvdata);
+
+	CS_UNLOCK(drvdata->base);
+
+	/* 4096 byte between synchronisation packets */
+	writel_relaxed(0xFFF, drvdata->base + STMSYNCR);
+	writel_relaxed((drvdata->traceid << 16 | /* trace id */
+			0x02 |			 /* timestamp enable */
+			0x01),			 /* global STM enable */
+			drvdata->base + STMTCSR);
+
+	CS_LOCK(drvdata->base);
+}
+
+static int stm_enable(struct coresight_device *csdev,
+		      struct perf_event *event, u32 mode)
+{
+	u32 val;
+	struct stm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	if (mode != CS_MODE_SYSFS)
+		return -EINVAL;
+
+	val = local_cmpxchg(&drvdata->mode, CS_MODE_DISABLED, mode);
+
+	/* Someone is already using the tracer */
+	if (val)
+		return -EBUSY;
+
+	pm_runtime_get_sync(drvdata->dev);
+
+	spin_lock(&drvdata->spinlock);
+	stm_enable_hw(drvdata);
+	spin_unlock(&drvdata->spinlock);
+
+	dev_info(drvdata->dev, "STM tracing enabled\n");
+	return 0;
+}
+
+static void stm_hwevent_disable_hw(struct stm_drvdata *drvdata)
+{
+	CS_UNLOCK(drvdata->base);
+
+	writel_relaxed(0x0, drvdata->base + STMHEMCR);
+	writel_relaxed(0x0, drvdata->base + STMHEER);
+	writel_relaxed(0x0, drvdata->base + STMHETER);
+
+	CS_LOCK(drvdata->base);
+}
+
+static void stm_port_disable_hw(struct stm_drvdata *drvdata)
+{
+	CS_UNLOCK(drvdata->base);
+
+	writel_relaxed(0x0, drvdata->base + STMSPER);
+	writel_relaxed(0x0, drvdata->base + STMSPTRIGCSR);
+
+	CS_LOCK(drvdata->base);
+}
+
+static void stm_disable_hw(struct stm_drvdata *drvdata)
+{
+	u32 val;
+
+	CS_UNLOCK(drvdata->base);
+
+	val = readl_relaxed(drvdata->base + STMTCSR);
+	val &= ~0x1; /* clear global STM enable [0] */
+	writel_relaxed(val, drvdata->base + STMTCSR);
+
+	CS_LOCK(drvdata->base);
+
+	stm_port_disable_hw(drvdata);
+	if (drvdata->stmheer)
+		stm_hwevent_disable_hw(drvdata);
+}
+
+static void stm_disable(struct coresight_device *csdev,
+			struct perf_event *event)
+{
+	struct stm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	/*
+	 * For as long as the tracer isn't disabled another entity can't
+	 * change its status.  As such we can read the status here without
+	 * fearing it will change under us.
+	 */
+	if (local_read(&drvdata->mode) == CS_MODE_SYSFS) {
+		spin_lock(&drvdata->spinlock);
+		stm_disable_hw(drvdata);
+		spin_unlock(&drvdata->spinlock);
+
+		/* Wait until the engine has completely stopped */
+		coresight_timeout(drvdata->base, STMTCSR, STMTCSR_BUSY_BIT, 0);
+
+		pm_runtime_put(drvdata->dev);
+
+		local_set(&drvdata->mode, CS_MODE_DISABLED);
+		dev_info(drvdata->dev, "STM tracing disabled\n");
+	}
+}
+
+static int stm_trace_id(struct coresight_device *csdev)
+{
+	struct stm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	return drvdata->traceid;
+}
+
+static const struct coresight_ops_source stm_source_ops = {
+	.trace_id	= stm_trace_id,
+	.enable		= stm_enable,
+	.disable	= stm_disable,
+};
+
+static const struct coresight_ops stm_cs_ops = {
+	.source_ops	= &stm_source_ops,
+};
+
+static inline bool stm_addr_unaligned(const void *addr, u8 write_bytes)
+{
+	return ((unsigned long)addr & (write_bytes - 1));
+}
+
+static void stm_send(void __iomem *addr, const void *data,
+		     u32 size, u8 write_bytes)
+{
+	u8 payload[8];
+
+	if (stm_addr_unaligned(data, write_bytes)) {
+		memcpy(payload, data, size);
+		data = payload;
+	}
+
+	/* now we are 64bit/32bit aligned */
+	switch (size) {
+#ifdef CONFIG_64BIT
+	case 8:
+		writeq_relaxed(*(u64 *)data, addr);
+		break;
+#endif
+	case 4:
+		writel_relaxed(*(u32 *)data, addr);
+		break;
+	case 2:
+		writew_relaxed(*(u16 *)data, addr);
+		break;
+	case 1:
+		writeb_relaxed(*(u8 *)data, addr);
+		break;
+	default:
+		break;
+	}
+}
+
+static int stm_generic_link(struct stm_data *stm_data,
+			    unsigned int master,  unsigned int channel)
+{
+	struct stm_drvdata *drvdata = container_of(stm_data,
+						   struct stm_drvdata, stm);
+	if (!drvdata || !drvdata->csdev)
+		return -EINVAL;
+
+	return coresight_enable(drvdata->csdev);
+}
+
+static void stm_generic_unlink(struct stm_data *stm_data,
+			       unsigned int master,  unsigned int channel)
+{
+	struct stm_drvdata *drvdata = container_of(stm_data,
+						   struct stm_drvdata, stm);
+	if (!drvdata || !drvdata->csdev)
+		return;
+
+	coresight_disable(drvdata->csdev);
+}
+
+static phys_addr_t
+stm_mmio_addr(struct stm_data *stm_data, unsigned int master,
+	      unsigned int channel, unsigned int nr_chans)
+{
+	struct stm_drvdata *drvdata = container_of(stm_data,
+						   struct stm_drvdata, stm);
+	phys_addr_t addr;
+
+	addr = drvdata->chs.phys + channel * BYTES_PER_CHANNEL;
+
+	if (offset_in_page(addr) ||
+	    offset_in_page(nr_chans * BYTES_PER_CHANNEL))
+		return 0;
+
+	return addr;
+}
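
With a 4K PAGE_SIZE and a page-aligned chs.phys, the two offset_in_page()
checks above mean an mmap'able region must start on a 16-channel boundary and
span a multiple of 16 channels (16 * 256 bytes = 4096):

	channel = 0,  nr_chans = 16  ->  address returned (one full page)
	channel = 8,  nr_chans = 16  ->  0 (start address not page aligned)
	channel = 16, nr_chans = 8   ->  0 (size is only half a page)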
+
+static long stm_generic_set_options(struct stm_data *stm_data,
+				    unsigned int master,
+				    unsigned int channel,
+				    unsigned int nr_chans,
+				    unsigned long options)
+{
+	struct stm_drvdata *drvdata = container_of(stm_data,
+						   struct stm_drvdata, stm);
+	if (!(drvdata && local_read(&drvdata->mode)))
+		return -EINVAL;
+
+	if (channel >= drvdata->numsp)
+		return -EINVAL;
+
+	switch (options) {
+	case STM_OPTION_GUARANTEED:
+		set_bit(channel, drvdata->chs.guaranteed);
+		break;
+
+	case STM_OPTION_INVARIANT:
+		clear_bit(channel, drvdata->chs.guaranteed);
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static ssize_t notrace stm_generic_packet(struct stm_data *stm_data,
+				  unsigned int master,
+				  unsigned int channel,
+				  unsigned int packet,
+				  unsigned int flags,
+				  unsigned int size,
+				  const unsigned char *payload)
+{
+	void __iomem *ch_addr;
+	struct stm_drvdata *drvdata = container_of(stm_data,
+						   struct stm_drvdata, stm);
+
+	if (!(drvdata && local_read(&drvdata->mode)))
+		return -EACCES;
+
+	if (channel >= drvdata->numsp)
+		return -EINVAL;
+
+	ch_addr = stm_channel_addr(drvdata, channel);
+
+	flags = (flags == STP_PACKET_TIMESTAMPED) ? STM_FLAG_TIMESTAMPED : 0;
+	flags |= test_bit(channel, drvdata->chs.guaranteed) ?
+			   STM_FLAG_GUARANTEED : 0;
+
+	if (size > drvdata->write_bytes)
+		size = drvdata->write_bytes;
+	else
+		size = rounddown_pow_of_two(size);
+
+	switch (packet) {
+	case STP_PACKET_FLAG:
+		ch_addr += stm_channel_off(STM_PKT_TYPE_FLAG, flags);
+
+		/*
+		 * The generic STM core sets a size of '0' on flag packets.
+		 * As such send a flag packet of size '1' and tell the
+		 * core we did so.
+		 */
+		stm_send(ch_addr, payload, 1, drvdata->write_bytes);
+		size = 1;
+		break;
+
+	case STP_PACKET_DATA:
+		ch_addr += stm_channel_off(STM_PKT_TYPE_DATA, flags);
+		stm_send(ch_addr, payload, size,
+				drvdata->write_bytes);
+		break;
+
+	default:
+		return -ENOTSUPP;
+	}
+
+	return size;
+}
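
A quick check of the size clamping above, assuming a 64-bit capable STM
(write_bytes == 8):

	size = 16  ->  capped at write_bytes       ->  8
	size = 7   ->  rounddown_pow_of_two(7)     ->  4
	size = 3   ->  rounddown_pow_of_two(3)     ->  2
	size = 1   ->  unchanged                   ->  1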
+
+static ssize_t hwevent_enable_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct stm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	unsigned long val = drvdata->stmheer;
+
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t hwevent_enable_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t size)
+{
+	struct stm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	unsigned long val;
+	int ret = 0;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return -EINVAL;
+
+	drvdata->stmheer = val;
+	/* HW event enable and trigger go hand in hand */
+	drvdata->stmheter = val;
+
+	return size;
+}
+static DEVICE_ATTR_RW(hwevent_enable);
+
+static ssize_t hwevent_select_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct stm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	unsigned long val = drvdata->stmhebsr;
+
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t hwevent_select_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t size)
+{
+	struct stm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	unsigned long val;
+	int ret = 0;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return -EINVAL;
+
+	drvdata->stmhebsr = val;
+
+	return size;
+}
+static DEVICE_ATTR_RW(hwevent_select);
+
+static ssize_t port_select_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct stm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	unsigned long val;
+
+	if (!local_read(&drvdata->mode)) {
+		val = drvdata->stmspscr;
+	} else {
+		spin_lock(&drvdata->spinlock);
+		val = readl_relaxed(drvdata->base + STMSPSCR);
+		spin_unlock(&drvdata->spinlock);
+	}
+
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t port_select_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t size)
+{
+	struct stm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	unsigned long val, stmsper;
+	int ret = 0;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	spin_lock(&drvdata->spinlock);
+	drvdata->stmspscr = val;
+
+	if (local_read(&drvdata->mode)) {
+		CS_UNLOCK(drvdata->base);
+		/* Process as per ARM's TRM recommendation */
+		stmsper = readl_relaxed(drvdata->base + STMSPER);
+		writel_relaxed(0x0, drvdata->base + STMSPER);
+		writel_relaxed(drvdata->stmspscr, drvdata->base + STMSPSCR);
+		writel_relaxed(stmsper, drvdata->base + STMSPER);
+		CS_LOCK(drvdata->base);
+	}
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(port_select);
+
+static ssize_t port_enable_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct stm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	unsigned long val;
+
+	if (!local_read(&drvdata->mode)) {
+		val = drvdata->stmsper;
+	} else {
+		spin_lock(&drvdata->spinlock);
+		val = readl_relaxed(drvdata->base + STMSPER);
+		spin_unlock(&drvdata->spinlock);
+	}
+
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t port_enable_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t size)
+{
+	struct stm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	unsigned long val;
+	int ret = 0;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	spin_lock(&drvdata->spinlock);
+	drvdata->stmsper = val;
+
+	if (local_read(&drvdata->mode)) {
+		CS_UNLOCK(drvdata->base);
+		writel_relaxed(drvdata->stmsper, drvdata->base + STMSPER);
+		CS_LOCK(drvdata->base);
+	}
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(port_enable);
+
+static ssize_t traceid_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct stm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->traceid;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t traceid_store(struct device *dev,
+			     struct device_attribute *attr,
+			     const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct stm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	/* traceid field is 7 bits wide on STM32 */
+	drvdata->traceid = val & 0x7f;
+	return size;
+}
+static DEVICE_ATTR_RW(traceid);
+
+#define coresight_stm_reg(name, offset)	\
+	coresight_simple_reg32(struct stm_drvdata, name, offset)
+
+coresight_stm_reg(tcsr, STMTCSR);
+coresight_stm_reg(tsfreqr, STMTSFREQR);
+coresight_stm_reg(syncr, STMSYNCR);
+coresight_stm_reg(sper, STMSPER);
+coresight_stm_reg(spter, STMSPTER);
+coresight_stm_reg(privmaskr, STMPRIVMASKR);
+coresight_stm_reg(spscr, STMSPSCR);
+coresight_stm_reg(spmscr, STMSPMSCR);
+coresight_stm_reg(spfeat1r, STMSPFEAT1R);
+coresight_stm_reg(spfeat2r, STMSPFEAT2R);
+coresight_stm_reg(spfeat3r, STMSPFEAT3R);
+coresight_stm_reg(devid, CORESIGHT_DEVID);
+
+static struct attribute *coresight_stm_attrs[] = {
+	&dev_attr_hwevent_enable.attr,
+	&dev_attr_hwevent_select.attr,
+	&dev_attr_port_enable.attr,
+	&dev_attr_port_select.attr,
+	&dev_attr_traceid.attr,
+	NULL,
+};
+
+static struct attribute *coresight_stm_mgmt_attrs[] = {
+	&dev_attr_tcsr.attr,
+	&dev_attr_tsfreqr.attr,
+	&dev_attr_syncr.attr,
+	&dev_attr_sper.attr,
+	&dev_attr_spter.attr,
+	&dev_attr_privmaskr.attr,
+	&dev_attr_spscr.attr,
+	&dev_attr_spmscr.attr,
+	&dev_attr_spfeat1r.attr,
+	&dev_attr_spfeat2r.attr,
+	&dev_attr_spfeat3r.attr,
+	&dev_attr_devid.attr,
+	NULL,
+};
+
+static const struct attribute_group coresight_stm_group = {
+	.attrs = coresight_stm_attrs,
+};
+
+static const struct attribute_group coresight_stm_mgmt_group = {
+	.attrs = coresight_stm_mgmt_attrs,
+	.name = "mgmt",
+};
+
+static const struct attribute_group *coresight_stm_groups[] = {
+	&coresight_stm_group,
+	&coresight_stm_mgmt_group,
+	NULL,
+};
+
+static int stm_get_resource_byname(struct device_node *np,
+				   char *ch_base, struct resource *res)
+{
+	const char *name = NULL;
+	int index = 0, found = 0;
+
+	while (!of_property_read_string_index(np, "reg-names", index, &name)) {
+		if (strcmp(ch_base, name)) {
+			index++;
+			continue;
+		}
+
+		/* We have a match and @index is where it's at */
+		found = 1;
+		break;
+	}
+
+	if (!found)
+		return -EINVAL;
+
+	return of_address_to_resource(np, index, res);
+}
+
+static u32 stm_fundamental_data_size(struct stm_drvdata *drvdata)
+{
+	u32 stmspfeat2r;
+
+	if (!IS_ENABLED(CONFIG_64BIT))
+		return 4;
+
+	stmspfeat2r = readl_relaxed(drvdata->base + STMSPFEAT2R);
+
+	/*
+	 * bit[15:12] represents the fundamental data size
+	 * 0 - 32-bit data
+	 * 1 - 64-bit data
+	 */
+	return BMVAL(stmspfeat2r, 12, 15) ? 8 : 4;
+}
+
+static u32 stm_num_stimulus_port(struct stm_drvdata *drvdata)
+{
+	u32 numsp;
+
+	numsp = readl_relaxed(drvdata->base + CORESIGHT_DEVID);
+	/*
+	 * NUMSP in STMDEVID is 17 bits long and if equal to 0x0,
+	 * 32 stimulus ports are supported.
+	 */
+	numsp &= 0x1ffff;
+	if (!numsp)
+		numsp = STM_32_CHANNEL;
+	return numsp;
+}
+
+static void stm_init_default_data(struct stm_drvdata *drvdata)
+{
+	/* Don't use port selection */
+	drvdata->stmspscr = 0x0;
+	/*
+	 * Enable all channels regardless of their number.  When port
+	 * selection isn't used (see above) STMSPER applies to all the
+	 * 32-channel groups available, hence setting all 32 bits to 1.
+	 */
+	drvdata->stmsper = ~0x0;
+
+	/*
+	 * The trace ID value for *ETM* tracers starts at CPU_ID * 2 + 0x10 and
+	 * anything equal to or higher than 0x70 is reserved.  Since 0x00 is
+	 * also reserved the STM trace ID needs to be higher than 0x00 and
+	 * lower than 0x10.
+	 */
+	drvdata->traceid = 0x1;
+
+	/* Set invariant transaction timing on all channels */
+	bitmap_clear(drvdata->chs.guaranteed, 0, drvdata->numsp);
+}
+
+static void stm_init_generic_data(struct stm_drvdata *drvdata)
+{
+	drvdata->stm.name = dev_name(drvdata->dev);
+
+	/*
+	 * MasterIDs are assigned at HW design phase. As such the core is
+	 * using a single master for interaction with this device.
+	 */
+	drvdata->stm.sw_start = 1;
+	drvdata->stm.sw_end = 1;
+	drvdata->stm.hw_override = true;
+	drvdata->stm.sw_nchannels = drvdata->numsp;
+	drvdata->stm.sw_mmiosz = BYTES_PER_CHANNEL;
+	drvdata->stm.packet = stm_generic_packet;
+	drvdata->stm.mmio_addr = stm_mmio_addr;
+	drvdata->stm.link = stm_generic_link;
+	drvdata->stm.unlink = stm_generic_unlink;
+	drvdata->stm.set_options = stm_generic_set_options;
+}
+
+static int stm_probe(struct amba_device *adev, const struct amba_id *id)
+{
+	int ret;
+	void __iomem *base;
+	unsigned long *guaranteed;
+	struct device *dev = &adev->dev;
+	struct coresight_platform_data *pdata = NULL;
+	struct stm_drvdata *drvdata;
+	struct resource *res = &adev->res;
+	struct resource ch_res;
+	size_t res_size, bitmap_size;
+	struct coresight_desc desc = { 0 };
+	struct device_node *np = adev->dev.of_node;
+
+	if (np) {
+		pdata = of_get_coresight_platform_data(dev, np);
+		if (IS_ERR(pdata))
+			return PTR_ERR(pdata);
+		adev->dev.platform_data = pdata;
+	}
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+
+	drvdata->dev = &adev->dev;
+	drvdata->atclk = devm_clk_get(&adev->dev, "atclk"); /* optional */
+	if (!IS_ERR(drvdata->atclk)) {
+		ret = clk_prepare_enable(drvdata->atclk);
+		if (ret)
+			return ret;
+	}
+	dev_set_drvdata(dev, drvdata);
+
+	base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+	drvdata->base = base;
+
+	ret = stm_get_resource_byname(np, "stm-stimulus-base", &ch_res);
+	if (ret)
+		return ret;
+	drvdata->chs.phys = ch_res.start;
+
+	base = devm_ioremap_resource(dev, &ch_res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+	drvdata->chs.base = base;
+
+	drvdata->write_bytes = stm_fundamental_data_size(drvdata);
+
+	if (boot_nr_channel) {
+		drvdata->numsp = boot_nr_channel;
+		res_size = min((resource_size_t)(boot_nr_channel *
+				  BYTES_PER_CHANNEL), resource_size(res));
+	} else {
+		drvdata->numsp = stm_num_stimulus_port(drvdata);
+		res_size = min((resource_size_t)(drvdata->numsp *
+				 BYTES_PER_CHANNEL), resource_size(res));
+	}
+	bitmap_size = BITS_TO_LONGS(drvdata->numsp) * sizeof(long);
+
+	guaranteed = devm_kzalloc(dev, bitmap_size, GFP_KERNEL);
+	if (!guaranteed)
+		return -ENOMEM;
+	drvdata->chs.guaranteed = guaranteed;
+
+	spin_lock_init(&drvdata->spinlock);
+
+	stm_init_default_data(drvdata);
+	stm_init_generic_data(drvdata);
+
+	if (stm_register_device(dev, &drvdata->stm, THIS_MODULE)) {
+		dev_info(dev,
+			 "stm_register_device failed, probing deffered\n");
+		return -EPROBE_DEFER;
+	}
+
+	desc.type = CORESIGHT_DEV_TYPE_SOURCE;
+	desc.subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_SOFTWARE;
+	desc.ops = &stm_cs_ops;
+	desc.pdata = pdata;
+	desc.dev = dev;
+	desc.groups = coresight_stm_groups;
+	drvdata->csdev = coresight_register(&desc);
+	if (IS_ERR(drvdata->csdev)) {
+		ret = PTR_ERR(drvdata->csdev);
+		goto stm_unregister;
+	}
+
+	pm_runtime_put(&adev->dev);
+
+	dev_info(dev, "%s initialized\n", (char *)id->data);
+	return 0;
+
+stm_unregister:
+	stm_unregister_device(&drvdata->stm);
+	return ret;
+}
+
+#ifdef CONFIG_PM
+static int stm_runtime_suspend(struct device *dev)
+{
+	struct stm_drvdata *drvdata = dev_get_drvdata(dev);
+
+	if (drvdata && !IS_ERR(drvdata->atclk))
+		clk_disable_unprepare(drvdata->atclk);
+
+	return 0;
+}
+
+static int stm_runtime_resume(struct device *dev)
+{
+	struct stm_drvdata *drvdata = dev_get_drvdata(dev);
+
+	if (drvdata && !IS_ERR(drvdata->atclk))
+		clk_prepare_enable(drvdata->atclk);
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops stm_dev_pm_ops = {
+	SET_RUNTIME_PM_OPS(stm_runtime_suspend, stm_runtime_resume, NULL)
+};
+
+static const struct amba_id stm_ids[] = {
+	{
+		.id     = 0x000bb962,
+		.mask   = 0x000fffff,
+		.data	= "STM32",
+	},
+	{
+		.id	= 0x000bb963,
+		.mask	= 0x000fffff,
+		.data	= "STM500",
+	},
+	{ 0, 0},
+};
+
+static struct amba_driver stm_driver = {
+	.drv = {
+		.name   = "coresight-stm",
+		.owner	= THIS_MODULE,
+		.pm	= &stm_dev_pm_ops,
+		.suppress_bind_attrs = true,
+	},
+	.probe          = stm_probe,
+	.id_table	= stm_ids,
+};
+
+builtin_amba_driver(stm_driver);
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c
new file mode 100644
index 0000000..0549249
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c
@@ -0,0 +1,626 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(C) 2016 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#include <linux/circ_buf.h>
+#include <linux/coresight.h>
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+#include "coresight-priv.h"
+#include "coresight-tmc.h"
+
+static void tmc_etb_enable_hw(struct tmc_drvdata *drvdata)
+{
+	CS_UNLOCK(drvdata->base);
+
+	/* Wait for TMCSReady bit to be set */
+	tmc_wait_for_tmcready(drvdata);
+
+	writel_relaxed(TMC_MODE_CIRCULAR_BUFFER, drvdata->base + TMC_MODE);
+	writel_relaxed(TMC_FFCR_EN_FMT | TMC_FFCR_EN_TI |
+		       TMC_FFCR_FON_FLIN | TMC_FFCR_FON_TRIG_EVT |
+		       TMC_FFCR_TRIGON_TRIGIN,
+		       drvdata->base + TMC_FFCR);
+
+	writel_relaxed(drvdata->trigger_cntr, drvdata->base + TMC_TRG);
+	tmc_enable_hw(drvdata);
+
+	CS_LOCK(drvdata->base);
+}
+
+static void tmc_etb_dump_hw(struct tmc_drvdata *drvdata)
+{
+	char *bufp;
+	u32 read_data, lost;
+	int i;
+
+	/* Check if the buffer wrapped around. */
+	lost = readl_relaxed(drvdata->base + TMC_STS) & TMC_STS_FULL;
+	bufp = drvdata->buf;
+	drvdata->len = 0;
+	while (1) {
+		for (i = 0; i < drvdata->memwidth; i++) {
+			read_data = readl_relaxed(drvdata->base + TMC_RRD);
+			if (read_data == 0xFFFFFFFF)
+				goto done;
+			memcpy(bufp, &read_data, 4);
+			bufp += 4;
+			drvdata->len += 4;
+		}
+	}
+done:
+	if (lost)
+		coresight_insert_barrier_packet(drvdata->buf);
+	return;
+}
+
+static void tmc_etb_disable_hw(struct tmc_drvdata *drvdata)
+{
+	CS_UNLOCK(drvdata->base);
+
+	tmc_flush_and_stop(drvdata);
+	/*
+	 * When operating in sysFS mode the content of the buffer needs to be
+	 * read before the TMC is disabled.
+	 */
+	if (drvdata->mode == CS_MODE_SYSFS)
+		tmc_etb_dump_hw(drvdata);
+	tmc_disable_hw(drvdata);
+
+	CS_LOCK(drvdata->base);
+}
+
+static void tmc_etf_enable_hw(struct tmc_drvdata *drvdata)
+{
+	CS_UNLOCK(drvdata->base);
+
+	/* Wait for TMCSReady bit to be set */
+	tmc_wait_for_tmcready(drvdata);
+
+	writel_relaxed(TMC_MODE_HARDWARE_FIFO, drvdata->base + TMC_MODE);
+	writel_relaxed(TMC_FFCR_EN_FMT | TMC_FFCR_EN_TI,
+		       drvdata->base + TMC_FFCR);
+	writel_relaxed(0x0, drvdata->base + TMC_BUFWM);
+	tmc_enable_hw(drvdata);
+
+	CS_LOCK(drvdata->base);
+}
+
+static void tmc_etf_disable_hw(struct tmc_drvdata *drvdata)
+{
+	CS_UNLOCK(drvdata->base);
+
+	tmc_flush_and_stop(drvdata);
+	tmc_disable_hw(drvdata);
+
+	CS_LOCK(drvdata->base);
+}
+
+/*
+ * Return the available trace data in the buffer from @pos, with
+ * a maximum limit of @len, updating the @bufpp on where to
+ * find it.
+ */
+ssize_t tmc_etb_get_sysfs_trace(struct tmc_drvdata *drvdata,
+				loff_t pos, size_t len, char **bufpp)
+{
+	ssize_t actual = len;
+
+	/* Adjust the len to available size @pos */
+	if (pos + actual > drvdata->len)
+		actual = drvdata->len - pos;
+	if (actual > 0)
+		*bufpp = drvdata->buf + pos;
+	return actual;
+}
+
+static int tmc_enable_etf_sink_sysfs(struct coresight_device *csdev)
+{
+	int ret = 0;
+	bool used = false;
+	char *buf = NULL;
+	unsigned long flags;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	/*
+	 * If we don't have a buffer release the lock and allocate memory.
+	 * Otherwise keep the lock and move along.
+	 */
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	if (!drvdata->buf) {
+		spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+		/* Allocating the memory here while outside of the spinlock */
+		buf = kzalloc(drvdata->size, GFP_KERNEL);
+		if (!buf)
+			return -ENOMEM;
+
+		/* Let's try again */
+		spin_lock_irqsave(&drvdata->spinlock, flags);
+	}
+
+	if (drvdata->reading) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	/*
+	 * In sysFS mode we can have multiple writers per sink.  Since this
+	 * sink is already enabled no memory is needed and the HW need not be
+	 * touched.
+	 */
+	if (drvdata->mode == CS_MODE_SYSFS)
+		goto out;
+
+	/*
+	 * If drvdata::buf isn't NULL, memory was allocated for a previous
+	 * trace run but wasn't read.  If so simply zero-out the memory.
+	 * Otherwise use the memory allocated above.
+	 *
+	 * The memory is freed when users read the buffer using the
+	 * /dev/xyz.{etf|etb} interface.  See tmc_read_unprepare_etf() for
+	 * details.
+	 */
+	if (drvdata->buf) {
+		memset(drvdata->buf, 0, drvdata->size);
+	} else {
+		used = true;
+		drvdata->buf = buf;
+	}
+
+	drvdata->mode = CS_MODE_SYSFS;
+	tmc_etb_enable_hw(drvdata);
+out:
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	/* Free memory outside the spinlock if need be */
+	if (!used)
+		kfree(buf);
+
+	return ret;
+}
+
+static int tmc_enable_etf_sink_perf(struct coresight_device *csdev)
+{
+	int ret = 0;
+	unsigned long flags;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	if (drvdata->reading) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/*
+	 * In Perf mode there can be only one writer per sink.  There
+	 * is also no need to continue if the ETB/ETR is already operated
+	 * from sysFS.
+	 */
+	if (drvdata->mode != CS_MODE_DISABLED) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	drvdata->mode = CS_MODE_PERF;
+	tmc_etb_enable_hw(drvdata);
+out:
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	return ret;
+}
+
+static int tmc_enable_etf_sink(struct coresight_device *csdev, u32 mode)
+{
+	int ret;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	switch (mode) {
+	case CS_MODE_SYSFS:
+		ret = tmc_enable_etf_sink_sysfs(csdev);
+		break;
+	case CS_MODE_PERF:
+		ret = tmc_enable_etf_sink_perf(csdev);
+		break;
+	/* We shouldn't be here */
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	if (ret)
+		return ret;
+
+	dev_info(drvdata->dev, "TMC-ETB/ETF enabled\n");
+	return 0;
+}
+
+static void tmc_disable_etf_sink(struct coresight_device *csdev)
+{
+	unsigned long flags;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	if (drvdata->reading) {
+		spin_unlock_irqrestore(&drvdata->spinlock, flags);
+		return;
+	}
+
+	/* Disable the TMC only if it needs to */
+	if (drvdata->mode != CS_MODE_DISABLED) {
+		tmc_etb_disable_hw(drvdata);
+		drvdata->mode = CS_MODE_DISABLED;
+	}
+
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	dev_info(drvdata->dev, "TMC-ETB/ETF disabled\n");
+}
+
+static int tmc_enable_etf_link(struct coresight_device *csdev,
+			       int inport, int outport)
+{
+	unsigned long flags;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	if (drvdata->reading) {
+		spin_unlock_irqrestore(&drvdata->spinlock, flags);
+		return -EBUSY;
+	}
+
+	tmc_etf_enable_hw(drvdata);
+	drvdata->mode = CS_MODE_SYSFS;
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	dev_info(drvdata->dev, "TMC-ETF enabled\n");
+	return 0;
+}
+
+static void tmc_disable_etf_link(struct coresight_device *csdev,
+				 int inport, int outport)
+{
+	unsigned long flags;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	if (drvdata->reading) {
+		spin_unlock_irqrestore(&drvdata->spinlock, flags);
+		return;
+	}
+
+	tmc_etf_disable_hw(drvdata);
+	drvdata->mode = CS_MODE_DISABLED;
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	dev_info(drvdata->dev, "TMC-ETF disabled\n");
+}
+
+static void *tmc_alloc_etf_buffer(struct coresight_device *csdev, int cpu,
+				  void **pages, int nr_pages, bool overwrite)
+{
+	int node;
+	struct cs_buffers *buf;
+
+	if (cpu == -1)
+		cpu = smp_processor_id();
+	node = cpu_to_node(cpu);
+
+	/* Allocate memory structure for interaction with Perf */
+	buf = kzalloc_node(sizeof(struct cs_buffers), GFP_KERNEL, node);
+	if (!buf)
+		return NULL;
+
+	buf->snapshot = overwrite;
+	buf->nr_pages = nr_pages;
+	buf->data_pages = pages;
+
+	return buf;
+}
+
+static void tmc_free_etf_buffer(void *config)
+{
+	struct cs_buffers *buf = config;
+
+	kfree(buf);
+}
+
+static int tmc_set_etf_buffer(struct coresight_device *csdev,
+			      struct perf_output_handle *handle,
+			      void *sink_config)
+{
+	int ret = 0;
+	unsigned long head;
+	struct cs_buffers *buf = sink_config;
+
+	/* wrap head around to the amount of space we have */
+	head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1);
+
+	/* find the page to write to */
+	buf->cur = head / PAGE_SIZE;
+
+	/* and offset within that page */
+	buf->offset = head % PAGE_SIZE;
+
+	local_set(&buf->data_size, 0);
+
+	return ret;
+}
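
A worked example of the head arithmetic above with purely illustrative
numbers, a 16-page (64KB) AUX buffer and handle->head == 0x12345; the mask
trick relies on the AUX area being a power-of-two number of pages:

	head        = 0x12345 & (16 * 4096 - 1) = 0x12345 & 0xffff = 0x2345
	buf->cur    = 0x2345 / 4096 = 2		/* third page */
	buf->offset = 0x2345 % 4096 = 0x345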
+
+static unsigned long tmc_reset_etf_buffer(struct coresight_device *csdev,
+					  struct perf_output_handle *handle,
+					  void *sink_config)
+{
+	long size = 0;
+	struct cs_buffers *buf = sink_config;
+
+	if (buf) {
+		/*
+		 * In snapshot mode ->data_size holds the new address of the
+		 * ring buffer's head.  The size itself is the whole address
+		 * range since we want the latest information.
+		 */
+		if (buf->snapshot)
+			handle->head = local_xchg(&buf->data_size,
+						  buf->nr_pages << PAGE_SHIFT);
+		/*
+		 * Tell the tracer PMU how much we got in this run and if
+		 * something went wrong along the way.  Nobody else can use
+		 * this cs_buffers instance until we are done.  As such
+		 * resetting parameters here and squaring off with the ring
+		 * buffer API in the tracer PMU is fine.
+		 */
+		size = local_xchg(&buf->data_size, 0);
+	}
+
+	return size;
+}
+
+static void tmc_update_etf_buffer(struct coresight_device *csdev,
+				  struct perf_output_handle *handle,
+				  void *sink_config)
+{
+	bool lost = false;
+	int i, cur;
+	const u32 *barrier;
+	u32 *buf_ptr;
+	u64 read_ptr, write_ptr;
+	u32 status, to_read;
+	unsigned long offset;
+	struct cs_buffers *buf = sink_config;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	if (!buf)
+		return;
+
+	/* This shouldn't happen */
+	if (WARN_ON_ONCE(drvdata->mode != CS_MODE_PERF))
+		return;
+
+	CS_UNLOCK(drvdata->base);
+
+	tmc_flush_and_stop(drvdata);
+
+	read_ptr = tmc_read_rrp(drvdata);
+	write_ptr = tmc_read_rwp(drvdata);
+
+	/*
+	 * Get a hold of the status register and see if a wrap around
+	 * has occurred.  If so adjust things accordingly.
+	 */
+	status = readl_relaxed(drvdata->base + TMC_STS);
+	if (status & TMC_STS_FULL) {
+		lost = true;
+		to_read = drvdata->size;
+	} else {
+		to_read = CIRC_CNT(write_ptr, read_ptr, drvdata->size);
+	}
+
+	/*
+	 * The TMC RAM buffer may be bigger than the space available in the
+	 * perf ring buffer (handle->size).  If so advance the RRP so that we
+	 * get the latest trace data.
+	 */
+	if (to_read > handle->size) {
+		u32 mask = 0;
+
+		/*
+		 * The value written to RRP must be byte-address aligned to
+		 * the width of the trace memory databus _and_ to a frame
+		 * boundary (16 byte), whichever is the biggest. For example,
+		 * for 32-bit, 64-bit and 128-bit wide trace memory, the four
+		 * LSBs must be 0s. For 256-bit wide trace memory, the five
+		 * LSBs must be 0s.
+		 */
+		switch (drvdata->memwidth) {
+		case TMC_MEM_INTF_WIDTH_32BITS:
+		case TMC_MEM_INTF_WIDTH_64BITS:
+		case TMC_MEM_INTF_WIDTH_128BITS:
+			mask = GENMASK(31, 5);
+			break;
+		case TMC_MEM_INTF_WIDTH_256BITS:
+			mask = GENMASK(31, 6);
+			break;
+		}
+
+		/*
+		 * Make sure the new size is aligned in accordance with the
+		 * requirement explained above.
+		 */
+		to_read = handle->size & mask;
+		/* Move the RAM read pointer up */
+		read_ptr = (write_ptr + drvdata->size) - to_read;
+		/* Make sure we are still within our limits */
+		if (read_ptr > (drvdata->size - 1))
+			read_ptr -= drvdata->size;
+		/* Tell the HW */
+		tmc_write_rrp(drvdata, read_ptr);
+		lost = true;
+	}
+
+	if (lost)
+		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
+
+	cur = buf->cur;
+	offset = buf->offset;
+	barrier = barrier_pkt;
+
+	/* read the data from the FIFO, one 32-bit word at a time */
+	for (i = 0; i < to_read; i += 4) {
+		buf_ptr = buf->data_pages[cur] + offset;
+		*buf_ptr = readl_relaxed(drvdata->base + TMC_RRD);
+
+		if (lost && *barrier) {
+			*buf_ptr = *barrier;
+			barrier++;
+		}
+
+		offset += 4;
+		if (offset >= PAGE_SIZE) {
+			offset = 0;
+			cur++;
+			/* wrap around at the end of the buffer */
+			cur &= buf->nr_pages - 1;
+		}
+	}
+
+	/*
+	 * In snapshot mode all we have to do is communicate to
+	 * perf_aux_output_end() the address of the current head.  In full
+	 * trace mode the same function expects a size to move rb->aux_head
+	 * forward.
+	 */
+	if (buf->snapshot)
+		local_set(&buf->data_size, (cur * PAGE_SIZE) + offset);
+	else
+		local_add(to_read, &buf->data_size);
+
+	CS_LOCK(drvdata->base);
+}
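
To make the RRP adjustment above concrete, with made-up numbers: a 64KB ETF
RAM (drvdata->size == 0x10000), a 64-bit wide memory interface,
handle->size == 9000 and write_ptr == 0x1000:

	mask     = GENMASK(31, 5)			/* 32-byte alignment */
	to_read  = 9000 & mask = 8992
	read_ptr = (0x1000 + 0x10000) - 8992 = 0xece0	/* still < size, no wrap */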
+
+static const struct coresight_ops_sink tmc_etf_sink_ops = {
+	.enable		= tmc_enable_etf_sink,
+	.disable	= tmc_disable_etf_sink,
+	.alloc_buffer	= tmc_alloc_etf_buffer,
+	.free_buffer	= tmc_free_etf_buffer,
+	.set_buffer	= tmc_set_etf_buffer,
+	.reset_buffer	= tmc_reset_etf_buffer,
+	.update_buffer	= tmc_update_etf_buffer,
+};
+
+static const struct coresight_ops_link tmc_etf_link_ops = {
+	.enable		= tmc_enable_etf_link,
+	.disable	= tmc_disable_etf_link,
+};
+
+const struct coresight_ops tmc_etb_cs_ops = {
+	.sink_ops	= &tmc_etf_sink_ops,
+};
+
+const struct coresight_ops tmc_etf_cs_ops = {
+	.sink_ops	= &tmc_etf_sink_ops,
+	.link_ops	= &tmc_etf_link_ops,
+};
+
+int tmc_read_prepare_etb(struct tmc_drvdata *drvdata)
+{
+	enum tmc_mode mode;
+	int ret = 0;
+	unsigned long flags;
+
+	/* config types are set at boot time and never change */
+	if (WARN_ON_ONCE(drvdata->config_type != TMC_CONFIG_TYPE_ETB &&
+			 drvdata->config_type != TMC_CONFIG_TYPE_ETF))
+		return -EINVAL;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+
+	if (drvdata->reading) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	/* There is no point in reading a TMC in HW FIFO mode */
+	mode = readl_relaxed(drvdata->base + TMC_MODE);
+	if (mode != TMC_MODE_CIRCULAR_BUFFER) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* Don't interfere if operated from Perf */
+	if (drvdata->mode == CS_MODE_PERF) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* If drvdata::buf is NULL the trace data has been read already */
+	if (drvdata->buf == NULL) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* Disable the TMC if need be */
+	if (drvdata->mode == CS_MODE_SYSFS)
+		tmc_etb_disable_hw(drvdata);
+
+	drvdata->reading = true;
+out:
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	return ret;
+}
+
+int tmc_read_unprepare_etb(struct tmc_drvdata *drvdata)
+{
+	char *buf = NULL;
+	enum tmc_mode mode;
+	unsigned long flags;
+
+	/* config types are set at boot time and never change */
+	if (WARN_ON_ONCE(drvdata->config_type != TMC_CONFIG_TYPE_ETB &&
+			 drvdata->config_type != TMC_CONFIG_TYPE_ETF))
+		return -EINVAL;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+
+	/* There is no point in reading a TMC in HW FIFO mode */
+	mode = readl_relaxed(drvdata->base + TMC_MODE);
+	if (mode != TMC_MODE_CIRCULAR_BUFFER) {
+		spin_unlock_irqrestore(&drvdata->spinlock, flags);
+		return -EINVAL;
+	}
+
+	/* Re-enable the TMC if need be */
+	if (drvdata->mode == CS_MODE_SYSFS) {
+		/*
+		 * The trace run will continue with the same allocated trace
+		 * buffer. As such zero-out the buffer so that we don't end
+		 * up with stale data.
+		 *
+		 * Since the tracer is still enabled drvdata::buf
+		 * can't be NULL.
+		 */
+		memset(drvdata->buf, 0, drvdata->size);
+		tmc_etb_enable_hw(drvdata);
+	} else {
+		/*
+		 * The ETB/ETF is not tracing and the buffer was just read.
+		 * As such prepare to free the trace buffer.
+		 */
+		buf = drvdata->buf;
+		drvdata->buf = NULL;
+	}
+
+	drvdata->reading = false;
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	/*
+	 * Free allocated memory outside of the spinlock.  There is no need
+	 * to assert the validity of 'buf' since calling kfree(NULL) is safe.
+	 */
+	kfree(buf);
+
+	return 0;
+}
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
new file mode 100644
index 0000000..2eda5de
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
@@ -0,0 +1,1212 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(C) 2016 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#include <linux/coresight.h>
+#include <linux/dma-mapping.h>
+#include <linux/iommu.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include "coresight-catu.h"
+#include "coresight-priv.h"
+#include "coresight-tmc.h"
+
+struct etr_flat_buf {
+	struct device	*dev;
+	dma_addr_t	daddr;
+	void		*vaddr;
+	size_t		size;
+};
+
+/*
+ * The TMC ETR SG has a page size of 4K. The SG table contains pointers
+ * to 4KB buffers. However, the OS may use a PAGE_SIZE different from
+ * 4K (e.g., 16KB or 64KB). This implies that a single OS page could
+ * contain more than one SG buffer or table.
+ *
+ * A table entry has the following format:
+ *
+ * ---Bit31------------Bit4-------Bit1-----Bit0--
+ * |     Address[39:12]    | SBZ |  Entry Type  |
+ * ----------------------------------------------
+ *
+ * Address: Bits [39:12] of a physical page address. Bits [11:0] are
+ *	    always zero.
+ *
+ * Entry type:
+ *	b00 - Reserved.
+ *	b01 - Last entry in the table, points to a 4K page buffer.
+ *	b10 - Normal entry, points to a 4K page buffer.
+ *	b11 - Link. The address points to the base of the next table.
+ */
+
+typedef u32 sgte_t;
+
+#define ETR_SG_PAGE_SHIFT		12
+#define ETR_SG_PAGE_SIZE		(1UL << ETR_SG_PAGE_SHIFT)
+#define ETR_SG_PAGES_PER_SYSPAGE	(PAGE_SIZE / ETR_SG_PAGE_SIZE)
+#define ETR_SG_PTRS_PER_PAGE		(ETR_SG_PAGE_SIZE / sizeof(sgte_t))
+#define ETR_SG_PTRS_PER_SYSPAGE		(PAGE_SIZE / sizeof(sgte_t))
+
+#define ETR_SG_ET_MASK			0x3
+#define ETR_SG_ET_LAST			0x1
+#define ETR_SG_ET_NORMAL		0x2
+#define ETR_SG_ET_LINK			0x3
+
+#define ETR_SG_ADDR_SHIFT		4
+
+#define ETR_SG_ENTRY(addr, type) \
+	(sgte_t)((((addr) >> ETR_SG_PAGE_SHIFT) << ETR_SG_ADDR_SHIFT) | \
+		 (type & ETR_SG_ET_MASK))
+
+#define ETR_SG_ADDR(entry) \
+	(((dma_addr_t)(entry) >> ETR_SG_ADDR_SHIFT) << ETR_SG_PAGE_SHIFT)
+#define ETR_SG_ET(entry)		((entry) & ETR_SG_ET_MASK)
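
A worked example of the entry encoding, using an arbitrary 4K-aligned
physical address:

	ETR_SG_ENTRY(0x80045000, ETR_SG_ET_NORMAL)
		= ((0x80045000 >> 12) << 4) | 0x2 = 0x00800452

	ETR_SG_ADDR(0x00800452) = (0x800452 >> 4) << 12 = 0x80045000
	ETR_SG_ET(0x00800452)   = 0x2, i.e. a normal entry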
+
+/*
+ * struct etr_sg_table : ETR SG Table
+ * @sg_table:		Generic SG Table holding the data/table pages.
+ * @hwaddr:		hwaddress used by the TMC, which is the base
+ *			address of the table.
+ */
+struct etr_sg_table {
+	struct tmc_sg_table	*sg_table;
+	dma_addr_t		hwaddr;
+};
+
+/*
+ * tmc_etr_sg_table_entries: Total number of table entries required to map
+ * @nr_pages system pages.
+ *
+ * We need to map @nr_pages * ETR_SG_PAGES_PER_SYSPAGE data pages.
+ * Each TMC page can map (ETR_SG_PTRS_PER_PAGE - 1) buffer pointers,
+ * with the last entry pointing to another page of table entries.
+ * If we spill over to a new page for mapping 1 entry, we could as
+ * well replace the link entry of the previous page with the last entry.
+ */
+static inline unsigned long __attribute_const__
+tmc_etr_sg_table_entries(int nr_pages)
+{
+	unsigned long nr_sgpages = nr_pages * ETR_SG_PAGES_PER_SYSPAGE;
+	unsigned long nr_sglinks = nr_sgpages / (ETR_SG_PTRS_PER_PAGE - 1);
+	/*
+	 * If we spill over to a new page for 1 entry, we could as well
+	 * make it the LAST entry in the previous page, skipping the Link
+	 * address.
+	 */
+	if (nr_sglinks && (nr_sgpages % (ETR_SG_PTRS_PER_PAGE - 1) < 2))
+		nr_sglinks--;
+	return nr_sgpages + nr_sglinks;
+}
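
Two worked examples with a 4K PAGE_SIZE, so ETR_SG_PAGES_PER_SYSPAGE == 1 and
ETR_SG_PTRS_PER_PAGE == 1024:

	nr_pages = 2048: nr_sgpages = 2048, nr_sglinks = 2048 / 1023 = 2,
			 2048 % 1023 = 2, no adjustment   ->  2050 entries
	nr_pages = 1024: nr_sgpages = 1024, nr_sglinks = 1024 / 1023 = 1,
			 1024 % 1023 = 1 < 2, drop a link ->  1024 entries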
+
+/*
+ * tmc_pages_get_offset:  Go through all the pages in the tmc_pages
+ * and map the device address @addr to an offset within the virtual
+ * contiguous buffer.
+ */
+static long
+tmc_pages_get_offset(struct tmc_pages *tmc_pages, dma_addr_t addr)
+{
+	int i;
+	dma_addr_t page_start;
+
+	for (i = 0; i < tmc_pages->nr_pages; i++) {
+		page_start = tmc_pages->daddrs[i];
+		if (addr >= page_start && addr < (page_start + PAGE_SIZE))
+			return i * PAGE_SIZE + (addr - page_start);
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * tmc_pages_free : Unmap and free the pages used by tmc_pages.
+ * If the pages were not allocated in tmc_pages_alloc(), we would
+ * simply drop the refcount.
+ */
+static void tmc_pages_free(struct tmc_pages *tmc_pages,
+			   struct device *dev, enum dma_data_direction dir)
+{
+	int i;
+
+	for (i = 0; i < tmc_pages->nr_pages; i++) {
+		if (tmc_pages->daddrs && tmc_pages->daddrs[i])
+			dma_unmap_page(dev, tmc_pages->daddrs[i],
+					 PAGE_SIZE, dir);
+		if (tmc_pages->pages && tmc_pages->pages[i])
+			__free_page(tmc_pages->pages[i]);
+	}
+
+	kfree(tmc_pages->pages);
+	kfree(tmc_pages->daddrs);
+	tmc_pages->pages = NULL;
+	tmc_pages->daddrs = NULL;
+	tmc_pages->nr_pages = 0;
+}
+
+/*
+ * tmc_pages_alloc : Allocate and map pages for a given @tmc_pages.
+ * If @pages is not NULL, the list of page virtual addresses is
+ * used as the data pages. The pages are then dma_map'ed for @dev
+ * with dma_direction @dir.
+ *
+ * Returns 0 upon success, else the error number.
+ */
+static int tmc_pages_alloc(struct tmc_pages *tmc_pages,
+			   struct device *dev, int node,
+			   enum dma_data_direction dir, void **pages)
+{
+	int i, nr_pages;
+	dma_addr_t paddr;
+	struct page *page;
+
+	nr_pages = tmc_pages->nr_pages;
+	tmc_pages->daddrs = kcalloc(nr_pages, sizeof(*tmc_pages->daddrs),
+					 GFP_KERNEL);
+	if (!tmc_pages->daddrs)
+		return -ENOMEM;
+	tmc_pages->pages = kcalloc(nr_pages, sizeof(*tmc_pages->pages),
+					 GFP_KERNEL);
+	if (!tmc_pages->pages) {
+		kfree(tmc_pages->daddrs);
+		tmc_pages->daddrs = NULL;
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < nr_pages; i++) {
+		if (pages && pages[i]) {
+			page = virt_to_page(pages[i]);
+			/* Hold a refcount on the page */
+			get_page(page);
+		} else {
+			page = alloc_pages_node(node,
+						GFP_KERNEL | __GFP_ZERO, 0);
+			if (!page)
+				goto err;
+		}
+		paddr = dma_map_page(dev, page, 0, PAGE_SIZE, dir);
+		if (dma_mapping_error(dev, paddr))
+			goto err;
+		tmc_pages->daddrs[i] = paddr;
+		tmc_pages->pages[i] = page;
+	}
+	return 0;
+err:
+	tmc_pages_free(tmc_pages, dev, dir);
+	return -ENOMEM;
+}
+
+static inline long
+tmc_sg_get_data_page_offset(struct tmc_sg_table *sg_table, dma_addr_t addr)
+{
+	return tmc_pages_get_offset(&sg_table->data_pages, addr);
+}
+
+static inline void tmc_free_table_pages(struct tmc_sg_table *sg_table)
+{
+	if (sg_table->table_vaddr)
+		vunmap(sg_table->table_vaddr);
+	tmc_pages_free(&sg_table->table_pages, sg_table->dev, DMA_TO_DEVICE);
+}
+
+static void tmc_free_data_pages(struct tmc_sg_table *sg_table)
+{
+	if (sg_table->data_vaddr)
+		vunmap(sg_table->data_vaddr);
+	tmc_pages_free(&sg_table->data_pages, sg_table->dev, DMA_FROM_DEVICE);
+}
+
+void tmc_free_sg_table(struct tmc_sg_table *sg_table)
+{
+	tmc_free_table_pages(sg_table);
+	tmc_free_data_pages(sg_table);
+}
+
+/*
+ * Alloc pages for the table. Since this will be used by the device,
+ * allocate the pages closer to the device (i.e., dev_to_node(dev)
+ * rather than the CPU node).
+ */
+static int tmc_alloc_table_pages(struct tmc_sg_table *sg_table)
+{
+	int rc;
+	struct tmc_pages *table_pages = &sg_table->table_pages;
+
+	rc = tmc_pages_alloc(table_pages, sg_table->dev,
+			     dev_to_node(sg_table->dev),
+			     DMA_TO_DEVICE, NULL);
+	if (rc)
+		return rc;
+	sg_table->table_vaddr = vmap(table_pages->pages,
+				     table_pages->nr_pages,
+				     VM_MAP,
+				     PAGE_KERNEL);
+	if (!sg_table->table_vaddr)
+		rc = -ENOMEM;
+	else
+		sg_table->table_daddr = table_pages->daddrs[0];
+	return rc;
+}
+
+static int tmc_alloc_data_pages(struct tmc_sg_table *sg_table, void **pages)
+{
+	int rc;
+
+	/* Allocate data pages on the node requested by the caller */
+	rc = tmc_pages_alloc(&sg_table->data_pages,
+			     sg_table->dev, sg_table->node,
+			     DMA_FROM_DEVICE, pages);
+	if (!rc) {
+		sg_table->data_vaddr = vmap(sg_table->data_pages.pages,
+					    sg_table->data_pages.nr_pages,
+					    VM_MAP,
+					    PAGE_KERNEL);
+		if (!sg_table->data_vaddr)
+			rc = -ENOMEM;
+	}
+	return rc;
+}
+
+/*
+ * tmc_alloc_sg_table: Allocate and setup dma pages for the TMC SG table
+ * and data buffers. TMC writes to the data buffers and reads from the SG
+ * Table pages.
+ *
+ * @dev		- Device to which page should be DMA mapped.
+ * @node	- Numa node for mem allocations
+ * @nr_tpages	- Number of pages for the table entries.
+ * @nr_dpages	- Number of pages for Data buffer.
+ * @pages	- Optional list of virtual address of pages.
+ */
+struct tmc_sg_table *tmc_alloc_sg_table(struct device *dev,
+					int node,
+					int nr_tpages,
+					int nr_dpages,
+					void **pages)
+{
+	long rc;
+	struct tmc_sg_table *sg_table;
+
+	sg_table = kzalloc(sizeof(*sg_table), GFP_KERNEL);
+	if (!sg_table)
+		return ERR_PTR(-ENOMEM);
+	sg_table->data_pages.nr_pages = nr_dpages;
+	sg_table->table_pages.nr_pages = nr_tpages;
+	sg_table->node = node;
+	sg_table->dev = dev;
+
+	rc  = tmc_alloc_data_pages(sg_table, pages);
+	if (!rc)
+		rc = tmc_alloc_table_pages(sg_table);
+	if (rc) {
+		tmc_free_sg_table(sg_table);
+		kfree(sg_table);
+		return ERR_PTR(rc);
+	}
+
+	return sg_table;
+}
+
+/*
+ * tmc_sg_table_sync_data_range: Sync the data buffer written
+ * by the device from @offset up to @size bytes.
+ */
+void tmc_sg_table_sync_data_range(struct tmc_sg_table *table,
+				  u64 offset, u64 size)
+{
+	int i, index, start;
+	int npages = DIV_ROUND_UP(size, PAGE_SIZE);
+	struct device *dev = table->dev;
+	struct tmc_pages *data = &table->data_pages;
+
+	start = offset >> PAGE_SHIFT;
+	for (i = start; i < (start + npages); i++) {
+		index = i % data->nr_pages;
+		dma_sync_single_for_cpu(dev, data->daddrs[index],
+					PAGE_SIZE, DMA_FROM_DEVICE);
+	}
+}
+
+/* tmc_sg_table_sync_table: Sync the page table */
+void tmc_sg_table_sync_table(struct tmc_sg_table *sg_table)
+{
+	int i;
+	struct device *dev = sg_table->dev;
+	struct tmc_pages *table_pages = &sg_table->table_pages;
+
+	for (i = 0; i < table_pages->nr_pages; i++)
+		dma_sync_single_for_device(dev, table_pages->daddrs[i],
+					   PAGE_SIZE, DMA_TO_DEVICE);
+}
+
+/*
+ * tmc_sg_table_get_data: Get the buffer pointer for data @offset
+ * in the SG buffer. The @bufpp is updated to point to the buffer.
+ * Returns :
+ *	the length of linear data available at @offset.
+ *	or
+ *	<= 0 if no data is available.
+ */
+ssize_t tmc_sg_table_get_data(struct tmc_sg_table *sg_table,
+			      u64 offset, size_t len, char **bufpp)
+{
+	size_t size;
+	int pg_idx = offset >> PAGE_SHIFT;
+	int pg_offset = offset & (PAGE_SIZE - 1);
+	struct tmc_pages *data_pages = &sg_table->data_pages;
+
+	size = tmc_sg_table_buf_size(sg_table);
+	if (offset >= size)
+		return -EINVAL;
+
+	/* Make sure we don't go beyond the end */
+	len = (len < (size - offset)) ? len : size - offset;
+	/* Respect the page boundaries */
+	len = (len < (PAGE_SIZE - pg_offset)) ? len : (PAGE_SIZE - pg_offset);
+	if (len > 0)
+		*bufpp = page_address(data_pages->pages[pg_idx]) + pg_offset;
+	return len;
+}
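
A sketch of how a caller might drain the buffer in linear chunks with this
helper; each call returns the number of contiguous bytes available at the
given offset (bounded by the page it falls in), so the loop simply advances
by whatever was returned:

	char *bufp;
	ssize_t len;
	u64 offset = 0;

	while ((len = tmc_sg_table_get_data(sg_table, offset, SZ_4K, &bufp)) > 0) {
		/* consume 'len' bytes starting at 'bufp' */
		offset += len;
	}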
+
+#ifdef ETR_SG_DEBUG
+/* Map a dma address to virtual address */
+static unsigned long
+tmc_sg_daddr_to_vaddr(struct tmc_sg_table *sg_table,
+		      dma_addr_t addr, bool table)
+{
+	long offset;
+	unsigned long base;
+	struct tmc_pages *tmc_pages;
+
+	if (table) {
+		tmc_pages = &sg_table->table_pages;
+		base = (unsigned long)sg_table->table_vaddr;
+	} else {
+		tmc_pages = &sg_table->data_pages;
+		base = (unsigned long)sg_table->data_vaddr;
+	}
+
+	offset = tmc_pages_get_offset(tmc_pages, addr);
+	if (offset < 0)
+		return 0;
+	return base + offset;
+}
+
+/* Dump the given sg_table */
+static void tmc_etr_sg_table_dump(struct etr_sg_table *etr_table)
+{
+	sgte_t *ptr;
+	int i = 0;
+	dma_addr_t addr;
+	struct tmc_sg_table *sg_table = etr_table->sg_table;
+
+	ptr = (sgte_t *)tmc_sg_daddr_to_vaddr(sg_table,
+					      etr_table->hwaddr, true);
+	while (ptr) {
+		addr = ETR_SG_ADDR(*ptr);
+		switch (ETR_SG_ET(*ptr)) {
+		case ETR_SG_ET_NORMAL:
+			dev_dbg(sg_table->dev,
+				"%05d: %p\t:[N] 0x%llx\n", i, ptr, addr);
+			ptr++;
+			break;
+		case ETR_SG_ET_LINK:
+			dev_dbg(sg_table->dev,
+				"%05d: *** %p\t:{L} 0x%llx ***\n",
+				 i, ptr, addr);
+			ptr = (sgte_t *)tmc_sg_daddr_to_vaddr(sg_table,
+							      addr, true);
+			break;
+		case ETR_SG_ET_LAST:
+			dev_dbg(sg_table->dev,
+				"%05d: ### %p\t:[L] 0x%llx ###\n",
+				 i, ptr, addr);
+			return;
+		default:
+			dev_dbg(sg_table->dev,
+				"%05d: xxx %p\t:[INVALID] 0x%llx xxx\n",
+				 i, ptr, addr);
+			return;
+		}
+		i++;
+	}
+	dev_dbg(sg_table->dev, "******* End of Table *****\n");
+}
+#else
+static inline void tmc_etr_sg_table_dump(struct etr_sg_table *etr_table) {}
+#endif
+
+/*
+ * Populate the page table entries of the SG table from the allocated
+ * table/data pages. Each system page holds ETR_SG_PAGES_PER_SYSPAGE SG
+ * pages, for both data and table pages, so we keep track of the indices
+ * within each system page and move the pointers accordingly.
+ */
+#define INC_IDX_ROUND(idx, size) ((idx) = ((idx) + 1) % (size))
+static void tmc_etr_sg_table_populate(struct etr_sg_table *etr_table)
+{
+	dma_addr_t paddr;
+	int i, type, nr_entries;
+	int tpidx = 0; /* index to the current system table_page */
+	int sgtidx = 0;	/* index to the sg_table within the current syspage */
+	int sgtentry = 0; /* the entry within the sg_table */
+	int dpidx = 0; /* index to the current system data_page */
+	int spidx = 0; /* index to the SG page within the current data page */
+	sgte_t *ptr; /* pointer to the table entry to fill */
+	struct tmc_sg_table *sg_table = etr_table->sg_table;
+	dma_addr_t *table_daddrs = sg_table->table_pages.daddrs;
+	dma_addr_t *data_daddrs = sg_table->data_pages.daddrs;
+
+	nr_entries = tmc_etr_sg_table_entries(sg_table->data_pages.nr_pages);
+	/*
+	 * Use the contiguous virtual address of the table to update entries.
+	 */
+	ptr = sg_table->table_vaddr;
+	/*
+	 * Fill all the entries, except the last entry to avoid special
+	 * checks within the loop.
+	 */
+	for (i = 0; i < nr_entries - 1; i++) {
+		if (sgtentry == ETR_SG_PTRS_PER_PAGE - 1) {
+			/*
+			 * Last entry in a sg_table page is a link address to
+			 * the next table page. If this sg_table is the last
+			 * one in the system page, it links to the first
+			 * sg_table in the next system page. Otherwise, it
+			 * links to the next sg_table page within the system
+			 * page.
+			 */
+			if (sgtidx == ETR_SG_PAGES_PER_SYSPAGE - 1) {
+				paddr = table_daddrs[tpidx + 1];
+			} else {
+				paddr = table_daddrs[tpidx] +
+					(ETR_SG_PAGE_SIZE * (sgtidx + 1));
+			}
+			type = ETR_SG_ET_LINK;
+		} else {
+			/*
+			 * Update the indices to the data_pages to point to the
+			 * next sg_page in the data buffer.
+			 */
+			type = ETR_SG_ET_NORMAL;
+			paddr = data_daddrs[dpidx] + spidx * ETR_SG_PAGE_SIZE;
+			if (!INC_IDX_ROUND(spidx, ETR_SG_PAGES_PER_SYSPAGE))
+				dpidx++;
+		}
+		*ptr++ = ETR_SG_ENTRY(paddr, type);
+		/*
+		 * Move to the next table pointer, moving the table page index
+		 * if necessary
+		 */
+		if (!INC_IDX_ROUND(sgtentry, ETR_SG_PTRS_PER_PAGE)) {
+			if (!INC_IDX_ROUND(sgtidx, ETR_SG_PAGES_PER_SYSPAGE))
+				tpidx++;
+		}
+	}
+
+	/* Set up the last entry, which is always a data pointer */
+	paddr = data_daddrs[dpidx] + spidx * ETR_SG_PAGE_SIZE;
+	*ptr++ = ETR_SG_ENTRY(paddr, ETR_SG_ET_LAST);
+}
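+
+/*
+ * Worked example (illustrative only, assuming 4K system pages and 32-bit
+ * table entries): a 1MB data buffer is 256 ETR SG pages.  A 4K table
+ * page holds up to 1023 NORMAL pointers plus one LINK, so all 256
+ * entries fit in one table page: entries 0-254 are NORMAL pointers to
+ * the data pages and entry 255 is marked LAST.  A 16MB buffer (4096
+ * data pages) needs 5 table pages chained together with LINK entries.
+ */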
+
+/*
+ * tmc_init_etr_sg_table: Allocate a TMC ETR SG table and a data buffer of
+ * @size, and populate the table.
+ *
+ * @dev		- Device pointer for the TMC
+ * @node	- NUMA node where the memory should be allocated
+ * @size	- Total size of the data buffer
+ * @pages	- Optional list of page virtual addresses
+ */
+static struct etr_sg_table *
+tmc_init_etr_sg_table(struct device *dev, int node,
+		      unsigned long size, void **pages)
+{
+	int nr_entries, nr_tpages;
+	int nr_dpages = size >> PAGE_SHIFT;
+	struct tmc_sg_table *sg_table;
+	struct etr_sg_table *etr_table;
+
+	etr_table = kzalloc(sizeof(*etr_table), GFP_KERNEL);
+	if (!etr_table)
+		return ERR_PTR(-ENOMEM);
+	nr_entries = tmc_etr_sg_table_entries(nr_dpages);
+	nr_tpages = DIV_ROUND_UP(nr_entries, ETR_SG_PTRS_PER_SYSPAGE);
+
+	sg_table = tmc_alloc_sg_table(dev, node, nr_tpages, nr_dpages, pages);
+	if (IS_ERR(sg_table)) {
+		kfree(etr_table);
+		return ERR_PTR(PTR_ERR(sg_table));
+	}
+
+	etr_table->sg_table = sg_table;
+	/* TMC should use table base address for DBA */
+	etr_table->hwaddr = sg_table->table_daddr;
+	tmc_etr_sg_table_populate(etr_table);
+	/* Sync the table pages for the HW */
+	tmc_sg_table_sync_table(sg_table);
+	tmc_etr_sg_table_dump(etr_table);
+
+	return etr_table;
+}
+
+/*
+ * tmc_etr_alloc_flat_buf: Allocate a contiguous DMA buffer.
+ */
+static int tmc_etr_alloc_flat_buf(struct tmc_drvdata *drvdata,
+				  struct etr_buf *etr_buf, int node,
+				  void **pages)
+{
+	struct etr_flat_buf *flat_buf;
+
+	/* We cannot reuse existing pages for flat buf */
+	if (pages)
+		return -EINVAL;
+
+	flat_buf = kzalloc(sizeof(*flat_buf), GFP_KERNEL);
+	if (!flat_buf)
+		return -ENOMEM;
+
+	flat_buf->vaddr = dma_alloc_coherent(drvdata->dev, etr_buf->size,
+					     &flat_buf->daddr, GFP_KERNEL);
+	if (!flat_buf->vaddr) {
+		kfree(flat_buf);
+		return -ENOMEM;
+	}
+
+	flat_buf->size = etr_buf->size;
+	flat_buf->dev = drvdata->dev;
+	etr_buf->hwaddr = flat_buf->daddr;
+	etr_buf->mode = ETR_MODE_FLAT;
+	etr_buf->private = flat_buf;
+	return 0;
+}
+
+static void tmc_etr_free_flat_buf(struct etr_buf *etr_buf)
+{
+	struct etr_flat_buf *flat_buf = etr_buf->private;
+
+	if (flat_buf && flat_buf->daddr)
+		dma_free_coherent(flat_buf->dev, flat_buf->size,
+				  flat_buf->vaddr, flat_buf->daddr);
+	kfree(flat_buf);
+}
+
+static void tmc_etr_sync_flat_buf(struct etr_buf *etr_buf, u64 rrp, u64 rwp)
+{
+	/*
+	 * Adjust the buffer to point to the beginning of the trace data
+	 * and update the available trace data.
+	 */
+	etr_buf->offset = rrp - etr_buf->hwaddr;
+	if (etr_buf->full)
+		etr_buf->len = etr_buf->size;
+	else
+		etr_buf->len = rwp - rrp;
+}
+
+static ssize_t tmc_etr_get_data_flat_buf(struct etr_buf *etr_buf,
+					 u64 offset, size_t len, char **bufpp)
+{
+	struct etr_flat_buf *flat_buf = etr_buf->private;
+
+	*bufpp = (char *)flat_buf->vaddr + offset;
+	/*
+	 * tmc_etr_buf_get_data already adjusts the length to handle
+	 * buffer wrapping around.
+	 */
+	return len;
+}
+
+static const struct etr_buf_operations etr_flat_buf_ops = {
+	.alloc = tmc_etr_alloc_flat_buf,
+	.free = tmc_etr_free_flat_buf,
+	.sync = tmc_etr_sync_flat_buf,
+	.get_data = tmc_etr_get_data_flat_buf,
+};
+
+/*
+ * tmc_etr_alloc_sg_buf: Allocate an SG buffer for @etr_buf and set up the
+ * parameters appropriately.
+ */
+static int tmc_etr_alloc_sg_buf(struct tmc_drvdata *drvdata,
+				struct etr_buf *etr_buf, int node,
+				void **pages)
+{
+	struct etr_sg_table *etr_table;
+
+	etr_table = tmc_init_etr_sg_table(drvdata->dev, node,
+					  etr_buf->size, pages);
+	if (IS_ERR(etr_table))
+		return -ENOMEM;
+	etr_buf->hwaddr = etr_table->hwaddr;
+	etr_buf->mode = ETR_MODE_ETR_SG;
+	etr_buf->private = etr_table;
+	return 0;
+}
+
+static void tmc_etr_free_sg_buf(struct etr_buf *etr_buf)
+{
+	struct etr_sg_table *etr_table = etr_buf->private;
+
+	if (etr_table) {
+		tmc_free_sg_table(etr_table->sg_table);
+		kfree(etr_table);
+	}
+}
+
+static ssize_t tmc_etr_get_data_sg_buf(struct etr_buf *etr_buf, u64 offset,
+				       size_t len, char **bufpp)
+{
+	struct etr_sg_table *etr_table = etr_buf->private;
+
+	return tmc_sg_table_get_data(etr_table->sg_table, offset, len, bufpp);
+}
+
+static void tmc_etr_sync_sg_buf(struct etr_buf *etr_buf, u64 rrp, u64 rwp)
+{
+	long r_offset, w_offset;
+	struct etr_sg_table *etr_table = etr_buf->private;
+	struct tmc_sg_table *table = etr_table->sg_table;
+
+	/* Convert hw address to offset in the buffer */
+	r_offset = tmc_sg_get_data_page_offset(table, rrp);
+	if (r_offset < 0) {
+		dev_warn(table->dev,
+			 "Unable to map RRP %llx to offset\n", rrp);
+		etr_buf->len = 0;
+		return;
+	}
+
+	w_offset = tmc_sg_get_data_page_offset(table, rwp);
+	if (w_offset < 0) {
+		dev_warn(table->dev,
+			 "Unable to map RWP %llx to offset\n", rwp);
+		etr_buf->len = 0;
+		return;
+	}
+
+	etr_buf->offset = r_offset;
+	if (etr_buf->full)
+		etr_buf->len = etr_buf->size;
+	else
+		etr_buf->len = ((w_offset < r_offset) ? etr_buf->size : 0) +
+				w_offset - r_offset;
+	tmc_sg_table_sync_data_range(table, r_offset, etr_buf->len);
+}
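+
+/*
+ * Worked example of the wrap-around computation above (illustrative
+ * only): with a 1MB buffer, r_offset = 0xC0000 and w_offset = 0x20000
+ * give len = 0x100000 + 0x20000 - 0xC0000 = 0x60000, i.e. the trace
+ * data runs from the read pointer to the end of the buffer and then
+ * wraps to the write pointer.  When the buffer is marked full, the
+ * whole buffer size is reported instead.
+ */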
+
+static const struct etr_buf_operations etr_sg_buf_ops = {
+	.alloc = tmc_etr_alloc_sg_buf,
+	.free = tmc_etr_free_sg_buf,
+	.sync = tmc_etr_sync_sg_buf,
+	.get_data = tmc_etr_get_data_sg_buf,
+};
+
+/*
+ * The TMC ETR may be connected to a CATU device, which can provide an
+ * address translation service. This is represented by the output port of
+ * the TMC (ETR) connected to the input port of the CATU.
+ *
+ * Returns	: coresight_device ptr for the CATU device if a CATU is found.
+ *		: NULL otherwise.
+ */
+struct coresight_device *
+tmc_etr_get_catu_device(struct tmc_drvdata *drvdata)
+{
+	int i;
+	struct coresight_device *tmp, *etr = drvdata->csdev;
+
+	if (!IS_ENABLED(CONFIG_CORESIGHT_CATU))
+		return NULL;
+
+	for (i = 0; i < etr->nr_outport; i++) {
+		tmp = etr->conns[i].child_dev;
+		if (tmp && coresight_is_catu_device(tmp))
+			return tmp;
+	}
+
+	return NULL;
+}
+
+static inline void tmc_etr_enable_catu(struct tmc_drvdata *drvdata)
+{
+	struct coresight_device *catu = tmc_etr_get_catu_device(drvdata);
+
+	if (catu && helper_ops(catu)->enable)
+		helper_ops(catu)->enable(catu, drvdata->etr_buf);
+}
+
+static inline void tmc_etr_disable_catu(struct tmc_drvdata *drvdata)
+{
+	struct coresight_device *catu = tmc_etr_get_catu_device(drvdata);
+
+	if (catu && helper_ops(catu)->disable)
+		helper_ops(catu)->disable(catu, drvdata->etr_buf);
+}
+
+static const struct etr_buf_operations *etr_buf_ops[] = {
+	[ETR_MODE_FLAT] = &etr_flat_buf_ops,
+	[ETR_MODE_ETR_SG] = &etr_sg_buf_ops,
+	[ETR_MODE_CATU] = &etr_catu_buf_ops,
+};
+
+static inline int tmc_etr_mode_alloc_buf(int mode,
+					 struct tmc_drvdata *drvdata,
+					 struct etr_buf *etr_buf, int node,
+					 void **pages)
+{
+	int rc = -EINVAL;
+
+	switch (mode) {
+	case ETR_MODE_FLAT:
+	case ETR_MODE_ETR_SG:
+	case ETR_MODE_CATU:
+		if (etr_buf_ops[mode]->alloc)
+			rc = etr_buf_ops[mode]->alloc(drvdata, etr_buf,
+						      node, pages);
+		if (!rc)
+			etr_buf->ops = etr_buf_ops[mode];
+		return rc;
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * tmc_alloc_etr_buf: Allocate a buffer to be used by the ETR.
+ * @drvdata	: ETR device details.
+ * @size	: size of the requested buffer.
+ * @flags	: Required properties for the buffer.
+ * @node	: Node for memory allocations.
+ * @pages	: An optional list of pages.
+ */
+static struct etr_buf *tmc_alloc_etr_buf(struct tmc_drvdata *drvdata,
+					 ssize_t size, int flags,
+					 int node, void **pages)
+{
+	int rc = -ENOMEM;
+	bool has_etr_sg, has_iommu;
+	bool has_sg, has_catu;
+	struct etr_buf *etr_buf;
+
+	has_etr_sg = tmc_etr_has_cap(drvdata, TMC_ETR_SG);
+	has_iommu = iommu_get_domain_for_dev(drvdata->dev);
+	has_catu = !!tmc_etr_get_catu_device(drvdata);
+
+	has_sg = has_catu || has_etr_sg;
+
+	etr_buf = kzalloc(sizeof(*etr_buf), GFP_KERNEL);
+	if (!etr_buf)
+		return ERR_PTR(-ENOMEM);
+
+	etr_buf->size = size;
+
+	/*
+	 * If we have to use an existing list of pages, we cannot reliably
+	 * use contiguous DMA memory (even if we have an IOMMU). Otherwise,
+	 * we use contiguous DMA memory if at least one of the following
+	 * conditions is true:
+	 *  a) The ETR cannot use Scatter-Gather.
+	 *  b) We have a backing IOMMU.
+	 *  c) The requested memory size is smaller than 1MB.
+	 *
+	 * If that attempt fails or does not apply, fall back to the other
+	 * available mechanisms (ETR SG, then CATU).
+	 */
+	if (!pages &&
+	    (!has_sg || has_iommu || size < SZ_1M))
+		rc = tmc_etr_mode_alloc_buf(ETR_MODE_FLAT, drvdata,
+					    etr_buf, node, pages);
+	if (rc && has_etr_sg)
+		rc = tmc_etr_mode_alloc_buf(ETR_MODE_ETR_SG, drvdata,
+					    etr_buf, node, pages);
+	if (rc && has_catu)
+		rc = tmc_etr_mode_alloc_buf(ETR_MODE_CATU, drvdata,
+					    etr_buf, node, pages);
+	if (rc) {
+		kfree(etr_buf);
+		return ERR_PTR(rc);
+	}
+
+	dev_dbg(drvdata->dev, "allocated buffer of size %ldKB in mode %d\n",
+		(unsigned long)size >> 10, etr_buf->mode);
+	return etr_buf;
+}
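+
+/*
+ * Summary of the fallback order above (illustrative only):
+ *  - FLAT is tried first, but only when no pre-allocated page list is
+ *    given and (the ETR has neither SG nor CATU support, or an IOMMU
+ *    backs the device, or the request is smaller than 1MB).
+ *  - If FLAT was skipped or failed, ETR_SG is tried when the ETR
+ *    advertises scatter-gather support.
+ *  - Finally, CATU is tried when a CATU device is connected.
+ * For example, a request with a pre-allocated page list on an ETR with
+ * built-in SG support goes straight to ETR_MODE_ETR_SG.
+ */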
+
+static void tmc_free_etr_buf(struct etr_buf *etr_buf)
+{
+	WARN_ON(!etr_buf->ops || !etr_buf->ops->free);
+	etr_buf->ops->free(etr_buf);
+	kfree(etr_buf);
+}
+
+/*
+ * tmc_etr_buf_get_data: Get a pointer to the trace data at @offset,
+ * with a maximum of @len bytes.
+ * Returns: The size of the linear data available at @offset, with *bufpp
+ * updated to point to the buffer.
+ */
+static ssize_t tmc_etr_buf_get_data(struct etr_buf *etr_buf,
+				    u64 offset, size_t len, char **bufpp)
+{
+	/* Adjust the length to limit this transaction to end of buffer */
+	len = (len < (etr_buf->size - offset)) ? len : etr_buf->size - offset;
+
+	return etr_buf->ops->get_data(etr_buf, (u64)offset, len, bufpp);
+}
+
+static inline s64
+tmc_etr_buf_insert_barrier_packet(struct etr_buf *etr_buf, u64 offset)
+{
+	ssize_t len;
+	char *bufp;
+
+	len = tmc_etr_buf_get_data(etr_buf, offset,
+				   CORESIGHT_BARRIER_PKT_SIZE, &bufp);
+	if (WARN_ON(len < CORESIGHT_BARRIER_PKT_SIZE))
+		return -EINVAL;
+	coresight_insert_barrier_packet(bufp);
+	return offset + CORESIGHT_BARRIER_PKT_SIZE;
+}
+
+/*
+ * tmc_sync_etr_buf: Sync the trace buffer availability with drvdata.
+ * Makes sure the trace data is synced to the memory for consumption.
+ * @etr_buf->offset will hold the offset to the beginning of the trace data
+ * within the buffer, with @etr_buf->len bytes to consume.
+ */
+static void tmc_sync_etr_buf(struct tmc_drvdata *drvdata)
+{
+	struct etr_buf *etr_buf = drvdata->etr_buf;
+	u64 rrp, rwp;
+	u32 status;
+
+	rrp = tmc_read_rrp(drvdata);
+	rwp = tmc_read_rwp(drvdata);
+	status = readl_relaxed(drvdata->base + TMC_STS);
+	etr_buf->full = status & TMC_STS_FULL;
+
+	WARN_ON(!etr_buf->ops || !etr_buf->ops->sync);
+
+	etr_buf->ops->sync(etr_buf, rrp, rwp);
+
+	/* Insert barrier packets at the beginning, if there was an overflow */
+	if (etr_buf->full)
+		tmc_etr_buf_insert_barrier_packet(etr_buf, etr_buf->offset);
+}
+
+static void tmc_etr_enable_hw(struct tmc_drvdata *drvdata)
+{
+	u32 axictl, sts;
+	struct etr_buf *etr_buf = drvdata->etr_buf;
+
+	/*
+	 * If this ETR is connected to a CATU, enable it before we turn
+	 * this on
+	 */
+	tmc_etr_enable_catu(drvdata);
+
+	CS_UNLOCK(drvdata->base);
+
+	/* Wait for the TMCReady bit to be set */
+	tmc_wait_for_tmcready(drvdata);
+
+	writel_relaxed(etr_buf->size / 4, drvdata->base + TMC_RSZ);
+	writel_relaxed(TMC_MODE_CIRCULAR_BUFFER, drvdata->base + TMC_MODE);
+
+	axictl = readl_relaxed(drvdata->base + TMC_AXICTL);
+	axictl &= ~TMC_AXICTL_CLEAR_MASK;
+	axictl |= (TMC_AXICTL_PROT_CTL_B1 | TMC_AXICTL_WR_BURST_16);
+	axictl |= TMC_AXICTL_AXCACHE_OS;
+
+	if (tmc_etr_has_cap(drvdata, TMC_ETR_AXI_ARCACHE)) {
+		axictl &= ~TMC_AXICTL_ARCACHE_MASK;
+		axictl |= TMC_AXICTL_ARCACHE_OS;
+	}
+
+	if (etr_buf->mode == ETR_MODE_ETR_SG) {
+		if (WARN_ON(!tmc_etr_has_cap(drvdata, TMC_ETR_SG)))
+			return;
+		axictl |= TMC_AXICTL_SCT_GAT_MODE;
+	}
+
+	writel_relaxed(axictl, drvdata->base + TMC_AXICTL);
+	tmc_write_dba(drvdata, etr_buf->hwaddr);
+	/*
+	 * If the TMC pointers must be programmed before the session,
+	 * we have to set them properly (i.e., RRP/RWP to the base address
+	 * and STS to "not full").
+	 */
+	if (tmc_etr_has_cap(drvdata, TMC_ETR_SAVE_RESTORE)) {
+		tmc_write_rrp(drvdata, etr_buf->hwaddr);
+		tmc_write_rwp(drvdata, etr_buf->hwaddr);
+		sts = readl_relaxed(drvdata->base + TMC_STS) & ~TMC_STS_FULL;
+		writel_relaxed(sts, drvdata->base + TMC_STS);
+	}
+
+	writel_relaxed(TMC_FFCR_EN_FMT | TMC_FFCR_EN_TI |
+		       TMC_FFCR_FON_FLIN | TMC_FFCR_FON_TRIG_EVT |
+		       TMC_FFCR_TRIGON_TRIGIN,
+		       drvdata->base + TMC_FFCR);
+	writel_relaxed(drvdata->trigger_cntr, drvdata->base + TMC_TRG);
+	tmc_enable_hw(drvdata);
+
+	CS_LOCK(drvdata->base);
+}
+
+/*
+ * Return the available trace data in the buffer (starting at
+ * etr_buf->offset, limited by etr_buf->len) from @pos, with a maximum
+ * limit of @len, also updating @bufpp to point to where the data can be
+ * found. Since the trace data can start anywhere in the buffer,
+ * depending on the RRP, we adjust the length returned to handle the
+ * buffer wrapping around.
+ */
+ssize_t tmc_etr_get_sysfs_trace(struct tmc_drvdata *drvdata,
+				loff_t pos, size_t len, char **bufpp)
+{
+	s64 offset;
+	ssize_t actual = len;
+	struct etr_buf *etr_buf = drvdata->etr_buf;
+
+	if (pos + actual > etr_buf->len)
+		actual = etr_buf->len - pos;
+	if (actual <= 0)
+		return actual;
+
+	/* Compute the offset from which we read the data */
+	offset = etr_buf->offset + pos;
+	if (offset >= etr_buf->size)
+		offset -= etr_buf->size;
+	return tmc_etr_buf_get_data(etr_buf, offset, actual, bufpp);
+}
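+
+/*
+ * Worked example (illustrative only): with a 1MB buffer where
+ * etr_buf->offset = 0xF0000 and etr_buf->len = 0x40000, a read at
+ * pos = 0x20000 is trimmed to 0x20000 bytes (the remaining trace data)
+ * and computes offset = 0x110000, which wraps to 0x10000 before the
+ * backend lookup; the backend may further limit the returned length to
+ * a linear chunk.
+ */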
+
+static struct etr_buf *
+tmc_etr_setup_sysfs_buf(struct tmc_drvdata *drvdata)
+{
+	return tmc_alloc_etr_buf(drvdata, drvdata->size,
+				 0, cpu_to_node(0), NULL);
+}
+
+static void
+tmc_etr_free_sysfs_buf(struct etr_buf *buf)
+{
+	if (buf)
+		tmc_free_etr_buf(buf);
+}
+
+static void tmc_etr_sync_sysfs_buf(struct tmc_drvdata *drvdata)
+{
+	tmc_sync_etr_buf(drvdata);
+}
+
+static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata)
+{
+	CS_UNLOCK(drvdata->base);
+
+	tmc_flush_and_stop(drvdata);
+	/*
+	 * When operating in sysFS mode the content of the buffer needs to be
+	 * read before the TMC is disabled.
+	 */
+	if (drvdata->mode == CS_MODE_SYSFS)
+		tmc_etr_sync_sysfs_buf(drvdata);
+
+	tmc_disable_hw(drvdata);
+
+	CS_LOCK(drvdata->base);
+
+	/* Disable CATU device if this ETR is connected to one */
+	tmc_etr_disable_catu(drvdata);
+}
+
+static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev)
+{
+	int ret = 0;
+	unsigned long flags;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	struct etr_buf *new_buf = NULL, *free_buf = NULL;
+
+	/*
+	 * If we are enabling the ETR from disabled state, we need to make
+	 * sure we have a buffer with the right size. The etr_buf is not reset
+	 * immediately after we stop the tracing in SYSFS mode as we wait for
+	 * the user to collect the data. We may be able to reuse the existing
+	 * buffer, provided the size matches. Any allocation has to be done
+	 * with the lock released.
+	 */
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	if (!drvdata->etr_buf || (drvdata->etr_buf->size != drvdata->size)) {
+		spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+		/* Allocate memory with the locks released */
+		free_buf = new_buf = tmc_etr_setup_sysfs_buf(drvdata);
+		if (IS_ERR(new_buf))
+			return PTR_ERR(new_buf);
+
+		/* Let's try again */
+		spin_lock_irqsave(&drvdata->spinlock, flags);
+	}
+
+	if (drvdata->reading || drvdata->mode == CS_MODE_PERF) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	/*
+	 * In sysFS mode we can have multiple writers per sink.  Since this
+	 * sink is already enabled, no memory is needed and the HW need not be
+	 * touched, even if the buffer size has changed.
+	 */
+	if (drvdata->mode == CS_MODE_SYSFS)
+		goto out;
+
+	/*
+	 * If we don't have a buffer or it doesn't match the requested size,
+	 * use the buffer allocated above. Otherwise reuse the existing buffer.
+	 */
+	if (!drvdata->etr_buf ||
+	    (new_buf && drvdata->etr_buf->size != new_buf->size)) {
+		free_buf = drvdata->etr_buf;
+		drvdata->etr_buf = new_buf;
+	}
+
+	drvdata->mode = CS_MODE_SYSFS;
+	tmc_etr_enable_hw(drvdata);
+out:
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	/* Free memory outside the spinlock if need be */
+	if (free_buf)
+		tmc_etr_free_sysfs_buf(free_buf);
+
+	if (!ret)
+		dev_info(drvdata->dev, "TMC-ETR enabled\n");
+
+	return ret;
+}
+
+static int tmc_enable_etr_sink_perf(struct coresight_device *csdev)
+{
+	/* We don't support perf mode yet! */
+	return -EINVAL;
+}
+
+static int tmc_enable_etr_sink(struct coresight_device *csdev, u32 mode)
+{
+	switch (mode) {
+	case CS_MODE_SYSFS:
+		return tmc_enable_etr_sink_sysfs(csdev);
+	case CS_MODE_PERF:
+		return tmc_enable_etr_sink_perf(csdev);
+	}
+
+	/* We shouldn't be here */
+	return -EINVAL;
+}
+
+static void tmc_disable_etr_sink(struct coresight_device *csdev)
+{
+	unsigned long flags;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	if (drvdata->reading) {
+		spin_unlock_irqrestore(&drvdata->spinlock, flags);
+		return;
+	}
+
+	/* Disable the TMC only if it needs to */
+	if (drvdata->mode != CS_MODE_DISABLED) {
+		tmc_etr_disable_hw(drvdata);
+		drvdata->mode = CS_MODE_DISABLED;
+	}
+
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	dev_info(drvdata->dev, "TMC-ETR disabled\n");
+}
+
+static const struct coresight_ops_sink tmc_etr_sink_ops = {
+	.enable		= tmc_enable_etr_sink,
+	.disable	= tmc_disable_etr_sink,
+};
+
+const struct coresight_ops tmc_etr_cs_ops = {
+	.sink_ops	= &tmc_etr_sink_ops,
+};
+
+int tmc_read_prepare_etr(struct tmc_drvdata *drvdata)
+{
+	int ret = 0;
+	unsigned long flags;
+
+	/* config types are set at boot time and never change */
+	if (WARN_ON_ONCE(drvdata->config_type != TMC_CONFIG_TYPE_ETR))
+		return -EINVAL;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	if (drvdata->reading) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	/* Don't interfere if operated from Perf */
+	if (drvdata->mode == CS_MODE_PERF) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* If drvdata::etr_buf is NULL the trace data has been read already */
+	if (drvdata->etr_buf == NULL) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* Disable the TMC if need be */
+	if (drvdata->mode == CS_MODE_SYSFS)
+		tmc_etr_disable_hw(drvdata);
+
+	drvdata->reading = true;
+out:
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	return ret;
+}
+
+int tmc_read_unprepare_etr(struct tmc_drvdata *drvdata)
+{
+	unsigned long flags;
+	struct etr_buf *etr_buf = NULL;
+
+	/* config types are set at boot time and never change */
+	if (WARN_ON_ONCE(drvdata->config_type != TMC_CONFIG_TYPE_ETR))
+		return -EINVAL;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+
+	/* Re-enable the TMC if need be */
+	if (drvdata->mode == CS_MODE_SYSFS) {
+		/*
+		 * The trace run will continue with the same allocated trace
+		 * buffer. Since the tracer is still enabled, drvdata::etr_buf
+		 * can't be NULL.
+		 */
+		tmc_etr_enable_hw(drvdata);
+	} else {
+		/*
+		 * The ETR is not tracing and the buffer was just read.
+		 * As such prepare to free the trace buffer.
+		 */
+		etr_buf = drvdata->etr_buf;
+		drvdata->etr_buf = NULL;
+	}
+
+	drvdata->reading = false;
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	/* Free the allocated memory outside of the spinlock */
+	if (etr_buf)
+		tmc_free_etr_buf(etr_buf);
+
+	return 0;
+}
diff --git a/drivers/hwtracing/coresight/coresight-tmc.c b/drivers/hwtracing/coresight/coresight-tmc.c
new file mode 100644
index 0000000..1b817ec
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-tmc.c
@@ -0,0 +1,510 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
+ *
+ * Description: CoreSight Trace Memory Controller driver
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/property.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+#include <linux/dma-mapping.h>
+#include <linux/spinlock.h>
+#include <linux/pm_runtime.h>
+#include <linux/of.h>
+#include <linux/coresight.h>
+#include <linux/amba/bus.h>
+
+#include "coresight-priv.h"
+#include "coresight-tmc.h"
+
+void tmc_wait_for_tmcready(struct tmc_drvdata *drvdata)
+{
+	/* Ensure formatter, unformatter and hardware fifo are empty */
+	if (coresight_timeout(drvdata->base,
+			      TMC_STS, TMC_STS_TMCREADY_BIT, 1)) {
+		dev_err(drvdata->dev,
+			"timeout while waiting for TMC to be Ready\n");
+	}
+}
+
+void tmc_flush_and_stop(struct tmc_drvdata *drvdata)
+{
+	u32 ffcr;
+
+	ffcr = readl_relaxed(drvdata->base + TMC_FFCR);
+	ffcr |= TMC_FFCR_STOP_ON_FLUSH;
+	writel_relaxed(ffcr, drvdata->base + TMC_FFCR);
+	ffcr |= BIT(TMC_FFCR_FLUSHMAN_BIT);
+	writel_relaxed(ffcr, drvdata->base + TMC_FFCR);
+	/* Ensure flush completes */
+	if (coresight_timeout(drvdata->base,
+			      TMC_FFCR, TMC_FFCR_FLUSHMAN_BIT, 0)) {
+		dev_err(drvdata->dev,
+		"timeout while waiting for completion of Manual Flush\n");
+	}
+
+	tmc_wait_for_tmcready(drvdata);
+}
+
+void tmc_enable_hw(struct tmc_drvdata *drvdata)
+{
+	writel_relaxed(TMC_CTL_CAPT_EN, drvdata->base + TMC_CTL);
+}
+
+void tmc_disable_hw(struct tmc_drvdata *drvdata)
+{
+	writel_relaxed(0x0, drvdata->base + TMC_CTL);
+}
+
+static int tmc_read_prepare(struct tmc_drvdata *drvdata)
+{
+	int ret = 0;
+
+	switch (drvdata->config_type) {
+	case TMC_CONFIG_TYPE_ETB:
+	case TMC_CONFIG_TYPE_ETF:
+		ret = tmc_read_prepare_etb(drvdata);
+		break;
+	case TMC_CONFIG_TYPE_ETR:
+		ret = tmc_read_prepare_etr(drvdata);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	if (!ret)
+		dev_info(drvdata->dev, "TMC read start\n");
+
+	return ret;
+}
+
+static int tmc_read_unprepare(struct tmc_drvdata *drvdata)
+{
+	int ret = 0;
+
+	switch (drvdata->config_type) {
+	case TMC_CONFIG_TYPE_ETB:
+	case TMC_CONFIG_TYPE_ETF:
+		ret = tmc_read_unprepare_etb(drvdata);
+		break;
+	case TMC_CONFIG_TYPE_ETR:
+		ret = tmc_read_unprepare_etr(drvdata);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	if (!ret)
+		dev_info(drvdata->dev, "TMC read end\n");
+
+	return ret;
+}
+
+static int tmc_open(struct inode *inode, struct file *file)
+{
+	int ret;
+	struct tmc_drvdata *drvdata = container_of(file->private_data,
+						   struct tmc_drvdata, miscdev);
+
+	ret = tmc_read_prepare(drvdata);
+	if (ret)
+		return ret;
+
+	nonseekable_open(inode, file);
+
+	dev_dbg(drvdata->dev, "%s: successfully opened\n", __func__);
+	return 0;
+}
+
+static inline ssize_t tmc_get_sysfs_trace(struct tmc_drvdata *drvdata,
+					  loff_t pos, size_t len, char **bufpp)
+{
+	switch (drvdata->config_type) {
+	case TMC_CONFIG_TYPE_ETB:
+	case TMC_CONFIG_TYPE_ETF:
+		return tmc_etb_get_sysfs_trace(drvdata, pos, len, bufpp);
+	case TMC_CONFIG_TYPE_ETR:
+		return tmc_etr_get_sysfs_trace(drvdata, pos, len, bufpp);
+	}
+
+	return -EINVAL;
+}
+
+static ssize_t tmc_read(struct file *file, char __user *data, size_t len,
+			loff_t *ppos)
+{
+	char *bufp;
+	ssize_t actual;
+	struct tmc_drvdata *drvdata = container_of(file->private_data,
+						   struct tmc_drvdata, miscdev);
+	actual = tmc_get_sysfs_trace(drvdata, *ppos, len, &bufp);
+	if (actual <= 0)
+		return 0;
+
+	if (copy_to_user(data, bufp, actual)) {
+		dev_dbg(drvdata->dev, "%s: copy_to_user failed\n", __func__);
+		return -EFAULT;
+	}
+
+	*ppos += actual;
+	dev_dbg(drvdata->dev, "%zu bytes copied\n", actual);
+
+	return actual;
+}
+
+static int tmc_release(struct inode *inode, struct file *file)
+{
+	int ret;
+	struct tmc_drvdata *drvdata = container_of(file->private_data,
+						   struct tmc_drvdata, miscdev);
+
+	ret = tmc_read_unprepare(drvdata);
+	if (ret)
+		return ret;
+
+	dev_dbg(drvdata->dev, "%s: released\n", __func__);
+	return 0;
+}
+
+static const struct file_operations tmc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= tmc_open,
+	.read		= tmc_read,
+	.release	= tmc_release,
+	.llseek		= no_llseek,
+};
+
+static enum tmc_mem_intf_width tmc_get_memwidth(u32 devid)
+{
+	enum tmc_mem_intf_width memwidth;
+
+	/*
+	 * Excerpt from the TRM:
+	 *
+	 * DEVID::MEMWIDTH[10:8]
+	 * 0x2 Memory interface databus is 32 bits wide.
+	 * 0x3 Memory interface databus is 64 bits wide.
+	 * 0x4 Memory interface databus is 128 bits wide.
+	 * 0x5 Memory interface databus is 256 bits wide.
+	 */
+	switch (BMVAL(devid, 8, 10)) {
+	case 0x2:
+		memwidth = TMC_MEM_INTF_WIDTH_32BITS;
+		break;
+	case 0x3:
+		memwidth = TMC_MEM_INTF_WIDTH_64BITS;
+		break;
+	case 0x4:
+		memwidth = TMC_MEM_INTF_WIDTH_128BITS;
+		break;
+	case 0x5:
+		memwidth = TMC_MEM_INTF_WIDTH_256BITS;
+		break;
+	default:
+		memwidth = 0;
+	}
+
+	return memwidth;
+}
+
+#define coresight_tmc_reg(name, offset)			\
+	coresight_simple_reg32(struct tmc_drvdata, name, offset)
+#define coresight_tmc_reg64(name, lo_off, hi_off)	\
+	coresight_simple_reg64(struct tmc_drvdata, name, lo_off, hi_off)
+
+coresight_tmc_reg(rsz, TMC_RSZ);
+coresight_tmc_reg(sts, TMC_STS);
+coresight_tmc_reg(trg, TMC_TRG);
+coresight_tmc_reg(ctl, TMC_CTL);
+coresight_tmc_reg(ffsr, TMC_FFSR);
+coresight_tmc_reg(ffcr, TMC_FFCR);
+coresight_tmc_reg(mode, TMC_MODE);
+coresight_tmc_reg(pscr, TMC_PSCR);
+coresight_tmc_reg(axictl, TMC_AXICTL);
+coresight_tmc_reg(devid, CORESIGHT_DEVID);
+coresight_tmc_reg64(rrp, TMC_RRP, TMC_RRPHI);
+coresight_tmc_reg64(rwp, TMC_RWP, TMC_RWPHI);
+coresight_tmc_reg64(dba, TMC_DBALO, TMC_DBAHI);
+
+static struct attribute *coresight_tmc_mgmt_attrs[] = {
+	&dev_attr_rsz.attr,
+	&dev_attr_sts.attr,
+	&dev_attr_rrp.attr,
+	&dev_attr_rwp.attr,
+	&dev_attr_trg.attr,
+	&dev_attr_ctl.attr,
+	&dev_attr_ffsr.attr,
+	&dev_attr_ffcr.attr,
+	&dev_attr_mode.attr,
+	&dev_attr_pscr.attr,
+	&dev_attr_devid.attr,
+	&dev_attr_dba.attr,
+	&dev_attr_axictl.attr,
+	NULL,
+};
+
+static ssize_t trigger_cntr_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct tmc_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	unsigned long val = drvdata->trigger_cntr;
+
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t trigger_cntr_store(struct device *dev,
+			     struct device_attribute *attr,
+			     const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	drvdata->trigger_cntr = val;
+	return size;
+}
+static DEVICE_ATTR_RW(trigger_cntr);
+
+static ssize_t buffer_size_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct tmc_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	return sprintf(buf, "%#x\n", drvdata->size);
+}
+
+static ssize_t buffer_size_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	/* Only permitted for TMC-ETRs */
+	if (drvdata->config_type != TMC_CONFIG_TYPE_ETR)
+		return -EPERM;
+
+	ret = kstrtoul(buf, 0, &val);
+	if (ret)
+		return ret;
+	/* The buffer size should be page aligned */
+	if (val & (PAGE_SIZE - 1))
+		return -EINVAL;
+	drvdata->size = val;
+	return size;
+}
+
+static DEVICE_ATTR_RW(buffer_size);
+
+static struct attribute *coresight_tmc_attrs[] = {
+	&dev_attr_trigger_cntr.attr,
+	&dev_attr_buffer_size.attr,
+	NULL,
+};
+
+static const struct attribute_group coresight_tmc_group = {
+	.attrs = coresight_tmc_attrs,
+};
+
+static const struct attribute_group coresight_tmc_mgmt_group = {
+	.attrs = coresight_tmc_mgmt_attrs,
+	.name = "mgmt",
+};
+
+const struct attribute_group *coresight_tmc_groups[] = {
+	&coresight_tmc_group,
+	&coresight_tmc_mgmt_group,
+	NULL,
+};
+
+static inline bool tmc_etr_can_use_sg(struct tmc_drvdata *drvdata)
+{
+	return fwnode_property_present(drvdata->dev->fwnode,
+				       "arm,scatter-gather");
+}
+
+/* Detect and initialise the capabilities of a TMC ETR */
+static int tmc_etr_setup_caps(struct tmc_drvdata *drvdata,
+			     u32 devid, void *dev_caps)
+{
+	u32 dma_mask = 0;
+
+	/* Set the unadvertised capabilities */
+	tmc_etr_init_caps(drvdata, (u32)(unsigned long)dev_caps);
+
+	if (!(devid & TMC_DEVID_NOSCAT) && tmc_etr_can_use_sg(drvdata))
+		tmc_etr_set_cap(drvdata, TMC_ETR_SG);
+
+	/* Check if the AXI address width is available */
+	if (devid & TMC_DEVID_AXIAW_VALID)
+		dma_mask = ((devid >> TMC_DEVID_AXIAW_SHIFT) &
+				TMC_DEVID_AXIAW_MASK);
+
+	/*
+	 * Unless specified in the device configuration, ETR uses a 40-bit
+	 * AXI master in place of the embedded SRAM of ETB/ETF.
+	 */
+	switch (dma_mask) {
+	case 32:
+	case 40:
+	case 44:
+	case 48:
+	case 52:
+		dev_info(drvdata->dev, "Detected dma mask %dbits\n", dma_mask);
+		break;
+	default:
+		dma_mask = 40;
+	}
+
+	return dma_set_mask_and_coherent(drvdata->dev, DMA_BIT_MASK(dma_mask));
+}
+
+static int tmc_probe(struct amba_device *adev, const struct amba_id *id)
+{
+	int ret = 0;
+	u32 devid;
+	void __iomem *base;
+	struct device *dev = &adev->dev;
+	struct coresight_platform_data *pdata = NULL;
+	struct tmc_drvdata *drvdata;
+	struct resource *res = &adev->res;
+	struct coresight_desc desc = { 0 };
+	struct device_node *np = adev->dev.of_node;
+
+	if (np) {
+		pdata = of_get_coresight_platform_data(dev, np);
+		if (IS_ERR(pdata)) {
+			ret = PTR_ERR(pdata);
+			goto out;
+		}
+		adev->dev.platform_data = pdata;
+	}
+
+	ret = -ENOMEM;
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		goto out;
+
+	drvdata->dev = &adev->dev;
+	dev_set_drvdata(dev, drvdata);
+
+	/* Validity for the resource is already checked by the AMBA core */
+	base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(base)) {
+		ret = PTR_ERR(base);
+		goto out;
+	}
+
+	drvdata->base = base;
+
+	spin_lock_init(&drvdata->spinlock);
+
+	devid = readl_relaxed(drvdata->base + CORESIGHT_DEVID);
+	drvdata->config_type = BMVAL(devid, 6, 7);
+	drvdata->memwidth = tmc_get_memwidth(devid);
+
+	if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) {
+		if (np)
+			ret = of_property_read_u32(np,
+						   "arm,buffer-size",
+						   &drvdata->size);
+		if (ret)
+			drvdata->size = SZ_1M;
+	} else {
+		drvdata->size = readl_relaxed(drvdata->base + TMC_RSZ) * 4;
+	}
+
+	pm_runtime_put(&adev->dev);
+
+	desc.pdata = pdata;
+	desc.dev = dev;
+	desc.groups = coresight_tmc_groups;
+
+	switch (drvdata->config_type) {
+	case TMC_CONFIG_TYPE_ETB:
+		desc.type = CORESIGHT_DEV_TYPE_SINK;
+		desc.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_BUFFER;
+		desc.ops = &tmc_etb_cs_ops;
+		break;
+	case TMC_CONFIG_TYPE_ETR:
+		desc.type = CORESIGHT_DEV_TYPE_SINK;
+		desc.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_BUFFER;
+		desc.ops = &tmc_etr_cs_ops;
+		ret = tmc_etr_setup_caps(drvdata, devid, id->data);
+		if (ret)
+			goto out;
+		break;
+	case TMC_CONFIG_TYPE_ETF:
+		desc.type = CORESIGHT_DEV_TYPE_LINKSINK;
+		desc.subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_FIFO;
+		desc.ops = &tmc_etf_cs_ops;
+		break;
+	default:
+		pr_err("%s: Unsupported TMC config\n", pdata->name);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	drvdata->csdev = coresight_register(&desc);
+	if (IS_ERR(drvdata->csdev)) {
+		ret = PTR_ERR(drvdata->csdev);
+		goto out;
+	}
+
+	drvdata->miscdev.name = pdata->name;
+	drvdata->miscdev.minor = MISC_DYNAMIC_MINOR;
+	drvdata->miscdev.fops = &tmc_fops;
+	ret = misc_register(&drvdata->miscdev);
+	if (ret)
+		coresight_unregister(drvdata->csdev);
+out:
+	return ret;
+}
+
+static const struct amba_id tmc_ids[] = {
+	{
+		.id     = 0x000bb961,
+		.mask   = 0x000fffff,
+	},
+	{
+		/* Coresight SoC 600 TMC-ETR/ETS */
+		.id	= 0x000bb9e8,
+		.mask	= 0x000fffff,
+		.data	= (void *)(unsigned long)CORESIGHT_SOC_600_ETR_CAPS,
+	},
+	{
+		/* Coresight SoC 600 TMC-ETB */
+		.id	= 0x000bb9e9,
+		.mask	= 0x000fffff,
+	},
+	{
+		/* Coresight SoC 600 TMC-ETF */
+		.id	= 0x000bb9ea,
+		.mask	= 0x000fffff,
+	},
+	{ 0, 0},
+};
+
+static struct amba_driver tmc_driver = {
+	.drv = {
+		.name   = "coresight-tmc",
+		.owner  = THIS_MODULE,
+		.suppress_bind_attrs = true,
+	},
+	.probe		= tmc_probe,
+	.id_table	= tmc_ids,
+};
+builtin_amba_driver(tmc_driver);
diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h
new file mode 100644
index 0000000..7027bd6
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-tmc.h
@@ -0,0 +1,309 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#ifndef _CORESIGHT_TMC_H
+#define _CORESIGHT_TMC_H
+
+#include <linux/dma-mapping.h>
+#include <linux/miscdevice.h>
+
+#define TMC_RSZ			0x004
+#define TMC_STS			0x00c
+#define TMC_RRD			0x010
+#define TMC_RRP			0x014
+#define TMC_RWP			0x018
+#define TMC_TRG			0x01c
+#define TMC_CTL			0x020
+#define TMC_RWD			0x024
+#define TMC_MODE		0x028
+#define TMC_LBUFLEVEL		0x02c
+#define TMC_CBUFLEVEL		0x030
+#define TMC_BUFWM		0x034
+#define TMC_RRPHI		0x038
+#define TMC_RWPHI		0x03c
+#define TMC_AXICTL		0x110
+#define TMC_DBALO		0x118
+#define TMC_DBAHI		0x11c
+#define TMC_FFSR		0x300
+#define TMC_FFCR		0x304
+#define TMC_PSCR		0x308
+#define TMC_ITMISCOP0		0xee0
+#define TMC_ITTRFLIN		0xee8
+#define TMC_ITATBDATA0		0xeec
+#define TMC_ITATBCTR2		0xef0
+#define TMC_ITATBCTR1		0xef4
+#define TMC_ITATBCTR0		0xef8
+
+/* register description */
+/* TMC_CTL - 0x020 */
+#define TMC_CTL_CAPT_EN		BIT(0)
+/* TMC_STS - 0x00C */
+#define TMC_STS_TMCREADY_BIT	2
+#define TMC_STS_FULL		BIT(0)
+#define TMC_STS_TRIGGERED	BIT(1)
+/*
+ * TMC_AXICTL - 0x110
+ *
+ * TMC AXICTL format for SoC-400
+ *	Bits [0-1]	: ProtCtrlBit0-1
+ *	Bits [2-5]	: CacheCtrlBits 0-3 (AXCACHE)
+ *	Bit  6		: Reserved
+ *	Bit  7		: ScatterGatherMode
+ *	Bits [8-11]	: WrBurstLen
+ *	Bits [12-31]	: Reserved.
+ * TMC AXICTL format for SoC-600, as above except:
+ *	Bits [2-5]	: AXI WCACHE
+ *	Bits [16-19]	: AXI RCACHE
+ *	Bits [20-31]	: Reserved
+ */
+#define TMC_AXICTL_CLEAR_MASK 0xfbf
+#define TMC_AXICTL_ARCACHE_MASK (0xf << 16)
+
+#define TMC_AXICTL_PROT_CTL_B0	BIT(0)
+#define TMC_AXICTL_PROT_CTL_B1	BIT(1)
+#define TMC_AXICTL_SCT_GAT_MODE	BIT(7)
+#define TMC_AXICTL_WR_BURST_16	0xF00
+/* Write-back Read and Write-allocate */
+#define TMC_AXICTL_AXCACHE_OS	(0xf << 2)
+#define TMC_AXICTL_ARCACHE_OS	(0xf << 16)
+
+/* TMC_FFCR - 0x304 */
+#define TMC_FFCR_FLUSHMAN_BIT	6
+#define TMC_FFCR_EN_FMT		BIT(0)
+#define TMC_FFCR_EN_TI		BIT(1)
+#define TMC_FFCR_FON_FLIN	BIT(4)
+#define TMC_FFCR_FON_TRIG_EVT	BIT(5)
+#define TMC_FFCR_TRIGON_TRIGIN	BIT(8)
+#define TMC_FFCR_STOP_ON_FLUSH	BIT(12)
+
+
+#define TMC_DEVID_NOSCAT	BIT(24)
+
+#define TMC_DEVID_AXIAW_VALID	BIT(16)
+#define TMC_DEVID_AXIAW_SHIFT	17
+#define TMC_DEVID_AXIAW_MASK	0x7f
+
+enum tmc_config_type {
+	TMC_CONFIG_TYPE_ETB,
+	TMC_CONFIG_TYPE_ETR,
+	TMC_CONFIG_TYPE_ETF,
+};
+
+enum tmc_mode {
+	TMC_MODE_CIRCULAR_BUFFER,
+	TMC_MODE_SOFTWARE_FIFO,
+	TMC_MODE_HARDWARE_FIFO,
+};
+
+enum tmc_mem_intf_width {
+	TMC_MEM_INTF_WIDTH_32BITS	= 1,
+	TMC_MEM_INTF_WIDTH_64BITS	= 2,
+	TMC_MEM_INTF_WIDTH_128BITS	= 4,
+	TMC_MEM_INTF_WIDTH_256BITS	= 8,
+};
+
+/* TMC ETR Capability bit definitions */
+#define TMC_ETR_SG			(0x1U << 0)
+/* ETR has separate read/write cache encodings */
+#define TMC_ETR_AXI_ARCACHE		(0x1U << 1)
+/*
+ * TMC_ETR_SAVE_RESTORE - Values of RRP/RWP/STS.Full are
+ * retained when TMC leaves Disabled state, allowing us to continue
+ * the tracing from a point where we stopped. This also implies that
+ * the RRP/RWP/STS.Full should always be programmed to the correct
+ * value. Unfortunately this is not advertised by the hardware,
+ * so we have to rely on PID of the IP to detect the functionality.
+ */
+#define TMC_ETR_SAVE_RESTORE		(0x1U << 2)
+
+/* Coresight SoC-600 TMC-ETR unadvertised capabilities */
+#define CORESIGHT_SOC_600_ETR_CAPS	\
+	(TMC_ETR_SAVE_RESTORE | TMC_ETR_AXI_ARCACHE)
+
+enum etr_mode {
+	ETR_MODE_FLAT,		/* Uses contiguous flat buffer */
+	ETR_MODE_ETR_SG,	/* Uses in-built TMC ETR SG mechanism */
+	ETR_MODE_CATU,		/* Use SG mechanism in CATU */
+};
+
+struct etr_buf_operations;
+
+/**
+ * struct etr_buf - Details of the buffer used by ETR
+ * @mode	: Mode of the ETR buffer, contiguous, Scatter Gather etc.
+ * @full	: Trace data overflow
+ * @size	: Size of the buffer.
+ * @hwaddr	: Address to be programmed in the TMC:DBA{LO,HI}
+ * @offset	: Offset of the trace data in the buffer for consumption.
+ * @len		: Available trace data @buf (may wrap around to the beginning).
+ * @ops		: ETR buffer operations for the mode.
+ * @private	: Backend specific information for the buf
+ */
+struct etr_buf {
+	enum etr_mode			mode;
+	bool				full;
+	ssize_t				size;
+	dma_addr_t			hwaddr;
+	unsigned long			offset;
+	s64				len;
+	const struct etr_buf_operations	*ops;
+	void				*private;
+};
+
+/**
+ * struct tmc_drvdata - specifics associated to an TMC component
+ * @base:	memory mapped base address for this component.
+ * @dev:	the device entity associated to this component.
+ * @csdev:	component vitals needed by the framework.
+ * @miscdev:	specifics to handle "/dev/xyz.tmc" entry.
+ * @spinlock:	only one at a time pls.
+ * @buf:	Snapshot of the trace data for ETF/ETB.
+ * @etr_buf:	details of buffer used in TMC-ETR
+ * @len:	size of the available trace for ETF/ETB.
+ * @size:	trace buffer size for this TMC (common for all modes).
+ * @mode:	how this TMC is being used.
+ * @config_type: TMC variant, must be of type @tmc_config_type.
+ * @memwidth:	width of the memory interface databus, in bytes.
+ * @trigger_cntr: amount of words to store after a trigger.
+ * @etr_caps:	Bitmask of capabilities of the TMC ETR, inferred from the
+ *		device configuration register (DEVID)
+ */
+struct tmc_drvdata {
+	void __iomem		*base;
+	struct device		*dev;
+	struct coresight_device	*csdev;
+	struct miscdevice	miscdev;
+	spinlock_t		spinlock;
+	bool			reading;
+	union {
+		char		*buf;		/* TMC ETB */
+		struct etr_buf	*etr_buf;	/* TMC ETR */
+	};
+	u32			len;
+	u32			size;
+	u32			mode;
+	enum tmc_config_type	config_type;
+	enum tmc_mem_intf_width	memwidth;
+	u32			trigger_cntr;
+	u32			etr_caps;
+};
+
+struct etr_buf_operations {
+	int (*alloc)(struct tmc_drvdata *drvdata, struct etr_buf *etr_buf,
+		     int node, void **pages);
+	void (*sync)(struct etr_buf *etr_buf, u64 rrp, u64 rwp);
+	ssize_t (*get_data)(struct etr_buf *etr_buf, u64 offset, size_t len,
+			    char **bufpp);
+	void (*free)(struct etr_buf *etr_buf);
+};
+
+/**
+ * struct tmc_pages - Collection of pages used for SG.
+ * @nr_pages:		Number of pages in the list.
+ * @daddrs:		Array of DMA'able page addresses.
+ * @pages:		Array of pages for the buffer.
+ */
+struct tmc_pages {
+	int nr_pages;
+	dma_addr_t	*daddrs;
+	struct page	**pages;
+};
+
+/*
+ * struct tmc_sg_table - Generic SG table for TMC
+ * @dev:		Device for DMA allocations
+ * @table_vaddr:	Contiguous Virtual address for PageTable
+ * @data_vaddr:		Contiguous Virtual address for Data Buffer
+ * @table_daddr:	DMA address of the PageTable base
+ * @node:		Node for Page allocations
+ * @table_pages:	List of pages & dma address for Table
+ * @data_pages:		List of pages & dma address for Data
+ */
+struct tmc_sg_table {
+	struct device *dev;
+	void *table_vaddr;
+	void *data_vaddr;
+	dma_addr_t table_daddr;
+	int node;
+	struct tmc_pages table_pages;
+	struct tmc_pages data_pages;
+};
+
+/* Generic functions */
+void tmc_wait_for_tmcready(struct tmc_drvdata *drvdata);
+void tmc_flush_and_stop(struct tmc_drvdata *drvdata);
+void tmc_enable_hw(struct tmc_drvdata *drvdata);
+void tmc_disable_hw(struct tmc_drvdata *drvdata);
+
+/* ETB/ETF functions */
+int tmc_read_prepare_etb(struct tmc_drvdata *drvdata);
+int tmc_read_unprepare_etb(struct tmc_drvdata *drvdata);
+extern const struct coresight_ops tmc_etb_cs_ops;
+extern const struct coresight_ops tmc_etf_cs_ops;
+
+ssize_t tmc_etb_get_sysfs_trace(struct tmc_drvdata *drvdata,
+				loff_t pos, size_t len, char **bufpp);
+/* ETR functions */
+int tmc_read_prepare_etr(struct tmc_drvdata *drvdata);
+int tmc_read_unprepare_etr(struct tmc_drvdata *drvdata);
+extern const struct coresight_ops tmc_etr_cs_ops;
+ssize_t tmc_etr_get_sysfs_trace(struct tmc_drvdata *drvdata,
+				loff_t pos, size_t len, char **bufpp);
+
+
+#define TMC_REG_PAIR(name, lo_off, hi_off)				\
+static inline u64							\
+tmc_read_##name(struct tmc_drvdata *drvdata)				\
+{									\
+	return coresight_read_reg_pair(drvdata->base, lo_off, hi_off);	\
+}									\
+static inline void							\
+tmc_write_##name(struct tmc_drvdata *drvdata, u64 val)			\
+{									\
+	coresight_write_reg_pair(drvdata->base, val, lo_off, hi_off);	\
+}
+
+TMC_REG_PAIR(rrp, TMC_RRP, TMC_RRPHI)
+TMC_REG_PAIR(rwp, TMC_RWP, TMC_RWPHI)
+TMC_REG_PAIR(dba, TMC_DBALO, TMC_DBAHI)
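+
+/*
+ * The instantiations above generate, for example, tmc_read_rrp() and
+ * tmc_write_rrp(), which access the 64-bit RRP value split across
+ * TMC_RRP/TMC_RRPHI.  A caller saving and restoring the write pointer
+ * would simply do (illustrative sketch):
+ *
+ *	u64 rwp = tmc_read_rwp(drvdata);
+ *	...
+ *	tmc_write_rwp(drvdata, rwp);
+ */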
+
+/* Initialise the caps from unadvertised static capabilities of the device */
+static inline void tmc_etr_init_caps(struct tmc_drvdata *drvdata, u32 dev_caps)
+{
+	WARN_ON(drvdata->etr_caps);
+	drvdata->etr_caps = dev_caps;
+}
+
+static inline void tmc_etr_set_cap(struct tmc_drvdata *drvdata, u32 cap)
+{
+	drvdata->etr_caps |= cap;
+}
+
+static inline bool tmc_etr_has_cap(struct tmc_drvdata *drvdata, u32 cap)
+{
+	return !!(drvdata->etr_caps & cap);
+}
+
+struct tmc_sg_table *tmc_alloc_sg_table(struct device *dev,
+					int node,
+					int nr_tpages,
+					int nr_dpages,
+					void **pages);
+void tmc_free_sg_table(struct tmc_sg_table *sg_table);
+void tmc_sg_table_sync_table(struct tmc_sg_table *sg_table);
+void tmc_sg_table_sync_data_range(struct tmc_sg_table *table,
+				  u64 offset, u64 size);
+ssize_t tmc_sg_table_get_data(struct tmc_sg_table *sg_table,
+			      u64 offset, size_t len, char **bufpp);
+static inline unsigned long
+tmc_sg_table_buf_size(struct tmc_sg_table *sg_table)
+{
+	return sg_table->data_pages.nr_pages << PAGE_SHIFT;
+}
+
+struct coresight_device *tmc_etr_get_catu_device(struct tmc_drvdata *drvdata);
+
+#endif
diff --git a/drivers/hwtracing/coresight/coresight-tpiu.c b/drivers/hwtracing/coresight/coresight-tpiu.c
new file mode 100644
index 0000000..459ef93
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-tpiu.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
+ *
+ * Description: CoreSight Trace Port Interface Unit driver
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/pm_runtime.h>
+#include <linux/coresight.h>
+#include <linux/amba/bus.h>
+#include <linux/clk.h>
+
+#include "coresight-priv.h"
+
+#define TPIU_SUPP_PORTSZ	0x000
+#define TPIU_CURR_PORTSZ	0x004
+#define TPIU_SUPP_TRIGMODES	0x100
+#define TPIU_TRIG_CNTRVAL	0x104
+#define TPIU_TRIG_MULT		0x108
+#define TPIU_SUPP_TESTPATM	0x200
+#define TPIU_CURR_TESTPATM	0x204
+#define TPIU_TEST_PATREPCNTR	0x208
+#define TPIU_FFSR		0x300
+#define TPIU_FFCR		0x304
+#define TPIU_FSYNC_CNTR		0x308
+#define TPIU_EXTCTL_INPORT	0x400
+#define TPIU_EXTCTL_OUTPORT	0x404
+#define TPIU_ITTRFLINACK	0xee4
+#define TPIU_ITTRFLIN		0xee8
+#define TPIU_ITATBDATA0		0xeec
+#define TPIU_ITATBCTR2		0xef0
+#define TPIU_ITATBCTR1		0xef4
+#define TPIU_ITATBCTR0		0xef8
+
+/** register definition **/
+/* FFSR - 0x300 */
+#define FFSR_FT_STOPPED_BIT	1
+/* FFCR - 0x304 */
+#define FFCR_FON_MAN_BIT	6
+#define FFCR_FON_MAN		BIT(6)
+#define FFCR_STOP_FI		BIT(12)
+
+/**
+ * @base:	memory mapped base address for this component.
+ * @dev:	the device entity associated to this component.
+ * @atclk:	optional clock for the core parts of the TPIU.
+ * @csdev:	component vitals needed by the framework.
+ */
+struct tpiu_drvdata {
+	void __iomem		*base;
+	struct device		*dev;
+	struct clk		*atclk;
+	struct coresight_device	*csdev;
+};
+
+static void tpiu_enable_hw(struct tpiu_drvdata *drvdata)
+{
+	CS_UNLOCK(drvdata->base);
+
+	/* TODO: fill this up */
+
+	CS_LOCK(drvdata->base);
+}
+
+static int tpiu_enable(struct coresight_device *csdev, u32 mode)
+{
+	struct tpiu_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	tpiu_enable_hw(drvdata);
+
+	dev_info(drvdata->dev, "TPIU enabled\n");
+	return 0;
+}
+
+static void tpiu_disable_hw(struct tpiu_drvdata *drvdata)
+{
+	CS_UNLOCK(drvdata->base);
+
+	/* Clear formatter and stop on flush */
+	writel_relaxed(FFCR_STOP_FI, drvdata->base + TPIU_FFCR);
+	/* Generate manual flush */
+	writel_relaxed(FFCR_STOP_FI | FFCR_FON_MAN, drvdata->base + TPIU_FFCR);
+	/* Wait for flush to complete */
+	coresight_timeout(drvdata->base, TPIU_FFCR, FFCR_FON_MAN_BIT, 0);
+	/* Wait for formatter to stop */
+	coresight_timeout(drvdata->base, TPIU_FFSR, FFSR_FT_STOPPED_BIT, 1);
+
+	CS_LOCK(drvdata->base);
+}
+
+static void tpiu_disable(struct coresight_device *csdev)
+{
+	struct tpiu_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	tpiu_disable_hw(drvdata);
+
+	dev_info(drvdata->dev, "TPIU disabled\n");
+}
+
+static const struct coresight_ops_sink tpiu_sink_ops = {
+	.enable		= tpiu_enable,
+	.disable	= tpiu_disable,
+};
+
+static const struct coresight_ops tpiu_cs_ops = {
+	.sink_ops	= &tpiu_sink_ops,
+};
+
+static int tpiu_probe(struct amba_device *adev, const struct amba_id *id)
+{
+	int ret;
+	void __iomem *base;
+	struct device *dev = &adev->dev;
+	struct coresight_platform_data *pdata = NULL;
+	struct tpiu_drvdata *drvdata;
+	struct resource *res = &adev->res;
+	struct coresight_desc desc = { 0 };
+	struct device_node *np = adev->dev.of_node;
+
+	if (np) {
+		pdata = of_get_coresight_platform_data(dev, np);
+		if (IS_ERR(pdata))
+			return PTR_ERR(pdata);
+		adev->dev.platform_data = pdata;
+	}
+
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+
+	drvdata->dev = &adev->dev;
+	drvdata->atclk = devm_clk_get(&adev->dev, "atclk"); /* optional */
+	if (!IS_ERR(drvdata->atclk)) {
+		ret = clk_prepare_enable(drvdata->atclk);
+		if (ret)
+			return ret;
+	}
+	dev_set_drvdata(dev, drvdata);
+
+	/* Validity for the resource is already checked by the AMBA core */
+	base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	drvdata->base = base;
+
+	/* Disable tpiu to support older devices */
+	tpiu_disable_hw(drvdata);
+
+	pm_runtime_put(&adev->dev);
+
+	desc.type = CORESIGHT_DEV_TYPE_SINK;
+	desc.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_PORT;
+	desc.ops = &tpiu_cs_ops;
+	desc.pdata = pdata;
+	desc.dev = dev;
+	drvdata->csdev = coresight_register(&desc);
+
+	return PTR_ERR_OR_ZERO(drvdata->csdev);
+}
+
+#ifdef CONFIG_PM
+static int tpiu_runtime_suspend(struct device *dev)
+{
+	struct tpiu_drvdata *drvdata = dev_get_drvdata(dev);
+
+	if (drvdata && !IS_ERR(drvdata->atclk))
+		clk_disable_unprepare(drvdata->atclk);
+
+	return 0;
+}
+
+static int tpiu_runtime_resume(struct device *dev)
+{
+	struct tpiu_drvdata *drvdata = dev_get_drvdata(dev);
+
+	if (drvdata && !IS_ERR(drvdata->atclk))
+		clk_prepare_enable(drvdata->atclk);
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops tpiu_dev_pm_ops = {
+	SET_RUNTIME_PM_OPS(tpiu_runtime_suspend, tpiu_runtime_resume, NULL)
+};
+
+static const struct amba_id tpiu_ids[] = {
+	{
+		.id	= 0x000bb912,
+		.mask	= 0x000fffff,
+	},
+	{
+		.id	= 0x0004b912,
+		.mask	= 0x0007ffff,
+	},
+	{
+		/* Coresight SoC-600 */
+		.id	= 0x000bb9e7,
+		.mask	= 0x000fffff,
+	},
+	{ 0, 0},
+};
+
+static struct amba_driver tpiu_driver = {
+	.drv = {
+		.name	= "coresight-tpiu",
+		.owner	= THIS_MODULE,
+		.pm	= &tpiu_dev_pm_ops,
+		.suppress_bind_attrs = true,
+	},
+	.probe		= tpiu_probe,
+	.id_table	= tpiu_ids,
+};
+builtin_amba_driver(tpiu_driver);
diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
new file mode 100644
index 0000000..3e07fd3
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight.c
@@ -0,0 +1,1092 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2012, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/clk.h>
+#include <linux/coresight.h>
+#include <linux/of_platform.h>
+#include <linux/delay.h>
+#include <linux/pm_runtime.h>
+
+#include "coresight-priv.h"
+
+static DEFINE_MUTEX(coresight_mutex);
+
+/**
+ * struct coresight_node - elements of a path, from source to sink
+ * @csdev:	Address of an element.
+ * @link:	hook to the list.
+ */
+struct coresight_node {
+	struct coresight_device *csdev;
+	struct list_head link;
+};
+
+/*
+ * When operating Coresight drivers from the sysFS interface, only a single
+ * path can exist from a tracer (associated to a CPU) to a sink.
+ */
+static DEFINE_PER_CPU(struct list_head *, tracer_path);
+
+/*
+ * As of this writing only a single STM can be found in CS topologies.  Since
+ * there is no way to know if we'll ever see more and what kind of
+ * configuration they will enact, for the time being only define a single path
+ * for STM.
+ */
+static struct list_head *stm_path;
+
+/*
+ * When losing synchronisation a new barrier packet needs to be inserted at the
+ * beginning of the data collected in a buffer.  That way the decoder knows that
+ * it needs to look for another sync sequence.
+ */
+const u32 barrier_pkt[4] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff};
+
+static int coresight_id_match(struct device *dev, void *data)
+{
+	int trace_id, i_trace_id;
+	struct coresight_device *csdev, *i_csdev;
+
+	csdev = data;
+	i_csdev = to_coresight_device(dev);
+
+	/*
+	 * No need to care about oneself and components that are not
+	 * sources or not enabled
+	 */
+	if (i_csdev == csdev || !i_csdev->enable ||
+	    i_csdev->type != CORESIGHT_DEV_TYPE_SOURCE)
+		return 0;
+
+	/* Get the source ID for both components */
+	trace_id = source_ops(csdev)->trace_id(csdev);
+	i_trace_id = source_ops(i_csdev)->trace_id(i_csdev);
+
+	/* All you need is one */
+	if (trace_id == i_trace_id)
+		return 1;
+
+	return 0;
+}
+
+static int coresight_source_is_unique(struct coresight_device *csdev)
+{
+	int trace_id = source_ops(csdev)->trace_id(csdev);
+
+	/* this shouldn't happen */
+	if (trace_id < 0)
+		return 0;
+
+	return !bus_for_each_dev(&coresight_bustype, NULL,
+				 csdev, coresight_id_match);
+}
+
+static int coresight_find_link_inport(struct coresight_device *csdev,
+				      struct coresight_device *parent)
+{
+	int i;
+	struct coresight_connection *conn;
+
+	for (i = 0; i < parent->nr_outport; i++) {
+		conn = &parent->conns[i];
+		if (conn->child_dev == csdev)
+			return conn->child_port;
+	}
+
+	dev_err(&csdev->dev, "couldn't find inport, parent: %s, child: %s\n",
+		dev_name(&parent->dev), dev_name(&csdev->dev));
+
+	return -ENODEV;
+}
+
+static int coresight_find_link_outport(struct coresight_device *csdev,
+				       struct coresight_device *child)
+{
+	int i;
+	struct coresight_connection *conn;
+
+	for (i = 0; i < csdev->nr_outport; i++) {
+		conn = &csdev->conns[i];
+		if (conn->child_dev == child)
+			return conn->outport;
+	}
+
+	dev_err(&csdev->dev, "couldn't find outport, parent: %s, child: %s\n",
+		dev_name(&csdev->dev), dev_name(&child->dev));
+
+	return -ENODEV;
+}
+
+static int coresight_enable_sink(struct coresight_device *csdev, u32 mode)
+{
+	int ret;
+
+	if (!csdev->enable) {
+		if (sink_ops(csdev)->enable) {
+			ret = sink_ops(csdev)->enable(csdev, mode);
+			if (ret)
+				return ret;
+		}
+		csdev->enable = true;
+	}
+
+	atomic_inc(csdev->refcnt);
+
+	return 0;
+}
+
+static void coresight_disable_sink(struct coresight_device *csdev)
+{
+	if (atomic_dec_return(csdev->refcnt) == 0) {
+		if (sink_ops(csdev)->disable) {
+			sink_ops(csdev)->disable(csdev);
+			csdev->enable = false;
+		}
+	}
+}
+
+static int coresight_enable_link(struct coresight_device *csdev,
+				 struct coresight_device *parent,
+				 struct coresight_device *child)
+{
+	int ret;
+	int link_subtype;
+	int refport, inport, outport;
+
+	if (!parent || !child)
+		return -EINVAL;
+
+	inport = coresight_find_link_inport(csdev, parent);
+	outport = coresight_find_link_outport(csdev, child);
+	link_subtype = csdev->subtype.link_subtype;
+
+	if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_MERG)
+		refport = inport;
+	else if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_SPLIT)
+		refport = outport;
+	else
+		refport = 0;
+
+	if (refport < 0)
+		return refport;
+
+	if (atomic_inc_return(&csdev->refcnt[refport]) == 1) {
+		if (link_ops(csdev)->enable) {
+			ret = link_ops(csdev)->enable(csdev, inport, outport);
+			if (ret)
+				return ret;
+		}
+	}
+
+	csdev->enable = true;
+
+	return 0;
+}
+
+static void coresight_disable_link(struct coresight_device *csdev,
+				   struct coresight_device *parent,
+				   struct coresight_device *child)
+{
+	int i, nr_conns;
+	int link_subtype;
+	int refport, inport, outport;
+
+	if (!parent || !child)
+		return;
+
+	inport = coresight_find_link_inport(csdev, parent);
+	outport = coresight_find_link_outport(csdev, child);
+	link_subtype = csdev->subtype.link_subtype;
+
+	if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_MERG) {
+		refport = inport;
+		nr_conns = csdev->nr_inport;
+	} else if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_SPLIT) {
+		refport = outport;
+		nr_conns = csdev->nr_outport;
+	} else {
+		refport = 0;
+		nr_conns = 1;
+	}
+
+	if (atomic_dec_return(&csdev->refcnt[refport]) == 0) {
+		if (link_ops(csdev)->disable)
+			link_ops(csdev)->disable(csdev, inport, outport);
+	}
+
+	for (i = 0; i < nr_conns; i++)
+		if (atomic_read(&csdev->refcnt[i]) != 0)
+			return;
+
+	csdev->enable = false;
+}
+
+static int coresight_enable_source(struct coresight_device *csdev, u32 mode)
+{
+	int ret;
+
+	if (!coresight_source_is_unique(csdev)) {
+		dev_warn(&csdev->dev, "traceID %d not unique\n",
+			 source_ops(csdev)->trace_id(csdev));
+		return -EINVAL;
+	}
+
+	if (!csdev->enable) {
+		if (source_ops(csdev)->enable) {
+			ret = source_ops(csdev)->enable(csdev, NULL, mode);
+			if (ret)
+				return ret;
+		}
+		csdev->enable = true;
+	}
+
+	atomic_inc(csdev->refcnt);
+
+	return 0;
+}
+
+/**
+ * coresight_disable_source - Drop the reference count by 1 and disable
+ * the device if there are no users left.
+ *
+ * @csdev: The coresight device to disable
+ *
+ * Returns true if the device has been disabled.
+ */
+static bool coresight_disable_source(struct coresight_device *csdev)
+{
+	if (atomic_dec_return(csdev->refcnt) == 0) {
+		if (source_ops(csdev)->disable)
+			source_ops(csdev)->disable(csdev, NULL);
+		csdev->enable = false;
+	}
+	return !csdev->enable;
+}
+
+void coresight_disable_path(struct list_head *path)
+{
+	u32 type;
+	struct coresight_node *nd;
+	struct coresight_device *csdev, *parent, *child;
+
+	list_for_each_entry(nd, path, link) {
+		csdev = nd->csdev;
+		type = csdev->type;
+
+		/*
+		 * ETF devices are tricky... They can be a link or a sink,
+		 * depending on how they are configured.  If an ETF has been
+		 * "activated" it will be configured as a sink, otherwise
+		 * go ahead with the link configuration.
+		 */
+		if (type == CORESIGHT_DEV_TYPE_LINKSINK)
+			type = (csdev == coresight_get_sink(path)) ?
+						CORESIGHT_DEV_TYPE_SINK :
+						CORESIGHT_DEV_TYPE_LINK;
+
+		switch (type) {
+		case CORESIGHT_DEV_TYPE_SINK:
+			coresight_disable_sink(csdev);
+			break;
+		case CORESIGHT_DEV_TYPE_SOURCE:
+			/* sources are disabled from either sysFS or Perf */
+			break;
+		case CORESIGHT_DEV_TYPE_LINK:
+			parent = list_prev_entry(nd, link)->csdev;
+			child = list_next_entry(nd, link)->csdev;
+			coresight_disable_link(csdev, parent, child);
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+int coresight_enable_path(struct list_head *path, u32 mode)
+{
+
+	int ret = 0;
+	u32 type;
+	struct coresight_node *nd;
+	struct coresight_device *csdev, *parent, *child;
+
+	list_for_each_entry_reverse(nd, path, link) {
+		csdev = nd->csdev;
+		type = csdev->type;
+
+		/*
+		 * ETF devices are tricky... They can be a link or a sink,
+		 * depending on how they are configured.  If an ETF has been
+		 * "activated" it will be configured as a sink, otherwise
+		 * go ahead with the link configuration.
+		 */
+		if (type == CORESIGHT_DEV_TYPE_LINKSINK)
+			type = (csdev == coresight_get_sink(path)) ?
+						CORESIGHT_DEV_TYPE_SINK :
+						CORESIGHT_DEV_TYPE_LINK;
+
+		switch (type) {
+		case CORESIGHT_DEV_TYPE_SINK:
+			ret = coresight_enable_sink(csdev, mode);
+			if (ret)
+				goto err;
+			break;
+		case CORESIGHT_DEV_TYPE_SOURCE:
+			/* sources are enabled from either sysFS or Perf */
+			break;
+		case CORESIGHT_DEV_TYPE_LINK:
+			parent = list_prev_entry(nd, link)->csdev;
+			child = list_next_entry(nd, link)->csdev;
+			ret = coresight_enable_link(csdev, parent, child);
+			if (ret)
+				goto err;
+			break;
+		default:
+			goto err;
+		}
+	}
+
+out:
+	return ret;
+err:
+	coresight_disable_path(path);
+	goto out;
+}
+
+struct coresight_device *coresight_get_sink(struct list_head *path)
+{
+	struct coresight_device *csdev;
+
+	if (!path)
+		return NULL;
+
+	csdev = list_last_entry(path, struct coresight_node, link)->csdev;
+	if (csdev->type != CORESIGHT_DEV_TYPE_SINK &&
+	    csdev->type != CORESIGHT_DEV_TYPE_LINKSINK)
+		return NULL;
+
+	return csdev;
+}
+
+static int coresight_enabled_sink(struct device *dev, void *data)
+{
+	bool *reset = data;
+	struct coresight_device *csdev = to_coresight_device(dev);
+
+	if ((csdev->type == CORESIGHT_DEV_TYPE_SINK ||
+	     csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) &&
+	     csdev->activated) {
+		/*
+		 * Now that we have a handle on the sink for this session,
+		 * disable the sysFS "enable_sink" flag so that possible
+		 * concurrent perf sessions that wish to use another sink don't
+		 * trip on it.  Doing so has no ramification for the current
+		 * session.
+		 */
+		if (*reset)
+			csdev->activated = false;
+
+		return 1;
+	}
+
+	return 0;
+}
+
+/**
+ * coresight_get_enabled_sink - returns the first enabled sink found on the bus
+ * @deactivate:	Whether the 'enable_sink' flag should be reset
+ *
+ * When operated from perf the deactivate parameter should be set to 'true'.
+ * That way the "enable_sink" flag of the sink that was selected can be reset,
+ * allowing for other concurrent perf sessions to choose a different sink.
+ *
+ * When operated from sysFS users have full control and as such the deactivate
+ * parameter should be set to 'false', hence mandating users to explicitly
+ * clear the flag.
+ */
+struct coresight_device *coresight_get_enabled_sink(bool deactivate)
+{
+	struct device *dev = NULL;
+
+	dev = bus_find_device(&coresight_bustype, NULL, &deactivate,
+			      coresight_enabled_sink);
+
+	return dev ? to_coresight_device(dev) : NULL;
+}
+
+/*
+ * coresight_grab_device - Power up this device and any of the helper
+ * devices connected to it for trace operation. Since the helper devices
+ * don't appear on the trace path, they should be handled along with
+ * the master device.
+ */
+static void coresight_grab_device(struct coresight_device *csdev)
+{
+	int i;
+
+	for (i = 0; i < csdev->nr_outport; i++) {
+		struct coresight_device *child = csdev->conns[i].child_dev;
+
+		if (child && child->type == CORESIGHT_DEV_TYPE_HELPER)
+			pm_runtime_get_sync(child->dev.parent);
+	}
+	pm_runtime_get_sync(csdev->dev.parent);
+}
+
+/*
+ * coresight_drop_device - Release this device and any of the helper
+ * devices connected to it.
+ */
+static void coresight_drop_device(struct coresight_device *csdev)
+{
+	int i;
+
+	pm_runtime_put(csdev->dev.parent);
+	for (i = 0; i < csdev->nr_outport; i++) {
+		struct coresight_device *child = csdev->conns[i].child_dev;
+
+		if (child && child->type == CORESIGHT_DEV_TYPE_HELPER)
+			pm_runtime_put(child->dev.parent);
+	}
+}
+
+/**
+ * _coresight_build_path - recursively build a path from a @csdev to a sink.
+ * @csdev:	The device to start from.
+ * @sink:	The sink we are trying to reach.
+ * @path:	The list to add devices to.
+ *
+ * The tree of CoreSight devices is traversed until an activated sink is
+ * found.  From there the sink is added to the list along with all the
+ * devices that led to that point - the end result is a list from source
+ * to sink. In that list the source is the first device and the sink the
+ * last one.
+ */
+static int _coresight_build_path(struct coresight_device *csdev,
+				 struct coresight_device *sink,
+				 struct list_head *path)
+{
+	int i;
+	bool found = false;
+	struct coresight_node *node;
+
+	/* An activated sink has been found.  Enqueue the element */
+	if (csdev == sink)
+		goto out;
+
+	/* Not a sink - recursively explore each port found on this element */
+	for (i = 0; i < csdev->nr_outport; i++) {
+		struct coresight_device *child_dev = csdev->conns[i].child_dev;
+
+		if (child_dev &&
+		    _coresight_build_path(child_dev, sink, path) == 0) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found)
+		return -ENODEV;
+
+out:
+	/*
+	 * A path from this element to a sink has been found.  The elements
+	 * leading to the sink are already enqueued, all that is left to do
+	 * is tell the PM runtime core we need this element and add a node
+	 * for it.
+	 */
+	node = kzalloc(sizeof(struct coresight_node), GFP_KERNEL);
+	if (!node)
+		return -ENOMEM;
+
+	coresight_grab_device(csdev);
+	node->csdev = csdev;
+	list_add(&node->link, path);
+
+	return 0;
+}
+
+struct list_head *coresight_build_path(struct coresight_device *source,
+				       struct coresight_device *sink)
+{
+	struct list_head *path;
+	int rc;
+
+	if (!sink)
+		return ERR_PTR(-EINVAL);
+
+	path = kzalloc(sizeof(struct list_head), GFP_KERNEL);
+	if (!path)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(path);
+
+	rc = _coresight_build_path(source, sink, path);
+	if (rc) {
+		kfree(path);
+		return ERR_PTR(rc);
+	}
+
+	return path;
+}
+
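+/*
+ * Typical life cycle of a path (illustrative sketch; this is what
+ * coresight_enable() and coresight_disable() below do):
+ *
+ *	path = coresight_build_path(source, sink);
+ *	coresight_enable_path(path, CS_MODE_SYSFS);
+ *	...
+ *	coresight_disable_path(path);
+ *	coresight_release_path(path);
+ */
+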
+/**
+ * coresight_release_path - release a previously built path.
+ * @path:	the path to release.
+ *
+ * Go through all the elements of a path and 1) remove them from the list and
+ * 2) free the memory allocated for each node.
+ */
+void coresight_release_path(struct list_head *path)
+{
+	struct coresight_device *csdev;
+	struct coresight_node *nd, *next;
+
+	list_for_each_entry_safe(nd, next, path, link) {
+		csdev = nd->csdev;
+
+		coresight_drop_device(csdev);
+		list_del(&nd->link);
+		kfree(nd);
+	}
+
+	kfree(path);
+	path = NULL;
+}
+
+/**
+ * coresight_validate_source - make sure a source has the right credentials
+ *  @csdev:	the device structure for a source.
+ *  @function:	the function this was called from.
+ *
+ * Assumes the coresight_mutex is held.
+ */
+static int coresight_validate_source(struct coresight_device *csdev,
+				     const char *function)
+{
+	u32 type, subtype;
+
+	type = csdev->type;
+	subtype = csdev->subtype.source_subtype;
+
+	if (type != CORESIGHT_DEV_TYPE_SOURCE) {
+		dev_err(&csdev->dev, "wrong device type in %s\n", function);
+		return -EINVAL;
+	}
+
+	if (subtype != CORESIGHT_DEV_SUBTYPE_SOURCE_PROC &&
+	    subtype != CORESIGHT_DEV_SUBTYPE_SOURCE_SOFTWARE) {
+		dev_err(&csdev->dev, "wrong device subtype in %s\n", function);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int coresight_enable(struct coresight_device *csdev)
+{
+	int cpu, ret = 0;
+	struct coresight_device *sink;
+	struct list_head *path;
+	enum coresight_dev_subtype_source subtype;
+
+	subtype = csdev->subtype.source_subtype;
+
+	mutex_lock(&coresight_mutex);
+
+	ret = coresight_validate_source(csdev, __func__);
+	if (ret)
+		goto out;
+
+	if (csdev->enable) {
+		/*
+		 * There could be multiple applications driving the software
+		 * source. So keep the refcount for each such user when the
+		 * source is already enabled.
+		 */
+		if (subtype == CORESIGHT_DEV_SUBTYPE_SOURCE_SOFTWARE)
+			atomic_inc(csdev->refcnt);
+		goto out;
+	}
+
+	/*
+	 * Search for a valid sink for this session but don't reset the
+	 * "enable_sink" flag in sysFS.  Users get to do that explicitly.
+	 */
+	sink = coresight_get_enabled_sink(false);
+	if (!sink) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	path = coresight_build_path(csdev, sink);
+	if (IS_ERR(path)) {
+		pr_err("building path(s) failed\n");
+		ret = PTR_ERR(path);
+		goto out;
+	}
+
+	ret = coresight_enable_path(path, CS_MODE_SYSFS);
+	if (ret)
+		goto err_path;
+
+	ret = coresight_enable_source(csdev, CS_MODE_SYSFS);
+	if (ret)
+		goto err_source;
+
+	switch (subtype) {
+	case CORESIGHT_DEV_SUBTYPE_SOURCE_PROC:
+		/*
+		 * When working from sysFS it is important to keep track
+		 * of the paths that were created so that they can be
+		 * undone in 'coresight_disable()'.  Since there can only
+		 * be a single session per tracer (when working from sysFS)
+		 * a per-cpu variable will do just fine.
+		 */
+		cpu = source_ops(csdev)->cpu_id(csdev);
+		per_cpu(tracer_path, cpu) = path;
+		break;
+	case CORESIGHT_DEV_SUBTYPE_SOURCE_SOFTWARE:
+		stm_path = path;
+		break;
+	default:
+		/* We can't be here */
+		break;
+	}
+
+out:
+	mutex_unlock(&coresight_mutex);
+	return ret;
+
+err_source:
+	coresight_disable_path(path);
+
+err_path:
+	coresight_release_path(path);
+	goto out;
+}
+EXPORT_SYMBOL_GPL(coresight_enable);
+
+void coresight_disable(struct coresight_device *csdev)
+{
+	int cpu, ret;
+	struct list_head *path = NULL;
+
+	mutex_lock(&coresight_mutex);
+
+	ret = coresight_validate_source(csdev, __func__);
+	if (ret)
+		goto out;
+
+	if (!csdev->enable || !coresight_disable_source(csdev))
+		goto out;
+
+	switch (csdev->subtype.source_subtype) {
+	case CORESIGHT_DEV_SUBTYPE_SOURCE_PROC:
+		cpu = source_ops(csdev)->cpu_id(csdev);
+		path = per_cpu(tracer_path, cpu);
+		per_cpu(tracer_path, cpu) = NULL;
+		break;
+	case CORESIGHT_DEV_SUBTYPE_SOURCE_SOFTWARE:
+		path = stm_path;
+		stm_path = NULL;
+		break;
+	default:
+		/* We can't be here */
+		break;
+	}
+
+	coresight_disable_path(path);
+	coresight_release_path(path);
+
+out:
+	mutex_unlock(&coresight_mutex);
+}
+EXPORT_SYMBOL_GPL(coresight_disable);
+
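+/*
+ * sysFS interface for the "enable_sink" and "enable_source" attributes
+ * below, e.g. (device names are illustrative):
+ *
+ *	echo 1 > /sys/bus/coresight/devices/<sink>/enable_sink
+ *	echo 1 > /sys/bus/coresight/devices/<source>/enable_source
+ */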
+static ssize_t enable_sink_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct coresight_device *csdev = to_coresight_device(dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%u\n", csdev->activated);
+}
+
+static ssize_t enable_sink_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct coresight_device *csdev = to_coresight_device(dev);
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	if (val)
+		csdev->activated = true;
+	else
+		csdev->activated = false;
+
+	return size;
+
+}
+static DEVICE_ATTR_RW(enable_sink);
+
+static ssize_t enable_source_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct coresight_device *csdev = to_coresight_device(dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%u\n", csdev->enable);
+}
+
+static ssize_t enable_source_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t size)
+{
+	int ret = 0;
+	unsigned long val;
+	struct coresight_device *csdev = to_coresight_device(dev);
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	if (val) {
+		ret = coresight_enable(csdev);
+		if (ret)
+			return ret;
+	} else {
+		coresight_disable(csdev);
+	}
+
+	return size;
+}
+static DEVICE_ATTR_RW(enable_source);
+
+static struct attribute *coresight_sink_attrs[] = {
+	&dev_attr_enable_sink.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(coresight_sink);
+
+static struct attribute *coresight_source_attrs[] = {
+	&dev_attr_enable_source.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(coresight_source);
+
+static struct device_type coresight_dev_type[] = {
+	{
+		.name = "none",
+	},
+	{
+		.name = "sink",
+		.groups = coresight_sink_groups,
+	},
+	{
+		.name = "link",
+	},
+	{
+		.name = "linksink",
+		.groups = coresight_sink_groups,
+	},
+	{
+		.name = "source",
+		.groups = coresight_source_groups,
+	},
+	{
+		.name = "helper",
+	},
+};
+
+static void coresight_device_release(struct device *dev)
+{
+	struct coresight_device *csdev = to_coresight_device(dev);
+
+	kfree(csdev->conns);
+	kfree(csdev->refcnt);
+	kfree(csdev);
+}
+
+static int coresight_orphan_match(struct device *dev, void *data)
+{
+	int i;
+	bool still_orphan = false;
+	struct coresight_device *csdev, *i_csdev;
+	struct coresight_connection *conn;
+
+	csdev = data;
+	i_csdev = to_coresight_device(dev);
+
+	/* No need to check oneself */
+	if (csdev == i_csdev)
+		return 0;
+
+	/* Move on to another component if it has no orphan connections */
+	if (!i_csdev->orphan)
+		return 0;
+	/*
+	 * Circle through all the connections of that component.  If we find
+	 * an orphan connection whose name matches @csdev, link it.
+	 */
+	for (i = 0; i < i_csdev->nr_outport; i++) {
+		conn = &i_csdev->conns[i];
+
+		/* We have found at least one orphan connection */
+		if (conn->child_dev == NULL) {
+			/* Does it match this newly added device? */
+			if (conn->child_name &&
+			    !strcmp(dev_name(&csdev->dev), conn->child_name)) {
+				conn->child_dev = csdev;
+			} else {
+				/* This component still has an orphan */
+				still_orphan = true;
+			}
+		}
+	}
+
+	i_csdev->orphan = still_orphan;
+
+	/*
+	 * Returning '0' ensures that all known components on the
+	 * bus will be checked.
+	 */
+	return 0;
+}
+
+static void coresight_fixup_orphan_conns(struct coresight_device *csdev)
+{
+	/*
+	 * No need to check for a return value as orphan connection(s)
+	 * are hooked-up with each newly added component.
+	 */
+	bus_for_each_dev(&coresight_bustype, NULL,
+			 csdev, coresight_orphan_match);
+}
+
+
+static void coresight_fixup_device_conns(struct coresight_device *csdev)
+{
+	int i;
+
+	for (i = 0; i < csdev->nr_outport; i++) {
+		struct coresight_connection *conn = &csdev->conns[i];
+		struct device *dev = NULL;
+
+		if (conn->child_name)
+			dev = bus_find_device_by_name(&coresight_bustype, NULL,
+						      conn->child_name);
+		if (dev) {
+			conn->child_dev = to_coresight_device(dev);
+			/* and put reference from 'bus_find_device()' */
+			put_device(dev);
+		} else {
+			csdev->orphan = true;
+			conn->child_dev = NULL;
+		}
+	}
+}
+
+static int coresight_remove_match(struct device *dev, void *data)
+{
+	int i;
+	struct coresight_device *csdev, *iterator;
+	struct coresight_connection *conn;
+
+	csdev = data;
+	iterator = to_coresight_device(dev);
+
+	/* No need to check oneself */
+	if (csdev == iterator)
+		return 0;
+
+	/*
+	 * Circle through all the connections of that component.  If we find
+	 * a connection whose name matches @csdev, remove it.
+	 */
+	for (i = 0; i < iterator->nr_outport; i++) {
+		conn = &iterator->conns[i];
+
+		if (conn->child_dev == NULL)
+			continue;
+
+		if (!strcmp(dev_name(&csdev->dev), conn->child_name)) {
+			iterator->orphan = true;
+			conn->child_dev = NULL;
+			/* No need to continue */
+			break;
+		}
+	}
+
+	/*
+	 * Returning '0' ensures that all known components on the
+	 * bus will be checked.
+	 */
+	return 0;
+}
+
+static void coresight_remove_conns(struct coresight_device *csdev)
+{
+	bus_for_each_dev(&coresight_bustype, NULL,
+			 csdev, coresight_remove_match);
+}
+
+/**
+ * coresight_timeout - loop until a bit has changed to a specific state.
+ * @addr: base address of the area of interest.
+ * @offset: address of a register, starting from @addr.
+ * @position: the position of the bit of interest.
+ * @value: the value the bit should have.
+ *
+ * Return: 0 as soon as the bit has taken the desired state or -EAGAIN if
+ * TIMEOUT_US has elapsed, whichever happens first.
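+ *
+ * Example (illustrative only, the register offset and bit are made up):
+ * wait for bit 2 of a status register at offset 0x0c to read back as 1:
+ *
+ *	if (coresight_timeout(drvdata->base, 0x0c, 2, 1))
+ *		dev_err(dev, "timeout waiting for the ready bit\n");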
+ */
+
+int coresight_timeout(void __iomem *addr, u32 offset, int position, int value)
+{
+	int i;
+	u32 val;
+
+	for (i = TIMEOUT_US; i > 0; i--) {
+		val = __raw_readl(addr + offset);
+		/* waiting on the bit to go from 0 to 1 */
+		if (value) {
+			if (val & BIT(position))
+				return 0;
+		/* waiting on the bit to go from 1 to 0 */
+		} else {
+			if (!(val & BIT(position)))
+				return 0;
+		}
+
+		/*
+		 * Delay is arbitrary - the specification doesn't say how long
+		 * we are expected to wait.  Extra check required to make sure
+		 * we don't wait needlessly on the last iteration.
+		 */
+		if (i - 1)
+			udelay(1);
+	}
+
+	return -EAGAIN;
+}
+
+struct bus_type coresight_bustype = {
+	.name	= "coresight",
+};
+
+static int __init coresight_init(void)
+{
+	return bus_register(&coresight_bustype);
+}
+postcore_initcall(coresight_init);
+
+struct coresight_device *coresight_register(struct coresight_desc *desc)
+{
+	int i;
+	int ret;
+	int link_subtype;
+	int nr_refcnts = 1;
+	atomic_t *refcnts = NULL;
+	struct coresight_device *csdev;
+	struct coresight_connection *conns = NULL;
+
+	csdev = kzalloc(sizeof(*csdev), GFP_KERNEL);
+	if (!csdev) {
+		ret = -ENOMEM;
+		goto err_kzalloc_csdev;
+	}
+
+	if (desc->type == CORESIGHT_DEV_TYPE_LINK ||
+	    desc->type == CORESIGHT_DEV_TYPE_LINKSINK) {
+		link_subtype = desc->subtype.link_subtype;
+
+		if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_MERG)
+			nr_refcnts = desc->pdata->nr_inport;
+		else if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_SPLIT)
+			nr_refcnts = desc->pdata->nr_outport;
+	}
+
+	refcnts = kcalloc(nr_refcnts, sizeof(*refcnts), GFP_KERNEL);
+	if (!refcnts) {
+		ret = -ENOMEM;
+		goto err_kzalloc_refcnts;
+	}
+
+	csdev->refcnt = refcnts;
+
+	csdev->nr_inport = desc->pdata->nr_inport;
+	csdev->nr_outport = desc->pdata->nr_outport;
+
+	/* Initialise connections if there is at least one outport */
+	if (csdev->nr_outport) {
+		conns = kcalloc(csdev->nr_outport, sizeof(*conns), GFP_KERNEL);
+		if (!conns) {
+			ret = -ENOMEM;
+			goto err_kzalloc_conns;
+		}
+
+		for (i = 0; i < csdev->nr_outport; i++) {
+			conns[i].outport = desc->pdata->outports[i];
+			conns[i].child_name = desc->pdata->child_names[i];
+			conns[i].child_port = desc->pdata->child_ports[i];
+		}
+	}
+
+	csdev->conns = conns;
+
+	csdev->type = desc->type;
+	csdev->subtype = desc->subtype;
+	csdev->ops = desc->ops;
+	csdev->orphan = false;
+
+	csdev->dev.type = &coresight_dev_type[desc->type];
+	csdev->dev.groups = desc->groups;
+	csdev->dev.parent = desc->dev;
+	csdev->dev.release = coresight_device_release;
+	csdev->dev.bus = &coresight_bustype;
+	dev_set_name(&csdev->dev, "%s", desc->pdata->name);
+
+	ret = device_register(&csdev->dev);
+	if (ret) {
+		put_device(&csdev->dev);
+		goto err_kzalloc_csdev;
+	}
+
+	mutex_lock(&coresight_mutex);
+
+	coresight_fixup_device_conns(csdev);
+	coresight_fixup_orphan_conns(csdev);
+
+	mutex_unlock(&coresight_mutex);
+
+	return csdev;
+
+err_kzalloc_conns:
+	kfree(refcnts);
+err_kzalloc_refcnts:
+	kfree(csdev);
+err_kzalloc_csdev:
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(coresight_register);
+
+void coresight_unregister(struct coresight_device *csdev)
+{
+	/* Remove references of that device in the topology */
+	coresight_remove_conns(csdev);
+	device_unregister(&csdev->dev);
+}
+EXPORT_SYMBOL_GPL(coresight_unregister);
diff --git a/drivers/hwtracing/coresight/of_coresight.c b/drivers/hwtracing/coresight/of_coresight.c
new file mode 100644
index 0000000..6880bee
--- /dev/null
+++ b/drivers/hwtracing/coresight/of_coresight.c
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2012, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/types.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/clk.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_graph.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/amba/bus.h>
+#include <linux/coresight.h>
+#include <linux/cpumask.h>
+#include <asm/smp_plat.h>
+
+
+static int of_dev_node_match(struct device *dev, void *data)
+{
+	return dev->of_node == data;
+}
+
+static struct device *
+of_coresight_get_endpoint_device(struct device_node *endpoint)
+{
+	struct device *dev = NULL;
+
+	/*
+	 * If we have a non-configurable replicator, it will be found on the
+	 * platform bus.
+	 */
+	dev = bus_find_device(&platform_bus_type, NULL,
+			      endpoint, of_dev_node_match);
+	if (dev)
+		return dev;
+
+	/*
+	 * We have a configurable component - circle through the AMBA bus
+	 * looking for the device that matches the endpoint node.
+	 */
+	return bus_find_device(&amba_bustype, NULL,
+			       endpoint, of_dev_node_match);
+}
+
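+/*
+ * In the DT graph binding used here every connection is an endpoint, and
+ * an input endpoint is marked with a "slave-mode" property, e.g.
+ * (illustrative fragment):
+ *
+ *	port {
+ *		endpoint {
+ *			slave-mode;
+ *			remote-endpoint = <&funnel_out_port0>;
+ *		};
+ *	};
+ */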
+static void of_coresight_get_ports(const struct device_node *node,
+				   int *nr_inport, int *nr_outport)
+{
+	struct device_node *ep = NULL;
+	int in = 0, out = 0;
+
+	do {
+		ep = of_graph_get_next_endpoint(node, ep);
+		if (!ep)
+			break;
+
+		if (of_property_read_bool(ep, "slave-mode"))
+			in++;
+		else
+			out++;
+
+	} while (ep);
+
+	*nr_inport = in;
+	*nr_outport = out;
+}
+
+static int of_coresight_alloc_memory(struct device *dev,
+			struct coresight_platform_data *pdata)
+{
+	/* List of output port on this component */
+	pdata->outports = devm_kcalloc(dev,
+				       pdata->nr_outport,
+				       sizeof(*pdata->outports),
+				       GFP_KERNEL);
+	if (!pdata->outports)
+		return -ENOMEM;
+
+	/* Children connected to this component via @outports */
+	pdata->child_names = devm_kcalloc(dev,
+					  pdata->nr_outport,
+					  sizeof(*pdata->child_names),
+					  GFP_KERNEL);
+	if (!pdata->child_names)
+		return -ENOMEM;
+
+	/* Port number on the child this component is connected to */
+	pdata->child_ports = devm_kcalloc(dev,
+					  pdata->nr_outport,
+					  sizeof(*pdata->child_ports),
+					  GFP_KERNEL);
+	if (!pdata->child_ports)
+		return -ENOMEM;
+
+	return 0;
+}
+
+int of_coresight_get_cpu(const struct device_node *node)
+{
+	int cpu;
+	struct device_node *dn;
+
+	dn = of_parse_phandle(node, "cpu", 0);
+	/* Affinity defaults to CPU0 */
+	if (!dn)
+		return 0;
+	cpu = of_cpu_node_to_id(dn);
+	of_node_put(dn);
+
+	/* Affinity to CPU0 if no cpu nodes are found */
+	return (cpu < 0) ? 0 : cpu;
+}
+EXPORT_SYMBOL_GPL(of_coresight_get_cpu);
+
+struct coresight_platform_data *
+of_get_coresight_platform_data(struct device *dev,
+			       const struct device_node *node)
+{
+	int i = 0, ret = 0;
+	struct coresight_platform_data *pdata;
+	struct of_endpoint endpoint, rendpoint;
+	struct device *rdev;
+	struct device_node *ep = NULL;
+	struct device_node *rparent = NULL;
+	struct device_node *rport = NULL;
+
+	pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return ERR_PTR(-ENOMEM);
+
+	/* Use device name as sysfs handle */
+	pdata->name = dev_name(dev);
+
+	/* Get the number of input and output port for this component */
+	of_coresight_get_ports(node, &pdata->nr_inport, &pdata->nr_outport);
+
+	if (pdata->nr_outport) {
+		ret = of_coresight_alloc_memory(dev, pdata);
+		if (ret)
+			return ERR_PTR(ret);
+
+		/* Iterate through each port to discover topology */
+		do {
+			/* Get a handle on a port */
+			ep = of_graph_get_next_endpoint(node, ep);
+			if (!ep)
+				break;
+
+			/*
+			 * No need to deal with input ports, processing for
+			 * output ports will deal with them.
+			 */
+			if (of_find_property(ep, "slave-mode", NULL))
+				continue;
+
+			/* Get a handle on the local endpoint */
+			ret = of_graph_parse_endpoint(ep, &endpoint);
+
+			if (ret)
+				continue;
+
+			/* The local out port number */
+			pdata->outports[i] = endpoint.port;
+
+			/*
+			 * Get a handle on the remote port and parent
+			 * attached to it.
+			 */
+			rparent = of_graph_get_remote_port_parent(ep);
+			rport = of_graph_get_remote_port(ep);
+
+			if (!rparent || !rport)
+				continue;
+
+			if (of_graph_parse_endpoint(rport, &rendpoint))
+				continue;
+
+			rdev = of_coresight_get_endpoint_device(rparent);
+			if (!rdev)
+				return ERR_PTR(-EPROBE_DEFER);
+
+			pdata->child_names[i] = dev_name(rdev);
+			pdata->child_ports[i] = rendpoint.id;
+
+			i++;
+		} while (ep);
+	}
+
+	pdata->cpu = of_coresight_get_cpu(node);
+
+	return pdata;
+}
+EXPORT_SYMBOL_GPL(of_get_coresight_platform_data);
diff --git a/drivers/hwtracing/intel_th/Kconfig b/drivers/hwtracing/intel_th/Kconfig
new file mode 100644
index 0000000..ca0527d
--- /dev/null
+++ b/drivers/hwtracing/intel_th/Kconfig
@@ -0,0 +1,85 @@
+config INTEL_TH
+	tristate "Intel(R) Trace Hub controller"
+	depends on HAS_DMA && HAS_IOMEM
+	help
+	  Intel(R) Trace Hub (TH) is a set of hardware blocks (subdevices) that
+	  produce, switch and output trace data from multiple hardware and
+	  software sources over several types of trace output ports encoded
+	  in System Trace Protocol (MIPI STPv2) and is intended to perform
+	  full system debugging.
+
+	  This option enables the intel_th bus and the common code used by
+	  TH subdevices to interact with each other and with the hardware,
+	  and for platform glue layers to drive Intel TH devices.
+
+	  Say Y here to enable Intel(R) Trace Hub controller support.
+
+if INTEL_TH
+
+config INTEL_TH_PCI
+	tristate "Intel(R) Trace Hub PCI controller"
+	depends on PCI
+	help
+	  Intel(R) Trace Hub may exist as a PCI device. This option enables
+	  the glue layer for PCI-based Intel TH.
+
+	  Say Y here to enable PCI Intel TH support.
+
+config INTEL_TH_ACPI
+	tristate "Intel(R) Trace Hub ACPI controller"
+	depends on ACPI
+	help
+	  Intel(R) Trace Hub may exist as an ACPI device. This option enables
+	  the glue layer for ACPI-based Intel TH. This typically implies
+	  'host debugger' mode, that is, the trace configuration and capture
+	  is handled by an external debug host and corresponding controls will
+	  not be available on the target.
+
+	  Say Y here to enable ACPI Intel TH support.
+
+config INTEL_TH_GTH
+	tristate "Intel(R) Trace Hub Global Trace Hub"
+	help
+	  Global Trace Hub (GTH) is the central component of the
+	  Intel TH infrastructure and acts as a switch for source
+	  and output devices. This driver is required for other
+	  Intel TH subdevices to initialize.
+
+	  Say Y here to enable GTH subdevice of Intel(R) Trace Hub.
+
+config INTEL_TH_STH
+	tristate "Intel(R) Trace Hub Software Trace Hub support"
+	depends on STM
+	help
+	  Software Trace Hub (STH) enables trace data from software
+	  trace sources to be sent out via Intel(R) Trace Hub. It
+	  uses stm class device to interface with its sources.
+
+	  Say Y here to enable STH subdevice of Intel(R) Trace Hub.
+
+config INTEL_TH_MSU
+	tristate "Intel(R) Trace Hub Memory Storage Unit"
+	help
+	  Memory Storage Unit (MSU) trace output device enables
+	  storing STP traces to system memory. It supports single
+	  and multiblock modes of operation and provides read()
+	  and mmap() access to the collected data.
+
+	  Say Y here to enable MSU output device for Intel TH.
+
+config INTEL_TH_PTI
+	tristate "Intel(R) Trace Hub PTI output"
+	help
+	  Parallel Trace Interface unit (PTI) is a trace output device
+	  of Intel TH architecture that facilitates STP trace output via
+	  a PTI port.
+
+	  Say Y to enable PTI output of Intel TH data.
+
+config INTEL_TH_DEBUG
+	bool "Intel(R) Trace Hub debugging"
+	depends on DEBUG_FS
+	help
+	  Say Y here to enable debugging.
+
+endif
diff --git a/drivers/hwtracing/intel_th/Makefile b/drivers/hwtracing/intel_th/Makefile
new file mode 100644
index 0000000..d9252fa
--- /dev/null
+++ b/drivers/hwtracing/intel_th/Makefile
@@ -0,0 +1,22 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_INTEL_TH)		+= intel_th.o
+intel_th-y			:= core.o
+intel_th-$(CONFIG_INTEL_TH_DEBUG) += debug.o
+
+obj-$(CONFIG_INTEL_TH_PCI)	+= intel_th_pci.o
+intel_th_pci-y			:= pci.o
+
+obj-$(CONFIG_INTEL_TH_ACPI)	+= intel_th_acpi.o
+intel_th_acpi-y			:= acpi.o
+
+obj-$(CONFIG_INTEL_TH_GTH)	+= intel_th_gth.o
+intel_th_gth-y			:= gth.o
+
+obj-$(CONFIG_INTEL_TH_STH)	+= intel_th_sth.o
+intel_th_sth-y			:= sth.o
+
+obj-$(CONFIG_INTEL_TH_MSU)	+= intel_th_msu.o
+intel_th_msu-y			:= msu.o
+
+obj-$(CONFIG_INTEL_TH_PTI)	+= intel_th_pti.o
+intel_th_pti-y			:= pti.o
diff --git a/drivers/hwtracing/intel_th/acpi.c b/drivers/hwtracing/intel_th/acpi.c
new file mode 100644
index 0000000..87bc374
--- /dev/null
+++ b/drivers/hwtracing/intel_th/acpi.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel(R) Trace Hub ACPI driver
+ *
+ * Copyright (C) 2017 Intel Corporation.
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/sysfs.h>
+#include <linux/platform_device.h>
+#include <linux/acpi.h>
+
+#include "intel_th.h"
+
+#define DRIVER_NAME "intel_th_acpi"
+
+static const struct intel_th_drvdata intel_th_acpi_pch = {
+	.host_mode_only	= 1,
+};
+
+static const struct intel_th_drvdata intel_th_acpi_uncore = {
+	.host_mode_only	= 1,
+};
+
+static const struct acpi_device_id intel_th_acpi_ids[] = {
+	{ "INTC1000",	(kernel_ulong_t)&intel_th_acpi_uncore },
+	{ "INTC1001",	(kernel_ulong_t)&intel_th_acpi_pch },
+	{ "",		0 },
+};
+
+MODULE_DEVICE_TABLE(acpi, intel_th_acpi_ids);
+
+static int intel_th_acpi_probe(struct platform_device *pdev)
+{
+	struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
+	const struct acpi_device_id *id;
+	struct intel_th *th;
+
+	id = acpi_match_device(intel_th_acpi_ids, &pdev->dev);
+	if (!id)
+		return -ENODEV;
+
+	th = intel_th_alloc(&pdev->dev, (void *)id->driver_data,
+			    pdev->resource, pdev->num_resources, -1);
+	if (IS_ERR(th))
+		return PTR_ERR(th);
+
+	adev->driver_data = th;
+
+	return 0;
+}
+
+static int intel_th_acpi_remove(struct platform_device *pdev)
+{
+	struct intel_th *th = platform_get_drvdata(pdev);
+
+	intel_th_free(th);
+
+	return 0;
+}
+
+static struct platform_driver intel_th_acpi_driver = {
+	.probe		= intel_th_acpi_probe,
+	.remove		= intel_th_acpi_remove,
+	.driver		= {
+		.name			= DRIVER_NAME,
+		.acpi_match_table	= intel_th_acpi_ids,
+	},
+};
+
+module_platform_driver(intel_th_acpi_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Intel(R) Trace Hub ACPI controller driver");
+MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@intel.com>");
diff --git a/drivers/hwtracing/intel_th/core.c b/drivers/hwtracing/intel_th/core.c
new file mode 100644
index 0000000..fc6b7f8
--- /dev/null
+++ b/drivers/hwtracing/intel_th/core.c
@@ -0,0 +1,981 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel(R) Trace Hub driver core
+ *
+ * Copyright (C) 2014-2015 Intel Corporation.
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/sysfs.h>
+#include <linux/kdev_t.h>
+#include <linux/debugfs.h>
+#include <linux/idr.h>
+#include <linux/pci.h>
+#include <linux/pm_runtime.h>
+#include <linux/dma-mapping.h>
+
+#include "intel_th.h"
+#include "debug.h"
+
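+/*
+ * When set, the trace hub is driven by an external debug host: output
+ * subdevices are not created locally (see intel_th_populate()).
+ */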
+static bool host_mode __read_mostly;
+module_param(host_mode, bool, 0444);
+
+static DEFINE_IDA(intel_th_ida);
+
+static int intel_th_match(struct device *dev, struct device_driver *driver)
+{
+	struct intel_th_driver *thdrv = to_intel_th_driver(driver);
+	struct intel_th_device *thdev = to_intel_th_device(dev);
+
+	if (thdev->type == INTEL_TH_SWITCH &&
+	    (!thdrv->enable || !thdrv->disable))
+		return 0;
+
+	return !strcmp(thdev->name, driver->name);
+}
+
+static int intel_th_child_remove(struct device *dev, void *data)
+{
+	device_release_driver(dev);
+
+	return 0;
+}
+
+static int intel_th_probe(struct device *dev)
+{
+	struct intel_th_driver *thdrv = to_intel_th_driver(dev->driver);
+	struct intel_th_device *thdev = to_intel_th_device(dev);
+	struct intel_th_driver *hubdrv;
+	struct intel_th_device *hub = NULL;
+	int ret;
+
+	if (thdev->type == INTEL_TH_SWITCH)
+		hub = thdev;
+	else if (dev->parent)
+		hub = to_intel_th_device(dev->parent);
+
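+	/*
+	 * Source and output devices can only probe once the switch (hub)
+	 * driver is bound, so defer until then.
+	 */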
+	if (!hub || !hub->dev.driver)
+		return -EPROBE_DEFER;
+
+	hubdrv = to_intel_th_driver(hub->dev.driver);
+
+	pm_runtime_set_active(dev);
+	pm_runtime_no_callbacks(dev);
+	pm_runtime_enable(dev);
+
+	ret = thdrv->probe(to_intel_th_device(dev));
+	if (ret)
+		goto out_pm;
+
+	if (thdrv->attr_group) {
+		ret = sysfs_create_group(&thdev->dev.kobj, thdrv->attr_group);
+		if (ret)
+			goto out;
+	}
+
+	if (thdev->type == INTEL_TH_OUTPUT &&
+	    !intel_th_output_assigned(thdev))
+		/* does not talk to hardware */
+		ret = hubdrv->assign(hub, thdev);
+
+out:
+	if (ret)
+		thdrv->remove(thdev);
+
+out_pm:
+	if (ret)
+		pm_runtime_disable(dev);
+
+	return ret;
+}
+
+static void intel_th_device_remove(struct intel_th_device *thdev);
+
+static int intel_th_remove(struct device *dev)
+{
+	struct intel_th_driver *thdrv = to_intel_th_driver(dev->driver);
+	struct intel_th_device *thdev = to_intel_th_device(dev);
+	struct intel_th_device *hub = to_intel_th_hub(thdev);
+	int err;
+
+	if (thdev->type == INTEL_TH_SWITCH) {
+		struct intel_th *th = to_intel_th(hub);
+		int i, lowest;
+
+		/* disconnect outputs */
+		err = device_for_each_child(dev, thdev, intel_th_child_remove);
+		if (err)
+			return err;
+
+		/*
+		 * Remove outputs, that is, hub's children: they are created
+		 * at hub's probe time by having the hub call
+		 * intel_th_output_enable() for each of them.
+		 */
+		for (i = 0, lowest = -1; i < th->num_thdevs; i++) {
+			/*
+			 * Move the non-output devices from higher up the
+			 * th->thdev[] array to lower positions to maintain
+			 * a contiguous array.
+			 */
+			if (th->thdev[i]->type != INTEL_TH_OUTPUT) {
+				if (lowest >= 0) {
+					th->thdev[lowest] = th->thdev[i];
+					th->thdev[i] = NULL;
+					++lowest;
+				}
+
+				continue;
+			}
+
+			if (lowest == -1)
+				lowest = i;
+
+			intel_th_device_remove(th->thdev[i]);
+			th->thdev[i] = NULL;
+		}
+
+		if (lowest >= 0)
+			th->num_thdevs = lowest;
+	}
+
+	if (thdrv->attr_group)
+		sysfs_remove_group(&thdev->dev.kobj, thdrv->attr_group);
+
+	pm_runtime_get_sync(dev);
+
+	thdrv->remove(thdev);
+
+	if (intel_th_output_assigned(thdev)) {
+		struct intel_th_driver *hubdrv =
+			to_intel_th_driver(dev->parent->driver);
+
+		if (hub->dev.driver)
+			/* does not talk to hardware */
+			hubdrv->unassign(hub, thdev);
+	}
+
+	pm_runtime_disable(dev);
+	pm_runtime_set_active(dev);
+	pm_runtime_enable(dev);
+
+	return 0;
+}
+
+static struct bus_type intel_th_bus = {
+	.name		= "intel_th",
+	.match		= intel_th_match,
+	.probe		= intel_th_probe,
+	.remove		= intel_th_remove,
+};
+
+static void intel_th_device_free(struct intel_th_device *thdev);
+
+static void intel_th_device_release(struct device *dev)
+{
+	intel_th_device_free(to_intel_th_device(dev));
+}
+
+static struct device_type intel_th_source_device_type = {
+	.name		= "intel_th_source_device",
+	.release	= intel_th_device_release,
+};
+
+static char *intel_th_output_devnode(struct device *dev, umode_t *mode,
+				     kuid_t *uid, kgid_t *gid)
+{
+	struct intel_th_device *thdev = to_intel_th_device(dev);
+	struct intel_th *th = to_intel_th(thdev);
+	char *node;
+
+	if (thdev->id >= 0)
+		node = kasprintf(GFP_KERNEL, "intel_th%d/%s%d", th->id,
+				 thdev->name, thdev->id);
+	else
+		node = kasprintf(GFP_KERNEL, "intel_th%d/%s", th->id,
+				 thdev->name);
+
+	return node;
+}
+
+static ssize_t port_show(struct device *dev, struct device_attribute *attr,
+			 char *buf)
+{
+	struct intel_th_device *thdev = to_intel_th_device(dev);
+
+	if (thdev->output.port >= 0)
+		return scnprintf(buf, PAGE_SIZE, "%u\n", thdev->output.port);
+
+	return scnprintf(buf, PAGE_SIZE, "unassigned\n");
+}
+
+static DEVICE_ATTR_RO(port);
+
+static int intel_th_output_activate(struct intel_th_device *thdev)
+{
+	struct intel_th_driver *thdrv =
+		to_intel_th_driver_or_null(thdev->dev.driver);
+	struct intel_th *th = to_intel_th(thdev);
+	int ret = 0;
+
+	if (!thdrv)
+		return -ENODEV;
+
+	if (!try_module_get(thdrv->driver.owner))
+		return -ENODEV;
+
+	pm_runtime_get_sync(&thdev->dev);
+
+	if (th->activate)
+		ret = th->activate(th);
+	if (ret)
+		goto fail_put;
+
+	if (thdrv->activate)
+		ret = thdrv->activate(thdev);
+	else
+		intel_th_trace_enable(thdev);
+
+	if (ret)
+		goto fail_deactivate;
+
+	return 0;
+
+fail_deactivate:
+	if (th->deactivate)
+		th->deactivate(th);
+
+fail_put:
+	pm_runtime_put(&thdev->dev);
+	module_put(thdrv->driver.owner);
+
+	return ret;
+}
+
+static void intel_th_output_deactivate(struct intel_th_device *thdev)
+{
+	struct intel_th_driver *thdrv =
+		to_intel_th_driver_or_null(thdev->dev.driver);
+	struct intel_th *th = to_intel_th(thdev);
+
+	if (!thdrv)
+		return;
+
+	if (thdrv->deactivate)
+		thdrv->deactivate(thdev);
+	else
+		intel_th_trace_disable(thdev);
+
+	if (th->deactivate)
+		th->deactivate(th);
+
+	pm_runtime_put(&thdev->dev);
+	module_put(thdrv->driver.owner);
+}
+
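+/*
+ * "active" sysfs attribute: writing 1 activates trace collection to this
+ * output, e.g. (device name is illustrative):
+ *
+ *	echo 1 > /sys/bus/intel_th/devices/0-msc0/active
+ */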
+static ssize_t active_show(struct device *dev, struct device_attribute *attr,
+			   char *buf)
+{
+	struct intel_th_device *thdev = to_intel_th_device(dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n", thdev->output.active);
+}
+
+static ssize_t active_store(struct device *dev, struct device_attribute *attr,
+			    const char *buf, size_t size)
+{
+	struct intel_th_device *thdev = to_intel_th_device(dev);
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	if (!!val != thdev->output.active) {
+		if (val)
+			ret = intel_th_output_activate(thdev);
+		else
+			intel_th_output_deactivate(thdev);
+	}
+
+	return ret ? ret : size;
+}
+
+static DEVICE_ATTR_RW(active);
+
+static struct attribute *intel_th_output_attrs[] = {
+	&dev_attr_port.attr,
+	&dev_attr_active.attr,
+	NULL,
+};
+
+ATTRIBUTE_GROUPS(intel_th_output);
+
+static struct device_type intel_th_output_device_type = {
+	.name		= "intel_th_output_device",
+	.groups		= intel_th_output_groups,
+	.release	= intel_th_device_release,
+	.devnode	= intel_th_output_devnode,
+};
+
+static struct device_type intel_th_switch_device_type = {
+	.name		= "intel_th_switch_device",
+	.release	= intel_th_device_release,
+};
+
+static struct device_type *intel_th_device_type[] = {
+	[INTEL_TH_SOURCE]	= &intel_th_source_device_type,
+	[INTEL_TH_OUTPUT]	= &intel_th_output_device_type,
+	[INTEL_TH_SWITCH]	= &intel_th_switch_device_type,
+};
+
+int intel_th_driver_register(struct intel_th_driver *thdrv)
+{
+	if (!thdrv->probe || !thdrv->remove)
+		return -EINVAL;
+
+	thdrv->driver.bus = &intel_th_bus;
+
+	return driver_register(&thdrv->driver);
+}
+EXPORT_SYMBOL_GPL(intel_th_driver_register);
+
+void intel_th_driver_unregister(struct intel_th_driver *thdrv)
+{
+	driver_unregister(&thdrv->driver);
+}
+EXPORT_SYMBOL_GPL(intel_th_driver_unregister);
+
+static struct intel_th_device *
+intel_th_device_alloc(struct intel_th *th, unsigned int type, const char *name,
+		      int id)
+{
+	struct device *parent;
+	struct intel_th_device *thdev;
+
+	if (type == INTEL_TH_OUTPUT)
+		parent = &th->hub->dev;
+	else
+		parent = th->dev;
+
+	thdev = kzalloc(sizeof(*thdev) + strlen(name) + 1, GFP_KERNEL);
+	if (!thdev)
+		return NULL;
+
+	thdev->id = id;
+	thdev->type = type;
+
+	strcpy(thdev->name, name);
+	device_initialize(&thdev->dev);
+	thdev->dev.bus = &intel_th_bus;
+	thdev->dev.type = intel_th_device_type[type];
+	thdev->dev.parent = parent;
+	thdev->dev.dma_mask = parent->dma_mask;
+	thdev->dev.dma_parms = parent->dma_parms;
+	dma_set_coherent_mask(&thdev->dev, parent->coherent_dma_mask);
+	if (id >= 0)
+		dev_set_name(&thdev->dev, "%d-%s%d", th->id, name, id);
+	else
+		dev_set_name(&thdev->dev, "%d-%s", th->id, name);
+
+	return thdev;
+}
+
+static int intel_th_device_add_resources(struct intel_th_device *thdev,
+					 struct resource *res, int nres)
+{
+	struct resource *r;
+
+	r = kmemdup(res, sizeof(*res) * nres, GFP_KERNEL);
+	if (!r)
+		return -ENOMEM;
+
+	thdev->resource = r;
+	thdev->num_resources = nres;
+
+	return 0;
+}
+
+static void intel_th_device_remove(struct intel_th_device *thdev)
+{
+	device_del(&thdev->dev);
+	put_device(&thdev->dev);
+}
+
+static void intel_th_device_free(struct intel_th_device *thdev)
+{
+	kfree(thdev->resource);
+	kfree(thdev);
+}
+
+/*
+ * Intel(R) Trace Hub subdevices
+ */
+static const struct intel_th_subdevice {
+	const char		*name;
+	struct resource		res[3];
+	unsigned		nres;
+	unsigned		type;
+	unsigned		otype;
+	unsigned		scrpd;
+	int			id;
+} intel_th_subdevices[] = {
+	{
+		.nres	= 1,
+		.res	= {
+			{
+				/* Handle TSCU from GTH driver */
+				.start	= REG_GTH_OFFSET,
+				.end	= REG_TSCU_OFFSET + REG_TSCU_LENGTH - 1,
+				.flags	= IORESOURCE_MEM,
+			},
+		},
+		.name	= "gth",
+		.type	= INTEL_TH_SWITCH,
+		.id	= -1,
+	},
+	{
+		.nres	= 2,
+		.res	= {
+			{
+				.start	= REG_MSU_OFFSET,
+				.end	= REG_MSU_OFFSET + REG_MSU_LENGTH - 1,
+				.flags	= IORESOURCE_MEM,
+			},
+			{
+				.start	= BUF_MSU_OFFSET,
+				.end	= BUF_MSU_OFFSET + BUF_MSU_LENGTH - 1,
+				.flags	= IORESOURCE_MEM,
+			},
+		},
+		.name	= "msc",
+		.id	= 0,
+		.type	= INTEL_TH_OUTPUT,
+		.otype	= GTH_MSU,
+		.scrpd	= SCRPD_MEM_IS_PRIM_DEST | SCRPD_MSC0_IS_ENABLED,
+	},
+	{
+		.nres	= 2,
+		.res	= {
+			{
+				.start	= REG_MSU_OFFSET,
+				.end	= REG_MSU_OFFSET + REG_MSU_LENGTH - 1,
+				.flags	= IORESOURCE_MEM,
+			},
+			{
+				.start	= BUF_MSU_OFFSET,
+				.end	= BUF_MSU_OFFSET + BUF_MSU_LENGTH - 1,
+				.flags	= IORESOURCE_MEM,
+			},
+		},
+		.name	= "msc",
+		.id	= 1,
+		.type	= INTEL_TH_OUTPUT,
+		.otype	= GTH_MSU,
+		.scrpd	= SCRPD_MEM_IS_PRIM_DEST | SCRPD_MSC1_IS_ENABLED,
+	},
+	{
+		.nres	= 2,
+		.res	= {
+			{
+				.start	= REG_STH_OFFSET,
+				.end	= REG_STH_OFFSET + REG_STH_LENGTH - 1,
+				.flags	= IORESOURCE_MEM,
+			},
+			{
+				.start	= 1, /* use resource[1] */
+				.end	= 0,
+				.flags	= IORESOURCE_MEM,
+			},
+		},
+		.id	= -1,
+		.name	= "sth",
+		.type	= INTEL_TH_SOURCE,
+	},
+	{
+		.nres	= 1,
+		.res	= {
+			{
+				.start	= REG_PTI_OFFSET,
+				.end	= REG_PTI_OFFSET + REG_PTI_LENGTH - 1,
+				.flags	= IORESOURCE_MEM,
+			},
+		},
+		.id	= -1,
+		.name	= "pti",
+		.type	= INTEL_TH_OUTPUT,
+		.otype	= GTH_PTI,
+		.scrpd	= SCRPD_PTI_IS_PRIM_DEST,
+	},
+	{
+		.nres	= 1,
+		.res	= {
+			{
+				.start	= REG_PTI_OFFSET,
+				.end	= REG_PTI_OFFSET + REG_PTI_LENGTH - 1,
+				.flags	= IORESOURCE_MEM,
+			},
+		},
+		.id	= -1,
+		.name	= "lpp",
+		.type	= INTEL_TH_OUTPUT,
+		.otype	= GTH_LPP,
+		.scrpd	= SCRPD_PTI_IS_PRIM_DEST,
+	},
+	{
+		.nres	= 1,
+		.res	= {
+			{
+				.start	= REG_DCIH_OFFSET,
+				.end	= REG_DCIH_OFFSET + REG_DCIH_LENGTH - 1,
+				.flags	= IORESOURCE_MEM,
+			},
+		},
+		.id	= -1,
+		.name	= "dcih",
+		.type	= INTEL_TH_OUTPUT,
+	},
+};
+
+#ifdef CONFIG_MODULES
+static void __intel_th_request_hub_module(struct work_struct *work)
+{
+	struct intel_th *th = container_of(work, struct intel_th,
+					   request_module_work);
+
+	request_module("intel_th_%s", th->hub->name);
+}
+
+static int intel_th_request_hub_module(struct intel_th *th)
+{
+	INIT_WORK(&th->request_module_work, __intel_th_request_hub_module);
+	schedule_work(&th->request_module_work);
+
+	return 0;
+}
+
+static void intel_th_request_hub_module_flush(struct intel_th *th)
+{
+	flush_work(&th->request_module_work);
+}
+#else
+static inline int intel_th_request_hub_module(struct intel_th *th)
+{
+	return -EINVAL;
+}
+
+static inline void intel_th_request_hub_module_flush(struct intel_th *th)
+{
+}
+#endif /* CONFIG_MODULES */
+
+static struct intel_th_device *
+intel_th_subdevice_alloc(struct intel_th *th,
+			 const struct intel_th_subdevice *subdev)
+{
+	struct intel_th_device *thdev;
+	struct resource res[3];
+	unsigned int req = 0;
+	bool is64bit = false;
+	int r, err;
+
+	thdev = intel_th_device_alloc(th, subdev->type, subdev->name,
+				      subdev->id);
+	if (!thdev)
+		return ERR_PTR(-ENOMEM);
+
+	thdev->drvdata = th->drvdata;
+
+	for (r = 0; r < th->num_resources; r++)
+		if (th->resource[r].flags & IORESOURCE_MEM_64) {
+			is64bit = true;
+			break;
+		}
+
+	memcpy(res, subdev->res,
+	       sizeof(struct resource) * subdev->nres);
+
+	for (r = 0; r < subdev->nres; r++) {
+		struct resource *devres = th->resource;
+		int bar = 0; /* cut subdevices' MMIO from resource[0] */
+
+		/*
+		 * Take .end == 0 to mean 'take the whole bar',
+		 * .start then tells us which bar it is. Default to
+		 * TH_MMIO_CONFIG.
+		 */
+		if (!res[r].end && res[r].flags == IORESOURCE_MEM) {
+			bar = res[r].start;
+			if (is64bit)
+				bar *= 2;
+			res[r].start = 0;
+			res[r].end = resource_size(&devres[bar]) - 1;
+		}
+
+		if (res[r].flags & IORESOURCE_MEM) {
+			res[r].start	+= devres[bar].start;
+			res[r].end	+= devres[bar].start;
+
+			dev_dbg(th->dev, "%s:%d @ %pR\n",
+				subdev->name, r, &res[r]);
+		} else if (res[r].flags & IORESOURCE_IRQ) {
+			res[r].start	= th->irq;
+		}
+	}
+
+	err = intel_th_device_add_resources(thdev, res, subdev->nres);
+	if (err) {
+		put_device(&thdev->dev);
+		goto fail_put_device;
+	}
+
+	if (subdev->type == INTEL_TH_OUTPUT) {
+		thdev->dev.devt = MKDEV(th->major, th->num_thdevs);
+		thdev->output.type = subdev->otype;
+		thdev->output.port = -1;
+		thdev->output.scratchpad = subdev->scrpd;
+	} else if (subdev->type == INTEL_TH_SWITCH) {
+		thdev->host_mode =
+			INTEL_TH_CAP(th, host_mode_only) ? true : host_mode;
+		th->hub = thdev;
+	}
+
+	err = device_add(&thdev->dev);
+	if (err) {
+		put_device(&thdev->dev);
+		goto fail_free_res;
+	}
+
+	/* need switch driver to be loaded to enumerate the rest */
+	if (subdev->type == INTEL_TH_SWITCH && !req) {
+		err = intel_th_request_hub_module(th);
+		if (!err)
+			req++;
+	}
+
+	return thdev;
+
+fail_free_res:
+	kfree(thdev->resource);
+
+fail_put_device:
+	put_device(&thdev->dev);
+
+	return ERR_PTR(err);
+}
+
+/**
+ * intel_th_output_enable() - find and enable a device for a given output type
+ * @th:		Intel TH instance
+ * @otype:	output type
+ *
+ * Go through the unallocated output devices, find the first one whose type
+ * matches @otype and instantiate it. These devices are removed when the hub
+ * device is removed, see intel_th_remove().
+ */
+int intel_th_output_enable(struct intel_th *th, unsigned int otype)
+{
+	struct intel_th_device *thdev;
+	int src = 0, dst = 0;
+
+	for (src = 0, dst = 0; dst <= th->num_thdevs; src++, dst++) {
+		for (; src < ARRAY_SIZE(intel_th_subdevices); src++) {
+			if (intel_th_subdevices[src].type != INTEL_TH_OUTPUT)
+				continue;
+
+			if (intel_th_subdevices[src].otype != otype)
+				continue;
+
+			break;
+		}
+
+		/* no unallocated matching subdevices */
+		if (src == ARRAY_SIZE(intel_th_subdevices))
+			return -ENODEV;
+
+		for (; dst < th->num_thdevs; dst++) {
+			if (th->thdev[dst]->type != INTEL_TH_OUTPUT)
+				continue;
+
+			if (th->thdev[dst]->output.type != otype)
+				continue;
+
+			break;
+		}
+
+		/*
+		 * intel_th_subdevices[src] matches our requirements and is
+		 * not matched in th::thdev[]
+		 */
+		if (dst == th->num_thdevs)
+			goto found;
+	}
+
+	return -ENODEV;
+
+found:
+	thdev = intel_th_subdevice_alloc(th, &intel_th_subdevices[src]);
+	if (IS_ERR(thdev))
+		return PTR_ERR(thdev);
+
+	th->thdev[th->num_thdevs++] = thdev;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(intel_th_output_enable);
+
+static int intel_th_populate(struct intel_th *th)
+{
+	int src;
+
+	/* create devices for each intel_th_subdevice */
+	for (src = 0; src < ARRAY_SIZE(intel_th_subdevices); src++) {
+		const struct intel_th_subdevice *subdev =
+			&intel_th_subdevices[src];
+		struct intel_th_device *thdev;
+
+		/* only allow SOURCE and SWITCH devices in host mode */
+		if ((INTEL_TH_CAP(th, host_mode_only) || host_mode) &&
+		    subdev->type == INTEL_TH_OUTPUT)
+			continue;
+
+		/*
+		 * don't enable port OUTPUTs in this path; SWITCH enables them
+		 * via intel_th_output_enable()
+		 */
+		if (subdev->type == INTEL_TH_OUTPUT &&
+		    subdev->otype != GTH_NONE)
+			continue;
+
+		thdev = intel_th_subdevice_alloc(th, subdev);
+		/* note: caller should free subdevices from th::thdev[] */
+		if (IS_ERR(thdev))
+			return PTR_ERR(thdev);
+
+		th->thdev[th->num_thdevs++] = thdev;
+	}
+
+	return 0;
+}
+
+static int match_devt(struct device *dev, void *data)
+{
+	dev_t devt = (dev_t)(unsigned long)data;
+
+	return dev->devt == devt;
+}
+
+static int intel_th_output_open(struct inode *inode, struct file *file)
+{
+	const struct file_operations *fops;
+	struct intel_th_driver *thdrv;
+	struct device *dev;
+	int err;
+
+	dev = bus_find_device(&intel_th_bus, NULL,
+			      (void *)(unsigned long)inode->i_rdev,
+			      match_devt);
+	if (!dev || !dev->driver)
+		return -ENODEV;
+
+	thdrv = to_intel_th_driver(dev->driver);
+	fops = fops_get(thdrv->fops);
+	if (!fops)
+		return -ENODEV;
+
+	replace_fops(file, fops);
+
+	file->private_data = to_intel_th_device(dev);
+
+	if (file->f_op->open) {
+		err = file->f_op->open(inode, file);
+		return err;
+	}
+
+	return 0;
+}
+
+static const struct file_operations intel_th_output_fops = {
+	.open	= intel_th_output_open,
+	.llseek	= noop_llseek,
+};
+
+/**
+ * intel_th_alloc() - allocate a new Intel TH device and its subdevices
+ * @dev:	parent device
+ * @drvdata:	hardware-specific capabilities of the new device
+ * @devres:	parent's resources
+ * @ndevres:	number of resources
+ * @irq:	irq number
+ */
+struct intel_th *
+intel_th_alloc(struct device *dev, struct intel_th_drvdata *drvdata,
+	       struct resource *devres, unsigned int ndevres, int irq)
+{
+	struct intel_th *th;
+	int err, r;
+
+	if (irq == -1)
+		for (r = 0; r < ndevres; r++)
+			if (devres[r].flags & IORESOURCE_IRQ) {
+				irq = devres[r].start;
+				break;
+			}
+
+	th = kzalloc(sizeof(*th), GFP_KERNEL);
+	if (!th)
+		return ERR_PTR(-ENOMEM);
+
+	th->id = ida_simple_get(&intel_th_ida, 0, 0, GFP_KERNEL);
+	if (th->id < 0) {
+		err = th->id;
+		goto err_alloc;
+	}
+
+	th->major = __register_chrdev(0, 0, TH_POSSIBLE_OUTPUTS,
+				      "intel_th/output", &intel_th_output_fops);
+	if (th->major < 0) {
+		err = th->major;
+		goto err_ida;
+	}
+	th->dev = dev;
+	th->drvdata = drvdata;
+
+	th->resource = devres;
+	th->num_resources = ndevres;
+	th->irq = irq;
+
+	dev_set_drvdata(dev, th);
+
+	pm_runtime_no_callbacks(dev);
+	pm_runtime_put(dev);
+	pm_runtime_allow(dev);
+
+	err = intel_th_populate(th);
+	if (err) {
+		/* free the subdevices and undo everything */
+		intel_th_free(th);
+		return ERR_PTR(err);
+	}
+
+	return th;
+
+err_ida:
+	ida_simple_remove(&intel_th_ida, th->id);
+
+err_alloc:
+	kfree(th);
+
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(intel_th_alloc);
+
+void intel_th_free(struct intel_th *th)
+{
+	int i;
+
+	intel_th_request_hub_module_flush(th);
+
+	intel_th_device_remove(th->hub);
+	for (i = 0; i < th->num_thdevs; i++) {
+		if (th->thdev[i] != th->hub)
+			intel_th_device_remove(th->thdev[i]);
+		th->thdev[i] = NULL;
+	}
+
+	th->num_thdevs = 0;
+
+	pm_runtime_get_sync(th->dev);
+	pm_runtime_forbid(th->dev);
+
+	__unregister_chrdev(th->major, 0, TH_POSSIBLE_OUTPUTS,
+			    "intel_th/output");
+
+	ida_simple_remove(&intel_th_ida, th->id);
+
+	kfree(th);
+}
+EXPORT_SYMBOL_GPL(intel_th_free);
+
+/**
+ * intel_th_trace_enable() - enable tracing for an output device
+ * @thdev:	output device that requests tracing be enabled
+ */
+int intel_th_trace_enable(struct intel_th_device *thdev)
+{
+	struct intel_th_device *hub = to_intel_th_device(thdev->dev.parent);
+	struct intel_th_driver *hubdrv = to_intel_th_driver(hub->dev.driver);
+
+	if (WARN_ON_ONCE(hub->type != INTEL_TH_SWITCH))
+		return -EINVAL;
+
+	if (WARN_ON_ONCE(thdev->type != INTEL_TH_OUTPUT))
+		return -EINVAL;
+
+	pm_runtime_get_sync(&thdev->dev);
+	hubdrv->enable(hub, &thdev->output);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(intel_th_trace_enable);
+
+/**
+ * intel_th_trace_disable() - disable tracing for an output device
+ * @thdev:	output device that requests tracing be disabled
+ */
+int intel_th_trace_disable(struct intel_th_device *thdev)
+{
+	struct intel_th_device *hub = to_intel_th_device(thdev->dev.parent);
+	struct intel_th_driver *hubdrv = to_intel_th_driver(hub->dev.driver);
+
+	WARN_ON_ONCE(hub->type != INTEL_TH_SWITCH);
+	if (WARN_ON_ONCE(thdev->type != INTEL_TH_OUTPUT))
+		return -EINVAL;
+
+	hubdrv->disable(hub, &thdev->output);
+	pm_runtime_put(&thdev->dev);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(intel_th_trace_disable);
+
+int intel_th_set_output(struct intel_th_device *thdev,
+			unsigned int master)
+{
+	struct intel_th_device *hub = to_intel_th_hub(thdev);
+	struct intel_th_driver *hubdrv = to_intel_th_driver(hub->dev.driver);
+
+	/* In host mode, this is up to the external debugger, do nothing. */
+	if (hub->host_mode)
+		return 0;
+
+	if (!hubdrv->set_output)
+		return -ENOTSUPP;
+
+	return hubdrv->set_output(hub, master);
+}
+EXPORT_SYMBOL_GPL(intel_th_set_output);
+
+static int __init intel_th_init(void)
+{
+	intel_th_debug_init();
+
+	return bus_register(&intel_th_bus);
+}
+subsys_initcall(intel_th_init);
+
+static void __exit intel_th_exit(void)
+{
+	intel_th_debug_done();
+
+	bus_unregister(&intel_th_bus);
+}
+module_exit(intel_th_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Intel(R) Trace Hub controller driver");
+MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@linux.intel.com>");
diff --git a/drivers/hwtracing/intel_th/debug.c b/drivers/hwtracing/intel_th/debug.c
new file mode 100644
index 0000000..ff79063
--- /dev/null
+++ b/drivers/hwtracing/intel_th/debug.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel(R) Trace Hub driver debugging
+ *
+ * Copyright (C) 2014-2015 Intel Corporation.
+ */
+
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/debugfs.h>
+
+#include "intel_th.h"
+#include "debug.h"
+
+struct dentry *intel_th_dbg;
+
+void intel_th_debug_init(void)
+{
+	intel_th_dbg = debugfs_create_dir("intel_th", NULL);
+	if (IS_ERR(intel_th_dbg))
+		intel_th_dbg = NULL;
+}
+
+void intel_th_debug_done(void)
+{
+	debugfs_remove(intel_th_dbg);
+	intel_th_dbg = NULL;
+}
diff --git a/drivers/hwtracing/intel_th/debug.h b/drivers/hwtracing/intel_th/debug.h
new file mode 100644
index 0000000..78bd7e4
--- /dev/null
+++ b/drivers/hwtracing/intel_th/debug.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Intel(R) Trace Hub driver debugging
+ *
+ * Copyright (C) 2014-2015 Intel Corporation.
+ */
+
+#ifndef __INTEL_TH_DEBUG_H__
+#define __INTEL_TH_DEBUG_H__
+
+#ifdef CONFIG_INTEL_TH_DEBUG
+extern struct dentry *intel_th_dbg;
+
+void intel_th_debug_init(void);
+void intel_th_debug_done(void);
+#else
+static inline void intel_th_debug_init(void)
+{
+}
+
+static inline void intel_th_debug_done(void)
+{
+}
+#endif
+
+#endif /* __INTEL_TH_DEBUG_H__ */
diff --git a/drivers/hwtracing/intel_th/gth.c b/drivers/hwtracing/intel_th/gth.c
new file mode 100644
index 0000000..8426b79
--- /dev/null
+++ b/drivers/hwtracing/intel_th/gth.c
@@ -0,0 +1,748 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel(R) Trace Hub Global Trace Hub
+ *
+ * Copyright (C) 2014-2015 Intel Corporation.
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/bitmap.h>
+#include <linux/pm_runtime.h>
+
+#include "intel_th.h"
+#include "gth.h"
+
+struct gth_device;
+
+/**
+ * struct gth_output - GTH view on an output port
+ * @gth:	backlink to the GTH device
+ * @output:	link to output device's output descriptor
+ * @index:	output port number
+ * @port_type:	one of GTH_* port type values
+ * @master:	bitmap of masters configured for this output
+ */
+struct gth_output {
+	struct gth_device	*gth;
+	struct intel_th_output	*output;
+	unsigned int		index;
+	unsigned int		port_type;
+	DECLARE_BITMAP(master, TH_CONFIGURABLE_MASTERS + 1);
+};
+
+/**
+ * struct gth_device - GTH device
+ * @dev:	driver core's device
+ * @base:	register window base address
+ * @output_group:	attributes describing output ports
+ * @master_group:	attributes describing master assignments
+ * @output:		output ports
+ * @master:		master/output port assignments
+ * @gth_lock:		serializes accesses to GTH bits
+ */
+struct gth_device {
+	struct device		*dev;
+	void __iomem		*base;
+
+	struct attribute_group	output_group;
+	struct attribute_group	master_group;
+	struct gth_output	output[TH_POSSIBLE_OUTPUTS];
+	signed char		master[TH_CONFIGURABLE_MASTERS + 1];
+	spinlock_t		gth_lock;
+};
+
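+/*
+ * Each output port has an 8-bit configuration field: ports 0..3 live in
+ * GTHOPT0, ports 4..7 in GTHOPT1.
+ */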
+static void gth_output_set(struct gth_device *gth, int port,
+			   unsigned int config)
+{
+	unsigned long reg = port & 4 ? REG_GTH_GTHOPT1 : REG_GTH_GTHOPT0;
+	u32 val;
+	int shift = (port & 3) * 8;
+
+	val = ioread32(gth->base + reg);
+	val &= ~(0xff << shift);
+	val |= config << shift;
+	iowrite32(val, gth->base + reg);
+}
+
+static unsigned int gth_output_get(struct gth_device *gth, int port)
+{
+	unsigned long reg = port & 4 ? REG_GTH_GTHOPT1 : REG_GTH_GTHOPT0;
+	u32 val;
+	int shift = (port & 3) * 8;
+
+	val = ioread32(gth->base + reg);
+	val &= 0xff << shift;
+	val >>= shift;
+
+	return val;
+}
+
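+/*
+ * STP maintenance packet frequency: one 16-bit field per output port,
+ * two ports per SMCRx register.
+ */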
+static void gth_smcfreq_set(struct gth_device *gth, int port,
+			    unsigned int freq)
+{
+	unsigned long reg = REG_GTH_SMCR0 + ((port / 2) * 4);
+	int shift = (port & 1) * 16;
+	u32 val;
+
+	val = ioread32(gth->base + reg);
+	val &= ~(0xffff << shift);
+	val |= freq << shift;
+	iowrite32(val, gth->base + reg);
+}
+
+static unsigned int gth_smcfreq_get(struct gth_device *gth, int port)
+{
+	unsigned long reg = REG_GTH_SMCR0 + ((port / 2) * 4);
+	int shift = (port & 1) * 16;
+	u32 val;
+
+	val = ioread32(gth->base + reg);
+	val &= 0xffff << shift;
+	val >>= shift;
+
+	return val;
+}
+
+/*
+ * "masters" attribute group
+ */
+
+struct master_attribute {
+	struct device_attribute	attr;
+	struct gth_device	*gth;
+	unsigned int		master;
+};
+
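+/*
+ * Master routing: each SWDESTx register holds a 4-bit destination nibble
+ * for eight consecutive masters (bit 3 enables the route, bits 0-2 select
+ * the output port); masters at or above 256 share the single GSWTDEST
+ * register.
+ */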
+static void
+gth_master_set(struct gth_device *gth, unsigned int master, int port)
+{
+	unsigned int reg = REG_GTH_SWDEST0 + ((master >> 1) & ~3u);
+	unsigned int shift = (master & 0x7) * 4;
+	u32 val;
+
+	if (master >= 256) {
+		reg = REG_GTH_GSWTDEST;
+		shift = 0;
+	}
+
+	val = ioread32(gth->base + reg);
+	val &= ~(0xf << shift);
+	if (port >= 0)
+		val |= (0x8 | port) << shift;
+	iowrite32(val, gth->base + reg);
+}
+
+static ssize_t master_attr_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct master_attribute *ma =
+		container_of(attr, struct master_attribute, attr);
+	struct gth_device *gth = ma->gth;
+	size_t count;
+	int port;
+
+	spin_lock(&gth->gth_lock);
+	port = gth->master[ma->master];
+	spin_unlock(&gth->gth_lock);
+
+	if (port >= 0)
+		count = snprintf(buf, PAGE_SIZE, "%x\n", port);
+	else
+		count = snprintf(buf, PAGE_SIZE, "disabled\n");
+
+	return count;
+}
+
+static ssize_t master_attr_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	struct master_attribute *ma =
+		container_of(attr, struct master_attribute, attr);
+	struct gth_device *gth = ma->gth;
+	int old_port, port;
+
+	if (kstrtoint(buf, 10, &port) < 0)
+		return -EINVAL;
+
+	if (port >= TH_POSSIBLE_OUTPUTS || port < -1)
+		return -EINVAL;
+
+	spin_lock(&gth->gth_lock);
+
+	/* disconnect from the previous output port, if any */
+	old_port = gth->master[ma->master];
+	if (old_port >= 0) {
+		gth->master[ma->master] = -1;
+		clear_bit(ma->master, gth->output[old_port].master);
+
+		/*
+		 * if the port is active, program this setting; this
+		 * implies that runtime PM is on
+		 */
+		if (gth->output[old_port].output->active)
+			gth_master_set(gth, ma->master, -1);
+	}
+
+	/* connect to the new output port, if any */
+	if (port >= 0) {
+		/* check if there's a driver for this port */
+		if (!gth->output[port].output) {
+			count = -ENODEV;
+			goto unlock;
+		}
+
+		set_bit(ma->master, gth->output[port].master);
+
+		/* if the port is active, program this setting, see above */
+		if (gth->output[port].output->active)
+			gth_master_set(gth, ma->master, port);
+	}
+
+	gth->master[ma->master] = port;
+
+unlock:
+	spin_unlock(&gth->gth_lock);
+
+	return count;
+}
+
+struct output_attribute {
+	struct device_attribute attr;
+	struct gth_device	*gth;
+	unsigned int		port;
+	unsigned int		parm;
+};
+
+#define OUTPUT_PARM(_name, _mask, _r, _w, _what)			\
+	[TH_OUTPUT_PARM(_name)] = { .name = __stringify(_name),		\
+				    .get = gth_ ## _what ## _get,	\
+				    .set = gth_ ## _what ## _set,	\
+				    .mask = (_mask),			\
+				    .readable = (_r),			\
+				    .writable = (_w) }
+
+static const struct output_parm {
+	const char	*name;
+	unsigned int	(*get)(struct gth_device *gth, int port);
+	void		(*set)(struct gth_device *gth, int port,
+			       unsigned int val);
+	unsigned int	mask;
+	unsigned int	readable : 1,
+			writable : 1;
+} output_parms[] = {
+	OUTPUT_PARM(port,	0x7,	1, 0, output),
+	OUTPUT_PARM(null,	BIT(3),	1, 1, output),
+	OUTPUT_PARM(drop,	BIT(4),	1, 1, output),
+	OUTPUT_PARM(reset,	BIT(5),	1, 0, output),
+	OUTPUT_PARM(flush,	BIT(7),	0, 1, output),
+	OUTPUT_PARM(smcfreq,	0xffff,	1, 1, smcfreq),
+};
+
+static void
+gth_output_parm_set(struct gth_device *gth, int port, unsigned int parm,
+		    unsigned int val)
+{
+	unsigned int config = output_parms[parm].get(gth, port);
+	unsigned int mask = output_parms[parm].mask;
+	unsigned int shift = __ffs(mask);
+
+	config &= ~mask;
+	config |= (val << shift) & mask;
+	output_parms[parm].set(gth, port, config);
+}
+
+static unsigned int
+gth_output_parm_get(struct gth_device *gth, int port, unsigned int parm)
+{
+	unsigned int config = output_parms[parm].get(gth, port);
+	unsigned int mask = output_parms[parm].mask;
+	unsigned int shift = __ffs(mask);
+
+	config &= mask;
+	config >>= shift;
+	return config;
+}
+
+/*
+ * Reset outputs and sources
+ */
+static int intel_th_gth_reset(struct gth_device *gth)
+{
+	u32 reg;
+	int port, i;
+
+	reg = ioread32(gth->base + REG_GTH_SCRPD0);
+	if (reg & SCRPD_DEBUGGER_IN_USE)
+		return -EBUSY;
+
+	/* Always save/restore STH and TU registers in S0ix entry/exit */
+	reg |= SCRPD_STH_IS_ENABLED | SCRPD_TRIGGER_IS_ENABLED;
+	iowrite32(reg, gth->base + REG_GTH_SCRPD0);
+
+	/* output ports */
+	for (port = 0; port < 8; port++) {
+		if (gth_output_parm_get(gth, port, TH_OUTPUT_PARM(port)) ==
+		    GTH_NONE)
+			continue;
+
+		gth_output_set(gth, port, 0);
+		gth_smcfreq_set(gth, port, 16);
+	}
+	/* disable overrides */
+	iowrite32(0, gth->base + REG_GTH_DESTOVR);
+
+	/* masters swdest_0~31 and gswdest */
+	for (i = 0; i < 33; i++)
+		iowrite32(0, gth->base + REG_GTH_SWDEST0 + i * 4);
+
+	/* sources */
+	iowrite32(0, gth->base + REG_GTH_SCR);
+	iowrite32(0xfc, gth->base + REG_GTH_SCR2);
+
+	return 0;
+}
+
+/*
+ * "outputs" attribute group
+ */
+
+static ssize_t output_attr_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct output_attribute *oa =
+		container_of(attr, struct output_attribute, attr);
+	struct gth_device *gth = oa->gth;
+	size_t count;
+
+	pm_runtime_get_sync(dev);
+
+	spin_lock(&gth->gth_lock);
+	count = snprintf(buf, PAGE_SIZE, "%x\n",
+			 gth_output_parm_get(gth, oa->port, oa->parm));
+	spin_unlock(&gth->gth_lock);
+
+	pm_runtime_put(dev);
+
+	return count;
+}
+
+static ssize_t output_attr_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	struct output_attribute *oa =
+		container_of(attr, struct output_attribute, attr);
+	struct gth_device *gth = oa->gth;
+	unsigned int config;
+
+	if (kstrtouint(buf, 16, &config) < 0)
+		return -EINVAL;
+
+	pm_runtime_get_sync(dev);
+
+	spin_lock(&gth->gth_lock);
+	gth_output_parm_set(gth, oa->port, oa->parm, config);
+	spin_unlock(&gth->gth_lock);
+
+	pm_runtime_put(dev);
+
+	return count;
+}
+
+static int intel_th_master_attributes(struct gth_device *gth)
+{
+	struct master_attribute *master_attrs;
+	struct attribute **attrs;
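+	/* masters 0..255, the "256+" catch-all entry, plus a NULL terminator */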
+	int i, nattrs = TH_CONFIGURABLE_MASTERS + 2;
+
+	attrs = devm_kcalloc(gth->dev, nattrs, sizeof(void *), GFP_KERNEL);
+	if (!attrs)
+		return -ENOMEM;
+
+	master_attrs = devm_kcalloc(gth->dev, nattrs,
+				    sizeof(struct master_attribute),
+				    GFP_KERNEL);
+	if (!master_attrs)
+		return -ENOMEM;
+
+	for (i = 0; i < TH_CONFIGURABLE_MASTERS + 1; i++) {
+		char *name;
+
+		name = devm_kasprintf(gth->dev, GFP_KERNEL, "%d%s", i,
+				      i == TH_CONFIGURABLE_MASTERS ? "+" : "");
+		if (!name)
+			return -ENOMEM;
+
+		master_attrs[i].attr.attr.name = name;
+		master_attrs[i].attr.attr.mode = S_IRUGO | S_IWUSR;
+		master_attrs[i].attr.show = master_attr_show;
+		master_attrs[i].attr.store = master_attr_store;
+
+		sysfs_attr_init(&master_attrs[i].attr.attr);
+		attrs[i] = &master_attrs[i].attr.attr;
+
+		master_attrs[i].gth = gth;
+		master_attrs[i].master = i;
+	}
+
+	gth->master_group.name	= "masters";
+	gth->master_group.attrs = attrs;
+
+	return sysfs_create_group(&gth->dev->kobj, &gth->master_group);
+}
+
+static int intel_th_output_attributes(struct gth_device *gth)
+{
+	struct output_attribute *out_attrs;
+	struct attribute **attrs;
+	int i, j, nouts = TH_POSSIBLE_OUTPUTS;
+	int nparms = ARRAY_SIZE(output_parms);
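+	/* one attribute per (port, parameter) pair, plus a NULL terminator */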
+	int nattrs = nouts * nparms + 1;
+
+	attrs = devm_kcalloc(gth->dev, nattrs, sizeof(void *), GFP_KERNEL);
+	if (!attrs)
+		return -ENOMEM;
+
+	out_attrs = devm_kcalloc(gth->dev, nattrs,
+				 sizeof(struct output_attribute),
+				 GFP_KERNEL);
+	if (!out_attrs)
+		return -ENOMEM;
+
+	for (i = 0; i < nouts; i++) {
+		for (j = 0; j < nparms; j++) {
+			unsigned int idx = i * nparms + j;
+			char *name;
+
+			name = devm_kasprintf(gth->dev, GFP_KERNEL, "%d_%s", i,
+					      output_parms[j].name);
+			if (!name)
+				return -ENOMEM;
+
+			out_attrs[idx].attr.attr.name = name;
+
+			if (output_parms[j].readable) {
+				out_attrs[idx].attr.attr.mode |= S_IRUGO;
+				out_attrs[idx].attr.show = output_attr_show;
+			}
+
+			if (output_parms[j].writable) {
+				out_attrs[idx].attr.attr.mode |= S_IWUSR;
+				out_attrs[idx].attr.store = output_attr_store;
+			}
+
+			sysfs_attr_init(&out_attrs[idx].attr.attr);
+			attrs[idx] = &out_attrs[idx].attr.attr;
+
+			out_attrs[idx].gth = gth;
+			out_attrs[idx].port = i;
+			out_attrs[idx].parm = j;
+		}
+	}
+
+	gth->output_group.name	= "outputs";
+	gth->output_group.attrs = attrs;
+
+	return sysfs_create_group(&gth->dev->kobj, &gth->output_group);
+}
+
+/**
+ * intel_th_gth_disable() - disable tracing to an output device
+ * @thdev:	GTH device
+ * @output:	output device's descriptor
+ *
+ * This will deconfigure all masters set to output to this device,
+ * disable tracing using the force storeEn off signal and wait for the
+ * "pipeline empty" bit of the corresponding output port.
+ */
+static void intel_th_gth_disable(struct intel_th_device *thdev,
+				 struct intel_th_output *output)
+{
+	struct gth_device *gth = dev_get_drvdata(&thdev->dev);
+	unsigned long count;
+	int master;
+	u32 reg;
+
+	spin_lock(&gth->gth_lock);
+	output->active = false;
+
+	for_each_set_bit(master, gth->output[output->port].master,
+			 TH_CONFIGURABLE_MASTERS) {
+		gth_master_set(gth, master, -1);
+	}
+	spin_unlock(&gth->gth_lock);
+
+	iowrite32(0, gth->base + REG_GTH_SCR);
+	iowrite32(0xfd, gth->base + REG_GTH_SCR2);
+
+	/* wait on pipeline empty for the given port */
+	for (reg = 0, count = GTH_PLE_WAITLOOP_DEPTH;
+	     count && !(reg & BIT(output->port)); count--) {
+		reg = ioread32(gth->base + REG_GTH_STAT);
+		cpu_relax();
+	}
+
+	/* clear force capture done for next captures */
+	iowrite32(0xfc, gth->base + REG_GTH_SCR2);
+
+	if (!count)
+		dev_dbg(&thdev->dev, "timeout waiting for GTH[%d] PLE\n",
+			output->port);
+
+	reg = ioread32(gth->base + REG_GTH_SCRPD0);
+	reg &= ~output->scratchpad;
+	iowrite32(reg, gth->base + REG_GTH_SCRPD0);
+}
+
+static void gth_tscu_resync(struct gth_device *gth)
+{
+	u32 reg;
+
+	reg = ioread32(gth->base + REG_TSCU_TSUCTRL);
+	reg &= ~TSUCTRL_CTCRESYNC;
+	iowrite32(reg, gth->base + REG_TSCU_TSUCTRL);
+}
+
+/**
+ * intel_th_gth_enable() - enable tracing to an output device
+ * @thdev:	GTH device
+ * @output:	output device's descriptor
+ *
+ * This will configure all masters set to output to this device and
+ * enable tracing using the force storeEn signal.
+ */
+static void intel_th_gth_enable(struct intel_th_device *thdev,
+				struct intel_th_output *output)
+{
+	struct gth_device *gth = dev_get_drvdata(&thdev->dev);
+	struct intel_th *th = to_intel_th(thdev);
+	u32 scr = 0xfc0000, scrpd;
+	int master;
+
+	spin_lock(&gth->gth_lock);
+	for_each_set_bit(master, gth->output[output->port].master,
+			 TH_CONFIGURABLE_MASTERS + 1) {
+		gth_master_set(gth, master, output->port);
+	}
+
+	if (output->multiblock)
+		scr |= 0xff;
+
+	output->active = true;
+	spin_unlock(&gth->gth_lock);
+
+	if (INTEL_TH_CAP(th, tscu_enable))
+		gth_tscu_resync(gth);
+
+	scrpd = ioread32(gth->base + REG_GTH_SCRPD0);
+	scrpd |= output->scratchpad;
+	iowrite32(scrpd, gth->base + REG_GTH_SCRPD0);
+
+	iowrite32(scr, gth->base + REG_GTH_SCR);
+	iowrite32(0, gth->base + REG_GTH_SCR2);
+}
+
+/**
+ * intel_th_gth_assign() - assign output device to a GTH output port
+ * @thdev:	GTH device
+ * @othdev:	output device
+ *
+ * This will match a given output device parameters against present
+ * output ports on the GTH and fill out relevant bits in output device's
+ * descriptor.
+ *
+ * Return:	0 on success, -errno on error.
+ */
+static int intel_th_gth_assign(struct intel_th_device *thdev,
+			       struct intel_th_device *othdev)
+{
+	struct gth_device *gth = dev_get_drvdata(&thdev->dev);
+	int i, id;
+
+	if (thdev->host_mode)
+		return -EBUSY;
+
+	if (othdev->type != INTEL_TH_OUTPUT)
+		return -EINVAL;
+
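+	/* find othdev's id'th port of matching type; id == -1 takes the first match */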
+	for (i = 0, id = 0; i < TH_POSSIBLE_OUTPUTS; i++) {
+		if (gth->output[i].port_type != othdev->output.type)
+			continue;
+
+		if (othdev->id == -1 || othdev->id == id)
+			goto found;
+
+		id++;
+	}
+
+	return -ENOENT;
+
+found:
+	spin_lock(&gth->gth_lock);
+	othdev->output.port = i;
+	othdev->output.active = false;
+	gth->output[i].output = &othdev->output;
+	spin_unlock(&gth->gth_lock);
+
+	return 0;
+}
+
+/**
+ * intel_th_gth_unassign() - deassociate an output device from its output port
+ * @thdev:	GTH device
+ * @othdev:	output device
+ */
+static void intel_th_gth_unassign(struct intel_th_device *thdev,
+				  struct intel_th_device *othdev)
+{
+	struct gth_device *gth = dev_get_drvdata(&thdev->dev);
+	int port = othdev->output.port;
+
+	if (thdev->host_mode)
+		return;
+
+	spin_lock(&gth->gth_lock);
+	othdev->output.port = -1;
+	othdev->output.active = false;
+	gth->output[port].output = NULL;
+	spin_unlock(&gth->gth_lock);
+}
+
+static int
+intel_th_gth_set_output(struct intel_th_device *thdev, unsigned int master)
+{
+	struct gth_device *gth = dev_get_drvdata(&thdev->dev);
+	int port = 0; /* FIXME: make default output configurable */
+
+	/*
+	 * everything above TH_CONFIGURABLE_MASTERS is controlled by the
+	 * same register
+	 */
+	if (master > TH_CONFIGURABLE_MASTERS)
+		master = TH_CONFIGURABLE_MASTERS;
+
+	spin_lock(&gth->gth_lock);
+	if (gth->master[master] == -1) {
+		set_bit(master, gth->output[port].master);
+		gth->master[master] = port;
+	}
+	spin_unlock(&gth->gth_lock);
+
+	return 0;
+}
+
+static int intel_th_gth_probe(struct intel_th_device *thdev)
+{
+	struct device *dev = &thdev->dev;
+	struct intel_th *th = dev_get_drvdata(dev->parent);
+	struct gth_device *gth;
+	struct resource *res;
+	void __iomem *base;
+	int i, ret;
+
+	res = intel_th_device_get_resource(thdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENODEV;
+
+	base = devm_ioremap(dev, res->start, resource_size(res));
+	if (!base)
+		return -ENOMEM;
+
+	gth = devm_kzalloc(dev, sizeof(*gth), GFP_KERNEL);
+	if (!gth)
+		return -ENOMEM;
+
+	gth->dev = dev;
+	gth->base = base;
+	spin_lock_init(&gth->gth_lock);
+
+	dev_set_drvdata(dev, gth);
+
+	/*
+	 * Host mode can be signalled via SW means or via SCRPD_DEBUGGER_IN_USE
+	 * bit. Either way, don't reset HW in this case, and don't export any
+	 * capture configuration attributes. Also, refuse to assign output
+	 * drivers to ports, see intel_th_gth_assign().
+	 */
+	if (thdev->host_mode)
+		return 0;
+
+	ret = intel_th_gth_reset(gth);
+	if (ret) {
+		if (ret != -EBUSY)
+			return ret;
+
+		thdev->host_mode = true;
+
+		return 0;
+	}
+
+	for (i = 0; i < TH_CONFIGURABLE_MASTERS + 1; i++)
+		gth->master[i] = -1;
+
+	for (i = 0; i < TH_POSSIBLE_OUTPUTS; i++) {
+		gth->output[i].gth = gth;
+		gth->output[i].index = i;
+		gth->output[i].port_type =
+			gth_output_parm_get(gth, i, TH_OUTPUT_PARM(port));
+		if (gth->output[i].port_type == GTH_NONE)
+			continue;
+
+		ret = intel_th_output_enable(th, gth->output[i].port_type);
+		/* -ENODEV is ok, we just won't have that device enumerated */
+		if (ret && ret != -ENODEV)
+			return ret;
+	}
+
+	if (intel_th_output_attributes(gth) ||
+	    intel_th_master_attributes(gth)) {
+		pr_warn("Can't initialize sysfs attributes\n");
+
+		if (gth->output_group.attrs)
+			sysfs_remove_group(&gth->dev->kobj, &gth->output_group);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void intel_th_gth_remove(struct intel_th_device *thdev)
+{
+	struct gth_device *gth = dev_get_drvdata(&thdev->dev);
+
+	sysfs_remove_group(&gth->dev->kobj, &gth->output_group);
+	sysfs_remove_group(&gth->dev->kobj, &gth->master_group);
+}
+
+static struct intel_th_driver intel_th_gth_driver = {
+	.probe		= intel_th_gth_probe,
+	.remove		= intel_th_gth_remove,
+	.assign		= intel_th_gth_assign,
+	.unassign	= intel_th_gth_unassign,
+	.set_output	= intel_th_gth_set_output,
+	.enable		= intel_th_gth_enable,
+	.disable	= intel_th_gth_disable,
+	.driver	= {
+		.name	= "gth",
+		.owner	= THIS_MODULE,
+	},
+};
+
+module_driver(intel_th_gth_driver,
+	      intel_th_driver_register,
+	      intel_th_driver_unregister);
+
+MODULE_ALIAS("intel_th_switch");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Intel(R) Trace Hub Global Trace Hub driver");
+MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@linux.intel.com>");
diff --git a/drivers/hwtracing/intel_th/gth.h b/drivers/hwtracing/intel_th/gth.h
new file mode 100644
index 0000000..6f2b0b9
--- /dev/null
+++ b/drivers/hwtracing/intel_th/gth.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Intel(R) Trace Hub Global Trace Hub (GTH) data structures
+ *
+ * Copyright (C) 2014-2015 Intel Corporation.
+ */
+
+#ifndef __INTEL_TH_GTH_H__
+#define __INTEL_TH_GTH_H__
+
+/* Map output port parameter bits to symbolic names */
+#define TH_OUTPUT_PARM(name)			\
+	TH_OUTPUT_ ## name
+
+enum intel_th_output_parm {
+	/* output port type */
+	TH_OUTPUT_PARM(port),
+	/* generate NULL packet */
+	TH_OUTPUT_PARM(null),
+	/* packet drop */
+	TH_OUTPUT_PARM(drop),
+	/* port in reset state */
+	TH_OUTPUT_PARM(reset),
+	/* flush out data */
+	TH_OUTPUT_PARM(flush),
+	/* maintenance packet frequency */
+	TH_OUTPUT_PARM(smcfreq),
+};
+
+/*
+ * Register offsets
+ */
+enum {
+	REG_GTH_GTHOPT0		= 0x00, /* Output ports 0..3 config */
+	REG_GTH_GTHOPT1		= 0x04, /* Output ports 4..7 config */
+	REG_GTH_SWDEST0		= 0x08, /* Switching destination masters 0..7 */
+	REG_GTH_GSWTDEST	= 0x88, /* Global sw trace destination */
+	REG_GTH_SMCR0		= 0x9c, /* STP maintenance for ports 0/1 */
+	REG_GTH_SMCR1		= 0xa0, /* STP maintenance for ports 2/3 */
+	REG_GTH_SMCR2		= 0xa4, /* STP maintenance for ports 4/5 */
+	REG_GTH_SMCR3		= 0xa8, /* STP maintenance for ports 6/7 */
+	REG_GTH_SCR		= 0xc8, /* Source control (storeEn override) */
+	REG_GTH_STAT		= 0xd4, /* GTH status */
+	REG_GTH_SCR2		= 0xd8, /* Source control (force storeEn off) */
+	REG_GTH_DESTOVR		= 0xdc, /* Destination override */
+	REG_GTH_SCRPD0		= 0xe0, /* ScratchPad[0] */
+	REG_GTH_SCRPD1		= 0xe4, /* ScratchPad[1] */
+	REG_GTH_SCRPD2		= 0xe8, /* ScratchPad[2] */
+	REG_GTH_SCRPD3		= 0xec, /* ScratchPad[3] */
+	REG_TSCU_TSUCTRL	= 0x2000, /* TSCU control register */
+	REG_TSCU_TSCUSTAT	= 0x2004, /* TSCU status register */
+};
+
+/* waiting for Pipeline Empty bit(s) to assert for GTH */
+#define GTH_PLE_WAITLOOP_DEPTH	10000
+
+#define TSUCTRL_CTCRESYNC	BIT(0)
+#define TSCUSTAT_CTCSYNCING	BIT(1)
+
+#endif /* __INTEL_TH_GTH_H__ */
diff --git a/drivers/hwtracing/intel_th/intel_th.h b/drivers/hwtracing/intel_th/intel_th.h
new file mode 100644
index 0000000..780206d
--- /dev/null
+++ b/drivers/hwtracing/intel_th/intel_th.h
@@ -0,0 +1,359 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Intel(R) Trace Hub data structures
+ *
+ * Copyright (C) 2014-2015 Intel Corporation.
+ */
+
+#ifndef __INTEL_TH_H__
+#define __INTEL_TH_H__
+
+/* intel_th_device device types */
+enum {
+	/* Devices that generate trace data */
+	INTEL_TH_SOURCE = 0,
+	/* Output ports (MSC, PTI) */
+	INTEL_TH_OUTPUT,
+	/* Switch, the Global Trace Hub (GTH) */
+	INTEL_TH_SWITCH,
+};
+
+/**
+ * struct intel_th_output - descriptor for INTEL_TH_OUTPUT type devices
+ * @port:	output port number, assigned by the switch
+ * @type:	GTH_{MSU,CTP,PTI}
+ * @scratchpad:	scratchpad bits to flag when this output is enabled
+ * @multiblock:	true for multiblock output configuration
+ * @active:	true when this output is enabled
+ *
+ * Output port descriptor, used by switch driver to tell which output
+ * port this output device corresponds to. Filled in at output device's
+ * probe time by switch::assign(). Passed from output device driver to
+ * switch related code to enable/disable its port.
+ */
+struct intel_th_output {
+	int		port;
+	unsigned int	type;
+	unsigned int	scratchpad;
+	bool		multiblock;
+	bool		active;
+};
+
+/**
+ * struct intel_th_drvdata - describes hardware capabilities and quirks
+ * @tscu_enable:	device needs SW to enable time stamping unit
+ * @host_mode_only:	device can only operate in 'host debugger' mode
+ */
+struct intel_th_drvdata {
+	unsigned int	tscu_enable        : 1,
+			host_mode_only     : 1;
+};
+
+#define INTEL_TH_CAP(_th, _cap) ((_th)->drvdata ? (_th)->drvdata->_cap : 0)
+
+/**
+ * struct intel_th_device - device on the intel_th bus
+ * @dev:		device
+ * @drvdata:		hardware capabilities/quirks
+ * @resource:		array of resources available to this device
+ * @num_resources:	number of resources in @resource array
+ * @type:		INTEL_TH_{SOURCE,OUTPUT,SWITCH}
+ * @id:			device instance or -1
+ * @host_mode:		Intel TH is controlled by an external debug host
+ * @output:		output descriptor for INTEL_TH_OUTPUT devices
+ * @name:		device name to match the driver
+ */
+struct intel_th_device {
+	struct device		dev;
+	struct intel_th_drvdata *drvdata;
+	struct resource		*resource;
+	unsigned int		num_resources;
+	unsigned int		type;
+	int			id;
+
+	/* INTEL_TH_SWITCH specific */
+	bool			host_mode;
+
+	/* INTEL_TH_OUTPUT specific */
+	struct intel_th_output	output;
+
+	char		name[];
+};
+
+#define to_intel_th_device(_d)				\
+	container_of((_d), struct intel_th_device, dev)
+
+/**
+ * intel_th_device_get_resource() - obtain @num'th resource of type @type
+ * @thdev:	the device to search the resource for
+ * @type:	resource type
+ * @num:	number of the resource
+ */
+static inline struct resource *
+intel_th_device_get_resource(struct intel_th_device *thdev, unsigned int type,
+			     unsigned int num)
+{
+	int i;
+
+	for (i = 0; i < thdev->num_resources; i++)
+		if (resource_type(&thdev->resource[i]) == type && !num--)
+			return &thdev->resource[i];
+
+	return NULL;
+}
+
+/*
+ * GTH, output ports configuration
+ */
+enum {
+	GTH_NONE = 0,
+	GTH_MSU,	/* memory/usb */
+	GTH_CTP,	/* Common Trace Port */
+	GTH_LPP,	/* Low Power Path */
+	GTH_PTI,	/* MIPI-PTI */
+};
+
+/**
+ * intel_th_output_assigned() - if an output device is assigned to a switch port
+ * @thdev:	the output device
+ *
+ * Return:	true if the device is INTEL_TH_OUTPUT *and* is assigned a port
+ */
+static inline bool
+intel_th_output_assigned(struct intel_th_device *thdev)
+{
+	return thdev->type == INTEL_TH_OUTPUT &&
+		(thdev->output.port >= 0 ||
+		 thdev->output.type == GTH_NONE);
+}
+
+/**
+ * struct intel_th_driver - driver for an intel_th_device device
+ * @driver:	generic driver
+ * @probe:	probe method
+ * @remove:	remove method
+ * @assign:	match a given output type device against available outputs
+ * @unassign:	deassociate an output type device from an output port
+ * @enable:	enable tracing for a given output device
+ * @disable:	disable tracing for a given output device
+ * @irq:	interrupt callback
+ * @activate:	enable tracing on the output's side
+ * @deactivate:	disable tracing on the output's side
+ * @fops:	file operations for device nodes
+ * @attr_group:	attributes provided by the driver
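+ * @set_output:	route a given master to an output port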
+ *
+ * Callbacks @probe and @remove are required for all device types.
+ * Switch device driver needs to fill in @assign, @enable and @disable
+ * callbacks.
+ */
+struct intel_th_driver {
+	struct device_driver	driver;
+	int			(*probe)(struct intel_th_device *thdev);
+	void			(*remove)(struct intel_th_device *thdev);
+	/* switch (GTH) ops */
+	int			(*assign)(struct intel_th_device *thdev,
+					  struct intel_th_device *othdev);
+	void			(*unassign)(struct intel_th_device *thdev,
+					    struct intel_th_device *othdev);
+	void			(*enable)(struct intel_th_device *thdev,
+					  struct intel_th_output *output);
+	void			(*disable)(struct intel_th_device *thdev,
+					   struct intel_th_output *output);
+	/* output ops */
+	void			(*irq)(struct intel_th_device *thdev);
+	int			(*activate)(struct intel_th_device *thdev);
+	void			(*deactivate)(struct intel_th_device *thdev);
+	/* file_operations for those who want a device node */
+	const struct file_operations *fops;
+	/* optional attributes */
+	struct attribute_group	*attr_group;
+
+	/* source ops */
+	int			(*set_output)(struct intel_th_device *thdev,
+					      unsigned int master);
+};
+
+#define to_intel_th_driver(_d)					\
+	container_of((_d), struct intel_th_driver, driver)
+
+#define to_intel_th_driver_or_null(_d)		\
+	((_d) ? to_intel_th_driver(_d) : NULL)
+
+/*
+ * Subdevice tree structure is as follows:
+ * + struct intel_th device (pci; dev_{get,set}_drvdata())
+ *   + struct intel_th_device INTEL_TH_SWITCH (GTH)
+ *     + struct intel_th_device INTEL_TH_OUTPUT (MSU, PTI)
+ *   + struct intel_th_device INTEL_TH_SOURCE (STH)
+ *
+ * In other words, INTEL_TH_OUTPUT devices are children of INTEL_TH_SWITCH;
+ * INTEL_TH_SWITCH and INTEL_TH_SOURCE are children of the intel_th device.
+ */
+static inline struct intel_th_device *
+to_intel_th_parent(struct intel_th_device *thdev)
+{
+	struct device *parent = thdev->dev.parent;
+
+	if (!parent)
+		return NULL;
+
+	return to_intel_th_device(parent);
+}
+
+static inline struct intel_th *to_intel_th(struct intel_th_device *thdev)
+{
+	if (thdev->type == INTEL_TH_OUTPUT)
+		thdev = to_intel_th_parent(thdev);
+
+	if (WARN_ON_ONCE(!thdev || thdev->type == INTEL_TH_OUTPUT))
+		return NULL;
+
+	return dev_get_drvdata(thdev->dev.parent);
+}
+
+struct intel_th *
+intel_th_alloc(struct device *dev, struct intel_th_drvdata *drvdata,
+	       struct resource *devres, unsigned int ndevres, int irq);
+void intel_th_free(struct intel_th *th);
+
+int intel_th_driver_register(struct intel_th_driver *thdrv);
+void intel_th_driver_unregister(struct intel_th_driver *thdrv);
+
+int intel_th_trace_enable(struct intel_th_device *thdev);
+int intel_th_trace_disable(struct intel_th_device *thdev);
+int intel_th_set_output(struct intel_th_device *thdev,
+			unsigned int master);
+int intel_th_output_enable(struct intel_th *th, unsigned int otype);
+
+enum {
+	TH_MMIO_CONFIG = 0,
+	TH_MMIO_SW = 2,
+	TH_MMIO_END,
+};
+
+#define TH_POSSIBLE_OUTPUTS	8
+/* Total number of possible subdevices: outputs + GTH + STH */
+#define TH_SUBDEVICE_MAX	(TH_POSSIBLE_OUTPUTS + 2)
+#define TH_CONFIGURABLE_MASTERS 256
+#define TH_MSC_MAX		2
+
+/**
+ * struct intel_th - Intel TH controller
+ * @dev:	driver core's device
+ * @thdev:	subdevices
+ * @hub:	"switch" subdevice (GTH)
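+ * @drvdata:	hardware capabilities and quirks of this controller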
+ * @resource:	resources of the entire controller
+ * @num_thdevs:	number of devices in the @thdev array
+ * @num_resources:	number of resources in the @resource array
+ * @irq:	irq number
+ * @id:		this Intel TH controller's device ID in the system
+ * @major:	device node major for output devices
+ */
+struct intel_th {
+	struct device		*dev;
+
+	struct intel_th_device	*thdev[TH_SUBDEVICE_MAX];
+	struct intel_th_device	*hub;
+	struct intel_th_drvdata	*drvdata;
+
+	struct resource		*resource;
+	int			(*activate)(struct intel_th *);
+	void			(*deactivate)(struct intel_th *);
+	unsigned int		num_thdevs;
+	unsigned int		num_resources;
+	int			irq;
+
+	int			id;
+	int			major;
+#ifdef CONFIG_MODULES
+	struct work_struct	request_module_work;
+#endif /* CONFIG_MODULES */
+#ifdef CONFIG_INTEL_TH_DEBUG
+	struct dentry		*dbg;
+#endif
+};
+
+static inline struct intel_th_device *
+to_intel_th_hub(struct intel_th_device *thdev)
+{
+	if (thdev->type == INTEL_TH_SWITCH)
+		return thdev;
+	else if (thdev->type == INTEL_TH_OUTPUT)
+		return to_intel_th_parent(thdev);
+
+	return to_intel_th(thdev)->hub;
+}
+
+/*
+ * Register windows
+ */
+enum {
+	/* Global Trace Hub (GTH) */
+	REG_GTH_OFFSET		= 0x0000,
+	REG_GTH_LENGTH		= 0x2000,
+
+	/* Timestamp counter unit (TSCU) */
+	REG_TSCU_OFFSET		= 0x2000,
+	REG_TSCU_LENGTH		= 0x1000,
+
+	/* Software Trace Hub (STH) [0x4000..0x5fff] */
+	REG_STH_OFFSET		= 0x4000,
+	REG_STH_LENGTH		= 0x2000,
+
+	/* Memory Storage Unit (MSU) [0xa0000..0xa1fff] */
+	REG_MSU_OFFSET		= 0xa0000,
+	REG_MSU_LENGTH		= 0x02000,
+
+	/* Internal MSU trace buffer [0x80000..0x9ffff] */
+	BUF_MSU_OFFSET		= 0x80000,
+	BUF_MSU_LENGTH		= 0x20000,
+
+	/* PTI output == same window as GTH */
+	REG_PTI_OFFSET		= REG_GTH_OFFSET,
+	REG_PTI_LENGTH		= REG_GTH_LENGTH,
+
+	/* DCI Handler (DCIH) == same window as MSU */
+	REG_DCIH_OFFSET		= REG_MSU_OFFSET,
+	REG_DCIH_LENGTH		= REG_MSU_LENGTH,
+};
+
+/*
+ * Scratchpad bits: tell firmware and external debuggers
+ * what we are up to.
+ */
+enum {
+	/* Memory is the primary destination */
+	SCRPD_MEM_IS_PRIM_DEST		= BIT(0),
+	/* XHCI DbC is the primary destination */
+	SCRPD_DBC_IS_PRIM_DEST		= BIT(1),
+	/* PTI is the primary destination */
+	SCRPD_PTI_IS_PRIM_DEST		= BIT(2),
+	/* BSSB is the primary destination */
+	SCRPD_BSSB_IS_PRIM_DEST		= BIT(3),
+	/* PTI is the alternate destination */
+	SCRPD_PTI_IS_ALT_DEST		= BIT(4),
+	/* BSSB is the alternate destination */
+	SCRPD_BSSB_IS_ALT_DEST		= BIT(5),
+	/* DeepSx exit occurred */
+	SCRPD_DEEPSX_EXIT		= BIT(6),
+	/* S4 exit occurred */
+	SCRPD_S4_EXIT			= BIT(7),
+	/* S5 exit occurred */
+	SCRPD_S5_EXIT			= BIT(8),
+	/* MSU controller 0/1 is enabled */
+	SCRPD_MSC0_IS_ENABLED		= BIT(9),
+	SCRPD_MSC1_IS_ENABLED		= BIT(10),
+	/* Sx exit occurred */
+	SCRPD_SX_EXIT			= BIT(11),
+	/* Trigger Unit is enabled */
+	SCRPD_TRIGGER_IS_ENABLED	= BIT(12),
+	SCRPD_ODLA_IS_ENABLED		= BIT(13),
+	SCRPD_SOCHAP_IS_ENABLED		= BIT(14),
+	SCRPD_STH_IS_ENABLED		= BIT(15),
+	SCRPD_DCIH_IS_ENABLED		= BIT(16),
+	SCRPD_VER_IS_ENABLED		= BIT(17),
+	/* External debugger is using Intel TH */
+	SCRPD_DEBUGGER_IN_USE		= BIT(24),
+};
+
+#endif
diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c
new file mode 100644
index 0000000..d293e55
--- /dev/null
+++ b/drivers/hwtracing/intel_th/msu.c
@@ -0,0 +1,1522 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel(R) Trace Hub Memory Storage Unit
+ *
+ * Copyright (C) 2014-2015 Intel Corporation.
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/uaccess.h>
+#include <linux/sizes.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/io.h>
+#include <linux/dma-mapping.h>
+
+#ifdef CONFIG_X86
+#include <asm/set_memory.h>
+#endif
+
+#include "intel_th.h"
+#include "msu.h"
+
+#define msc_dev(x) (&(x)->thdev->dev)
+
+/**
+ * struct msc_block - multiblock mode block descriptor
+ * @bdesc:	pointer to hardware descriptor (beginning of the block)
+ * @addr:	physical address of the block
+ */
+struct msc_block {
+	struct msc_block_desc	*bdesc;
+	dma_addr_t		addr;
+};
+
+/**
+ * struct msc_window - multiblock mode window descriptor
+ * @entry:	window list linkage (msc::win_list)
+ * @pgoff:	page offset into the buffer that this window starts at
+ * @nr_blocks:	number of blocks (pages) in this window
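+ * @msc:	back pointer to the MSC device that owns this window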
+ * @block:	array of block descriptors
+ */
+struct msc_window {
+	struct list_head	entry;
+	unsigned long		pgoff;
+	unsigned int		nr_blocks;
+	struct msc		*msc;
+	struct msc_block	block[0];
+};
+
+/**
+ * struct msc_iter - iterator for msc buffer
+ * @entry:		msc::iter_list linkage
+ * @msc:		pointer to the MSC device
+ * @start_win:		oldest window
+ * @win:		current window
+ * @offset:		current logical offset into the buffer
+ * @start_block:	oldest block in the window
+ * @block:		block number in the window
+ * @block_off:		offset into current block
+ * @wrap_count:		block wrapping handling
+ * @eof:		end of buffer reached
+ */
+struct msc_iter {
+	struct list_head	entry;
+	struct msc		*msc;
+	struct msc_window	*start_win;
+	struct msc_window	*win;
+	unsigned long		offset;
+	int			start_block;
+	int			block;
+	unsigned int		block_off;
+	unsigned int		wrap_count;
+	unsigned int		eof;
+};
+
+/**
+ * struct msc - MSC device representation
+ * @reg_base:		register window base address
+ * @thdev:		intel_th_device pointer
+ * @win_list:		list of windows in multiblock mode
+ * @nr_pages:		total number of pages allocated for this buffer
+ * @single_sz:		amount of data in single mode
+ * @single_wrap:	single mode wrap occurred
+ * @base:		buffer's base pointer
+ * @base_addr:		buffer's base address
+ * @user_count:		number of users of the buffer
+ * @mmap_count:		number of mappings
+ * @buf_mutex:		mutex to serialize access to buffer-related bits
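+ * @iter_list:		list of open reader iterators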
+ *
+ * @enabled:		MSC is enabled
+ * @wrap:		wrapping is enabled
+ * @mode:		MSC operating mode
+ * @burst_len:		write burst length
+ * @index:		number of this MSC in the MSU
+ */
+struct msc {
+	void __iomem		*reg_base;
+	struct intel_th_device	*thdev;
+
+	struct list_head	win_list;
+	unsigned long		nr_pages;
+	unsigned long		single_sz;
+	unsigned int		single_wrap : 1;
+	void			*base;
+	dma_addr_t		base_addr;
+
+	/* <0: no buffer, 0: no users, >0: active users */
+	atomic_t		user_count;
+
+	atomic_t		mmap_count;
+	struct mutex		buf_mutex;
+
+	struct list_head	iter_list;
+
+	/* config */
+	unsigned int		enabled : 1,
+				wrap	: 1;
+	unsigned int		mode;
+	unsigned int		burst_len;
+	unsigned int		index;
+};
+
+static inline bool msc_block_is_empty(struct msc_block_desc *bdesc)
+{
+	/* header hasn't been written */
+	if (!bdesc->valid_dw)
+		return true;
+
+	/* valid_dw includes the header */
+	if (!msc_data_sz(bdesc))
+		return true;
+
+	return false;
+}
+
+/**
+ * msc_oldest_window() - locate the window with oldest data
+ * @msc:	MSC device
+ *
+ * This should only be used in multiblock mode. Caller should hold the
+ * msc::user_count reference.
+ *
+ * Return:	the oldest window with valid data
+ */
+static struct msc_window *msc_oldest_window(struct msc *msc)
+{
+	struct msc_window *win;
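+	/* NWSA holds the start address (in pages) of the window HW will write next */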
+	u32 reg = ioread32(msc->reg_base + REG_MSU_MSC0NWSA);
+	unsigned long win_addr = (unsigned long)reg << PAGE_SHIFT;
+	unsigned int found = 0;
+
+	if (list_empty(&msc->win_list))
+		return NULL;
+
+	/*
+	 * we might need a radix tree for this, depending on how
+	 * many windows a typical user would allocate; ideally it's
+	 * something like 2, in which case we're good
+	 */
+	list_for_each_entry(win, &msc->win_list, entry) {
+		if (win->block[0].addr == win_addr)
+			found++;
+
+		/* skip the empty ones */
+		if (msc_block_is_empty(win->block[0].bdesc))
+			continue;
+
+		if (found)
+			return win;
+	}
+
+	return list_entry(msc->win_list.next, struct msc_window, entry);
+}
+
+/**
+ * msc_win_oldest_block() - locate the oldest block in a given window
+ * @win:	window to look at
+ *
+ * Return:	index of the block with the oldest data
+ */
+static unsigned int msc_win_oldest_block(struct msc_window *win)
+{
+	unsigned int blk;
+	struct msc_block_desc *bdesc = win->block[0].bdesc;
+
+	/* without wrapping, first block is the oldest */
+	if (!msc_block_wrapped(bdesc))
+		return 0;
+
+	/*
+	 * with wrapping, last written block contains both the newest and the
+	 * oldest data for this window.
+	 */
+	for (blk = 0; blk < win->nr_blocks; blk++) {
+		bdesc = win->block[blk].bdesc;
+
+		if (msc_block_last_written(bdesc))
+			return blk;
+	}
+
+	return 0;
+}
+
+/**
+ * msc_is_last_win() - check if a window is the last one for a given MSC
+ * @win:	window
+ * Return:	true if @win is the last window in MSC's multiblock buffer
+ */
+static inline bool msc_is_last_win(struct msc_window *win)
+{
+	return win->entry.next == &win->msc->win_list;
+}
+
+/**
+ * msc_next_window() - return next window in the multiblock buffer
+ * @win:	current window
+ *
+ * Return:	window following the current one
+ */
+static struct msc_window *msc_next_window(struct msc_window *win)
+{
+	if (msc_is_last_win(win))
+		return list_entry(win->msc->win_list.next, struct msc_window,
+				  entry);
+
+	return list_entry(win->entry.next, struct msc_window, entry);
+}
+
+static struct msc_block_desc *msc_iter_bdesc(struct msc_iter *iter)
+{
+	return iter->win->block[iter->block].bdesc;
+}
+
+static void msc_iter_init(struct msc_iter *iter)
+{
+	memset(iter, 0, sizeof(*iter));
+	iter->start_block = -1;
+	iter->block = -1;
+}
+
+static struct msc_iter *msc_iter_install(struct msc *msc)
+{
+	struct msc_iter *iter;
+
+	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
+	if (!iter)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_lock(&msc->buf_mutex);
+
+	/*
+	 * Reading and tracing are mutually exclusive; if msc is
+	 * enabled, open() will fail; otherwise existing readers
+	 * will prevent enabling the msc and the rest of fops don't
+	 * need to worry about it.
+	 */
+	if (msc->enabled) {
+		kfree(iter);
+		iter = ERR_PTR(-EBUSY);
+		goto unlock;
+	}
+
+	msc_iter_init(iter);
+	iter->msc = msc;
+
+	list_add_tail(&iter->entry, &msc->iter_list);
+unlock:
+	mutex_unlock(&msc->buf_mutex);
+
+	return iter;
+}
+
+static void msc_iter_remove(struct msc_iter *iter, struct msc *msc)
+{
+	mutex_lock(&msc->buf_mutex);
+	list_del(&iter->entry);
+	mutex_unlock(&msc->buf_mutex);
+
+	kfree(iter);
+}
+
+static void msc_iter_block_start(struct msc_iter *iter)
+{
+	if (iter->start_block != -1)
+		return;
+
+	iter->start_block = msc_win_oldest_block(iter->win);
+	iter->block = iter->start_block;
+	iter->wrap_count = 0;
+
+	/*
+	 * start with the block with oldest data; if data has wrapped
+	 * in this window, it should be in this block
+	 */
+	if (msc_block_wrapped(msc_iter_bdesc(iter)))
+		iter->wrap_count = 2;
+
+}
+
+static int msc_iter_win_start(struct msc_iter *iter, struct msc *msc)
+{
+	/* already started, nothing to do */
+	if (iter->start_win)
+		return 0;
+
+	iter->start_win = msc_oldest_window(msc);
+	if (!iter->start_win)
+		return -EINVAL;
+
+	iter->win = iter->start_win;
+	iter->start_block = -1;
+
+	msc_iter_block_start(iter);
+
+	return 0;
+}
+
+static int msc_iter_win_advance(struct msc_iter *iter)
+{
+	iter->win = msc_next_window(iter->win);
+	iter->start_block = -1;
+
+	if (iter->win == iter->start_win) {
+		iter->eof++;
+		return 1;
+	}
+
+	msc_iter_block_start(iter);
+
+	return 0;
+}
+
+static int msc_iter_block_advance(struct msc_iter *iter)
+{
+	iter->block_off = 0;
+
+	/* wrapping */
+	if (iter->wrap_count && iter->block == iter->start_block) {
+		iter->wrap_count--;
+		if (!iter->wrap_count)
+			/* copied newest data from the wrapped block */
+			return msc_iter_win_advance(iter);
+	}
+
+	/* no wrapping, check for last written block */
+	if (!iter->wrap_count && msc_block_last_written(msc_iter_bdesc(iter)))
+		/* copied newest data for the window */
+		return msc_iter_win_advance(iter);
+
+	/* block advance */
+	if (++iter->block == iter->win->nr_blocks)
+		iter->block = 0;
+
+	/* no wrapping, sanity check in case there is no last written block */
+	if (!iter->wrap_count && iter->block == iter->start_block)
+		return msc_iter_win_advance(iter);
+
+	return 0;
+}
+
+/**
+ * msc_buffer_iterate() - go through multiblock buffer's data
+ * @iter:	iterator structure
+ * @size:	amount of data to scan
+ * @data:	callback's private data
+ * @fn:		iterator callback
+ *
+ * This will start at the window which will be written to next (containing
+ * the oldest data) and work its way to the current window, calling @fn
+ * for each chunk of data as it goes.
+ *
+ * Caller should have msc::user_count reference to make sure the buffer
+ * doesn't disappear from under us.
+ *
+ * Return:	amount of data actually scanned.
+ */
+static ssize_t
+msc_buffer_iterate(struct msc_iter *iter, size_t size, void *data,
+		   unsigned long (*fn)(void *, void *, size_t))
+{
+	struct msc *msc = iter->msc;
+	size_t len = size;
+	unsigned int advance;
+
+	if (iter->eof)
+		return 0;
+
+	/* start with the oldest window */
+	if (msc_iter_win_start(iter, msc))
+		return 0;
+
+	do {
+		unsigned long data_bytes = msc_data_sz(msc_iter_bdesc(iter));
+		void *src = (void *)msc_iter_bdesc(iter) + MSC_BDESC;
+		size_t tocopy = data_bytes, copied = 0;
+		size_t remaining = 0;
+
+		advance = 1;
+
+		/*
+		 * If block wrapping happened, we need to visit the last block
+		 * twice, because it contains both the oldest and the newest
+		 * data in this window.
+		 *
+		 * First time (wrap_count==2), in the very beginning, to collect
+		 * the oldest data, which is in the range
+		 * (data_bytes..DATA_IN_PAGE).
+		 *
+		 * Second time (wrap_count==1), it's just like any other block,
+		 * containing data in the range of [MSC_BDESC..data_bytes].
+		 */
+		if (iter->block == iter->start_block && iter->wrap_count == 2) {
+			tocopy = DATA_IN_PAGE - data_bytes;
+			src += data_bytes;
+		}
+
+		if (!tocopy)
+			goto next_block;
+
+		tocopy -= iter->block_off;
+		src += iter->block_off;
+
+		if (len < tocopy) {
+			tocopy = len;
+			advance = 0;
+		}
+
+		remaining = fn(data, src, tocopy);
+
+		if (remaining)
+			advance = 0;
+
+		copied = tocopy - remaining;
+		len -= copied;
+		iter->block_off += copied;
+		iter->offset += copied;
+
+		if (!advance)
+			break;
+
+next_block:
+		if (msc_iter_block_advance(iter))
+			break;
+
+	} while (len);
+
+	return size - len;
+}
+
+/**
+ * msc_buffer_clear_hw_header() - clear hw header for multiblock
+ * @msc:	MSC device
+ */
+static void msc_buffer_clear_hw_header(struct msc *msc)
+{
+	struct msc_window *win;
+
+	list_for_each_entry(win, &msc->win_list, entry) {
+		unsigned int blk;
+		size_t hw_sz = sizeof(struct msc_block_desc) -
+			offsetof(struct msc_block_desc, hw_tag);
+
+		for (blk = 0; blk < win->nr_blocks; blk++) {
+			struct msc_block_desc *bdesc = win->block[blk].bdesc;
+
+			memset(&bdesc->hw_tag, 0, hw_sz);
+		}
+	}
+}
+
+/**
+ * msc_configure() - set up MSC hardware
+ * @msc:	the MSC device to configure
+ *
+ * Program storage mode, wrapping, burst length and trace buffer address
+ * into a given MSC. Then, enable tracing and set msc::enabled.
+ * The latter is serialized on msc::buf_mutex, so make sure to hold it.
+ */
+static int msc_configure(struct msc *msc)
+{
+	u32 reg;
+
+	lockdep_assert_held(&msc->buf_mutex);
+
+	if (msc->mode > MSC_MODE_MULTI)
+		return -ENOTSUPP;
+
+	if (msc->mode == MSC_MODE_MULTI)
+		msc_buffer_clear_hw_header(msc);
+
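+	/* the base address register takes the buffer address in pages */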
+	reg = msc->base_addr >> PAGE_SHIFT;
+	iowrite32(reg, msc->reg_base + REG_MSU_MSC0BAR);
+
+	if (msc->mode == MSC_MODE_SINGLE) {
+		reg = msc->nr_pages;
+		iowrite32(reg, msc->reg_base + REG_MSU_MSC0SIZE);
+	}
+
+	reg = ioread32(msc->reg_base + REG_MSU_MSC0CTL);
+	reg &= ~(MSC_MODE | MSC_WRAPEN | MSC_EN | MSC_RD_HDR_OVRD);
+
+	reg |= MSC_EN;
+	reg |= msc->mode << __ffs(MSC_MODE);
+	reg |= msc->burst_len << __ffs(MSC_LEN);
+
+	if (msc->wrap)
+		reg |= MSC_WRAPEN;
+
+	iowrite32(reg, msc->reg_base + REG_MSU_MSC0CTL);
+
+	msc->thdev->output.multiblock = msc->mode == MSC_MODE_MULTI;
+	intel_th_trace_enable(msc->thdev);
+	msc->enabled = 1;
+
+
+	return 0;
+}
+
+/**
+ * msc_disable() - disable MSC hardware
+ * @msc:	MSC device to disable
+ *
+ * If @msc is enabled, disable tracing on the switch and then disable MSC
+ * storage. Caller must hold msc::buf_mutex.
+ */
+static void msc_disable(struct msc *msc)
+{
+	unsigned long count;
+	u32 reg;
+
+	lockdep_assert_held(&msc->buf_mutex);
+
+	intel_th_trace_disable(msc->thdev);
+
+	for (reg = 0, count = MSC_PLE_WAITLOOP_DEPTH;
+	     count && !(reg & MSCSTS_PLE); count--) {
+		reg = ioread32(msc->reg_base + REG_MSU_MSC0STS);
+		cpu_relax();
+	}
+
+	if (!count)
+		dev_dbg(msc_dev(msc), "timeout waiting for MSC0 PLE\n");
+
+	if (msc->mode == MSC_MODE_SINGLE) {
+		msc->single_wrap = !!(reg & MSCSTS_WRAPSTAT);
+
+		reg = ioread32(msc->reg_base + REG_MSU_MSC0MWP);
+		msc->single_sz = reg & ((msc->nr_pages << PAGE_SHIFT) - 1);
+		dev_dbg(msc_dev(msc), "MSCnMWP: %08x/%08lx, wrap: %d\n",
+			reg, msc->single_sz, msc->single_wrap);
+	}
+
+	reg = ioread32(msc->reg_base + REG_MSU_MSC0CTL);
+	reg &= ~MSC_EN;
+	iowrite32(reg, msc->reg_base + REG_MSU_MSC0CTL);
+	msc->enabled = 0;
+
+	iowrite32(0, msc->reg_base + REG_MSU_MSC0BAR);
+	iowrite32(0, msc->reg_base + REG_MSU_MSC0SIZE);
+
+	dev_dbg(msc_dev(msc), "MSCnNWSA: %08x\n",
+		ioread32(msc->reg_base + REG_MSU_MSC0NWSA));
+
+	reg = ioread32(msc->reg_base + REG_MSU_MSC0STS);
+	dev_dbg(msc_dev(msc), "MSCnSTS: %08x\n", reg);
+}
+
+static int intel_th_msc_activate(struct intel_th_device *thdev)
+{
+	struct msc *msc = dev_get_drvdata(&thdev->dev);
+	int ret = -EBUSY;
+
+	if (!atomic_inc_unless_negative(&msc->user_count))
+		return -ENODEV;
+
+	mutex_lock(&msc->buf_mutex);
+
+	/* if there are readers, refuse */
+	if (list_empty(&msc->iter_list))
+		ret = msc_configure(msc);
+
+	mutex_unlock(&msc->buf_mutex);
+
+	if (ret)
+		atomic_dec(&msc->user_count);
+
+	return ret;
+}
+
+static void intel_th_msc_deactivate(struct intel_th_device *thdev)
+{
+	struct msc *msc = dev_get_drvdata(&thdev->dev);
+
+	mutex_lock(&msc->buf_mutex);
+	if (msc->enabled) {
+		msc_disable(msc);
+		atomic_dec(&msc->user_count);
+	}
+	mutex_unlock(&msc->buf_mutex);
+}
+
+/**
+ * msc_buffer_contig_alloc() - allocate a contiguous buffer for SINGLE mode
+ * @msc:	MSC device
+ * @size:	allocation size in bytes
+ *
+ * This modifies msc::base, which requires msc::buf_mutex to serialize, so the
+ * caller is expected to hold it.
+ *
+ * Return:	0 on success, -errno otherwise.
+ */
+static int msc_buffer_contig_alloc(struct msc *msc, unsigned long size)
+{
+	unsigned int order = get_order(size);
+	struct page *page;
+
+	if (!size)
+		return 0;
+
+	page = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
+	if (!page)
+		return -ENOMEM;
+
+	split_page(page, order);
+	msc->nr_pages = size >> PAGE_SHIFT;
+	msc->base = page_address(page);
+	msc->base_addr = page_to_phys(page);
+
+	return 0;
+}
+
+/**
+ * msc_buffer_contig_free() - free a contiguous buffer
+ * @msc:	MSC configured in SINGLE mode
+ */
+static void msc_buffer_contig_free(struct msc *msc)
+{
+	unsigned long off;
+
+	for (off = 0; off < msc->nr_pages << PAGE_SHIFT; off += PAGE_SIZE) {
+		struct page *page = virt_to_page(msc->base + off);
+
+		page->mapping = NULL;
+		__free_page(page);
+	}
+
+	msc->nr_pages = 0;
+}
+
+/**
+ * msc_buffer_contig_get_page() - find a page at a given offset
+ * @msc:	MSC configured in SINGLE mode
+ * @pgoff:	page offset
+ *
+ * Return:	page, if @pgoff is within the range, NULL otherwise.
+ */
+static struct page *msc_buffer_contig_get_page(struct msc *msc,
+					       unsigned long pgoff)
+{
+	if (pgoff >= msc->nr_pages)
+		return NULL;
+
+	return virt_to_page(msc->base + (pgoff << PAGE_SHIFT));
+}
+
+/**
+ * msc_buffer_win_alloc() - allocate a window for multiblock mode
+ * @msc:	MSC device
+ * @nr_blocks:	number of pages in this window
+ *
+ * This modifies msc::win_list and msc::base, which requires msc::buf_mutex
+ * to serialize, so the caller is expected to hold it.
+ *
+ * Return:	0 on success, -errno otherwise.
+ */
+static int msc_buffer_win_alloc(struct msc *msc, unsigned int nr_blocks)
+{
+	struct msc_window *win;
+	unsigned long size = PAGE_SIZE;
+	int i, ret = -ENOMEM;
+
+	if (!nr_blocks)
+		return 0;
+
+	win = kzalloc(offsetof(struct msc_window, block[nr_blocks]),
+		      GFP_KERNEL);
+	if (!win)
+		return -ENOMEM;
+
+	if (!list_empty(&msc->win_list)) {
+		struct msc_window *prev = list_entry(msc->win_list.prev,
+						     struct msc_window, entry);
+
+		win->pgoff = prev->pgoff + prev->nr_blocks;
+	}
+
+	for (i = 0; i < nr_blocks; i++) {
+		win->block[i].bdesc =
+			dma_alloc_coherent(msc_dev(msc)->parent->parent, size,
+					   &win->block[i].addr, GFP_KERNEL);
+
+		if (!win->block[i].bdesc)
+			goto err_nomem;
+
+#ifdef CONFIG_X86
+		/* Set the page as uncached */
+		set_memory_uc((unsigned long)win->block[i].bdesc, 1);
+#endif
+	}
+
+	win->msc = msc;
+	win->nr_blocks = nr_blocks;
+
+	if (list_empty(&msc->win_list)) {
+		msc->base = win->block[0].bdesc;
+		msc->base_addr = win->block[0].addr;
+	}
+
+	list_add_tail(&win->entry, &msc->win_list);
+	msc->nr_pages += nr_blocks;
+
+	return 0;
+
+err_nomem:
+	for (i--; i >= 0; i--) {
+#ifdef CONFIG_X86
+		/* Reset the page to write-back before releasing */
+		set_memory_wb((unsigned long)win->block[i].bdesc, 1);
+#endif
+		dma_free_coherent(msc_dev(msc)->parent->parent, size,
+				  win->block[i].bdesc, win->block[i].addr);
+	}
+	kfree(win);
+
+	return ret;
+}
+
+/**
+ * msc_buffer_win_free() - free a window from MSC's window list
+ * @msc:	MSC device
+ * @win:	window to free
+ *
+ * This modifies msc::win_list and msc::base, which requires msc::buf_mutex
+ * to serialize, so the caller is expected to hold it.
+ */
+static void msc_buffer_win_free(struct msc *msc, struct msc_window *win)
+{
+	int i;
+
+	msc->nr_pages -= win->nr_blocks;
+
+	list_del(&win->entry);
+	if (list_empty(&msc->win_list)) {
+		msc->base = NULL;
+		msc->base_addr = 0;
+	}
+
+	for (i = 0; i < win->nr_blocks; i++) {
+		struct page *page = virt_to_page(win->block[i].bdesc);
+
+		page->mapping = NULL;
+#ifdef CONFIG_X86
+		/* Reset the page to write-back before releasing */
+		set_memory_wb((unsigned long)win->block[i].bdesc, 1);
+#endif
+		dma_free_coherent(msc_dev(win->msc)->parent->parent, PAGE_SIZE,
+				  win->block[i].bdesc, win->block[i].addr);
+	}
+
+	kfree(win);
+}
+
+/**
+ * msc_buffer_relink() - set up block descriptors for multiblock mode
+ * @msc:	MSC device
+ *
+ * This traverses msc::win_list, which requires msc::buf_mutex to serialize,
+ * so the caller is expected to hold it.
+ */
+static void msc_buffer_relink(struct msc *msc)
+{
+	struct msc_window *win, *next_win;
+
+	/* call with msc::buf_mutex locked */
+	list_for_each_entry(win, &msc->win_list, entry) {
+		unsigned int blk;
+		u32 sw_tag = 0;
+
+		/*
+		 * Last window's next_win should point to the first window
+		 * and MSC_SW_TAG_LASTWIN should be set.
+		 */
+		if (msc_is_last_win(win)) {
+			sw_tag |= MSC_SW_TAG_LASTWIN;
+			next_win = list_entry(msc->win_list.next,
+					      struct msc_window, entry);
+		} else {
+			next_win = list_entry(win->entry.next,
+					      struct msc_window, entry);
+		}
+
+		for (blk = 0; blk < win->nr_blocks; blk++) {
+			struct msc_block_desc *bdesc = win->block[blk].bdesc;
+
+			memset(bdesc, 0, sizeof(*bdesc));
+
+			bdesc->next_win = next_win->block[0].addr >> PAGE_SHIFT;
+
+			/*
+			 * Similarly to last window, last block should point
+			 * to the first one.
+			 */
+			if (blk == win->nr_blocks - 1) {
+				sw_tag |= MSC_SW_TAG_LASTBLK;
+				bdesc->next_blk =
+					win->block[0].addr >> PAGE_SHIFT;
+			} else {
+				bdesc->next_blk =
+					win->block[blk + 1].addr >> PAGE_SHIFT;
+			}
+
+			bdesc->sw_tag = sw_tag;
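+			/* hardware expresses the block size in 64-byte units */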
+			bdesc->block_sz = PAGE_SIZE / 64;
+		}
+	}
+
+	/*
+	 * Make the above writes globally visible before tracing is
+	 * enabled to make sure hardware sees them coherently.
+	 */
+	wmb();
+}
+
+static void msc_buffer_multi_free(struct msc *msc)
+{
+	struct msc_window *win, *iter;
+
+	list_for_each_entry_safe(win, iter, &msc->win_list, entry)
+		msc_buffer_win_free(msc, win);
+}
+
+static int msc_buffer_multi_alloc(struct msc *msc, unsigned long *nr_pages,
+				  unsigned int nr_wins)
+{
+	int ret, i;
+
+	for (i = 0; i < nr_wins; i++) {
+		ret = msc_buffer_win_alloc(msc, nr_pages[i]);
+		if (ret) {
+			msc_buffer_multi_free(msc);
+			return ret;
+		}
+	}
+
+	msc_buffer_relink(msc);
+
+	return 0;
+}
+
+/**
+ * msc_buffer_free() - free buffers for MSC
+ * @msc:	MSC device
+ *
+ * Free MSC's storage buffers.
+ *
+ * This modifies msc::win_list and msc::base, which requires msc::buf_mutex to
+ * serialize, so the caller is expected to hold it.
+ */
+static void msc_buffer_free(struct msc *msc)
+{
+	if (msc->mode == MSC_MODE_SINGLE)
+		msc_buffer_contig_free(msc);
+	else if (msc->mode == MSC_MODE_MULTI)
+		msc_buffer_multi_free(msc);
+}
+
+/**
+ * msc_buffer_alloc() - allocate a buffer for MSC
+ * @msc:	MSC device
+ * @nr_pages:	array holding the number of pages per window
+ * @nr_wins:	number of windows (entries in @nr_pages)
+ *
+ * Allocate a storage buffer for MSC; depending on msc::mode, it is done
+ * either via msc_buffer_contig_alloc() for SINGLE operation mode or via
+ * msc_buffer_win_alloc() for multiblock operation. The latter allocates one
+ * window per invocation and is therefore called once per requested window.
+ *
+ * This modifies msc::win_list and msc::base, which requires msc::buf_mutex
+ * to serialize, so the caller is expected to hold it.
+ *
+ * Return:	0 on success, -errno otherwise.
+ */
+static int msc_buffer_alloc(struct msc *msc, unsigned long *nr_pages,
+			    unsigned int nr_wins)
+{
+	int ret;
+
+	/* -1: buffer not allocated */
+	if (atomic_read(&msc->user_count) != -1)
+		return -EBUSY;
+
+	if (msc->mode == MSC_MODE_SINGLE) {
+		if (nr_wins != 1)
+			return -EINVAL;
+
+		ret = msc_buffer_contig_alloc(msc, nr_pages[0] << PAGE_SHIFT);
+	} else if (msc->mode == MSC_MODE_MULTI) {
+		ret = msc_buffer_multi_alloc(msc, nr_pages, nr_wins);
+	} else {
+		ret = -ENOTSUPP;
+	}
+
+	if (!ret) {
+		/* allocation should be visible before the counter goes to 0 */
+		smp_mb__before_atomic();
+
+		if (WARN_ON_ONCE(atomic_cmpxchg(&msc->user_count, -1, 0) != -1))
+			return -EINVAL;
+	}
+
+	return ret;
+}
+
+/**
+ * msc_buffer_unlocked_free_unless_used() - free a buffer unless it's in use
+ * @msc:	MSC device
+ *
+ * This will free MSC buffer unless it is in use or there is no allocated
+ * buffer.
+ * Caller needs to hold msc::buf_mutex.
+ *
+ * Return:	0 on successful deallocation or if there was no buffer to
+ *		deallocate, -EBUSY if there are active users.
+ */
+static int msc_buffer_unlocked_free_unless_used(struct msc *msc)
+{
+	int count, ret = 0;
+
+	count = atomic_cmpxchg(&msc->user_count, 0, -1);
+
+	/* > 0: buffer is allocated and has users */
+	if (count > 0)
+		ret = -EBUSY;
+	/* 0: buffer is allocated, no users */
+	else if (!count)
+		msc_buffer_free(msc);
+	/* < 0: no buffer, nothing to do */
+
+	return ret;
+}
+
+/**
+ * msc_buffer_free_unless_used() - free a buffer unless it's in use
+ * @msc:	MSC device
+ *
+ * This is a locked version of msc_buffer_unlocked_free_unless_used().
+ */
+static int msc_buffer_free_unless_used(struct msc *msc)
+{
+	int ret;
+
+	mutex_lock(&msc->buf_mutex);
+	ret = msc_buffer_unlocked_free_unless_used(msc);
+	mutex_unlock(&msc->buf_mutex);
+
+	return ret;
+}
+
+/**
+ * msc_buffer_get_page() - get MSC buffer page at a given offset
+ * @msc:	MSC device
+ * @pgoff:	page offset into the storage buffer
+ *
+ * This traverses msc::win_list, so holding msc::buf_mutex is expected from
+ * the caller.
+ *
+ * Return:	page if @pgoff corresponds to a valid buffer page or NULL.
+ */
+static struct page *msc_buffer_get_page(struct msc *msc, unsigned long pgoff)
+{
+	struct msc_window *win;
+
+	if (msc->mode == MSC_MODE_SINGLE)
+		return msc_buffer_contig_get_page(msc, pgoff);
+
+	list_for_each_entry(win, &msc->win_list, entry)
+		if (pgoff >= win->pgoff && pgoff < win->pgoff + win->nr_blocks)
+			goto found;
+
+	return NULL;
+
+found:
+	pgoff -= win->pgoff;
+	return virt_to_page(win->block[pgoff].bdesc);
+}
+
+/**
+ * struct msc_win_to_user_struct - data for copy_to_user() callback
+ * @buf:	userspace buffer to copy data to
+ * @offset:	running offset
+ */
+struct msc_win_to_user_struct {
+	char __user	*buf;
+	unsigned long	offset;
+};
+
+/**
+ * msc_win_to_user() - iterator for msc_buffer_iterate() to copy data to user
+ * @data:	callback's private data
+ * @src:	source buffer
+ * @len:	amount of data to copy from the source buffer
+ */
+static unsigned long msc_win_to_user(void *data, void *src, size_t len)
+{
+	struct msc_win_to_user_struct *u = data;
+	unsigned long ret;
+
+	ret = copy_to_user(u->buf + u->offset, src, len);
+	u->offset += len - ret;
+
+	return ret;
+}
+
+
+/*
+ * file operations' callbacks
+ */
+
+static int intel_th_msc_open(struct inode *inode, struct file *file)
+{
+	struct intel_th_device *thdev = file->private_data;
+	struct msc *msc = dev_get_drvdata(&thdev->dev);
+	struct msc_iter *iter;
+
+	if (!capable(CAP_SYS_RAWIO))
+		return -EPERM;
+
+	iter = msc_iter_install(msc);
+	if (IS_ERR(iter))
+		return PTR_ERR(iter);
+
+	file->private_data = iter;
+
+	return nonseekable_open(inode, file);
+}
+
+static int intel_th_msc_release(struct inode *inode, struct file *file)
+{
+	struct msc_iter *iter = file->private_data;
+	struct msc *msc = iter->msc;
+
+	msc_iter_remove(iter, msc);
+
+	return 0;
+}
+
+static ssize_t
+msc_single_to_user(struct msc *msc, char __user *buf, loff_t off, size_t len)
+{
+	unsigned long size = msc->nr_pages << PAGE_SHIFT, rem = len;
+	unsigned long start = off, tocopy = 0;
+
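+	/*
+	 * When the buffer has wrapped, logical offset 0 corresponds to
+	 * physical offset single_sz: copy from there to the end of the
+	 * buffer first, then wrap around to the beginning.
+	 */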
+	if (msc->single_wrap) {
+		start += msc->single_sz;
+		if (start < size) {
+			tocopy = min(rem, size - start);
+			if (copy_to_user(buf, msc->base + start, tocopy))
+				return -EFAULT;
+
+			buf += tocopy;
+			rem -= tocopy;
+			start += tocopy;
+		}
+
+		start &= size - 1;
+		if (rem) {
+			tocopy = min(rem, msc->single_sz - start);
+			if (copy_to_user(buf, msc->base + start, tocopy))
+				return -EFAULT;
+
+			rem -= tocopy;
+		}
+
+		return len - rem;
+	}
+
+	if (copy_to_user(buf, msc->base + start, rem))
+		return -EFAULT;
+
+	return len;
+}
+
+static ssize_t intel_th_msc_read(struct file *file, char __user *buf,
+				 size_t len, loff_t *ppos)
+{
+	struct msc_iter *iter = file->private_data;
+	struct msc *msc = iter->msc;
+	size_t size;
+	loff_t off = *ppos;
+	ssize_t ret = 0;
+
+	if (!atomic_inc_unless_negative(&msc->user_count))
+		return 0;
+
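+	/* before a wrap, only single_sz bytes of the single mode buffer are valid */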
+	if (msc->mode == MSC_MODE_SINGLE && !msc->single_wrap)
+		size = msc->single_sz;
+	else
+		size = msc->nr_pages << PAGE_SHIFT;
+
+	if (!size)
+		goto put_count;
+
+	if (off >= size)
+		goto put_count;
+
+	if (off + len >= size)
+		len = size - off;
+
+	if (msc->mode == MSC_MODE_SINGLE) {
+		ret = msc_single_to_user(msc, buf, off, len);
+		if (ret >= 0)
+			*ppos += ret;
+	} else if (msc->mode == MSC_MODE_MULTI) {
+		struct msc_win_to_user_struct u = {
+			.buf	= buf,
+			.offset	= 0,
+		};
+
+		ret = msc_buffer_iterate(iter, len, &u, msc_win_to_user);
+		if (ret >= 0)
+			*ppos = iter->offset;
+	} else {
+		ret = -ENOTSUPP;
+	}
+
+put_count:
+	atomic_dec(&msc->user_count);
+
+	return ret;
+}
+
+/*
+ * vm operations callbacks (vm_ops)
+ */
+
+static void msc_mmap_open(struct vm_area_struct *vma)
+{
+	struct msc_iter *iter = vma->vm_file->private_data;
+	struct msc *msc = iter->msc;
+
+	atomic_inc(&msc->mmap_count);
+}
+
+static void msc_mmap_close(struct vm_area_struct *vma)
+{
+	struct msc_iter *iter = vma->vm_file->private_data;
+	struct msc *msc = iter->msc;
+	unsigned long pg;
+
+	if (!atomic_dec_and_mutex_lock(&msc->mmap_count, &msc->buf_mutex))
+		return;
+
+	/* clear page->mapping that was set up in msc_mmap_fault() */
+	for (pg = 0; pg < msc->nr_pages; pg++) {
+		struct page *page = msc_buffer_get_page(msc, pg);
+
+		if (WARN_ON_ONCE(!page))
+			continue;
+
+		if (page->mapping)
+			page->mapping = NULL;
+	}
+
+	/* last mapping -- drop user_count */
+	atomic_dec(&msc->user_count);
+	mutex_unlock(&msc->buf_mutex);
+}
+
+static vm_fault_t msc_mmap_fault(struct vm_fault *vmf)
+{
+	struct msc_iter *iter = vmf->vma->vm_file->private_data;
+	struct msc *msc = iter->msc;
+
+	vmf->page = msc_buffer_get_page(msc, vmf->pgoff);
+	if (!vmf->page)
+		return VM_FAULT_SIGBUS;
+
+	get_page(vmf->page);
+	vmf->page->mapping = vmf->vma->vm_file->f_mapping;
+	vmf->page->index = vmf->pgoff;
+
+	return 0;
+}
+
+static const struct vm_operations_struct msc_mmap_ops = {
+	.open	= msc_mmap_open,
+	.close	= msc_mmap_close,
+	.fault	= msc_mmap_fault,
+};
+
+static int intel_th_msc_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	unsigned long size = vma->vm_end - vma->vm_start;
+	struct msc_iter *iter = vma->vm_file->private_data;
+	struct msc *msc = iter->msc;
+	int ret = -EINVAL;
+
+	if (!size || offset_in_page(size))
+		return -EINVAL;
+
+	if (vma->vm_pgoff)
+		return -EINVAL;
+
+	/* grab user_count once per mmap; drop in msc_mmap_close() */
+	if (!atomic_inc_unless_negative(&msc->user_count))
+		return -EINVAL;
+
+	if (msc->mode != MSC_MODE_SINGLE &&
+	    msc->mode != MSC_MODE_MULTI)
+		goto out;
+
+	if (size >> PAGE_SHIFT != msc->nr_pages)
+		goto out;
+
+	atomic_set(&msc->mmap_count, 1);
+	ret = 0;
+
+out:
+	if (ret)
+		atomic_dec(&msc->user_count);
+
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTCOPY;
+	vma->vm_ops = &msc_mmap_ops;
+	return ret;
+}
+
+static const struct file_operations intel_th_msc_fops = {
+	.open		= intel_th_msc_open,
+	.release	= intel_th_msc_release,
+	.read		= intel_th_msc_read,
+	.mmap		= intel_th_msc_mmap,
+	.llseek		= no_llseek,
+	.owner		= THIS_MODULE,
+};
+
+static int intel_th_msc_init(struct msc *msc)
+{
+	atomic_set(&msc->user_count, -1);
+
+	msc->mode = MSC_MODE_MULTI;
+	mutex_init(&msc->buf_mutex);
+	INIT_LIST_HEAD(&msc->win_list);
+	INIT_LIST_HEAD(&msc->iter_list);
+
+	msc->burst_len =
+		(ioread32(msc->reg_base + REG_MSU_MSC0CTL) & MSC_LEN) >>
+		__ffs(MSC_LEN);
+
+	return 0;
+}
+
+static const char * const msc_mode[] = {
+	[MSC_MODE_SINGLE]	= "single",
+	[MSC_MODE_MULTI]	= "multi",
+	[MSC_MODE_EXI]		= "ExI",
+	[MSC_MODE_DEBUG]	= "debug",
+};
+
+static ssize_t
+wrap_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct msc *msc = dev_get_drvdata(dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n", msc->wrap);
+}
+
+static ssize_t
+wrap_store(struct device *dev, struct device_attribute *attr, const char *buf,
+	   size_t size)
+{
+	struct msc *msc = dev_get_drvdata(dev);
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	msc->wrap = !!val;
+
+	return size;
+}
+
+static DEVICE_ATTR_RW(wrap);
+
+static ssize_t
+mode_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct msc *msc = dev_get_drvdata(dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n", msc_mode[msc->mode]);
+}
+
+static ssize_t
+mode_store(struct device *dev, struct device_attribute *attr, const char *buf,
+	   size_t size)
+{
+	struct msc *msc = dev_get_drvdata(dev);
+	size_t len = size;
+	char *cp;
+	int i, ret;
+
+	if (!capable(CAP_SYS_RAWIO))
+		return -EPERM;
+
+	cp = memchr(buf, '\n', len);
+	if (cp)
+		len = cp - buf;
+
+	for (i = 0; i < ARRAY_SIZE(msc_mode); i++)
+		if (!strncmp(msc_mode[i], buf, len))
+			goto found;
+
+	return -EINVAL;
+
+found:
+	mutex_lock(&msc->buf_mutex);
+	ret = msc_buffer_unlocked_free_unless_used(msc);
+	if (!ret)
+		msc->mode = i;
+	mutex_unlock(&msc->buf_mutex);
+
+	return ret ? ret : size;
+}
+
+static DEVICE_ATTR_RW(mode);
+
+static ssize_t
+nr_pages_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct msc *msc = dev_get_drvdata(dev);
+	struct msc_window *win;
+	size_t count = 0;
+
+	mutex_lock(&msc->buf_mutex);
+
+	if (msc->mode == MSC_MODE_SINGLE)
+		count = scnprintf(buf, PAGE_SIZE, "%ld\n", msc->nr_pages);
+	else if (msc->mode == MSC_MODE_MULTI) {
+		list_for_each_entry(win, &msc->win_list, entry) {
+			count += scnprintf(buf + count, PAGE_SIZE - count,
+					   "%d%c", win->nr_blocks,
+					   msc_is_last_win(win) ? '\n' : ',');
+		}
+	} else {
+		count = scnprintf(buf, PAGE_SIZE, "unsupported\n");
+	}
+
+	mutex_unlock(&msc->buf_mutex);
+
+	return count;
+}
+
+static ssize_t
+nr_pages_store(struct device *dev, struct device_attribute *attr,
+	       const char *buf, size_t size)
+{
+	struct msc *msc = dev_get_drvdata(dev);
+	unsigned long val, *win = NULL, *rewin;
+	size_t len = size;
+	const char *p = buf;
+	char *end, *s;
+	int ret, nr_wins = 0;
+
+	if (!capable(CAP_SYS_RAWIO))
+		return -EPERM;
+
+	ret = msc_buffer_free_unless_used(msc);
+	if (ret)
+		return ret;
+
+	/* scan the comma-separated list of allocation sizes */
+	end = memchr(buf, '\n', len);
+	if (end)
+		len = end - buf;
+
+	do {
+		end = memchr(p, ',', len);
+		s = kstrndup(p, end ? end - p : len, GFP_KERNEL);
+		if (!s) {
+			ret = -ENOMEM;
+			goto free_win;
+		}
+
+		ret = kstrtoul(s, 10, &val);
+		kfree(s);
+
+		if (ret || !val)
+			goto free_win;
+
+		if (nr_wins && msc->mode == MSC_MODE_SINGLE) {
+			ret = -EINVAL;
+			goto free_win;
+		}
+
+		nr_wins++;
+		rewin = krealloc(win, sizeof(*win) * nr_wins, GFP_KERNEL);
+		if (!rewin) {
+			kfree(win);
+			return -ENOMEM;
+		}
+
+		win = rewin;
+		win[nr_wins - 1] = val;
+
+		if (!end)
+			break;
+
+		/* consume the number and the following comma, hence +1 */
+		len -= end - p + 1;
+		p = end + 1;
+	} while (len);
+
+	mutex_lock(&msc->buf_mutex);
+	ret = msc_buffer_alloc(msc, win, nr_wins);
+	mutex_unlock(&msc->buf_mutex);
+
+free_win:
+	kfree(win);
+
+	return ret ? ret : size;
+}
+
+static DEVICE_ATTR_RW(nr_pages);
+
+static struct attribute *msc_output_attrs[] = {
+	&dev_attr_wrap.attr,
+	&dev_attr_mode.attr,
+	&dev_attr_nr_pages.attr,
+	NULL,
+};
+
+static struct attribute_group msc_output_group = {
+	.attrs	= msc_output_attrs,
+};
+
+static int intel_th_msc_probe(struct intel_th_device *thdev)
+{
+	struct device *dev = &thdev->dev;
+	struct resource *res;
+	struct msc *msc;
+	void __iomem *base;
+	int err;
+
+	res = intel_th_device_get_resource(thdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENODEV;
+
+	base = devm_ioremap(dev, res->start, resource_size(res));
+	if (!base)
+		return -ENOMEM;
+
+	msc = devm_kzalloc(dev, sizeof(*msc), GFP_KERNEL);
+	if (!msc)
+		return -ENOMEM;
+
+	msc->index = thdev->id;
+
+	msc->thdev = thdev;
+	msc->reg_base = base + msc->index * 0x100;
+
+	err = intel_th_msc_init(msc);
+	if (err)
+		return err;
+
+	dev_set_drvdata(dev, msc);
+
+	return 0;
+}
+
+static void intel_th_msc_remove(struct intel_th_device *thdev)
+{
+	struct msc *msc = dev_get_drvdata(&thdev->dev);
+	int ret;
+
+	intel_th_msc_deactivate(thdev);
+
+	/*
+	 * Buffers should not be used at this point except if the
+	 * output character device is still open and the parent
+	 * device gets detached from its bus, which is a FIXME.
+	 */
+	ret = msc_buffer_free_unless_used(msc);
+	WARN_ON_ONCE(ret);
+}
+
+static struct intel_th_driver intel_th_msc_driver = {
+	.probe	= intel_th_msc_probe,
+	.remove	= intel_th_msc_remove,
+	.activate	= intel_th_msc_activate,
+	.deactivate	= intel_th_msc_deactivate,
+	.fops	= &intel_th_msc_fops,
+	.attr_group	= &msc_output_group,
+	.driver	= {
+		.name	= "msc",
+		.owner	= THIS_MODULE,
+	},
+};
+
+module_driver(intel_th_msc_driver,
+	      intel_th_driver_register,
+	      intel_th_driver_unregister);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Intel(R) Trace Hub Memory Storage Unit driver");
+MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@linux.intel.com>");
diff --git a/drivers/hwtracing/intel_th/msu.h b/drivers/hwtracing/intel_th/msu.h
new file mode 100644
index 0000000..9cc8ace
--- /dev/null
+++ b/drivers/hwtracing/intel_th/msu.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Intel(R) Trace Hub Memory Storage Unit (MSU) data structures
+ *
+ * Copyright (C) 2014-2015 Intel Corporation.
+ */
+
+#ifndef __INTEL_TH_MSU_H__
+#define __INTEL_TH_MSU_H__
+
+enum {
+	REG_MSU_MSUPARAMS	= 0x0000,
+	REG_MSU_MSUSTS		= 0x0008,
+	REG_MSU_MSC0CTL		= 0x0100, /* MSC0 control */
+	REG_MSU_MSC0STS		= 0x0104, /* MSC0 status */
+	REG_MSU_MSC0BAR		= 0x0108, /* MSC0 output base address */
+	REG_MSU_MSC0SIZE	= 0x010c, /* MSC0 output size */
+	REG_MSU_MSC0MWP		= 0x0110, /* MSC0 write pointer */
+	REG_MSU_MSC0NWSA	= 0x011c, /* MSC0 next window start address */
+
+	REG_MSU_MSC1CTL		= 0x0200, /* MSC1 control */
+	REG_MSU_MSC1STS		= 0x0204, /* MSC1 status */
+	REG_MSU_MSC1BAR		= 0x0208, /* MSC1 output base address */
+	REG_MSU_MSC1SIZE	= 0x020c, /* MSC1 output size */
+	REG_MSU_MSC1MWP		= 0x0210, /* MSC1 write pointer */
+	REG_MSU_MSC1NWSA	= 0x021c, /* MSC1 next window start address */
+};
+
+/* MSUSTS bits */
+#define MSUSTS_MSU_INT	BIT(0)
+
+/* MSCnCTL bits */
+#define MSC_EN		BIT(0)
+#define MSC_WRAPEN	BIT(1)
+#define MSC_RD_HDR_OVRD	BIT(2)
+#define MSC_MODE	(BIT(4) | BIT(5))
+#define MSC_LEN		(BIT(8) | BIT(9) | BIT(10))
+
+/* MSC operating modes (MSC_MODE) */
+enum {
+	MSC_MODE_SINGLE	= 0,
+	MSC_MODE_MULTI,
+	MSC_MODE_EXI,
+	MSC_MODE_DEBUG,
+};
+
+/* MSCnSTS bits */
+#define MSCSTS_WRAPSTAT	BIT(1)	/* Wrap occurred */
+#define MSCSTS_PLE	BIT(2)	/* Pipeline Empty */
+
+/*
+ * Multiblock/multiwindow block descriptor
+ */
+struct msc_block_desc {
+	u32	sw_tag;
+	u32	block_sz;
+	u32	next_blk;
+	u32	next_win;
+	u32	res0[4];
+	u32	hw_tag;
+	u32	valid_dw;
+	u32	ts_low;
+	u32	ts_high;
+	u32	res1[4];
+} __packed;
+
+#define MSC_BDESC	sizeof(struct msc_block_desc)
+#define DATA_IN_PAGE	(PAGE_SIZE - MSC_BDESC)
+
+/* MSC multiblock sw tag bits */
+#define MSC_SW_TAG_LASTBLK	BIT(0)
+#define MSC_SW_TAG_LASTWIN	BIT(1)
+
+/* MSC multiblock hw tag bits */
+#define MSC_HW_TAG_TRIGGER	BIT(0)
+#define MSC_HW_TAG_BLOCKWRAP	BIT(1)
+#define MSC_HW_TAG_WINWRAP	BIT(2)
+#define MSC_HW_TAG_ENDBIT	BIT(3)
+
+static inline unsigned long msc_data_sz(struct msc_block_desc *bdesc)
+{
+	if (!bdesc->valid_dw)
+		return 0;
+
+	return bdesc->valid_dw * 4 - MSC_BDESC;
+}
+
+static inline bool msc_block_wrapped(struct msc_block_desc *bdesc)
+{
+	if (bdesc->hw_tag & MSC_HW_TAG_BLOCKWRAP)
+		return true;
+
+	return false;
+}
+
+static inline bool msc_block_last_written(struct msc_block_desc *bdesc)
+{
+	if ((bdesc->hw_tag & MSC_HW_TAG_ENDBIT) ||
+	    (msc_data_sz(bdesc) != DATA_IN_PAGE))
+		return true;
+
+	return false;
+}
+
+/* waiting for Pipeline Empty bit(s) to assert for MSC */
+#define MSC_PLE_WAITLOOP_DEPTH	10000
+
+#endif /* __INTEL_TH_MSU_H__ */
diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c
new file mode 100644
index 0000000..1cf6290
--- /dev/null
+++ b/drivers/hwtracing/intel_th/pci.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel(R) Trace Hub pci driver
+ *
+ * Copyright (C) 2014-2015 Intel Corporation.
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/sysfs.h>
+#include <linux/pci.h>
+
+#include "intel_th.h"
+
+#define DRIVER_NAME "intel_th_pci"
+
+#define BAR_MASK (BIT(TH_MMIO_CONFIG) | BIT(TH_MMIO_SW))
+
+#define PCI_REG_NPKDSC	0x80
+#define NPKDSC_TSACT	BIT(5)
+
+static int intel_th_pci_activate(struct intel_th *th)
+{
+	struct pci_dev *pdev = to_pci_dev(th->dev);
+	u32 npkdsc;
+	int err;
+
+	if (!INTEL_TH_CAP(th, tscu_enable))
+		return 0;
+
+	err = pci_read_config_dword(pdev, PCI_REG_NPKDSC, &npkdsc);
+	if (!err) {
+		npkdsc |= NPKDSC_TSACT;
+		err = pci_write_config_dword(pdev, PCI_REG_NPKDSC, npkdsc);
+	}
+
+	if (err)
+		dev_err(&pdev->dev, "failed to read NPKDSC register\n");
+
+	return err;
+}
+
+static void intel_th_pci_deactivate(struct intel_th *th)
+{
+	struct pci_dev *pdev = to_pci_dev(th->dev);
+	u32 npkdsc;
+	int err;
+
+	if (!INTEL_TH_CAP(th, tscu_enable))
+		return;
+
+	err = pci_read_config_dword(pdev, PCI_REG_NPKDSC, &npkdsc);
+	if (!err) {
+		npkdsc &= ~NPKDSC_TSACT;
+		err = pci_write_config_dword(pdev, PCI_REG_NPKDSC, npkdsc);
+	}
+
+	if (err)
+		dev_err(&pdev->dev, "failed to read NPKDSC register\n");
+}
+
+static int intel_th_pci_probe(struct pci_dev *pdev,
+			      const struct pci_device_id *id)
+{
+	struct intel_th_drvdata *drvdata = (void *)id->driver_data;
+	struct intel_th *th;
+	int err;
+
+	err = pcim_enable_device(pdev);
+	if (err)
+		return err;
+
+	err = pcim_iomap_regions_request_all(pdev, BAR_MASK, DRIVER_NAME);
+	if (err)
+		return err;
+
+	th = intel_th_alloc(&pdev->dev, drvdata, pdev->resource,
+			    DEVICE_COUNT_RESOURCE, pdev->irq);
+	if (IS_ERR(th))
+		return PTR_ERR(th);
+
+	th->activate   = intel_th_pci_activate;
+	th->deactivate = intel_th_pci_deactivate;
+
+	pci_set_master(pdev);
+
+	return 0;
+}
+
+static void intel_th_pci_remove(struct pci_dev *pdev)
+{
+	struct intel_th *th = pci_get_drvdata(pdev);
+
+	intel_th_free(th);
+}
+
+static const struct intel_th_drvdata intel_th_2x = {
+	.tscu_enable	= 1,
+};
+
+static const struct pci_device_id intel_th_pci_id_table[] = {
+	{
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x9d26),
+		.driver_data = (kernel_ulong_t)0,
+	},
+	{
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa126),
+		.driver_data = (kernel_ulong_t)0,
+	},
+	{
+		/* Apollo Lake */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x5a8e),
+		.driver_data = (kernel_ulong_t)0,
+	},
+	{
+		/* Broxton */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x0a80),
+		.driver_data = (kernel_ulong_t)0,
+	},
+	{
+		/* Broxton B-step */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x1a8e),
+		.driver_data = (kernel_ulong_t)0,
+	},
+	{
+		/* Kaby Lake PCH-H */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa2a6),
+		.driver_data = (kernel_ulong_t)0,
+	},
+	{
+		/* Denverton */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x19e1),
+		.driver_data = (kernel_ulong_t)0,
+	},
+	{
+		/* Lewisburg PCH */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa1a6),
+		.driver_data = (kernel_ulong_t)0,
+	},
+	{
+		/* Gemini Lake */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x318e),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
+	{
+		/* Cannon Lake H */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa326),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
+	{
+		/* Cannon Lake LP */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x9da6),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
+	{
+		/* Cedar Fork PCH */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x18e1),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
+	{
+		/* Ice Lake PCH */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x34a6),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
+	{ 0 },
+};
+
+MODULE_DEVICE_TABLE(pci, intel_th_pci_id_table);
+
+static struct pci_driver intel_th_pci_driver = {
+	.name		= DRIVER_NAME,
+	.id_table	= intel_th_pci_id_table,
+	.probe		= intel_th_pci_probe,
+	.remove		= intel_th_pci_remove,
+};
+
+module_pci_driver(intel_th_pci_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Intel(R) Trace Hub PCI controller driver");
+MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@intel.com>");
diff --git a/drivers/hwtracing/intel_th/pti.c b/drivers/hwtracing/intel_th/pti.c
new file mode 100644
index 0000000..5669433
--- /dev/null
+++ b/drivers/hwtracing/intel_th/pti.c
@@ -0,0 +1,345 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel(R) Trace Hub PTI output driver
+ *
+ * Copyright (C) 2014-2016 Intel Corporation.
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/sizes.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+
+#include "intel_th.h"
+#include "pti.h"
+
+struct pti_device {
+	void __iomem		*base;
+	struct intel_th_device	*thdev;
+	unsigned int		mode;
+	unsigned int		freeclk;
+	unsigned int		clkdiv;
+	unsigned int		patgen;
+	unsigned int		lpp_dest_mask;
+	unsigned int		lpp_dest;
+};
+
+/* map PTI widths to MODE settings of PTI_CTL register */
+static const unsigned int pti_mode[] = {
+	0, 4, 8, 0, 12, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0,
+};
+
+static int pti_width_mode(unsigned int width)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(pti_mode); i++)
+		if (pti_mode[i] == width)
+			return i;
+
+	return -EINVAL;
+}
+
+static ssize_t mode_show(struct device *dev, struct device_attribute *attr,
+			 char *buf)
+{
+	struct pti_device *pti = dev_get_drvdata(dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n", pti_mode[pti->mode]);
+}
+
+static ssize_t mode_store(struct device *dev, struct device_attribute *attr,
+			  const char *buf, size_t size)
+{
+	struct pti_device *pti = dev_get_drvdata(dev);
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	ret = pti_width_mode(val);
+	if (ret < 0)
+		return ret;
+
+	pti->mode = ret;
+
+	return size;
+}
+
+static DEVICE_ATTR_RW(mode);
+
+static ssize_t
+freerunning_clock_show(struct device *dev, struct device_attribute *attr,
+		       char *buf)
+{
+	struct pti_device *pti = dev_get_drvdata(dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n", pti->freeclk);
+}
+
+static ssize_t
+freerunning_clock_store(struct device *dev, struct device_attribute *attr,
+			const char *buf, size_t size)
+{
+	struct pti_device *pti = dev_get_drvdata(dev);
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	pti->freeclk = !!val;
+
+	return size;
+}
+
+static DEVICE_ATTR_RW(freerunning_clock);
+
+static ssize_t
+clock_divider_show(struct device *dev, struct device_attribute *attr,
+		   char *buf)
+{
+	struct pti_device *pti = dev_get_drvdata(dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n", 1u << pti->clkdiv);
+}
+
+static ssize_t
+clock_divider_store(struct device *dev, struct device_attribute *attr,
+		    const char *buf, size_t size)
+{
+	struct pti_device *pti = dev_get_drvdata(dev);
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	if (!is_power_of_2(val) || val > 8 || !val)
+		return -EINVAL;
+
+	pti->clkdiv = val;
+
+	return size;
+}
+
+static DEVICE_ATTR_RW(clock_divider);
+
+static struct attribute *pti_output_attrs[] = {
+	&dev_attr_mode.attr,
+	&dev_attr_freerunning_clock.attr,
+	&dev_attr_clock_divider.attr,
+	NULL,
+};
+
+static struct attribute_group pti_output_group = {
+	.attrs	= pti_output_attrs,
+};
+
+static int intel_th_pti_activate(struct intel_th_device *thdev)
+{
+	struct pti_device *pti = dev_get_drvdata(&thdev->dev);
+	u32 ctl = PTI_EN;
+
+	if (pti->patgen)
+		ctl |= pti->patgen << __ffs(PTI_PATGENMODE);
+	if (pti->freeclk)
+		ctl |= PTI_FCEN;
+	ctl |= pti->mode << __ffs(PTI_MODE);
+	ctl |= pti->clkdiv << __ffs(PTI_CLKDIV);
+	ctl |= pti->lpp_dest << __ffs(LPP_DEST);
+
+	iowrite32(ctl, pti->base + REG_PTI_CTL);
+
+	intel_th_trace_enable(thdev);
+
+	return 0;
+}
+
+static void intel_th_pti_deactivate(struct intel_th_device *thdev)
+{
+	struct pti_device *pti = dev_get_drvdata(&thdev->dev);
+
+	intel_th_trace_disable(thdev);
+
+	iowrite32(0, pti->base + REG_PTI_CTL);
+}
+
+static void read_hw_config(struct pti_device *pti)
+{
+	u32 ctl = ioread32(pti->base + REG_PTI_CTL);
+
+	pti->mode	= (ctl & PTI_MODE) >> __ffs(PTI_MODE);
+	pti->clkdiv	= (ctl & PTI_CLKDIV) >> __ffs(PTI_CLKDIV);
+	pti->freeclk	= !!(ctl & PTI_FCEN);
+
+	if (!pti_mode[pti->mode])
+		pti->mode = pti_width_mode(4);
+	if (!pti->clkdiv)
+		pti->clkdiv = 1;
+
+	if (pti->thdev->output.type == GTH_LPP) {
+		if (ctl & LPP_PTIPRESENT)
+			pti->lpp_dest_mask |= LPP_DEST_PTI;
+		if (ctl & LPP_BSSBPRESENT)
+			pti->lpp_dest_mask |= LPP_DEST_EXI;
+		if (ctl & LPP_DEST)
+			pti->lpp_dest = 1;
+	}
+}
+
+static int intel_th_pti_probe(struct intel_th_device *thdev)
+{
+	struct device *dev = &thdev->dev;
+	struct resource *res;
+	struct pti_device *pti;
+	void __iomem *base;
+
+	res = intel_th_device_get_resource(thdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENODEV;
+
+	base = devm_ioremap(dev, res->start, resource_size(res));
+	if (!base)
+		return -ENOMEM;
+
+	pti = devm_kzalloc(dev, sizeof(*pti), GFP_KERNEL);
+	if (!pti)
+		return -ENOMEM;
+
+	pti->thdev = thdev;
+	pti->base = base;
+
+	read_hw_config(pti);
+
+	dev_set_drvdata(dev, pti);
+
+	return 0;
+}
+
+static void intel_th_pti_remove(struct intel_th_device *thdev)
+{
+}
+
+static struct intel_th_driver intel_th_pti_driver = {
+	.probe	= intel_th_pti_probe,
+	.remove	= intel_th_pti_remove,
+	.activate	= intel_th_pti_activate,
+	.deactivate	= intel_th_pti_deactivate,
+	.attr_group	= &pti_output_group,
+	.driver	= {
+		.name	= "pti",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static const char * const lpp_dest_str[] = { "pti", "exi" };
+
+static ssize_t lpp_dest_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	struct pti_device *pti = dev_get_drvdata(dev);
+	ssize_t ret = 0;
+	int i;
+
+	for (i = ARRAY_SIZE(lpp_dest_str) - 1; i >= 0; i--) {
+		const char *fmt = pti->lpp_dest == i ? "[%s] " : "%s ";
+
+		if (!(pti->lpp_dest_mask & BIT(i)))
+			continue;
+
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+				 fmt, lpp_dest_str[i]);
+	}
+
+	if (ret)
+		buf[ret - 1] = '\n';
+
+	return ret;
+}
+
+static ssize_t lpp_dest_store(struct device *dev, struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	struct pti_device *pti = dev_get_drvdata(dev);
+	ssize_t ret = -EINVAL;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(lpp_dest_str); i++)
+		if (sysfs_streq(buf, lpp_dest_str[i]))
+			break;
+
+	if (i < ARRAY_SIZE(lpp_dest_str) && pti->lpp_dest_mask & BIT(i)) {
+		pti->lpp_dest = i;
+		ret = size;
+	}
+
+	return ret;
+}
+
+static DEVICE_ATTR_RW(lpp_dest);
+
+static struct attribute *lpp_output_attrs[] = {
+	&dev_attr_mode.attr,
+	&dev_attr_freerunning_clock.attr,
+	&dev_attr_clock_divider.attr,
+	&dev_attr_lpp_dest.attr,
+	NULL,
+};
+
+static struct attribute_group lpp_output_group = {
+	.attrs	= lpp_output_attrs,
+};
+
+static struct intel_th_driver intel_th_lpp_driver = {
+	.probe		= intel_th_pti_probe,
+	.remove		= intel_th_pti_remove,
+	.activate	= intel_th_pti_activate,
+	.deactivate	= intel_th_pti_deactivate,
+	.attr_group	= &lpp_output_group,
+	.driver	= {
+		.name	= "lpp",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int __init intel_th_pti_lpp_init(void)
+{
+	int err;
+
+	err = intel_th_driver_register(&intel_th_pti_driver);
+	if (err)
+		return err;
+
+	err = intel_th_driver_register(&intel_th_lpp_driver);
+	if (err) {
+		intel_th_driver_unregister(&intel_th_pti_driver);
+		return err;
+	}
+
+	return 0;
+}
+
+module_init(intel_th_pti_lpp_init);
+
+static void __exit intel_th_pti_lpp_exit(void)
+{
+	intel_th_driver_unregister(&intel_th_pti_driver);
+	intel_th_driver_unregister(&intel_th_lpp_driver);
+}
+
+module_exit(intel_th_pti_lpp_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Intel(R) Trace Hub PTI/LPP output driver");
+MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@linux.intel.com>");
diff --git a/drivers/hwtracing/intel_th/pti.h b/drivers/hwtracing/intel_th/pti.h
new file mode 100644
index 0000000..e9381ba
--- /dev/null
+++ b/drivers/hwtracing/intel_th/pti.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Intel(R) Trace Hub PTI output data structures
+ *
+ * Copyright (C) 2014-2015 Intel Corporation.
+ */
+
+#ifndef __INTEL_TH_PTI_H__
+#define __INTEL_TH_PTI_H__
+
+enum {
+	REG_PTI_CTL	= 0x1c00,
+};
+
+#define PTI_EN		BIT(0)
+#define PTI_FCEN	BIT(1)
+#define PTI_MODE	0xf0
+#define LPP_PTIPRESENT	BIT(8)
+#define LPP_BSSBPRESENT	BIT(9)
+#define PTI_CLKDIV	0x000f0000
+#define PTI_PATGENMODE	0x00f00000
+#define LPP_DEST	BIT(25)
+#define LPP_BSSBACT	BIT(30)
+#define LPP_LPPBUSY	BIT(31)
+
+#define LPP_DEST_PTI	BIT(0)
+#define LPP_DEST_EXI	BIT(1)
+
+#endif /* __INTEL_TH_PTI_H__ */
diff --git a/drivers/hwtracing/intel_th/sth.c b/drivers/hwtracing/intel_th/sth.c
new file mode 100644
index 0000000..4b7ae47
--- /dev/null
+++ b/drivers/hwtracing/intel_th/sth.c
@@ -0,0 +1,259 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel(R) Trace Hub Software Trace Hub support
+ *
+ * Copyright (C) 2014-2015 Intel Corporation.
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/stm.h>
+
+#include "intel_th.h"
+#include "sth.h"
+
+struct sth_device {
+	void __iomem	*base;
+	void __iomem	*channels;
+	phys_addr_t	channels_phys;
+	struct device	*dev;
+	struct stm_data	stm;
+	unsigned int	sw_nmasters;
+};
+
+static struct intel_th_channel __iomem *
+sth_channel(struct sth_device *sth, unsigned int master, unsigned int channel)
+{
+	struct intel_th_channel __iomem *sw_map = sth->channels;
+
+	return &sw_map[(master - sth->stm.sw_start) * sth->stm.sw_nchannels +
+		       channel];
+}
+
+static void sth_iowrite(void __iomem *dest, const unsigned char *payload,
+			unsigned int size)
+{
+	switch (size) {
+#ifdef CONFIG_64BIT
+	case 8:
+		writeq_relaxed(*(u64 *)payload, dest);
+		break;
+#endif
+	case 4:
+		writel_relaxed(*(u32 *)payload, dest);
+		break;
+	case 2:
+		writew_relaxed(*(u16 *)payload, dest);
+		break;
+	case 1:
+		writeb_relaxed(*(u8 *)payload, dest);
+		break;
+	default:
+		break;
+	}
+}
+
+static ssize_t notrace sth_stm_packet(struct stm_data *stm_data,
+				      unsigned int master,
+				      unsigned int channel,
+				      unsigned int packet,
+				      unsigned int flags,
+				      unsigned int size,
+				      const unsigned char *payload)
+{
+	struct sth_device *sth = container_of(stm_data, struct sth_device, stm);
+	struct intel_th_channel __iomem *out =
+		sth_channel(sth, master, channel);
+	u64 __iomem *outp = &out->Dn;
+	unsigned long reg = REG_STH_TRIG;
+
+#ifndef CONFIG_64BIT
+	if (size > 4)
+		size = 4;
+#endif
+
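+	/* sth_iowrite() can only issue 1, 2, 4 or 8 byte writes */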
+	size = rounddown_pow_of_two(size);
+
+	switch (packet) {
+	/* Global packets (GERR, XSYNC, TRIG) are sent with register writes */
+	case STP_PACKET_GERR:
+		reg += 4;
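+		/* fall through */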
+	case STP_PACKET_XSYNC:
+		reg += 8;
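+		/* fall through */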
+	case STP_PACKET_TRIG:
+		if (flags & STP_PACKET_TIMESTAMPED)
+			reg += 4;
+		writeb_relaxed(*payload, sth->base + reg);
+		break;
+
+	case STP_PACKET_MERR:
+		if (size > 4)
+			size = 4;
+
+		sth_iowrite(&out->MERR, payload, size);
+		break;
+
+	case STP_PACKET_FLAG:
+		if (flags & STP_PACKET_TIMESTAMPED)
+			outp = (u64 __iomem *)&out->FLAG_TS;
+		else
+			outp = (u64 __iomem *)&out->FLAG;
+
+		size = 0;
+		writeb_relaxed(0, outp);
+		break;
+
+	case STP_PACKET_USER:
+		if (flags & STP_PACKET_TIMESTAMPED)
+			outp = &out->USER_TS;
+		else
+			outp = &out->USER;
+		sth_iowrite(outp, payload, size);
+		break;
+
+	case STP_PACKET_DATA:
+		outp = &out->Dn;
+
+		if (flags & STP_PACKET_TIMESTAMPED)
+			outp += 2;
+		if (flags & STP_PACKET_MARKED)
+			outp++;
+
+		sth_iowrite(outp, payload, size);
+		break;
+	default:
+		return -ENOTSUPP;
+	}
+
+	return size;
+}
+
+static phys_addr_t
+sth_stm_mmio_addr(struct stm_data *stm_data, unsigned int master,
+		  unsigned int channel, unsigned int nr_chans)
+{
+	struct sth_device *sth = container_of(stm_data, struct sth_device, stm);
+	phys_addr_t addr;
+
+	master -= sth->stm.sw_start;
+	addr = sth->channels_phys + (master * sth->stm.sw_nchannels + channel) *
+		sizeof(struct intel_th_channel);
+
+	if (offset_in_page(addr) ||
+	    offset_in_page(nr_chans * sizeof(struct intel_th_channel)))
+		return 0;
+
+	return addr;
+}
+
+static int sth_stm_link(struct stm_data *stm_data, unsigned int master,
+			 unsigned int channel)
+{
+	struct sth_device *sth = container_of(stm_data, struct sth_device, stm);
+
+	intel_th_set_output(to_intel_th_device(sth->dev), master);
+
+	return 0;
+}
+
+static int intel_th_sw_init(struct sth_device *sth)
+{
+	u32 reg;
+
+	reg = ioread32(sth->base + REG_STH_STHCAP1);
+	sth->stm.sw_nchannels = reg & 0xff;
+
+	reg = ioread32(sth->base + REG_STH_STHCAP0);
+	sth->stm.sw_start = reg & 0xffff;
+	sth->stm.sw_end = reg >> 16;
+
+	sth->sw_nmasters = sth->stm.sw_end - sth->stm.sw_start;
+	dev_dbg(sth->dev, "sw_start: %x sw_end: %x masters: %x nchannels: %x\n",
+		sth->stm.sw_start, sth->stm.sw_end, sth->sw_nmasters,
+		sth->stm.sw_nchannels);
+
+	return 0;
+}
+
+static int intel_th_sth_probe(struct intel_th_device *thdev)
+{
+	struct device *dev = &thdev->dev;
+	struct sth_device *sth;
+	struct resource *res;
+	void __iomem *base, *channels;
+	int err;
+
+	res = intel_th_device_get_resource(thdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENODEV;
+
+	base = devm_ioremap(dev, res->start, resource_size(res));
+	if (!base)
+		return -ENOMEM;
+
+	res = intel_th_device_get_resource(thdev, IORESOURCE_MEM, 1);
+	if (!res)
+		return -ENODEV;
+
+	channels = devm_ioremap(dev, res->start, resource_size(res));
+	if (!channels)
+		return -ENOMEM;
+
+	sth = devm_kzalloc(dev, sizeof(*sth), GFP_KERNEL);
+	if (!sth)
+		return -ENOMEM;
+
+	sth->dev = dev;
+	sth->base = base;
+	sth->channels = channels;
+	sth->channels_phys = res->start;
+	sth->stm.name = dev_name(dev);
+	sth->stm.packet = sth_stm_packet;
+	sth->stm.mmio_addr = sth_stm_mmio_addr;
+	sth->stm.sw_mmiosz = sizeof(struct intel_th_channel);
+	sth->stm.link = sth_stm_link;
+
+	err = intel_th_sw_init(sth);
+	if (err)
+		return err;
+
+	err = stm_register_device(dev, &sth->stm, THIS_MODULE);
+	if (err) {
+		dev_err(dev, "stm_register_device failed\n");
+		return err;
+	}
+
+	dev_set_drvdata(dev, sth);
+
+	return 0;
+}
+
+static void intel_th_sth_remove(struct intel_th_device *thdev)
+{
+	struct sth_device *sth = dev_get_drvdata(&thdev->dev);
+
+	stm_unregister_device(&sth->stm);
+}
+
+static struct intel_th_driver intel_th_sth_driver = {
+	.probe	= intel_th_sth_probe,
+	.remove	= intel_th_sth_remove,
+	.driver	= {
+		.name	= "sth",
+		.owner	= THIS_MODULE,
+	},
+};
+
+module_driver(intel_th_sth_driver,
+	      intel_th_driver_register,
+	      intel_th_driver_unregister);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Intel(R) Trace Hub Software Trace Hub driver");
+MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@intel.com>");
diff --git a/drivers/hwtracing/intel_th/sth.h b/drivers/hwtracing/intel_th/sth.h
new file mode 100644
index 0000000..f97fc0c
--- /dev/null
+++ b/drivers/hwtracing/intel_th/sth.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Intel(R) Trace Hub Software Trace Hub (STH) data structures
+ *
+ * Copyright (C) 2014-2015 Intel Corporation.
+ */
+
+#ifndef __INTEL_TH_STH_H__
+#define __INTEL_TH_STH_H__
+
+enum {
+	REG_STH_STHCAP0		= 0x0000, /* capabilities pt1 */
+	REG_STH_STHCAP1		= 0x0004, /* capabilities pt2 */
+	REG_STH_TRIG		= 0x0008, /* TRIG packet payload */
+	REG_STH_TRIG_TS		= 0x000c, /* TRIG_TS packet payload */
+	REG_STH_XSYNC		= 0x0010, /* XSYNC packet payload */
+	REG_STH_XSYNC_TS	= 0x0014, /* XSYNC_TS packet payload */
+	REG_STH_GERR		= 0x0018, /* GERR packet payload */
+};
+
+struct intel_th_channel {
+	u64	Dn;
+	u64	DnM;
+	u64	DnTS;
+	u64	DnMTS;
+	u64	USER;
+	u64	USER_TS;
+	u32	FLAG;
+	u32	FLAG_TS;
+	u32	MERR;
+	u32	__unused;
+} __packed;
+
+#endif /* __INTEL_TH_STH_H__ */
diff --git a/drivers/hwtracing/stm/Kconfig b/drivers/hwtracing/stm/Kconfig
new file mode 100644
index 0000000..723e2d9
--- /dev/null
+++ b/drivers/hwtracing/stm/Kconfig
@@ -0,0 +1,53 @@
+config STM
+	tristate "System Trace Module devices"
+	select CONFIGFS_FS
+	select SRCU
+	help
+	  A System Trace Module (STM) is a device exporting data in System
+	  Trace Protocol (STP) format as defined by MIPI STP standards.
+	  Examples of such devices are Intel(R) Trace Hub and Coresight STM.
+
+	  Say Y here to enable System Trace Module device support.
+
+if STM
+
+config STM_DUMMY
+	tristate "Dummy STM driver"
+	help
+	  This is a simple dummy device that pretends to be an stm device
+	  and discards your data. Use for stm class testing.
+
+	  If you don't know what this is, say N.
+
+config STM_SOURCE_CONSOLE
+	tristate "Kernel console over STM devices"
+	help
+	  This is a kernel space trace source that sends kernel log
+	  messages to trace hosts over STM devices.
+
+	  If you want to send kernel console messages over STM devices,
+	  say Y.
+
+config STM_SOURCE_HEARTBEAT
+	tristate "Heartbeat over STM devices"
+	help
+	  This is a kernel space trace source that sends periodic
+	  heartbeat messages to trace hosts over STM devices. It is
+	  also useful for testing stm class drivers and the stm class
+	  framework itself.
+
+	  If you want to send heartbeat messages over STM devices,
+	  say Y.
+
+config STM_SOURCE_FTRACE
+	tristate "Copy the output from kernel Ftrace to STM engine"
+	depends on FUNCTION_TRACER
+	help
+	  This option can be used to copy the output from kernel Ftrace
+	  to STM engine. Enabling this option will introduce a slight
+	  timing effect.
+
+	  If you want to send kernel Ftrace messages over STM devices,
+	  say Y.
+
+endif
diff --git a/drivers/hwtracing/stm/Makefile b/drivers/hwtracing/stm/Makefile
new file mode 100644
index 0000000..effc19e
--- /dev/null
+++ b/drivers/hwtracing/stm/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_STM)	+= stm_core.o
+
+stm_core-y		:= core.o policy.o
+
+obj-$(CONFIG_STM_DUMMY)	+= dummy_stm.o
+
+obj-$(CONFIG_STM_SOURCE_CONSOLE)	+= stm_console.o
+obj-$(CONFIG_STM_SOURCE_HEARTBEAT)	+= stm_heartbeat.o
+obj-$(CONFIG_STM_SOURCE_FTRACE)		+= stm_ftrace.o
+
+stm_console-y		:= console.o
+stm_heartbeat-y		:= heartbeat.o
+stm_ftrace-y		:= ftrace.o
diff --git a/drivers/hwtracing/stm/console.c b/drivers/hwtracing/stm/console.c
new file mode 100644
index 0000000..a00f65e
--- /dev/null
+++ b/drivers/hwtracing/stm/console.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Simple kernel console driver for STM devices
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * STM console will send kernel messages over STM devices to a trace host.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/console.h>
+#include <linux/slab.h>
+#include <linux/stm.h>
+
+static int stm_console_link(struct stm_source_data *data);
+static void stm_console_unlink(struct stm_source_data *data);
+
+static struct stm_console {
+	struct stm_source_data	data;
+	struct console		console;
+} stm_console = {
+	.data	= {
+		.name		= "console",
+		.nr_chans	= 1,
+		.link		= stm_console_link,
+		.unlink		= stm_console_unlink,
+	},
+};
+
+static void
+stm_console_write(struct console *con, const char *buf, unsigned len)
+{
+	struct stm_console *sc = container_of(con, struct stm_console, console);
+
+	stm_source_write(&sc->data, 0, buf, len);
+}
+
+static int stm_console_link(struct stm_source_data *data)
+{
+	struct stm_console *sc = container_of(data, struct stm_console, data);
+
+	strcpy(sc->console.name, "stm_console");
+	sc->console.write = stm_console_write;
+	sc->console.flags = CON_ENABLED | CON_PRINTBUFFER;
+	register_console(&sc->console);
+
+	return 0;
+}
+
+static void stm_console_unlink(struct stm_source_data *data)
+{
+	struct stm_console *sc = container_of(data, struct stm_console, data);
+
+	unregister_console(&sc->console);
+}
+
+static int stm_console_init(void)
+{
+	return stm_source_register_device(NULL, &stm_console.data);
+}
+
+static void stm_console_exit(void)
+{
+	stm_source_unregister_device(&stm_console.data);
+}
+
+module_init(stm_console_init);
+module_exit(stm_console_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("stm_console driver");
+MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@linux.intel.com>");
diff --git a/drivers/hwtracing/stm/core.c b/drivers/hwtracing/stm/core.c
new file mode 100644
index 0000000..10bcb5d
--- /dev/null
+++ b/drivers/hwtracing/stm/core.c
@@ -0,0 +1,1193 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * System Trace Module (STM) infrastructure
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * STM class implements generic infrastructure for System Trace Module devices
+ * as defined in MIPI STPv2 specification.
+ */
+
+#include <linux/pm_runtime.h>
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/compat.h>
+#include <linux/kdev_t.h>
+#include <linux/srcu.h>
+#include <linux/slab.h>
+#include <linux/stm.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include "stm.h"
+
+#include <uapi/linux/stm.h>
+
+static unsigned int stm_core_up;
+
+/*
+ * The SRCU here makes sure that STM device doesn't disappear from under a
+ * stm_source_write() caller, which may want to have as little overhead as
+ * possible.
+ */
+static struct srcu_struct stm_source_srcu;
+
+static ssize_t masters_show(struct device *dev,
+			    struct device_attribute *attr,
+			    char *buf)
+{
+	struct stm_device *stm = to_stm_device(dev);
+	int ret;
+
+	ret = sprintf(buf, "%u %u\n", stm->data->sw_start, stm->data->sw_end);
+
+	return ret;
+}
+
+static DEVICE_ATTR_RO(masters);
+
+static ssize_t channels_show(struct device *dev,
+			     struct device_attribute *attr,
+			     char *buf)
+{
+	struct stm_device *stm = to_stm_device(dev);
+	int ret;
+
+	ret = sprintf(buf, "%u\n", stm->data->sw_nchannels);
+
+	return ret;
+}
+
+static DEVICE_ATTR_RO(channels);
+
+static ssize_t hw_override_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct stm_device *stm = to_stm_device(dev);
+	int ret;
+
+	ret = sprintf(buf, "%u\n", stm->data->hw_override);
+
+	return ret;
+}
+
+static DEVICE_ATTR_RO(hw_override);
+
+static struct attribute *stm_attrs[] = {
+	&dev_attr_masters.attr,
+	&dev_attr_channels.attr,
+	&dev_attr_hw_override.attr,
+	NULL,
+};
+
+ATTRIBUTE_GROUPS(stm);
+
+static struct class stm_class = {
+	.name		= "stm",
+	.dev_groups	= stm_groups,
+};
+
+static int stm_dev_match(struct device *dev, const void *data)
+{
+	const char *name = data;
+
+	return sysfs_streq(name, dev_name(dev));
+}
+
+/**
+ * stm_find_device() - find stm device by name
+ * @buf:	character buffer containing the name
+ *
+ * This is called when either policy gets assigned to an stm device or an
+ * stm_source device gets linked to an stm device.
+ *
+ * This grabs device's reference (get_device()) and module reference, both
+ * of which the calling path needs to make sure to drop with stm_put_device().
+ *
+ * Return:	stm device pointer or null if lookup failed.
+ */
+struct stm_device *stm_find_device(const char *buf)
+{
+	struct stm_device *stm;
+	struct device *dev;
+
+	if (!stm_core_up)
+		return NULL;
+
+	dev = class_find_device(&stm_class, NULL, buf, stm_dev_match);
+	if (!dev)
+		return NULL;
+
+	stm = to_stm_device(dev);
+	if (!try_module_get(stm->owner)) {
+		/* matches class_find_device() above */
+		put_device(dev);
+		return NULL;
+	}
+
+	return stm;
+}
+
+/**
+ * stm_put_device() - drop references on the stm device
+ * @stm:	stm device, previously acquired by stm_find_device()
+ *
+ * This drops the module reference and device reference taken by
+ * stm_find_device() or stm_char_open().
+ */
+void stm_put_device(struct stm_device *stm)
+{
+	module_put(stm->owner);
+	put_device(&stm->dev);
+}
+
+/*
+ * Internally we only care about software-writable masters here, that is the
+ * ones in the range [stm_data->sw_start..stm_data->sw_end], however we need
+ * original master numbers to be visible externally, since they are the ones
+ * that will appear in the STP stream. Thus, the internal bookkeeping uses
+ * $master - stm_data->sw_start to reference master descriptors and such.
+ */
+
+#define __stm_master(_s, _m)				\
+	((_s)->masters[(_m) - (_s)->data->sw_start])
+
+static inline struct stp_master *
+stm_master(struct stm_device *stm, unsigned int idx)
+{
+	if (idx < stm->data->sw_start || idx > stm->data->sw_end)
+		return NULL;
+
+	return __stm_master(stm, idx);
+}
+
+static int stp_master_alloc(struct stm_device *stm, unsigned int idx)
+{
+	struct stp_master *master;
+	size_t size;
+
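+	/* the channel bitmap (chan_map) lives right after struct stp_master */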
+	size = ALIGN(stm->data->sw_nchannels, 8) / 8;
+	size += sizeof(struct stp_master);
+	master = kzalloc(size, GFP_ATOMIC);
+	if (!master)
+		return -ENOMEM;
+
+	master->nr_free = stm->data->sw_nchannels;
+	__stm_master(stm, idx) = master;
+
+	return 0;
+}
+
+static void stp_master_free(struct stm_device *stm, unsigned int idx)
+{
+	struct stp_master *master = stm_master(stm, idx);
+
+	if (!master)
+		return;
+
+	__stm_master(stm, idx) = NULL;
+	kfree(master);
+}
+
+static void stm_output_claim(struct stm_device *stm, struct stm_output *output)
+{
+	struct stp_master *master = stm_master(stm, output->master);
+
+	lockdep_assert_held(&stm->mc_lock);
+	lockdep_assert_held(&output->lock);
+
+	if (WARN_ON_ONCE(master->nr_free < output->nr_chans))
+		return;
+
+	bitmap_allocate_region(&master->chan_map[0], output->channel,
+			       ilog2(output->nr_chans));
+
+	master->nr_free -= output->nr_chans;
+}
+
+static void
+stm_output_disclaim(struct stm_device *stm, struct stm_output *output)
+{
+	struct stp_master *master = stm_master(stm, output->master);
+
+	lockdep_assert_held(&stm->mc_lock);
+	lockdep_assert_held(&output->lock);
+
+	bitmap_release_region(&master->chan_map[0], output->channel,
+			      ilog2(output->nr_chans));
+
+	master->nr_free += output->nr_chans;
+	output->nr_chans = 0;
+}
+
+/*
+ * This is like bitmap_find_free_region(), except it can ignore @start bits
+ * at the beginning.
+ */
+static int find_free_channels(unsigned long *bitmap, unsigned int start,
+			      unsigned int end, unsigned int width)
+{
+	unsigned int pos;
+	int i;
+
+	for (pos = start; pos < end + 1; pos = ALIGN(pos, width)) {
+		pos = find_next_zero_bit(bitmap, end + 1, pos);
+		if (pos + width > end + 1)
+			break;
+
+		if (pos & (width - 1))
+			continue;
+
+		for (i = 1; i < width && !test_bit(pos + i, bitmap); i++)
+			;
+		if (i == width)
+			return pos;
+
+		/* skip past the set bit found inside this region before realigning */
+		pos += i;
+	}
+
+	return -1;
+}
+
+static int
+stm_find_master_chan(struct stm_device *stm, unsigned int width,
+		     unsigned int *mstart, unsigned int mend,
+		     unsigned int *cstart, unsigned int cend)
+{
+	struct stp_master *master;
+	unsigned int midx;
+	int pos, err;
+
+	for (midx = *mstart; midx <= mend; midx++) {
+		if (!stm_master(stm, midx)) {
+			err = stp_master_alloc(stm, midx);
+			if (err)
+				return err;
+		}
+
+		master = stm_master(stm, midx);
+
+		if (!master->nr_free)
+			continue;
+
+		pos = find_free_channels(master->chan_map, *cstart, cend,
+					 width);
+		if (pos < 0)
+			continue;
+
+		*mstart = midx;
+		*cstart = pos;
+		return 0;
+	}
+
+	return -ENOSPC;
+}
+
+static int stm_output_assign(struct stm_device *stm, unsigned int width,
+			     struct stp_policy_node *policy_node,
+			     struct stm_output *output)
+{
+	unsigned int midx, cidx, mend, cend;
+	int ret = -EINVAL;
+
+	if (width > stm->data->sw_nchannels)
+		return -EINVAL;
+
+	if (policy_node) {
+		stp_policy_node_get_ranges(policy_node,
+					   &midx, &mend, &cidx, &cend);
+	} else {
+		midx = stm->data->sw_start;
+		cidx = 0;
+		mend = stm->data->sw_end;
+		cend = stm->data->sw_nchannels - 1;
+	}
+
+	spin_lock(&stm->mc_lock);
+	spin_lock(&output->lock);
+	/* output is already assigned -- shouldn't happen */
+	if (WARN_ON_ONCE(output->nr_chans))
+		goto unlock;
+
+	ret = stm_find_master_chan(stm, width, &midx, mend, &cidx, cend);
+	if (ret < 0)
+		goto unlock;
+
+	output->master = midx;
+	output->channel = cidx;
+	output->nr_chans = width;
+	stm_output_claim(stm, output);
+	dev_dbg(&stm->dev, "assigned %u:%u (+%u)\n", midx, cidx, width);
+
+	ret = 0;
+unlock:
+	spin_unlock(&output->lock);
+	spin_unlock(&stm->mc_lock);
+
+	return ret;
+}
+
+static void stm_output_free(struct stm_device *stm, struct stm_output *output)
+{
+	spin_lock(&stm->mc_lock);
+	spin_lock(&output->lock);
+	if (output->nr_chans)
+		stm_output_disclaim(stm, output);
+	spin_unlock(&output->lock);
+	spin_unlock(&stm->mc_lock);
+}
+
+static void stm_output_init(struct stm_output *output)
+{
+	spin_lock_init(&output->lock);
+}
+
+static int major_match(struct device *dev, const void *data)
+{
+	unsigned int major = *(unsigned int *)data;
+
+	return MAJOR(dev->devt) == major;
+}
+
+static int stm_char_open(struct inode *inode, struct file *file)
+{
+	struct stm_file *stmf;
+	struct device *dev;
+	unsigned int major = imajor(inode);
+	int err = -ENOMEM;
+
+	dev = class_find_device(&stm_class, NULL, &major, major_match);
+	if (!dev)
+		return -ENODEV;
+
+	stmf = kzalloc(sizeof(*stmf), GFP_KERNEL);
+	if (!stmf)
+		goto err_put_device;
+
+	err = -ENODEV;
+	stm_output_init(&stmf->output);
+	stmf->stm = to_stm_device(dev);
+
+	if (!try_module_get(stmf->stm->owner))
+		goto err_free;
+
+	file->private_data = stmf;
+
+	return nonseekable_open(inode, file);
+
+err_free:
+	kfree(stmf);
+err_put_device:
+	/* matches class_find_device() above */
+	put_device(dev);
+
+	return err;
+}
+
+static int stm_char_release(struct inode *inode, struct file *file)
+{
+	struct stm_file *stmf = file->private_data;
+	struct stm_device *stm = stmf->stm;
+
+	if (stm->data->unlink)
+		stm->data->unlink(stm->data, stmf->output.master,
+				  stmf->output.channel);
+
+	stm_output_free(stm, &stmf->output);
+
+	/*
+	 * matches the stm_char_open()'s
+	 * class_find_device() + try_module_get()
+	 */
+	stm_put_device(stm);
+	kfree(stmf);
+
+	return 0;
+}
+
+static int stm_file_assign(struct stm_file *stmf, char *id, unsigned int width)
+{
+	struct stm_device *stm = stmf->stm;
+	int ret;
+
+	stmf->policy_node = stp_policy_node_lookup(stm, id);
+
+	ret = stm_output_assign(stm, width, stmf->policy_node, &stmf->output);
+
+	if (stmf->policy_node)
+		stp_policy_node_put(stmf->policy_node);
+
+	return ret;
+}
+
+static ssize_t notrace stm_write(struct stm_data *data, unsigned int master,
+			  unsigned int channel, const char *buf, size_t count)
+{
+	unsigned int flags = STP_PACKET_TIMESTAMPED;
+	const unsigned char *p = buf, nil = 0;
+	size_t pos;
+	ssize_t sz;
+
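+	/*
+	 * Send the payload as DATA packets of at most 8 bytes each; only the
+	 * first one is timestamped.  A FLAG packet terminates the message.
+	 */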
+	for (pos = 0, p = buf; count > pos; pos += sz, p += sz) {
+		sz = min_t(unsigned int, count - pos, 8);
+		sz = data->packet(data, master, channel, STP_PACKET_DATA, flags,
+				  sz, p);
+		flags = 0;
+
+		if (sz < 0)
+			break;
+	}
+
+	data->packet(data, master, channel, STP_PACKET_FLAG, 0, 0, &nil);
+
+	return pos;
+}
+
+static ssize_t stm_char_write(struct file *file, const char __user *buf,
+			      size_t count, loff_t *ppos)
+{
+	struct stm_file *stmf = file->private_data;
+	struct stm_device *stm = stmf->stm;
+	char *kbuf;
+	int err;
+
+	if (count + 1 > PAGE_SIZE)
+		count = PAGE_SIZE - 1;
+
+	/*
+	 * if no m/c have been assigned to this writer up to this
+	 * point, use "default" policy entry
+	 */
+	if (!stmf->output.nr_chans) {
+		err = stm_file_assign(stmf, "default", 1);
+		/*
+		 * EBUSY means that somebody else just assigned this
+		 * output, which is just fine for write()
+		 */
+		if (err && err != -EBUSY)
+			return err;
+	}
+
+	kbuf = kmalloc(count + 1, GFP_KERNEL);
+	if (!kbuf)
+		return -ENOMEM;
+
+	err = copy_from_user(kbuf, buf, count);
+	if (err) {
+		kfree(kbuf);
+		return -EFAULT;
+	}
+
+	pm_runtime_get_sync(&stm->dev);
+
+	count = stm_write(stm->data, stmf->output.master, stmf->output.channel,
+			  kbuf, count);
+
+	pm_runtime_mark_last_busy(&stm->dev);
+	pm_runtime_put_autosuspend(&stm->dev);
+	kfree(kbuf);
+
+	return count;
+}
+
+static void stm_mmap_open(struct vm_area_struct *vma)
+{
+	struct stm_file *stmf = vma->vm_file->private_data;
+	struct stm_device *stm = stmf->stm;
+
+	pm_runtime_get(&stm->dev);
+}
+
+static void stm_mmap_close(struct vm_area_struct *vma)
+{
+	struct stm_file *stmf = vma->vm_file->private_data;
+	struct stm_device *stm = stmf->stm;
+
+	pm_runtime_mark_last_busy(&stm->dev);
+	pm_runtime_put_autosuspend(&stm->dev);
+}
+
+static const struct vm_operations_struct stm_mmap_vmops = {
+	.open	= stm_mmap_open,
+	.close	= stm_mmap_close,
+};
+
+static int stm_char_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct stm_file *stmf = file->private_data;
+	struct stm_device *stm = stmf->stm;
+	unsigned long size, phys;
+
+	if (!stm->data->mmio_addr)
+		return -EOPNOTSUPP;
+
+	if (vma->vm_pgoff)
+		return -EINVAL;
+
+	size = vma->vm_end - vma->vm_start;
+
+	if (stmf->output.nr_chans * stm->data->sw_mmiosz != size)
+		return -EINVAL;
+
+	phys = stm->data->mmio_addr(stm->data, stmf->output.master,
+				    stmf->output.channel,
+				    stmf->output.nr_chans);
+
+	if (!phys)
+		return -EINVAL;
+
+	pm_runtime_get_sync(&stm->dev);
+
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
+	vma->vm_ops = &stm_mmap_vmops;
+	vm_iomap_memory(vma, phys, size);
+
+	return 0;
+}
+
+static int stm_char_policy_set_ioctl(struct stm_file *stmf, void __user *arg)
+{
+	struct stm_device *stm = stmf->stm;
+	struct stp_policy_id *id;
+	int ret = -EINVAL;
+	u32 size;
+
+	if (stmf->output.nr_chans)
+		return -EBUSY;
+
+	if (copy_from_user(&size, arg, sizeof(size)))
+		return -EFAULT;
+
+	if (size < sizeof(*id) || size >= PATH_MAX + sizeof(*id))
+		return -EINVAL;
+
+	/*
+	 * size + 1 to make sure the .id string at the bottom is terminated,
+	 * which is also why memdup_user() is not useful here
+	 */
+	id = kzalloc(size + 1, GFP_KERNEL);
+	if (!id)
+		return -ENOMEM;
+
+	if (copy_from_user(id, arg, size)) {
+		ret = -EFAULT;
+		goto err_free;
+	}
+
+	if (id->__reserved_0 || id->__reserved_1)
+		goto err_free;
+
+	if (id->width < 1 ||
+	    id->width > PAGE_SIZE / stm->data->sw_mmiosz)
+		goto err_free;
+
+	ret = stm_file_assign(stmf, id->id, id->width);
+	if (ret)
+		goto err_free;
+
+	if (stm->data->link)
+		ret = stm->data->link(stm->data, stmf->output.master,
+				      stmf->output.channel);
+
+	if (ret)
+		stm_output_free(stmf->stm, &stmf->output);
+
+err_free:
+	kfree(id);
+
+	return ret;
+}
+
+static int stm_char_policy_get_ioctl(struct stm_file *stmf, void __user *arg)
+{
+	struct stp_policy_id id = {
+		.size		= sizeof(id),
+		.master		= stmf->output.master,
+		.channel	= stmf->output.channel,
+		.width		= stmf->output.nr_chans,
+		.__reserved_0	= 0,
+		.__reserved_1	= 0,
+	};
+
+	return copy_to_user(arg, &id, id.size) ? -EFAULT : 0;
+}
+
+static long
+stm_char_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct stm_file *stmf = file->private_data;
+	struct stm_data *stm_data = stmf->stm->data;
+	int err = -ENOTTY;
+	u64 options;
+
+	switch (cmd) {
+	case STP_POLICY_ID_SET:
+		err = stm_char_policy_set_ioctl(stmf, (void __user *)arg);
+		if (err)
+			return err;
+
+		return stm_char_policy_get_ioctl(stmf, (void __user *)arg);
+
+	case STP_POLICY_ID_GET:
+		return stm_char_policy_get_ioctl(stmf, (void __user *)arg);
+
+	case STP_SET_OPTIONS:
+		if (copy_from_user(&options, (u64 __user *)arg, sizeof(u64)))
+			return -EFAULT;
+
+		if (stm_data->set_options)
+			err = stm_data->set_options(stm_data,
+						    stmf->output.master,
+						    stmf->output.channel,
+						    stmf->output.nr_chans,
+						    options);
+
+		break;
+	default:
+		break;
+	}
+
+	return err;
+}
+
+#ifdef CONFIG_COMPAT
+static long
+stm_char_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	return stm_char_ioctl(file, cmd, (unsigned long)compat_ptr(arg));
+}
+#else
+#define stm_char_compat_ioctl	NULL
+#endif
+
+static const struct file_operations stm_fops = {
+	.open		= stm_char_open,
+	.release	= stm_char_release,
+	.write		= stm_char_write,
+	.mmap		= stm_char_mmap,
+	.unlocked_ioctl	= stm_char_ioctl,
+	.compat_ioctl	= stm_char_compat_ioctl,
+	.llseek		= no_llseek,
+};
+
+static void stm_device_release(struct device *dev)
+{
+	struct stm_device *stm = to_stm_device(dev);
+
+	vfree(stm);
+}
+
+int stm_register_device(struct device *parent, struct stm_data *stm_data,
+			struct module *owner)
+{
+	struct stm_device *stm;
+	unsigned int nmasters;
+	int err = -ENOMEM;
+
+	if (!stm_core_up)
+		return -EPROBE_DEFER;
+
+	if (!stm_data->packet || !stm_data->sw_nchannels)
+		return -EINVAL;
+
+	nmasters = stm_data->sw_end - stm_data->sw_start + 1;
+	stm = vzalloc(sizeof(*stm) + nmasters * sizeof(void *));
+	if (!stm)
+		return -ENOMEM;
+
+	stm->major = register_chrdev(0, stm_data->name, &stm_fops);
+	if (stm->major < 0)
+		goto err_free;
+
+	device_initialize(&stm->dev);
+	stm->dev.devt = MKDEV(stm->major, 0);
+	stm->dev.class = &stm_class;
+	stm->dev.parent = parent;
+	stm->dev.release = stm_device_release;
+
+	mutex_init(&stm->link_mutex);
+	spin_lock_init(&stm->link_lock);
+	INIT_LIST_HEAD(&stm->link_list);
+
+	/* initialize the object before it is accessible via sysfs */
+	spin_lock_init(&stm->mc_lock);
+	mutex_init(&stm->policy_mutex);
+	stm->sw_nmasters = nmasters;
+	stm->owner = owner;
+	stm->data = stm_data;
+	stm_data->stm = stm;
+
+	err = kobject_set_name(&stm->dev.kobj, "%s", stm_data->name);
+	if (err)
+		goto err_device;
+
+	err = device_add(&stm->dev);
+	if (err)
+		goto err_device;
+
+	/*
+	 * Use delayed autosuspend to avoid bouncing back and forth
+	 * on recurring character device writes, with the initial
+	 * delay time of 2 seconds.
+	 */
+	pm_runtime_no_callbacks(&stm->dev);
+	pm_runtime_use_autosuspend(&stm->dev);
+	pm_runtime_set_autosuspend_delay(&stm->dev, 2000);
+	pm_runtime_set_suspended(&stm->dev);
+	pm_runtime_enable(&stm->dev);
+
+	return 0;
+
+err_device:
+	unregister_chrdev(stm->major, stm_data->name);
+
+	/* matches device_initialize() above */
+	put_device(&stm->dev);
+err_free:
+	vfree(stm);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(stm_register_device);
+
+static int __stm_source_link_drop(struct stm_source_device *src,
+				  struct stm_device *stm);
+
+void stm_unregister_device(struct stm_data *stm_data)
+{
+	struct stm_device *stm = stm_data->stm;
+	struct stm_source_device *src, *iter;
+	int i, ret;
+
+	pm_runtime_dont_use_autosuspend(&stm->dev);
+	pm_runtime_disable(&stm->dev);
+
+	mutex_lock(&stm->link_mutex);
+	list_for_each_entry_safe(src, iter, &stm->link_list, link_entry) {
+		ret = __stm_source_link_drop(src, stm);
+		/*
+		 * src <-> stm link must not change under the same
+		 * stm::link_mutex, so complain loudly if it has;
+		 * also in this situation ret!=0 means this src is
+		 * not connected to this stm and it should be otherwise
+		 * safe to proceed with the tear-down of stm.
+		 */
+		WARN_ON_ONCE(ret);
+	}
+	mutex_unlock(&stm->link_mutex);
+
+	synchronize_srcu(&stm_source_srcu);
+
+	unregister_chrdev(stm->major, stm_data->name);
+
+	mutex_lock(&stm->policy_mutex);
+	if (stm->policy)
+		stp_policy_unbind(stm->policy);
+	mutex_unlock(&stm->policy_mutex);
+
+	for (i = stm->data->sw_start; i <= stm->data->sw_end; i++)
+		stp_master_free(stm, i);
+
+	device_unregister(&stm->dev);
+	stm_data->stm = NULL;
+}
+EXPORT_SYMBOL_GPL(stm_unregister_device);
+
+/*
+ * stm::link_list access serialization uses a spinlock and a mutex; holding
+ * either of them guarantees that the list is stable; modification requires
+ * holding both of them.
+ *
+ * Lock ordering is as follows:
+ *   stm::link_mutex
+ *     stm::link_lock
+ *       src::link_lock
+ */
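
stm_source_link_add() just below, and its fail_detach path, follow this ordering literally; distilled into a sketch (editor's illustration only), any modification of stm::link_list nests the locks like this:

/* Editor's illustration of the documented lock nesting; this mirrors what
 * stm_source_link_add() below actually does. */
static void example_link_list_modify(struct stm_device *stm,
				     struct stm_source_device *src)
{
	mutex_lock(&stm->link_mutex);		/* outermost */
	spin_lock(&stm->link_lock);
	spin_lock(&src->link_lock);		/* innermost */

	list_del_init(&src->link_entry);	/* or list_add_tail(...) */

	spin_unlock(&src->link_lock);
	spin_unlock(&stm->link_lock);
	mutex_unlock(&stm->link_mutex);
}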
+
+/**
+ * stm_source_link_add() - connect an stm_source device to an stm device
+ * @src:	stm_source device
+ * @stm:	stm device
+ *
+ * This function establishes a link from stm_source to an stm device so that
+ * the former can send out trace data to the latter.
+ *
+ * Return:	0 on success, -errno otherwise.
+ */
+static int stm_source_link_add(struct stm_source_device *src,
+			       struct stm_device *stm)
+{
+	char *id;
+	int err;
+
+	mutex_lock(&stm->link_mutex);
+	spin_lock(&stm->link_lock);
+	spin_lock(&src->link_lock);
+
+	/* src->link is dereferenced under stm_source_srcu but not the list */
+	rcu_assign_pointer(src->link, stm);
+	list_add_tail(&src->link_entry, &stm->link_list);
+
+	spin_unlock(&src->link_lock);
+	spin_unlock(&stm->link_lock);
+	mutex_unlock(&stm->link_mutex);
+
+	id = kstrdup(src->data->name, GFP_KERNEL);
+	if (id) {
+		src->policy_node =
+			stp_policy_node_lookup(stm, id);
+
+		kfree(id);
+	}
+
+	err = stm_output_assign(stm, src->data->nr_chans,
+				src->policy_node, &src->output);
+
+	if (src->policy_node)
+		stp_policy_node_put(src->policy_node);
+
+	if (err)
+		goto fail_detach;
+
+	/* this is to notify the STM device that a new link has been made */
+	if (stm->data->link)
+		err = stm->data->link(stm->data, src->output.master,
+				      src->output.channel);
+
+	if (err)
+		goto fail_free_output;
+
+	/* this is to let the source carry out all necessary preparations */
+	if (src->data->link)
+		src->data->link(src->data);
+
+	return 0;
+
+fail_free_output:
+	stm_output_free(stm, &src->output);
+
+fail_detach:
+	mutex_lock(&stm->link_mutex);
+	spin_lock(&stm->link_lock);
+	spin_lock(&src->link_lock);
+
+	rcu_assign_pointer(src->link, NULL);
+	list_del_init(&src->link_entry);
+
+	spin_unlock(&src->link_lock);
+	spin_unlock(&stm->link_lock);
+	mutex_unlock(&stm->link_mutex);
+
+	return err;
+}
+
+/**
+ * __stm_source_link_drop() - detach stm_source from an stm device
+ * @src:	stm_source device
+ * @stm:	stm device
+ *
+ * If @stm is @src::link, disconnect them from one another and put the
+ * reference on the @stm device.
+ *
+ * Caller must hold stm::link_mutex.
+ */
+static int __stm_source_link_drop(struct stm_source_device *src,
+				  struct stm_device *stm)
+{
+	struct stm_device *link;
+	int ret = 0;
+
+	lockdep_assert_held(&stm->link_mutex);
+
+	/* for stm::link_list modification, we hold both mutex and spinlock */
+	spin_lock(&stm->link_lock);
+	spin_lock(&src->link_lock);
+	link = srcu_dereference_check(src->link, &stm_source_srcu, 1);
+
+	/*
+	 * The linked device may have changed since we last looked, because
+	 * we weren't holding the src::link_lock back then; if this is the
+	 * case, tell the caller to retry.
+	 */
+	if (link != stm) {
+		ret = -EAGAIN;
+		goto unlock;
+	}
+
+	stm_output_free(link, &src->output);
+	list_del_init(&src->link_entry);
+	pm_runtime_mark_last_busy(&link->dev);
+	pm_runtime_put_autosuspend(&link->dev);
+	/* matches stm_find_device() from stm_source_link_store() */
+	stm_put_device(link);
+	rcu_assign_pointer(src->link, NULL);
+
+unlock:
+	spin_unlock(&src->link_lock);
+	spin_unlock(&stm->link_lock);
+
+	/*
+	 * Call the unlink callbacks for both source and stm, when we know
+	 * that we have actually performed the unlinking.
+	 */
+	if (!ret) {
+		if (src->data->unlink)
+			src->data->unlink(src->data);
+
+		if (stm->data->unlink)
+			stm->data->unlink(stm->data, src->output.master,
+					  src->output.channel);
+	}
+
+	return ret;
+}
+
+/**
+ * stm_source_link_drop() - detach stm_source from its stm device
+ * @src:	stm_source device
+ *
+ * Unlinking means disconnecting the source from its STM device; after
+ * this, writes will fail until it is linked to a new STM device.
+ *
+ * This will happen on "stm_source_link" sysfs attribute write to undo
+ * the existing link (if any), or on linked STM device's de-registration.
+ */
+static void stm_source_link_drop(struct stm_source_device *src)
+{
+	struct stm_device *stm;
+	int idx, ret;
+
+retry:
+	idx = srcu_read_lock(&stm_source_srcu);
+	/*
+	 * The stm device will be valid for the duration of this
+	 * read section, but the link may change before we grab
+	 * the src::link_lock in __stm_source_link_drop().
+	 */
+	stm = srcu_dereference(src->link, &stm_source_srcu);
+
+	ret = 0;
+	if (stm) {
+		mutex_lock(&stm->link_mutex);
+		ret = __stm_source_link_drop(src, stm);
+		mutex_unlock(&stm->link_mutex);
+	}
+
+	srcu_read_unlock(&stm_source_srcu, idx);
+
+	/* if it did change, retry */
+	if (ret == -EAGAIN)
+		goto retry;
+}
+
+static ssize_t stm_source_link_show(struct device *dev,
+				    struct device_attribute *attr,
+				    char *buf)
+{
+	struct stm_source_device *src = to_stm_source_device(dev);
+	struct stm_device *stm;
+	int idx, ret;
+
+	idx = srcu_read_lock(&stm_source_srcu);
+	stm = srcu_dereference(src->link, &stm_source_srcu);
+	ret = sprintf(buf, "%s\n",
+		      stm ? dev_name(&stm->dev) : "<none>");
+	srcu_read_unlock(&stm_source_srcu, idx);
+
+	return ret;
+}
+
+static ssize_t stm_source_link_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t count)
+{
+	struct stm_source_device *src = to_stm_source_device(dev);
+	struct stm_device *link;
+	int err;
+
+	stm_source_link_drop(src);
+
+	link = stm_find_device(buf);
+	if (!link)
+		return -EINVAL;
+
+	pm_runtime_get(&link->dev);
+
+	err = stm_source_link_add(src, link);
+	if (err) {
+		pm_runtime_put_autosuspend(&link->dev);
+		/* matches the stm_find_device() above */
+		stm_put_device(link);
+	}
+
+	return err ? : count;
+}
+
+static DEVICE_ATTR_RW(stm_source_link);
+
+static struct attribute *stm_source_attrs[] = {
+	&dev_attr_stm_source_link.attr,
+	NULL,
+};
+
+ATTRIBUTE_GROUPS(stm_source);
+
+static struct class stm_source_class = {
+	.name		= "stm_source",
+	.dev_groups	= stm_source_groups,
+};
+
+static void stm_source_device_release(struct device *dev)
+{
+	struct stm_source_device *src = to_stm_source_device(dev);
+
+	kfree(src);
+}
+
+/**
+ * stm_source_register_device() - register an stm_source device
+ * @parent:	parent device
+ * @data:	device description structure
+ *
+ * This will create a device of stm_source class that can write
+ * data to an stm device once linked.
+ *
+ * Return:	0 on success, -errno otherwise.
+ */
+int stm_source_register_device(struct device *parent,
+			       struct stm_source_data *data)
+{
+	struct stm_source_device *src;
+	int err;
+
+	if (!stm_core_up)
+		return -EPROBE_DEFER;
+
+	src = kzalloc(sizeof(*src), GFP_KERNEL);
+	if (!src)
+		return -ENOMEM;
+
+	device_initialize(&src->dev);
+	src->dev.class = &stm_source_class;
+	src->dev.parent = parent;
+	src->dev.release = stm_source_device_release;
+
+	err = kobject_set_name(&src->dev.kobj, "%s", data->name);
+	if (err)
+		goto err;
+
+	pm_runtime_no_callbacks(&src->dev);
+	pm_runtime_forbid(&src->dev);
+
+	err = device_add(&src->dev);
+	if (err)
+		goto err;
+
+	stm_output_init(&src->output);
+	spin_lock_init(&src->link_lock);
+	INIT_LIST_HEAD(&src->link_entry);
+	src->data = data;
+	data->src = src;
+
+	return 0;
+
+err:
+	/* calls stm_source_device_release(), which frees src */
+	put_device(&src->dev);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(stm_source_register_device);
+
+/**
+ * stm_source_unregister_device() - unregister an stm_source device
+ * @data:	device description that was used to register the device
+ *
+ * This will remove a previously created stm_source device from the system.
+ */
+void stm_source_unregister_device(struct stm_source_data *data)
+{
+	struct stm_source_device *src = data->src;
+
+	stm_source_link_drop(src);
+
+	device_unregister(&src->dev);
+}
+EXPORT_SYMBOL_GPL(stm_source_unregister_device);
+
+int notrace stm_source_write(struct stm_source_data *data,
+			     unsigned int chan,
+			     const char *buf, size_t count)
+{
+	struct stm_source_device *src = data->src;
+	struct stm_device *stm;
+	int idx;
+
+	if (!src->output.nr_chans)
+		return -ENODEV;
+
+	if (chan >= src->output.nr_chans)
+		return -EINVAL;
+
+	idx = srcu_read_lock(&stm_source_srcu);
+
+	stm = srcu_dereference(src->link, &stm_source_srcu);
+	if (stm)
+		count = stm_write(stm->data, src->output.master,
+				  src->output.channel + chan,
+				  buf, count);
+	else
+		count = -ENODEV;
+
+	srcu_read_unlock(&stm_source_srcu, idx);
+
+	return count;
+}
+EXPORT_SYMBOL_GPL(stm_source_write);
+
+static int __init stm_core_init(void)
+{
+	int err;
+
+	err = class_register(&stm_class);
+	if (err)
+		return err;
+
+	err = class_register(&stm_source_class);
+	if (err)
+		goto err_stm;
+
+	err = stp_configfs_init();
+	if (err)
+		goto err_src;
+
+	init_srcu_struct(&stm_source_srcu);
+
+	stm_core_up++;
+
+	return 0;
+
+err_src:
+	class_unregister(&stm_source_class);
+err_stm:
+	class_unregister(&stm_class);
+
+	return err;
+}
+
+module_init(stm_core_init);
+
+static void __exit stm_core_exit(void)
+{
+	cleanup_srcu_struct(&stm_source_srcu);
+	class_unregister(&stm_source_class);
+	class_unregister(&stm_class);
+	stp_configfs_exit();
+}
+
+module_exit(stm_core_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("System Trace Module device class");
+MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@linux.intel.com>");
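
The stm_source API exported above (stm_source_register_device(), stm_source_write(), stm_source_unregister_device() and the optional link/unlink callbacks) is all a trace source needs; ftrace.c and heartbeat.c below are the in-tree users. A condensed sketch, where every "example_*" identifier is hypothetical:

/* Editor's sketch only; every "example_*" identifier is hypothetical. */
#include <linux/module.h>
#include <linux/stm.h>

static struct stm_source_data example_src = {
	.name		= "example_source",
	.nr_chans	= 1,	/* channels to reserve when linked */
};

/* emit data once the source has been linked to an stm device through
 * its stm_source_link sysfs attribute */
static void example_emit(const char *buf, size_t len)
{
	stm_source_write(&example_src, 0, buf, len);
}

static int __init example_src_init(void)
{
	/* registers a device in the stm_source class */
	return stm_source_register_device(NULL, &example_src);
}

static void __exit example_src_exit(void)
{
	stm_source_unregister_device(&example_src);
}

module_init(example_src_init);
module_exit(example_src_exit);
MODULE_LICENSE("GPL v2");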
diff --git a/drivers/hwtracing/stm/dummy_stm.c b/drivers/hwtracing/stm/dummy_stm.c
new file mode 100644
index 0000000..38528ff
--- /dev/null
+++ b/drivers/hwtracing/stm/dummy_stm.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A dummy STM device for stm/stm_source class testing.
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * The STM class implements generic infrastructure for System Trace Module
+ * devices as defined in the MIPI STPv2 specification.
+ */
+
+#undef DEBUG
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/stm.h>
+#include <uapi/linux/stm.h>
+
+static ssize_t notrace
+dummy_stm_packet(struct stm_data *stm_data, unsigned int master,
+		 unsigned int channel, unsigned int packet, unsigned int flags,
+		 unsigned int size, const unsigned char *payload)
+{
+#ifdef DEBUG
+	u64 pl = 0;
+
+	if (payload)
+		pl = *(u64 *)payload;
+
+	if (size < 8)
+		pl &= (1ull << (size * 8)) - 1;
+	trace_printk("[%u:%u] [pkt: %x/%x] (%llx)\n", master, channel,
+		     packet, size, pl);
+#endif
+	return size;
+}
+
+#define DUMMY_STM_MAX 32
+
+static struct stm_data dummy_stm[DUMMY_STM_MAX];
+
+static int nr_dummies = 4;
+
+module_param(nr_dummies, int, 0400);
+
+static unsigned int fail_mode;
+
+module_param(fail_mode, int, 0600);
+
+static unsigned int master_min;
+
+module_param(master_min, int, 0400);
+
+static unsigned int master_max = STP_MASTER_MAX;
+
+module_param(master_max, int, 0400);
+
+static unsigned int nr_channels = STP_CHANNEL_MAX;
+
+module_param(nr_channels, int, 0400);
+
+static int dummy_stm_link(struct stm_data *data, unsigned int master,
+			  unsigned int channel)
+{
+	if (fail_mode && (channel & fail_mode))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int dummy_stm_init(void)
+{
+	int i, ret = -ENOMEM;
+
+	if (nr_dummies < 0 || nr_dummies > DUMMY_STM_MAX)
+		return -EINVAL;
+
+	if (master_min > master_max ||
+	    master_max > STP_MASTER_MAX ||
+	    nr_channels > STP_CHANNEL_MAX)
+		return -EINVAL;
+
+	for (i = 0; i < nr_dummies; i++) {
+		dummy_stm[i].name = kasprintf(GFP_KERNEL, "dummy_stm.%d", i);
+		if (!dummy_stm[i].name)
+			goto fail_unregister;
+
+		dummy_stm[i].sw_start		= master_min;
+		dummy_stm[i].sw_end		= master_max;
+		dummy_stm[i].sw_nchannels	= nr_channels;
+		dummy_stm[i].packet		= dummy_stm_packet;
+		dummy_stm[i].link		= dummy_stm_link;
+
+		ret = stm_register_device(NULL, &dummy_stm[i], THIS_MODULE);
+		if (ret)
+			goto fail_free;
+	}
+
+	return 0;
+
+fail_unregister:
+	for (i--; i >= 0; i--) {
+		stm_unregister_device(&dummy_stm[i]);
+fail_free:
+		kfree(dummy_stm[i].name);
+	}
+
+	return ret;
+
+}
+
+static void dummy_stm_exit(void)
+{
+	int i;
+
+	for (i = 0; i < nr_dummies; i++) {
+		stm_unregister_device(&dummy_stm[i]);
+		kfree(dummy_stm[i].name);
+	}
+}
+
+module_init(dummy_stm_init);
+module_exit(dummy_stm_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("dummy_stm device");
+MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@linux.intel.com>");
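
dummy_stm_init() above is effectively the minimal checklist for the device side of the API: provide a name, the software master range, the number of channels and a packet() callback, then call stm_register_device(). Distilled (editor's sketch, all "example_*" names hypothetical):

/* Editor's sketch only; "example_*" identifiers are hypothetical. */
#include <linux/device.h>
#include <linux/stm.h>

static ssize_t example_packet(struct stm_data *stm_data, unsigned int master,
			      unsigned int channel, unsigned int packet,
			      unsigned int flags, unsigned int size,
			      const unsigned char *payload)
{
	/* push one STP packet out to the hardware here */
	return size;			/* bytes consumed */
}

static struct stm_data example_stm = {
	.name		= "example_stm",
	.sw_start	= 0,		/* first software master */
	.sw_end		= 63,		/* last software master, inclusive */
	.sw_nchannels	= 128,		/* channels per master */
	.packet		= example_packet,
};

static int example_probe(struct device *parent)
{
	return stm_register_device(parent, &example_stm, THIS_MODULE);
}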
diff --git a/drivers/hwtracing/stm/ftrace.c b/drivers/hwtracing/stm/ftrace.c
new file mode 100644
index 0000000..ce868e0
--- /dev/null
+++ b/drivers/hwtracing/stm/ftrace.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Simple kernel driver linking the kernel's Ftrace facility to an STM device
+ * Copyright (c) 2016, Linaro Ltd.
+ *
+ * STM Ftrace will be registered as a trace_export.
+ */
+
+#include <linux/module.h>
+#include <linux/stm.h>
+#include <linux/trace.h>
+
+#define STM_FTRACE_NR_CHANNELS 1
+#define STM_FTRACE_CHAN 0
+
+static int stm_ftrace_link(struct stm_source_data *data);
+static void stm_ftrace_unlink(struct stm_source_data *data);
+
+static struct stm_ftrace {
+	struct stm_source_data	data;
+	struct trace_export	ftrace;
+} stm_ftrace = {
+	.data	= {
+		.name		= "ftrace",
+		.nr_chans	= STM_FTRACE_NR_CHANNELS,
+		.link		= stm_ftrace_link,
+		.unlink		= stm_ftrace_unlink,
+	},
+};
+
+/**
+ * stm_ftrace_write() - write data to STM via 'stm_ftrace' source
+ * @export:	trace_export descriptor embedded in struct stm_ftrace
+ * @buf:	buffer containing the data packet
+ * @len:	length of the data packet
+ */
+static void notrace
+stm_ftrace_write(struct trace_export *export, const void *buf, unsigned int len)
+{
+	struct stm_ftrace *stm = container_of(export, struct stm_ftrace, ftrace);
+
+	stm_source_write(&stm->data, STM_FTRACE_CHAN, buf, len);
+}
+
+static int stm_ftrace_link(struct stm_source_data *data)
+{
+	struct stm_ftrace *sf = container_of(data, struct stm_ftrace, data);
+
+	sf->ftrace.write = stm_ftrace_write;
+
+	return register_ftrace_export(&sf->ftrace);
+}
+
+static void stm_ftrace_unlink(struct stm_source_data *data)
+{
+	struct stm_ftrace *sf = container_of(data, struct stm_ftrace, data);
+
+	unregister_ftrace_export(&sf->ftrace);
+}
+
+static int __init stm_ftrace_init(void)
+{
+	int ret;
+
+	ret = stm_source_register_device(NULL, &stm_ftrace.data);
+	if (ret)
+		pr_err("Failed to register stm_source - ftrace.\n");
+
+	return ret;
+}
+
+static void __exit stm_ftrace_exit(void)
+{
+	stm_source_unregister_device(&stm_ftrace.data);
+}
+
+module_init(stm_ftrace_init);
+module_exit(stm_ftrace_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("stm_ftrace driver");
+MODULE_AUTHOR("Chunyan Zhang <zhang.chunyan@linaro.org>");
diff --git a/drivers/hwtracing/stm/heartbeat.c b/drivers/hwtracing/stm/heartbeat.c
new file mode 100644
index 0000000..7db4239
--- /dev/null
+++ b/drivers/hwtracing/stm/heartbeat.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Simple heartbeat STM source driver
+ * Copyright (c) 2016, Intel Corporation.
+ *
+ * The heartbeat STM source sends periodic messages over STM devices to a
+ * trace host.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/hrtimer.h>
+#include <linux/slab.h>
+#include <linux/stm.h>
+
+#define STM_HEARTBEAT_MAX	32
+
+static int nr_devs = 4;
+static int interval_ms = 10;
+
+module_param(nr_devs, int, 0400);
+module_param(interval_ms, int, 0600);
+
+static struct stm_heartbeat {
+	struct stm_source_data	data;
+	struct hrtimer		hrtimer;
+	unsigned int		active;
+} stm_heartbeat[STM_HEARTBEAT_MAX];
+
+static const char str[] = "heartbeat stm source driver is here to serve you";
+
+static enum hrtimer_restart stm_heartbeat_hrtimer_handler(struct hrtimer *hr)
+{
+	struct stm_heartbeat *heartbeat = container_of(hr, struct stm_heartbeat,
+						       hrtimer);
+
+	stm_source_write(&heartbeat->data, 0, str, sizeof str);
+	if (heartbeat->active)
+		hrtimer_forward_now(hr, ms_to_ktime(interval_ms));
+
+	return heartbeat->active ? HRTIMER_RESTART : HRTIMER_NORESTART;
+}
+
+static int stm_heartbeat_link(struct stm_source_data *data)
+{
+	struct stm_heartbeat *heartbeat =
+		container_of(data, struct stm_heartbeat, data);
+
+	heartbeat->active = 1;
+	hrtimer_start(&heartbeat->hrtimer, ms_to_ktime(interval_ms),
+		      HRTIMER_MODE_ABS);
+
+	return 0;
+}
+
+static void stm_heartbeat_unlink(struct stm_source_data *data)
+{
+	struct stm_heartbeat *heartbeat =
+		container_of(data, struct stm_heartbeat, data);
+
+	heartbeat->active = 0;
+	hrtimer_cancel(&heartbeat->hrtimer);
+}
+
+static int stm_heartbeat_init(void)
+{
+	int i, ret = -ENOMEM;
+
+	if (nr_devs < 0 || nr_devs > STM_HEARTBEAT_MAX)
+		return -EINVAL;
+
+	for (i = 0; i < nr_devs; i++) {
+		stm_heartbeat[i].data.name =
+			kasprintf(GFP_KERNEL, "heartbeat.%d", i);
+		if (!stm_heartbeat[i].data.name)
+			goto fail_unregister;
+
+		stm_heartbeat[i].data.nr_chans	= 1;
+		stm_heartbeat[i].data.link		= stm_heartbeat_link;
+		stm_heartbeat[i].data.unlink	= stm_heartbeat_unlink;
+		hrtimer_init(&stm_heartbeat[i].hrtimer, CLOCK_MONOTONIC,
+			     HRTIMER_MODE_ABS);
+		stm_heartbeat[i].hrtimer.function =
+			stm_heartbeat_hrtimer_handler;
+
+		ret = stm_source_register_device(NULL, &stm_heartbeat[i].data);
+		if (ret)
+			goto fail_free;
+	}
+
+	return 0;
+
+fail_unregister:
+	for (i--; i >= 0; i--) {
+		stm_source_unregister_device(&stm_heartbeat[i].data);
+fail_free:
+		kfree(stm_heartbeat[i].data.name);
+	}
+
+	return ret;
+}
+
+static void stm_heartbeat_exit(void)
+{
+	int i;
+
+	for (i = 0; i < nr_devs; i++) {
+		stm_source_unregister_device(&stm_heartbeat[i].data);
+		kfree(stm_heartbeat[i].data.name);
+	}
+}
+
+module_init(stm_heartbeat_init);
+module_exit(stm_heartbeat_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("stm_heartbeat driver");
+MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@linux.intel.com>");
diff --git a/drivers/hwtracing/stm/policy.c b/drivers/hwtracing/stm/policy.c
new file mode 100644
index 0000000..3fd07e2
--- /dev/null
+++ b/drivers/hwtracing/stm/policy.c
@@ -0,0 +1,476 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * System Trace Module (STM) master/channel allocation policy management
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * A master/channel allocation policy maps string identifiers to master and
+ * channel ranges, from which masters and channels are then allocated.
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/configfs.h>
+#include <linux/slab.h>
+#include <linux/stm.h>
+#include "stm.h"
+
+/*
+ * STP Master/Channel allocation policy configfs layout.
+ */
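
For orientation, this is the configfs layout the code below builds (editor's sketch, inferred from the subsystem name, the policy naming rule and the per-node attributes further down; configfs is assumed to be mounted at /sys/kernel/config):

	/sys/kernel/config/stp-policy/
		<stm-device>.<policy>/		at most one policy per stm device
			device			read-only name of the bound stm device
			<node>/
				masters		"first last" master range
				channels	"first last" channel range
				<node>/		nodes may nest further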
+
+struct stp_policy {
+	struct config_group	group;
+	struct stm_device	*stm;
+};
+
+struct stp_policy_node {
+	struct config_group	group;
+	struct stp_policy	*policy;
+	unsigned int		first_master;
+	unsigned int		last_master;
+	unsigned int		first_channel;
+	unsigned int		last_channel;
+};
+
+static struct configfs_subsystem stp_policy_subsys;
+
+void stp_policy_node_get_ranges(struct stp_policy_node *policy_node,
+				unsigned int *mstart, unsigned int *mend,
+				unsigned int *cstart, unsigned int *cend)
+{
+	*mstart	= policy_node->first_master;
+	*mend	= policy_node->last_master;
+	*cstart	= policy_node->first_channel;
+	*cend	= policy_node->last_channel;
+}
+
+static inline char *stp_policy_node_name(struct stp_policy_node *policy_node)
+{
+	return policy_node->group.cg_item.ci_name ? : "<none>";
+}
+
+static inline struct stp_policy *to_stp_policy(struct config_item *item)
+{
+	return item ?
+		container_of(to_config_group(item), struct stp_policy, group) :
+		NULL;
+}
+
+static inline struct stp_policy_node *
+to_stp_policy_node(struct config_item *item)
+{
+	return item ?
+		container_of(to_config_group(item), struct stp_policy_node,
+			     group) :
+		NULL;
+}
+
+static ssize_t
+stp_policy_node_masters_show(struct config_item *item, char *page)
+{
+	struct stp_policy_node *policy_node = to_stp_policy_node(item);
+	ssize_t count;
+
+	count = sprintf(page, "%u %u\n", policy_node->first_master,
+			policy_node->last_master);
+
+	return count;
+}
+
+static ssize_t
+stp_policy_node_masters_store(struct config_item *item, const char *page,
+			      size_t count)
+{
+	struct stp_policy_node *policy_node = to_stp_policy_node(item);
+	unsigned int first, last;
+	struct stm_device *stm;
+	char *p = (char *)page;
+	ssize_t ret = -ENODEV;
+
+	if (sscanf(p, "%u %u", &first, &last) != 2)
+		return -EINVAL;
+
+	mutex_lock(&stp_policy_subsys.su_mutex);
+	stm = policy_node->policy->stm;
+	if (!stm)
+		goto unlock;
+
+	/* must be within [sw_start..sw_end], which is an inclusive range */
+	if (first > last || first < stm->data->sw_start ||
+	    last > stm->data->sw_end) {
+		ret = -ERANGE;
+		goto unlock;
+	}
+
+	ret = count;
+	policy_node->first_master = first;
+	policy_node->last_master = last;
+
+unlock:
+	mutex_unlock(&stp_policy_subsys.su_mutex);
+
+	return ret;
+}
+
+static ssize_t
+stp_policy_node_channels_show(struct config_item *item, char *page)
+{
+	struct stp_policy_node *policy_node = to_stp_policy_node(item);
+	ssize_t count;
+
+	count = sprintf(page, "%u %u\n", policy_node->first_channel,
+			policy_node->last_channel);
+
+	return count;
+}
+
+static ssize_t
+stp_policy_node_channels_store(struct config_item *item, const char *page,
+			       size_t count)
+{
+	struct stp_policy_node *policy_node = to_stp_policy_node(item);
+	unsigned int first, last;
+	struct stm_device *stm;
+	char *p = (char *)page;
+	ssize_t ret = -ENODEV;
+
+	if (sscanf(p, "%u %u", &first, &last) != 2)
+		return -EINVAL;
+
+	mutex_lock(&stp_policy_subsys.su_mutex);
+	stm = policy_node->policy->stm;
+	if (!stm)
+		goto unlock;
+
+	if (first > INT_MAX || last > INT_MAX || first > last ||
+	    last >= stm->data->sw_nchannels) {
+		ret = -ERANGE;
+		goto unlock;
+	}
+
+	ret = count;
+	policy_node->first_channel = first;
+	policy_node->last_channel = last;
+
+unlock:
+	mutex_unlock(&stp_policy_subsys.su_mutex);
+
+	return ret;
+}
+
+static void stp_policy_node_release(struct config_item *item)
+{
+	kfree(to_stp_policy_node(item));
+}
+
+static struct configfs_item_operations stp_policy_node_item_ops = {
+	.release		= stp_policy_node_release,
+};
+
+CONFIGFS_ATTR(stp_policy_node_, masters);
+CONFIGFS_ATTR(stp_policy_node_, channels);
+
+static struct configfs_attribute *stp_policy_node_attrs[] = {
+	&stp_policy_node_attr_masters,
+	&stp_policy_node_attr_channels,
+	NULL,
+};
+
+static const struct config_item_type stp_policy_type;
+static const struct config_item_type stp_policy_node_type;
+
+static struct config_group *
+stp_policy_node_make(struct config_group *group, const char *name)
+{
+	struct stp_policy_node *policy_node, *parent_node;
+	struct stp_policy *policy;
+
+	if (group->cg_item.ci_type == &stp_policy_type) {
+		policy = container_of(group, struct stp_policy, group);
+	} else {
+		parent_node = container_of(group, struct stp_policy_node,
+					   group);
+		policy = parent_node->policy;
+	}
+
+	if (!policy->stm)
+		return ERR_PTR(-ENODEV);
+
+	policy_node = kzalloc(sizeof(struct stp_policy_node), GFP_KERNEL);
+	if (!policy_node)
+		return ERR_PTR(-ENOMEM);
+
+	config_group_init_type_name(&policy_node->group, name,
+				    &stp_policy_node_type);
+
+	policy_node->policy = policy;
+
+	/* default values for the attributes */
+	policy_node->first_master = policy->stm->data->sw_start;
+	policy_node->last_master = policy->stm->data->sw_end;
+	policy_node->first_channel = 0;
+	policy_node->last_channel = policy->stm->data->sw_nchannels - 1;
+
+	return &policy_node->group;
+}
+
+static void
+stp_policy_node_drop(struct config_group *group, struct config_item *item)
+{
+	config_item_put(item);
+}
+
+static struct configfs_group_operations stp_policy_node_group_ops = {
+	.make_group	= stp_policy_node_make,
+	.drop_item	= stp_policy_node_drop,
+};
+
+static const struct config_item_type stp_policy_node_type = {
+	.ct_item_ops	= &stp_policy_node_item_ops,
+	.ct_group_ops	= &stp_policy_node_group_ops,
+	.ct_attrs	= stp_policy_node_attrs,
+	.ct_owner	= THIS_MODULE,
+};
+
+/*
+ * Root group: policies.
+ */
+static ssize_t stp_policy_device_show(struct config_item *item,
+				      char *page)
+{
+	struct stp_policy *policy = to_stp_policy(item);
+	ssize_t count;
+
+	count = sprintf(page, "%s\n",
+			(policy && policy->stm) ?
+			policy->stm->data->name :
+			"<none>");
+
+	return count;
+}
+
+CONFIGFS_ATTR_RO(stp_policy_, device);
+
+static struct configfs_attribute *stp_policy_attrs[] = {
+	&stp_policy_attr_device,
+	NULL,
+};
+
+void stp_policy_unbind(struct stp_policy *policy)
+{
+	struct stm_device *stm = policy->stm;
+
+	/*
+	 * stp_policy_release() will not get here if the policy is already
+	 * unbound; other users should not either, since no link exists
+	 * between this policy and anything else in that case.
+	 */
+	if (WARN_ON_ONCE(!policy->stm))
+		return;
+
+	lockdep_assert_held(&stm->policy_mutex);
+
+	stm->policy = NULL;
+	policy->stm = NULL;
+
+	stm_put_device(stm);
+}
+
+static void stp_policy_release(struct config_item *item)
+{
+	struct stp_policy *policy = to_stp_policy(item);
+	struct stm_device *stm = policy->stm;
+
+	/* a policy *can* be unbound and still exist in configfs tree */
+	if (!stm)
+		return;
+
+	mutex_lock(&stm->policy_mutex);
+	stp_policy_unbind(policy);
+	mutex_unlock(&stm->policy_mutex);
+
+	kfree(policy);
+}
+
+static struct configfs_item_operations stp_policy_item_ops = {
+	.release		= stp_policy_release,
+};
+
+static struct configfs_group_operations stp_policy_group_ops = {
+	.make_group	= stp_policy_node_make,
+};
+
+static const struct config_item_type stp_policy_type = {
+	.ct_item_ops	= &stp_policy_item_ops,
+	.ct_group_ops	= &stp_policy_group_ops,
+	.ct_attrs	= stp_policy_attrs,
+	.ct_owner	= THIS_MODULE,
+};
+
+static struct config_group *
+stp_policies_make(struct config_group *group, const char *name)
+{
+	struct config_group *ret;
+	struct stm_device *stm;
+	char *devname, *p;
+
+	devname = kasprintf(GFP_KERNEL, "%s", name);
+	if (!devname)
+		return ERR_PTR(-ENOMEM);
+
+	/*
+	 * node must look like <device_name>.<policy_name>, where
+	 * <device_name> is the name of an existing stm device; may
+	 *               contain dots;
+	 * <policy_name> is an arbitrary string; may not contain dots
+	 */
+	p = strrchr(devname, '.');
+	if (!p) {
+		kfree(devname);
+		return ERR_PTR(-EINVAL);
+	}
+
+	*p = '\0';
+
+	stm = stm_find_device(devname);
+	kfree(devname);
+
+	if (!stm)
+		return ERR_PTR(-ENODEV);
+
+	mutex_lock(&stm->policy_mutex);
+	if (stm->policy) {
+		ret = ERR_PTR(-EBUSY);
+		goto unlock_policy;
+	}
+
+	stm->policy = kzalloc(sizeof(*stm->policy), GFP_KERNEL);
+	if (!stm->policy) {
+		ret = ERR_PTR(-ENOMEM);
+		goto unlock_policy;
+	}
+
+	config_group_init_type_name(&stm->policy->group, name,
+				    &stp_policy_type);
+	stm->policy->stm = stm;
+
+	ret = &stm->policy->group;
+
+unlock_policy:
+	mutex_unlock(&stm->policy_mutex);
+
+	if (IS_ERR(ret))
+		stm_put_device(stm);
+
+	return ret;
+}
+
+static struct configfs_group_operations stp_policies_group_ops = {
+	.make_group	= stp_policies_make,
+};
+
+static const struct config_item_type stp_policies_type = {
+	.ct_group_ops	= &stp_policies_group_ops,
+	.ct_owner	= THIS_MODULE,
+};
+
+static struct configfs_subsystem stp_policy_subsys = {
+	.su_group = {
+		.cg_item = {
+			.ci_namebuf	= "stp-policy",
+			.ci_type	= &stp_policies_type,
+		},
+	},
+};
+
+/*
+ * Caller must hold stm::policy_mutex; see stp_policy_node_lookup() below.
+ */
+static struct stp_policy_node *
+__stp_policy_node_lookup(struct stp_policy *policy, char *s)
+{
+	struct stp_policy_node *policy_node, *ret;
+	struct list_head *head = &policy->group.cg_children;
+	struct config_item *item;
+	char *start, *end = s;
+
+	if (list_empty(head))
+		return NULL;
+
+	/* return the first entry if everything else fails */
+	item = list_entry(head->next, struct config_item, ci_entry);
+	ret = to_stp_policy_node(item);
+
+next:
+	for (;;) {
+		start = strsep(&end, "/");
+		if (!start)
+			break;
+
+		if (!*start)
+			continue;
+
+		list_for_each_entry(item, head, ci_entry) {
+			policy_node = to_stp_policy_node(item);
+
+			if (!strcmp(start,
+				    policy_node->group.cg_item.ci_name)) {
+				ret = policy_node;
+
+				if (!end)
+					goto out;
+
+				head = &policy_node->group.cg_children;
+				goto next;
+			}
+		}
+		break;
+	}
+
+out:
+	return ret;
+}
+
+
+struct stp_policy_node *
+stp_policy_node_lookup(struct stm_device *stm, char *s)
+{
+	struct stp_policy_node *policy_node = NULL;
+
+	mutex_lock(&stp_policy_subsys.su_mutex);
+
+	mutex_lock(&stm->policy_mutex);
+	if (stm->policy)
+		policy_node = __stp_policy_node_lookup(stm->policy, s);
+	mutex_unlock(&stm->policy_mutex);
+
+	if (policy_node)
+		config_item_get(&policy_node->group.cg_item);
+	mutex_unlock(&stp_policy_subsys.su_mutex);
+
+	return policy_node;
+}
+
+void stp_policy_node_put(struct stp_policy_node *policy_node)
+{
+	config_item_put(&policy_node->group.cg_item);
+}
+
+int __init stp_configfs_init(void)
+{
+	int err;
+
+	config_group_init(&stp_policy_subsys.su_group);
+	mutex_init(&stp_policy_subsys.su_mutex);
+	err = configfs_register_subsystem(&stp_policy_subsys);
+
+	return err;
+}
+
+void __exit stp_configfs_exit(void)
+{
+	configfs_unregister_subsystem(&stp_policy_subsys);
+}
diff --git a/drivers/hwtracing/stm/stm.h b/drivers/hwtracing/stm/stm.h
new file mode 100644
index 0000000..923571a
--- /dev/null
+++ b/drivers/hwtracing/stm/stm.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * System Trace Module (STM) infrastructure
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * The STM class implements generic infrastructure for System Trace Module
+ * devices as defined in the MIPI STPv2 specification.
+ */
+
+#ifndef _STM_STM_H_
+#define _STM_STM_H_
+
+struct stp_policy;
+struct stp_policy_node;
+
+struct stp_policy_node *
+stp_policy_node_lookup(struct stm_device *stm, char *s);
+void stp_policy_node_put(struct stp_policy_node *policy_node);
+void stp_policy_unbind(struct stp_policy *policy);
+
+void stp_policy_node_get_ranges(struct stp_policy_node *policy_node,
+				unsigned int *mstart, unsigned int *mend,
+				unsigned int *cstart, unsigned int *cend);
+int stp_configfs_init(void);
+void stp_configfs_exit(void);
+
+struct stp_master {
+	unsigned int	nr_free;
+	unsigned long	chan_map[0];
+};
+
+struct stm_device {
+	struct device		dev;
+	struct module		*owner;
+	struct stp_policy	*policy;
+	struct mutex		policy_mutex;
+	int			major;
+	unsigned int		sw_nmasters;
+	struct stm_data		*data;
+	struct mutex		link_mutex;
+	spinlock_t		link_lock;
+	struct list_head	link_list;
+	/* master allocation */
+	spinlock_t		mc_lock;
+	struct stp_master	*masters[0];
+};
+
+#define to_stm_device(_d)				\
+	container_of((_d), struct stm_device, dev)
+
+struct stm_output {
+	spinlock_t		lock;
+	unsigned int		master;
+	unsigned int		channel;
+	unsigned int		nr_chans;
+};
+
+struct stm_file {
+	struct stm_device	*stm;
+	struct stp_policy_node	*policy_node;
+	struct stm_output	output;
+};
+
+struct stm_device *stm_find_device(const char *name);
+void stm_put_device(struct stm_device *stm);
+
+struct stm_source_device {
+	struct device		dev;
+	struct stm_source_data	*data;
+	spinlock_t		link_lock;
+	struct stm_device __rcu	*link;
+	struct list_head	link_entry;
+	/* one output per stm_source device */
+	struct stp_policy_node	*policy_node;
+	struct stm_output	output;
+};
+
+#define to_stm_source_device(_d)				\
+	container_of((_d), struct stm_source_device, dev)
+
+#endif /* _STM_STM_H_ */