Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/drivers/gpu/host1x/Kconfig b/drivers/gpu/host1x/Kconfig
index cf987a3..6dab94a 100644
--- a/drivers/gpu/host1x/Kconfig
+++ b/drivers/gpu/host1x/Kconfig
@@ -2,7 +2,7 @@
config TEGRA_HOST1X
tristate "NVIDIA Tegra host1x driver"
depends on ARCH_TEGRA || (ARM && COMPILE_TEST)
- select IOMMU_IOVA if IOMMU_SUPPORT
+ select IOMMU_IOVA
help
Driver for the NVIDIA Tegra host1x hardware.
diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index fcda862..6e3b49d 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -120,7 +120,7 @@
mutex_lock(&device->clients_lock);
list_move_tail(&client->list, &device->clients);
list_move_tail(&subdev->list, &device->active);
- client->parent = &device->dev;
+ client->host = &device->dev;
subdev->client = client;
mutex_unlock(&device->clients_lock);
mutex_unlock(&device->subdevs_lock);
@@ -156,7 +156,7 @@
*/
mutex_lock(&device->clients_lock);
subdev->client = NULL;
- client->parent = NULL;
+ client->host = NULL;
list_move_tail(&subdev->list, &device->subdevs);
/*
* XXX: Perhaps don't do this here, but rather explicitly remove it
@@ -445,7 +445,7 @@
of_dma_configure(&device->dev, host1x->dev->of_node, true);
device->dev.dma_parms = &device->dma_parms;
- dma_set_max_seg_size(&device->dev, SZ_4M);
+ dma_set_max_seg_size(&device->dev, UINT_MAX);
err = host1x_device_parse_dt(device, driver);
if (err < 0) {
@@ -704,8 +704,32 @@
EXPORT_SYMBOL(host1x_driver_unregister);
/**
- * host1x_client_register() - register a host1x client
+ * __host1x_client_init() - initialize a host1x client
* @client: host1x client
+ * @key: lock class key for the client-specific mutex
+ */
+void __host1x_client_init(struct host1x_client *client, struct lock_class_key *key)
+{
+ INIT_LIST_HEAD(&client->list);
+ __mutex_init(&client->lock, "host1x client lock", key);
+ client->usecount = 0;
+}
+EXPORT_SYMBOL(__host1x_client_init);
+
+/**
+ * host1x_client_exit() - uninitialize a host1x client
+ * @client: host1x client
+ */
+void host1x_client_exit(struct host1x_client *client)
+{
+ mutex_destroy(&client->lock);
+}
+EXPORT_SYMBOL(host1x_client_exit);
+
+/**
+ * __host1x_client_register() - register a host1x client
+ * @client: host1x client
+ * @key: lock class key for the client-specific mutex
*
* Registers a host1x client with each host1x controller instance. Note that
* each client will only match their parent host1x controller and will only be
@@ -714,7 +738,7 @@
* device and call host1x_device_init(), which will in turn call each client's
* &host1x_client_ops.init implementation.
*/
-int host1x_client_register(struct host1x_client *client)
+int __host1x_client_register(struct host1x_client *client)
{
struct host1x *host1x;
int err;
@@ -737,7 +761,7 @@
return 0;
}
-EXPORT_SYMBOL(host1x_client_register);
+EXPORT_SYMBOL(__host1x_client_register);
/**
* host1x_client_unregister() - unregister a host1x client
@@ -777,3 +801,74 @@
return 0;
}
EXPORT_SYMBOL(host1x_client_unregister);
+
+int host1x_client_suspend(struct host1x_client *client)
+{
+ int err = 0;
+
+ mutex_lock(&client->lock);
+
+ if (client->usecount == 1) {
+ if (client->ops && client->ops->suspend) {
+ err = client->ops->suspend(client);
+ if (err < 0)
+ goto unlock;
+ }
+ }
+
+ client->usecount--;
+ dev_dbg(client->dev, "use count: %u\n", client->usecount);
+
+ if (client->parent) {
+ err = host1x_client_suspend(client->parent);
+ if (err < 0)
+ goto resume;
+ }
+
+ goto unlock;
+
+resume:
+ if (client->usecount == 0)
+ if (client->ops && client->ops->resume)
+ client->ops->resume(client);
+
+ client->usecount++;
+unlock:
+ mutex_unlock(&client->lock);
+ return err;
+}
+EXPORT_SYMBOL(host1x_client_suspend);
+
+int host1x_client_resume(struct host1x_client *client)
+{
+ int err = 0;
+
+ mutex_lock(&client->lock);
+
+ if (client->parent) {
+ err = host1x_client_resume(client->parent);
+ if (err < 0)
+ goto unlock;
+ }
+
+ if (client->usecount == 0) {
+ if (client->ops && client->ops->resume) {
+ err = client->ops->resume(client);
+ if (err < 0)
+ goto suspend;
+ }
+ }
+
+ client->usecount++;
+ dev_dbg(client->dev, "use count: %u\n", client->usecount);
+
+ goto unlock;
+
+suspend:
+ if (client->parent)
+ host1x_client_suspend(client->parent);
+unlock:
+ mutex_unlock(&client->lock);
+ return err;
+}
+EXPORT_SYMBOL(host1x_client_resume);
diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index 48c84c4..e8d3fda 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -232,9 +232,9 @@
*
* Must be called with the cdma lock held.
*/
-int host1x_cdma_wait_pushbuffer_space(struct host1x *host1x,
- struct host1x_cdma *cdma,
- unsigned int needed)
+static int host1x_cdma_wait_pushbuffer_space(struct host1x *host1x,
+ struct host1x_cdma *cdma,
+ unsigned int needed)
{
while (true) {
struct push_buffer *pb = &cdma->push_buffer;
diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c
index 1436295..4cd212b 100644
--- a/drivers/gpu/host1x/channel.c
+++ b/drivers/gpu/host1x/channel.c
@@ -115,14 +115,14 @@
/**
* host1x_channel_request() - Allocate a channel
- * @device: Host1x unit this channel will be used to send commands to
+ * @client: Host1x client this channel will be used to send commands to
*
- * Allocates a new host1x channel for @device. May return NULL if CDMA
+ * Allocates a new host1x channel for @client. May return NULL if CDMA
* initialization fails.
*/
-struct host1x_channel *host1x_channel_request(struct device *dev)
+struct host1x_channel *host1x_channel_request(struct host1x_client *client)
{
- struct host1x *host = dev_get_drvdata(dev->parent);
+ struct host1x *host = dev_get_drvdata(client->dev->parent);
struct host1x_channel_list *chlist = &host->channel_list;
struct host1x_channel *channel;
int err;
@@ -133,7 +133,8 @@
kref_init(&channel->refcount);
mutex_init(&channel->submitlock);
- channel->dev = dev;
+ channel->client = client;
+ channel->dev = client->dev;
err = host1x_hw_channel_init(host, channel, channel->id);
if (err < 0)
@@ -148,7 +149,7 @@
fail:
clear_bit(channel->id, chlist->allocated_channels);
- dev_err(dev, "failed to initialize channel\n");
+ dev_err(client->dev, "failed to initialize channel\n");
return NULL;
}
diff --git a/drivers/gpu/host1x/channel.h b/drivers/gpu/host1x/channel.h
index 4fd6948..39044ff 100644
--- a/drivers/gpu/host1x/channel.h
+++ b/drivers/gpu/host1x/channel.h
@@ -26,6 +26,7 @@
unsigned int id;
struct mutex submitlock;
void __iomem *regs;
+ struct host1x_client *client;
struct device *dev;
struct host1x_cdma cdma;
};
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 5a3f797..a2c09dc 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -77,6 +77,10 @@
.init = host1x01_init,
.sync_offset = 0x3000,
.dma_mask = DMA_BIT_MASK(32),
+ .has_wide_gather = false,
+ .has_hypervisor = false,
+ .num_sid_entries = 0,
+ .sid_table = NULL,
};
static const struct host1x_info host1x02_info = {
@@ -87,6 +91,10 @@
.init = host1x02_init,
.sync_offset = 0x3000,
.dma_mask = DMA_BIT_MASK(32),
+ .has_wide_gather = false,
+ .has_hypervisor = false,
+ .num_sid_entries = 0,
+ .sid_table = NULL,
};
static const struct host1x_info host1x04_info = {
@@ -97,6 +105,10 @@
.init = host1x04_init,
.sync_offset = 0x2100,
.dma_mask = DMA_BIT_MASK(34),
+ .has_wide_gather = false,
+ .has_hypervisor = false,
+ .num_sid_entries = 0,
+ .sid_table = NULL,
};
static const struct host1x_info host1x05_info = {
@@ -107,6 +119,10 @@
.init = host1x05_init,
.sync_offset = 0x2100,
.dma_mask = DMA_BIT_MASK(34),
+ .has_wide_gather = false,
+ .has_hypervisor = false,
+ .num_sid_entries = 0,
+ .sid_table = NULL,
};
static const struct host1x_sid_entry tegra186_sid_table[] = {
@@ -126,6 +142,7 @@
.init = host1x06_init,
.sync_offset = 0x0,
.dma_mask = DMA_BIT_MASK(40),
+ .has_wide_gather = true,
.has_hypervisor = true,
.num_sid_entries = ARRAY_SIZE(tegra186_sid_table),
.sid_table = tegra186_sid_table,
@@ -148,6 +165,7 @@
.init = host1x07_init,
.sync_offset = 0x0,
.dma_mask = DMA_BIT_MASK(40),
+ .has_wide_gather = true,
.has_hypervisor = true,
.num_sid_entries = ARRAY_SIZE(tegra194_sid_table),
.sid_table = tegra194_sid_table,
@@ -178,6 +196,166 @@
}
}
+static bool host1x_wants_iommu(struct host1x *host1x)
+{
+ /*
+ * If we support addressing a maximum of 32 bits of physical memory
+ * and if the host1x firewall is enabled, there's no need to enable
+ * IOMMU support. This can happen for example on Tegra20, Tegra30
+ * and Tegra114.
+ *
+ * Tegra124 and later can address up to 34 bits of physical memory and
+ * many platforms come equipped with more than 2 GiB of system memory,
+ * which requires crossing the 4 GiB boundary. But there's a catch: on
+ * SoCs before Tegra186 (i.e. Tegra124 and Tegra210), the host1x can
+ * only address up to 32 bits of memory in GATHER opcodes, which means
+ * that command buffers need to either be in the first 2 GiB of system
+ * memory (which could quickly lead to memory exhaustion), or command
+ * buffers need to be treated differently from other buffers (which is
+ * not possible with the current ABI).
+ *
+ * A third option is to use the IOMMU in these cases to make sure all
+ * buffers will be mapped into a 32-bit IOVA space that host1x can
+ * address. This allows all of the system memory to be used and works
+ * within the limitations of the host1x on these SoCs.
+ *
+ * In summary, default to enable IOMMU on Tegra124 and later. For any
+ * of the earlier SoCs, only use the IOMMU for additional safety when
+ * the host1x firewall is disabled.
+ */
+ if (host1x->info->dma_mask <= DMA_BIT_MASK(32)) {
+ if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
+ return false;
+ }
+
+ return true;
+}
+
+static struct iommu_domain *host1x_iommu_attach(struct host1x *host)
+{
+ struct iommu_domain *domain = iommu_get_domain_for_dev(host->dev);
+ int err;
+
+#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
+ if (host->dev->archdata.mapping) {
+ struct dma_iommu_mapping *mapping =
+ to_dma_iommu_mapping(host->dev);
+ arm_iommu_detach_device(host->dev);
+ arm_iommu_release_mapping(mapping);
+
+ domain = iommu_get_domain_for_dev(host->dev);
+ }
+#endif
+
+ /*
+ * We may not always want to enable IOMMU support (for example if the
+ * host1x firewall is already enabled and we don't support addressing
+ * more than 32 bits of physical memory), so check for that first.
+ *
+ * Similarly, if host1x is already attached to an IOMMU (via the DMA
+ * API), don't try to attach again.
+ */
+ if (!host1x_wants_iommu(host) || domain)
+ return domain;
+
+ host->group = iommu_group_get(host->dev);
+ if (host->group) {
+ struct iommu_domain_geometry *geometry;
+ dma_addr_t start, end;
+ unsigned long order;
+
+ err = iova_cache_get();
+ if (err < 0)
+ goto put_group;
+
+ host->domain = iommu_domain_alloc(&platform_bus_type);
+ if (!host->domain) {
+ err = -ENOMEM;
+ goto put_cache;
+ }
+
+ err = iommu_attach_group(host->domain, host->group);
+ if (err) {
+ if (err == -ENODEV)
+ err = 0;
+
+ goto free_domain;
+ }
+
+ geometry = &host->domain->geometry;
+ start = geometry->aperture_start & host->info->dma_mask;
+ end = geometry->aperture_end & host->info->dma_mask;
+
+ order = __ffs(host->domain->pgsize_bitmap);
+ init_iova_domain(&host->iova, 1UL << order, start >> order);
+ host->iova_end = end;
+
+ domain = host->domain;
+ }
+
+ return domain;
+
+free_domain:
+ iommu_domain_free(host->domain);
+ host->domain = NULL;
+put_cache:
+ iova_cache_put();
+put_group:
+ iommu_group_put(host->group);
+ host->group = NULL;
+
+ return ERR_PTR(err);
+}
+
+static int host1x_iommu_init(struct host1x *host)
+{
+ u64 mask = host->info->dma_mask;
+ struct iommu_domain *domain;
+ int err;
+
+ domain = host1x_iommu_attach(host);
+ if (IS_ERR(domain)) {
+ err = PTR_ERR(domain);
+ dev_err(host->dev, "failed to attach to IOMMU: %d\n", err);
+ return err;
+ }
+
+ /*
+ * If we're not behind an IOMMU make sure we don't get push buffers
+ * that are allocated outside of the range addressable by the GATHER
+ * opcode.
+ *
+ * Newer generations of Tegra (Tegra186 and later) support a wide
+ * variant of the GATHER opcode that allows addressing more bits.
+ */
+ if (!domain && !host->info->has_wide_gather)
+ mask = DMA_BIT_MASK(32);
+
+ err = dma_coerce_mask_and_coherent(host->dev, mask);
+ if (err < 0) {
+ dev_err(host->dev, "failed to set DMA mask: %d\n", err);
+ return err;
+ }
+
+ return 0;
+}
+
+static void host1x_iommu_exit(struct host1x *host)
+{
+ if (host->domain) {
+ put_iova_domain(&host->iova);
+ iommu_detach_group(host->domain, host->group);
+
+ iommu_domain_free(host->domain);
+ host->domain = NULL;
+
+ iova_cache_put();
+
+ iommu_group_put(host->group);
+ host->group = NULL;
+ }
+}
+
static int host1x_probe(struct platform_device *pdev)
{
struct host1x *host;
@@ -214,10 +392,8 @@
}
syncpt_irq = platform_get_irq(pdev, 0);
- if (syncpt_irq < 0) {
- dev_err(&pdev->dev, "failed to get IRQ: %d\n", syncpt_irq);
+ if (syncpt_irq < 0)
return syncpt_irq;
- }
mutex_init(&host->devices_lock);
INIT_LIST_HEAD(&host->devices);
@@ -237,7 +413,8 @@
return PTR_ERR(host->hv_regs);
}
- dma_set_mask_and_coherent(host->dev, host->info->dma_mask);
+ host->dev->dma_parms = &host->dma_parms;
+ dma_set_max_seg_size(host->dev, UINT_MAX);
if (host->info->init) {
err = host->info->init(host);
@@ -261,87 +438,42 @@
dev_err(&pdev->dev, "failed to get reset: %d\n", err);
return err;
}
-#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
- if (host->dev->archdata.mapping) {
- struct dma_iommu_mapping *mapping =
- to_dma_iommu_mapping(host->dev);
- arm_iommu_detach_device(host->dev);
- arm_iommu_release_mapping(mapping);
- }
-#endif
- if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
- goto skip_iommu;
- host->group = iommu_group_get(&pdev->dev);
- if (host->group) {
- struct iommu_domain_geometry *geometry;
- u64 mask = dma_get_mask(host->dev);
- dma_addr_t start, end;
- unsigned long order;
-
- err = iova_cache_get();
- if (err < 0)
- goto put_group;
-
- host->domain = iommu_domain_alloc(&platform_bus_type);
- if (!host->domain) {
- err = -ENOMEM;
- goto put_cache;
- }
-
- err = iommu_attach_group(host->domain, host->group);
- if (err) {
- if (err == -ENODEV) {
- iommu_domain_free(host->domain);
- host->domain = NULL;
- iova_cache_put();
- iommu_group_put(host->group);
- host->group = NULL;
- goto skip_iommu;
- }
-
- goto fail_free_domain;
- }
-
- geometry = &host->domain->geometry;
- start = geometry->aperture_start & mask;
- end = geometry->aperture_end & mask;
-
- order = __ffs(host->domain->pgsize_bitmap);
- init_iova_domain(&host->iova, 1UL << order, start >> order);
- host->iova_end = end;
+ err = host1x_iommu_init(host);
+ if (err < 0) {
+ dev_err(&pdev->dev, "failed to setup IOMMU: %d\n", err);
+ return err;
}
-skip_iommu:
err = host1x_channel_list_init(&host->channel_list,
host->info->nb_channels);
if (err) {
dev_err(&pdev->dev, "failed to initialize channel list\n");
- goto fail_detach_device;
+ goto iommu_exit;
}
err = clk_prepare_enable(host->clk);
if (err < 0) {
dev_err(&pdev->dev, "failed to enable clock\n");
- goto fail_free_channels;
+ goto free_channels;
}
err = reset_control_deassert(host->rst);
if (err < 0) {
dev_err(&pdev->dev, "failed to deassert reset: %d\n", err);
- goto fail_unprepare_disable;
+ goto unprepare_disable;
}
err = host1x_syncpt_init(host);
if (err) {
dev_err(&pdev->dev, "failed to initialize syncpts\n");
- goto fail_reset_assert;
+ goto reset_assert;
}
err = host1x_intr_init(host, syncpt_irq);
if (err) {
dev_err(&pdev->dev, "failed to initialize interrupts\n");
- goto fail_deinit_syncpt;
+ goto deinit_syncpt;
}
host1x_debug_init(host);
@@ -351,33 +483,29 @@
err = host1x_register(host);
if (err < 0)
- goto fail_deinit_intr;
+ goto deinit_debugfs;
+
+ err = devm_of_platform_populate(&pdev->dev);
+ if (err < 0)
+ goto unregister;
return 0;
-fail_deinit_intr:
+unregister:
+ host1x_unregister(host);
+deinit_debugfs:
+ host1x_debug_deinit(host);
host1x_intr_deinit(host);
-fail_deinit_syncpt:
+deinit_syncpt:
host1x_syncpt_deinit(host);
-fail_reset_assert:
+reset_assert:
reset_control_assert(host->rst);
-fail_unprepare_disable:
+unprepare_disable:
clk_disable_unprepare(host->clk);
-fail_free_channels:
+free_channels:
host1x_channel_list_free(&host->channel_list);
-fail_detach_device:
- if (host->group && host->domain) {
- put_iova_domain(&host->iova);
- iommu_detach_group(host->domain, host->group);
- }
-fail_free_domain:
- if (host->domain)
- iommu_domain_free(host->domain);
-put_cache:
- if (host->group)
- iova_cache_put();
-put_group:
- iommu_group_put(host->group);
+iommu_exit:
+ host1x_iommu_exit(host);
return err;
}
@@ -387,18 +515,12 @@
struct host1x *host = platform_get_drvdata(pdev);
host1x_unregister(host);
+ host1x_debug_deinit(host);
host1x_intr_deinit(host);
host1x_syncpt_deinit(host);
reset_control_assert(host->rst);
clk_disable_unprepare(host->clk);
-
- if (host->domain) {
- put_iova_domain(&host->iova);
- iommu_detach_group(host->domain, host->group);
- iommu_domain_free(host->domain);
- iova_cache_put();
- iommu_group_put(host->group);
- }
+ host1x_iommu_exit(host);
return 0;
}
@@ -440,6 +562,19 @@
}
module_exit(tegra_host1x_exit);
+/**
+ * host1x_get_dma_mask() - query the supported DMA mask for host1x
+ * @host1x: host1x instance
+ *
+ * Note that this returns the supported DMA mask for host1x, which can be
+ * different from the applicable DMA mask under certain circumstances.
+ */
+u64 host1x_get_dma_mask(struct host1x *host1x)
+{
+ return host1x->info->dma_mask;
+}
+EXPORT_SYMBOL(host1x_get_dma_mask);
+
MODULE_AUTHOR("Thierry Reding <thierry.reding@avionic-design.de>");
MODULE_AUTHOR("Terje Bergstrom <tbergstrom@nvidia.com>");
MODULE_DESCRIPTION("Host1x driver for Tegra products");
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index ff56f5e..f781a9b 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -97,6 +97,7 @@
int (*init)(struct host1x *host1x); /* initialize per SoC ops */
unsigned int sync_offset; /* offset of syncpoint registers */
u64 dma_mask; /* mask of addressable memory */
+ bool has_wide_gather; /* supports GATHER_W opcode */
bool has_hypervisor; /* has hypervisor registers */
unsigned int num_sid_entries;
const struct host1x_sid_entry *sid_table;
@@ -140,6 +141,8 @@
struct list_head devices;
struct list_head list;
+
+ struct device_dma_parameters dma_parms;
};
void host1x_hypervisor_writel(struct host1x *host1x, u32 r, u32 v);
diff --git a/drivers/gpu/host1x/hw/debug_hw.c b/drivers/gpu/host1x/hw/debug_hw.c
index 0212584..f31bcfa 100644
--- a/drivers/gpu/host1x/hw/debug_hw.c
+++ b/drivers/gpu/host1x/hw/debug_hw.c
@@ -192,8 +192,14 @@
static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma)
{
+ struct push_buffer *pb = &cdma->push_buffer;
struct host1x_job *job;
+ host1x_debug_output(o, "PUSHBUF at %pad, %u words\n",
+ &pb->dma, pb->size / 4);
+
+ show_gather(o, pb->dma, pb->size / 4, cdma, pb->dma, pb->mapped);
+
list_for_each_entry(job, &cdma->sync_queue, list) {
unsigned int i;
diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c
index 26f3c74..9245add 100644
--- a/drivers/gpu/host1x/intr.c
+++ b/drivers/gpu/host1x/intr.c
@@ -105,7 +105,6 @@
/* Add nr_completed to trace */
trace_host1x_channel_submit_complete(dev_name(channel->dev),
waiter->count, waiter->thresh);
-
}
static void action_wakeup(struct host1x_waitlist *waiter)
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index 2255967..82d0a60 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -8,6 +8,7 @@
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/host1x.h>
+#include <linux/iommu.h>
#include <linux/kref.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
@@ -26,10 +27,13 @@
u32 num_cmdbufs, u32 num_relocs)
{
struct host1x_job *job = NULL;
- unsigned int num_unpins = num_cmdbufs + num_relocs;
+ unsigned int num_unpins = num_relocs;
u64 total;
void *mem;
+ if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
+ num_unpins += num_cmdbufs;
+
/* Check that we're not going to overflow */
total = sizeof(struct host1x_job) +
(u64)num_relocs * sizeof(struct host1x_reloc) +
@@ -99,15 +103,20 @@
static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
{
+ struct host1x_client *client = job->client;
+ struct device *dev = client->dev;
+ struct host1x_job_gather *g;
+ struct iommu_domain *domain;
unsigned int i;
int err;
+ domain = iommu_get_domain_for_dev(dev);
job->num_unpins = 0;
for (i = 0; i < job->num_relocs; i++) {
struct host1x_reloc *reloc = &job->relocs[i];
+ dma_addr_t phys_addr, *phys;
struct sg_table *sgt;
- dma_addr_t phys_addr;
reloc->target.bo = host1x_bo_get(reloc->target.bo);
if (!reloc->target.bo) {
@@ -115,7 +124,60 @@
goto unpin;
}
- phys_addr = host1x_bo_pin(reloc->target.bo, &sgt);
+ /*
+ * If the client device is not attached to an IOMMU, the
+ * physical address of the buffer object can be used.
+ *
+ * Similarly, when an IOMMU domain is shared between all
+ * host1x clients, the IOVA is already available, so no
+ * need to map the buffer object again.
+ *
+ * XXX Note that this isn't always safe to do because it
+ * relies on an assumption that no cache maintenance is
+ * needed on the buffer objects.
+ */
+ if (!domain || client->group)
+ phys = &phys_addr;
+ else
+ phys = NULL;
+
+ sgt = host1x_bo_pin(dev, reloc->target.bo, phys);
+ if (IS_ERR(sgt)) {
+ err = PTR_ERR(sgt);
+ goto unpin;
+ }
+
+ if (sgt) {
+ unsigned long mask = HOST1X_RELOC_READ |
+ HOST1X_RELOC_WRITE;
+ enum dma_data_direction dir;
+
+ switch (reloc->flags & mask) {
+ case HOST1X_RELOC_READ:
+ dir = DMA_TO_DEVICE;
+ break;
+
+ case HOST1X_RELOC_WRITE:
+ dir = DMA_FROM_DEVICE;
+ break;
+
+ case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE:
+ dir = DMA_BIDIRECTIONAL;
+ break;
+
+ default:
+ err = -EINVAL;
+ goto unpin;
+ }
+
+ err = dma_map_sgtable(dev, sgt, dir, 0);
+ if (err)
+ goto unpin;
+
+ job->unpins[job->num_unpins].dev = dev;
+ job->unpins[job->num_unpins].dir = dir;
+ phys_addr = sg_dma_address(sgt->sgl);
+ }
job->addr_phys[job->num_unpins] = phys_addr;
job->unpins[job->num_unpins].bo = reloc->target.bo;
@@ -123,26 +185,48 @@
job->num_unpins++;
}
+ /*
+ * We will copy gathers BO content later, so there is no need to
+ * hold and pin them.
+ */
+ if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
+ return 0;
+
for (i = 0; i < job->num_gathers; i++) {
- struct host1x_job_gather *g = &job->gathers[i];
size_t gather_size = 0;
struct scatterlist *sg;
struct sg_table *sgt;
dma_addr_t phys_addr;
unsigned long shift;
struct iova *alloc;
+ dma_addr_t *phys;
unsigned int j;
+ g = &job->gathers[i];
g->bo = host1x_bo_get(g->bo);
if (!g->bo) {
err = -EINVAL;
goto unpin;
}
- phys_addr = host1x_bo_pin(g->bo, &sgt);
+ /**
+ * If the host1x is not attached to an IOMMU, there is no need
+ * to map the buffer object for the host1x, since the physical
+ * address can simply be used.
+ */
+ if (!iommu_get_domain_for_dev(host->dev))
+ phys = &phys_addr;
+ else
+ phys = NULL;
- if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) {
- for_each_sg(sgt->sgl, sg, sgt->nents, j)
+ sgt = host1x_bo_pin(host->dev, g->bo, phys);
+ if (IS_ERR(sgt)) {
+ err = PTR_ERR(sgt);
+ goto put;
+ }
+
+ if (host->domain) {
+ for_each_sgtable_sg(sgt, sg, j)
gather_size += sg->length;
gather_size = iova_align(&host->iova, gather_size);
@@ -151,26 +235,32 @@
host->iova_end >> shift, true);
if (!alloc) {
err = -ENOMEM;
- goto unpin;
+ goto put;
}
- err = iommu_map_sg(host->domain,
+ err = iommu_map_sgtable(host->domain,
iova_dma_addr(&host->iova, alloc),
- sgt->sgl, sgt->nents, IOMMU_READ);
+ sgt, IOMMU_READ);
if (err == 0) {
__free_iova(&host->iova, alloc);
err = -EINVAL;
- goto unpin;
+ goto put;
}
- job->addr_phys[job->num_unpins] =
- iova_dma_addr(&host->iova, alloc);
job->unpins[job->num_unpins].size = gather_size;
- } else {
- job->addr_phys[job->num_unpins] = phys_addr;
+ phys_addr = iova_dma_addr(&host->iova, alloc);
+ } else if (sgt) {
+ err = dma_map_sgtable(host->dev, sgt, DMA_TO_DEVICE, 0);
+ if (err)
+ goto put;
+
+ job->unpins[job->num_unpins].dir = DMA_TO_DEVICE;
+ job->unpins[job->num_unpins].dev = host->dev;
+ phys_addr = sg_dma_address(sgt->sgl);
}
- job->gather_addr_phys[i] = job->addr_phys[job->num_unpins];
+ job->addr_phys[job->num_unpins] = phys_addr;
+ job->gather_addr_phys[i] = phys_addr;
job->unpins[job->num_unpins].bo = g->bo;
job->unpins[job->num_unpins].sgt = sgt;
@@ -179,6 +269,8 @@
return 0;
+put:
+ host1x_bo_put(g->bo);
unpin:
host1x_job_unpin(job);
return err;
@@ -186,8 +278,7 @@
static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
{
- u32 last_page = ~0;
- void *cmdbuf_page_addr = NULL;
+ void *cmdbuf_addr = NULL;
struct host1x_bo *cmdbuf = g->bo;
unsigned int i;
@@ -209,28 +300,22 @@
goto patch_reloc;
}
- if (last_page != reloc->cmdbuf.offset >> PAGE_SHIFT) {
- if (cmdbuf_page_addr)
- host1x_bo_kunmap(cmdbuf, last_page,
- cmdbuf_page_addr);
+ if (!cmdbuf_addr) {
+ cmdbuf_addr = host1x_bo_mmap(cmdbuf);
- cmdbuf_page_addr = host1x_bo_kmap(cmdbuf,
- reloc->cmdbuf.offset >> PAGE_SHIFT);
- last_page = reloc->cmdbuf.offset >> PAGE_SHIFT;
-
- if (unlikely(!cmdbuf_page_addr)) {
+ if (unlikely(!cmdbuf_addr)) {
pr_err("Could not map cmdbuf for relocation\n");
return -ENOMEM;
}
}
- target = cmdbuf_page_addr + (reloc->cmdbuf.offset & ~PAGE_MASK);
+ target = cmdbuf_addr + reloc->cmdbuf.offset;
patch_reloc:
*target = reloc_addr;
}
- if (cmdbuf_page_addr)
- host1x_bo_kunmap(cmdbuf, last_page, cmdbuf_page_addr);
+ if (cmdbuf_addr)
+ host1x_bo_munmap(cmdbuf, cmdbuf_addr);
return 0;
}
@@ -558,6 +643,8 @@
for (i = 0; i < job->num_unpins; i++) {
struct host1x_job_unpin_data *unpin = &job->unpins[i];
+ struct device *dev = unpin->dev ?: host->dev;
+ struct sg_table *sgt = unpin->sgt;
if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) &&
unpin->size && host->domain) {
@@ -567,7 +654,10 @@
iova_pfn(&host->iova, job->addr_phys[i]));
}
- host1x_bo_unpin(unpin->bo, unpin->sgt);
+ if (unpin->dev && sgt)
+ dma_unmap_sgtable(unpin->dev, sgt, unpin->dir, 0);
+
+ host1x_bo_unpin(dev, unpin->bo, sgt);
host1x_bo_put(unpin->bo);
}
diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h
index 62b8805..94bc2e4 100644
--- a/drivers/gpu/host1x/job.h
+++ b/drivers/gpu/host1x/job.h
@@ -8,6 +8,8 @@
#ifndef __HOST1X_JOB_H
#define __HOST1X_JOB_H
+#include <linux/dma-direction.h>
+
struct host1x_job_gather {
unsigned int words;
dma_addr_t base;
@@ -19,7 +21,9 @@
struct host1x_job_unpin_data {
struct host1x_bo *bo;
struct sg_table *sgt;
+ struct device *dev;
size_t size;
+ enum dma_data_direction dir;
};
/*
diff --git a/drivers/gpu/host1x/mipi.c b/drivers/gpu/host1x/mipi.c
index e00809d..2efe12d 100644
--- a/drivers/gpu/host1x/mipi.c
+++ b/drivers/gpu/host1x/mipi.c
@@ -21,9 +21,9 @@
*/
#include <linux/clk.h>
-#include <linux/delay.h>
#include <linux/host1x.h>
#include <linux/io.h>
+#include <linux/iopoll.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
@@ -206,9 +206,9 @@
return 0;
}
-struct tegra_mipi_device *tegra_mipi_request(struct device *device)
+struct tegra_mipi_device *tegra_mipi_request(struct device *device,
+ struct device_node *np)
{
- struct device_node *np = device->of_node;
struct tegra_mipi_device *dev;
struct of_phandle_args args;
int err;
@@ -293,24 +293,25 @@
}
EXPORT_SYMBOL(tegra_mipi_disable);
-static int tegra_mipi_wait(struct tegra_mipi *mipi)
+int tegra_mipi_finish_calibration(struct tegra_mipi_device *device)
{
- unsigned long timeout = jiffies + msecs_to_jiffies(250);
+ struct tegra_mipi *mipi = device->mipi;
+ void __iomem *status_reg = mipi->regs + (MIPI_CAL_STATUS << 2);
u32 value;
+ int err;
- while (time_before(jiffies, timeout)) {
- value = tegra_mipi_readl(mipi, MIPI_CAL_STATUS);
- if ((value & MIPI_CAL_STATUS_ACTIVE) == 0 &&
- (value & MIPI_CAL_STATUS_DONE) != 0)
- return 0;
+ err = readl_relaxed_poll_timeout(status_reg, value,
+ !(value & MIPI_CAL_STATUS_ACTIVE) &&
+ (value & MIPI_CAL_STATUS_DONE), 50,
+ 250000);
+ mutex_unlock(&device->mipi->lock);
+ clk_disable(device->mipi->clk);
- usleep_range(10, 50);
- }
-
- return -ETIMEDOUT;
+ return err;
}
+EXPORT_SYMBOL(tegra_mipi_finish_calibration);
-int tegra_mipi_calibrate(struct tegra_mipi_device *device)
+int tegra_mipi_start_calibration(struct tegra_mipi_device *device)
{
const struct tegra_mipi_soc *soc = device->mipi->soc;
unsigned int i;
@@ -374,14 +375,16 @@
value |= MIPI_CAL_CTRL_START;
tegra_mipi_writel(device->mipi, value, MIPI_CAL_CTRL);
- err = tegra_mipi_wait(device->mipi);
+ /*
+ * Wait for min 72uS to let calibration logic finish calibration
+ * sequence codes before waiting for pads idle state to apply the
+ * results.
+ */
+ usleep_range(75, 80);
- mutex_unlock(&device->mipi->lock);
- clk_disable(device->mipi->clk);
-
- return err;
+ return 0;
}
-EXPORT_SYMBOL(tegra_mipi_calibrate);
+EXPORT_SYMBOL(tegra_mipi_start_calibration);
static const struct tegra_mipi_pad tegra114_mipi_pads[] = {
{ .data = MIPI_CAL_CONFIG_CSIA },
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index dd1cd01..fce7892 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -421,7 +421,7 @@
struct host1x_syncpt *host1x_syncpt_request(struct host1x_client *client,
unsigned long flags)
{
- struct host1x *host = dev_get_drvdata(client->parent->parent);
+ struct host1x *host = dev_get_drvdata(client->host->parent);
return host1x_syncpt_alloc(host, client, flags);
}