Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index c60395b..e3842ea 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -1,3 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0-only
+# The IOVA library may also be used by non-IOMMU_API users
+config IOMMU_IOVA
+ tristate
+
# IOMMU_API always gets selected by whoever wants it.
config IOMMU_API
bool
@@ -81,9 +86,6 @@
If unsure, say N here.
-config IOMMU_IOVA
- tristate
-
config OF_IOMMU
def_bool y
depends on OF && IOMMU_API
@@ -93,6 +95,7 @@
bool
select IOMMU_API
select IOMMU_IOVA
+ select IRQ_MSI_IOMMU
select NEED_SG_DMA_LENGTH
config FSL_PAMU
@@ -174,11 +177,12 @@
config INTEL_IOMMU
bool "Support for Intel IOMMU using DMA Remapping Devices"
- depends on PCI_MSI && ACPI && (X86 || IA64_GENERIC)
+ depends on PCI_MSI && ACPI && (X86 || IA64)
select IOMMU_API
select IOMMU_IOVA
select NEED_DMA_MAP_STATE
select DMAR_TABLE
+ select SWIOTLB
help
DMA remapping (DMAR) devices support enables independent address
translations for Direct Memory Access (DMA) from devices.
@@ -186,6 +190,19 @@
and include PCI device scope covered by these DMA
remapping devices.
+config INTEL_IOMMU_DEBUGFS
+ bool "Export Intel IOMMU internals in Debugfs"
+ depends on INTEL_IOMMU && IOMMU_DEBUGFS
+ help
+ !!!WARNING!!!
+
+ DO NOT ENABLE THIS OPTION UNLESS YOU REALLY KNOW WHAT YOU ARE DOING!!!
+
+ Expose Intel IOMMU internals in Debugfs.
+
+ This option is -NOT- intended for production environments, and should
+ only be enabled for debugging Intel IOMMU.
+
config INTEL_IOMMU_SVM
bool "Support for Shared Virtual Memory with Intel IOMMU"
depends on INTEL_IOMMU && X86
@@ -269,6 +286,7 @@
config TEGRA_IOMMU_GART
bool "Tegra GART IOMMU Support"
depends on ARCH_TEGRA_2x_SOC
+ depends on TEGRA_MC
select IOMMU_API
help
Enables support for remapping discontiguous physical memory
@@ -344,6 +362,31 @@
Say Y here if your SoC includes an IOMMU device implementing
the ARM SMMU architecture.
+config ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT
+ bool "Default to disabling bypass on ARM SMMU v1 and v2"
+ depends on ARM_SMMU
+ default y
+ help
+ Say Y here to (by default) disable bypass streams such that
+ incoming transactions from devices that are not attached to
+ an iommu domain will report an abort back to the device and
+ will not be allowed to pass through the SMMU.
+
+ Any old kernels that existed before this KConfig was
+ introduced would default to _allowing_ bypass (AKA the
+ equivalent of NO for this config). However the default for
+ this option is YES because the old behavior is insecure.
+
+ There are few reasons to allow unmatched stream bypass, and
+ even fewer good ones. If saying YES here breaks your board
+ you should work on fixing your board. This KConfig option
+ is expected to be removed in the future and we'll simply
+ hardcode the bypass disable in the code.
+
+ NOTE: the kernel command line parameter
+ 'arm-smmu.disable_bypass' will continue to override this
+ config.
+
config ARM_SMMU_V3
bool "ARM Ltd. System MMU Version 3 (SMMUv3) Support"
depends on ARM64
@@ -372,6 +415,14 @@
Enables bits of IOMMU API required by VFIO. The iommu_ops
is not implemented as it is not necessary for VFIO.
+config S390_AP_IOMMU
+ bool "S390 AP IOMMU Support"
+ depends on S390 && ZCRYPT
+ select IOMMU_API
+ help
+ Enables bits of IOMMU API required by VFIO. The iommu_ops
+ is not implemented as it is not necessary for VFIO.
+
config MTK_IOMMU
bool "MTK IOMMU Support"
depends on ARM || ARM64
@@ -414,4 +465,24 @@
help
Support for IOMMU on certain Qualcomm SoCs.
+config HYPERV_IOMMU
+ bool "Hyper-V x2APIC IRQ Handling"
+ depends on HYPERV
+ select IOMMU_API
+ default HYPERV
+ help
+ Stub IOMMU driver that handles IRQs so that Hyper-V Linux
+ guests can run with x2APIC mode enabled.
+
+config VIRTIO_IOMMU
+ bool "Virtio IOMMU driver"
+ depends on VIRTIO=y
+ depends on ARM64
+ select IOMMU_API
+ select INTERVAL_TREE
+ help
+ Para-virtualised IOMMU driver with virtio.
+
+ Say Y here if you intend to run this kernel as a guest.
+
endif # IOMMU_SUPPORT
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index ab5eba6..4f405f9 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -10,13 +10,15 @@
obj-$(CONFIG_IOMMU_IOVA) += iova.o
obj-$(CONFIG_OF_IOMMU) += of_iommu.o
obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o
-obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o
+obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o amd_iommu_quirks.o
obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += amd_iommu_debugfs.o
obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o
-obj-$(CONFIG_ARM_SMMU) += arm-smmu.o
+obj-$(CONFIG_ARM_SMMU) += arm-smmu.o arm-smmu-impl.o
obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o
obj-$(CONFIG_DMAR_TABLE) += dmar.o
obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o intel-pasid.o
+obj-$(CONFIG_INTEL_IOMMU) += intel-trace.o
+obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += intel-iommu-debugfs.o
obj-$(CONFIG_INTEL_IOMMU_SVM) += intel-svm.o
obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o
obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o
@@ -31,3 +33,5 @@
obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
obj-$(CONFIG_QCOM_IOMMU) += qcom_iommu.o
+obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o
+obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index bee0dfb..dd55507 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1,22 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
* Author: Joerg Roedel <jroedel@suse.de>
* Leo Duran <leo.duran@amd.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#define pr_fmt(fmt) "AMD-Vi: " fmt
+#define dev_fmt(fmt) pr_fmt(fmt)
+
#include <linux/ratelimit.h>
#include <linux/pci.h>
#include <linux/acpi.h>
@@ -55,8 +46,6 @@
#include "amd_iommu_types.h"
#include "irq_remapping.h"
-#define AMD_IOMMU_MAPPING_ERROR 0
-
#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
#define LOOP_TIMEOUT 100000
@@ -81,7 +70,6 @@
*/
#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38))
-static DEFINE_SPINLOCK(amd_iommu_devtable_lock);
static DEFINE_SPINLOCK(pd_bitmap_lock);
/* List of all available dev_data structures */
@@ -139,10 +127,14 @@
static inline int match_hid_uid(struct device *dev,
struct acpihid_map_entry *entry)
{
+ struct acpi_device *adev = ACPI_COMPANION(dev);
const char *hid, *uid;
- hid = acpi_device_hid(ACPI_COMPANION(dev));
- uid = acpi_device_uid(ACPI_COMPANION(dev));
+ if (!adev)
+ return -ENODEV;
+
+ hid = acpi_device_hid(adev);
+ uid = acpi_device_uid(adev);
if (!hid || !(*hid))
return -ENODEV;
@@ -160,7 +152,7 @@
{
struct pci_dev *pdev = to_pci_dev(dev);
- return PCI_DEVID(pdev->bus->number, pdev->devfn);
+ return pci_dev_id(pdev);
}
static inline int get_acpihid_device_id(struct device *dev,
@@ -209,6 +201,7 @@
if (!dev_data)
return NULL;
+ spin_lock_init(&dev_data->lock);
dev_data->devid = devid;
ratelimit_default_init(&dev_data->rs);
@@ -279,10 +272,10 @@
return pci_alias;
}
- pr_info("AMD-Vi: Using IVRS reported alias %02x:%02x.%d "
- "for device %s[%04x:%04x], kernel reported alias "
+ pci_info(pdev, "Using IVRS reported alias %02x:%02x.%d "
+ "for device [%04x:%04x], kernel reported alias "
"%02x:%02x.%d\n", PCI_BUS_NUM(ivrs_alias), PCI_SLOT(ivrs_alias),
- PCI_FUNC(ivrs_alias), dev_name(dev), pdev->vendor, pdev->device,
+ PCI_FUNC(ivrs_alias), pdev->vendor, pdev->device,
PCI_BUS_NUM(pci_alias), PCI_SLOT(pci_alias),
PCI_FUNC(pci_alias));
@@ -293,9 +286,8 @@
if (pci_alias == devid &&
PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) {
pci_add_dma_alias(pdev, ivrs_alias & 0xff);
- pr_info("AMD-Vi: Added PCI DMA alias %02x.%d for %s\n",
- PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias),
- dev_name(dev));
+ pci_info(pdev, "Added PCI DMA alias %02x.%d\n",
+ PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias));
}
return ivrs_alias;
@@ -438,7 +430,14 @@
dev_data->alias = get_alias(dev);
- if (dev_is_pci(dev) && pci_iommuv2_capable(to_pci_dev(dev))) {
+ /*
+ * By default we use passthrough mode for IOMMUv2-capable devices.
+ * But if amd_iommu=force_isolation is set (e.g. to debug DMA to an
+ * invalid address), we ignore the capability for the device so
+ * it'll be forced to go into translation mode.
+ */
+ if ((iommu_default_passthrough() || !amd_iommu_force_isolation) &&
+ dev_is_pci(dev) && pci_iommuv2_capable(to_pci_dev(dev))) {
struct amd_iommu *iommu;
iommu = amd_iommu_rlookup_table[dev_data->devid];
@@ -502,6 +501,29 @@
*/
}
+/*
+ * Helper function to get the first pte of a large mapping
+ */
+static u64 *first_pte_l7(u64 *pte, unsigned long *page_size,
+ unsigned long *count)
+{
+ unsigned long pte_mask, pg_size, cnt;
+ u64 *fpte;
+
+ pg_size = PTE_PAGE_SIZE(*pte);
+ cnt = PAGE_SIZE_PTE_COUNT(pg_size);
+ pte_mask = ~((cnt << 3) - 1);
+ fpte = (u64 *)(((unsigned long)pte) & pte_mask);
+
+ if (page_size)
+ *page_size = pg_size;
+
+ if (count)
+ *count = cnt;
+
+ return fpte;
+}
+
/****************************************************************************
*
* Interrupt handling functions
@@ -513,7 +535,7 @@
int i;
for (i = 0; i < 4; ++i)
- pr_err("AMD-Vi: DTE[%d]: %016llx\n", i,
+ pr_err("DTE[%d]: %016llx\n", i,
amd_iommu_dev_table[devid].data[i]);
}
@@ -523,7 +545,7 @@
int i;
for (i = 0; i < 4; ++i)
- pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]);
+ pr_err("CMD[%d]: %08x\n", i, cmd->data[i]);
}
static void amd_iommu_report_page_fault(u16 devid, u16 domain_id,
@@ -538,10 +560,10 @@
dev_data = get_dev_data(&pdev->dev);
if (dev_data && __ratelimit(&dev_data->rs)) {
- dev_err(&pdev->dev, "AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%016llx flags=0x%04x]\n",
+ pci_err(pdev, "Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%llx flags=0x%04x]\n",
domain_id, address, flags);
} else if (printk_ratelimit()) {
- pr_err("AMD-Vi: Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%016llx flags=0x%04x]\n",
+ pr_err("Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n",
PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
domain_id, address, flags);
}
@@ -561,14 +583,15 @@
retry:
type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK;
devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
- pasid = PPR_PASID(*(u64 *)&event[0]);
+ pasid = (event[0] & EVENT_DOMID_MASK_HI) |
+ (event[1] & EVENT_DOMID_MASK_LO);
flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
address = (u64)(((u64)event[3]) << 32) | event[2];
if (type == 0) {
/* Did we hit the erratum? */
if (++count == LOOP_TIMEOUT) {
- pr_err("AMD-Vi: No event written to event log\n");
+ pr_err("No event written to event log\n");
return;
}
udelay(1);
@@ -578,43 +601,41 @@
if (type == EVENT_TYPE_IO_FAULT) {
amd_iommu_report_page_fault(devid, pasid, address, flags);
return;
- } else {
- dev_err(dev, "AMD-Vi: Event logged [");
}
switch (type) {
case EVENT_TYPE_ILL_DEV:
- dev_err(dev, "ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x pasid=0x%05x address=0x%016llx flags=0x%04x]\n",
+ dev_err(dev, "Event logged [ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n",
PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
pasid, address, flags);
dump_dte_entry(devid);
break;
case EVENT_TYPE_DEV_TAB_ERR:
- dev_err(dev, "DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
- "address=0x%016llx flags=0x%04x]\n",
+ dev_err(dev, "Event logged [DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
+ "address=0x%llx flags=0x%04x]\n",
PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
address, flags);
break;
case EVENT_TYPE_PAGE_TAB_ERR:
- dev_err(dev, "PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x domain=0x%04x address=0x%016llx flags=0x%04x]\n",
+ dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x pasid=0x%04x address=0x%llx flags=0x%04x]\n",
PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
pasid, address, flags);
break;
case EVENT_TYPE_ILL_CMD:
- dev_err(dev, "ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
+ dev_err(dev, "Event logged [ILLEGAL_COMMAND_ERROR address=0x%llx]\n", address);
dump_command(address);
break;
case EVENT_TYPE_CMD_HARD_ERR:
- dev_err(dev, "COMMAND_HARDWARE_ERROR address=0x%016llx flags=0x%04x]\n",
+ dev_err(dev, "Event logged [COMMAND_HARDWARE_ERROR address=0x%llx flags=0x%04x]\n",
address, flags);
break;
case EVENT_TYPE_IOTLB_INV_TO:
- dev_err(dev, "IOTLB_INV_TIMEOUT device=%02x:%02x.%x address=0x%016llx]\n",
+ dev_err(dev, "Event logged [IOTLB_INV_TIMEOUT device=%02x:%02x.%x address=0x%llx]\n",
PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
address);
break;
case EVENT_TYPE_INV_DEV_REQ:
- dev_err(dev, "INVALID_DEVICE_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%016llx flags=0x%04x]\n",
+ dev_err(dev, "Event logged [INVALID_DEVICE_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n",
PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
pasid, address, flags);
break;
@@ -622,12 +643,12 @@
pasid = ((event[0] >> 16) & 0xFFFF)
| ((event[1] << 6) & 0xF0000);
tag = event[1] & 0x03FF;
- dev_err(dev, "INVALID_PPR_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%016llx flags=0x%04x]\n",
+ dev_err(dev, "Event logged [INVALID_PPR_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x tag=0x%03x]\n",
PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
- pasid, address, flags);
+ pasid, address, flags, tag);
break;
default:
- dev_err(dev, "UNKNOWN event[0]=0x%08x event[1]=0x%08x event[2]=0x%08x event[3]=0x%08x\n",
+ dev_err(dev, "Event logged [UNKNOWN event[0]=0x%08x event[1]=0x%08x event[2]=0x%08x event[3]=0x%08x\n",
event[0], event[1], event[2], event[3]);
}
@@ -654,7 +675,7 @@
struct amd_iommu_fault fault;
if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) {
- pr_err_ratelimited("AMD-Vi: Unknown PPR request received\n");
+ pr_err_ratelimited("Unknown PPR request received\n");
return;
}
@@ -759,12 +780,12 @@
if (!iommu_ga_log_notifier)
break;
- pr_debug("AMD-Vi: %s: devid=%#x, ga_tag=%#x\n",
+ pr_debug("%s: devid=%#x, ga_tag=%#x\n",
__func__, GA_DEVID(log_entry),
GA_TAG(log_entry));
if (iommu_ga_log_notifier(GA_TAG(log_entry)) != 0)
- pr_err("AMD-Vi: GA log notifier failed.\n");
+ pr_err("GA log notifier failed.\n");
break;
default:
break;
@@ -789,18 +810,18 @@
iommu->mmio_base + MMIO_STATUS_OFFSET);
if (status & MMIO_STATUS_EVT_INT_MASK) {
- pr_devel("AMD-Vi: Processing IOMMU Event Log\n");
+ pr_devel("Processing IOMMU Event Log\n");
iommu_poll_events(iommu);
}
if (status & MMIO_STATUS_PPR_INT_MASK) {
- pr_devel("AMD-Vi: Processing IOMMU PPR Log\n");
+ pr_devel("Processing IOMMU PPR Log\n");
iommu_poll_ppr_log(iommu);
}
#ifdef CONFIG_IRQ_REMAP
if (status & MMIO_STATUS_GALOG_INT_MASK) {
- pr_devel("AMD-Vi: Processing IOMMU GA Log\n");
+ pr_devel("Processing IOMMU GA Log\n");
iommu_poll_ga_log(iommu);
}
#endif
@@ -844,7 +865,7 @@
}
if (i == LOOP_TIMEOUT) {
- pr_alert("AMD-Vi: Completion-Wait loop timed out\n");
+ pr_alert("Completion-Wait loop timed out\n");
return -EIO;
}
@@ -1036,7 +1057,7 @@
/* Skip udelay() the first time around */
if (count++) {
if (count == LOOP_TIMEOUT) {
- pr_err("AMD-Vi: Command buffer timeout\n");
+ pr_err("Command buffer timeout\n");
return -EIO;
}
@@ -1146,6 +1167,17 @@
iommu_completion_wait(iommu);
}
+static void amd_iommu_flush_tlb_domid(struct amd_iommu *iommu, u32 dom_id)
+{
+ struct iommu_cmd cmd;
+
+ build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
+ dom_id, 1);
+ iommu_queue_command(iommu, &cmd);
+
+ iommu_completion_wait(iommu);
+}
+
static void amd_iommu_flush_all(struct amd_iommu *iommu)
{
struct iommu_cmd cmd;
@@ -1298,6 +1330,20 @@
}
}
+/* Flush the not present cache if it exists */
+static void domain_flush_np_cache(struct protection_domain *domain,
+ dma_addr_t iova, size_t size)
+{
+ if (unlikely(amd_iommu_np_cache)) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&domain->lock, flags);
+ domain_flush_pages(domain, iova, size);
+ domain_flush_complete(domain);
+ spin_unlock_irqrestore(&domain->lock, flags);
+ }
+}
+
/*
* This function flushes the DTEs for all devices in domain
@@ -1317,38 +1363,143 @@
*
****************************************************************************/
+static void free_page_list(struct page *freelist)
+{
+ while (freelist != NULL) {
+ unsigned long p = (unsigned long)page_address(freelist);
+ freelist = freelist->freelist;
+ free_page(p);
+ }
+}
+
+static struct page *free_pt_page(unsigned long pt, struct page *freelist)
+{
+ struct page *p = virt_to_page((void *)pt);
+
+ p->freelist = freelist;
+
+ return p;
+}
+
+#define DEFINE_FREE_PT_FN(LVL, FN) \
+static struct page *free_pt_##LVL (unsigned long __pt, struct page *freelist) \
+{ \
+ unsigned long p; \
+ u64 *pt; \
+ int i; \
+ \
+ pt = (u64 *)__pt; \
+ \
+ for (i = 0; i < 512; ++i) { \
+ /* PTE present? */ \
+ if (!IOMMU_PTE_PRESENT(pt[i])) \
+ continue; \
+ \
+ /* Large PTE? */ \
+ if (PM_PTE_LEVEL(pt[i]) == 0 || \
+ PM_PTE_LEVEL(pt[i]) == 7) \
+ continue; \
+ \
+ p = (unsigned long)IOMMU_PTE_PAGE(pt[i]); \
+ freelist = FN(p, freelist); \
+ } \
+ \
+ return free_pt_page((unsigned long)pt, freelist); \
+}
+
+DEFINE_FREE_PT_FN(l2, free_pt_page)
+DEFINE_FREE_PT_FN(l3, free_pt_l2)
+DEFINE_FREE_PT_FN(l4, free_pt_l3)
+DEFINE_FREE_PT_FN(l5, free_pt_l4)
+DEFINE_FREE_PT_FN(l6, free_pt_l5)
+
+static struct page *free_sub_pt(unsigned long root, int mode,
+ struct page *freelist)
+{
+ switch (mode) {
+ case PAGE_MODE_NONE:
+ case PAGE_MODE_7_LEVEL:
+ break;
+ case PAGE_MODE_1_LEVEL:
+ freelist = free_pt_page(root, freelist);
+ break;
+ case PAGE_MODE_2_LEVEL:
+ freelist = free_pt_l2(root, freelist);
+ break;
+ case PAGE_MODE_3_LEVEL:
+ freelist = free_pt_l3(root, freelist);
+ break;
+ case PAGE_MODE_4_LEVEL:
+ freelist = free_pt_l4(root, freelist);
+ break;
+ case PAGE_MODE_5_LEVEL:
+ freelist = free_pt_l5(root, freelist);
+ break;
+ case PAGE_MODE_6_LEVEL:
+ freelist = free_pt_l6(root, freelist);
+ break;
+ default:
+ BUG();
+ }
+
+ return freelist;
+}
+
+static void free_pagetable(struct protection_domain *domain)
+{
+ unsigned long root = (unsigned long)domain->pt_root;
+ struct page *freelist = NULL;
+
+ BUG_ON(domain->mode < PAGE_MODE_NONE ||
+ domain->mode > PAGE_MODE_6_LEVEL);
+
+ freelist = free_sub_pt(root, domain->mode, freelist);
+
+ free_page_list(freelist);
+}
+
/*
* This function is used to add another level to an IO page table. Adding
* another level increases the size of the address space by 9 bits to a size up
* to 64 bits.
*/
static bool increase_address_space(struct protection_domain *domain,
+ unsigned long address,
gfp_t gfp)
{
+ unsigned long flags;
+ bool ret = false;
u64 *pte;
- if (domain->mode == PAGE_MODE_6_LEVEL)
- /* address space already 64 bit large */
- return false;
+ spin_lock_irqsave(&domain->lock, flags);
+
+ if (address <= PM_LEVEL_SIZE(domain->mode) ||
+ WARN_ON_ONCE(domain->mode == PAGE_MODE_6_LEVEL))
+ goto out;
pte = (void *)get_zeroed_page(gfp);
if (!pte)
- return false;
+ goto out;
*pte = PM_LEVEL_PDE(domain->mode,
iommu_virt_to_phys(domain->pt_root));
domain->pt_root = pte;
domain->mode += 1;
- domain->updated = true;
- return true;
+ ret = true;
+
+out:
+ spin_unlock_irqrestore(&domain->lock, flags);
+
+ return ret;
}
static u64 *alloc_pte(struct protection_domain *domain,
unsigned long address,
unsigned long page_size,
u64 **pte_page,
- gfp_t gfp)
+ gfp_t gfp,
+ bool *updated)
{
int level, end_lvl;
u64 *pte, *page;
@@ -1356,7 +1507,7 @@
BUG_ON(!is_power_of_2(page_size));
while (address > PM_LEVEL_SIZE(domain->mode))
- increase_address_space(domain, gfp);
+ *updated = increase_address_space(domain, address, gfp) || *updated;
level = domain->mode - 1;
pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
@@ -1365,30 +1516,58 @@
while (level > end_lvl) {
u64 __pte, __npte;
+ int pte_level;
- __pte = *pte;
+ __pte = *pte;
+ pte_level = PM_PTE_LEVEL(__pte);
- if (!IOMMU_PTE_PRESENT(__pte)) {
+ /*
+ * If we replace a series of large PTEs, we need
+ * to tear down all of them.
+ */
+ if (IOMMU_PTE_PRESENT(__pte) &&
+ pte_level == PAGE_MODE_7_LEVEL) {
+ unsigned long count, i;
+ u64 *lpte;
+
+ lpte = first_pte_l7(pte, NULL, &count);
+
+ /*
+ * Unmap the replicated PTEs that still match the
+ * original large mapping
+ */
+ for (i = 0; i < count; ++i)
+ cmpxchg64(&lpte[i], __pte, 0ULL);
+
+ *updated = true;
+ continue;
+ }
+
+ if (!IOMMU_PTE_PRESENT(__pte) ||
+ pte_level == PAGE_MODE_NONE) {
page = (u64 *)get_zeroed_page(gfp);
+
if (!page)
return NULL;
__npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page));
/* pte could have been changed somewhere. */
- if (cmpxchg64(pte, __pte, __npte) != __pte) {
+ if (cmpxchg64(pte, __pte, __npte) != __pte)
free_page((unsigned long)page);
- continue;
- }
+ else if (IOMMU_PTE_PRESENT(__pte))
+ *updated = true;
+
+ continue;
}
/* No level skipping support yet */
- if (PM_PTE_LEVEL(*pte) != level)
+ if (pte_level != level)
return NULL;
level -= 1;
- pte = IOMMU_PTE_PAGE(*pte);
+ pte = IOMMU_PTE_PAGE(__pte);
if (pte_page && level == end_lvl)
*pte_page = pte;
@@ -1442,21 +1621,35 @@
*page_size = PTE_LEVEL_PAGE_SIZE(level);
}
- if (PM_PTE_LEVEL(*pte) == 0x07) {
- unsigned long pte_mask;
-
- /*
- * If we have a series of large PTEs, make
- * sure to return a pointer to the first one.
- */
- *page_size = pte_mask = PTE_PAGE_SIZE(*pte);
- pte_mask = ~((PAGE_SIZE_PTE_COUNT(pte_mask) << 3) - 1);
- pte = (u64 *)(((unsigned long)pte) & pte_mask);
- }
+ /*
+ * If we have a series of large PTEs, make
+ * sure to return a pointer to the first one.
+ */
+ if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL)
+ pte = first_pte_l7(pte, page_size, NULL);
return pte;
}
+/*
+ * Atomically clear *pte and collect the page-table pages it referenced.
+ *
+ * @pte:      page-table entry to clear
+ * @pteval:   value the caller last read from *pte
+ * @freelist: pages already queued for freeing
+ *
+ * Returns the (possibly extended) freelist. Nothing is freed here; the
+ * pages are only chained so the caller can release them with
+ * free_page_list() once it has finished flushing.
+ */
+static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist)
+{
+	unsigned long pt;
+	int mode;
+
+	/* If the entry changed since it was read, retry with the new value */
+	while (cmpxchg64(pte, pteval, 0) != pteval) {
+		/* pr_fmt() already supplies the "AMD-Vi: " prefix */
+		pr_warn("IOMMU pte changed since we read it\n");
+		pteval = *pte;
+	}
+
+	if (!IOMMU_PTE_PRESENT(pteval))
+		return freelist;
+
+	pt = (unsigned long)IOMMU_PTE_PAGE(pteval);
+	mode = IOMMU_PTE_MODE(pteval);
+
+	/* Modes 0 and 7 reference no lower table; free_sub_pt() adds nothing */
+	return free_sub_pt(pt, mode, freelist);
+}
+
/*
* Generic mapping functions. It maps a physical address into a DMA
* address space. It allocates the page table pages if necessary.
@@ -1471,24 +1664,30 @@
int prot,
gfp_t gfp)
{
+ struct page *freelist = NULL;
+ bool updated = false;
u64 __pte, *pte;
- int i, count;
+ int ret, i, count;
BUG_ON(!IS_ALIGNED(bus_addr, page_size));
BUG_ON(!IS_ALIGNED(phys_addr, page_size));
+ ret = -EINVAL;
if (!(prot & IOMMU_PROT_MASK))
- return -EINVAL;
+ goto out;
count = PAGE_SIZE_PTE_COUNT(page_size);
- pte = alloc_pte(dom, bus_addr, page_size, NULL, gfp);
+ pte = alloc_pte(dom, bus_addr, page_size, NULL, gfp, &updated);
+ ret = -ENOMEM;
if (!pte)
- return -ENOMEM;
+ goto out;
for (i = 0; i < count; ++i)
- if (IOMMU_PTE_PRESENT(pte[i]))
- return -EBUSY;
+ freelist = free_clear_pte(&pte[i], pte[i], freelist);
+
+ if (freelist != NULL)
+ updated = true;
if (count > 1) {
__pte = PAGE_SIZE_PTE(__sme_set(phys_addr), page_size);
@@ -1504,9 +1703,21 @@
for (i = 0; i < count; ++i)
pte[i] = __pte;
- update_domain(dom);
+ ret = 0;
- return 0;
+out:
+ if (updated) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&dom->lock, flags);
+ update_domain(dom);
+ spin_unlock_irqrestore(&dom->lock, flags);
+ }
+
+ /* Everything flushed out, free pages now */
+ free_page_list(freelist);
+
+ return ret;
}
static unsigned long iommu_unmap_page(struct protection_domain *dom,
@@ -1589,31 +1800,6 @@
*
****************************************************************************/
-/*
- * This function adds a protection domain to the global protection domain list
- */
-static void add_domain_to_list(struct protection_domain *domain)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&amd_iommu_pd_lock, flags);
- list_add(&domain->list, &amd_iommu_pd_list);
- spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
-}
-
-/*
- * This function removes a protection domain to the global
- * protection domain list
- */
-static void del_domain_from_list(struct protection_domain *domain)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&amd_iommu_pd_lock, flags);
- list_del(&domain->list);
- spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
-}
-
static u16 domain_id_alloc(void)
{
int id;
@@ -1638,67 +1824,6 @@
spin_unlock(&pd_bitmap_lock);
}
-#define DEFINE_FREE_PT_FN(LVL, FN) \
-static void free_pt_##LVL (unsigned long __pt) \
-{ \
- unsigned long p; \
- u64 *pt; \
- int i; \
- \
- pt = (u64 *)__pt; \
- \
- for (i = 0; i < 512; ++i) { \
- /* PTE present? */ \
- if (!IOMMU_PTE_PRESENT(pt[i])) \
- continue; \
- \
- /* Large PTE? */ \
- if (PM_PTE_LEVEL(pt[i]) == 0 || \
- PM_PTE_LEVEL(pt[i]) == 7) \
- continue; \
- \
- p = (unsigned long)IOMMU_PTE_PAGE(pt[i]); \
- FN(p); \
- } \
- free_page((unsigned long)pt); \
-}
-
-DEFINE_FREE_PT_FN(l2, free_page)
-DEFINE_FREE_PT_FN(l3, free_pt_l2)
-DEFINE_FREE_PT_FN(l4, free_pt_l3)
-DEFINE_FREE_PT_FN(l5, free_pt_l4)
-DEFINE_FREE_PT_FN(l6, free_pt_l5)
-
-static void free_pagetable(struct protection_domain *domain)
-{
- unsigned long root = (unsigned long)domain->pt_root;
-
- switch (domain->mode) {
- case PAGE_MODE_NONE:
- break;
- case PAGE_MODE_1_LEVEL:
- free_page(root);
- break;
- case PAGE_MODE_2_LEVEL:
- free_pt_l2(root);
- break;
- case PAGE_MODE_3_LEVEL:
- free_pt_l3(root);
- break;
- case PAGE_MODE_4_LEVEL:
- free_pt_l4(root);
- break;
- case PAGE_MODE_5_LEVEL:
- free_pt_l5(root);
- break;
- case PAGE_MODE_6_LEVEL:
- free_pt_l6(root);
- break;
- default:
- BUG();
- }
-}
-
static void free_gcr3_tbl_level1(u64 *tbl)
{
u64 *ptr;
@@ -1743,8 +1868,12 @@
static void dma_ops_domain_flush_tlb(struct dma_ops_domain *dom)
{
+ unsigned long flags;
+
+ spin_lock_irqsave(&dom->domain.lock, flags);
domain_flush_tlb(&dom->domain);
domain_flush_complete(&dom->domain);
+ spin_unlock_irqrestore(&dom->domain.lock, flags);
}
static void iova_domain_flush_tlb(struct iova_domain *iovad)
@@ -1765,8 +1894,6 @@
if (!dom)
return;
- del_domain_from_list(&dom->domain);
-
put_iova_domain(&dom->iovad);
free_pagetable(&dom->domain);
@@ -1807,8 +1934,6 @@
/* Initialize reserved ranges */
copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad);
- add_domain_to_list(&dma_dom->domain);
-
return dma_dom;
free_dma_dom:
@@ -1831,6 +1956,7 @@
{
u64 pte_root = 0;
u64 flags = 0;
+ u32 old_domid;
if (domain->mode != PAGE_MODE_NONE)
pte_root = iommu_virt_to_phys(domain->pt_root);
@@ -1880,8 +2006,20 @@
flags &= ~DEV_DOMID_MASK;
flags |= domain->id;
+ old_domid = amd_iommu_dev_table[devid].data[1] & DEV_DOMID_MASK;
amd_iommu_dev_table[devid].data[1] = flags;
amd_iommu_dev_table[devid].data[0] = pte_root;
+
+ /*
+ * A kdump kernel might be replacing a domain ID that was copied from
+ * the previous kernel--if so, it needs to flush the translation cache
+ * entries for the old domain ID that is being overwritten
+ */
+ if (old_domid) {
+ struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
+
+ amd_iommu_flush_tlb_domid(iommu, old_domid);
+ }
}
static void clear_dte_entry(u16 devid)
@@ -1922,16 +2060,13 @@
static void do_detach(struct iommu_dev_data *dev_data)
{
+ struct protection_domain *domain = dev_data->domain;
struct amd_iommu *iommu;
u16 alias;
iommu = amd_iommu_rlookup_table[dev_data->devid];
alias = dev_data->alias;
- /* decrease reference counters */
- dev_data->domain->dev_iommu[iommu->index] -= 1;
- dev_data->domain->dev_cnt -= 1;
-
/* Update data structures */
dev_data->domain = NULL;
list_del(&dev_data->list);
@@ -1941,38 +2076,18 @@
/* Flush the DTE entry */
device_flush_dte(dev_data);
+
+ /* Flush IOTLB */
+ domain_flush_tlb_pde(domain);
+
+ /* Wait for the flushes to finish */
+ domain_flush_complete(domain);
+
+ /* decrease reference counters - needs to happen after the flushes */
+ domain->dev_iommu[iommu->index] -= 1;
+ domain->dev_cnt -= 1;
}
-/*
- * If a device is not yet associated with a domain, this function makes the
- * device visible in the domain
- */
-static int __attach_device(struct iommu_dev_data *dev_data,
- struct protection_domain *domain)
-{
- int ret;
-
- /* lock domain */
- spin_lock(&domain->lock);
-
- ret = -EBUSY;
- if (dev_data->domain != NULL)
- goto out_unlock;
-
- /* Attach alias group root */
- do_attach(dev_data, domain);
-
- ret = 0;
-
-out_unlock:
-
- /* ready */
- spin_unlock(&domain->lock);
-
- return ret;
-}
-
-
static void pdev_iommuv2_disable(struct pci_dev *pdev)
{
pci_disable_ats(pdev);
@@ -2042,23 +2157,6 @@
return ret;
}
-/* FIXME: Move this to PCI code */
-#define PCI_PRI_TLP_OFF (1 << 15)
-
-static bool pci_pri_tlp_required(struct pci_dev *pdev)
-{
- u16 status;
- int pos;
-
- pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
- if (!pos)
- return false;
-
- pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status);
-
- return (status & PCI_PRI_TLP_OFF) ? true : false;
-}
-
/*
* If a device is not yet associated with a domain, this function makes the
* device visible in the domain
@@ -2071,23 +2169,32 @@
unsigned long flags;
int ret;
+ spin_lock_irqsave(&domain->lock, flags);
+
dev_data = get_dev_data(dev);
+ spin_lock(&dev_data->lock);
+
+ ret = -EBUSY;
+ if (dev_data->domain != NULL)
+ goto out;
+
if (!dev_is_pci(dev))
goto skip_ats_check;
pdev = to_pci_dev(dev);
if (domain->flags & PD_IOMMUV2_MASK) {
+ ret = -EINVAL;
if (!dev_data->passthrough)
- return -EINVAL;
+ goto out;
if (dev_data->iommu_v2) {
if (pdev_iommuv2_enable(pdev) != 0)
- return -EINVAL;
+ goto out;
dev_data->ats.enabled = true;
dev_data->ats.qdep = pci_ats_queue_depth(pdev);
- dev_data->pri_tlp = pci_pri_tlp_required(pdev);
+ dev_data->pri_tlp = pci_prg_resp_pasid_required(pdev);
}
} else if (amd_iommu_iotlb_sup &&
pci_enable_ats(pdev, PAGE_SHIFT) == 0) {
@@ -2096,9 +2203,9 @@
}
skip_ats_check:
- spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
- ret = __attach_device(dev_data, domain);
- spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+ ret = 0;
+
+ do_attach(dev_data, domain);
/*
* We might boot into a crash-kernel here. The crashed kernel
@@ -2107,26 +2214,17 @@
*/
domain_flush_tlb_pde(domain);
+ domain_flush_complete(domain);
+
+out:
+ spin_unlock(&dev_data->lock);
+
+ spin_unlock_irqrestore(&domain->lock, flags);
+
return ret;
}
/*
- * Removes a device from a protection domain (unlocked)
- */
-static void __detach_device(struct iommu_dev_data *dev_data)
-{
- struct protection_domain *domain;
-
- domain = dev_data->domain;
-
- spin_lock(&domain->lock);
-
- do_detach(dev_data);
-
- spin_unlock(&domain->lock);
-}
-
-/*
- * Removes a device from a protection domain (with devtable_lock held)
+ * Removes a device from a protection domain (takes domain and device locks)
*/
static void detach_device(struct device *dev)
@@ -2138,6 +2236,10 @@
dev_data = get_dev_data(dev);
domain = dev_data->domain;
+ spin_lock_irqsave(&domain->lock, flags);
+
+ spin_lock(&dev_data->lock);
+
/*
* First check if the device is still attached. It might already
* be detached from its domain because the generic
@@ -2145,15 +2247,12 @@
* our alias handling.
*/
if (WARN_ON(!dev_data->domain))
- return;
+ goto out;
- /* lock device table */
- spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
- __detach_device(dev_data);
- spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+ do_detach(dev_data);
if (!dev_is_pci(dev))
- return;
+ goto out;
if (domain->flags & PD_IOMMUV2_MASK && dev_data->iommu_v2)
pdev_iommuv2_disable(to_pci_dev(dev));
@@ -2161,6 +2260,11 @@
pci_disable_ats(to_pci_dev(dev));
dev_data->ats.enabled = false;
+
+out:
+ spin_unlock(&dev_data->lock);
+
+ spin_unlock_irqrestore(&domain->lock, flags);
}
static int amd_iommu_add_device(struct device *dev)
@@ -2182,11 +2286,10 @@
ret = iommu_init_device(dev);
if (ret) {
if (ret != -ENOTSUPP)
- pr_err("Failed to initialize device %s - trying to proceed anyway\n",
- dev_name(dev));
+ dev_err(dev, "Failed to initialize - trying to proceed anyway\n");
iommu_ignore_device(dev);
- dev->dma_ops = &dma_direct_ops;
+ dev->dma_ops = NULL;
goto out;
}
init_iommu_group(dev);
@@ -2195,7 +2298,7 @@
BUG_ON(!dev_data);
- if (iommu_pass_through || dev_data->iommu_v2)
+ if (dev_data->iommu_v2)
iommu_request_dm_for_dev(dev);
/* Domains are initialized for this device - have a look what we ended up with */
@@ -2293,15 +2396,10 @@
static void update_domain(struct protection_domain *domain)
{
- if (!domain->updated)
- return;
-
update_device_table(domain);
domain_flush_devices(domain);
domain_flush_tlb_pde(domain);
-
- domain->updated = false;
}
static int dir2prot(enum dma_data_direction direction)
@@ -2331,6 +2429,7 @@
{
dma_addr_t offset = paddr & ~PAGE_MASK;
dma_addr_t address, start, ret;
+ unsigned long flags;
unsigned int pages;
int prot = 0;
int i;
@@ -2339,7 +2438,7 @@
paddr &= PAGE_MASK;
address = dma_ops_alloc_iova(dev, dma_dom, pages, dma_mask);
- if (address == AMD_IOMMU_MAPPING_ERROR)
+ if (!address)
goto out;
prot = dir2prot(direction);
@@ -2356,10 +2455,7 @@
}
address += offset;
- if (unlikely(amd_iommu_np_cache)) {
- domain_flush_pages(&dma_dom->domain, address, size);
- domain_flush_complete(&dma_dom->domain);
- }
+ domain_flush_np_cache(&dma_dom->domain, address, size);
out:
return address;
@@ -2371,12 +2467,14 @@
iommu_unmap_page(&dma_dom->domain, start, PAGE_SIZE);
}
+ spin_lock_irqsave(&dma_dom->domain.lock, flags);
domain_flush_tlb(&dma_dom->domain);
domain_flush_complete(&dma_dom->domain);
+ spin_unlock_irqrestore(&dma_dom->domain.lock, flags);
dma_ops_free_iova(dma_dom, address, pages);
- return AMD_IOMMU_MAPPING_ERROR;
+ return DMA_MAPPING_ERROR;
}
/*
@@ -2401,8 +2499,12 @@
}
if (amd_iommu_unmap_flush) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&dma_dom->domain.lock, flags);
domain_flush_tlb(&dma_dom->domain);
domain_flush_complete(&dma_dom->domain);
+ spin_unlock_irqrestore(&dma_dom->domain.lock, flags);
dma_ops_free_iova(dma_dom, dma_addr, pages);
} else {
pages = __roundup_pow_of_two(pages);
@@ -2427,7 +2529,7 @@
if (PTR_ERR(domain) == -EINVAL)
return (dma_addr_t)paddr;
else if (IS_ERR(domain))
- return AMD_IOMMU_MAPPING_ERROR;
+ return DMA_MAPPING_ERROR;
dma_mask = *dev->dma_mask;
dma_dom = to_dma_ops_domain(domain);
@@ -2493,6 +2595,7 @@
struct scatterlist *s;
unsigned long address;
u64 dma_mask;
+ int ret;
domain = get_domain(dev);
if (IS_ERR(domain))
@@ -2504,7 +2607,7 @@
npages = sg_num_pages(dev, sglist, nelems);
address = dma_ops_alloc_iova(dev, dma_dom, npages, dma_mask);
- if (address == AMD_IOMMU_MAPPING_ERROR)
+ if (!address)
goto out_err;
prot = dir2prot(direction);
@@ -2515,11 +2618,12 @@
for (j = 0; j < pages; ++j) {
unsigned long bus_addr, phys_addr;
- int ret;
bus_addr = address + s->dma_address + (j << PAGE_SHIFT);
phys_addr = (sg_phys(s) & PAGE_MASK) + (j << PAGE_SHIFT);
- ret = iommu_map_page(domain, bus_addr, phys_addr, PAGE_SIZE, prot, GFP_ATOMIC);
+ ret = iommu_map_page(domain, bus_addr, phys_addr,
+ PAGE_SIZE, prot,
+ GFP_ATOMIC | __GFP_NOWARN);
if (ret)
goto out_unmap;
@@ -2529,15 +2633,23 @@
/* Everything is mapped - write the right values into s->dma_address */
for_each_sg(sglist, s, nelems, i) {
- s->dma_address += address + s->offset;
+ /*
+ * Add in the remaining piece of the scatter-gather offset that
+ * was masked out when we were determining the physical address
+ * via (sg_phys(s) & PAGE_MASK) earlier.
+ */
+ s->dma_address += address + (s->offset & ~PAGE_MASK);
s->dma_length = s->length;
}
+ if (s)
+ domain_flush_np_cache(domain, s->dma_address, s->dma_length);
+
return nelems;
out_unmap:
- pr_err("%s: IOMMU mapping error in map_sg (io-pages: %d)\n",
- dev_name(dev), npages);
+ dev_err(dev, "IOMMU mapping error in map_sg (io-pages: %d reason: %d)\n",
+ npages, ret);
for_each_sg(sglist, s, nelems, i) {
int j, pages = iommu_num_pages(sg_phys(s), s->length, PAGE_SIZE);
@@ -2548,13 +2660,13 @@
bus_addr = address + s->dma_address + (j << PAGE_SHIFT);
iommu_unmap_page(domain, bus_addr, PAGE_SIZE);
- if (--mapped_pages)
+ if (--mapped_pages == 0)
goto out_free_iova;
}
}
out_free_iova:
- free_iova_fast(&dma_dom->iovad, address, npages);
+ free_iova_fast(&dma_dom->iovad, address >> PAGE_SHIFT, npages);
out_err:
return 0;
@@ -2571,7 +2683,7 @@
struct protection_domain *domain;
struct dma_ops_domain *dma_dom;
unsigned long startaddr;
- int npages = 2;
+ int npages;
domain = get_domain(dev);
if (IS_ERR(domain))
@@ -2627,7 +2739,7 @@
*dma_addr = __map_single(dev, dma_dom, page_to_phys(page),
size, DMA_BIDIRECTIONAL, dma_mask);
- if (*dma_addr == AMD_IOMMU_MAPPING_ERROR)
+ if (*dma_addr == DMA_MAPPING_ERROR)
goto out_free;
return page_address(page);
@@ -2678,11 +2790,6 @@
return check_device(dev);
}
-static int amd_iommu_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- return dma_addr == AMD_IOMMU_MAPPING_ERROR;
-}
-
static const struct dma_map_ops amd_iommu_dma_ops = {
.alloc = alloc_coherent,
.free = free_coherent,
@@ -2691,7 +2798,8 @@
.map_sg = map_sg,
.unmap_sg = unmap_sg,
.dma_supported = amd_iommu_dma_supported,
- .mapping_error = amd_iommu_mapping_error,
+ .mmap = dma_common_mmap,
+ .get_sgtable = dma_common_get_sgtable,
};
static int init_reserved_iova_ranges(void)
@@ -2737,7 +2845,7 @@
IOVA_PFN(r->start),
IOVA_PFN(r->end));
if (!val) {
- pr_err("Reserve pci-resource range failed\n");
+ pci_err(pdev, "Reserve pci-resource range %pR failed\n", r);
return -ENOMEM;
}
}
@@ -2775,24 +2883,13 @@
int __init amd_iommu_init_dma_ops(void)
{
- swiotlb = (iommu_pass_through || sme_me_mask) ? 1 : 0;
+ swiotlb = (iommu_default_passthrough() || sme_me_mask) ? 1 : 0;
iommu_detected = 1;
- /*
- * In case we don't initialize SWIOTLB (actually the common case
- * when AMD IOMMU is enabled and SME is not active), make sure there
- * are global dma_ops set as a fall-back for devices not handled by
- * this driver (for example non-PCI devices). When SME is active,
- * make sure that swiotlb variable remains set so the global dma_ops
- * continue to be SWIOTLB.
- */
- if (!swiotlb)
- dma_ops = &dma_direct_ops;
-
if (amd_iommu_unmap_flush)
- pr_info("AMD-Vi: IO/TLB flush on unmap enabled\n");
+ pr_info("IO/TLB flush on unmap enabled\n");
else
- pr_info("AMD-Vi: Lazy IO/TLB flushing enabled\n");
+ pr_info("Lazy IO/TLB flushing enabled\n");
return 0;
@@ -2813,16 +2910,16 @@
struct iommu_dev_data *entry;
unsigned long flags;
- spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
+ spin_lock_irqsave(&domain->lock, flags);
while (!list_empty(&domain->dev_list)) {
entry = list_first_entry(&domain->dev_list,
struct iommu_dev_data, list);
BUG_ON(!entry->domain);
- __detach_device(entry);
+ do_detach(entry);
}
- spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+ spin_unlock_irqrestore(&domain->lock, flags);
}
static void protection_domain_free(struct protection_domain *domain)
@@ -2830,8 +2927,6 @@
if (!domain)
return;
- del_domain_from_list(domain);
-
if (domain->id)
domain_id_free(domain->id);
@@ -2861,8 +2956,6 @@
if (protection_domain_init(domain))
goto out_err;
- add_domain_to_list(domain);
-
return domain;
out_err:
@@ -2897,7 +2990,7 @@
case IOMMU_DOMAIN_DMA:
dma_domain = dma_ops_domain_alloc();
if (!dma_domain) {
- pr_err("AMD-Vi: Failed to allocate\n");
+ pr_err("Failed to allocate\n");
return NULL;
}
pdomain = &dma_domain->domain;
@@ -3034,11 +3127,14 @@
ret = iommu_map_page(domain, iova, paddr, page_size, prot, GFP_KERNEL);
mutex_unlock(&domain->api_lock);
+ domain_flush_np_cache(domain, iova, page_size);
+
return ret;
}
static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
- size_t page_size)
+ size_t page_size,
+ struct iommu_iotlb_gather *gather)
{
struct protection_domain *domain = to_pdomain(dom);
size_t unmap_size;
@@ -3083,6 +3179,8 @@
return (irq_remapping_enabled == 1);
case IOMMU_CAP_NOEXEC:
return false;
+ default:
+ break;
}
return false;
@@ -3100,24 +3198,26 @@
return;
list_for_each_entry(entry, &amd_iommu_unity_map, list) {
+ int type, prot = 0;
size_t length;
- int prot = 0;
if (devid < entry->devid_start || devid > entry->devid_end)
continue;
+ type = IOMMU_RESV_DIRECT;
length = entry->address_end - entry->address_start;
if (entry->prot & IOMMU_PROT_IR)
prot |= IOMMU_READ;
if (entry->prot & IOMMU_PROT_IW)
prot |= IOMMU_WRITE;
+ if (entry->prot & IOMMU_UNITY_MAP_FLAG_EXCL_RANGE)
+ /* Exclusion range */
+ type = IOMMU_RESV_RESERVED;
region = iommu_alloc_resv_region(entry->address_start,
- length, prot,
- IOMMU_RESV_DIRECT);
+ length, prot, type);
if (!region) {
- pr_err("Out of memory allocating dm-regions for %s\n",
- dev_name(dev));
+ dev_err(dev, "Out of memory allocating dm-regions\n");
return;
}
list_add_tail(®ion->list, head);
@@ -3170,14 +3270,18 @@
static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
{
struct protection_domain *dom = to_pdomain(domain);
+ unsigned long flags;
+ spin_lock_irqsave(&dom->lock, flags);
domain_flush_tlb_pde(dom);
domain_flush_complete(dom);
+ spin_unlock_irqrestore(&dom->lock, flags);
}
-static void amd_iommu_iotlb_range_add(struct iommu_domain *domain,
- unsigned long iova, size_t size)
+static void amd_iommu_iotlb_sync(struct iommu_domain *domain,
+ struct iommu_iotlb_gather *gather)
{
+ amd_iommu_flush_iotlb_all(domain);
}
const struct iommu_ops amd_iommu_ops = {
@@ -3198,8 +3302,7 @@
.is_attach_deferred = amd_iommu_is_attach_deferred,
.pgsize_bitmap = AMD_IOMMU_PGSIZES,
.flush_iotlb_all = amd_iommu_flush_iotlb_all,
- .iotlb_range_add = amd_iommu_iotlb_range_add,
- .iotlb_sync = amd_iommu_flush_iotlb_all,
+ .iotlb_sync = amd_iommu_iotlb_sync,
};
/*****************************************************************************
@@ -3234,7 +3337,6 @@
/* Update data structure */
domain->mode = PAGE_MODE_NONE;
- domain->updated = true;
/* Make changes visible to IOMMUs */
update_domain(domain);
@@ -3280,7 +3382,6 @@
domain->glx = levels;
domain->flags |= PD_IOMMUV2_MASK;
- domain->updated = true;
update_domain(domain);
@@ -4292,13 +4393,62 @@
.deactivate = irq_remapping_deactivate,
};
+int amd_iommu_activate_guest_mode(void *data)
+{ /*
+ * Put the interrupt remapping table entry described by @data (treated
+ * as a struct amd_ir_data) into guest mode, using the ga_root_ptr,
+ * ga_vector and ga_tag values stored in that structure.
+ *
+ * Returns 0 without modifying anything when the
+ * AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) check fails, when there
+ * is no entry, or when the entry already has guest_mode set; otherwise
+ * returns the result of modify_irte_ga().
+ */
+ struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
+ struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
+
+ if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) ||
+ !entry || entry->lo.fields_vapic.guest_mode)
+ return 0;
+
+ entry->lo.val = 0; /* rebuild the entry from an all-zero state */
+ entry->hi.val = 0;
+
+ entry->lo.fields_vapic.guest_mode = 1;
+ entry->lo.fields_vapic.ga_log_intr = 1;
+ entry->hi.fields.ga_root_ptr = ir_data->ga_root_ptr;
+ entry->hi.fields.vector = ir_data->ga_vector;
+ entry->lo.fields_vapic.ga_tag = ir_data->ga_tag;
+
+ return modify_irte_ga(ir_data->irq_2_irte.devid,
+ ir_data->irq_2_irte.index, entry, NULL); /* result is passed straight back to the caller */
+}
+EXPORT_SYMBOL(amd_iommu_activate_guest_mode);
+
+int amd_iommu_deactivate_guest_mode(void *data)
+{ /*
+ * Rewrite the interrupt remapping table entry described by @data
+ * (treated as a struct amd_ir_data) as a remapped-format entry built
+ * from the cached irq_cfg (vector, destination APIC id) and the
+ * current apic's destination and delivery modes.
+ *
+ * Returns 0 without modifying anything when the
+ * AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) check fails, when there
+ * is no entry, or when the entry is not in guest mode; otherwise
+ * returns the result of modify_irte_ga().
+ */
+ struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
+ struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
+ struct irq_cfg *cfg = ir_data->cfg; /* NOTE(review): dereferenced below without a NULL check; amd_ir_set_vcpu_affinity() stores it before calling - confirm for other callers */
+
+ if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) ||
+ !entry || !entry->lo.fields_vapic.guest_mode)
+ return 0;
+
+ entry->lo.val = 0; /* rebuild the entry from an all-zero state */
+ entry->hi.val = 0;
+
+ entry->lo.fields_remap.dm = apic->irq_dest_mode;
+ entry->lo.fields_remap.int_type = apic->irq_delivery_mode;
+ entry->hi.fields.vector = cfg->vector;
+ entry->lo.fields_remap.destination =
+ APICID_TO_IRTE_DEST_LO(cfg->dest_apicid); /* the APIC id is split across the lo and hi words */
+ entry->hi.fields.destination =
+ APICID_TO_IRTE_DEST_HI(cfg->dest_apicid);
+
+ return modify_irte_ga(ir_data->irq_2_irte.devid,
+ ir_data->irq_2_irte.index, entry, NULL); /* result is passed straight back to the caller */
+}
+EXPORT_SYMBOL(amd_iommu_deactivate_guest_mode);
+
static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
{
+ int ret;
struct amd_iommu *iommu;
struct amd_iommu_pi_data *pi_data = vcpu_info;
struct vcpu_data *vcpu_pi_info = pi_data->vcpu_data;
struct amd_ir_data *ir_data = data->chip_data;
- struct irte_ga *irte = (struct irte_ga *) ir_data->entry;
struct irq_2_irte *irte_info = &ir_data->irq_2_irte;
struct iommu_dev_data *dev_data = search_dev_data(irte_info->devid);
@@ -4309,6 +4459,7 @@
if (!dev_data || !dev_data->use_vapic)
return 0;
+ ir_data->cfg = irqd_cfg(data);
pi_data->ir_data = ir_data;
/* Note:
@@ -4316,7 +4467,7 @@
* legacy mode. So, we force legacy mode instead.
*/
if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) {
- pr_debug("AMD-Vi: %s: Fall back to using intr legacy remap\n",
+ pr_debug("%s: Fall back to using intr legacy remap\n",
__func__);
pi_data->is_guest_mode = false;
}
@@ -4327,37 +4478,24 @@
pi_data->prev_ga_tag = ir_data->cached_ga_tag;
if (pi_data->is_guest_mode) {
- /* Setting */
- irte->hi.fields.ga_root_ptr = (pi_data->base >> 12);
- irte->hi.fields.vector = vcpu_pi_info->vector;
- irte->lo.fields_vapic.ga_log_intr = 1;
- irte->lo.fields_vapic.guest_mode = 1;
- irte->lo.fields_vapic.ga_tag = pi_data->ga_tag;
-
- ir_data->cached_ga_tag = pi_data->ga_tag;
+ ir_data->ga_root_ptr = (pi_data->base >> 12);
+ ir_data->ga_vector = vcpu_pi_info->vector;
+ ir_data->ga_tag = pi_data->ga_tag;
+ ret = amd_iommu_activate_guest_mode(ir_data);
+ if (!ret)
+ ir_data->cached_ga_tag = pi_data->ga_tag;
} else {
- /* Un-Setting */
- struct irq_cfg *cfg = irqd_cfg(data);
-
- irte->hi.val = 0;
- irte->lo.val = 0;
- irte->hi.fields.vector = cfg->vector;
- irte->lo.fields_remap.guest_mode = 0;
- irte->lo.fields_remap.destination =
- APICID_TO_IRTE_DEST_LO(cfg->dest_apicid);
- irte->hi.fields.destination =
- APICID_TO_IRTE_DEST_HI(cfg->dest_apicid);
- irte->lo.fields_remap.int_type = apic->irq_delivery_mode;
- irte->lo.fields_remap.dm = apic->irq_dest_mode;
+ ret = amd_iommu_deactivate_guest_mode(ir_data);
/*
* This communicates the ga_tag back to the caller
* so that it can do all the necessary clean up.
*/
- ir_data->cached_ga_tag = 0;
+ if (!ret)
+ ir_data->cached_ga_tag = 0;
}
- return modify_irte_ga(irte_info->devid, irte_info->index, irte, ir_data);
+ return ret;
}
diff --git a/drivers/iommu/amd_iommu.h b/drivers/iommu/amd_iommu.h
new file mode 100644
index 0000000..12d540d
--- /dev/null
+++ b/drivers/iommu/amd_iommu.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef AMD_IOMMU_H
+#define AMD_IOMMU_H
+
+/* Defined in amd_iommu_init.c; also called from the DMI quirk callback. */
+int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line);
+
+#ifdef CONFIG_DMI
+void amd_iommu_apply_ivrs_quirks(void);
+#else
+/*
+ * No-op stub for builds without CONFIG_DMI. It has to be "static inline":
+ * a plain static function defined in a header is instantiated in every
+ * file that includes the header and is reported as unused by the compiler
+ * wherever it is not called.
+ */
+static inline void amd_iommu_apply_ivrs_quirks(void) { }
+#endif
+
+#endif /* AMD_IOMMU_H */
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index e062ab9..568c523 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -1,22 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
* Author: Joerg Roedel <jroedel@suse.de>
* Leo Duran <leo.duran@amd.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#define pr_fmt(fmt) "AMD-Vi: " fmt
+#define dev_fmt(fmt) pr_fmt(fmt)
+
#include <linux/pci.h>
#include <linux/acpi.h>
#include <linux/list.h>
@@ -32,6 +23,8 @@
#include <linux/mem_encrypt.h>
#include <asm/pci-direct.h>
#include <asm/iommu.h>
+#include <asm/apic.h>
+#include <asm/msidef.h>
#include <asm/gart.h>
#include <asm/x86_init.h>
#include <asm/iommu_table.h>
@@ -39,6 +32,7 @@
#include <asm/irq_remapping.h>
#include <linux/crash_dump.h>
+#include "amd_iommu.h"
#include "amd_iommu_proto.h"
#include "amd_iommu_types.h"
#include "irq_remapping.h"
@@ -186,12 +180,6 @@
bool amd_iommu_force_isolation __read_mostly;
/*
- * List of protection domains - used during resume
- */
-LIST_HEAD(amd_iommu_pd_list);
-spinlock_t amd_iommu_pd_lock;
-
-/*
* Pointer to the device table which is shared by all AMD IOMMUs
* it is indexed by the PCI device id or the HT unit id and contains
* information about the domain the device belongs to as well as the
@@ -356,7 +344,7 @@
static void iommu_set_exclusion_range(struct amd_iommu *iommu)
{
u64 start = iommu->exclusion_start & PAGE_MASK;
- u64 limit = (start + iommu->exclusion_length) & PAGE_MASK;
+ u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK;
u64 entry;
if (!iommu->exclusion_start)
@@ -421,6 +409,9 @@
static void iommu_disable(struct amd_iommu *iommu)
{
+ if (!iommu->mmio_base)
+ return;
+
/* Disable command buffer */
iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
@@ -443,9 +434,9 @@
static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end)
{
if (!request_mem_region(address, end, "amd_iommu")) {
- pr_err("AMD-Vi: Can not reserve memory region %llx-%llx for mmio\n",
+ pr_err("Can not reserve memory region %llx-%llx for mmio\n",
address, end);
- pr_err("AMD-Vi: This is a BIOS bug. Please contact your hardware vendor\n");
+ pr_err("This is a BIOS bug. Please contact your hardware vendor\n");
return NULL;
}
@@ -512,7 +503,7 @@
u32 ivhd_size = get_ivhd_header_size(h);
if (!ivhd_size) {
- pr_err("AMD-Vi: Unsupported IVHD type %#x\n", h->type);
+ pr_err("Unsupported IVHD type %#x\n", h->type);
return -EINVAL;
}
@@ -553,7 +544,7 @@
checksum += p[i];
if (checksum != 0) {
/* ACPI table corrupt */
- pr_err(FW_BUG "AMD-Vi: IVRS invalid checksum\n");
+ pr_err(FW_BUG "IVRS invalid checksum\n");
return -ENODEV;
}
@@ -903,12 +894,22 @@
}
}
- old_devtb_phys = entry & PAGE_MASK;
+ /*
+ * When SME is enabled in the first kernel, the entry includes the
+ * memory encryption mask(sme_me_mask), we must remove the memory
+ * encryption mask to obtain the true physical address in kdump kernel.
+ */
+ old_devtb_phys = __sme_clr(entry) & PAGE_MASK;
+
if (old_devtb_phys >= 0x100000000ULL) {
pr_err("The address of old device table is above 4G, not trustworthy!\n");
return false;
}
- old_devtb = memremap(old_devtb_phys, dev_table_size, MEMREMAP_WB);
+ old_devtb = (sme_active() && is_kdump_kernel())
+ ? (__force void *)ioremap_encrypted(old_devtb_phys,
+ dev_table_size)
+ : memremap(old_devtb_phys, dev_table_size, MEMREMAP_WB);
+
if (!old_devtb)
return false;
@@ -1002,7 +1003,7 @@
set_iommu_for_device(iommu, devid);
}
-static int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line)
+int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line)
{
struct devid_map *entry;
struct list_head *list;
@@ -1018,7 +1019,7 @@
if (!(entry->id == id && entry->cmd_line))
continue;
- pr_info("AMD-Vi: Command-line override present for %s id %d - ignoring\n",
+ pr_info("Command-line override present for %s id %d - ignoring\n",
type == IVHD_SPECIAL_IOAPIC ? "IOAPIC" : "HPET", id);
*devid = entry->devid;
@@ -1051,7 +1052,7 @@
!entry->cmd_line)
continue;
- pr_info("AMD-Vi: Command-line override for hid:%s uid:%s\n",
+ pr_info("Command-line override for hid:%s uid:%s\n",
hid, uid);
*devid = entry->devid;
return 0;
@@ -1067,7 +1068,7 @@
entry->cmd_line = cmd_line;
entry->root_devid = (entry->devid & (~0x7));
- pr_info("AMD-Vi:%s, add hid:%s, uid:%s, rdevid:%d\n",
+ pr_info("%s, add hid:%s, uid:%s, rdevid:%d\n",
entry->cmd_line ? "cmd" : "ivrs",
entry->hid, entry->uid, entry->root_devid);
@@ -1153,6 +1154,8 @@
if (ret)
return ret;
+ amd_iommu_apply_ivrs_quirks();
+
/*
* First save the recommended feature enable bits from ACPI
*/
@@ -1163,7 +1166,7 @@
*/
ivhd_size = get_ivhd_header_size(h);
if (!ivhd_size) {
- pr_err("AMD-Vi: Unsupported IVHD type %#x\n", h->type);
+ pr_err("Unsupported IVHD type %#x\n", h->type);
return -EINVAL;
}
@@ -1445,8 +1448,7 @@
pci_write_config_dword(iommu->dev, 0xf0, 0x90 | (1 << 8));
pci_write_config_dword(iommu->dev, 0xf4, value | 0x4);
- pr_info("AMD-Vi: Applying erratum 746 workaround for IOMMU at %s\n",
- dev_name(&iommu->dev->dev));
+ pci_info(iommu->dev, "Applying erratum 746 workaround\n");
/* Clear the enable writing bit */
pci_write_config_dword(iommu->dev, 0xf0, 0x90);
@@ -1476,8 +1478,7 @@
/* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */
iommu_write_l2(iommu, 0x47, value | BIT(0));
- pr_info("AMD-Vi: Applying ATS write check workaround for IOMMU at %s\n",
- dev_name(&iommu->dev->dev));
+ pci_info(iommu->dev, "Applying ATS write check workaround\n");
}
/*
@@ -1496,7 +1497,7 @@
iommu->index = amd_iommus_present++;
if (unlikely(iommu->index >= MAX_IOMMUS)) {
- WARN(1, "AMD-Vi: System has more IOMMUs than supported by this driver\n");
+ WARN(1, "System has more IOMMUs than supported by this driver\n");
return -ENOSYS;
}
@@ -1653,6 +1654,7 @@
static void init_iommu_perf_ctr(struct amd_iommu *iommu)
{
+ struct pci_dev *pdev = iommu->dev;
u64 val = 0xabcd, val2 = 0;
if (!iommu_feature(iommu, FEATURE_PC))
@@ -1664,12 +1666,12 @@
if ((iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true)) ||
(iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false)) ||
(val != val2)) {
- pr_err("AMD-Vi: Unable to write to IOMMU perf counter.\n");
+ pci_err(pdev, "Unable to write to IOMMU perf counter.\n");
amd_iommu_pc_present = false;
return;
}
- pr_info("AMD-Vi: IOMMU performance counters supported\n");
+ pci_info(pdev, "IOMMU performance counters supported\n");
val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET);
iommu->max_banks = (u8) ((val >> 12) & 0x3f);
@@ -1710,7 +1712,7 @@
NULL,
};
-static int iommu_init_pci(struct amd_iommu *iommu)
+static int __init iommu_init_pci(struct amd_iommu *iommu)
{
int cap_ptr = iommu->cap_ptr;
u32 range, misc, low, high;
@@ -1828,14 +1830,14 @@
struct amd_iommu *iommu;
for_each_iommu(iommu) {
+ struct pci_dev *pdev = iommu->dev;
int i;
- pr_info("AMD-Vi: Found IOMMU at %s cap 0x%hx\n",
- dev_name(&iommu->dev->dev), iommu->cap_ptr);
+ pci_info(pdev, "Found IOMMU cap 0x%hx\n", iommu->cap_ptr);
if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
- pr_info("AMD-Vi: Extended features (%#llx):\n",
- iommu->features);
+ pci_info(pdev, "Extended features (%#llx):\n",
+ iommu->features);
for (i = 0; i < ARRAY_SIZE(feat_str); ++i) {
if (iommu_feature(iommu, (1ULL << i)))
pr_cont(" %s", feat_str[i]);
@@ -1848,11 +1850,11 @@
}
}
if (irq_remapping_enabled) {
- pr_info("AMD-Vi: Interrupt remapping enabled\n");
+ pr_info("Interrupt remapping enabled\n");
if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
- pr_info("AMD-Vi: virtual APIC enabled\n");
+ pr_info("Virtual APIC enabled\n");
if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
- pr_info("AMD-Vi: X2APIC enabled\n");
+ pr_info("X2APIC enabled\n");
}
}
@@ -1923,6 +1925,90 @@
return 0;
}
+#define XT_INT_DEST_MODE(x) (((x) & 0x1ULL) << 2) /* bit 0 of x -> bit 2 */
+#define XT_INT_DEST_LO(x) (((x) & 0xFFFFFFULL) << 8) /* bits 23:0 of x -> bits 31:8 */
+#define XT_INT_VEC(x) (((x) & 0xFFULL) << 32) /* bits 7:0 of x -> bits 39:32 */
+#define XT_INT_DEST_HI(x) ((((x) >> 24) & 0xFFULL) << 56) /* bits 31:24 of x -> bits 63:56 */
+
+/*
+ * Setup the IntCapXT registers with interrupt routing information
+ * based on the PCI MSI capability block registers, accessed via
+ * MMIO MSI address low/hi and MSI data registers.
+ *
+ * The vector, destination mode and destination id are read back from
+ * the MSI registers, packed with the XT_INT_*() macros and the same
+ * 64-bit value is written to the EVT, PPR and GALOG IntCapXT registers.
+ */
+static void iommu_update_intcapxt(struct amd_iommu *iommu)
+{
+ u64 val;
+ u32 addr_lo = readl(iommu->mmio_base + MMIO_MSI_ADDR_LO_OFFSET);
+ u32 addr_hi = readl(iommu->mmio_base + MMIO_MSI_ADDR_HI_OFFSET);
+ u32 data = readl(iommu->mmio_base + MMIO_MSI_DATA_OFFSET);
+ bool dm = (addr_lo >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1;
+ u32 dest = ((addr_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xFF); /* low 8 bits of the destination id */
+
+ if (x2apic_enabled())
+ dest |= MSI_ADDR_EXT_DEST_ID(addr_hi); /* remaining destination bits come from the high address word */
+
+ val = XT_INT_VEC(data & 0xFF) |
+ XT_INT_DEST_MODE(dm) |
+ XT_INT_DEST_LO(dest) |
+ XT_INT_DEST_HI(dest);
+
+ /*
+ * Current IOMMU implementation uses the same IRQ for all
+ * 3 IOMMU interrupts.
+ */
+ writeq(val, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET);
+ writeq(val, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET);
+ writeq(val, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET);
+}
+
+static void _irq_notifier_notify(struct irq_affinity_notify *notify,
+ const cpumask_t *mask)
+{ /*
+ * Affinity-change callback installed by iommu_init_intcapxt(): find
+ * the IOMMU whose PCI device uses the IRQ recorded in @notify and
+ * reprogram its IntCapXT registers. @mask is not used.
+ */
+ struct amd_iommu *iommu;
+
+ for_each_iommu(iommu) {
+ if (iommu->dev->irq == notify->irq) {
+ iommu_update_intcapxt(iommu);
+ break; /* only the first matching IOMMU is updated */
+ }
+ }
+}
+
+static void _irq_notifier_release(struct kref *ref)
+{ /* Intentionally empty: this release callback does nothing with @ref. */
+}
+
+/*
+ * Set up interrupt delivery through the IntCapXT registers.
+ *
+ * Does nothing and returns 0 unless amd_iommu_xt_mode is
+ * IRQ_REMAP_X2APIC_MODE. Otherwise an affinity notifier is registered for
+ * the IOMMU's IRQ, the IntCapXT registers are programmed once and
+ * CONTROL_INTCAPXT_EN is enabled.
+ *
+ * Returns 0 on success, or the error reported by
+ * irq_set_affinity_notifier().
+ */
+static int iommu_init_intcapxt(struct amd_iommu *iommu)
+{
+	int ret;
+	struct irq_affinity_notify *notify = &iommu->intcapxt_notify;
+
+	/*
+	 * IntCapXT requires XTSup=1, which can be inferred from
+	 * amd_iommu_xt_mode.
+	 */
+	if (amd_iommu_xt_mode != IRQ_REMAP_X2APIC_MODE)
+		return 0;
+
+	/*
+	 * Also, we need to set up a notifier to update the IntCapXT registers
+	 * whenever the irq affinity is changed from user-space.
+	 */
+	notify->irq = iommu->dev->irq;
+	notify->notify = _irq_notifier_notify;
+	notify->release = _irq_notifier_release;
+	ret = irq_set_affinity_notifier(iommu->dev->irq, notify);
+	if (ret) {
+		pr_err("Failed to register irq affinity notifier (devid=%#x, irq %d)\n",
+		       iommu->devid, iommu->dev->irq);
+		return ret;
+	}
+
+	iommu_update_intcapxt(iommu);
+	iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN);
+	return ret;
+}
+
static int iommu_init_msi(struct amd_iommu *iommu)
{
int ret;
@@ -1939,6 +2025,10 @@
return ret;
enable_faults:
+ ret = iommu_init_intcapxt(iommu);
+ if (ret)
+ return ret;
+
iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
if (iommu->ppr_log != NULL)
@@ -2001,6 +2091,9 @@
if (e == NULL)
return -ENOMEM;
+ if (m->flags & IVMD_FLAG_EXCL_RANGE)
+ init_exclusion_range(m);
+
switch (m->type) {
default:
kfree(e);
@@ -2047,9 +2140,7 @@
while (p < end) {
m = (struct ivmd_header *)p;
- if (m->flags & IVMD_FLAG_EXCL_RANGE)
- init_exclusion_range(m);
- else if (m->flags & IVMD_FLAG_UNITY_MAP)
+ if (m->flags & (IVMD_FLAG_UNITY_MAP | IVMD_FLAG_EXCL_RANGE))
init_unity_map_range(m);
p += m->length;
@@ -2330,15 +2421,6 @@
amd_iommu_dev_table = NULL;
free_iommu_all();
-
-#ifdef CONFIG_GART_IOMMU
- /*
- * We failed to initialize the AMD IOMMU - try fallback to GART
- * if possible.
- */
- gart_iommu_init();
-
-#endif
}
/* SB IOAPIC is always on this device in AMD systems */
@@ -2366,7 +2448,7 @@
devid = get_ioapic_devid(id);
if (devid < 0) {
- pr_err("%sAMD-Vi: IOAPIC[%d] not in IVRS table\n",
+ pr_err("%s: IOAPIC[%d] not in IVRS table\n",
fw_bug, id);
ret = false;
} else if (devid == IOAPIC_SB_DEVID) {
@@ -2384,11 +2466,11 @@
* when the BIOS is buggy and provides us the wrong
* device id for the IOAPIC in the system.
*/
- pr_err("%sAMD-Vi: No southbridge IOAPIC found\n", fw_bug);
+ pr_err("%s: No southbridge IOAPIC found\n", fw_bug);
}
if (!ret)
- pr_err("AMD-Vi: Disabling interrupt remapping\n");
+ pr_err("Disabling interrupt remapping\n");
return ret;
}
@@ -2443,7 +2525,7 @@
return -ENODEV;
else if (ACPI_FAILURE(status)) {
const char *err = acpi_format_exception(status);
- pr_err("AMD-Vi: IVRS table error: %s\n", err);
+ pr_err("IVRS table error: %s\n", err);
return -EINVAL;
}
@@ -2513,8 +2595,6 @@
*/
__set_bit(0, amd_iommu_pd_alloc_bitmap);
- spin_lock_init(&amd_iommu_pd_lock);
-
/*
* now the data structures are allocated and basically initialized
* start the real acpi table scan
@@ -2596,7 +2676,7 @@
return false;
else if (ACPI_FAILURE(status)) {
const char *err = acpi_format_exception(status);
- pr_err("AMD-Vi: IVRS table error: %s\n", err);
+ pr_err("IVRS table error: %s\n", err);
return false;
}
@@ -2631,9 +2711,7 @@
ret = early_amd_iommu_init();
init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED;
if (init_state == IOMMU_ACPI_FINISHED && amd_iommu_disabled) {
- pr_info("AMD-Vi: AMD IOMMU disabled on kernel command-line\n");
- free_dma_resources();
- free_iommu_resources();
+ pr_info("AMD IOMMU disabled on kernel command-line\n");
init_state = IOMMU_CMDLINE_DISABLED;
ret = -EINVAL;
}
@@ -2674,6 +2752,19 @@
BUG();
}
+ if (ret) {
+ free_dma_resources();
+ if (!irq_remapping_enabled) {
+ disable_iommus();
+ free_iommu_resources();
+ } else {
+ struct amd_iommu *iommu;
+
+ uninit_device_table_dma();
+ for_each_iommu(iommu)
+ iommu_flush_all_caches(iommu);
+ }
+ }
return ret;
}
@@ -2747,17 +2838,15 @@
int ret;
ret = iommu_go_to_state(IOMMU_INITIALIZED);
- if (ret) {
- free_dma_resources();
- if (!irq_remapping_enabled) {
- disable_iommus();
- free_iommu_resources();
- } else {
- uninit_device_table_dma();
- for_each_iommu(iommu)
- iommu_flush_all_caches(iommu);
- }
+#ifdef CONFIG_GART_IOMMU
+ if (ret && list_empty(&amd_iommu_list)) {
+ /*
+ * We failed to initialize the AMD IOMMU - try fallback
+ * to GART if possible.
+ */
+ gart_iommu_init();
}
+#endif
for_each_iommu(iommu)
amd_iommu_debugfs_setup(iommu);
@@ -2778,7 +2867,7 @@
(boot_cpu_data.microcode <= 0x080011ff))
return true;
- pr_notice("AMD-Vi: IOMMU not currently supported when SME is active\n");
+ pr_notice("IOMMU not currently supported when SME is active\n");
return false;
}
@@ -2863,12 +2952,12 @@
ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn);
if (ret != 4) {
- pr_err("AMD-Vi: Invalid command line: ivrs_ioapic%s\n", str);
+ pr_err("Invalid command line: ivrs_ioapic%s\n", str);
return 1;
}
if (early_ioapic_map_size == EARLY_MAP_SIZE) {
- pr_err("AMD-Vi: Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n",
+ pr_err("Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n",
str);
return 1;
}
@@ -2893,12 +2982,12 @@
ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn);
if (ret != 4) {
- pr_err("AMD-Vi: Invalid command line: ivrs_hpet%s\n", str);
+ pr_err("Invalid command line: ivrs_hpet%s\n", str);
return 1;
}
if (early_hpet_map_size == EARLY_MAP_SIZE) {
- pr_err("AMD-Vi: Early HPET map overflow - ignoring ivrs_hpet%s\n",
+ pr_err("Early HPET map overflow - ignoring ivrs_hpet%s\n",
str);
return 1;
}
@@ -2923,7 +3012,7 @@
ret = sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid);
if (ret != 4) {
- pr_err("AMD-Vi: Invalid command line: ivrs_acpihid(%s)\n", str);
+ pr_err("Invalid command line: ivrs_acpihid(%s)\n", str);
return 1;
}
@@ -2932,7 +3021,7 @@
uid = p;
if (!hid || !(*hid) || !uid) {
- pr_err("AMD-Vi: Invalid command line: hid or uid\n");
+ pr_err("Invalid command line: hid or uid\n");
return 1;
}
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h
index a8cd029..92c2ba6 100644
--- a/drivers/iommu/amd_iommu_proto.h
+++ b/drivers/iommu/amd_iommu_proto.h
@@ -1,19 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2009-2010 Advanced Micro Devices, Inc.
* Author: Joerg Roedel <jroedel@suse.de>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _ASM_X86_AMD_IOMMU_PROTO_H
diff --git a/drivers/iommu/amd_iommu_quirks.c b/drivers/iommu/amd_iommu_quirks.c
new file mode 100644
index 0000000..5120ce4
--- /dev/null
+++ b/drivers/iommu/amd_iommu_quirks.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Quirks for AMD IOMMU
+ *
+ * Copyright (C) 2019 Kai-Heng Feng <kai.heng.feng@canonical.com>
+ */
+
+#ifdef CONFIG_DMI
+#include <linux/dmi.h>
+
+#include "amd_iommu.h"
+
+#define IVHD_SPECIAL_IOAPIC 1
+
+struct ivrs_quirk_entry {
+ u8 id;
+ u16 devid;
+};
+
+enum {
+ DELL_INSPIRON_7375 = 0,
+ DELL_LATITUDE_5495,
+ LENOVO_IDEAPAD_330S_15ARR,
+};
+
+static const struct ivrs_quirk_entry ivrs_ioapic_quirks[][3] __initconst = {
+ /* ivrs_ioapic[4]=00:14.0 ivrs_ioapic[5]=00:00.2 */
+ [DELL_INSPIRON_7375] = {
+ { .id = 4, .devid = 0xa0 },
+ { .id = 5, .devid = 0x2 },
+ {}
+ },
+ /* ivrs_ioapic[4]=00:14.0 */
+ [DELL_LATITUDE_5495] = {
+ { .id = 4, .devid = 0xa0 },
+ {}
+ },
+ /* ivrs_ioapic[32]=00:14.0 */
+ [LENOVO_IDEAPAD_330S_15ARR] = {
+ { .id = 32, .devid = 0xa0 },
+ {}
+ },
+ {}
+};
+
+static int __init ivrs_ioapic_quirk_cb(const struct dmi_system_id *d)
+{
+ const struct ivrs_quirk_entry *i;
+
+ for (i = d->driver_data; i->id != 0 && i->devid != 0; i++)
+ add_special_device(IVHD_SPECIAL_IOAPIC, i->id, (u16 *)&i->devid, 0);
+
+ return 0;
+}
+
+static const struct dmi_system_id ivrs_quirks[] __initconst = {
+ {
+ .callback = ivrs_ioapic_quirk_cb,
+ .ident = "Dell Inspiron 7375",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 7375"),
+ },
+ .driver_data = (void *)&ivrs_ioapic_quirks[DELL_INSPIRON_7375],
+ },
+ {
+ .callback = ivrs_ioapic_quirk_cb,
+ .ident = "Dell Latitude 5495",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Latitude 5495"),
+ },
+ .driver_data = (void *)&ivrs_ioapic_quirks[DELL_LATITUDE_5495],
+ },
+ {
+ /*
+ * Acer Aspire A315-41 requires the very same workaround as
+ * Dell Latitude 5495
+ */
+ .callback = ivrs_ioapic_quirk_cb,
+ .ident = "Acer Aspire A315-41",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Aspire A315-41"),
+ },
+ .driver_data = (void *)&ivrs_ioapic_quirks[DELL_LATITUDE_5495],
+ },
+ {
+ .callback = ivrs_ioapic_quirk_cb,
+ .ident = "Lenovo ideapad 330S-15ARR",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "81FB"),
+ },
+ .driver_data = (void *)&ivrs_ioapic_quirks[LENOVO_IDEAPAD_330S_15ARR],
+ },
+ {}
+};
+
+void __init amd_iommu_apply_ivrs_quirks(void)
+{
+ dmi_check_system(ivrs_quirks);
+}
+#endif
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index e2b342e..17bd5a3 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -1,20 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
* Author: Joerg Roedel <jroedel@suse.de>
* Leo Duran <leo.duran@amd.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _ASM_X86_AMD_IOMMU_TYPES_H
@@ -72,6 +60,12 @@
#define MMIO_PPR_LOG_OFFSET 0x0038
#define MMIO_GA_LOG_BASE_OFFSET 0x00e0
#define MMIO_GA_LOG_TAIL_OFFSET 0x00e8
+#define MMIO_MSI_ADDR_LO_OFFSET 0x015C
+#define MMIO_MSI_ADDR_HI_OFFSET 0x0160
+#define MMIO_MSI_DATA_OFFSET 0x0164
+#define MMIO_INTCAPXT_EVT_OFFSET 0x0170
+#define MMIO_INTCAPXT_PPR_OFFSET 0x0178
+#define MMIO_INTCAPXT_GALOG_OFFSET 0x0180
#define MMIO_CMD_HEAD_OFFSET 0x2000
#define MMIO_CMD_TAIL_OFFSET 0x2008
#define MMIO_EVT_HEAD_OFFSET 0x2010
@@ -136,8 +130,8 @@
#define EVENT_TYPE_INV_PPR_REQ 0x9
#define EVENT_DEVID_MASK 0xffff
#define EVENT_DEVID_SHIFT 0
-#define EVENT_DOMID_MASK 0xffff
-#define EVENT_DOMID_SHIFT 0
+#define EVENT_DOMID_MASK_LO 0xffff
+#define EVENT_DOMID_MASK_HI 0xf0000
#define EVENT_FLAGS_MASK 0xfff
#define EVENT_FLAGS_SHIFT 0x10
@@ -162,6 +156,7 @@
#define CONTROL_GALOG_EN 0x1CULL
#define CONTROL_GAINT_EN 0x1DULL
#define CONTROL_XT_EN 0x32ULL
+#define CONTROL_INTCAPXT_EN 0x33ULL
#define CTRL_INV_TO_MASK (7 << CONTROL_INV_TIMEOUT)
#define CTRL_INV_TO_NONE 0
@@ -269,6 +264,7 @@
#define PAGE_MODE_4_LEVEL 0x04
#define PAGE_MODE_5_LEVEL 0x05
#define PAGE_MODE_6_LEVEL 0x06
+#define PAGE_MODE_7_LEVEL 0x07
#define PM_LEVEL_SHIFT(x) (12 + ((x) * 9))
#define PM_LEVEL_SIZE(x) (((x) < 6) ? \
@@ -373,6 +369,8 @@
#define IOMMU_PROT_IR 0x01
#define IOMMU_PROT_IW 0x02
+#define IOMMU_UNITY_MAP_FLAG_EXCL_RANGE (1 << 2)
+
/* IOMMU capabilities */
#define IOMMU_CAP_IOTLB 24
#define IOMMU_CAP_NPCACHE 26
@@ -477,7 +475,6 @@
int glx; /* Number of levels for GCR3 table */
u64 *gcr3_tbl; /* Guest CR3 table */
unsigned long flags; /* flags to find out type of domain */
- bool updated; /* complete domain flush required */
unsigned dev_cnt; /* devices assigned to this domain */
unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
};
@@ -601,6 +598,8 @@
/* DebugFS Info */
struct dentry *debugfs;
#endif
+ /* IRQ notifier for IntCapXT interrupt */
+ struct irq_affinity_notify intcapxt_notify;
};
static inline struct amd_iommu *dev_to_amd_iommu(struct device *dev)
@@ -634,6 +633,9 @@
* This struct contains device specific data for the IOMMU
*/
struct iommu_dev_data {
+ /* Protect against attach/detach races */
+ spinlock_t lock;
+
struct list_head list; /* For domain->dev_list */
struct llist_node dev_data_list; /* For global dev_data_list */
struct protection_domain *domain; /* Domain the device is bound to */
@@ -672,12 +674,6 @@
extern struct amd_iommu *amd_iommus[MAX_IOMMUS];
/*
- * Declarations for the global list of all protection domains
- */
-extern spinlock_t amd_iommu_pd_lock;
-extern struct list_head amd_iommu_pd_list;
-
-/*
* Structure defining one entry in the device table
*/
struct dev_table_entry {
@@ -879,6 +875,15 @@
struct msi_msg msi_entry;
void *entry; /* Pointer to union irte or struct irte_ga */
void *ref; /* Pointer to the actual irte */
+
+ /*
+ * Store information for activating/deactivating
+ * guest virtual APIC mode at runtime.
+ */
+ struct irq_cfg *cfg;
+ int ga_vector;
+ int ga_root_ptr;
+ int ga_tag;
};
struct amd_irte_ops {
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index 58da65d..d6d85de 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -1,21 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2010-2012 Advanced Micro Devices, Inc.
* Author: Joerg Roedel <jroedel@suse.de>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#define pr_fmt(fmt) "AMD-Vi: " fmt
+
#include <linux/mmu_notifier.h>
#include <linux/amd-iommu.h>
#include <linux/mm_types.h>
@@ -368,29 +358,6 @@
return container_of(mn, struct pasid_state, mn);
}
-static void __mn_flush_page(struct mmu_notifier *mn,
- unsigned long address)
-{
- struct pasid_state *pasid_state;
- struct device_state *dev_state;
-
- pasid_state = mn_to_state(mn);
- dev_state = pasid_state->device_state;
-
- amd_iommu_flush_page(dev_state->domain, pasid_state->pasid, address);
-}
-
-static int mn_clear_flush_young(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start,
- unsigned long end)
-{
- for (; start < end; start += PAGE_SIZE)
- __mn_flush_page(mn, start);
-
- return 0;
-}
-
static void mn_invalidate_range(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start, unsigned long end)
@@ -427,9 +394,7 @@
}
static const struct mmu_notifier_ops iommu_mn = {
- .flags = MMU_INVALIDATE_DOES_NOT_BLOCK,
.release = mn_release,
- .clear_flush_young = mn_clear_flush_young,
.invalidate_range = mn_invalidate_range,
};
diff --git a/drivers/iommu/arm-smmu-impl.c b/drivers/iommu/arm-smmu-impl.c
new file mode 100644
index 0000000..5c87a38
--- /dev/null
+++ b/drivers/iommu/arm-smmu-impl.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Miscellaneous Arm SMMU implementation and integration quirks
+// Copyright (C) 2019 Arm Limited
+
+#define pr_fmt(fmt) "arm-smmu: " fmt
+
+#include <linux/bitfield.h>
+#include <linux/of.h>
+
+#include "arm-smmu.h"
+
+
+static int arm_smmu_gr0_ns(int offset)
+{
+ switch(offset) {
+ case ARM_SMMU_GR0_sCR0:
+ case ARM_SMMU_GR0_sACR:
+ case ARM_SMMU_GR0_sGFSR:
+ case ARM_SMMU_GR0_sGFSYNR0:
+ case ARM_SMMU_GR0_sGFSYNR1:
+ case ARM_SMMU_GR0_sGFSYNR2:
+ return offset + 0x400;
+ default:
+ return offset;
+ }
+}
+
+static u32 arm_smmu_read_ns(struct arm_smmu_device *smmu, int page,
+ int offset)
+{
+ if (page == ARM_SMMU_GR0)
+ offset = arm_smmu_gr0_ns(offset);
+ return readl_relaxed(arm_smmu_page(smmu, page) + offset);
+}
+
+static void arm_smmu_write_ns(struct arm_smmu_device *smmu, int page,
+ int offset, u32 val)
+{
+ if (page == ARM_SMMU_GR0)
+ offset = arm_smmu_gr0_ns(offset);
+ writel_relaxed(val, arm_smmu_page(smmu, page) + offset);
+}
+
+/* Since we don't care for sGFAR, we can do without 64-bit accessors */
+static const struct arm_smmu_impl calxeda_impl = {
+ .read_reg = arm_smmu_read_ns,
+ .write_reg = arm_smmu_write_ns,
+};
+
+
+struct cavium_smmu {
+ struct arm_smmu_device smmu;
+ u32 id_base;
+};
+
+static int cavium_cfg_probe(struct arm_smmu_device *smmu)
+{
+ static atomic_t context_count = ATOMIC_INIT(0);
+ struct cavium_smmu *cs = container_of(smmu, struct cavium_smmu, smmu);
+ /*
+ * Cavium CN88xx erratum #27704.
+ * Ensure ASID and VMID allocation is unique across all SMMUs in
+ * the system.
+ */
+ cs->id_base = atomic_fetch_add(smmu->num_context_banks, &context_count);
+ dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
+
+ return 0;
+}
+
+static int cavium_init_context(struct arm_smmu_domain *smmu_domain)
+{
+ struct cavium_smmu *cs = container_of(smmu_domain->smmu,
+ struct cavium_smmu, smmu);
+
+ if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
+ smmu_domain->cfg.vmid += cs->id_base;
+ else
+ smmu_domain->cfg.asid += cs->id_base;
+
+ return 0;
+}
+
+static const struct arm_smmu_impl cavium_impl = {
+ .cfg_probe = cavium_cfg_probe,
+ .init_context = cavium_init_context,
+};
+
+static struct arm_smmu_device *cavium_smmu_impl_init(struct arm_smmu_device *smmu)
+{
+ struct cavium_smmu *cs;
+
+ cs = devm_kzalloc(smmu->dev, sizeof(*cs), GFP_KERNEL);
+ if (!cs)
+ return ERR_PTR(-ENOMEM);
+
+ cs->smmu = *smmu;
+ cs->smmu.impl = &cavium_impl;
+
+ devm_kfree(smmu->dev, smmu);
+
+ return &cs->smmu;
+}
+
+
+#define ARM_MMU500_ACTLR_CPRE (1 << 1)
+
+#define ARM_MMU500_ACR_CACHE_LOCK (1 << 26)
+#define ARM_MMU500_ACR_S2CRB_TLBEN (1 << 10)
+#define ARM_MMU500_ACR_SMTNMB_TLBEN (1 << 8)
+
+static int arm_mmu500_reset(struct arm_smmu_device *smmu)
+{
+ u32 reg, major;
+ int i;
+ /*
+ * On MMU-500 r2p0 onwards we need to clear ACR.CACHE_LOCK before
+ * writes to the context bank ACTLRs will stick. And we just hope that
+ * Secure has also cleared SACR.CACHE_LOCK for this to take effect...
+ */
+ reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID7);
+ major = FIELD_GET(ID7_MAJOR, reg);
+ reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sACR);
+ if (major >= 2)
+ reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
+ /*
+ * Allow unmatched Stream IDs to allocate bypass
+ * TLB entries for reduced latency.
+ */
+ reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
+ arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sACR, reg);
+
+ /*
+ * Disable MMU-500's not-particularly-beneficial next-page
+ * prefetcher for the sake of errata #841119 and #826419.
+ */
+ for (i = 0; i < smmu->num_context_banks; ++i) {
+ reg = arm_smmu_cb_read(smmu, i, ARM_SMMU_CB_ACTLR);
+ reg &= ~ARM_MMU500_ACTLR_CPRE;
+ arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_ACTLR, reg);
+ }
+
+ return 0;
+}
+
+static const struct arm_smmu_impl arm_mmu500_impl = {
+ .reset = arm_mmu500_reset,
+};
+
+
+struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu)
+{
+ /*
+ * We will inevitably have to combine model-specific implementation
+ * quirks with platform-specific integration quirks, but everything
+ * we currently support happens to work out as straightforward
+ * mutually-exclusive assignments.
+ */
+ switch (smmu->model) {
+ case ARM_MMU500:
+ smmu->impl = &arm_mmu500_impl;
+ break;
+ case CAVIUM_SMMUV2:
+ return cavium_smmu_impl_init(smmu);
+ default:
+ break;
+ }
+
+ if (of_property_read_bool(smmu->dev->of_node,
+ "calxeda,smmu-secure-config-access"))
+ smmu->impl = &calxeda_impl;
+
+ return smmu;
+}
diff --git a/drivers/iommu/arm-smmu-regs.h b/drivers/iommu/arm-smmu-regs.h
deleted file mode 100644
index a1226e4..0000000
--- a/drivers/iommu/arm-smmu-regs.h
+++ /dev/null
@@ -1,220 +0,0 @@
-/*
- * IOMMU API for ARM architected SMMU implementations.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (C) 2013 ARM Limited
- *
- * Author: Will Deacon <will.deacon@arm.com>
- */
-
-#ifndef _ARM_SMMU_REGS_H
-#define _ARM_SMMU_REGS_H
-
-/* Configuration registers */
-#define ARM_SMMU_GR0_sCR0 0x0
-#define sCR0_CLIENTPD (1 << 0)
-#define sCR0_GFRE (1 << 1)
-#define sCR0_GFIE (1 << 2)
-#define sCR0_EXIDENABLE (1 << 3)
-#define sCR0_GCFGFRE (1 << 4)
-#define sCR0_GCFGFIE (1 << 5)
-#define sCR0_USFCFG (1 << 10)
-#define sCR0_VMIDPNE (1 << 11)
-#define sCR0_PTM (1 << 12)
-#define sCR0_FB (1 << 13)
-#define sCR0_VMID16EN (1 << 31)
-#define sCR0_BSU_SHIFT 14
-#define sCR0_BSU_MASK 0x3
-
-/* Auxiliary Configuration register */
-#define ARM_SMMU_GR0_sACR 0x10
-
-/* Identification registers */
-#define ARM_SMMU_GR0_ID0 0x20
-#define ARM_SMMU_GR0_ID1 0x24
-#define ARM_SMMU_GR0_ID2 0x28
-#define ARM_SMMU_GR0_ID3 0x2c
-#define ARM_SMMU_GR0_ID4 0x30
-#define ARM_SMMU_GR0_ID5 0x34
-#define ARM_SMMU_GR0_ID6 0x38
-#define ARM_SMMU_GR0_ID7 0x3c
-#define ARM_SMMU_GR0_sGFSR 0x48
-#define ARM_SMMU_GR0_sGFSYNR0 0x50
-#define ARM_SMMU_GR0_sGFSYNR1 0x54
-#define ARM_SMMU_GR0_sGFSYNR2 0x58
-
-#define ID0_S1TS (1 << 30)
-#define ID0_S2TS (1 << 29)
-#define ID0_NTS (1 << 28)
-#define ID0_SMS (1 << 27)
-#define ID0_ATOSNS (1 << 26)
-#define ID0_PTFS_NO_AARCH32 (1 << 25)
-#define ID0_PTFS_NO_AARCH32S (1 << 24)
-#define ID0_CTTW (1 << 14)
-#define ID0_NUMIRPT_SHIFT 16
-#define ID0_NUMIRPT_MASK 0xff
-#define ID0_NUMSIDB_SHIFT 9
-#define ID0_NUMSIDB_MASK 0xf
-#define ID0_EXIDS (1 << 8)
-#define ID0_NUMSMRG_SHIFT 0
-#define ID0_NUMSMRG_MASK 0xff
-
-#define ID1_PAGESIZE (1 << 31)
-#define ID1_NUMPAGENDXB_SHIFT 28
-#define ID1_NUMPAGENDXB_MASK 7
-#define ID1_NUMS2CB_SHIFT 16
-#define ID1_NUMS2CB_MASK 0xff
-#define ID1_NUMCB_SHIFT 0
-#define ID1_NUMCB_MASK 0xff
-
-#define ID2_OAS_SHIFT 4
-#define ID2_OAS_MASK 0xf
-#define ID2_IAS_SHIFT 0
-#define ID2_IAS_MASK 0xf
-#define ID2_UBS_SHIFT 8
-#define ID2_UBS_MASK 0xf
-#define ID2_PTFS_4K (1 << 12)
-#define ID2_PTFS_16K (1 << 13)
-#define ID2_PTFS_64K (1 << 14)
-#define ID2_VMID16 (1 << 15)
-
-#define ID7_MAJOR_SHIFT 4
-#define ID7_MAJOR_MASK 0xf
-
-/* Global TLB invalidation */
-#define ARM_SMMU_GR0_TLBIVMID 0x64
-#define ARM_SMMU_GR0_TLBIALLNSNH 0x68
-#define ARM_SMMU_GR0_TLBIALLH 0x6c
-#define ARM_SMMU_GR0_sTLBGSYNC 0x70
-#define ARM_SMMU_GR0_sTLBGSTATUS 0x74
-#define sTLBGSTATUS_GSACTIVE (1 << 0)
-
-/* Stream mapping registers */
-#define ARM_SMMU_GR0_SMR(n) (0x800 + ((n) << 2))
-#define SMR_VALID (1 << 31)
-#define SMR_MASK_SHIFT 16
-#define SMR_ID_SHIFT 0
-
-#define ARM_SMMU_GR0_S2CR(n) (0xc00 + ((n) << 2))
-#define S2CR_CBNDX_SHIFT 0
-#define S2CR_CBNDX_MASK 0xff
-#define S2CR_EXIDVALID (1 << 10)
-#define S2CR_TYPE_SHIFT 16
-#define S2CR_TYPE_MASK 0x3
-enum arm_smmu_s2cr_type {
- S2CR_TYPE_TRANS,
- S2CR_TYPE_BYPASS,
- S2CR_TYPE_FAULT,
-};
-
-#define S2CR_PRIVCFG_SHIFT 24
-#define S2CR_PRIVCFG_MASK 0x3
-enum arm_smmu_s2cr_privcfg {
- S2CR_PRIVCFG_DEFAULT,
- S2CR_PRIVCFG_DIPAN,
- S2CR_PRIVCFG_UNPRIV,
- S2CR_PRIVCFG_PRIV,
-};
-
-/* Context bank attribute registers */
-#define ARM_SMMU_GR1_CBAR(n) (0x0 + ((n) << 2))
-#define CBAR_VMID_SHIFT 0
-#define CBAR_VMID_MASK 0xff
-#define CBAR_S1_BPSHCFG_SHIFT 8
-#define CBAR_S1_BPSHCFG_MASK 3
-#define CBAR_S1_BPSHCFG_NSH 3
-#define CBAR_S1_MEMATTR_SHIFT 12
-#define CBAR_S1_MEMATTR_MASK 0xf
-#define CBAR_S1_MEMATTR_WB 0xf
-#define CBAR_TYPE_SHIFT 16
-#define CBAR_TYPE_MASK 0x3
-#define CBAR_TYPE_S2_TRANS (0 << CBAR_TYPE_SHIFT)
-#define CBAR_TYPE_S1_TRANS_S2_BYPASS (1 << CBAR_TYPE_SHIFT)
-#define CBAR_TYPE_S1_TRANS_S2_FAULT (2 << CBAR_TYPE_SHIFT)
-#define CBAR_TYPE_S1_TRANS_S2_TRANS (3 << CBAR_TYPE_SHIFT)
-#define CBAR_IRPTNDX_SHIFT 24
-#define CBAR_IRPTNDX_MASK 0xff
-
-#define ARM_SMMU_GR1_CBA2R(n) (0x800 + ((n) << 2))
-#define CBA2R_RW64_32BIT (0 << 0)
-#define CBA2R_RW64_64BIT (1 << 0)
-#define CBA2R_VMID_SHIFT 16
-#define CBA2R_VMID_MASK 0xffff
-
-#define ARM_SMMU_CB_SCTLR 0x0
-#define ARM_SMMU_CB_ACTLR 0x4
-#define ARM_SMMU_CB_RESUME 0x8
-#define ARM_SMMU_CB_TTBCR2 0x10
-#define ARM_SMMU_CB_TTBR0 0x20
-#define ARM_SMMU_CB_TTBR1 0x28
-#define ARM_SMMU_CB_TTBCR 0x30
-#define ARM_SMMU_CB_CONTEXTIDR 0x34
-#define ARM_SMMU_CB_S1_MAIR0 0x38
-#define ARM_SMMU_CB_S1_MAIR1 0x3c
-#define ARM_SMMU_CB_PAR 0x50
-#define ARM_SMMU_CB_FSR 0x58
-#define ARM_SMMU_CB_FAR 0x60
-#define ARM_SMMU_CB_FSYNR0 0x68
-#define ARM_SMMU_CB_S1_TLBIVA 0x600
-#define ARM_SMMU_CB_S1_TLBIASID 0x610
-#define ARM_SMMU_CB_S1_TLBIVAL 0x620
-#define ARM_SMMU_CB_S2_TLBIIPAS2 0x630
-#define ARM_SMMU_CB_S2_TLBIIPAS2L 0x638
-#define ARM_SMMU_CB_TLBSYNC 0x7f0
-#define ARM_SMMU_CB_TLBSTATUS 0x7f4
-#define ARM_SMMU_CB_ATS1PR 0x800
-#define ARM_SMMU_CB_ATSR 0x8f0
-
-#define SCTLR_S1_ASIDPNE (1 << 12)
-#define SCTLR_CFCFG (1 << 7)
-#define SCTLR_CFIE (1 << 6)
-#define SCTLR_CFRE (1 << 5)
-#define SCTLR_E (1 << 4)
-#define SCTLR_AFE (1 << 2)
-#define SCTLR_TRE (1 << 1)
-#define SCTLR_M (1 << 0)
-
-#define CB_PAR_F (1 << 0)
-
-#define ATSR_ACTIVE (1 << 0)
-
-#define RESUME_RETRY (0 << 0)
-#define RESUME_TERMINATE (1 << 0)
-
-#define TTBCR2_SEP_SHIFT 15
-#define TTBCR2_SEP_UPSTREAM (0x7 << TTBCR2_SEP_SHIFT)
-#define TTBCR2_AS (1 << 4)
-
-#define TTBRn_ASID_SHIFT 48
-
-#define FSR_MULTI (1 << 31)
-#define FSR_SS (1 << 30)
-#define FSR_UUT (1 << 8)
-#define FSR_ASF (1 << 7)
-#define FSR_TLBLKF (1 << 6)
-#define FSR_TLBMCF (1 << 5)
-#define FSR_EF (1 << 4)
-#define FSR_PF (1 << 3)
-#define FSR_AFF (1 << 2)
-#define FSR_TF (1 << 1)
-
-#define FSR_IGN (FSR_AFF | FSR_ASF | \
- FSR_TLBMCF | FSR_TLBLKF)
-#define FSR_FAULT (FSR_MULTI | FSR_SS | FSR_UUT | \
- FSR_EF | FSR_PF | FSR_TF | FSR_IGN)
-
-#define FSYNR0_WNR (1 << 4)
-
-#endif /* _ARM_SMMU_REGS_H */
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 5059d09..8da93e7 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1,18 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* IOMMU API for ARM architected SMMUv3 implementations.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
* Copyright (C) 2015 ARM Limited
*
* Author: Will Deacon <will.deacon@arm.com>
@@ -29,21 +18,22 @@
#include <linux/dma-iommu.h>
#include <linux/err.h>
#include <linux/interrupt.h>
+#include <linux/io-pgtable.h>
#include <linux/iommu.h>
#include <linux/iopoll.h>
-#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/moduleparam.h>
#include <linux/msi.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_iommu.h>
#include <linux/of_platform.h>
#include <linux/pci.h>
+#include <linux/pci-ats.h>
#include <linux/platform_device.h>
#include <linux/amba/bus.h>
-#include "io-pgtable.h"
-
/* MMIO registers */
#define ARM_SMMU_IDR0 0x0
#define IDR0_ST_LVL GENMASK(28, 27)
@@ -97,6 +87,7 @@
#define IDR5_VAX_52_BIT 1
#define ARM_SMMU_CR0 0x20
+#define CR0_ATSCHK (1 << 4)
#define CR0_CMDQEN (1 << 3)
#define CR0_EVTQEN (1 << 2)
#define CR0_PRIQEN (1 << 1)
@@ -190,17 +181,25 @@
#define ARM_SMMU_MEMATTR_DEVICE_nGnRE 0x1
#define ARM_SMMU_MEMATTR_OIWB 0xf
-#define Q_IDX(q, p) ((p) & ((1 << (q)->max_n_shift) - 1))
-#define Q_WRP(q, p) ((p) & (1 << (q)->max_n_shift))
-#define Q_OVERFLOW_FLAG (1 << 31)
-#define Q_OVF(q, p) ((p) & Q_OVERFLOW_FLAG)
+#define Q_IDX(llq, p) ((p) & ((1 << (llq)->max_n_shift) - 1))
+#define Q_WRP(llq, p) ((p) & (1 << (llq)->max_n_shift))
+#define Q_OVERFLOW_FLAG (1U << 31)
+#define Q_OVF(p) ((p) & Q_OVERFLOW_FLAG)
#define Q_ENT(q, p) ((q)->base + \
- Q_IDX(q, p) * (q)->ent_dwords)
+ Q_IDX(&((q)->llq), p) * \
+ (q)->ent_dwords)
#define Q_BASE_RWA (1UL << 62)
#define Q_BASE_ADDR_MASK GENMASK_ULL(51, 5)
#define Q_BASE_LOG2SIZE GENMASK(4, 0)
+/* Ensure DMA allocations are naturally aligned */
+#ifdef CONFIG_CMA_ALIGNMENT
+#define Q_MAX_SZ_SHIFT (PAGE_SHIFT + CONFIG_CMA_ALIGNMENT)
+#else
+#define Q_MAX_SZ_SHIFT (PAGE_SHIFT + MAX_ORDER - 1)
+#endif
+
/*
* Stream table.
*
@@ -298,13 +297,24 @@
FIELD_GET(ARM64_TCR_##fld, tcr))
/* Command queue */
-#define CMDQ_ENT_DWORDS 2
-#define CMDQ_MAX_SZ_SHIFT 8
+#define CMDQ_ENT_SZ_SHIFT 4
+#define CMDQ_ENT_DWORDS ((1 << CMDQ_ENT_SZ_SHIFT) >> 3)
+#define CMDQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - CMDQ_ENT_SZ_SHIFT)
#define CMDQ_CONS_ERR GENMASK(30, 24)
#define CMDQ_ERR_CERROR_NONE_IDX 0
#define CMDQ_ERR_CERROR_ILL_IDX 1
#define CMDQ_ERR_CERROR_ABT_IDX 2
+#define CMDQ_ERR_CERROR_ATC_INV_IDX 3
+
+#define CMDQ_PROD_OWNED_FLAG Q_OVERFLOW_FLAG
+
+/*
+ * This is used to size the command queue and therefore must be at least
+ * BITS_PER_LONG so that the valid_map works correctly (it relies on the
+ * total number of queue entries being a multiple of BITS_PER_LONG).
+ */
+#define CMDQ_BATCH_ENTRIES BITS_PER_LONG
#define CMDQ_0_OP GENMASK_ULL(7, 0)
#define CMDQ_0_SSV (1UL << 11)
@@ -323,6 +333,12 @@
#define CMDQ_TLBI_1_VA_MASK GENMASK_ULL(63, 12)
#define CMDQ_TLBI_1_IPA_MASK GENMASK_ULL(51, 12)
+#define CMDQ_ATC_0_SSID GENMASK_ULL(31, 12)
+#define CMDQ_ATC_0_SID GENMASK_ULL(63, 32)
+#define CMDQ_ATC_0_GLOBAL (1UL << 9)
+#define CMDQ_ATC_1_SIZE GENMASK_ULL(5, 0)
+#define CMDQ_ATC_1_ADDR_MASK GENMASK_ULL(63, 12)
+
#define CMDQ_PRI_0_SSID GENMASK_ULL(31, 12)
#define CMDQ_PRI_0_SID GENMASK_ULL(63, 32)
#define CMDQ_PRI_1_GRPID GENMASK_ULL(8, 0)
@@ -338,14 +354,16 @@
#define CMDQ_SYNC_1_MSIADDR_MASK GENMASK_ULL(51, 2)
/* Event queue */
-#define EVTQ_ENT_DWORDS 4
-#define EVTQ_MAX_SZ_SHIFT 7
+#define EVTQ_ENT_SZ_SHIFT 5
+#define EVTQ_ENT_DWORDS ((1 << EVTQ_ENT_SZ_SHIFT) >> 3)
+#define EVTQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - EVTQ_ENT_SZ_SHIFT)
#define EVTQ_0_ID GENMASK_ULL(7, 0)
/* PRI queue */
-#define PRIQ_ENT_DWORDS 2
-#define PRIQ_MAX_SZ_SHIFT 8
+#define PRIQ_ENT_SZ_SHIFT 4
+#define PRIQ_ENT_DWORDS ((1 << PRIQ_ENT_SZ_SHIFT) >> 3)
+#define PRIQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - PRIQ_ENT_SZ_SHIFT)
#define PRIQ_0_SID GENMASK_ULL(31, 0)
#define PRIQ_0_SSID GENMASK_ULL(51, 32)
@@ -360,13 +378,16 @@
#define PRIQ_1_ADDR_MASK GENMASK_ULL(63, 12)
/* High-level queue structures */
-#define ARM_SMMU_POLL_TIMEOUT_US 100
-#define ARM_SMMU_CMDQ_SYNC_TIMEOUT_US 1000000 /* 1s! */
-#define ARM_SMMU_CMDQ_SYNC_SPIN_COUNT 10
+#define ARM_SMMU_POLL_TIMEOUT_US 1000000 /* 1s! */
+#define ARM_SMMU_POLL_SPIN_COUNT 10
#define MSI_IOVA_BASE 0x8000000
#define MSI_IOVA_LENGTH 0x100000
+/*
+ * Not really modular, but the easiest way to keep compat with existing
+ * bootargs behaviour is to continue using module_param_named here.
+ */
static bool disable_bypass = 1;
module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
MODULE_PARM_DESC(disable_bypass,
@@ -440,6 +461,16 @@
u64 addr;
} tlbi;
+ #define CMDQ_OP_ATC_INV 0x40
+ #define ATC_INV_SIZE_ALL 52
+ struct {
+ u32 sid;
+ u32 ssid;
+ u64 addr;
+ u8 size;
+ bool global;
+ } atc;
+
#define CMDQ_OP_PRI_RESP 0x41
struct {
u32 sid;
@@ -450,13 +481,29 @@
#define CMDQ_OP_CMD_SYNC 0x46
struct {
- u32 msidata;
u64 msiaddr;
} sync;
};
};
+struct arm_smmu_ll_queue {
+ union {
+ u64 val;
+ struct {
+ u32 prod;
+ u32 cons;
+ };
+ struct {
+ atomic_t prod;
+ atomic_t cons;
+ } atomic;
+ u8 __pad[SMP_CACHE_BYTES];
+ } ____cacheline_aligned_in_smp;
+ u32 max_n_shift;
+};
+
struct arm_smmu_queue {
+ struct arm_smmu_ll_queue llq;
int irq; /* Wired interrupt */
__le64 *base;
@@ -464,17 +511,23 @@
u64 q_base;
size_t ent_dwords;
- u32 max_n_shift;
- u32 prod;
- u32 cons;
u32 __iomem *prod_reg;
u32 __iomem *cons_reg;
};
+struct arm_smmu_queue_poll {
+ ktime_t timeout;
+ unsigned int delay;
+ unsigned int spin_cnt;
+ bool wfe;
+};
+
struct arm_smmu_cmdq {
struct arm_smmu_queue q;
- spinlock_t lock;
+ atomic_long_t *valid_map;
+ atomic_t owner_prod;
+ atomic_t lock;
};
struct arm_smmu_evtq {
@@ -512,19 +565,6 @@
u64 vtcr;
};
-struct arm_smmu_strtab_ent {
- /*
- * An STE is "assigned" if the master emitting the corresponding SID
- * is attached to a domain. The behaviour of an unassigned STE is
- * determined by the disable_bypass parameter, whereas an assigned
- * STE behaves according to s1_cfg/s2_cfg, which themselves are
- * configured according to the domain type.
- */
- bool assigned;
- struct arm_smmu_s1_cfg *s1_cfg;
- struct arm_smmu_s2_cfg *s2_cfg;
-};
-
struct arm_smmu_strtab_cfg {
__le64 *strtab;
dma_addr_t strtab_dma;
@@ -567,7 +607,6 @@
int gerr_irq;
int combined_irq;
- atomic_t sync_nr;
unsigned long ias; /* IPA */
unsigned long oas; /* PA */
@@ -586,16 +625,19 @@
struct arm_smmu_strtab_cfg strtab_cfg;
- u32 sync_count;
-
/* IOMMU core code handle */
struct iommu_device iommu;
};
/* SMMU private data for each master */
-struct arm_smmu_master_data {
+struct arm_smmu_master {
struct arm_smmu_device *smmu;
- struct arm_smmu_strtab_ent ste;
+ struct device *dev;
+ struct arm_smmu_domain *domain;
+ struct list_head domain_head;
+ u32 *sids;
+ unsigned int num_sids;
+ bool ats_enabled;
};
/* SMMU private data for an IOMMU domain */
@@ -611,6 +653,8 @@
struct mutex init_mutex; /* Protects smmu pointer */
struct io_pgtable_ops *pgtbl_ops;
+ bool non_strict;
+ atomic_t nr_ats_masters;
enum arm_smmu_domain_stage stage;
union {
@@ -619,6 +663,9 @@
};
struct iommu_domain domain;
+
+ struct list_head devices;
+ spinlock_t devices_lock;
};
struct arm_smmu_option_prop {
@@ -662,79 +709,97 @@
}
/* Low-level queue manipulation functions */
-static bool queue_full(struct arm_smmu_queue *q)
+static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
+{
+ u32 space, prod, cons;
+
+ prod = Q_IDX(q, q->prod);
+ cons = Q_IDX(q, q->cons);
+
+ if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
+ space = (1 << q->max_n_shift) - (prod - cons);
+ else
+ space = cons - prod;
+
+ return space >= n;
+}
+
+static bool queue_full(struct arm_smmu_ll_queue *q)
{
return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
}
-static bool queue_empty(struct arm_smmu_queue *q)
+static bool queue_empty(struct arm_smmu_ll_queue *q)
{
return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
}
-static void queue_sync_cons(struct arm_smmu_queue *q)
+static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
{
- q->cons = readl_relaxed(q->cons_reg);
+ return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
+ (Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
+ ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
+ (Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
}
-static void queue_inc_cons(struct arm_smmu_queue *q)
+static void queue_sync_cons_out(struct arm_smmu_queue *q)
+{
+ /*
+ * Ensure that all CPU accesses (reads and writes) to the queue
+ * are complete before we update the cons pointer.
+ */
+ mb();
+ writel_relaxed(q->llq.cons, q->cons_reg);
+}
+
+static void queue_inc_cons(struct arm_smmu_ll_queue *q)
{
u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
-
- q->cons = Q_OVF(q, q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
- writel(q->cons, q->cons_reg);
+ q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
}
-static int queue_sync_prod(struct arm_smmu_queue *q)
+static int queue_sync_prod_in(struct arm_smmu_queue *q)
{
int ret = 0;
u32 prod = readl_relaxed(q->prod_reg);
- if (Q_OVF(q, prod) != Q_OVF(q, q->prod))
+ if (Q_OVF(prod) != Q_OVF(q->llq.prod))
ret = -EOVERFLOW;
- q->prod = prod;
+ q->llq.prod = prod;
return ret;
}
-static void queue_inc_prod(struct arm_smmu_queue *q)
+static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
{
- u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + 1;
-
- q->prod = Q_OVF(q, q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
- writel(q->prod, q->prod_reg);
+ u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
+ return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
}
-/*
- * Wait for the SMMU to consume items. If drain is true, wait until the queue
- * is empty. Otherwise, wait until there is at least one free slot.
- */
-static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe)
+static void queue_poll_init(struct arm_smmu_device *smmu,
+ struct arm_smmu_queue_poll *qp)
{
- ktime_t timeout;
- unsigned int delay = 1, spin_cnt = 0;
+ qp->delay = 1;
+ qp->spin_cnt = 0;
+ qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
+ qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
+}
- /* Wait longer if it's a CMD_SYNC */
- timeout = ktime_add_us(ktime_get(), sync ?
- ARM_SMMU_CMDQ_SYNC_TIMEOUT_US :
- ARM_SMMU_POLL_TIMEOUT_US);
+static int queue_poll(struct arm_smmu_queue_poll *qp)
+{
+ if (ktime_compare(ktime_get(), qp->timeout) > 0)
+ return -ETIMEDOUT;
- while (queue_sync_cons(q), (sync ? !queue_empty(q) : queue_full(q))) {
- if (ktime_compare(ktime_get(), timeout) > 0)
- return -ETIMEDOUT;
-
- if (wfe) {
- wfe();
- } else if (++spin_cnt < ARM_SMMU_CMDQ_SYNC_SPIN_COUNT) {
- cpu_relax();
- continue;
- } else {
- udelay(delay);
- delay *= 2;
- spin_cnt = 0;
- }
+ if (qp->wfe) {
+ wfe();
+ } else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
+ cpu_relax();
+ } else {
+ udelay(qp->delay);
+ qp->delay *= 2;
+ qp->spin_cnt = 0;
}
return 0;
@@ -748,16 +813,6 @@
*dst++ = cpu_to_le64(*src++);
}
-static int queue_insert_raw(struct arm_smmu_queue *q, u64 *ent)
-{
- if (queue_full(q))
- return -ENOSPC;
-
- queue_write(Q_ENT(q, q->prod), ent, q->ent_dwords);
- queue_inc_prod(q);
- return 0;
-}
-
static void queue_read(__le64 *dst, u64 *src, size_t n_dwords)
{
int i;
@@ -768,18 +823,19 @@
static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
{
- if (queue_empty(q))
+ if (queue_empty(&q->llq))
return -EAGAIN;
- queue_read(ent, Q_ENT(q, q->cons), q->ent_dwords);
- queue_inc_cons(q);
+ queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
+ queue_inc_cons(&q->llq);
+ queue_sync_cons_out(q);
return 0;
}
/* High-level queue accessors */
static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
{
- memset(cmd, 0, CMDQ_ENT_DWORDS << 3);
+ memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
switch (ent->opcode) {
@@ -815,6 +871,14 @@
case CMDQ_OP_TLBI_S12_VMALL:
cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
break;
+ case CMDQ_OP_ATC_INV:
+ cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
+ cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
+ cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
+ cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
+ cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
+ cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
+ break;
case CMDQ_OP_PRI_RESP:
cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
@@ -831,14 +895,14 @@
cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
break;
case CMDQ_OP_CMD_SYNC:
- if (ent->sync.msiaddr)
+ if (ent->sync.msiaddr) {
cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
- else
+ cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
+ } else {
cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
+ }
cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
- cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIDATA, ent->sync.msidata);
- cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
break;
default:
return -ENOENT;
@@ -847,12 +911,34 @@
return 0;
}
+static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
+ u32 prod)
+{
+ struct arm_smmu_queue *q = &smmu->cmdq.q;
+ struct arm_smmu_cmdq_ent ent = {
+ .opcode = CMDQ_OP_CMD_SYNC,
+ };
+
+ /*
+ * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
+ * payload, so the write will zero the entire command on that platform.
+ */
+ if (smmu->features & ARM_SMMU_FEAT_MSI &&
+ smmu->features & ARM_SMMU_FEAT_COHERENCY) {
+ ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
+ q->ent_dwords * 8;
+ }
+
+ arm_smmu_cmdq_build_cmd(cmd, &ent);
+}
+
static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
{
static const char *cerror_str[] = {
[CMDQ_ERR_CERROR_NONE_IDX] = "No error",
[CMDQ_ERR_CERROR_ILL_IDX] = "Illegal command",
[CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch",
+ [CMDQ_ERR_CERROR_ATC_INV_IDX] = "ATC invalidate timeout",
};
int i;
@@ -872,6 +958,14 @@
dev_err(smmu->dev, "retrying command fetch\n");
case CMDQ_ERR_CERROR_NONE_IDX:
return;
+ case CMDQ_ERR_CERROR_ATC_INV_IDX:
+ /*
+ * ATC Invalidation Completion timeout. CONS is still pointing
+ * at the CMD_SYNC. Attempt to complete other pending commands
+ * by repeating the CMD_SYNC, though we might well end up back
+ * here since the ATC invalidation may still be pending.
+ */
+ return;
case CMDQ_ERR_CERROR_ILL_IDX:
/* Fallthrough */
default:
@@ -896,100 +990,456 @@
queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
}
-static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd)
+/*
+ * Command queue locking.
+ * This is a form of bastardised rwlock with the following major changes:
+ *
+ * - The only LOCK routines are exclusive_trylock() and shared_lock().
+ * Neither have barrier semantics, and instead provide only a control
+ * dependency.
+ *
+ * - The UNLOCK routines are supplemented with shared_tryunlock(), which
+ * fails if the caller appears to be the last lock holder (yes, this is
+ * racy). All successful UNLOCK routines have RELEASE semantics.
+ */
+static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
{
- struct arm_smmu_queue *q = &smmu->cmdq.q;
- bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
+ int val;
- while (queue_insert_raw(q, cmd) == -ENOSPC) {
- if (queue_poll_cons(q, false, wfe))
- dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
- }
-}
-
-static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
- struct arm_smmu_cmdq_ent *ent)
-{
- u64 cmd[CMDQ_ENT_DWORDS];
- unsigned long flags;
-
- if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
- dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
- ent->opcode);
+ /*
+ * We can try to avoid the cmpxchg() loop by simply incrementing the
+ * lock counter. When held in exclusive state, the lock counter is set
+ * to INT_MIN so these increments won't hurt as the value will remain
+ * negative.
+ */
+ if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
return;
- }
- spin_lock_irqsave(&smmu->cmdq.lock, flags);
- arm_smmu_cmdq_insert_cmd(smmu, cmd);
- spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
+ do {
+ val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
+ } while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
}
+static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
+{
+ (void)atomic_dec_return_release(&cmdq->lock);
+}
+
+static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
+{
+ if (atomic_read(&cmdq->lock) == 1)
+ return false;
+
+ arm_smmu_cmdq_shared_unlock(cmdq);
+ return true;
+}
+
+#define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags) \
+({ \
+ bool __ret; \
+ local_irq_save(flags); \
+ __ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN); \
+ if (!__ret) \
+ local_irq_restore(flags); \
+ __ret; \
+})
+
+#define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags) \
+({ \
+ atomic_set_release(&cmdq->lock, 0); \
+ local_irq_restore(flags); \
+})
+
+
/*
- * The difference between val and sync_idx is bounded by the maximum size of
- * a queue at 2^20 entries, so 32 bits is plenty for wrap-safe arithmetic.
+ * Command queue insertion.
+ * This is made fiddly by our attempts to achieve some sort of scalability
+ * since there is one queue shared amongst all of the CPUs in the system. If
+ * you like mixed-size concurrency, dependency ordering and relaxed atomics,
+ * then you'll *love* this monstrosity.
+ *
+ * The basic idea is to split the queue up into ranges of commands that are
+ * owned by a given CPU; the owner may not have written all of the commands
+ * itself, but is responsible for advancing the hardware prod pointer when
+ * the time comes. The algorithm is roughly:
+ *
+ * 1. Allocate some space in the queue. At this point we also discover
+ * whether the head of the queue is currently owned by another CPU,
+ * or whether we are the owner.
+ *
+ * 2. Write our commands into our allocated slots in the queue.
+ *
+ * 3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
+ *
+ * 4. If we are an owner:
+ * a. Wait for the previous owner to finish.
+ * b. Mark the queue head as unowned, which tells us the range
+ * that we are responsible for publishing.
+ * c. Wait for all commands in our owned range to become valid.
+ * d. Advance the hardware prod pointer.
+ * e. Tell the next owner we've finished.
+ *
+ * 5. If we are inserting a CMD_SYNC (we may or may not have been an
+ * owner), then we need to stick around until it has completed:
+ * a. If we have MSIs, the SMMU can write back into the CMD_SYNC
+ * to clear the first 4 bytes.
+ * b. Otherwise, we spin waiting for the hardware cons pointer to
+ * advance past our command.
+ *
+ * The devil is in the details, particularly the use of locking for handling
+ * SYNC completion and freeing up space in the queue before we think that it is
+ * full.
*/
-static int __arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx)
+static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
+ u32 sprod, u32 eprod, bool set)
{
- ktime_t timeout;
- u32 val;
-
- timeout = ktime_add_us(ktime_get(), ARM_SMMU_CMDQ_SYNC_TIMEOUT_US);
- val = smp_cond_load_acquire(&smmu->sync_count,
- (int)(VAL - sync_idx) >= 0 ||
- !ktime_before(ktime_get(), timeout));
-
- return (int)(val - sync_idx) < 0 ? -ETIMEDOUT : 0;
-}
-
-static int __arm_smmu_cmdq_issue_sync_msi(struct arm_smmu_device *smmu)
-{
- u64 cmd[CMDQ_ENT_DWORDS];
- unsigned long flags;
- struct arm_smmu_cmdq_ent ent = {
- .opcode = CMDQ_OP_CMD_SYNC,
- .sync = {
- .msidata = atomic_inc_return_relaxed(&smmu->sync_nr),
- .msiaddr = virt_to_phys(&smmu->sync_count),
- },
+ u32 swidx, sbidx, ewidx, ebidx;
+ struct arm_smmu_ll_queue llq = {
+ .max_n_shift = cmdq->q.llq.max_n_shift,
+ .prod = sprod,
};
- arm_smmu_cmdq_build_cmd(cmd, &ent);
+ ewidx = BIT_WORD(Q_IDX(&llq, eprod));
+ ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
- spin_lock_irqsave(&smmu->cmdq.lock, flags);
- arm_smmu_cmdq_insert_cmd(smmu, cmd);
- spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
+ while (llq.prod != eprod) {
+ unsigned long mask;
+ atomic_long_t *ptr;
+ u32 limit = BITS_PER_LONG;
- return __arm_smmu_sync_poll_msi(smmu, ent.sync.msidata);
+ swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
+ sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
+
+ ptr = &cmdq->valid_map[swidx];
+
+ if ((swidx == ewidx) && (sbidx < ebidx))
+ limit = ebidx;
+
+ mask = GENMASK(limit - 1, sbidx);
+
+ /*
+ * The valid bit is the inverse of the wrap bit. This means
+ * that a zero-initialised queue is invalid and, after marking
+ * all entries as valid, they become invalid again when we
+ * wrap.
+ */
+ if (set) {
+ atomic_long_xor(mask, ptr);
+ } else { /* Poll */
+ unsigned long valid;
+
+ valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
+ atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
+ }
+
+ llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
+ }
}
-static int __arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
+/* Mark all entries in the range [sprod, eprod) as valid */
+static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
+ u32 sprod, u32 eprod)
{
- u64 cmd[CMDQ_ENT_DWORDS];
+ __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
+}
+
+/* Wait for all entries in the range [sprod, eprod) to become valid */
+static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
+ u32 sprod, u32 eprod)
+{
+ __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
+}
+
+/* Wait for the command queue to become non-full */
+static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
+ struct arm_smmu_ll_queue *llq)
+{
unsigned long flags;
- bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
- struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC };
- int ret;
+ struct arm_smmu_queue_poll qp;
+ struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
+ int ret = 0;
- arm_smmu_cmdq_build_cmd(cmd, &ent);
+ /*
+ * Try to update our copy of cons by grabbing exclusive cmdq access. If
+ * that fails, spin until somebody else updates it for us.
+ */
+ if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
+ WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
+ arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
+ llq->val = READ_ONCE(cmdq->q.llq.val);
+ return 0;
+ }
- spin_lock_irqsave(&smmu->cmdq.lock, flags);
- arm_smmu_cmdq_insert_cmd(smmu, cmd);
- ret = queue_poll_cons(&smmu->cmdq.q, true, wfe);
- spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
+ queue_poll_init(smmu, &qp);
+ do {
+ llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
+ if (!queue_full(llq))
+ break;
+
+ ret = queue_poll(&qp);
+ } while (!ret);
return ret;
}
-static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
+/*
+ * Wait until the SMMU signals a CMD_SYNC completion MSI.
+ * Must be called with the cmdq lock held in some capacity.
+ */
+static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
+ struct arm_smmu_ll_queue *llq)
{
- int ret;
- bool msi = (smmu->features & ARM_SMMU_FEAT_MSI) &&
- (smmu->features & ARM_SMMU_FEAT_COHERENCY);
+ int ret = 0;
+ struct arm_smmu_queue_poll qp;
+ struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
+ u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
- ret = msi ? __arm_smmu_cmdq_issue_sync_msi(smmu)
- : __arm_smmu_cmdq_issue_sync(smmu);
- if (ret)
- dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
+ queue_poll_init(smmu, &qp);
+
+ /*
+ * The MSI won't generate an event, since it's being written back
+ * into the command queue.
+ */
+ qp.wfe = false;
+ smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
+ llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
+ return ret;
+}
+
+/*
+ * Wait until the SMMU cons index passes llq->prod.
+ * Must be called with the cmdq lock held in some capacity.
+ */
+static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
+ struct arm_smmu_ll_queue *llq)
+{
+ struct arm_smmu_queue_poll qp;
+ struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
+ u32 prod = llq->prod;
+ int ret = 0;
+
+ queue_poll_init(smmu, &qp);
+ llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
+ do {
+ if (queue_consumed(llq, prod))
+ break;
+
+ ret = queue_poll(&qp);
+
+ /*
+ * This needs to be a readl() so that our subsequent call
+ * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
+ *
+ * Specifically, we need to ensure that we observe all
+ * shared_lock()s by other CMD_SYNCs that share our owner,
+ * so that a failing call to tryunlock() means that we're
+ * the last one out and therefore we can safely advance
+ * cmdq->q.llq.cons. Roughly speaking:
+ *
+ * CPU 0 CPU1 CPU2 (us)
+ *
+ * if (sync)
+ * shared_lock();
+ *
+ * dma_wmb();
+ * set_valid_map();
+ *
+ * if (owner) {
+ * poll_valid_map();
+ * <control dependency>
+ * writel(prod_reg);
+ *
+ * readl(cons_reg);
+ * tryunlock();
+ *
+ * Requires us to see CPU 0's shared_lock() acquisition.
+ */
+ llq->cons = readl(cmdq->q.cons_reg);
+ } while (!ret);
+
+ return ret;
+}
+
+static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
+ struct arm_smmu_ll_queue *llq)
+{
+ if (smmu->features & ARM_SMMU_FEAT_MSI &&
+ smmu->features & ARM_SMMU_FEAT_COHERENCY)
+ return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
+
+ return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
+}
+
+static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
+ u32 prod, int n)
+{
+ int i;
+ struct arm_smmu_ll_queue llq = {
+ .max_n_shift = cmdq->q.llq.max_n_shift,
+ .prod = prod,
+ };
+
+ for (i = 0; i < n; ++i) {
+ u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
+
+ prod = queue_inc_prod_n(&llq, i);
+ queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
+ }
+}
+
+/*
+ * This is the actual insertion function, and provides the following
+ * ordering guarantees to callers:
+ *
+ * - There is a dma_wmb() before publishing any commands to the queue.
+ * This can be relied upon to order prior writes to data structures
+ * in memory (such as a CD or an STE) before the command.
+ *
+ * - On completion of a CMD_SYNC, there is a control dependency.
+ * This can be relied upon to order subsequent writes to memory (e.g.
+ * freeing an IOVA) after completion of the CMD_SYNC.
+ *
+ * - Command insertion is totally ordered, so if two CPUs each race to
+ * insert their own list of commands then all of the commands from one
+ * CPU will appear before any of the commands from the other CPU.
+ */
+static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
+ u64 *cmds, int n, bool sync)
+{
+ u64 cmd_sync[CMDQ_ENT_DWORDS];
+ u32 prod;
+ unsigned long flags;
+ bool owner;
+ struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
+ struct arm_smmu_ll_queue llq = {
+ .max_n_shift = cmdq->q.llq.max_n_shift,
+ }, head = llq;
+ int ret = 0;
+
+ /* 1. Allocate some space in the queue */
+ local_irq_save(flags);
+ llq.val = READ_ONCE(cmdq->q.llq.val);
+ do {
+ u64 old;
+
+ while (!queue_has_space(&llq, n + sync)) {
+ local_irq_restore(flags);
+ if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
+ dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
+ local_irq_save(flags);
+ }
+
+ head.cons = llq.cons;
+ head.prod = queue_inc_prod_n(&llq, n + sync) |
+ CMDQ_PROD_OWNED_FLAG;
+
+ old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
+ if (old == llq.val)
+ break;
+
+ llq.val = old;
+ } while (1);
+ owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
+ head.prod &= ~CMDQ_PROD_OWNED_FLAG;
+ llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
+
+ /*
+ * 2. Write our commands into the queue
+ * Dependency ordering from the cmpxchg() loop above.
+ */
+ arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
+ if (sync) {
+ prod = queue_inc_prod_n(&llq, n);
+ arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
+ queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
+
+ /*
+ * In order to determine completion of our CMD_SYNC, we must
+ * ensure that the queue can't wrap twice without us noticing.
+ * We achieve that by taking the cmdq lock as shared before
+ * marking our slot as valid.
+ */
+ arm_smmu_cmdq_shared_lock(cmdq);
+ }
+
+ /* 3. Mark our slots as valid, ensuring commands are visible first */
+ dma_wmb();
+ arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
+
+ /* 4. If we are the owner, take control of the SMMU hardware */
+ if (owner) {
+ /* a. Wait for previous owner to finish */
+ atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
+
+ /* b. Stop gathering work by clearing the owned flag */
+ prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
+ &cmdq->q.llq.atomic.prod);
+ prod &= ~CMDQ_PROD_OWNED_FLAG;
+
+ /*
+ * c. Wait for any gathered work to be written to the queue.
+ * Note that we read our own entries so that we have the control
+ * dependency required by (d).
+ */
+ arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
+
+ /*
+ * d. Advance the hardware prod pointer
+ * Control dependency ordering from the entries becoming valid.
+ */
+ writel_relaxed(prod, cmdq->q.prod_reg);
+
+ /*
+ * e. Tell the next owner we're done
+ * Make sure we've updated the hardware first, so that we don't
+ * race to update prod and potentially move it backwards.
+ */
+ atomic_set_release(&cmdq->owner_prod, prod);
+ }
+
+ /* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
+ if (sync) {
+ llq.prod = queue_inc_prod_n(&llq, n);
+ ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
+ if (ret) {
+ dev_err_ratelimited(smmu->dev,
+ "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
+ llq.prod,
+ readl_relaxed(cmdq->q.prod_reg),
+ readl_relaxed(cmdq->q.cons_reg));
+ }
+
+ /*
+ * Try to unlock the cmdq lock. This will fail if we're the last
+ * reader, in which case we can safely update cmdq->q.llq.cons
+ */
+ if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
+ WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
+ arm_smmu_cmdq_shared_unlock(cmdq);
+ }
+ }
+
+ local_irq_restore(flags);
+ return ret;
+}
+
+static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq_ent *ent)
+{
+ u64 cmd[CMDQ_ENT_DWORDS];
+
+ if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
+ dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
+ ent->opcode);
+ return -EINVAL;
+ }
+
+ return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
+}
+
+static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
+{
+ return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
}
/* Context descriptor manipulation functions */
@@ -1006,7 +1456,6 @@
val |= ARM_SMMU_TCR2CD(tcr, EPD0);
val |= ARM_SMMU_TCR2CD(tcr, EPD1);
val |= ARM_SMMU_TCR2CD(tcr, IPS);
- val |= ARM_SMMU_TCR2CD(tcr, TBI0);
return val;
}
@@ -1066,8 +1515,8 @@
arm_smmu_cmdq_issue_sync(smmu);
}
-static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
- __le64 *dst, struct arm_smmu_strtab_ent *ste)
+static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
+ __le64 *dst)
{
/*
* This is hideously complicated, but we only really care about
@@ -1087,6 +1536,10 @@
*/
u64 val = le64_to_cpu(dst[0]);
bool ste_live = false;
+ struct arm_smmu_device *smmu = NULL;
+ struct arm_smmu_s1_cfg *s1_cfg = NULL;
+ struct arm_smmu_s2_cfg *s2_cfg = NULL;
+ struct arm_smmu_domain *smmu_domain = NULL;
struct arm_smmu_cmdq_ent prefetch_cmd = {
.opcode = CMDQ_OP_PREFETCH_CFG,
.prefetch = {
@@ -1094,6 +1547,25 @@
},
};
+ if (master) {
+ smmu_domain = master->domain;
+ smmu = master->smmu;
+ }
+
+ if (smmu_domain) {
+ switch (smmu_domain->stage) {
+ case ARM_SMMU_DOMAIN_S1:
+ s1_cfg = &smmu_domain->s1_cfg;
+ break;
+ case ARM_SMMU_DOMAIN_S2:
+ case ARM_SMMU_DOMAIN_NESTED:
+ s2_cfg = &smmu_domain->s2_cfg;
+ break;
+ default:
+ break;
+ }
+ }
+
if (val & STRTAB_STE_0_V) {
switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
case STRTAB_STE_0_CFG_BYPASS:
@@ -1103,8 +1575,8 @@
ste_live = true;
break;
case STRTAB_STE_0_CFG_ABORT:
- if (disable_bypass)
- break;
+ BUG_ON(!disable_bypass);
+ break;
default:
BUG(); /* STE corruption */
}
@@ -1114,8 +1586,8 @@
val = STRTAB_STE_0_V;
/* Bypass/fault */
- if (!ste->assigned || !(ste->s1_cfg || ste->s2_cfg)) {
- if (!ste->assigned && disable_bypass)
+ if (!smmu_domain || !(s1_cfg || s2_cfg)) {
+ if (!smmu_domain && disable_bypass)
val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
else
val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
@@ -1133,41 +1605,42 @@
return;
}
- if (ste->s1_cfg) {
+ if (s1_cfg) {
BUG_ON(ste_live);
dst[1] = cpu_to_le64(
FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
-#ifdef CONFIG_PCI_ATS
- FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_TRANS) |
-#endif
FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
if (smmu->features & ARM_SMMU_FEAT_STALLS &&
!(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
- val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
+ val |= (s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS);
}
- if (ste->s2_cfg) {
+ if (s2_cfg) {
BUG_ON(ste_live);
dst[2] = cpu_to_le64(
- FIELD_PREP(STRTAB_STE_2_S2VMID, ste->s2_cfg->vmid) |
- FIELD_PREP(STRTAB_STE_2_VTCR, ste->s2_cfg->vtcr) |
+ FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
+ FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
#ifdef __BIG_ENDIAN
STRTAB_STE_2_S2ENDI |
#endif
STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
STRTAB_STE_2_S2R);
- dst[3] = cpu_to_le64(ste->s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
+ dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
}
+ if (master->ats_enabled)
+ dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
+ STRTAB_STE_1_EATS_TRANS));
+
arm_smmu_sync_ste_for_sid(smmu, sid);
dst[0] = cpu_to_le64(val);
arm_smmu_sync_ste_for_sid(smmu, sid);
@@ -1180,10 +1653,9 @@
static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent)
{
unsigned int i;
- struct arm_smmu_strtab_ent ste = { .assigned = false };
for (i = 0; i < nent; ++i) {
- arm_smmu_write_strtab_ent(NULL, -1, strtab, &ste);
+ arm_smmu_write_strtab_ent(NULL, -1, strtab);
strtab += STRTAB_STE_DWORDS;
}
}
@@ -1222,6 +1694,7 @@
int i;
struct arm_smmu_device *smmu = dev;
struct arm_smmu_queue *q = &smmu->evtq.q;
+ struct arm_smmu_ll_queue *llq = &q->llq;
u64 evt[EVTQ_ENT_DWORDS];
do {
@@ -1239,12 +1712,13 @@
* Not much we can do on overflow, so scream and pretend we're
* trying harder.
*/
- if (queue_sync_prod(q) == -EOVERFLOW)
+ if (queue_sync_prod_in(q) == -EOVERFLOW)
dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
- } while (!queue_empty(q));
+ } while (!queue_empty(llq));
/* Sync our overflow flag, as we believe we're up to speed */
- q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
+ llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
+ Q_IDX(llq, llq->cons);
return IRQ_HANDLED;
}
@@ -1290,19 +1764,21 @@
{
struct arm_smmu_device *smmu = dev;
struct arm_smmu_queue *q = &smmu->priq.q;
+ struct arm_smmu_ll_queue *llq = &q->llq;
u64 evt[PRIQ_ENT_DWORDS];
do {
while (!queue_remove_raw(q, evt))
arm_smmu_handle_ppr(smmu, evt);
- if (queue_sync_prod(q) == -EOVERFLOW)
+ if (queue_sync_prod_in(q) == -EOVERFLOW)
dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
- } while (!queue_empty(q));
+ } while (!queue_empty(llq));
/* Sync our overflow flag, as we believe we're up to speed */
- q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
- writel(q->cons, q->cons_reg);
+ llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
+ Q_IDX(llq, llq->cons);
+ queue_sync_cons_out(q);
return IRQ_HANDLED;
}
@@ -1371,18 +1847,114 @@
return IRQ_WAKE_THREAD;
}
+static void
+arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
+ struct arm_smmu_cmdq_ent *cmd)
+{
+ size_t log2_span;
+ size_t span_mask;
+ /* ATC invalidates are always on 4096-byte pages */
+ size_t inval_grain_shift = 12;
+ unsigned long page_start, page_end;
+
+ *cmd = (struct arm_smmu_cmdq_ent) {
+ .opcode = CMDQ_OP_ATC_INV,
+ .substream_valid = !!ssid,
+ .atc.ssid = ssid,
+ };
+
+ if (!size) {
+ cmd->atc.size = ATC_INV_SIZE_ALL;
+ return;
+ }
+
+ page_start = iova >> inval_grain_shift;
+ page_end = (iova + size - 1) >> inval_grain_shift;
+
+ /*
+ * In an ATS Invalidate Request, the address must be aligned on the
+ * range size, which must be a power of two number of page sizes. We
+ * thus have to choose between grossly over-invalidating the region, or
+ * splitting the invalidation into multiple commands. For simplicity
+ * we'll go with the first solution, but should refine it in the future
+ * if multiple commands are shown to be more efficient.
+ *
+ * Find the smallest power of two that covers the range. The most
+ * significant differing bit between the start and end addresses,
+ * fls(start ^ end), indicates the required span. For example:
+ *
+ * We want to invalidate pages [8; 11]. This is already the ideal range:
+ * x = 0b1000 ^ 0b1011 = 0b11
+ * span = 1 << fls(x) = 4
+ *
+ * To invalidate pages [7; 10], we need to invalidate [0; 15]:
+ * x = 0b0111 ^ 0b1010 = 0b1101
+ * span = 1 << fls(x) = 16
+ */
+ log2_span = fls_long(page_start ^ page_end);
+ span_mask = (1ULL << log2_span) - 1;
+
+ page_start &= ~span_mask;
+
+ cmd->atc.addr = page_start << inval_grain_shift;
+ cmd->atc.size = log2_span;
+}
+
+static int arm_smmu_atc_inv_master(struct arm_smmu_master *master,
+ struct arm_smmu_cmdq_ent *cmd)
+{
+ int i;
+
+ if (!master->ats_enabled)
+ return 0;
+
+ for (i = 0; i < master->num_sids; i++) {
+ cmd->atc.sid = master->sids[i];
+ arm_smmu_cmdq_issue_cmd(master->smmu, cmd);
+ }
+
+ return arm_smmu_cmdq_issue_sync(master->smmu);
+}
+
+static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
+ int ssid, unsigned long iova, size_t size)
+{
+ int ret = 0;
+ unsigned long flags;
+ struct arm_smmu_cmdq_ent cmd;
+ struct arm_smmu_master *master;
+
+ if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
+ return 0;
+
+ /*
+ * Ensure that we've completed prior invalidation of the main TLBs
+ * before we read 'nr_ats_masters' in case of a concurrent call to
+ * arm_smmu_enable_ats():
+ *
+ * // unmap() // arm_smmu_enable_ats()
+ * TLBI+SYNC atomic_inc(&nr_ats_masters);
+ * smp_mb(); [...]
+ * atomic_read(&nr_ats_masters); pci_enable_ats() // writel()
+ *
+ * Ensures that we always see the incremented 'nr_ats_masters' count if
+ * ATS was enabled at the PCI device before completion of the TLBI.
+ */
+ smp_mb();
+ if (!atomic_read(&smmu_domain->nr_ats_masters))
+ return 0;
+
+ arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
+
+ spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+ list_for_each_entry(master, &smmu_domain->devices, domain_head)
+ ret |= arm_smmu_atc_inv_master(master, &cmd);
+ spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
+
+ return ret ? -ETIMEDOUT : 0;
+}
+
/* IO_PGTABLE API */
-static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
-{
- arm_smmu_cmdq_issue_sync(smmu);
-}
-
-static void arm_smmu_tlb_sync(void *cookie)
-{
- struct arm_smmu_domain *smmu_domain = cookie;
- __arm_smmu_tlb_sync(smmu_domain->smmu);
-}
-
static void arm_smmu_tlb_inv_context(void *cookie)
{
struct arm_smmu_domain *smmu_domain = cookie;
@@ -1398,22 +1970,35 @@
cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
}
+ /*
+ * NOTE: when io-pgtable is in non-strict mode, we may get here with
+ * PTEs previously cleared by unmaps on the current CPU not yet visible
+ * to the SMMU. We are relying on the dma_wmb() implicit during cmd
+ * insertion to guarantee those are observed before the TLBI. Do be
+ * careful, 007.
+ */
arm_smmu_cmdq_issue_cmd(smmu, &cmd);
- __arm_smmu_tlb_sync(smmu);
+ arm_smmu_cmdq_issue_sync(smmu);
+ arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
}
-static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
- size_t granule, bool leaf, void *cookie)
+static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size,
+ size_t granule, bool leaf,
+ struct arm_smmu_domain *smmu_domain)
{
- struct arm_smmu_domain *smmu_domain = cookie;
+ u64 cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS];
struct arm_smmu_device *smmu = smmu_domain->smmu;
+ unsigned long start = iova, end = iova + size;
+ int i = 0;
struct arm_smmu_cmdq_ent cmd = {
.tlbi = {
.leaf = leaf,
- .addr = iova,
},
};
+ if (!size)
+ return;
+
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
cmd.opcode = CMDQ_OP_TLBI_NH_VA;
cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
@@ -1422,16 +2007,54 @@
cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
}
- do {
- arm_smmu_cmdq_issue_cmd(smmu, &cmd);
- cmd.tlbi.addr += granule;
- } while (size -= granule);
+ while (iova < end) {
+ if (i == CMDQ_BATCH_ENTRIES) {
+ arm_smmu_cmdq_issue_cmdlist(smmu, cmds, i, false);
+ i = 0;
+ }
+
+ cmd.tlbi.addr = iova;
+ arm_smmu_cmdq_build_cmd(&cmds[i * CMDQ_ENT_DWORDS], &cmd);
+ iova += granule;
+ i++;
+ }
+
+ arm_smmu_cmdq_issue_cmdlist(smmu, cmds, i, true);
+
+ /*
+ * Unfortunately, this can't be leaf-only since we may have
+ * zapped an entire table.
+ */
+ arm_smmu_atc_inv_domain(smmu_domain, 0, start, size);
}
-static const struct iommu_gather_ops arm_smmu_gather_ops = {
+static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
+ unsigned long iova, size_t granule,
+ void *cookie)
+{
+ struct arm_smmu_domain *smmu_domain = cookie;
+ struct iommu_domain *domain = &smmu_domain->domain;
+
+ iommu_iotlb_gather_add_page(domain, gather, iova, granule);
+}
+
+static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
+{
+ arm_smmu_tlb_inv_range(iova, size, granule, false, cookie);
+}
+
+static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
+{
+ arm_smmu_tlb_inv_range(iova, size, granule, true, cookie);
+}
+
+static const struct iommu_flush_ops arm_smmu_flush_ops = {
.tlb_flush_all = arm_smmu_tlb_inv_context,
- .tlb_add_flush = arm_smmu_tlb_inv_range_nosync,
- .tlb_sync = arm_smmu_tlb_sync,
+ .tlb_flush_walk = arm_smmu_tlb_inv_walk,
+ .tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
+ .tlb_add_page = arm_smmu_tlb_inv_page_nosync,
};
/* IOMMU API */
@@ -1472,6 +2095,9 @@
}
mutex_init(&smmu_domain->init_mutex);
+ INIT_LIST_HEAD(&smmu_domain->devices);
+ spin_lock_init(&smmu_domain->devices_lock);
+
return &smmu_domain->domain;
}
@@ -1617,12 +2243,13 @@
.pgsize_bitmap = smmu->pgsize_bitmap,
.ias = ias,
.oas = oas,
- .tlb = &arm_smmu_gather_ops,
+ .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY,
+ .tlb = &arm_smmu_flush_ops,
.iommu_dev = smmu->dev,
};
- if (smmu->features & ARM_SMMU_FEAT_COHERENCY)
- pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
+ if (smmu_domain->non_strict)
+ pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
if (!pgtbl_ops)
@@ -1664,53 +2291,122 @@
return step;
}
-static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
+static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
{
int i, j;
- struct arm_smmu_master_data *master = fwspec->iommu_priv;
struct arm_smmu_device *smmu = master->smmu;
- for (i = 0; i < fwspec->num_ids; ++i) {
- u32 sid = fwspec->ids[i];
+ for (i = 0; i < master->num_sids; ++i) {
+ u32 sid = master->sids[i];
__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
/* Bridged PCI devices may end up with duplicated IDs */
for (j = 0; j < i; j++)
- if (fwspec->ids[j] == sid)
+ if (master->sids[j] == sid)
break;
if (j < i)
continue;
- arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste);
+ arm_smmu_write_strtab_ent(master, sid, step);
}
}
-static void arm_smmu_detach_dev(struct device *dev)
+#ifdef CONFIG_PCI_ATS
+static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
{
- struct arm_smmu_master_data *master = dev->iommu_fwspec->iommu_priv;
+ struct pci_dev *pdev;
+ struct arm_smmu_device *smmu = master->smmu;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
- master->ste.assigned = false;
- arm_smmu_install_ste_for_dev(dev->iommu_fwspec);
+ if (!(smmu->features & ARM_SMMU_FEAT_ATS) || !dev_is_pci(master->dev) ||
+ !(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS) || pci_ats_disabled())
+ return false;
+
+ pdev = to_pci_dev(master->dev);
+ return !pdev->untrusted && pdev->ats_cap;
+}
+#else
+static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
+{
+ return false;
+}
+#endif
+
+/*
+ * Enable ATS at the PCI endpoint behind @master.
+ *
+ * Does nothing unless master->ats_enabled is set. Otherwise the domain's
+ * nr_ats_masters counter is incremented and an ATC invalidation is issued
+ * for the domain before pci_enable_ats() is called with the smallest
+ * supported page size as the STU.
+ *
+ * NOTE(review): a pci_enable_ats() failure is only logged; the counter stays
+ * incremented and master->ats_enabled stays set.
+ */
+static void arm_smmu_enable_ats(struct arm_smmu_master *master)
+{
+ size_t stu;
+ struct pci_dev *pdev;
+ struct arm_smmu_device *smmu = master->smmu;
+ struct arm_smmu_domain *smmu_domain = master->domain;
+
+ /* Don't enable ATS at the endpoint if it's not enabled in the STE */
+ if (!master->ats_enabled)
+ return;
+
+ /* Smallest Translation Unit: log2 of the smallest supported granule */
+ stu = __ffs(smmu->pgsize_bitmap);
+ /* ats_enabled is only set after a dev_is_pci() check in arm_smmu_ats_supported() */
+ pdev = to_pci_dev(master->dev);
+
+ atomic_inc(&smmu_domain->nr_ats_masters);
+ /* iova 0 / size 0: presumably a full invalidation -- TODO confirm */
+ arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
+ if (pci_enable_ats(pdev, stu))
+ dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
+}
+
+/*
+ * Disable ATS at the PCI endpoint behind @master and invalidate its ATC.
+ *
+ * Does nothing unless master->ats_enabled is set. The endpoint is disabled
+ * first, then an ATC invalidation command is built and sent for this master
+ * only, and finally the domain's nr_ats_masters counter is decremented.
+ * Reads master->domain, so it must run before the caller clears that field.
+ */
+static void arm_smmu_disable_ats(struct arm_smmu_master *master)
+{
+ struct arm_smmu_cmdq_ent cmd;
+ struct arm_smmu_domain *smmu_domain = master->domain;
+
+ if (!master->ats_enabled)
+ return;
+
+ pci_disable_ats(to_pci_dev(master->dev));
+ /*
+ * Ensure ATS is disabled at the endpoint before we issue the
+ * ATC invalidation via the SMMU.
+ */
+ wmb();
+ /* Zero arguments: presumably "invalidate everything" -- TODO confirm */
+ arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
+ arm_smmu_atc_inv_master(master, &cmd);
+ atomic_dec(&smmu_domain->nr_ats_masters);
+}
+
+/*
+ * Detach @master from the domain it is currently attached to, if any.
+ *
+ * A master with no domain is left untouched, so callers may invoke this
+ * unconditionally. Otherwise ATS is disabled, the master is unlinked from
+ * the domain's device list under devices_lock, its domain/ats_enabled state
+ * is cleared and its stream table entries are rewritten.
+ */
+static void arm_smmu_detach_dev(struct arm_smmu_master *master)
+{
+ unsigned long flags;
+ struct arm_smmu_domain *smmu_domain = master->domain;
+
+ if (!smmu_domain)
+ return;
+
+ /* Must precede the resets below: it reads master->domain and ats_enabled */
+ arm_smmu_disable_ats(master);
+
+ spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+ list_del(&master->domain_head);
+ spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
+
+ master->domain = NULL;
+ master->ats_enabled = false;
+ /* Rewrite the STEs now that the master no longer references a domain */
+ arm_smmu_install_ste_for_dev(master);
}
static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
int ret = 0;
+ unsigned long flags;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct arm_smmu_device *smmu;
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
- struct arm_smmu_master_data *master;
- struct arm_smmu_strtab_ent *ste;
+ struct arm_smmu_master *master;
- if (!dev->iommu_fwspec)
+ if (!fwspec)
return -ENOENT;
- master = dev->iommu_fwspec->iommu_priv;
+ master = fwspec->iommu_priv;
smmu = master->smmu;
- ste = &master->ste;
- /* Already attached to a different domain? */
- if (ste->assigned)
- arm_smmu_detach_dev(dev);
+ arm_smmu_detach_dev(master);
mutex_lock(&smmu_domain->init_mutex);
@@ -1730,21 +2426,22 @@
goto out_unlock;
}
- ste->assigned = true;
+ master->domain = smmu_domain;
- if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS) {
- ste->s1_cfg = NULL;
- ste->s2_cfg = NULL;
- } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
- ste->s1_cfg = &smmu_domain->s1_cfg;
- ste->s2_cfg = NULL;
- arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
- } else {
- ste->s1_cfg = NULL;
- ste->s2_cfg = &smmu_domain->s2_cfg;
- }
+ if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
+ master->ats_enabled = arm_smmu_ats_supported(master);
- arm_smmu_install_ste_for_dev(dev->iommu_fwspec);
+ if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
+ arm_smmu_write_ctx_desc(smmu, &smmu_domain->s1_cfg);
+
+ arm_smmu_install_ste_for_dev(master);
+
+ spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+ list_add(&master->domain_head, &smmu_domain->devices);
+ spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
+
+ arm_smmu_enable_ats(master);
+
out_unlock:
mutex_unlock(&smmu_domain->init_mutex);
return ret;
@@ -1761,23 +2458,33 @@
return ops->map(ops, iova, paddr, size, prot);
}
-static size_t
-arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
+static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
+ size_t size, struct iommu_iotlb_gather *gather)
{
- struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+ struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
if (!ops)
return 0;
- return ops->unmap(ops, iova, size);
+ return ops->unmap(ops, iova, size, gather);
}
-static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
+static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
{
- struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
- if (smmu)
- __arm_smmu_tlb_sync(smmu);
+ if (smmu_domain->smmu)
+ arm_smmu_tlb_inv_context(smmu_domain);
+}
+
+static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
+ struct iommu_iotlb_gather *gather)
+{
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
+ arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start,
+ gather->pgsize, true, smmu_domain);
}
static phys_addr_t
@@ -1796,16 +2503,11 @@
static struct platform_driver arm_smmu_driver;
-static int arm_smmu_match_node(struct device *dev, void *data)
-{
- return dev->fwnode == data;
-}
-
static
struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
{
- struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
- fwnode, arm_smmu_match_node);
+ struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
+ fwnode);
put_device(dev);
return dev ? dev_get_drvdata(dev) : NULL;
}
@@ -1826,8 +2528,8 @@
{
int i, ret;
struct arm_smmu_device *smmu;
- struct arm_smmu_master_data *master;
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct arm_smmu_master *master;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct iommu_group *group;
if (!fwspec || fwspec->ops != &arm_smmu_ops)
@@ -1848,13 +2550,16 @@
if (!master)
return -ENOMEM;
+ master->dev = dev;
master->smmu = smmu;
+ master->sids = fwspec->ids;
+ master->num_sids = fwspec->num_ids;
fwspec->iommu_priv = master;
}
/* Check the SIDs are in range of the SMMU and our stream table */
- for (i = 0; i < fwspec->num_ids; i++) {
- u32 sid = fwspec->ids[i];
+ for (i = 0; i < master->num_sids; i++) {
+ u32 sid = master->sids[i];
if (!arm_smmu_sid_in_range(smmu, sid))
return -ERANGE;
@@ -1878,8 +2583,8 @@
static void arm_smmu_remove_device(struct device *dev)
{
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
- struct arm_smmu_master_data *master;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+ struct arm_smmu_master *master;
struct arm_smmu_device *smmu;
if (!fwspec || fwspec->ops != &arm_smmu_ops)
@@ -1887,8 +2592,7 @@
master = fwspec->iommu_priv;
smmu = master->smmu;
- if (master && master->ste.assigned)
- arm_smmu_detach_dev(dev);
+ arm_smmu_detach_dev(master);
iommu_group_remove_device(dev);
iommu_device_unlink(&smmu->iommu, dev);
kfree(master);
@@ -1917,15 +2621,27 @@
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
- if (domain->type != IOMMU_DOMAIN_UNMANAGED)
- return -EINVAL;
-
- switch (attr) {
- case DOMAIN_ATTR_NESTING:
- *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
- return 0;
+ switch (domain->type) {
+ case IOMMU_DOMAIN_UNMANAGED:
+ switch (attr) {
+ case DOMAIN_ATTR_NESTING:
+ *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
+ return 0;
+ default:
+ return -ENODEV;
+ }
+ break;
+ case IOMMU_DOMAIN_DMA:
+ switch (attr) {
+ case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
+ *(int *)data = smmu_domain->non_strict;
+ return 0;
+ default:
+ return -ENODEV;
+ }
+ break;
default:
- return -ENODEV;
+ return -EINVAL;
}
}
@@ -1935,26 +2651,37 @@
int ret = 0;
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
- if (domain->type != IOMMU_DOMAIN_UNMANAGED)
- return -EINVAL;
-
mutex_lock(&smmu_domain->init_mutex);
- switch (attr) {
- case DOMAIN_ATTR_NESTING:
- if (smmu_domain->smmu) {
- ret = -EPERM;
- goto out_unlock;
+ switch (domain->type) {
+ case IOMMU_DOMAIN_UNMANAGED:
+ switch (attr) {
+ case DOMAIN_ATTR_NESTING:
+ if (smmu_domain->smmu) {
+ ret = -EPERM;
+ goto out_unlock;
+ }
+
+ if (*(int *)data)
+ smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
+ else
+ smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
+ break;
+ default:
+ ret = -ENODEV;
}
-
- if (*(int *)data)
- smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
- else
- smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
-
+ break;
+ case IOMMU_DOMAIN_DMA:
+ switch(attr) {
+ case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
+ smmu_domain->non_strict = *(int *)data;
+ break;
+ default:
+ ret = -ENODEV;
+ }
break;
default:
- ret = -ENODEV;
+ ret = -EINVAL;
}
out_unlock:
@@ -1999,7 +2726,7 @@
.attach_dev = arm_smmu_attach_dev,
.map = arm_smmu_map,
.unmap = arm_smmu_unmap,
- .flush_iotlb_all = arm_smmu_iotlb_sync,
+ .flush_iotlb_all = arm_smmu_flush_iotlb_all,
.iotlb_sync = arm_smmu_iotlb_sync,
.iova_to_phys = arm_smmu_iova_to_phys,
.add_device = arm_smmu_add_device,
@@ -2018,43 +2745,91 @@
struct arm_smmu_queue *q,
unsigned long prod_off,
unsigned long cons_off,
- size_t dwords)
+ size_t dwords, const char *name)
{
- size_t qsz = ((1 << q->max_n_shift) * dwords) << 3;
+ size_t qsz;
- q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL);
+ do {
+ qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
+ q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
+ GFP_KERNEL);
+ if (q->base || qsz < PAGE_SIZE)
+ break;
+
+ q->llq.max_n_shift--;
+ } while (1);
+
if (!q->base) {
- dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n",
- qsz);
+ dev_err(smmu->dev,
+ "failed to allocate queue (0x%zx bytes) for %s\n",
+ qsz, name);
return -ENOMEM;
}
+ if (!WARN_ON(q->base_dma & (qsz - 1))) {
+ dev_info(smmu->dev, "allocated %u entries for %s\n",
+ 1 << q->llq.max_n_shift, name);
+ }
+
q->prod_reg = arm_smmu_page1_fixup(prod_off, smmu);
q->cons_reg = arm_smmu_page1_fixup(cons_off, smmu);
q->ent_dwords = dwords;
q->q_base = Q_BASE_RWA;
q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
- q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->max_n_shift);
+ q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
- q->prod = q->cons = 0;
+ q->llq.prod = q->llq.cons = 0;
return 0;
}
+/* devres action: release the command queue bitmap passed in @data. */
+static void arm_smmu_cmdq_free_bitmap(void *data)
+{
+	bitmap_free(data);
+}
+
+/*
+ * Initialise the software state of the command queue: reset the owner_prod
+ * and lock atomics and allocate the valid_map bitmap with one bit per queue
+ * entry (1 << max_n_shift). The bitmap is released by a devres action
+ * registered on smmu->dev.
+ *
+ * Returns 0 on success, -ENOMEM if the bitmap cannot be allocated, or the
+ * error from devm_add_action_or_reset() if the cleanup action cannot be
+ * registered.
+ */
+static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
+{
+	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
+	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
+	atomic_long_t *bitmap;
+	int ret;
+
+	atomic_set(&cmdq->owner_prod, 0);
+	atomic_set(&cmdq->lock, 0);
+
+	bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
+	if (!bitmap) {
+		dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
+		return -ENOMEM;
+	}
+
+	/*
+	 * The registration itself can fail. The _or_reset variant then runs
+	 * the action (freeing the bitmap) for us, so nothing is leaked and
+	 * the error is reported instead of being silently dropped.
+	 */
+	ret = devm_add_action_or_reset(smmu->dev, arm_smmu_cmdq_free_bitmap,
+				       bitmap);
+	if (ret) {
+		dev_err(smmu->dev, "failed to register cmdq bitmap cleanup\n");
+		return ret;
+	}
+
+	/* Publish the bitmap only once its lifetime is managed */
+	cmdq->valid_map = bitmap;
+	return 0;
+}
+
static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
{
int ret;
/* cmdq */
- spin_lock_init(&smmu->cmdq.lock);
ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
- ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS);
+ ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS,
+ "cmdq");
+ if (ret)
+ return ret;
+
+ ret = arm_smmu_cmdq_init(smmu);
if (ret)
return ret;
/* evtq */
ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
- ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS);
+ ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS,
+ "evtq");
if (ret)
return ret;
@@ -2063,7 +2838,8 @@
return 0;
return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
- ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS);
+ ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS,
+ "priq");
}
static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
@@ -2180,7 +2956,6 @@
{
int ret;
- atomic_set(&smmu->sync_nr, 0);
ret = arm_smmu_init_queues(smmu);
if (ret)
return ret;
@@ -2353,8 +3128,8 @@
irq = smmu->combined_irq;
if (irq) {
/*
- * Cavium ThunderX2 implementation doesn't not support unique
- * irq lines. Use single irq line for all the SMMUv3 interrupts.
+ * Cavium ThunderX2 implementation doesn't support unique irq
+ * lines. Use a single irq line for all the SMMUv3 interrupts.
*/
ret = devm_request_threaded_irq(smmu->dev, irq,
arm_smmu_combined_irq_handler,
@@ -2398,13 +3173,9 @@
/* Clear CR0 and sync (disables SMMU and queue processing) */
reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
if (reg & CR0_SMMUEN) {
- if (is_kdump_kernel()) {
- arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
- arm_smmu_device_disable(smmu);
- return -EBUSY;
- }
-
dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
+ WARN_ON(is_kdump_kernel() && !disable_bypass);
+ arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
}
ret = arm_smmu_device_disable(smmu);
@@ -2432,8 +3203,8 @@
/* Command queue */
writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
- writel_relaxed(smmu->cmdq.q.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
- writel_relaxed(smmu->cmdq.q.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
+ writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
+ writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
enables = CR0_CMDQEN;
ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
@@ -2460,9 +3231,9 @@
/* Event queue */
writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
- writel_relaxed(smmu->evtq.q.prod,
+ writel_relaxed(smmu->evtq.q.llq.prod,
arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
- writel_relaxed(smmu->evtq.q.cons,
+ writel_relaxed(smmu->evtq.q.llq.cons,
arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));
enables |= CR0_EVTQEN;
@@ -2477,9 +3248,9 @@
if (smmu->features & ARM_SMMU_FEAT_PRI) {
writeq_relaxed(smmu->priq.q.q_base,
smmu->base + ARM_SMMU_PRIQ_BASE);
- writel_relaxed(smmu->priq.q.prod,
+ writel_relaxed(smmu->priq.q.llq.prod,
arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
- writel_relaxed(smmu->priq.q.cons,
+ writel_relaxed(smmu->priq.q.llq.cons,
arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));
enables |= CR0_PRIQEN;
@@ -2491,12 +3262,24 @@
}
}
+ if (smmu->features & ARM_SMMU_FEAT_ATS) {
+ enables |= CR0_ATSCHK;
+ ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
+ ARM_SMMU_CR0ACK);
+ if (ret) {
+ dev_err(smmu->dev, "failed to enable ATS check\n");
+ return ret;
+ }
+ }
+
ret = arm_smmu_setup_irqs(smmu);
if (ret) {
dev_err(smmu->dev, "failed to setup irqs\n");
return ret;
}
+ if (is_kdump_kernel())
+ enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
/* Enable the SMMU interface, or ensure bypass */
if (!bypass || disable_bypass) {
@@ -2620,19 +3403,25 @@
return -ENXIO;
}
- /* Queue sizes, capped at 4k */
- smmu->cmdq.q.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
- FIELD_GET(IDR1_CMDQS, reg));
- if (!smmu->cmdq.q.max_n_shift) {
- /* Odd alignment restrictions on the base, so ignore for now */
- dev_err(smmu->dev, "unit-length command queue not supported\n");
+ /* Queue sizes, capped to ensure natural alignment */
+ smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
+ FIELD_GET(IDR1_CMDQS, reg));
+ if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
+ /*
+ * We don't support splitting up batches, so one batch of
+ * commands plus an extra sync needs to fit inside the command
+ * queue. There's also no way we can handle the weird alignment
+ * restrictions on the base pointer for a unit-length queue.
+ */
+ dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
+ CMDQ_BATCH_ENTRIES);
return -ENXIO;
}
- smmu->evtq.q.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
- FIELD_GET(IDR1_EVTQS, reg));
- smmu->priq.q.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
- FIELD_GET(IDR1_PRIQS, reg));
+ smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
+ FIELD_GET(IDR1_EVTQS, reg));
+ smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
+ FIELD_GET(IDR1_PRIQS, reg));
/* SID/SSID sizes */
smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
@@ -2894,37 +3683,25 @@
return 0;
}
-static int arm_smmu_device_remove(struct platform_device *pdev)
+static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
arm_smmu_device_disable(smmu);
-
- return 0;
-}
-
-static void arm_smmu_device_shutdown(struct platform_device *pdev)
-{
- arm_smmu_device_remove(pdev);
}
static const struct of_device_id arm_smmu_of_match[] = {
{ .compatible = "arm,smmu-v3", },
{ },
};
-MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
static struct platform_driver arm_smmu_driver = {
.driver = {
.name = "arm-smmu-v3",
.of_match_table = of_match_ptr(arm_smmu_of_match),
+ .suppress_bind_attrs = true,
},
.probe = arm_smmu_device_probe,
- .remove = arm_smmu_device_remove,
.shutdown = arm_smmu_device_shutdown,
};
-module_platform_driver(arm_smmu_driver);
-
-MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
-MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
-MODULE_LICENSE("GPL v2");
+builtin_platform_driver(arm_smmu_driver);
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index e7cbf4f..7c503a6 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1,19 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* IOMMU API for ARM architected SMMU implementations.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright (C) 2013 ARM Limited
*
* Author: Will Deacon <will.deacon@arm.com>
@@ -31,95 +19,59 @@
#include <linux/acpi.h>
#include <linux/acpi_iort.h>
-#include <linux/atomic.h>
+#include <linux/bitfield.h>
#include <linux/delay.h>
#include <linux/dma-iommu.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/io.h>
-#include <linux/io-64-nonatomic-hi-lo.h>
-#include <linux/iommu.h>
#include <linux/iopoll.h>
-#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/moduleparam.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/of_iommu.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
#include <linux/slab.h>
-#include <linux/spinlock.h>
#include <linux/amba/bus.h>
+#include <linux/fsl/mc.h>
-#include "io-pgtable.h"
-#include "arm-smmu-regs.h"
+#include "arm-smmu.h"
-#define ARM_MMU500_ACTLR_CPRE (1 << 1)
-
-#define ARM_MMU500_ACR_CACHE_LOCK (1 << 26)
-#define ARM_MMU500_ACR_S2CRB_TLBEN (1 << 10)
-#define ARM_MMU500_ACR_SMTNMB_TLBEN (1 << 8)
+/*
+ * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
+ * global register space are still, in fact, using a hypervisor to mediate it
+ * by trapping and emulating register accesses. Sadly, some deployed versions
+ * of said trapping code have bugs wherein they go horribly wrong for stores
+ * using r31 (i.e. XZR/WZR) as the source register.
+ */
+#define QCOM_DUMMY_VAL -1
#define TLB_LOOP_TIMEOUT 1000000 /* 1s! */
#define TLB_SPIN_COUNT 10
-/* Maximum number of context banks per SMMU */
-#define ARM_SMMU_MAX_CBS 128
-
-/* SMMU global address space */
-#define ARM_SMMU_GR0(smmu) ((smmu)->base)
-#define ARM_SMMU_GR1(smmu) ((smmu)->base + (1 << (smmu)->pgshift))
-
-/*
- * SMMU global address space with conditional offset to access secure
- * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
- * nsGFSYNR0: 0x450)
- */
-#define ARM_SMMU_GR0_NS(smmu) \
- ((smmu)->base + \
- ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS) \
- ? 0x400 : 0))
-
-/*
- * Some 64-bit registers only make sense to write atomically, but in such
- * cases all the data relevant to AArch32 formats lies within the lower word,
- * therefore this actually makes more sense than it might first appear.
- */
-#ifdef CONFIG_64BIT
-#define smmu_write_atomic_lq writeq_relaxed
-#else
-#define smmu_write_atomic_lq writel_relaxed
-#endif
-
-/* Translation context bank */
-#define ARM_SMMU_CB(smmu, n) ((smmu)->cb_base + ((n) << (smmu)->pgshift))
-
#define MSI_IOVA_BASE 0x8000000
#define MSI_IOVA_LENGTH 0x100000
static int force_stage;
+/*
+ * not really modular, but the easiest way to keep compat with existing
+ * bootargs behaviour is to continue using module_param() here.
+ */
module_param(force_stage, int, S_IRUGO);
MODULE_PARM_DESC(force_stage,
"Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
-static bool disable_bypass;
+static bool disable_bypass =
+ IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT);
module_param(disable_bypass, bool, S_IRUGO);
MODULE_PARM_DESC(disable_bypass,
"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
-enum arm_smmu_arch_version {
- ARM_SMMU_V1,
- ARM_SMMU_V1_64K,
- ARM_SMMU_V2,
-};
-
-enum arm_smmu_implementation {
- GENERIC_SMMU,
- ARM_MMU500,
- CAVIUM_SMMUV2,
-};
-
struct arm_smmu_s2cr {
struct iommu_group *group;
int count;
@@ -157,133 +109,27 @@
#define for_each_cfg_sme(fw, i, idx) \
for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
-struct arm_smmu_device {
- struct device *dev;
-
- void __iomem *base;
- void __iomem *cb_base;
- unsigned long pgshift;
-
-#define ARM_SMMU_FEAT_COHERENT_WALK (1 << 0)
-#define ARM_SMMU_FEAT_STREAM_MATCH (1 << 1)
-#define ARM_SMMU_FEAT_TRANS_S1 (1 << 2)
-#define ARM_SMMU_FEAT_TRANS_S2 (1 << 3)
-#define ARM_SMMU_FEAT_TRANS_NESTED (1 << 4)
-#define ARM_SMMU_FEAT_TRANS_OPS (1 << 5)
-#define ARM_SMMU_FEAT_VMID16 (1 << 6)
-#define ARM_SMMU_FEAT_FMT_AARCH64_4K (1 << 7)
-#define ARM_SMMU_FEAT_FMT_AARCH64_16K (1 << 8)
-#define ARM_SMMU_FEAT_FMT_AARCH64_64K (1 << 9)
-#define ARM_SMMU_FEAT_FMT_AARCH32_L (1 << 10)
-#define ARM_SMMU_FEAT_FMT_AARCH32_S (1 << 11)
-#define ARM_SMMU_FEAT_EXIDS (1 << 12)
- u32 features;
-
-#define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
- u32 options;
- enum arm_smmu_arch_version version;
- enum arm_smmu_implementation model;
-
- u32 num_context_banks;
- u32 num_s2_context_banks;
- DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
- struct arm_smmu_cb *cbs;
- atomic_t irptndx;
-
- u32 num_mapping_groups;
- u16 streamid_mask;
- u16 smr_mask_mask;
- struct arm_smmu_smr *smrs;
- struct arm_smmu_s2cr *s2crs;
- struct mutex stream_map_mutex;
-
- unsigned long va_size;
- unsigned long ipa_size;
- unsigned long pa_size;
- unsigned long pgsize_bitmap;
-
- u32 num_global_irqs;
- u32 num_context_irqs;
- unsigned int *irqs;
-
- u32 cavium_id_base; /* Specific to Cavium */
-
- spinlock_t global_sync_lock;
-
- /* IOMMU core code handle */
- struct iommu_device iommu;
-};
-
-enum arm_smmu_context_fmt {
- ARM_SMMU_CTX_FMT_NONE,
- ARM_SMMU_CTX_FMT_AARCH64,
- ARM_SMMU_CTX_FMT_AARCH32_L,
- ARM_SMMU_CTX_FMT_AARCH32_S,
-};
-
-struct arm_smmu_cfg {
- u8 cbndx;
- u8 irptndx;
- union {
- u16 asid;
- u16 vmid;
- };
- u32 cbar;
- enum arm_smmu_context_fmt fmt;
-};
-#define INVALID_IRPTNDX 0xff
-
-enum arm_smmu_domain_stage {
- ARM_SMMU_DOMAIN_S1 = 0,
- ARM_SMMU_DOMAIN_S2,
- ARM_SMMU_DOMAIN_NESTED,
- ARM_SMMU_DOMAIN_BYPASS,
-};
-
-struct arm_smmu_domain {
- struct arm_smmu_device *smmu;
- struct io_pgtable_ops *pgtbl_ops;
- const struct iommu_gather_ops *tlb_ops;
- struct arm_smmu_cfg cfg;
- enum arm_smmu_domain_stage stage;
- struct mutex init_mutex; /* Protects smmu pointer */
- spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */
- struct iommu_domain domain;
-};
-
-struct arm_smmu_option_prop {
- u32 opt;
- const char *prop;
-};
-
-static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);
-
static bool using_legacy_binding, using_generic_binding;
-static struct arm_smmu_option_prop arm_smmu_options[] = {
- { ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
- { 0, NULL},
-};
+/*
+ * Take a runtime-PM reference on the SMMU device when runtime PM is enabled
+ * for it. Returns the pm_runtime_get_sync() result, or 0 when runtime PM is
+ * not enabled.
+ */
+static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
+{
+	if (!pm_runtime_enabled(smmu->dev))
+		return 0;
+
+	return pm_runtime_get_sync(smmu->dev);
+}
+
+/*
+ * Drop a runtime-PM reference on the SMMU device; a no-op when runtime PM is
+ * not enabled for it.
+ */
+static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
+{
+	if (!pm_runtime_enabled(smmu->dev))
+		return;
+
+	pm_runtime_put(smmu->dev);
+}
static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
{
return container_of(dom, struct arm_smmu_domain, domain);
}
-static void parse_driver_options(struct arm_smmu_device *smmu)
-{
- int i = 0;
-
- do {
- if (of_property_read_bool(smmu->dev->of_node,
- arm_smmu_options[i].prop)) {
- smmu->options |= arm_smmu_options[i].opt;
- dev_notice(smmu->dev, "option %s\n",
- arm_smmu_options[i].prop);
- }
- } while (arm_smmu_options[++i].opt);
-}
-
static struct device_node *dev_get_dev_node(struct device *dev)
{
if (dev_is_pci(dev)) {
@@ -310,7 +156,7 @@
int err;
of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
- "#stream-id-cells", 0)
+ "#stream-id-cells", -1)
if (it->node == np) {
*(void **)data = dev;
return 1;
@@ -392,15 +238,17 @@
}
/* Wait for any pending TLB invalidations to complete */
-static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
- void __iomem *sync, void __iomem *status)
+static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
+ int sync, int status)
{
unsigned int spin_cnt, delay;
+ u32 reg;
- writel_relaxed(0, sync);
+ arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
- if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
+ reg = arm_smmu_readl(smmu, page, status);
+ if (!(reg & sTLBGSTATUS_GSACTIVE))
return;
cpu_relax();
}
@@ -412,12 +260,11 @@
static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
{
- void __iomem *base = ARM_SMMU_GR0(smmu);
unsigned long flags;
spin_lock_irqsave(&smmu->global_sync_lock, flags);
- __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC,
- base + ARM_SMMU_GR0_sTLBGSTATUS);
+ __arm_smmu_tlb_sync(smmu, ARM_SMMU_GR0, ARM_SMMU_GR0_sTLBGSYNC,
+ ARM_SMMU_GR0_sTLBGSTATUS);
spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
}
@@ -425,12 +272,11 @@
{
struct arm_smmu_domain *smmu_domain = cookie;
struct arm_smmu_device *smmu = smmu_domain->smmu;
- void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
unsigned long flags;
spin_lock_irqsave(&smmu_domain->cb_lock, flags);
- __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC,
- base + ARM_SMMU_CB_TLBSTATUS);
+ __arm_smmu_tlb_sync(smmu, ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx),
+ ARM_SMMU_CB_TLBSYNC, ARM_SMMU_CB_TLBSTATUS);
spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
}
@@ -444,10 +290,13 @@
static void arm_smmu_tlb_inv_context_s1(void *cookie)
{
struct arm_smmu_domain *smmu_domain = cookie;
- struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
- void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
-
- writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
+ /*
+ * The TLBI write may be relaxed, so ensure that PTEs cleared by the
+ * current CPU are visible beforehand.
+ */
+ wmb();
+ arm_smmu_cb_write(smmu_domain->smmu, smmu_domain->cfg.cbndx,
+ ARM_SMMU_CB_S1_TLBIASID, smmu_domain->cfg.asid);
arm_smmu_tlb_sync_context(cookie);
}
@@ -455,112 +304,167 @@
{
struct arm_smmu_domain *smmu_domain = cookie;
struct arm_smmu_device *smmu = smmu_domain->smmu;
- void __iomem *base = ARM_SMMU_GR0(smmu);
- writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
+ /* See above */
+ wmb();
+ arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
arm_smmu_tlb_sync_global(smmu);
}
-static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
- size_t granule, bool leaf, void *cookie)
+static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
+ size_t granule, bool leaf, void *cookie)
{
struct arm_smmu_domain *smmu_domain = cookie;
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
- bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
- void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
+ int reg, idx = cfg->cbndx;
- if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
+ if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
wmb();
- if (stage1) {
- reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
+ reg = leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
- if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
- iova &= ~12UL;
- iova |= cfg->asid;
- do {
- writel_relaxed(iova, reg);
- iova += granule;
- } while (size -= granule);
- } else {
- iova >>= 12;
- iova |= (u64)cfg->asid << 48;
- do {
- writeq_relaxed(iova, reg);
- iova += granule >> 12;
- } while (size -= granule);
- }
- } else {
- reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
- ARM_SMMU_CB_S2_TLBIIPAS2;
- iova >>= 12;
+ if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
+ iova = (iova >> 12) << 12;
+ iova |= cfg->asid;
do {
- smmu_write_atomic_lq(iova, reg);
+ arm_smmu_cb_write(smmu, idx, reg, iova);
+ iova += granule;
+ } while (size -= granule);
+ } else {
+ iova >>= 12;
+ iova |= (u64)cfg->asid << 48;
+ do {
+ arm_smmu_cb_writeq(smmu, idx, reg, iova);
iova += granule >> 12;
} while (size -= granule);
}
}
+/*
+ * Issue one TLBIIPAS2 (or TLBIIPAS2L when @leaf is set) write per @granule
+ * across [@iova, @iova + @size) on the domain's context bank. The address is
+ * written shifted right by 12 bits, using a 64-bit write for AArch64-format
+ * contexts and a 32-bit write otherwise. No sync is performed here.
+ *
+ * As in the do/while form, at least one write is issued and @size is
+ * expected to be a non-zero multiple of @granule.
+ */
+static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
+				      size_t granule, bool leaf, void *cookie)
+{
+	struct arm_smmu_domain *smmu_domain = cookie;
+	struct arm_smmu_device *smmu = smmu_domain->smmu;
+	const int cb = smmu_domain->cfg.cbndx;
+	const int tlbi_reg = leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
+				    ARM_SMMU_CB_S2_TLBIIPAS2;
+	const bool wide = smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64;
+	const size_t step = granule >> 12;
+
+	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
+		wmb();
+
+	iova >>= 12;
+	do {
+		if (wide)
+			arm_smmu_cb_writeq(smmu, cb, tlbi_reg, iova);
+		else
+			arm_smmu_cb_write(smmu, cb, tlbi_reg, iova);
+
+		iova += step;
+		size -= granule;
+	} while (size);
+}
+
/*
* On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
* almost negligible, but the benefit of getting the first one in as far ahead
* of the sync as possible is significant, hence we don't just make this a
- * no-op and set .tlb_sync to arm_smmu_inv_context_s2() as you might think.
+ * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think.
*/
static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
size_t granule, bool leaf, void *cookie)
{
struct arm_smmu_domain *smmu_domain = cookie;
- void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
- if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
+ if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
wmb();
- writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
+ arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
}
-static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
- .tlb_flush_all = arm_smmu_tlb_inv_context_s1,
- .tlb_add_flush = arm_smmu_tlb_inv_range_nosync,
- .tlb_sync = arm_smmu_tlb_sync_context,
+static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
+{
+ struct arm_smmu_domain *smmu_domain = cookie;
+ const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops;
+
+ ops->tlb_inv_range(iova, size, granule, false, cookie);
+ ops->tlb_sync(cookie);
+}
+
+static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
+{
+ struct arm_smmu_domain *smmu_domain = cookie;
+ const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops;
+
+ ops->tlb_inv_range(iova, size, granule, true, cookie);
+ ops->tlb_sync(cookie);
+}
+
+static void arm_smmu_tlb_add_page(struct iommu_iotlb_gather *gather,
+ unsigned long iova, size_t granule,
+ void *cookie)
+{
+ struct arm_smmu_domain *smmu_domain = cookie;
+ const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops;
+
+ ops->tlb_inv_range(iova, granule, granule, true, cookie);
+}
+
+static const struct arm_smmu_flush_ops arm_smmu_s1_tlb_ops = {
+ .tlb = {
+ .tlb_flush_all = arm_smmu_tlb_inv_context_s1,
+ .tlb_flush_walk = arm_smmu_tlb_inv_walk,
+ .tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
+ .tlb_add_page = arm_smmu_tlb_add_page,
+ },
+ .tlb_inv_range = arm_smmu_tlb_inv_range_s1,
+ .tlb_sync = arm_smmu_tlb_sync_context,
};
-static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
- .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
- .tlb_add_flush = arm_smmu_tlb_inv_range_nosync,
- .tlb_sync = arm_smmu_tlb_sync_context,
+static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
+ .tlb = {
+ .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
+ .tlb_flush_walk = arm_smmu_tlb_inv_walk,
+ .tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
+ .tlb_add_page = arm_smmu_tlb_add_page,
+ },
+ .tlb_inv_range = arm_smmu_tlb_inv_range_s2,
+ .tlb_sync = arm_smmu_tlb_sync_context,
};
-static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
- .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
- .tlb_add_flush = arm_smmu_tlb_inv_vmid_nosync,
- .tlb_sync = arm_smmu_tlb_sync_vmid,
+static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
+ .tlb = {
+ .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
+ .tlb_flush_walk = arm_smmu_tlb_inv_walk,
+ .tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
+ .tlb_add_page = arm_smmu_tlb_add_page,
+ },
+ .tlb_inv_range = arm_smmu_tlb_inv_vmid_nosync,
+ .tlb_sync = arm_smmu_tlb_sync_vmid,
};
static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
{
- u32 fsr, fsynr;
+ u32 fsr, fsynr, cbfrsynra;
unsigned long iova;
struct iommu_domain *domain = dev;
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
- struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
struct arm_smmu_device *smmu = smmu_domain->smmu;
- void __iomem *cb_base;
+ int idx = smmu_domain->cfg.cbndx;
- cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
- fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
-
+ fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
if (!(fsr & FSR_FAULT))
return IRQ_NONE;
- fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
- iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
+ fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
+ iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
+ cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
dev_err_ratelimited(smmu->dev,
- "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
- fsr, iova, fsynr, cfg->cbndx);
+ "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
+ fsr, iova, fsynr, cbfrsynra, idx);
- writel(fsr, cb_base + ARM_SMMU_CB_FSR);
+ arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
return IRQ_HANDLED;
}
@@ -568,12 +472,11 @@
{
u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
struct arm_smmu_device *smmu = dev;
- void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);
- gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
- gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
- gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
- gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);
+ gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
+ gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0);
+ gfsynr1 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR1);
+ gfsynr2 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR2);
if (!gfsr)
return IRQ_NONE;
@@ -584,7 +487,7 @@
"\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
gfsr, gfsynr0, gfsynr1, gfsynr2);
- writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
+ arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr);
return IRQ_HANDLED;
}
@@ -597,16 +500,16 @@
cb->cfg = cfg;
- /* TTBCR */
+ /* TCR */
if (stage1) {
if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
} else {
cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
- cb->tcr[1] |= TTBCR2_SEP_UPSTREAM;
+ cb->tcr[1] |= FIELD_PREP(TCR2_SEP, TCR2_SEP_UPSTREAM);
if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
- cb->tcr[1] |= TTBCR2_AS;
+ cb->tcr[1] |= TCR2_AS;
}
} else {
cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
@@ -619,9 +522,9 @@
cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
} else {
cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
- cb->ttbr[0] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
+ cb->ttbr[0] |= FIELD_PREP(TTBRn_ASID, cfg->asid);
cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
- cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
+ cb->ttbr[1] |= FIELD_PREP(TTBRn_ASID, cfg->asid);
}
} else {
cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
@@ -645,74 +548,71 @@
bool stage1;
struct arm_smmu_cb *cb = &smmu->cbs[idx];
struct arm_smmu_cfg *cfg = cb->cfg;
- void __iomem *cb_base, *gr1_base;
-
- cb_base = ARM_SMMU_CB(smmu, idx);
/* Unassigned context banks only need disabling */
if (!cfg) {
- writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
+ arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, 0);
return;
}
- gr1_base = ARM_SMMU_GR1(smmu);
stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
/* CBA2R */
if (smmu->version > ARM_SMMU_V1) {
if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
- reg = CBA2R_RW64_64BIT;
+ reg = CBA2R_VA64;
else
- reg = CBA2R_RW64_32BIT;
+ reg = 0;
/* 16-bit VMIDs live in CBA2R */
if (smmu->features & ARM_SMMU_FEAT_VMID16)
- reg |= cfg->vmid << CBA2R_VMID_SHIFT;
+ reg |= FIELD_PREP(CBA2R_VMID16, cfg->vmid);
- writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(idx));
+ arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBA2R(idx), reg);
}
/* CBAR */
- reg = cfg->cbar;
+ reg = FIELD_PREP(CBAR_TYPE, cfg->cbar);
if (smmu->version < ARM_SMMU_V2)
- reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;
+ reg |= FIELD_PREP(CBAR_IRPTNDX, cfg->irptndx);
/*
* Use the weakest shareability/memory types, so they are
* overridden by the ttbcr/pte.
*/
if (stage1) {
- reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
- (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
+ reg |= FIELD_PREP(CBAR_S1_BPSHCFG, CBAR_S1_BPSHCFG_NSH) |
+ FIELD_PREP(CBAR_S1_MEMATTR, CBAR_S1_MEMATTR_WB);
} else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
/* 8-bit VMIDs live in CBAR */
- reg |= cfg->vmid << CBAR_VMID_SHIFT;
+ reg |= FIELD_PREP(CBAR_VMID, cfg->vmid);
}
- writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(idx));
+ arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(idx), reg);
/*
- * TTBCR
+ * TCR
* We must write this before the TTBRs, since it determines the
* access behaviour of some fields (in particular, ASID[15:8]).
*/
if (stage1 && smmu->version > ARM_SMMU_V1)
- writel_relaxed(cb->tcr[1], cb_base + ARM_SMMU_CB_TTBCR2);
- writel_relaxed(cb->tcr[0], cb_base + ARM_SMMU_CB_TTBCR);
+ arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR2, cb->tcr[1]);
+ arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR, cb->tcr[0]);
/* TTBRs */
if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
- writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
- writel_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
- writel_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
+ arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_CONTEXTIDR, cfg->asid);
+ arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
+ arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR1, cb->ttbr[1]);
} else {
- writeq_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
+ arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
if (stage1)
- writeq_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
+ arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR1,
+ cb->ttbr[1]);
}
/* MAIRs (stage-1 only) */
if (stage1) {
- writel_relaxed(cb->mair[0], cb_base + ARM_SMMU_CB_S1_MAIR0);
- writel_relaxed(cb->mair[1], cb_base + ARM_SMMU_CB_S1_MAIR1);
+ arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR0, cb->mair[0]);
+ arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR1, cb->mair[1]);
}
/* SCTLR */
@@ -722,7 +622,7 @@
if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
reg |= SCTLR_E;
- writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
+ arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg);
}
static int arm_smmu_init_domain_context(struct iommu_domain *domain,
@@ -812,7 +712,7 @@
ias = min(ias, 32UL);
oas = min(oas, 32UL);
}
- smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
+ smmu_domain->flush_ops = &arm_smmu_s1_tlb_ops;
break;
case ARM_SMMU_DOMAIN_NESTED:
/*
@@ -832,9 +732,9 @@
oas = min(oas, 40UL);
}
if (smmu->version == ARM_SMMU_V2)
- smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
+ smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v2;
else
- smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
+ smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v1;
break;
default:
ret = -EINVAL;
@@ -854,22 +754,29 @@
}
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
- cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
+ cfg->vmid = cfg->cbndx + 1;
else
- cfg->asid = cfg->cbndx + smmu->cavium_id_base;
+ cfg->asid = cfg->cbndx;
+
+ smmu_domain->smmu = smmu;
+ if (smmu->impl && smmu->impl->init_context) {
+ ret = smmu->impl->init_context(smmu_domain);
+ if (ret)
+ goto out_unlock;
+ }
pgtbl_cfg = (struct io_pgtable_cfg) {
.pgsize_bitmap = smmu->pgsize_bitmap,
.ias = ias,
.oas = oas,
- .tlb = smmu_domain->tlb_ops,
+ .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
+ .tlb = &smmu_domain->flush_ops->tlb,
.iommu_dev = smmu->dev,
};
- if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
- pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
+ if (smmu_domain->non_strict)
+ pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
- smmu_domain->smmu = smmu;
pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
if (!pgtbl_ops) {
ret = -ENOMEM;
@@ -905,6 +812,7 @@
return 0;
out_clear_smmu:
+ __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
smmu_domain->smmu = NULL;
out_unlock:
mutex_unlock(&smmu_domain->init_mutex);
@@ -916,11 +824,15 @@
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_device *smmu = smmu_domain->smmu;
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
- int irq;
+ int ret, irq;
if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
return;
+ ret = arm_smmu_rpm_get(smmu);
+ if (ret < 0)
+ return;
+
/*
* Disable the context bank and free the page tables before freeing
* it.
@@ -935,6 +847,8 @@
free_io_pgtable_ops(smmu_domain->pgtbl_ops);
__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
+
+ arm_smmu_rpm_put(smmu);
}
static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
@@ -982,24 +896,24 @@
static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
{
struct arm_smmu_smr *smr = smmu->smrs + idx;
- u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;
+ u32 reg = FIELD_PREP(SMR_ID, smr->id) | FIELD_PREP(SMR_MASK, smr->mask);
if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
reg |= SMR_VALID;
- writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
+ arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(idx), reg);
}
static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
{
struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
- u32 reg = (s2cr->type & S2CR_TYPE_MASK) << S2CR_TYPE_SHIFT |
- (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
- (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;
+ u32 reg = FIELD_PREP(S2CR_TYPE, s2cr->type) |
+ FIELD_PREP(S2CR_CBNDX, s2cr->cbndx) |
+ FIELD_PREP(S2CR_PRIVCFG, s2cr->privcfg);
if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
smmu->smrs[idx].valid)
reg |= S2CR_EXIDVALID;
- writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
+ arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_S2CR(idx), reg);
}
static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
@@ -1015,7 +929,6 @@
*/
static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
{
- void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
u32 smr;
if (!smmu->smrs)
@@ -1026,15 +939,15 @@
* bits are set, so check each one separately. We can reject
* masters later if they try to claim IDs outside these masks.
*/
- smr = smmu->streamid_mask << SMR_ID_SHIFT;
- writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
- smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
- smmu->streamid_mask = smr >> SMR_ID_SHIFT;
+ smr = FIELD_PREP(SMR_ID, smmu->streamid_mask);
+ arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(0), smr);
+ smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(0));
+ smmu->streamid_mask = FIELD_GET(SMR_ID, smr);
- smr = smmu->streamid_mask << SMR_MASK_SHIFT;
- writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
- smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
- smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;
+ smr = FIELD_PREP(SMR_MASK, smmu->streamid_mask);
+ arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(0), smr);
+ smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(0));
+ smmu->smr_mask_mask = FIELD_GET(SMR_MASK, smr);
}
static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
@@ -1093,7 +1006,7 @@
static int arm_smmu_master_alloc_smes(struct device *dev)
{
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
struct arm_smmu_device *smmu = cfg->smmu;
struct arm_smmu_smr *smrs = smmu->smrs;
@@ -1103,8 +1016,8 @@
mutex_lock(&smmu->stream_map_mutex);
/* Figure out a viable stream map entry allocation */
for_each_cfg_sme(fwspec, i, idx) {
- u16 sid = fwspec->ids[i];
- u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
+ u16 sid = FIELD_GET(SMR_ID, fwspec->ids[i]);
+ u16 mask = FIELD_GET(SMR_MASK, fwspec->ids[i]);
if (idx != INVALID_SMENDX) {
ret = -EEXIST;
@@ -1196,7 +1109,7 @@
static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
int ret;
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct arm_smmu_device *smmu;
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
@@ -1216,10 +1129,15 @@
return -ENODEV;
smmu = fwspec_smmu(fwspec);
+
+ ret = arm_smmu_rpm_get(smmu);
+ if (ret < 0)
+ return ret;
+
/* Ensure that the domain is finalised */
ret = arm_smmu_init_domain_context(domain, smmu);
if (ret < 0)
- return ret;
+ goto rpm_put;
/*
* Sanity check the domain. We don't support domains across
@@ -1229,41 +1147,75 @@
dev_err(dev,
"cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
- return -EINVAL;
+ ret = -EINVAL;
+ goto rpm_put;
}
/* Looks ok, so add the device to the domain */
- return arm_smmu_domain_add_master(smmu_domain, fwspec);
+ ret = arm_smmu_domain_add_master(smmu_domain, fwspec);
+
+rpm_put:
+ arm_smmu_rpm_put(smmu);
+ return ret;
}
static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot)
{
struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+ struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
+ int ret;
if (!ops)
return -ENODEV;
- return ops->map(ops, iova, paddr, size, prot);
+ arm_smmu_rpm_get(smmu);
+ ret = ops->map(ops, iova, paddr, size, prot);
+ arm_smmu_rpm_put(smmu);
+
+ return ret;
}
static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
- size_t size)
+ size_t size, struct iommu_iotlb_gather *gather)
{
struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+ struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
+ size_t ret;
if (!ops)
return 0;
- return ops->unmap(ops, iova, size);
+ arm_smmu_rpm_get(smmu);
+ ret = ops->unmap(ops, iova, size, gather);
+ arm_smmu_rpm_put(smmu);
+
+ return ret;
}
-static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
+static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
- if (smmu_domain->tlb_ops)
- smmu_domain->tlb_ops->tlb_sync(smmu_domain);
+ if (smmu_domain->flush_ops) {
+ arm_smmu_rpm_get(smmu);
+ smmu_domain->flush_ops->tlb.tlb_flush_all(smmu_domain);
+ arm_smmu_rpm_put(smmu);
+ }
+}
+
+static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
+ struct iommu_iotlb_gather *gather)
+{
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+
+ if (smmu_domain->flush_ops) {
+ arm_smmu_rpm_get(smmu);
+ smmu_domain->flush_ops->tlb_sync(smmu_domain);
+ arm_smmu_rpm_put(smmu);
+ }
}
static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
@@ -1274,23 +1226,25 @@
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
struct device *dev = smmu->dev;
- void __iomem *cb_base;
+ void __iomem *reg;
u32 tmp;
u64 phys;
unsigned long va, flags;
+ int ret, idx = cfg->cbndx;
- cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
+ ret = arm_smmu_rpm_get(smmu);
+ if (ret < 0)
+ return 0;
spin_lock_irqsave(&smmu_domain->cb_lock, flags);
- /* ATS1 registers can only be written atomically */
va = iova & ~0xfffUL;
- if (smmu->version == ARM_SMMU_V2)
- smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
- else /* Register is only 32-bit in v1 */
- writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
+ if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
+ arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
+ else
+ arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
- if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
- !(tmp & ATSR_ACTIVE), 5, 50)) {
+ reg = arm_smmu_page(smmu, ARM_SMMU_CB(smmu, idx)) + ARM_SMMU_CB_ATSR;
+ if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ATSR_ACTIVE), 5, 50)) {
spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
dev_err(dev,
"iova to phys timed out on %pad. Falling back to software table walk.\n",
@@ -1298,7 +1252,7 @@
return ops->iova_to_phys(ops, iova);
}
- phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
+ phys = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_PAR);
spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
if (phys & CB_PAR_F) {
dev_err(dev, "translation fault!\n");
@@ -1306,6 +1260,8 @@
return 0;
}
+ arm_smmu_rpm_put(smmu);
+
return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
}
@@ -1344,16 +1300,11 @@
}
}
-static int arm_smmu_match_node(struct device *dev, void *data)
-{
- return dev->fwnode == data;
-}
-
static
struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
{
- struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
- fwnode, arm_smmu_match_node);
+ struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
+ fwnode);
put_device(dev);
return dev ? dev_get_drvdata(dev) : NULL;
}
@@ -1362,7 +1313,7 @@
{
struct arm_smmu_device *smmu;
struct arm_smmu_master_cfg *cfg;
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
int i, ret;
if (using_legacy_binding) {
@@ -1373,7 +1324,7 @@
* will allocate/initialise a new one. Thus we need to update fwspec for
* later use.
*/
- fwspec = dev->iommu_fwspec;
+ fwspec = dev_iommu_fwspec_get(dev);
if (ret)
goto out_free;
} else if (fwspec && fwspec->ops == &arm_smmu_ops) {
@@ -1384,8 +1335,8 @@
ret = -EINVAL;
for (i = 0; i < fwspec->num_ids; i++) {
- u16 sid = fwspec->ids[i];
- u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
+ u16 sid = FIELD_GET(SMR_ID, fwspec->ids[i]);
+ u16 mask = FIELD_GET(SMR_MASK, fwspec->ids[i]);
if (sid & ~smmu->streamid_mask) {
dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
@@ -1410,12 +1361,21 @@
while (i--)
cfg->smendx[i] = INVALID_SMENDX;
+ ret = arm_smmu_rpm_get(smmu);
+ if (ret < 0)
+ goto out_cfg_free;
+
ret = arm_smmu_master_alloc_smes(dev);
+ arm_smmu_rpm_put(smmu);
+
if (ret)
goto out_cfg_free;
iommu_device_link(&smmu->iommu, dev);
+ device_link_add(dev, smmu->dev,
+ DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);
+
return 0;
out_cfg_free:
@@ -1427,10 +1387,10 @@
static void arm_smmu_remove_device(struct device *dev)
{
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct arm_smmu_master_cfg *cfg;
struct arm_smmu_device *smmu;
-
+ int ret;
if (!fwspec || fwspec->ops != &arm_smmu_ops)
return;
@@ -1438,8 +1398,15 @@
cfg = fwspec->iommu_priv;
smmu = cfg->smmu;
+ ret = arm_smmu_rpm_get(smmu);
+ if (ret < 0)
+ return;
+
iommu_device_unlink(&smmu->iommu, dev);
arm_smmu_master_free_smes(fwspec);
+
+ arm_smmu_rpm_put(smmu);
+
iommu_group_remove_device(dev);
kfree(fwspec->iommu_priv);
iommu_fwspec_free(dev);
@@ -1447,7 +1414,7 @@
static struct iommu_group *arm_smmu_device_group(struct device *dev)
{
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
struct iommu_group *group = NULL;
int i, idx;
@@ -1465,6 +1432,8 @@
if (dev_is_pci(dev))
group = pci_device_group(dev);
+ else if (dev_is_fsl_mc(dev))
+ group = fsl_mc_device_group(dev);
else
group = generic_device_group(dev);
@@ -1476,15 +1445,27 @@
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
- if (domain->type != IOMMU_DOMAIN_UNMANAGED)
- return -EINVAL;
-
- switch (attr) {
- case DOMAIN_ATTR_NESTING:
- *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
- return 0;
+ switch(domain->type) {
+ case IOMMU_DOMAIN_UNMANAGED:
+ switch (attr) {
+ case DOMAIN_ATTR_NESTING:
+ *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
+ return 0;
+ default:
+ return -ENODEV;
+ }
+ break;
+ case IOMMU_DOMAIN_DMA:
+ switch (attr) {
+ case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
+ *(int *)data = smmu_domain->non_strict;
+ return 0;
+ default:
+ return -ENODEV;
+ }
+ break;
default:
- return -ENODEV;
+ return -EINVAL;
}
}
@@ -1494,28 +1475,38 @@
int ret = 0;
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
- if (domain->type != IOMMU_DOMAIN_UNMANAGED)
- return -EINVAL;
-
mutex_lock(&smmu_domain->init_mutex);
- switch (attr) {
- case DOMAIN_ATTR_NESTING:
- if (smmu_domain->smmu) {
- ret = -EPERM;
- goto out_unlock;
+ switch(domain->type) {
+ case IOMMU_DOMAIN_UNMANAGED:
+ switch (attr) {
+ case DOMAIN_ATTR_NESTING:
+ if (smmu_domain->smmu) {
+ ret = -EPERM;
+ goto out_unlock;
+ }
+
+ if (*(int *)data)
+ smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
+ else
+ smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
+ break;
+ default:
+ ret = -ENODEV;
}
-
- if (*(int *)data)
- smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
- else
- smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
-
+ break;
+ case IOMMU_DOMAIN_DMA:
+ switch (attr) {
+ case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
+ smmu_domain->non_strict = *(int *)data;
+ break;
+ default:
+ ret = -ENODEV;
+ }
break;
default:
- ret = -ENODEV;
+ ret = -EINVAL;
}
-
out_unlock:
mutex_unlock(&smmu_domain->init_mutex);
return ret;
@@ -1526,12 +1517,12 @@
u32 mask, fwid = 0;
if (args->args_count > 0)
- fwid |= (u16)args->args[0];
+ fwid |= FIELD_PREP(SMR_ID, args->args[0]);
if (args->args_count > 1)
- fwid |= (u16)args->args[1] << SMR_MASK_SHIFT;
+ fwid |= FIELD_PREP(SMR_MASK, args->args[1]);
else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
- fwid |= (u16)mask << SMR_MASK_SHIFT;
+ fwid |= FIELD_PREP(SMR_MASK, mask);
return iommu_fwspec_add_ids(dev, &fwid, 1);
}
@@ -1568,7 +1559,7 @@
.attach_dev = arm_smmu_attach_dev,
.map = arm_smmu_map,
.unmap = arm_smmu_unmap,
- .flush_iotlb_all = arm_smmu_iotlb_sync,
+ .flush_iotlb_all = arm_smmu_flush_iotlb_all,
.iotlb_sync = arm_smmu_iotlb_sync,
.iova_to_phys = arm_smmu_iova_to_phys,
.add_device = arm_smmu_add_device,
@@ -1584,13 +1575,12 @@
static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
{
- void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
int i;
- u32 reg, major;
+ u32 reg;
/* clear global FSR */
- reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
- writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
+ reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
+ arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, reg);
/*
* Reset stream mapping groups: Initial values mark all SMRn as
@@ -1599,47 +1589,17 @@
for (i = 0; i < smmu->num_mapping_groups; ++i)
arm_smmu_write_sme(smmu, i);
- if (smmu->model == ARM_MMU500) {
- /*
- * Before clearing ARM_MMU500_ACTLR_CPRE, need to
- * clear CACHE_LOCK bit of ACR first. And, CACHE_LOCK
- * bit is only present in MMU-500r2 onwards.
- */
- reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
- major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
- reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
- if (major >= 2)
- reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
- /*
- * Allow unmatched Stream IDs to allocate bypass
- * TLB entries for reduced latency.
- */
- reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
- writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
- }
-
/* Make sure all context banks are disabled and clear CB_FSR */
for (i = 0; i < smmu->num_context_banks; ++i) {
- void __iomem *cb_base = ARM_SMMU_CB(smmu, i);
-
arm_smmu_write_context_bank(smmu, i);
- writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
- /*
- * Disable MMU-500's not-particularly-beneficial next-page
- * prefetcher for the sake of errata #841119 and #826419.
- */
- if (smmu->model == ARM_MMU500) {
- reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
- reg &= ~ARM_MMU500_ACTLR_CPRE;
- writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
- }
+ arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, FSR_FAULT);
}
/* Invalidate the TLB, just in case */
- writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
- writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
+ arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLH, QCOM_DUMMY_VAL);
+ arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLNSNH, QCOM_DUMMY_VAL);
- reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
+ reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);
/* Enable fault reporting */
reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
@@ -1658,7 +1618,7 @@
reg &= ~sCR0_FB;
/* Don't upgrade barriers */
- reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);
+ reg &= ~(sCR0_BSU);
if (smmu->features & ARM_SMMU_FEAT_VMID16)
reg |= sCR0_VMID16EN;
@@ -1666,9 +1626,12 @@
if (smmu->features & ARM_SMMU_FEAT_EXIDS)
reg |= sCR0_EXIDENABLE;
+ if (smmu->impl && smmu->impl->reset)
+ smmu->impl->reset(smmu);
+
/* Push the button */
arm_smmu_tlb_sync_global(smmu);
- writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
+ arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);
}
static int arm_smmu_id_size_to_bits(int size)
@@ -1692,8 +1655,7 @@
static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
{
- unsigned long size;
- void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
+ unsigned int size;
u32 id;
bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
int i;
@@ -1703,7 +1665,7 @@
smmu->version == ARM_SMMU_V2 ? 2 : 1);
/* ID0 */
- id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);
+ id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID0);
/* Restrict available stages based on module parameter */
if (force_stage == 1)
@@ -1757,12 +1719,12 @@
smmu->features |= ARM_SMMU_FEAT_EXIDS;
size = 1 << 16;
} else {
- size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
+ size = 1 << FIELD_GET(ID0_NUMSIDB, id);
}
smmu->streamid_mask = size - 1;
if (id & ID0_SMS) {
smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
- size = (id >> ID0_NUMSMRG_SHIFT) & ID0_NUMSMRG_MASK;
+ size = FIELD_GET(ID0_NUMSMRG, id);
if (size == 0) {
dev_err(smmu->dev,
"stream-matching supported, but no SMRs present!\n");
@@ -1776,7 +1738,7 @@
return -ENOMEM;
dev_notice(smmu->dev,
- "\tstream matching with %lu register groups", size);
+ "\tstream matching with %u register groups", size);
}
/* s2cr->type == 0 means translation, so initialise explicitly */
smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
@@ -1797,49 +1759,38 @@
}
/* ID1 */
- id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
+ id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID1);
smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
/* Check for size mismatch of SMMU address space from mapped region */
- size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
- size <<= smmu->pgshift;
- if (smmu->cb_base != gr0_base + size)
+ size = 1 << (FIELD_GET(ID1_NUMPAGENDXB, id) + 1);
+ if (smmu->numpage != 2 * size << smmu->pgshift)
dev_warn(smmu->dev,
- "SMMU address space size (0x%lx) differs from mapped region size (0x%tx)!\n",
- size * 2, (smmu->cb_base - gr0_base) * 2);
+ "SMMU address space size (0x%x) differs from mapped region size (0x%x)!\n",
+ 2 * size << smmu->pgshift, smmu->numpage);
+ /* Now properly encode NUMPAGE to subsequently derive SMMU_CB_BASE */
+ smmu->numpage = size;
- smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
- smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
+ smmu->num_s2_context_banks = FIELD_GET(ID1_NUMS2CB, id);
+ smmu->num_context_banks = FIELD_GET(ID1_NUMCB, id);
if (smmu->num_s2_context_banks > smmu->num_context_banks) {
dev_err(smmu->dev, "impossible number of S2 context banks!\n");
return -ENODEV;
}
dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
smmu->num_context_banks, smmu->num_s2_context_banks);
- /*
- * Cavium CN88xx erratum #27704.
- * Ensure ASID and VMID allocation is unique across all SMMUs in
- * the system.
- */
- if (smmu->model == CAVIUM_SMMUV2) {
- smmu->cavium_id_base =
- atomic_add_return(smmu->num_context_banks,
- &cavium_smmu_context_count);
- smmu->cavium_id_base -= smmu->num_context_banks;
- dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
- }
smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
sizeof(*smmu->cbs), GFP_KERNEL);
if (!smmu->cbs)
return -ENOMEM;
/* ID2 */
- id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
- size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
+ id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID2);
+ size = arm_smmu_id_size_to_bits(FIELD_GET(ID2_IAS, id));
smmu->ipa_size = size;
/* The output mask is also applied for bypass */
- size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
+ size = arm_smmu_id_size_to_bits(FIELD_GET(ID2_OAS, id));
smmu->pa_size = size;
if (id & ID2_VMID16)
@@ -1859,7 +1810,7 @@
if (smmu->version == ARM_SMMU_V1_64K)
smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
} else {
- size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
+ size = FIELD_GET(ID2_UBS, id);
smmu->va_size = arm_smmu_id_size_to_bits(size);
if (id & ID2_PTFS_4K)
smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
@@ -1896,6 +1847,9 @@
dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
smmu->ipa_size, smmu->pa_size);
+ if (smmu->impl && smmu->impl->cfg_probe)
+ return smmu->impl->cfg_probe(smmu);
+
return 0;
}
@@ -1905,13 +1859,14 @@
};
#define ARM_SMMU_MATCH_DATA(name, ver, imp) \
-static struct arm_smmu_match_data name = { .version = ver, .model = imp }
+static const struct arm_smmu_match_data name = { .version = ver, .model = imp }
ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
+ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
static const struct of_device_id arm_smmu_of_match[] = {
{ .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
@@ -1920,9 +1875,9 @@
{ .compatible = "arm,mmu-401", .data = &arm_mmu401 },
{ .compatible = "arm,mmu-500", .data = &arm_mmu500 },
{ .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
+ { .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
{ },
};
-MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
#ifdef CONFIG_ACPI
static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
@@ -2007,8 +1962,6 @@
smmu->version = data->version;
smmu->model = data->model;
- parse_driver_options(smmu);
-
legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
if (legacy_binding && !using_generic_binding) {
if (!using_legacy_binding)
@@ -2042,6 +1995,10 @@
bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
}
#endif
+#ifdef CONFIG_FSL_MC_BUS
+ if (!iommu_present(&fsl_mc_bus_type))
+ bus_set_iommu(&fsl_mc_bus_type, &arm_smmu_ops);
+#endif
}
static int arm_smmu_device_probe(struct platform_device *pdev)
@@ -2067,12 +2024,20 @@
if (err)
return err;
+ smmu = arm_smmu_impl_init(smmu);
+ if (IS_ERR(smmu))
+ return PTR_ERR(smmu);
+
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
ioaddr = res->start;
smmu->base = devm_ioremap_resource(dev, res);
if (IS_ERR(smmu->base))
return PTR_ERR(smmu->base);
- smmu->cb_base = smmu->base + resource_size(res) / 2;
+ /*
+ * The resource size should effectively match the value of SMMU_TOP;
+ * stash that temporarily until we know PAGESIZE to validate it with.
+ */
+ smmu->numpage = resource_size(res);
num_irqs = 0;
while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
@@ -2104,6 +2069,17 @@
smmu->irqs[i] = irq;
}
+ err = devm_clk_bulk_get_all(dev, &smmu->clks);
+ if (err < 0) {
+ dev_err(dev, "failed to get clocks %d\n", err);
+ return err;
+ }
+ smmu->num_clks = err;
+
+ err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
+ if (err)
+ return err;
+
err = arm_smmu_device_cfg_probe(smmu);
if (err)
return err;
@@ -2154,6 +2130,17 @@
arm_smmu_test_smr_masks(smmu);
/*
+ * We want to avoid touching dev->power.lock in fastpaths unless
+ * it's really going to do something useful - pm_runtime_enabled()
+ * can serve as an ideal proxy for that decision. So, conditionally
+ * enable pm_runtime.
+ */
+ if (dev->pm_domain) {
+ pm_runtime_set_active(dev);
+ pm_runtime_enable(dev);
+ }
+
+ /*
* For ACPI and generic DT bindings, an SMMU will be probed before
* any device which might need it, so we want the bus ops in place
* ready to handle default domain setup as soon as any SMMU exists.
@@ -2178,48 +2165,82 @@
}
device_initcall_sync(arm_smmu_legacy_bus_init);
-static int arm_smmu_device_remove(struct platform_device *pdev)
+static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
if (!smmu)
- return -ENODEV;
+ return;
if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
dev_err(&pdev->dev, "removing device with active domains!\n");
+ arm_smmu_rpm_get(smmu);
/* Turn the thing off */
- writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
+ arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, sCR0_CLIENTPD);
+ arm_smmu_rpm_put(smmu);
+
+ if (pm_runtime_enabled(smmu->dev))
+ pm_runtime_force_suspend(smmu->dev);
+ else
+ clk_bulk_disable(smmu->num_clks, smmu->clks);
+
+ clk_bulk_unprepare(smmu->num_clks, smmu->clks);
+}
+
+static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
+{
+ struct arm_smmu_device *smmu = dev_get_drvdata(dev);
+ int ret;
+
+ ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
+ if (ret)
+ return ret;
+
+ arm_smmu_device_reset(smmu);
+
return 0;
}
-static void arm_smmu_device_shutdown(struct platform_device *pdev)
+static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
{
- arm_smmu_device_remove(pdev);
+ struct arm_smmu_device *smmu = dev_get_drvdata(dev);
+
+ clk_bulk_disable(smmu->num_clks, smmu->clks);
+
+ return 0;
}
static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
{
- struct arm_smmu_device *smmu = dev_get_drvdata(dev);
+ if (pm_runtime_suspended(dev))
+ return 0;
- arm_smmu_device_reset(smmu);
- return 0;
+ return arm_smmu_runtime_resume(dev);
}
-static SIMPLE_DEV_PM_OPS(arm_smmu_pm_ops, NULL, arm_smmu_pm_resume);
+static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
+{
+ if (pm_runtime_suspended(dev))
+ return 0;
+
+ return arm_smmu_runtime_suspend(dev);
+}
+
+static const struct dev_pm_ops arm_smmu_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
+ SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
+ arm_smmu_runtime_resume, NULL)
+};
static struct platform_driver arm_smmu_driver = {
.driver = {
- .name = "arm-smmu",
- .of_match_table = of_match_ptr(arm_smmu_of_match),
- .pm = &arm_smmu_pm_ops,
+ .name = "arm-smmu",
+ .of_match_table = of_match_ptr(arm_smmu_of_match),
+ .pm = &arm_smmu_pm_ops,
+ .suppress_bind_attrs = true,
},
.probe = arm_smmu_device_probe,
- .remove = arm_smmu_device_remove,
.shutdown = arm_smmu_device_shutdown,
};
-module_platform_driver(arm_smmu_driver);
-
-MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
-MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
-MODULE_LICENSE("GPL v2");
+builtin_platform_driver(arm_smmu_driver);
diff --git a/drivers/iommu/arm-smmu.h b/drivers/iommu/arm-smmu.h
new file mode 100644
index 0000000..b19b6ca
--- /dev/null
+++ b/drivers/iommu/arm-smmu.h
@@ -0,0 +1,402 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * IOMMU API for ARM architected SMMU implementations.
+ *
+ * Copyright (C) 2013 ARM Limited
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ */
+
+#ifndef _ARM_SMMU_H
+#define _ARM_SMMU_H
+
+#include <linux/atomic.h>
+#include <linux/bits.h>
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/io-64-nonatomic-hi-lo.h>
+#include <linux/io-pgtable.h>
+#include <linux/iommu.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+/* Configuration registers */
+#define ARM_SMMU_GR0_sCR0 0x0
+#define sCR0_VMID16EN BIT(31)
+#define sCR0_BSU GENMASK(15, 14)
+#define sCR0_FB BIT(13)
+#define sCR0_PTM BIT(12)
+#define sCR0_VMIDPNE BIT(11)
+#define sCR0_USFCFG BIT(10)
+#define sCR0_GCFGFIE BIT(5)
+#define sCR0_GCFGFRE BIT(4)
+#define sCR0_EXIDENABLE BIT(3)
+#define sCR0_GFIE BIT(2)
+#define sCR0_GFRE BIT(1)
+#define sCR0_CLIENTPD BIT(0)
+
+/* Auxiliary Configuration register */
+#define ARM_SMMU_GR0_sACR 0x10
+
+/* Identification registers */
+#define ARM_SMMU_GR0_ID0 0x20
+#define ID0_S1TS BIT(30)
+#define ID0_S2TS BIT(29)
+#define ID0_NTS BIT(28)
+#define ID0_SMS BIT(27)
+#define ID0_ATOSNS BIT(26)
+#define ID0_PTFS_NO_AARCH32 BIT(25)
+#define ID0_PTFS_NO_AARCH32S BIT(24)
+#define ID0_NUMIRPT GENMASK(23, 16)
+#define ID0_CTTW BIT(14)
+#define ID0_NUMSIDB GENMASK(12, 9)
+#define ID0_EXIDS BIT(8)
+#define ID0_NUMSMRG GENMASK(7, 0)
+
+#define ARM_SMMU_GR0_ID1 0x24
+#define ID1_PAGESIZE BIT(31)
+#define ID1_NUMPAGENDXB GENMASK(30, 28)
+#define ID1_NUMS2CB GENMASK(23, 16)
+#define ID1_NUMCB GENMASK(7, 0)
+
+#define ARM_SMMU_GR0_ID2 0x28
+#define ID2_VMID16 BIT(15)
+#define ID2_PTFS_64K BIT(14)
+#define ID2_PTFS_16K BIT(13)
+#define ID2_PTFS_4K BIT(12)
+#define ID2_UBS GENMASK(11, 8)
+#define ID2_OAS GENMASK(7, 4)
+#define ID2_IAS GENMASK(3, 0)
+
+#define ARM_SMMU_GR0_ID3 0x2c
+#define ARM_SMMU_GR0_ID4 0x30
+#define ARM_SMMU_GR0_ID5 0x34
+#define ARM_SMMU_GR0_ID6 0x38
+
+#define ARM_SMMU_GR0_ID7 0x3c
+#define ID7_MAJOR GENMASK(7, 4)
+#define ID7_MINOR GENMASK(3, 0)
+
+#define ARM_SMMU_GR0_sGFSR 0x48
+#define ARM_SMMU_GR0_sGFSYNR0 0x50
+#define ARM_SMMU_GR0_sGFSYNR1 0x54
+#define ARM_SMMU_GR0_sGFSYNR2 0x58
+
+/* Global TLB invalidation */
+#define ARM_SMMU_GR0_TLBIVMID 0x64
+#define ARM_SMMU_GR0_TLBIALLNSNH 0x68
+#define ARM_SMMU_GR0_TLBIALLH 0x6c
+#define ARM_SMMU_GR0_sTLBGSYNC 0x70
+
+#define ARM_SMMU_GR0_sTLBGSTATUS 0x74
+#define sTLBGSTATUS_GSACTIVE BIT(0)
+
+/* Stream mapping registers */
+#define ARM_SMMU_GR0_SMR(n) (0x800 + ((n) << 2))
+#define SMR_VALID BIT(31)
+#define SMR_MASK GENMASK(31, 16)
+#define SMR_ID GENMASK(15, 0)
+
+#define ARM_SMMU_GR0_S2CR(n) (0xc00 + ((n) << 2))
+#define S2CR_PRIVCFG GENMASK(25, 24)
+enum arm_smmu_s2cr_privcfg {
+ S2CR_PRIVCFG_DEFAULT,
+ S2CR_PRIVCFG_DIPAN,
+ S2CR_PRIVCFG_UNPRIV,
+ S2CR_PRIVCFG_PRIV,
+};
+#define S2CR_TYPE GENMASK(17, 16)
+enum arm_smmu_s2cr_type {
+ S2CR_TYPE_TRANS,
+ S2CR_TYPE_BYPASS,
+ S2CR_TYPE_FAULT,
+};
+#define S2CR_EXIDVALID BIT(10)
+#define S2CR_CBNDX GENMASK(7, 0)
+
+/* Context bank attribute registers */
+#define ARM_SMMU_GR1_CBAR(n) (0x0 + ((n) << 2))
+#define CBAR_IRPTNDX GENMASK(31, 24)
+#define CBAR_TYPE GENMASK(17, 16)
+enum arm_smmu_cbar_type {
+ CBAR_TYPE_S2_TRANS,
+ CBAR_TYPE_S1_TRANS_S2_BYPASS,
+ CBAR_TYPE_S1_TRANS_S2_FAULT,
+ CBAR_TYPE_S1_TRANS_S2_TRANS,
+};
+#define CBAR_S1_MEMATTR GENMASK(15, 12)
+#define CBAR_S1_MEMATTR_WB 0xf
+#define CBAR_S1_BPSHCFG GENMASK(9, 8)
+#define CBAR_S1_BPSHCFG_NSH 3
+#define CBAR_VMID GENMASK(7, 0)
+
+#define ARM_SMMU_GR1_CBFRSYNRA(n) (0x400 + ((n) << 2))
+
+#define ARM_SMMU_GR1_CBA2R(n) (0x800 + ((n) << 2))
+#define CBA2R_VMID16 GENMASK(31, 16)
+#define CBA2R_VA64 BIT(0)
+
+#define ARM_SMMU_CB_SCTLR 0x0
+#define SCTLR_S1_ASIDPNE BIT(12)
+#define SCTLR_CFCFG BIT(7)
+#define SCTLR_CFIE BIT(6)
+#define SCTLR_CFRE BIT(5)
+#define SCTLR_E BIT(4)
+#define SCTLR_AFE BIT(2)
+#define SCTLR_TRE BIT(1)
+#define SCTLR_M BIT(0)
+
+#define ARM_SMMU_CB_ACTLR 0x4
+
+#define ARM_SMMU_CB_RESUME 0x8
+#define RESUME_TERMINATE BIT(0)
+
+#define ARM_SMMU_CB_TCR2 0x10
+#define TCR2_SEP GENMASK(17, 15)
+#define TCR2_SEP_UPSTREAM 0x7
+#define TCR2_AS BIT(4)
+
+#define ARM_SMMU_CB_TTBR0 0x20
+#define ARM_SMMU_CB_TTBR1 0x28
+#define TTBRn_ASID GENMASK_ULL(63, 48)
+
+#define ARM_SMMU_CB_TCR 0x30
+#define ARM_SMMU_CB_CONTEXTIDR 0x34
+#define ARM_SMMU_CB_S1_MAIR0 0x38
+#define ARM_SMMU_CB_S1_MAIR1 0x3c
+
+#define ARM_SMMU_CB_PAR 0x50
+#define CB_PAR_F BIT(0)
+
+#define ARM_SMMU_CB_FSR 0x58
+#define FSR_MULTI BIT(31)
+#define FSR_SS BIT(30)
+#define FSR_UUT BIT(8)
+#define FSR_ASF BIT(7)
+#define FSR_TLBLKF BIT(6)
+#define FSR_TLBMCF BIT(5)
+#define FSR_EF BIT(4)
+#define FSR_PF BIT(3)
+#define FSR_AFF BIT(2)
+#define FSR_TF BIT(1)
+
+#define FSR_IGN (FSR_AFF | FSR_ASF | \
+ FSR_TLBMCF | FSR_TLBLKF)
+#define FSR_FAULT (FSR_MULTI | FSR_SS | FSR_UUT | \
+ FSR_EF | FSR_PF | FSR_TF | FSR_IGN)
+
+#define ARM_SMMU_CB_FAR 0x60
+
+#define ARM_SMMU_CB_FSYNR0 0x68
+#define FSYNR0_WNR BIT(4)
+
+#define ARM_SMMU_CB_S1_TLBIVA 0x600
+#define ARM_SMMU_CB_S1_TLBIASID 0x610
+#define ARM_SMMU_CB_S1_TLBIVAL 0x620
+#define ARM_SMMU_CB_S2_TLBIIPAS2 0x630
+#define ARM_SMMU_CB_S2_TLBIIPAS2L 0x638
+#define ARM_SMMU_CB_TLBSYNC 0x7f0
+#define ARM_SMMU_CB_TLBSTATUS 0x7f4
+#define ARM_SMMU_CB_ATS1PR 0x800
+
+#define ARM_SMMU_CB_ATSR 0x8f0
+#define ATSR_ACTIVE BIT(0)
+
+
+/* Maximum number of context banks per SMMU */
+#define ARM_SMMU_MAX_CBS 128
+
+
+/* Shared driver definitions */
+enum arm_smmu_arch_version {
+ ARM_SMMU_V1,
+ ARM_SMMU_V1_64K,
+ ARM_SMMU_V2,
+};
+
+enum arm_smmu_implementation {
+ GENERIC_SMMU,
+ ARM_MMU500,
+ CAVIUM_SMMUV2,
+ QCOM_SMMUV2,
+};
+
+struct arm_smmu_device {
+ struct device *dev;
+
+ void __iomem *base;
+ unsigned int numpage;
+ unsigned int pgshift;
+
+#define ARM_SMMU_FEAT_COHERENT_WALK (1 << 0)
+#define ARM_SMMU_FEAT_STREAM_MATCH (1 << 1)
+#define ARM_SMMU_FEAT_TRANS_S1 (1 << 2)
+#define ARM_SMMU_FEAT_TRANS_S2 (1 << 3)
+#define ARM_SMMU_FEAT_TRANS_NESTED (1 << 4)
+#define ARM_SMMU_FEAT_TRANS_OPS (1 << 5)
+#define ARM_SMMU_FEAT_VMID16 (1 << 6)
+#define ARM_SMMU_FEAT_FMT_AARCH64_4K (1 << 7)
+#define ARM_SMMU_FEAT_FMT_AARCH64_16K (1 << 8)
+#define ARM_SMMU_FEAT_FMT_AARCH64_64K (1 << 9)
+#define ARM_SMMU_FEAT_FMT_AARCH32_L (1 << 10)
+#define ARM_SMMU_FEAT_FMT_AARCH32_S (1 << 11)
+#define ARM_SMMU_FEAT_EXIDS (1 << 12)
+ u32 features;
+
+ enum arm_smmu_arch_version version;
+ enum arm_smmu_implementation model;
+ const struct arm_smmu_impl *impl;
+
+ u32 num_context_banks;
+ u32 num_s2_context_banks;
+ DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
+ struct arm_smmu_cb *cbs;
+ atomic_t irptndx;
+
+ u32 num_mapping_groups;
+ u16 streamid_mask;
+ u16 smr_mask_mask;
+ struct arm_smmu_smr *smrs;
+ struct arm_smmu_s2cr *s2crs;
+ struct mutex stream_map_mutex;
+
+ unsigned long va_size;
+ unsigned long ipa_size;
+ unsigned long pa_size;
+ unsigned long pgsize_bitmap;
+
+ u32 num_global_irqs;
+ u32 num_context_irqs;
+ unsigned int *irqs;
+ struct clk_bulk_data *clks;
+ int num_clks;
+
+ spinlock_t global_sync_lock;
+
+ /* IOMMU core code handle */
+ struct iommu_device iommu;
+};
+
+enum arm_smmu_context_fmt {
+ ARM_SMMU_CTX_FMT_NONE,
+ ARM_SMMU_CTX_FMT_AARCH64,
+ ARM_SMMU_CTX_FMT_AARCH32_L,
+ ARM_SMMU_CTX_FMT_AARCH32_S,
+};
+
+struct arm_smmu_cfg {
+ u8 cbndx;
+ u8 irptndx;
+ union {
+ u16 asid;
+ u16 vmid;
+ };
+ enum arm_smmu_cbar_type cbar;
+ enum arm_smmu_context_fmt fmt;
+};
+#define INVALID_IRPTNDX 0xff
+
+enum arm_smmu_domain_stage {
+ ARM_SMMU_DOMAIN_S1 = 0,
+ ARM_SMMU_DOMAIN_S2,
+ ARM_SMMU_DOMAIN_NESTED,
+ ARM_SMMU_DOMAIN_BYPASS,
+};
+
+struct arm_smmu_flush_ops {
+ struct iommu_flush_ops tlb;
+ void (*tlb_inv_range)(unsigned long iova, size_t size, size_t granule,
+ bool leaf, void *cookie);
+ void (*tlb_sync)(void *cookie);
+};
+
+struct arm_smmu_domain {
+ struct arm_smmu_device *smmu;
+ struct io_pgtable_ops *pgtbl_ops;
+ const struct arm_smmu_flush_ops *flush_ops;
+ struct arm_smmu_cfg cfg;
+ enum arm_smmu_domain_stage stage;
+ bool non_strict;
+ struct mutex init_mutex; /* Protects smmu pointer */
+ spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */
+ struct iommu_domain domain;
+};
+
+
+/* Implementation-specific hooks; unset register accessors fall back to plain MMIO */
+struct arm_smmu_impl {
+ u32 (*read_reg)(struct arm_smmu_device *smmu, int page, int offset);
+ void (*write_reg)(struct arm_smmu_device *smmu, int page, int offset,
+ u32 val);
+ u64 (*read_reg64)(struct arm_smmu_device *smmu, int page, int offset);
+ void (*write_reg64)(struct arm_smmu_device *smmu, int page, int offset,
+ u64 val);
+ int (*cfg_probe)(struct arm_smmu_device *smmu);
+ int (*reset)(struct arm_smmu_device *smmu);
+ int (*init_context)(struct arm_smmu_domain *smmu_domain);
+};
+
+static inline void __iomem *arm_smmu_page(struct arm_smmu_device *smmu, int n)
+{
+ return smmu->base + (n << smmu->pgshift);
+}
+
+static inline u32 arm_smmu_readl(struct arm_smmu_device *smmu, int page, int offset)
+{
+ if (smmu->impl && unlikely(smmu->impl->read_reg))
+ return smmu->impl->read_reg(smmu, page, offset);
+ return readl_relaxed(arm_smmu_page(smmu, page) + offset);
+}
+
+static inline void arm_smmu_writel(struct arm_smmu_device *smmu, int page,
+ int offset, u32 val)
+{
+ if (smmu->impl && unlikely(smmu->impl->write_reg))
+ smmu->impl->write_reg(smmu, page, offset, val);
+ else
+ writel_relaxed(val, arm_smmu_page(smmu, page) + offset);
+}
+
+static inline u64 arm_smmu_readq(struct arm_smmu_device *smmu, int page, int offset)
+{
+ if (smmu->impl && unlikely(smmu->impl->read_reg64))
+ return smmu->impl->read_reg64(smmu, page, offset);
+ return readq_relaxed(arm_smmu_page(smmu, page) + offset);
+}
+
+static inline void arm_smmu_writeq(struct arm_smmu_device *smmu, int page,
+ int offset, u64 val)
+{
+ if (smmu->impl && unlikely(smmu->impl->write_reg64))
+ smmu->impl->write_reg64(smmu, page, offset, val);
+ else
+ writeq_relaxed(val, arm_smmu_page(smmu, page) + offset);
+}
+
+#define ARM_SMMU_GR0 0
+#define ARM_SMMU_GR1 1
+#define ARM_SMMU_CB(s, n) ((s)->numpage + (n))
+
+#define arm_smmu_gr0_read(s, o) \
+ arm_smmu_readl((s), ARM_SMMU_GR0, (o))
+#define arm_smmu_gr0_write(s, o, v) \
+ arm_smmu_writel((s), ARM_SMMU_GR0, (o), (v))
+
+#define arm_smmu_gr1_read(s, o) \
+ arm_smmu_readl((s), ARM_SMMU_GR1, (o))
+#define arm_smmu_gr1_write(s, o, v) \
+ arm_smmu_writel((s), ARM_SMMU_GR1, (o), (v))
+
+#define arm_smmu_cb_read(s, n, o) \
+ arm_smmu_readl((s), ARM_SMMU_CB((s), (n)), (o))
+#define arm_smmu_cb_write(s, n, o, v) \
+ arm_smmu_writel((s), ARM_SMMU_CB((s), (n)), (o), (v))
+#define arm_smmu_cb_readq(s, n, o) \
+ arm_smmu_readq((s), ARM_SMMU_CB((s), (n)), (o))
+#define arm_smmu_cb_writeq(s, n, o, v) \
+ arm_smmu_writeq((s), ARM_SMMU_CB((s), (n)), (o), (v))
+
+struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu);
+
+#endif /* _ARM_SMMU_H */
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 511ff9a..f321279 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* A fairly generic DMA-API to IOMMU-API glue layer.
*
@@ -5,23 +6,13 @@
*
* based in part on arch/arm/mm/dma-mapping.c:
* Copyright (C) 2000-2004 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/acpi_iort.h>
#include <linux/device.h>
+#include <linux/dma-contiguous.h>
#include <linux/dma-iommu.h>
+#include <linux/dma-noncoherent.h>
#include <linux/gfp.h>
#include <linux/huge_mm.h>
#include <linux/iommu.h>
@@ -32,8 +23,6 @@
#include <linux/scatterlist.h>
#include <linux/vmalloc.h>
-#define IOMMU_MAPPING_ERROR 0
-
struct iommu_dma_msi_page {
struct list_head list;
dma_addr_t iova;
@@ -55,6 +44,9 @@
};
struct list_head msi_page_list;
spinlock_t msi_lock;
+
+ /* Domain for flush queue callback; NULL if flush queue not in use */
+ struct iommu_domain *fq_domain;
};
static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
@@ -77,11 +69,6 @@
return cookie;
}
-int iommu_dma_init(void)
-{
- return iova_cache_get();
-}
-
/**
* iommu_get_dma_cookie - Acquire DMA-API resources for a domain
* @domain: IOMMU domain to prepare for DMA-API usage
@@ -174,7 +161,7 @@
void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list)
{
- if (!is_of_node(dev->iommu_fwspec->iommu_fwnode))
+ if (!is_of_node(dev_iommu_fwspec_get(dev)->iommu_fwnode))
iort_iommu_msi_get_resv_regions(dev, list);
}
@@ -205,12 +192,13 @@
return 0;
}
-static void iova_reserve_pci_windows(struct pci_dev *dev,
+static int iova_reserve_pci_windows(struct pci_dev *dev,
struct iova_domain *iovad)
{
struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus);
struct resource_entry *window;
unsigned long lo, hi;
+ phys_addr_t start = 0, end;
resource_list_for_each_entry(window, &bridge->windows) {
if (resource_type(window->res) != IORESOURCE_MEM)
@@ -220,6 +208,31 @@
hi = iova_pfn(iovad, window->res->end - window->offset);
reserve_iova(iovad, lo, hi);
}
+
+ /* Reserve the IOVA space not covered by the host bridge's dma_ranges windows */
+ resource_list_for_each_entry(window, &bridge->dma_ranges) {
+ end = window->res->start - window->offset;
+resv_iova:
+ if (end > start) {
+ lo = iova_pfn(iovad, start);
+ hi = iova_pfn(iovad, end);
+ reserve_iova(iovad, lo, hi);
+ } else if (end < start) {
+ /* Windows overlap or are unsorted; end == start is just an empty gap */
+ dev_err(&dev->dev, "Failed to reserve IOVA\n");
+ return -EINVAL;
+ }
+
+ start = window->res->end - window->offset + 1;
+ /* After the last window, also reserve everything above it */
+ if (window->node.next == &bridge->dma_ranges &&
+ end != ~(phys_addr_t)0) {
+ end = ~(phys_addr_t)0;
+ goto resv_iova;
+ }
+ }
+
+ return 0;
}
static int iova_reserve_iommu_regions(struct device *dev,
@@ -231,8 +244,11 @@
LIST_HEAD(resv_regions);
int ret = 0;
- if (dev_is_pci(dev))
- iova_reserve_pci_windows(to_pci_dev(dev), iovad);
+ if (dev_is_pci(dev)) {
+ ret = iova_reserve_pci_windows(to_pci_dev(dev), iovad);
+ if (ret)
+ return ret;
+ }
iommu_get_resv_regions(dev, &resv_regions);
list_for_each_entry(region, &resv_regions, list) {
@@ -257,6 +273,20 @@
return ret;
}
+static void iommu_dma_flush_iotlb_all(struct iova_domain *iovad)
+{
+ struct iommu_dma_cookie *cookie;
+ struct iommu_domain *domain;
+
+ cookie = container_of(iovad, struct iommu_dma_cookie, iovad);
+ domain = cookie->fq_domain;
+ /*
+ * The IOMMU driver supporting DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE
+ * implies that ops->flush_iotlb_all must be non-NULL.
+ */
+ domain->ops->flush_iotlb_all(domain);
+}
+
/**
* iommu_dma_init_domain - Initialise a DMA mapping domain
* @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
@@ -269,20 +299,22 @@
* to ensure it is an invalid IOVA. It is safe to reinitialise a domain, but
* any change which could make prior IOVAs invalid will fail.
*/
-int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
+static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
u64 size, struct device *dev)
{
struct iommu_dma_cookie *cookie = domain->iova_cookie;
- struct iova_domain *iovad = &cookie->iovad;
- unsigned long order, base_pfn, end_pfn;
+ unsigned long order, base_pfn;
+ struct iova_domain *iovad;
+ int attr;
if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE)
return -EINVAL;
+ iovad = &cookie->iovad;
+
/* Use the smallest supported page size for IOVA granularity */
order = __ffs(domain->pgsize_bitmap);
base_pfn = max_t(unsigned long, 1, base >> order);
- end_pfn = (base + size - 1) >> order;
/* Check the domain allows at least some access to the device... */
if (domain->geometry.force_aperture) {
@@ -308,12 +340,18 @@
}
init_iova_domain(iovad, 1UL << order, base_pfn);
+
+ /* Switch to non-strict mode only if the flush queue was really set up */
+ if (!cookie->fq_domain && !iommu_domain_get_attr(domain,
+ DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, &attr) && attr &&
+ !init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all, NULL))
+ cookie->fq_domain = domain;
+
if (!dev)
return 0;
return iova_reserve_iommu_regions(dev, domain);
}
-EXPORT_SYMBOL(iommu_dma_init_domain);
/**
* dma_info_to_prot - Translate DMA API directions and attributes to IOMMU API
@@ -324,7 +362,7 @@
*
* Return: corresponding IOMMU API page protection flags
*/
-int dma_info_to_prot(enum dma_data_direction dir, bool coherent,
+static int dma_info_to_prot(enum dma_data_direction dir, bool coherent,
unsigned long attrs)
{
int prot = coherent ? IOMMU_CACHE : 0;
@@ -393,25 +431,58 @@
/* The MSI case is only ever cleaning up its most recent allocation */
if (cookie->type == IOMMU_DMA_MSI_COOKIE)
cookie->msi_iova -= size;
+ else if (cookie->fq_domain) /* non-strict mode */
+ queue_iova(iovad, iova_pfn(iovad, iova),
+ size >> iova_shift(iovad), 0);
else
free_iova_fast(iovad, iova_pfn(iovad, iova),
size >> iova_shift(iovad));
}
-static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr,
+static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr,
size_t size)
{
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
struct iommu_dma_cookie *cookie = domain->iova_cookie;
struct iova_domain *iovad = &cookie->iovad;
size_t iova_off = iova_offset(iovad, dma_addr);
+ struct iommu_iotlb_gather iotlb_gather;
+ size_t unmapped;
dma_addr -= iova_off;
size = iova_align(iovad, size + iova_off);
+ iommu_iotlb_gather_init(&iotlb_gather);
- WARN_ON(iommu_unmap(domain, dma_addr, size) != size);
+ unmapped = iommu_unmap_fast(domain, dma_addr, size, &iotlb_gather);
+ WARN_ON(unmapped != size);
+
+ if (!cookie->fq_domain)
+ iommu_tlb_sync(domain, &iotlb_gather);
iommu_dma_free_iova(cookie, dma_addr, size);
}
+static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
+ size_t size, int prot)
+{
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
+ struct iommu_dma_cookie *cookie = domain->iova_cookie;
+ struct iova_domain *iovad = &cookie->iovad;
+ size_t iova_off = iova_offset(iovad, phys);
+ dma_addr_t iova;
+
+ size = iova_align(iovad, size + iova_off);
+
+ iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);
+ if (!iova)
+ return DMA_MAPPING_ERROR;
+
+ if (iommu_map(domain, iova, phys - iova_off, size, prot)) {
+ iommu_dma_free_iova(cookie, iova, size);
+ return DMA_MAPPING_ERROR;
+ }
+ return iova + iova_off;
+}
+
static void __iommu_dma_free_pages(struct page **pages, int count)
{
while (count--)
@@ -419,20 +490,17 @@
kvfree(pages);
}
-static struct page **__iommu_dma_alloc_pages(unsigned int count,
- unsigned long order_mask, gfp_t gfp)
+static struct page **__iommu_dma_alloc_pages(struct device *dev,
+ unsigned int count, unsigned long order_mask, gfp_t gfp)
{
struct page **pages;
- unsigned int i = 0, array_size = count * sizeof(*pages);
+ unsigned int i = 0, nid = dev_to_node(dev);
order_mask &= (2U << MAX_ORDER) - 1;
if (!order_mask)
return NULL;
- if (array_size <= PAGE_SIZE)
- pages = kzalloc(array_size, GFP_KERNEL);
- else
- pages = vzalloc(array_size);
+ pages = kvzalloc(count * sizeof(*pages), GFP_KERNEL);
if (!pages)
return NULL;
@@ -451,10 +519,12 @@
for (order_mask &= (2U << __fls(count)) - 1;
order_mask; order_mask &= ~order_size) {
unsigned int order = __fls(order_mask);
+ gfp_t alloc_flags = gfp;
order_size = 1U << order;
- page = alloc_pages((order_mask - order_size) ?
- gfp | __GFP_NORETRY : gfp, order);
+ if (order_mask > order_size)
+ alloc_flags |= __GFP_NORETRY;
+ page = alloc_pages_node(nid, alloc_flags, order);
if (!page)
continue;
if (!order)
@@ -479,54 +549,35 @@
}
/**
- * iommu_dma_free - Free a buffer allocated by iommu_dma_alloc()
- * @dev: Device which owns this buffer
- * @pages: Array of buffer pages as returned by iommu_dma_alloc()
- * @size: Size of buffer in bytes
- * @handle: DMA address of buffer
- *
- * Frees both the pages associated with the buffer, and the array
- * describing them
- */
-void iommu_dma_free(struct device *dev, struct page **pages, size_t size,
- dma_addr_t *handle)
-{
- __iommu_dma_unmap(iommu_get_domain_for_dev(dev), *handle, size);
- __iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
- *handle = IOMMU_MAPPING_ERROR;
-}
-
-/**
- * iommu_dma_alloc - Allocate and map a buffer contiguous in IOVA space
+ * iommu_dma_alloc_remap - Allocate and map a buffer contiguous in IOVA space
* @dev: Device to allocate memory for. Must be a real device
* attached to an iommu_dma_domain
* @size: Size of buffer in bytes
+ * @dma_handle: Out argument for allocated DMA handle
* @gfp: Allocation flags
* @attrs: DMA attributes for this allocation
- * @prot: IOMMU mapping flags
- * @handle: Out argument for allocated DMA handle
- * @flush_page: Arch callback which must ensure PAGE_SIZE bytes from the
- * given VA/PA are visible to the given non-coherent device.
*
* If @size is less than PAGE_SIZE, then a full CPU page will be allocated,
* but an IOMMU which supports smaller pages might not map the whole thing.
*
- * Return: Array of struct page pointers describing the buffer,
- * or NULL on failure.
+ * Return: Mapped virtual address, or NULL on failure.
*/
-struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
- unsigned long attrs, int prot, dma_addr_t *handle,
- void (*flush_page)(struct device *, const void *, phys_addr_t))
+static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
- struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
struct iommu_dma_cookie *cookie = domain->iova_cookie;
struct iova_domain *iovad = &cookie->iovad;
+ bool coherent = dev_is_dma_coherent(dev);
+ int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
+ pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs);
+ unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap;
struct page **pages;
struct sg_table sgt;
dma_addr_t iova;
- unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap;
+ void *vaddr;
- *handle = IOMMU_MAPPING_ERROR;
+ *dma_handle = DMA_MAPPING_ERROR;
min_size = alloc_sizes & -alloc_sizes;
if (min_size < PAGE_SIZE) {
@@ -539,7 +590,8 @@
alloc_sizes = min_size;
count = PAGE_ALIGN(size) >> PAGE_SHIFT;
- pages = __iommu_dma_alloc_pages(count, alloc_sizes >> PAGE_SHIFT, gfp);
+ pages = __iommu_dma_alloc_pages(dev, count, alloc_sizes >> PAGE_SHIFT,
+ gfp);
if (!pages)
return NULL;
@@ -551,26 +603,29 @@
if (sg_alloc_table_from_pages(&sgt, pages, count, 0, size, GFP_KERNEL))
goto out_free_iova;
- if (!(prot & IOMMU_CACHE)) {
- struct sg_mapping_iter miter;
- /*
- * The CPU-centric flushing implied by SG_MITER_TO_SG isn't
- * sufficient here, so skip it by using the "wrong" direction.
- */
- sg_miter_start(&miter, sgt.sgl, sgt.orig_nents, SG_MITER_FROM_SG);
- while (sg_miter_next(&miter))
- flush_page(dev, miter.addr, page_to_phys(miter.page));
- sg_miter_stop(&miter);
+ if (!(ioprot & IOMMU_CACHE)) {
+ struct scatterlist *sg;
+ int i;
+
+ for_each_sg(sgt.sgl, sg, sgt.orig_nents, i)
+ arch_dma_prep_coherent(sg_page(sg), sg->length);
}
- if (iommu_map_sg(domain, iova, sgt.sgl, sgt.orig_nents, prot)
+ if (iommu_map_sg(domain, iova, sgt.sgl, sgt.orig_nents, ioprot)
< size)
goto out_free_sg;
- *handle = iova;
- sg_free_table(&sgt);
- return pages;
+ vaddr = dma_common_pages_remap(pages, size, prot,
+ __builtin_return_address(0));
+ if (!vaddr)
+ goto out_unmap;
+ *dma_handle = iova;
+ sg_free_table(&sgt);
+ return vaddr;
+
+out_unmap:
+ __iommu_dma_unmap(dev, iova, size);
out_free_sg:
sg_free_table(&sgt);
out_free_iova:
@@ -581,64 +636,94 @@
}
/**
- * iommu_dma_mmap - Map a buffer into provided user VMA
- * @pages: Array representing buffer from iommu_dma_alloc()
+ * __iommu_dma_mmap - Map a buffer into provided user VMA
+ * @pages: Array representing buffer from __iommu_dma_alloc()
* @size: Size of buffer in bytes
* @vma: VMA describing requested userspace mapping
*
* Maps the pages of the buffer in @pages into @vma. The caller is responsible
* for verifying the correct size and protection of @vma beforehand.
*/
-
-int iommu_dma_mmap(struct page **pages, size_t size, struct vm_area_struct *vma)
+static int __iommu_dma_mmap(struct page **pages, size_t size,
+ struct vm_area_struct *vma)
{
- unsigned long uaddr = vma->vm_start;
- unsigned int i, count = PAGE_ALIGN(size) >> PAGE_SHIFT;
- int ret = -ENXIO;
-
- for (i = vma->vm_pgoff; i < count && uaddr < vma->vm_end; i++) {
- ret = vm_insert_page(vma, uaddr, pages[i]);
- if (ret)
- break;
- uaddr += PAGE_SIZE;
- }
- return ret;
+ return vm_map_pages(vma, pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
}
-static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
- size_t size, int prot)
+static void iommu_dma_sync_single_for_cpu(struct device *dev,
+ dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
{
- struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
- struct iommu_dma_cookie *cookie = domain->iova_cookie;
- size_t iova_off = 0;
- dma_addr_t iova;
+ phys_addr_t phys;
- if (cookie->type == IOMMU_DMA_IOVA_COOKIE) {
- iova_off = iova_offset(&cookie->iovad, phys);
- size = iova_align(&cookie->iovad, size + iova_off);
- }
+ if (dev_is_dma_coherent(dev))
+ return;
- iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);
- if (!iova)
- return IOMMU_MAPPING_ERROR;
-
- if (iommu_map(domain, iova, phys - iova_off, size, prot)) {
- iommu_dma_free_iova(cookie, iova, size);
- return IOMMU_MAPPING_ERROR;
- }
- return iova + iova_off;
+ phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
+ arch_sync_dma_for_cpu(dev, phys, size, dir);
}
-dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size, int prot)
+static void iommu_dma_sync_single_for_device(struct device *dev,
+ dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
{
- return __iommu_dma_map(dev, page_to_phys(page) + offset, size, prot);
+ phys_addr_t phys;
+
+ if (dev_is_dma_coherent(dev))
+ return;
+
+ phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
+ arch_sync_dma_for_device(dev, phys, size, dir);
}
-void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size,
- enum dma_data_direction dir, unsigned long attrs)
+static void iommu_dma_sync_sg_for_cpu(struct device *dev,
+ struct scatterlist *sgl, int nelems,
+ enum dma_data_direction dir)
{
- __iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle, size);
+ struct scatterlist *sg;
+ int i;
+
+ if (dev_is_dma_coherent(dev))
+ return;
+
+ for_each_sg(sgl, sg, nelems, i)
+ arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir);
+}
+
+static void iommu_dma_sync_sg_for_device(struct device *dev,
+ struct scatterlist *sgl, int nelems,
+ enum dma_data_direction dir)
+{
+ struct scatterlist *sg;
+ int i;
+
+ if (dev_is_dma_coherent(dev))
+ return;
+
+ for_each_sg(sgl, sg, nelems, i)
+ arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir);
+}
+
+static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ phys_addr_t phys = page_to_phys(page) + offset;
+ bool coherent = dev_is_dma_coherent(dev);
+ int prot = dma_info_to_prot(dir, coherent, attrs);
+ dma_addr_t dma_handle;
+
+ dma_handle =__iommu_dma_map(dev, phys, size, prot);
+ if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+ dma_handle != DMA_MAPPING_ERROR)
+ arch_sync_dma_for_device(dev, phys, size, dir);
+ return dma_handle;
+}
+
+static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
+ size_t size, enum dma_data_direction dir, unsigned long attrs)
+{
+ if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ iommu_dma_sync_single_for_cpu(dev, dma_handle, size, dir);
+ __iommu_dma_unmap(dev, dma_handle, size);
}
/*
@@ -664,7 +749,7 @@
s->offset += s_iova_off;
s->length = s_length;
- sg_dma_address(s) = IOMMU_MAPPING_ERROR;
+ sg_dma_address(s) = DMA_MAPPING_ERROR;
sg_dma_len(s) = 0;
/*
@@ -675,7 +760,7 @@
* - and wouldn't make the resulting output segment too long
*/
if (cur_len && !s_iova_off && (dma_addr & seg_mask) &&
- (cur_len + s_length <= max_len)) {
+ (max_len - cur_len >= s_length)) {
/* ...then concatenate it with the previous one */
cur_len += s_length;
} else {
@@ -707,11 +792,11 @@
int i;
for_each_sg(sg, s, nents, i) {
- if (sg_dma_address(s) != IOMMU_MAPPING_ERROR)
+ if (sg_dma_address(s) != DMA_MAPPING_ERROR)
s->offset += sg_dma_address(s);
if (sg_dma_len(s))
s->length = sg_dma_len(s);
- sg_dma_address(s) = IOMMU_MAPPING_ERROR;
+ sg_dma_address(s) = DMA_MAPPING_ERROR;
sg_dma_len(s) = 0;
}
}
@@ -723,18 +808,22 @@
* impedance-matching, to be able to hand off a suitably-aligned list,
* but still preserve the original offsets and sizes for the caller.
*/
-int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
- int nents, int prot)
+static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction dir, unsigned long attrs)
{
- struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
struct iommu_dma_cookie *cookie = domain->iova_cookie;
struct iova_domain *iovad = &cookie->iovad;
struct scatterlist *s, *prev = NULL;
+ int prot = dma_info_to_prot(dir, dev_is_dma_coherent(dev), attrs);
dma_addr_t iova;
size_t iova_len = 0;
unsigned long mask = dma_get_seg_boundary(dev);
int i;
+ if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ iommu_dma_sync_sg_for_device(dev, sg, nents, dir);
+
/*
* Work out how much IOVA space we need, and align the segments to
* IOVA granules for the IOMMU driver to handle. With some clever
@@ -794,12 +883,16 @@
return 0;
}
-void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
- enum dma_data_direction dir, unsigned long attrs)
+static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction dir, unsigned long attrs)
{
dma_addr_t start, end;
struct scatterlist *tmp;
int i;
+
+ if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir);
+
/*
* The scatterlist segments are mapped into a single
* contiguous IOVA allocation, so this is incredibly easy.
@@ -811,25 +904,236 @@
sg = tmp;
}
end = sg_dma_address(sg) + sg_dma_len(sg);
- __iommu_dma_unmap(iommu_get_domain_for_dev(dev), start, end - start);
+ __iommu_dma_unmap(dev, start, end - start);
}
-dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys,
+static dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys,
size_t size, enum dma_data_direction dir, unsigned long attrs)
{
return __iommu_dma_map(dev, phys, size,
dma_info_to_prot(dir, false, attrs) | IOMMU_MMIO);
}
-void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle,
+static void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle,
size_t size, enum dma_data_direction dir, unsigned long attrs)
{
- __iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle, size);
+ __iommu_dma_unmap(dev, handle, size);
}
-int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+static void __iommu_dma_free(struct device *dev, size_t size, void *cpu_addr)
{
- return dma_addr == IOMMU_MAPPING_ERROR;
+ size_t alloc_size = PAGE_ALIGN(size);
+ int count = alloc_size >> PAGE_SHIFT;
+ struct page *page = NULL, **pages = NULL;
+
+ /* Non-coherent atomic allocation? Easy */
+ if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
+ dma_free_from_pool(cpu_addr, alloc_size))
+ return;
+
+ if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) {
+ /*
+ * If the address is remapped, then it's either non-coherent
+ * or highmem CMA, or an iommu_dma_alloc_remap() construction.
+ */
+ pages = dma_common_find_pages(cpu_addr);
+ if (!pages)
+ page = vmalloc_to_page(cpu_addr);
+ dma_common_free_remap(cpu_addr, alloc_size);
+ } else {
+ /* Lowmem means a coherent atomic or CMA allocation */
+ page = virt_to_page(cpu_addr);
+ }
+
+ if (pages)
+ __iommu_dma_free_pages(pages, count);
+ if (page)
+ dma_free_contiguous(dev, page, alloc_size);
+}
+
+static void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr,
+ dma_addr_t handle, unsigned long attrs)
+{
+ __iommu_dma_unmap(dev, handle, size);
+ __iommu_dma_free(dev, size, cpu_addr);
+}
+
+static void *iommu_dma_alloc_pages(struct device *dev, size_t size,
+ struct page **pagep, gfp_t gfp, unsigned long attrs)
+{
+ bool coherent = dev_is_dma_coherent(dev);
+ size_t alloc_size = PAGE_ALIGN(size);
+ int node = dev_to_node(dev);
+ struct page *page = NULL;
+ void *cpu_addr;
+
+ page = dma_alloc_contiguous(dev, alloc_size, gfp);
+ if (!page)
+ page = alloc_pages_node(node, gfp, get_order(alloc_size));
+ if (!page)
+ return NULL;
+
+ if (IS_ENABLED(CONFIG_DMA_REMAP) && (!coherent || PageHighMem(page))) {
+ pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs);
+
+ cpu_addr = dma_common_contiguous_remap(page, alloc_size,
+ prot, __builtin_return_address(0));
+ if (!cpu_addr)
+ goto out_free_pages;
+
+ if (!coherent)
+ arch_dma_prep_coherent(page, size);
+ } else {
+ cpu_addr = page_address(page);
+ }
+
+ *pagep = page;
+ memset(cpu_addr, 0, alloc_size);
+ return cpu_addr;
+out_free_pages:
+ dma_free_contiguous(dev, page, alloc_size);
+ return NULL;
+}
+
+static void *iommu_dma_alloc(struct device *dev, size_t size,
+ dma_addr_t *handle, gfp_t gfp, unsigned long attrs)
+{
+ bool coherent = dev_is_dma_coherent(dev);
+ int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
+ struct page *page = NULL;
+ void *cpu_addr;
+
+ gfp |= __GFP_ZERO;
+
+ if (IS_ENABLED(CONFIG_DMA_REMAP) && gfpflags_allow_blocking(gfp) &&
+ !(attrs & DMA_ATTR_FORCE_CONTIGUOUS))
+ return iommu_dma_alloc_remap(dev, size, handle, gfp, attrs);
+
+ if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
+ !gfpflags_allow_blocking(gfp) && !coherent)
+ cpu_addr = dma_alloc_from_pool(PAGE_ALIGN(size), &page, gfp);
+ else
+ cpu_addr = iommu_dma_alloc_pages(dev, size, &page, gfp, attrs);
+ if (!cpu_addr)
+ return NULL;
+
+ *handle = __iommu_dma_map(dev, page_to_phys(page), size, ioprot);
+ if (*handle == DMA_MAPPING_ERROR) {
+ __iommu_dma_free(dev, size, cpu_addr);
+ return NULL;
+ }
+
+ return cpu_addr;
+}
+
+static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size,
+ unsigned long attrs)
+{
+ unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+ unsigned long pfn, off = vma->vm_pgoff;
+ int ret;
+
+ vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs);
+
+ if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
+ return ret;
+
+ if (off >= nr_pages || vma_pages(vma) > nr_pages - off)
+ return -ENXIO;
+
+ if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) {
+ struct page **pages = dma_common_find_pages(cpu_addr);
+
+ if (pages)
+ return __iommu_dma_mmap(pages, size, vma);
+ pfn = vmalloc_to_pfn(cpu_addr);
+ } else {
+ pfn = page_to_pfn(virt_to_page(cpu_addr));
+ }
+
+ return remap_pfn_range(vma, vma->vm_start, pfn + off,
+ vma->vm_end - vma->vm_start,
+ vma->vm_page_prot);
+}
+
+static int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size,
+ unsigned long attrs)
+{
+ struct page *page;
+ int ret;
+
+ if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) {
+ struct page **pages = dma_common_find_pages(cpu_addr);
+
+ if (pages) {
+ return sg_alloc_table_from_pages(sgt, pages,
+ PAGE_ALIGN(size) >> PAGE_SHIFT,
+ 0, size, GFP_KERNEL);
+ }
+
+ page = vmalloc_to_page(cpu_addr);
+ } else {
+ page = virt_to_page(cpu_addr);
+ }
+
+ ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
+ if (!ret)
+ sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
+ return ret;
+}
+
+static unsigned long iommu_dma_get_merge_boundary(struct device *dev)
+{
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
+
+ return (1UL << __ffs(domain->pgsize_bitmap)) - 1;
+}
+
+static const struct dma_map_ops iommu_dma_ops = {
+ .alloc = iommu_dma_alloc,
+ .free = iommu_dma_free,
+ .mmap = iommu_dma_mmap,
+ .get_sgtable = iommu_dma_get_sgtable,
+ .map_page = iommu_dma_map_page,
+ .unmap_page = iommu_dma_unmap_page,
+ .map_sg = iommu_dma_map_sg,
+ .unmap_sg = iommu_dma_unmap_sg,
+ .sync_single_for_cpu = iommu_dma_sync_single_for_cpu,
+ .sync_single_for_device = iommu_dma_sync_single_for_device,
+ .sync_sg_for_cpu = iommu_dma_sync_sg_for_cpu,
+ .sync_sg_for_device = iommu_dma_sync_sg_for_device,
+ .map_resource = iommu_dma_map_resource,
+ .unmap_resource = iommu_dma_unmap_resource,
+ .get_merge_boundary = iommu_dma_get_merge_boundary,
+};
+
+/*
+ * The IOMMU core code allocates the default DMA domain, which the underlying
+ * IOMMU driver needs to support via the dma-iommu layer.
+ */
+void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size)
+{
+ struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+
+ if (!domain)
+ goto out_err;
+
+ /*
+ * The IOMMU core code allocates the default DMA domain, which the
+ * underlying IOMMU driver needs to support via the dma-iommu layer.
+ */
+ if (domain->type == IOMMU_DOMAIN_DMA) {
+ if (iommu_dma_init_domain(domain, dma_base, size, dev))
+ goto out_err;
+ dev->dma_ops = &iommu_dma_ops;
+ }
+
+ return;
+out_err:
+ pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
+ dev_name(dev));
}
static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
@@ -850,32 +1154,38 @@
if (!msi_page)
return NULL;
- iova = __iommu_dma_map(dev, msi_addr, size, prot);
- if (iommu_dma_mapping_error(dev, iova))
+ iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);
+ if (!iova)
goto out_free_page;
+ if (iommu_map(domain, iova, msi_addr, size, prot))
+ goto out_free_iova;
+
INIT_LIST_HEAD(&msi_page->list);
msi_page->phys = msi_addr;
msi_page->iova = iova;
list_add(&msi_page->list, &cookie->msi_page_list);
return msi_page;
+out_free_iova:
+ iommu_dma_free_iova(cookie, iova, size);
out_free_page:
kfree(msi_page);
return NULL;
}
-void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg)
+int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
{
- struct device *dev = msi_desc_to_dev(irq_get_msi_desc(irq));
+ struct device *dev = msi_desc_to_dev(desc);
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
struct iommu_dma_cookie *cookie;
struct iommu_dma_msi_page *msi_page;
- phys_addr_t msi_addr = (u64)msg->address_hi << 32 | msg->address_lo;
unsigned long flags;
- if (!domain || !domain->iova_cookie)
- return;
+ if (!domain || !domain->iova_cookie) {
+ desc->iommu_cookie = NULL;
+ return 0;
+ }
cookie = domain->iova_cookie;
@@ -888,19 +1198,32 @@
msi_page = iommu_dma_get_msi_page(dev, msi_addr, domain);
spin_unlock_irqrestore(&cookie->msi_lock, flags);
- if (WARN_ON(!msi_page)) {
- /*
- * We're called from a void callback, so the best we can do is
- * 'fail' by filling the message with obviously bogus values.
- * Since we got this far due to an IOMMU being present, it's
- * not like the existing address would have worked anyway...
- */
- msg->address_hi = ~0U;
- msg->address_lo = ~0U;
- msg->data = ~0U;
- } else {
- msg->address_hi = upper_32_bits(msi_page->iova);
- msg->address_lo &= cookie_msi_granule(cookie) - 1;
- msg->address_lo += lower_32_bits(msi_page->iova);
- }
+ msi_desc_set_iommu_cookie(desc, msi_page);
+
+ if (!msi_page)
+ return -ENOMEM;
+ return 0;
}
+
+void iommu_dma_compose_msi_msg(struct msi_desc *desc,
+ struct msi_msg *msg)
+{
+ struct device *dev = msi_desc_to_dev(desc);
+ const struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+ const struct iommu_dma_msi_page *msi_page;
+
+ msi_page = msi_desc_get_iommu_cookie(desc);
+
+ if (!domain || !domain->iova_cookie || WARN_ON(!msi_page))
+ return;
+
+ msg->address_hi = upper_32_bits(msi_page->iova);
+ msg->address_lo &= cookie_msi_granule(domain->iova_cookie) - 1;
+ msg->address_lo += lower_32_bits(msi_page->iova);
+}
+
+static int iommu_dma_init(void)
+{
+ return iova_cache_get();
+}
+arch_initcall(iommu_dma_init);
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index d9c748b..eecd6a4 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -1,19 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2006, Intel Corporation.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
* Copyright (C) 2006-2008 Intel Corporation
* Author: Ashok Raj <ashok.raj@intel.com>
* Author: Shaohua Li <shaohua.li@intel.com>
@@ -39,6 +27,7 @@
#include <linux/dmi.h>
#include <linux/slab.h>
#include <linux/iommu.h>
+#include <linux/numa.h>
#include <asm/irq_remapping.h>
#include <asm/iommu_table.h>
@@ -144,7 +133,7 @@
for (tmp = dev; tmp; tmp = tmp->bus->self)
level++;
- size = sizeof(*info) + level * sizeof(struct acpi_dmar_pci_path);
+ size = struct_size(info, path, level);
if (size <= sizeof(dmar_pci_notify_info_buf)) {
info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf;
} else {
@@ -477,7 +466,7 @@
int node = acpi_map_pxm_to_node(rhsa->proximity_domain);
if (!node_online(node))
- node = -1;
+ node = NUMA_NO_NODE;
drhd->iommu->node = node;
return 0;
}
@@ -1062,7 +1051,7 @@
iommu->msagaw = msagaw;
iommu->segment = drhd->segment;
- iommu->node = -1;
+ iommu->node = NUMA_NO_NODE;
ver = readl(iommu->reg + DMAR_VER_REG);
pr_info("%s: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
@@ -1160,6 +1149,7 @@
int head, tail;
struct q_inval *qi = iommu->qi;
int wait_index = (index + 1) % QI_LENGTH;
+ int shift = qi_shift(iommu);
if (qi->desc_status[wait_index] == QI_ABORT)
return -EAGAIN;
@@ -1173,13 +1163,19 @@
*/
if (fault & DMA_FSTS_IQE) {
head = readl(iommu->reg + DMAR_IQH_REG);
- if ((head >> DMAR_IQ_SHIFT) == index) {
- pr_err("VT-d detected invalid descriptor: "
- "low=%llx, high=%llx\n",
- (unsigned long long)qi->desc[index].low,
- (unsigned long long)qi->desc[index].high);
- memcpy(&qi->desc[index], &qi->desc[wait_index],
- sizeof(struct qi_desc));
+ if ((head >> shift) == index) {
+ struct qi_desc *desc = qi->desc + head;
+
+ /*
+ * desc->qw2 and desc->qw3 are either reserved or
+ * used by software as private data. We won't print
+ * out these two qw's for security reasons.
+ */
+ pr_err("VT-d detected invalid descriptor: qw0 = %llx, qw1 = %llx\n",
+ (unsigned long long)desc->qw0,
+ (unsigned long long)desc->qw1);
+ memcpy(desc, qi->desc + (wait_index << shift),
+ 1 << shift);
writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
return -EINVAL;
}
@@ -1191,10 +1187,10 @@
*/
if (fault & DMA_FSTS_ITE) {
head = readl(iommu->reg + DMAR_IQH_REG);
- head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
+ head = ((head >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
head |= 1;
tail = readl(iommu->reg + DMAR_IQT_REG);
- tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
+ tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
@@ -1222,15 +1218,14 @@
{
int rc;
struct q_inval *qi = iommu->qi;
- struct qi_desc *hw, wait_desc;
+ int offset, shift, length;
+ struct qi_desc wait_desc;
int wait_index, index;
unsigned long flags;
if (!qi)
return 0;
- hw = qi->desc;
-
restart:
rc = 0;
@@ -1243,16 +1238,21 @@
index = qi->free_head;
wait_index = (index + 1) % QI_LENGTH;
+ shift = qi_shift(iommu);
+ length = 1 << shift;
qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
- hw[index] = *desc;
-
- wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
+ offset = index << shift;
+ memcpy(qi->desc + offset, desc, length);
+ wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
- wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
+ wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]);
+ wait_desc.qw2 = 0;
+ wait_desc.qw3 = 0;
- hw[wait_index] = wait_desc;
+ offset = wait_index << shift;
+ memcpy(qi->desc + offset, &wait_desc, length);
qi->free_head = (qi->free_head + 2) % QI_LENGTH;
qi->free_cnt -= 2;
@@ -1261,7 +1261,7 @@
* update the HW tail register indicating the presence of
* new descriptors.
*/
- writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);
+ writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG);
while (qi->desc_status[wait_index] != QI_DONE) {
/*
@@ -1298,8 +1298,10 @@
{
struct qi_desc desc;
- desc.low = QI_IEC_TYPE;
- desc.high = 0;
+ desc.qw0 = QI_IEC_TYPE;
+ desc.qw1 = 0;
+ desc.qw2 = 0;
+ desc.qw3 = 0;
/* should never fail */
qi_submit_sync(&desc, iommu);
@@ -1310,9 +1312,11 @@
{
struct qi_desc desc;
- desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
+ desc.qw0 = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
| QI_CC_GRAN(type) | QI_CC_TYPE;
- desc.high = 0;
+ desc.qw1 = 0;
+ desc.qw2 = 0;
+ desc.qw3 = 0;
qi_submit_sync(&desc, iommu);
}
@@ -1331,10 +1335,12 @@
if (cap_read_drain(iommu->cap))
dr = 1;
- desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
+ desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
| QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
- desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
+ desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
| QI_IOTLB_AM(size_order);
+ desc.qw2 = 0;
+ desc.qw3 = 0;
qi_submit_sync(&desc, iommu);
}
@@ -1347,15 +1353,17 @@
if (mask) {
WARN_ON_ONCE(addr & ((1ULL << (VTD_PAGE_SHIFT + mask)) - 1));
addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
- desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
+ desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
} else
- desc.high = QI_DEV_IOTLB_ADDR(addr);
+ desc.qw1 = QI_DEV_IOTLB_ADDR(addr);
if (qdep >= QI_DEV_IOTLB_MAX_INVS)
qdep = 0;
- desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
+ desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
+ desc.qw2 = 0;
+ desc.qw3 = 0;
qi_submit_sync(&desc, iommu);
}
@@ -1403,16 +1411,24 @@
u32 sts;
unsigned long flags;
struct q_inval *qi = iommu->qi;
+ u64 val = virt_to_phys(qi->desc);
qi->free_head = qi->free_tail = 0;
qi->free_cnt = QI_LENGTH;
+ /*
+ * Set DW=1 and QS=1 in IQA_REG when Scalable Mode capability
+ * is present.
+ */
+ if (ecap_smts(iommu->ecap))
+ val |= (1 << 11) | 1;
+
raw_spin_lock_irqsave(&iommu->register_lock, flags);
/* write zero to the tail reg */
writel(0, iommu->reg + DMAR_IQT_REG);
- dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
+ dmar_writeq(iommu->reg + DMAR_IQA_REG, val);
iommu->gcmd |= DMA_GCMD_QIE;
writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
@@ -1448,8 +1464,12 @@
qi = iommu->qi;
-
- desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 0);
+ /*
+ * Need two pages to accommodate 256 descriptors of 256 bits each
+ * if the remapping hardware supports scalable mode translation.
+ */
+ desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
+ !!ecap_smts(iommu->ecap));
if (!desc_page) {
kfree(qi);
iommu->qi = NULL;
@@ -1499,6 +1519,64 @@
"PCE for translation request specifies blocking",
};
+static const char * const dma_remap_sm_fault_reasons[] = {
+ "SM: Invalid Root Table Address",
+ "SM: TTM 0 for request with PASID",
+ "SM: TTM 0 for page group request",
+ "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x33-0x37 */
+ "SM: Error attempting to access Root Entry",
+ "SM: Present bit in Root Entry is clear",
+ "SM: Non-zero reserved field set in Root Entry",
+ "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x3B-0x3F */
+ "SM: Error attempting to access Context Entry",
+ "SM: Present bit in Context Entry is clear",
+ "SM: Non-zero reserved field set in the Context Entry",
+ "SM: Invalid Context Entry",
+ "SM: DTE field in Context Entry is clear",
+ "SM: PASID Enable field in Context Entry is clear",
+ "SM: PASID is larger than the max in Context Entry",
+ "SM: PRE field in Context-Entry is clear",
+ "SM: RID_PASID field error in Context-Entry",
+ "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x49-0x4F */
+ "SM: Error attempting to access the PASID Directory Entry",
+ "SM: Present bit in Directory Entry is clear",
+ "SM: Non-zero reserved field set in PASID Directory Entry",
+ "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x53-0x57 */
+ "SM: Error attempting to access PASID Table Entry",
+ "SM: Present bit in PASID Table Entry is clear",
+ "SM: Non-zero reserved field set in PASID Table Entry",
+ "SM: Invalid Scalable-Mode PASID Table Entry",
+ "SM: ERE field is clear in PASID Table Entry",
+ "SM: SRE field is clear in PASID Table Entry",
+ "Unknown", "Unknown",/* 0x5E-0x5F */
+ "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x60-0x67 */
+ "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x68-0x6F */
+ "SM: Error attempting to access first-level paging entry",
+ "SM: Present bit in first-level paging entry is clear",
+ "SM: Non-zero reserved field set in first-level paging entry",
+ "SM: Error attempting to access FL-PML4 entry",
+ "SM: First-level entry address beyond MGAW in Nested translation",
+ "SM: Read permission error in FL-PML4 entry in Nested translation",
+ "SM: Read permission error in first-level paging entry in Nested translation",
+ "SM: Write permission error in first-level paging entry in Nested translation",
+ "SM: Error attempting to access second-level paging entry",
+ "SM: Read/Write permission error in second-level paging entry",
+ "SM: Non-zero reserved field set in second-level paging entry",
+ "SM: Invalid second-level page table pointer",
+ "SM: A/D bit update needed in second-level entry when set up in no snoop",
+ "Unknown", "Unknown", "Unknown", /* 0x7D-0x7F */
+ "SM: Address in first-level translation is not canonical",
+ "SM: U/S set 0 for first-level translation with user privilege",
+ "SM: No execute permission for request with PASID and ER=1",
+ "SM: Address beyond the DMA hardware max",
+ "SM: Second-level entry address beyond the max",
+ "SM: No write permission for Write/AtomicOp request",
+ "SM: No read permission for Read/AtomicOp request",
+ "SM: Invalid address-interrupt address",
+ "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x88-0x8F */
+ "SM: A/D bit update needed in first-level entry when set up in no snoop",
+};
+
static const char *irq_remap_fault_reasons[] =
{
"Detected reserved fields in the decoded interrupt-remapped request",
@@ -1516,6 +1594,10 @@
ARRAY_SIZE(irq_remap_fault_reasons))) {
*fault_type = INTR_REMAP;
return irq_remap_fault_reasons[fault_reason - 0x20];
+ } else if (fault_reason >= 0x30 && (fault_reason - 0x30 <
+ ARRAY_SIZE(dma_remap_sm_fault_reasons))) {
+ *fault_type = DMA_REMAP;
+ return dma_remap_sm_fault_reasons[fault_reason - 0x30];
} else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
*fault_type = DMA_REMAP;
return dma_remap_fault_reasons[fault_reason];
@@ -1591,7 +1673,8 @@
}
static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
- u8 fault_reason, u16 source_id, unsigned long long addr)
+ u8 fault_reason, int pasid, u16 source_id,
+ unsigned long long addr)
{
const char *reason;
int fault_type;
@@ -1604,10 +1687,11 @@
PCI_FUNC(source_id & 0xFF), addr >> 48,
fault_reason, reason);
else
- pr_err("[%s] Request device [%02x:%02x.%d] fault addr %llx [fault reason %02d] %s\n",
+ pr_err("[%s] Request device [%02x:%02x.%d] PASID %x fault addr %llx [fault reason %02d] %s\n",
type ? "DMA Read" : "DMA Write",
source_id >> 8, PCI_SLOT(source_id & 0xFF),
- PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
+ PCI_FUNC(source_id & 0xFF), pasid, addr,
+ fault_reason, reason);
return 0;
}
@@ -1639,8 +1723,9 @@
u8 fault_reason;
u16 source_id;
u64 guest_addr;
- int type;
+ int type, pasid;
u32 data;
+ bool pasid_present;
/* highest 32 bits */
data = readl(iommu->reg + reg +
@@ -1652,10 +1737,12 @@
fault_reason = dma_frcd_fault_reason(data);
type = dma_frcd_type(data);
+ pasid = dma_frcd_pasid_value(data);
data = readl(iommu->reg + reg +
fault_index * PRIMARY_FAULT_REG_LEN + 8);
source_id = dma_frcd_source_id(data);
+ pasid_present = dma_frcd_pasid_present(data);
guest_addr = dmar_readq(iommu->reg + reg +
fault_index * PRIMARY_FAULT_REG_LEN);
guest_addr = dma_frcd_page_addr(guest_addr);
@@ -1668,7 +1755,9 @@
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
if (!ratelimited)
+ /* Using pasid -1 if pasid is not present */
dmar_fault_do_one(iommu, type, fault_reason,
+ pasid_present ? pasid : -1,
source_id, guest_addr);
fault_index++;
@@ -2042,3 +2131,28 @@
{
return dmar_device_hotplug(handle, false);
}
+
+/*
+ * dmar_platform_optin - Is %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in DMAR table
+ *
+ * Returns true if the platform has %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in
+ * the ACPI DMAR table. This means that the platform boot firmware has made
+ * sure no device can issue DMA outside of RMRR regions.
+ */
+bool dmar_platform_optin(void)
+{
+ struct acpi_table_dmar *dmar;
+ acpi_status status;
+ bool ret;
+
+ status = acpi_get_table(ACPI_SIG_DMAR, 0,
+ (struct acpi_table_header **)&dmar);
+ if (ACPI_FAILURE(status))
+ return false;
+
+ ret = !!(dmar->flags & DMAR_PLATFORM_OPT_IN);
+ acpi_put_table((struct acpi_table_header *)dmar);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(dmar_platform_optin);
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 1bd0cd7..9c94e16 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2011,2016 Samsung Electronics Co., Ltd.
* http://www.samsung.com
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#ifdef CONFIG_EXYNOS_IOMMU_DEBUG
@@ -569,7 +566,7 @@
static const struct iommu_ops exynos_iommu_ops;
-static int __init exynos_sysmmu_probe(struct platform_device *pdev)
+static int exynos_sysmmu_probe(struct platform_device *pdev)
{
int irq, ret;
struct device *dev = &pdev->dev;
@@ -586,10 +583,8 @@
return PTR_ERR(data->sfrbase);
irq = platform_get_irq(pdev, 0);
- if (irq <= 0) {
- dev_err(dev, "Unable to find IRQ resource\n");
+ if (irq <= 0)
return irq;
- }
ret = devm_request_irq(dev, irq, exynos_sysmmu_irq, 0,
dev_name(dev), data);
@@ -1133,7 +1128,8 @@
}
static size_t exynos_iommu_unmap(struct iommu_domain *iommu_domain,
- unsigned long l_iova, size_t size)
+ unsigned long l_iova, size_t size,
+ struct iommu_iotlb_gather *gather)
{
struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain);
sysmmu_iova_t iova = (sysmmu_iova_t)l_iova;
@@ -1260,6 +1256,7 @@
* direct calls to pm_runtime_get/put in this driver.
*/
data->link = device_link_add(dev, data->sysmmu,
+ DL_FLAG_STATELESS |
DL_FLAG_PM_RUNTIME);
}
iommu_group_put(group);
diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c
index 8540625..cde281b 100644
--- a/drivers/iommu/fsl_pamu.c
+++ b/drivers/iommu/fsl_pamu.c
@@ -1,19 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright (C) 2013 Freescale Semiconductor, Inc.
- *
*/
#define pr_fmt(fmt) "fsl-pamu: %s: " fmt, __func__
@@ -543,7 +531,7 @@
return ~(u32)0;
}
- for_each_node_by_type(node, "cpu") {
+ for_each_of_cpu_node(node) {
prop = of_get_property(node, "reg", &len);
for (i = 0; i < len / sizeof(u32); i++) {
if (be32_to_cpup(&prop[i]) == vcpu) {
diff --git a/drivers/iommu/fsl_pamu.h b/drivers/iommu/fsl_pamu.h
index c3434f2..e1496ba 100644
--- a/drivers/iommu/fsl_pamu.h
+++ b/drivers/iommu/fsl_pamu.h
@@ -1,19 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright (C) 2013 Freescale Semiconductor, Inc.
- *
*/
#ifndef __FSL_PAMU_H
diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c
index f089136..06828e2 100644
--- a/drivers/iommu/fsl_pamu_domain.c
+++ b/drivers/iommu/fsl_pamu_domain.c
@@ -1,20 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright (C) 2013 Freescale Semiconductor, Inc.
* Author: Varun Sethi <varun.sethi@freescale.com>
- *
*/
#define pr_fmt(fmt) "fsl-pamu-domain: %s: " fmt, __func__
@@ -814,6 +802,55 @@
return 0;
}
+/*
+ * fsl_pamu_set_windows() - set the number of DMA windows of a domain.
+ * @domain: IOMMU domain to configure
+ * @w_count: requested window count; must be a power of two and must not
+ *           exceed pamu_get_max_subwin_cnt()
+ *
+ * The domain has to be disabled and its geometry has to be configured
+ * already. On success the window array is replaced by a freshly allocated,
+ * zeroed array of @w_count entries and win_cnt is updated to match.
+ *
+ * Return: 0 on success, -EBUSY if the domain is enabled, -EINVAL if no
+ * geometry is set or @w_count is invalid, -ENOMEM if the window array cannot
+ * be allocated, or the error reported by pamu_set_domain_geometry().
+ */
+static int fsl_pamu_set_windows(struct iommu_domain *domain, u32 w_count)
+{
+	struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain);
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&dma_domain->domain_lock, flags);
+	/* Ensure domain is inactive i.e. DMA should be disabled for the domain */
+	if (dma_domain->enabled) {
+		pr_debug("Can't set geometry attributes as domain is active\n");
+		ret = -EBUSY;
+		goto out_unlock;
+	}
+
+	/* Ensure that the geometry has been set for the domain */
+	if (!dma_domain->geom_size) {
+		pr_debug("Please configure geometry before setting the number of windows\n");
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	/*
+	 * Ensure we have a valid window count, i.e. it must not exceed the
+	 * maximum permissible limit and must be a power of two.
+	 */
+	if (w_count > pamu_get_max_subwin_cnt() || !is_power_of_2(w_count)) {
+		pr_debug("Invalid window count\n");
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	ret = pamu_set_domain_geometry(dma_domain, &domain->geometry,
+				       w_count > 1 ? w_count : 0);
+	if (ret)
+		goto out_unlock;
+
+	kfree(dma_domain->win_arr);
+	dma_domain->win_arr = kcalloc(w_count, sizeof(*dma_domain->win_arr),
+				      GFP_ATOMIC);
+	if (!dma_domain->win_arr) {
+		/*
+		 * The old array is gone; never leave a stale, non-zero
+		 * window count behind a NULL window array.
+		 */
+		dma_domain->win_cnt = 0;
+		ret = -ENOMEM;
+		goto out_unlock;
+	}
+	dma_domain->win_cnt = w_count;
+
+out_unlock:
+	spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+
+	return ret;
+}
+
+
static int fsl_pamu_set_domain_attr(struct iommu_domain *domain,
enum iommu_attr attr_type, void *data)
{
@@ -830,6 +867,9 @@
case DOMAIN_ATTR_FSL_PAMU_ENABLE:
ret = configure_domain_dma_state(dma_domain, *(int *)data);
break;
+ case DOMAIN_ATTR_WINDOWS:
+ ret = fsl_pamu_set_windows(domain, *(u32 *)data);
+ break;
default:
pr_debug("Unsupported attribute type\n");
ret = -EINVAL;
@@ -856,6 +896,9 @@
case DOMAIN_ATTR_FSL_PAMUV1:
*(int *)data = DOMAIN_ATTR_FSL_PAMUV1;
break;
+ case DOMAIN_ATTR_WINDOWS:
+ *(u32 *)data = dma_domain->win_cnt;
+ break;
default:
pr_debug("Unsupported attribute type\n");
ret = -EINVAL;
@@ -916,13 +959,13 @@
static struct iommu_group *get_pci_device_group(struct pci_dev *pdev)
{
struct pci_controller *pci_ctl;
- bool pci_endpt_partioning;
+ bool pci_endpt_partitioning;
struct iommu_group *group = NULL;
pci_ctl = pci_bus_to_host(pdev->bus);
- pci_endpt_partioning = check_pci_ctl_endpt_part(pci_ctl);
+ pci_endpt_partitioning = check_pci_ctl_endpt_part(pci_ctl);
/* We can partition PCIe devices so assign device group to the device */
- if (pci_endpt_partioning) {
+ if (pci_endpt_partitioning) {
group = pci_device_group(&pdev->dev);
/*
@@ -994,62 +1037,6 @@
iommu_group_remove_device(dev);
}
-static int fsl_pamu_set_windows(struct iommu_domain *domain, u32 w_count)
-{
- struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain);
- unsigned long flags;
- int ret;
-
- spin_lock_irqsave(&dma_domain->domain_lock, flags);
- /* Ensure domain is inactive i.e. DMA should be disabled for the domain */
- if (dma_domain->enabled) {
- pr_debug("Can't set geometry attributes as domain is active\n");
- spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
- return -EBUSY;
- }
-
- /* Ensure that the geometry has been set for the domain */
- if (!dma_domain->geom_size) {
- pr_debug("Please configure geometry before setting the number of windows\n");
- spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
- return -EINVAL;
- }
-
- /*
- * Ensure we have valid window count i.e. it should be less than
- * maximum permissible limit and should be a power of two.
- */
- if (w_count > pamu_get_max_subwin_cnt() || !is_power_of_2(w_count)) {
- pr_debug("Invalid window count\n");
- spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
- return -EINVAL;
- }
-
- ret = pamu_set_domain_geometry(dma_domain, &domain->geometry,
- w_count > 1 ? w_count : 0);
- if (!ret) {
- kfree(dma_domain->win_arr);
- dma_domain->win_arr = kcalloc(w_count,
- sizeof(*dma_domain->win_arr),
- GFP_ATOMIC);
- if (!dma_domain->win_arr) {
- spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
- return -ENOMEM;
- }
- dma_domain->win_cnt = w_count;
- }
- spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
-
- return ret;
-}
-
-static u32 fsl_pamu_get_windows(struct iommu_domain *domain)
-{
- struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain);
-
- return dma_domain->win_cnt;
-}
-
static const struct iommu_ops fsl_pamu_ops = {
.capable = fsl_pamu_capable,
.domain_alloc = fsl_pamu_domain_alloc,
@@ -1058,8 +1045,6 @@
.detach_dev = fsl_pamu_detach_device,
.domain_window_enable = fsl_pamu_window_enable,
.domain_window_disable = fsl_pamu_window_disable,
- .domain_get_windows = fsl_pamu_get_windows,
- .domain_set_windows = fsl_pamu_set_windows,
.iova_to_phys = fsl_pamu_iova_to_phys,
.domain_set_attr = fsl_pamu_set_domain_attr,
.domain_get_attr = fsl_pamu_get_domain_attr,
diff --git a/drivers/iommu/fsl_pamu_domain.h b/drivers/iommu/fsl_pamu_domain.h
index f2b0f74..2865d42 100644
--- a/drivers/iommu/fsl_pamu_domain.h
+++ b/drivers/iommu/fsl_pamu_domain.h
@@ -1,19 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright (C) 2013 Freescale Semiconductor, Inc.
- *
*/
#ifndef __FSL_PAMU_DOMAIN_H
diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c
new file mode 100644
index 0000000..a386b83
--- /dev/null
+++ b/drivers/iommu/hyperv-iommu.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Hyper-V stub IOMMU driver.
+ *
+ * Copyright (C) 2019, Microsoft, Inc.
+ *
+ * Author : Lan Tianyu <Tianyu.Lan@microsoft.com>
+ */
+
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/iommu.h>
+#include <linux/module.h>
+
+#include <asm/apic.h>
+#include <asm/cpu.h>
+#include <asm/hw_irq.h>
+#include <asm/io_apic.h>
+#include <asm/irq_remapping.h>
+#include <asm/hypervisor.h>
+
+#include "irq_remapping.h"
+
+#ifdef CONFIG_IRQ_REMAP
+
+/*
+ * According to the 82093AA IO-APIC spec, the IO-APIC has a 24-entry
+ * Interrupt Redirection Table. Hyper-V exposes one single IO-APIC, so
+ * define 24 IO-APIC remapping entries.
+ */
+#define IOAPIC_REMAPPING_ENTRY 24
+
+/* CPUs an IO-APIC interrupt may target; filled in hyperv_prepare_irq_remapping(). */
+static cpumask_t ioapic_max_cpumask = { CPU_BITS_NONE };
+/* IRQ domain created in hyperv_prepare_irq_remapping() for IO-APIC interrupts. */
+static struct irq_domain *ioapic_ir_domain;
+
+/*
+ * irq_set_affinity callback of the HYPERV-IR chip.
+ *
+ * Rejects masks that are not a subset of ioapic_max_cpumask, forwards the
+ * request to the parent chip and, unless the parent failed or reported
+ * IRQ_SET_MASK_OK_DONE, copies the resulting destination APIC ID and vector
+ * into the IO-APIC route entry stored in chip_data.
+ */
+static int hyperv_ir_set_affinity(struct irq_data *data,
+		const struct cpumask *mask, bool force)
+{
+	struct irq_data *parent_irq = data->parent_data;
+	struct irq_cfg *irq_cfg = irqd_cfg(data);
+	struct IO_APIC_route_entry *rte;
+	int rc;
+
+	/* The new affinity must stay within ioapic_max_cpumask. */
+	if (!cpumask_subset(mask, &ioapic_max_cpumask))
+		return -EINVAL;
+
+	rc = parent_irq->chip->irq_set_affinity(parent_irq, mask, force);
+	if (rc < 0)
+		return rc;
+	if (rc == IRQ_SET_MASK_OK_DONE)
+		return rc;
+
+	rte = data->chip_data;
+	rte->dest = irq_cfg->dest_apicid;
+	rte->vector = irq_cfg->vector;
+	send_cleanup_vector(irq_cfg);
+
+	return 0;
+}
+
+
+/* irq_chip installed on IO-APIC interrupts by hyperv_irq_remapping_alloc(). */
+static struct irq_chip hyperv_ir_chip = {
+ .name = "HYPERV-IR",
+ .irq_ack = apic_ack_irq,
+ .irq_set_affinity = hyperv_ir_set_affinity,
+};
+
+/*
+ * irq_domain .alloc callback: set up one IO-APIC interrupt in the stub domain.
+ *
+ * Only a single interrupt of type X86_IRQ_ALLOC_TYPE_IOAPIC is accepted;
+ * anything else yields -EINVAL. Errors from the parent allocation are
+ * returned unchanged.
+ */
+static int hyperv_irq_remapping_alloc(struct irq_domain *domain,
+				     unsigned int virq, unsigned int nr_irqs,
+				     void *arg)
+{
+	struct irq_alloc_info *alloc_info = arg;
+	struct irq_data *data;
+	struct irq_desc *irq_desc;
+	int rc;
+
+	if (!alloc_info)
+		return -EINVAL;
+	if (alloc_info->type != X86_IRQ_ALLOC_TYPE_IOAPIC || nr_irqs > 1)
+		return -EINVAL;
+
+	rc = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
+	if (rc < 0)
+		return rc;
+
+	data = irq_domain_get_irq_data(domain, virq);
+	if (!data) {
+		irq_domain_free_irqs_common(domain, virq, nr_irqs);
+		return -EINVAL;
+	}
+
+	data->chip = &hyperv_ir_chip;
+
+	/*
+	 * With a real IOMMU interrupt remapping unit, changing the irq
+	 * affinity only requires updating the IRTE. Hyper-V offers no such
+	 * unit, so the IO-APIC registers themselves must be changed. However,
+	 * ioapic_configure_entry() ignores cfg->vector and cfg->dest_apicid
+	 * when the IO-APIC's parent irq domain is not the vector domain.
+	 * Therefore the IO-APIC entry pointer is kept in chip_data, and
+	 * hyperv_irq_remapping_activate()/hyperv_ir_set_affinity() write the
+	 * vector and dest_apicid straight into that entry.
+	 */
+	data->chip_data = alloc_info->ioapic_entry;
+
+	/*
+	 * Without irq remapping support, the affinity of a Hyper-V IO-APIC
+	 * irq has to stay within ioapic_max_cpumask.
+	 */
+	irq_desc = irq_data_to_desc(data);
+	cpumask_copy(irq_desc->irq_common_data.affinity, &ioapic_max_cpumask);
+
+	return 0;
+}
+
+
+/* irq_domain .free callback: hands the range to irq_domain_free_irqs_common(). */
+static void hyperv_irq_remapping_free(struct irq_domain *domain,
+ unsigned int virq, unsigned int nr_irqs)
+{
+ irq_domain_free_irqs_common(domain, virq, nr_irqs);
+}
+
+/*
+ * irq_domain .activate callback: copy the vector and destination APIC ID of
+ * the interrupt's irq_cfg into the IO-APIC route entry kept in chip_data.
+ * Always returns 0.
+ */
+static int hyperv_irq_remapping_activate(struct irq_domain *domain,
+			  struct irq_data *irq_data, bool reserve)
+{
+	struct IO_APIC_route_entry *rte = irq_data->chip_data;
+	struct irq_cfg *irq_cfg = irqd_cfg(irq_data);
+
+	rte->dest = irq_cfg->dest_apicid;
+	rte->vector = irq_cfg->vector;
+
+	return 0;
+}
+
+
+/*
+ * Callbacks of the Hyper-V stub IRQ remapping domain. The table is never
+ * modified and is only passed by address to irq_domain_create_hierarchy(),
+ * so it is declared const.
+ */
+static const struct irq_domain_ops hyperv_ir_domain_ops = {
+	.alloc = hyperv_irq_remapping_alloc,
+	.free = hyperv_irq_remapping_free,
+	.activate = hyperv_irq_remapping_activate,
+};
+
+/*
+ * Create the stub IRQ remapping domain for the Hyper-V IO-APIC and compute
+ * the set of CPUs IO-APIC interrupts may be routed to.
+ *
+ * Return: 0 on success, -ENODEV when not running on Hyper-V or when x2APIC
+ * is not supported, -ENOMEM when the fwnode or the irq domain cannot be
+ * created.
+ */
+static int __init hyperv_prepare_irq_remapping(void)
+{
+	struct fwnode_handle *fn;
+	int i;
+
+	if (!hypervisor_is_type(X86_HYPER_MS_HYPERV) ||
+	    !x2apic_supported())
+		return -ENODEV;
+
+	fn = irq_domain_alloc_named_id_fwnode("HYPERV-IR", 0);
+	if (!fn)
+		return -ENOMEM;
+
+	ioapic_ir_domain =
+		irq_domain_create_hierarchy(arch_get_ir_parent_domain(),
+				0, IOAPIC_REMAPPING_ENTRY, fn,
+				&hyperv_ir_domain_ops, NULL);
+
+	irq_domain_free_fwnode(fn);
+
+	/*
+	 * Do not report success without a domain: hyperv_get_ir_irq_domain()
+	 * would hand out NULL for every IO-APIC request.
+	 */
+	if (!ioapic_ir_domain)
+		return -ENOMEM;
+
+	/*
+	 * Hyper-V doesn't provide an irq remapping function for the
+	 * IO-APIC, so the IO-APIC only accepts 8-bit APIC IDs.
+	 * A CPU's APIC ID is read from the ACPI MADT table, and the APIC IDs
+	 * in the MADT table on Hyper-V are sorted in monotonically increasing
+	 * order. The APIC ID reflects the CPU topology, so there may be some
+	 * APIC ID gaps when the number of CPUs in a socket is not a power of
+	 * two. Prepare the maximum CPU affinity for IO-APIC irqs: scan CPUs
+	 * 0-255 and add a CPU to ioapic_max_cpumask if its APIC ID is less
+	 * than 256.
+	 */
+	for (i = min_t(unsigned int, num_possible_cpus() - 1, 255); i >= 0; i--)
+		if (cpu_physical_id(i) < 256)
+			cpumask_set_cpu(i, &ioapic_max_cpumask);
+
+	return 0;
+}
+
+
+/* .enable callback of hyperv_irq_remap_ops; always returns IRQ_REMAP_X2APIC_MODE. */
+static int __init hyperv_enable_irq_remapping(void)
+{
+ return IRQ_REMAP_X2APIC_MODE;
+}
+
+/*
+ * Return the stub IO-APIC remapping domain for IO-APIC allocation requests
+ * and NULL for every other allocation type.
+ */
+static struct irq_domain *hyperv_get_ir_irq_domain(struct irq_alloc_info *info)
+{
+	if (info->type != X86_IRQ_ALLOC_TYPE_IOAPIC)
+		return NULL;
+
+	return ioapic_ir_domain;
+}
+
+/* Non-static ops table bundling the prepare/enable/domain-lookup callbacks above. */
+struct irq_remap_ops hyperv_irq_remap_ops = {
+ .prepare = hyperv_prepare_irq_remapping,
+ .enable = hyperv_enable_irq_remapping,
+ .get_ir_irq_domain = hyperv_get_ir_irq_domain,
+};
+
+#endif
diff --git a/drivers/iommu/intel-iommu-debugfs.c b/drivers/iommu/intel-iommu-debugfs.c
new file mode 100644
index 0000000..471f05d
--- /dev/null
+++ b/drivers/iommu/intel-iommu-debugfs.c
@@ -0,0 +1,403 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2018 Intel Corporation.
+ *
+ * Authors: Gayatri Kammela <gayatri.kammela@intel.com>
+ * Sohil Mehta <sohil.mehta@intel.com>
+ * Jacob Pan <jacob.jun.pan@linux.intel.com>
+ */
+
+#include <linux/debugfs.h>
+#include <linux/dmar.h>
+#include <linux/intel-iommu.h>
+#include <linux/pci.h>
+
+#include <asm/irq_remapping.h>
+
+#include "intel-pasid.h"
+
+/*
+ * State of one translation-table walk. ctx_tbl_walk() fills in bus, devfn,
+ * rt_entry and ctx_entry and publishes the structure through m->private;
+ * pasid_tbl_walk() fills in pasid and pasid_tbl_entry; print_tbl_walk()
+ * prints the result. pasid_tbl_entry stays NULL when no PASID table is
+ * walked.
+ */
+struct tbl_walk {
+ u16 bus;
+ u16 devfn;
+ u32 pasid;
+ struct root_entry *rt_entry;
+ struct context_entry *ctx_entry;
+ struct pasid_entry *pasid_tbl_entry;
+};
+
+/* One register dumped by iommu_regset_show(). */
+struct iommu_regset {
+ int offset; /* added to iommu->reg to address the register */
+ const char *regs; /* register name printed in the dump */
+};
+
+/* Build an iommu_regset entry from DMAR_<reg>_REG and the stringified name. */
+#define IOMMU_REGSET_ENTRY(_reg_) \
+ { DMAR_##_reg_##_REG, __stringify(_reg_) }
+/* Registers printed, in this order, by iommu_regset_show(). */
+static const struct iommu_regset iommu_regs[] = {
+ IOMMU_REGSET_ENTRY(VER),
+ IOMMU_REGSET_ENTRY(CAP),
+ IOMMU_REGSET_ENTRY(ECAP),
+ IOMMU_REGSET_ENTRY(GCMD),
+ IOMMU_REGSET_ENTRY(GSTS),
+ IOMMU_REGSET_ENTRY(RTADDR),
+ IOMMU_REGSET_ENTRY(CCMD),
+ IOMMU_REGSET_ENTRY(FSTS),
+ IOMMU_REGSET_ENTRY(FECTL),
+ IOMMU_REGSET_ENTRY(FEDATA),
+ IOMMU_REGSET_ENTRY(FEADDR),
+ IOMMU_REGSET_ENTRY(FEUADDR),
+ IOMMU_REGSET_ENTRY(AFLOG),
+ IOMMU_REGSET_ENTRY(PMEN),
+ IOMMU_REGSET_ENTRY(PLMBASE),
+ IOMMU_REGSET_ENTRY(PLMLIMIT),
+ IOMMU_REGSET_ENTRY(PHMBASE),
+ IOMMU_REGSET_ENTRY(PHMLIMIT),
+ IOMMU_REGSET_ENTRY(IQH),
+ IOMMU_REGSET_ENTRY(IQT),
+ IOMMU_REGSET_ENTRY(IQA),
+ IOMMU_REGSET_ENTRY(ICS),
+ IOMMU_REGSET_ENTRY(IRTA),
+ IOMMU_REGSET_ENTRY(PQH),
+ IOMMU_REGSET_ENTRY(PQT),
+ IOMMU_REGSET_ENTRY(PQA),
+ IOMMU_REGSET_ENTRY(PRS),
+ IOMMU_REGSET_ENTRY(PECTL),
+ IOMMU_REGSET_ENTRY(PEDATA),
+ IOMMU_REGSET_ENTRY(PEADDR),
+ IOMMU_REGSET_ENTRY(PEUADDR),
+ IOMMU_REGSET_ENTRY(MTRRCAP),
+ IOMMU_REGSET_ENTRY(MTRRDEF),
+ IOMMU_REGSET_ENTRY(MTRR_FIX64K_00000),
+ IOMMU_REGSET_ENTRY(MTRR_FIX16K_80000),
+ IOMMU_REGSET_ENTRY(MTRR_FIX16K_A0000),
+ IOMMU_REGSET_ENTRY(MTRR_FIX4K_C0000),
+ IOMMU_REGSET_ENTRY(MTRR_FIX4K_C8000),
+ IOMMU_REGSET_ENTRY(MTRR_FIX4K_D0000),
+ IOMMU_REGSET_ENTRY(MTRR_FIX4K_D8000),
+ IOMMU_REGSET_ENTRY(MTRR_FIX4K_E0000),
+ IOMMU_REGSET_ENTRY(MTRR_FIX4K_E8000),
+ IOMMU_REGSET_ENTRY(MTRR_FIX4K_F0000),
+ IOMMU_REGSET_ENTRY(MTRR_FIX4K_F8000),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSBASE0),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSMASK0),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSBASE1),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSMASK1),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSBASE2),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSMASK2),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSBASE3),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSMASK3),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSBASE4),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSMASK4),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSBASE5),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSMASK5),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSBASE6),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSMASK6),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSBASE7),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSMASK7),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSBASE8),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSMASK8),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSBASE9),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSMASK9),
+ IOMMU_REGSET_ENTRY(VCCAP),
+ IOMMU_REGSET_ENTRY(VCMD),
+ IOMMU_REGSET_ENTRY(VCRSP),
+};
+
+/*
+ * debugfs "iommu_regset": for every active IOMMU print its register base
+ * address followed by the name, offset and 64-bit contents of each register
+ * listed in iommu_regs[].
+ *
+ * Returns 0, or -EINVAL as soon as a unit without a register base address
+ * is encountered.
+ */
+static int iommu_regset_show(struct seq_file *m, void *unused)
+{
+	struct dmar_drhd_unit *unit;
+	struct intel_iommu *iommu;
+	unsigned long irqflags;
+	int idx, err = 0;
+
+	rcu_read_lock();
+	for_each_active_iommu(iommu, unit) {
+		if (!unit->reg_base_addr) {
+			seq_puts(m, "IOMMU: Invalid base address\n");
+			err = -EINVAL;
+			goto unlock;
+		}
+
+		seq_printf(m, "IOMMU: %s Register Base Address: %llx\n",
+			   iommu->name, unit->reg_base_addr);
+		seq_puts(m, "Name\t\t\tOffset\t\tContents\n");
+
+		/*
+		 * Each register is read as a 64-bit value at its offset from
+		 * the mapped register base, with the register lock held.
+		 */
+		raw_spin_lock_irqsave(&iommu->register_lock, irqflags);
+		for (idx = 0; idx < ARRAY_SIZE(iommu_regs); idx++) {
+			const struct iommu_regset *reg = &iommu_regs[idx];
+			u64 contents = dmar_readq(iommu->reg + reg->offset);
+
+			seq_printf(m, "%-16s\t0x%02x\t\t0x%016llx\n",
+				   reg->regs, reg->offset, contents);
+		}
+		raw_spin_unlock_irqrestore(&iommu->register_lock, irqflags);
+		seq_putc(m, '\n');
+	}
+unlock:
+	rcu_read_unlock();
+
+	return err;
+}
+DEFINE_SHOW_ATTRIBUTE(iommu_regset);
+
+/*
+ * Print one line for the walk state found in m->private: B:D.F, the root
+ * entry, the context entry and, when a PASID table entry was recorded, the
+ * PASID with that entry.
+ */
+static inline void print_tbl_walk(struct seq_file *m)
+{
+	struct tbl_walk *walk = m->private;
+	struct pasid_entry *pte = walk->pasid_tbl_entry;
+
+	seq_printf(m, "%02x:%02x.%x\t0x%016llx:0x%016llx\t0x%016llx:0x%016llx\t",
+		   walk->bus, PCI_SLOT(walk->devfn), PCI_FUNC(walk->devfn),
+		   walk->rt_entry->hi, walk->rt_entry->lo,
+		   walk->ctx_entry->hi, walk->ctx_entry->lo);
+
+	if (pte) {
+		seq_printf(m, "%-6d\t0x%016llx:0x%016llx:0x%016llx\n",
+			   walk->pasid, pte->val[2], pte->val[1], pte->val[0]);
+		return;
+	}
+
+	/*
+	 * A legacy mode DMAR doesn't support PASID, so print -1 to mark the
+	 * PASID as invalid and zero for all PASID related fields.
+	 */
+	seq_printf(m, "%-6d\t0x%016llx:0x%016llx:0x%016llx\n", -1,
+		   (u64)0, (u64)0, (u64)0);
+}
+
+
+/*
+ * Visit PASID_TBL_ENTRIES consecutive entries starting at @tbl_entry and
+ * print every present one. @dir_idx is the index of the directory entry the
+ * table was reached through; it supplies the upper bits of the PASID.
+ */
+static void pasid_tbl_walk(struct seq_file *m, struct pasid_entry *tbl_entry,
+			   u16 dir_idx)
+{
+	struct tbl_walk *walk = m->private;
+	u8 idx;
+
+	for (idx = 0; idx < PASID_TBL_ENTRIES; idx++, tbl_entry++) {
+		if (!pasid_pte_is_present(tbl_entry))
+			continue;
+
+		walk->pasid_tbl_entry = tbl_entry;
+		walk->pasid = (dir_idx << PASID_PDE_SHIFT) + idx;
+		print_tbl_walk(m);
+	}
+}
+
+
+/*
+ * Walk @pasid_dir_size entries of the PASID directory located at physical
+ * address @pasid_dir_ptr and descend into every PASID table that
+ * get_pasid_table_from_pde() returns.
+ */
+static void pasid_dir_walk(struct seq_file *m, u64 pasid_dir_ptr,
+			   u16 pasid_dir_size)
+{
+	struct pasid_dir_entry *pde = phys_to_virt(pasid_dir_ptr);
+	u16 idx;
+
+	for (idx = 0; idx < pasid_dir_size; idx++, pde++) {
+		struct pasid_entry *table = get_pasid_table_from_pde(pde);
+
+		if (!table)
+			continue;
+
+		pasid_tbl_walk(m, table, idx);
+	}
+}
+
+
+/*
+ * Print every present context entry of @bus on @iommu. When the root table
+ * address register has DMA_RTADDR_SMT set, the PASID directory referenced by
+ * the context entry is walked instead of printing the context entry alone.
+ * The walk of this bus stops at the first devfn for which
+ * iommu_context_addr() returns NULL.
+ */
+static void ctx_tbl_walk(struct seq_file *m, struct intel_iommu *iommu, u16 bus)
+{
+	u16 devfn;
+
+	for (devfn = 0; devfn < 256; devfn++) {
+		struct tbl_walk walk = {0};
+		struct context_entry *ctx;
+		u64 pasid_dir_ptr;
+		u16 pasid_dir_size;
+
+		/*
+		 * A scalable mode root entry points to an upper and a lower
+		 * scalable mode context table with 128 context entries each,
+		 * whereas a legacy mode context table has 256 context entries.
+		 * In scalable mode the first 128 devices live in the lower
+		 * table and the remaining 128 in the upper one. For
+		 * devfn > 127, iommu_context_addr() automatically refers to
+		 * the upper table, so the caller need not distinguish between
+		 * scalable and non scalable mode.
+		 */
+		ctx = iommu_context_addr(iommu, bus, devfn, 0);
+		if (!ctx)
+			return;
+
+		if (!context_present(ctx))
+			continue;
+
+		walk.bus = bus;
+		walk.devfn = devfn;
+		walk.rt_entry = &iommu->root_entry[bus];
+		walk.ctx_entry = ctx;
+		m->private = &walk;
+
+		if (!(dmar_readq(iommu->reg + DMAR_RTADDR_REG) & DMA_RTADDR_SMT)) {
+			print_tbl_walk(m);
+			continue;
+		}
+
+		pasid_dir_ptr = ctx->lo & VTD_PAGE_MASK;
+		pasid_dir_size = get_pasid_dir_size(ctx);
+		pasid_dir_walk(m, pasid_dir_ptr, pasid_dir_size);
+	}
+}
+
+
+/*
+ * Print the root table address of @iommu and a column header, then walk the
+ * context tables of all 256 buses. Everything runs with iommu->lock held.
+ */
+static void root_tbl_walk(struct seq_file *m, struct intel_iommu *iommu)
+{
+	unsigned long irqflags;
+	u16 bus_nr = 0;
+
+	spin_lock_irqsave(&iommu->lock, irqflags);
+	seq_printf(m, "IOMMU %s: Root Table Address: 0x%llx\n", iommu->name,
+		   (u64)virt_to_phys(iommu->root_entry));
+	seq_puts(m, "B.D.F\tRoot_entry\t\t\t\tContext_entry\t\t\t\tPASID\tPASID_table_entry\n");
+
+	/*
+	 * The root entry is not checked for presence here because
+	 * iommu_context_addr() does so before returning a context entry.
+	 */
+	while (bus_nr < 256) {
+		ctx_tbl_walk(m, iommu, bus_nr);
+		bus_nr++;
+	}
+
+	spin_unlock_irqrestore(&iommu->lock, irqflags);
+}
+
+
+/*
+ * debugfs "dmar_translation_struct": dump the translation tables of every
+ * active IOMMU, one blank line after each. Always returns 0.
+ */
+static int dmar_translation_struct_show(struct seq_file *m, void *unused)
+{
+	struct dmar_drhd_unit *unit;
+	struct intel_iommu *iommu;
+
+	rcu_read_lock();
+	for_each_active_iommu(iommu, unit) {
+		root_tbl_walk(m, iommu);
+		seq_putc(m, '\n');
+	}
+	rcu_read_unlock();
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(dmar_translation_struct);
+
+#ifdef CONFIG_IRQ_REMAP
+/*
+ * Print every interrupt remapping table entry of @iommu that is present and
+ * does not have p_pst set. The table is scanned with irq_2_ir_lock held.
+ */
+static void ir_tbl_remap_entry_show(struct seq_file *m,
+				    struct intel_iommu *iommu)
+{
+	unsigned long irqflags;
+	int slot;
+
+	seq_puts(m, " Entry SrcID DstID Vct IRTE_high\t\tIRTE_low\n");
+
+	raw_spin_lock_irqsave(&irq_2_ir_lock, irqflags);
+	for (slot = 0; slot < INTR_REMAP_TABLE_ENTRIES; slot++) {
+		struct irte *entry = &iommu->ir_table->base[slot];
+
+		if (entry->present && !entry->p_pst)
+			seq_printf(m, " %-5d %02x:%02x.%01x %08x %02x %016llx\t%016llx\n",
+				   slot, PCI_BUS_NUM(entry->sid),
+				   PCI_SLOT(entry->sid), PCI_FUNC(entry->sid),
+				   entry->dest_id, entry->vector,
+				   entry->high, entry->low);
+	}
+	raw_spin_unlock_irqrestore(&irq_2_ir_lock, irqflags);
+}
+
+
+/*
+ * Print every interrupt remapping table entry of @iommu that is present and
+ * has p_pst set. The table is scanned with irq_2_ir_lock held.
+ */
+static void ir_tbl_posted_entry_show(struct seq_file *m,
+				     struct intel_iommu *iommu)
+{
+	unsigned long irqflags;
+	int slot;
+
+	seq_puts(m, " Entry SrcID PDA_high PDA_low Vct IRTE_high\t\tIRTE_low\n");
+
+	raw_spin_lock_irqsave(&irq_2_ir_lock, irqflags);
+	for (slot = 0; slot < INTR_REMAP_TABLE_ENTRIES; slot++) {
+		struct irte *entry = &iommu->ir_table->base[slot];
+
+		if (entry->present && entry->p_pst)
+			seq_printf(m, " %-5d %02x:%02x.%01x %08x %08x %02x %016llx\t%016llx\n",
+				   slot, PCI_BUS_NUM(entry->sid),
+				   PCI_SLOT(entry->sid), PCI_FUNC(entry->sid),
+				   entry->pda_h, entry->pda_l << 6,
+				   entry->vector, entry->high,
+				   entry->low);
+	}
+	raw_spin_unlock_irqrestore(&irq_2_ir_lock, irqflags);
+}
+
+
+/*
+ * debugfs "ir_translation_struct": go through the interrupt remapping table
+ * of every active IOMMU twice. The first pass covers units reporting
+ * interrupt remapping support and prints their remapped entries; after a
+ * "****" separator the second pass covers units reporting posted interrupt
+ * support and prints their posted entries. Always returns 0.
+ */
+static int ir_translation_struct_show(struct seq_file *m, void *unused)
+{
+	struct dmar_drhd_unit *unit;
+	struct intel_iommu *iommu;
+	u64 table_pa;
+
+	rcu_read_lock();
+	for_each_active_iommu(iommu, unit) {
+		if (!ecap_ir_support(iommu->ecap))
+			continue;
+
+		seq_printf(m, "Remapped Interrupt supported on IOMMU: %s\n",
+			   iommu->name);
+
+		if (!iommu->ir_table) {
+			seq_puts(m, "Interrupt Remapping is not enabled\n");
+		} else {
+			table_pa = virt_to_phys(iommu->ir_table->base);
+			seq_printf(m, " IR table address:%llx\n", table_pa);
+			ir_tbl_remap_entry_show(m, iommu);
+		}
+		seq_putc(m, '\n');
+	}
+
+	seq_puts(m, "****\n\n");
+
+	for_each_active_iommu(iommu, unit) {
+		if (!cap_pi_support(iommu->cap))
+			continue;
+
+		seq_printf(m, "Posted Interrupt supported on IOMMU: %s\n",
+			   iommu->name);
+
+		if (!iommu->ir_table) {
+			seq_puts(m, "Interrupt Remapping is not enabled\n");
+		} else {
+			table_pa = virt_to_phys(iommu->ir_table->base);
+			seq_printf(m, " IR table address:%llx\n", table_pa);
+			ir_tbl_posted_entry_show(m, iommu);
+		}
+		seq_putc(m, '\n');
+	}
+	rcu_read_unlock();
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(ir_translation_struct);
+#endif
+
+/*
+ * Create the "intel" directory below iommu_debugfs_dir and populate it with
+ * the read-only dump files defined in this file. The interrupt remapping
+ * file only exists when CONFIG_IRQ_REMAP is enabled.
+ */
+void __init intel_iommu_debugfs_init(void)
+{
+	struct dentry *dir;
+
+	dir = debugfs_create_dir("intel", iommu_debugfs_dir);
+
+	debugfs_create_file("iommu_regset", 0444, dir, NULL,
+			    &iommu_regset_fops);
+	debugfs_create_file("dmar_translation_struct", 0444, dir, NULL,
+			    &dmar_translation_struct_fops);
+#ifdef CONFIG_IRQ_REMAP
+	debugfs_create_file("ir_translation_struct", 0444, dir, NULL,
+			    &ir_translation_struct_fops);
+#endif
+}
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index a76c47f..6db6d96 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -1,15 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright © 2006-2014 Intel Corporation.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
* Authors: David Woodhouse <dwmw2@infradead.org>,
* Ashok Raj <ashok.raj@intel.com>,
* Shaohua Li <shaohua.li@intel.com>,
@@ -19,6 +11,7 @@
*/
#define pr_fmt(fmt) "DMAR: " fmt
+#define dev_fmt(fmt) pr_fmt(fmt)
#include <linux/init.h>
#include <linux/bitmap.h>
@@ -47,9 +40,12 @@
#include <linux/dma-contiguous.h>
#include <linux/dma-direct.h>
#include <linux/crash_dump.h>
+#include <linux/numa.h>
+#include <linux/swiotlb.h>
#include <asm/irq_remapping.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>
+#include <trace/events/intel_iommu.h>
#include "irq_remapping.h"
#include "intel-pasid.h"
@@ -184,17 +180,8 @@
*/
static int force_on = 0;
int intel_iommu_tboot_noforce;
+static int no_platform_optin;
-/*
- * 0: Present
- * 1-11: Reserved
- * 12-63: Context Ptr (12 - (haw-1))
- * 64-127: Reserved
- */
-struct root_entry {
- u64 lo;
- u64 hi;
-};
#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
/*
@@ -220,21 +207,6 @@
return re->hi & VTD_PAGE_MASK;
}
-/*
- * low 64 bits:
- * 0: present
- * 1: fault processing disable
- * 2-3: translation type
- * 12-63: address space root
- * high 64 bits:
- * 0-2: address width
- * 3-6: aval
- * 8-23: domain id
- */
-struct context_entry {
- u64 lo;
- u64 hi;
-};
static inline void context_clear_pasid_enable(struct context_entry *context)
{
@@ -261,7 +233,7 @@
return (context->lo & 1);
}
-static inline bool context_present(struct context_entry *context)
+bool context_present(struct context_entry *context)
{
return context_pasid_enabled(context) ?
__context_present(context) :
@@ -316,49 +288,6 @@
}
/*
- * 0: readable
- * 1: writable
- * 2-6: reserved
- * 7: super page
- * 8-10: available
- * 11: snoop behavior
- * 12-63: Host physcial address
- */
-struct dma_pte {
- u64 val;
-};
-
-static inline void dma_clear_pte(struct dma_pte *pte)
-{
- pte->val = 0;
-}
-
-static inline u64 dma_pte_addr(struct dma_pte *pte)
-{
-#ifdef CONFIG_64BIT
- return pte->val & VTD_PAGE_MASK;
-#else
- /* Must have a full atomic 64-bit read */
- return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
-#endif
-}
-
-static inline bool dma_pte_present(struct dma_pte *pte)
-{
- return (pte->val & 3) != 0;
-}
-
-static inline bool dma_pte_superpage(struct dma_pte *pte)
-{
- return (pte->val & DMA_PTE_LARGE_PAGE);
-}
-
-static inline int first_pte_in_page(struct dma_pte *pte)
-{
- return !((unsigned long)pte & ~VTD_PAGE_MASK);
-}
-
-/*
* This domain is a statically identity mapping domain.
* 1. This domain creats a static 1:1 mapping to all usable memory.
* 2. It maps to each iommu if successful.
@@ -367,14 +296,16 @@
static struct dmar_domain *si_domain;
static int hw_pass_through = 1;
-/*
- * Domain represents a virtual machine, more than one devices
- * across iommus may be owned in one domain, e.g. kvm guest.
- */
-#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 0)
-
/* si_domain contains mulitple devices */
-#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 1)
+#define DOMAIN_FLAG_STATIC_IDENTITY BIT(0)
+
+/*
+ * This is a DMA domain allocated through the iommu domain allocation
+ * interface. But one or more devices belonging to this domain have
+ * been chosen to use a private domain. We should avoid using the
+ * map/unmap/iova_to_phys APIs on it.
+ */
+#define DOMAIN_FLAG_LOSE_CHILDREN BIT(1)
#define for_each_domain_iommu(idx, domain) \
for (idx = 0; idx < g_num_of_iommus; idx++) \
@@ -387,7 +318,6 @@
u64 end_address; /* reserved end address */
struct dmar_dev_scope *devices; /* target devices */
int devices_cnt; /* target device count */
- struct iommu_resv_region *resv; /* reserved region handle */
};
struct dmar_atsr_unit {
@@ -409,13 +339,17 @@
static void domain_exit(struct dmar_domain *domain);
static void domain_remove_dev_info(struct dmar_domain *domain);
-static void dmar_remove_one_dev_info(struct dmar_domain *domain,
- struct device *dev);
+static void dmar_remove_one_dev_info(struct device *dev);
static void __dmar_remove_one_dev_info(struct device_domain_info *info);
static void domain_context_clear(struct intel_iommu *iommu,
struct device *dev);
static int domain_detach_iommu(struct dmar_domain *domain,
struct intel_iommu *iommu);
+static bool device_is_rmrr_locked(struct device *dev);
+static int intel_iommu_attach_device(struct iommu_domain *domain,
+ struct device *dev);
+static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
+ dma_addr_t iova);
#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
int dmar_disabled = 0;
@@ -423,6 +357,7 @@
int dmar_disabled = 1;
#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
+int intel_iommu_sm;
int intel_iommu_enabled = 0;
EXPORT_SYMBOL_GPL(intel_iommu_enabled);
@@ -430,63 +365,44 @@
static int dmar_forcedac;
static int intel_iommu_strict;
static int intel_iommu_superpage = 1;
-static int intel_iommu_ecs = 1;
-static int intel_iommu_pasid28;
static int iommu_identity_mapping;
+static int intel_no_bounce;
#define IDENTMAP_ALL 1
#define IDENTMAP_GFX 2
#define IDENTMAP_AZALIA 4
-/* Broadwell and Skylake have broken ECS support — normal so-called "second
- * level" translation of DMA requests-without-PASID doesn't actually happen
- * unless you also set the NESTE bit in an extended context-entry. Which of
- * course means that SVM doesn't work because it's trying to do nested
- * translation of the physical addresses it finds in the process page tables,
- * through the IOVA->phys mapping found in the "second level" page tables.
- *
- * The VT-d specification was retroactively changed to change the definition
- * of the capability bits and pretend that Broadwell/Skylake never happened...
- * but unfortunately the wrong bit was changed. It's ECS which is broken, but
- * for some reason it was the PASID capability bit which was redefined (from
- * bit 28 on BDW/SKL to bit 40 in future).
- *
- * So our test for ECS needs to eschew those implementations which set the old
- * PASID capabiity bit 28, since those are the ones on which ECS is broken.
- * Unless we are working around the 'pasid28' limitations, that is, by putting
- * the device into passthrough mode for normal DMA and thus masking the bug.
- */
-#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
- (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap)))
-/* PASID support is thus enabled if ECS is enabled and *either* of the old
- * or new capability bits are set. */
-#define pasid_enabled(iommu) (ecs_enabled(iommu) && \
- (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap)))
-
int intel_iommu_gfx_mapped;
EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
+#define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2))
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);
+#define device_needs_bounce(d) (!intel_no_bounce && dev_is_pci(d) && \
+ to_pci_dev(d)->untrusted)
+
/*
* Iterate over elements in device_domain_list and call the specified
- * callback @fn against each element. This helper should only be used
- * in the context where the device_domain_lock has already been holden.
+ * callback @fn against each element.
*/
int for_each_device_domain(int (*fn)(struct device_domain_info *info,
void *data), void *data)
{
int ret = 0;
+ unsigned long flags;
struct device_domain_info *info;
- assert_spin_locked(&device_domain_lock);
+ spin_lock_irqsave(&device_domain_lock, flags);
list_for_each_entry(info, &device_domain_list, global) {
ret = fn(info, data);
- if (ret)
+ if (ret) {
+ spin_unlock_irqrestore(&device_domain_lock, flags);
return ret;
+ }
}
+ spin_unlock_irqrestore(&device_domain_lock, flags);
return 0;
}
@@ -528,6 +444,7 @@
pr_info("IOMMU enabled\n");
} else if (!strncmp(str, "off", 3)) {
dmar_disabled = 1;
+ no_platform_optin = 1;
pr_info("IOMMU disabled\n");
} else if (!strncmp(str, "igfx_off", 8)) {
dmar_map_gfx = 0;
@@ -541,19 +458,16 @@
} else if (!strncmp(str, "sp_off", 6)) {
pr_info("Disable supported super page\n");
intel_iommu_superpage = 0;
- } else if (!strncmp(str, "ecs_off", 7)) {
- printk(KERN_INFO
- "Intel-IOMMU: disable extended context table support\n");
- intel_iommu_ecs = 0;
- } else if (!strncmp(str, "pasid28", 7)) {
- printk(KERN_INFO
- "Intel-IOMMU: enable pre-production PASID support\n");
- intel_iommu_pasid28 = 1;
- iommu_identity_mapping |= IDENTMAP_GFX;
+ } else if (!strncmp(str, "sm_on", 5)) {
+ pr_info("Intel-IOMMU: scalable mode supported\n");
+ intel_iommu_sm = 1;
} else if (!strncmp(str, "tboot_noforce", 13)) {
printk(KERN_INFO
"Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
intel_iommu_tboot_noforce = 1;
+ } else if (!strncmp(str, "nobounce", 8)) {
+ pr_info("Intel-IOMMU: No bounce buffer. This could expose security risks of DMA attacks\n");
+ intel_no_bounce = 1;
}
str += strcspn(str, ",");
@@ -633,22 +547,11 @@
kmem_cache_free(iommu_devinfo_cache, vaddr);
}
-static inline int domain_type_is_vm(struct dmar_domain *domain)
-{
- return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
-}
-
static inline int domain_type_is_si(struct dmar_domain *domain)
{
return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
}
-static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
-{
- return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
- DOMAIN_FLAG_STATIC_IDENTITY);
-}
-
static inline int domain_pfn_supported(struct dmar_domain *domain,
unsigned long pfn)
{
@@ -696,7 +599,9 @@
int iommu_id;
/* si_domain and vm domain should not get here. */
- BUG_ON(domain_type_is_vm_or_si(domain));
+ if (WARN_ON(domain->domain.type != IOMMU_DOMAIN_DMA))
+ return NULL;
+
for_each_domain_iommu(iommu_id, domain)
break;
@@ -788,15 +693,15 @@
domain->iommu_superpage = domain_update_iommu_superpage(NULL);
}
-static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu,
- u8 bus, u8 devfn, int alloc)
+struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
+ u8 devfn, int alloc)
{
struct root_entry *root = &iommu->root_entry[bus];
struct context_entry *context;
u64 *entry;
entry = &root->lo;
- if (ecs_enabled(iommu)) {
+ if (sm_supported(iommu)) {
if (devfn >= 0x80) {
devfn -= 0x80;
entry = &root->hi;
@@ -827,12 +732,39 @@
return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
}
+/**
+ * is_downstream_to_pci_bridge - tell whether a device lives below a bridge
+ * @dev: device that may sit in the PCI sub-hierarchy of @bridge
+ * @bridge: candidate PCI-PCI bridge
+ *
+ * Return: true when both are PCI devices and the bus number of @dev lies
+ * within the bus range of @bridge's subordinate bus, false otherwise.
+ */
+static bool
+is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
+{
+	struct pci_dev *child, *parent;
+
+	if (!(dev_is_pci(dev) && dev_is_pci(bridge)))
+		return false;
+
+	child = to_pci_dev(dev);
+	parent = to_pci_dev(bridge);
+
+	/* A bridge without a subordinate bus has nothing downstream. */
+	if (!parent->subordinate)
+		return false;
+
+	return parent->subordinate->number <= child->bus->number &&
+	       parent->subordinate->busn_res.end >= child->bus->number;
+}
+
static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
{
struct dmar_drhd_unit *drhd = NULL;
struct intel_iommu *iommu;
struct device *tmp;
- struct pci_dev *ptmp, *pdev = NULL;
+ struct pci_dev *pdev = NULL;
u16 segment = 0;
int i;
@@ -878,13 +810,7 @@
goto out;
}
- if (!pdev || !dev_is_pci(tmp))
- continue;
-
- ptmp = to_pci_dev(tmp);
- if (ptmp->subordinate &&
- ptmp->subordinate->number <= pdev->bus->number &&
- ptmp->subordinate->busn_res.end >= pdev->bus->number)
+ if (is_downstream_to_pci_bridge(dev, tmp))
goto got_pdev;
}
@@ -938,7 +864,7 @@
if (context)
free_pgtable_page(context);
- if (!ecs_enabled(iommu))
+ if (!sm_supported(iommu))
continue;
context = iommu_context_addr(iommu, i, 0x80, 0);
@@ -955,7 +881,7 @@
static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
unsigned long pfn, int *target_level)
{
- struct dma_pte *parent, *pte = NULL;
+ struct dma_pte *parent, *pte;
int level = agaw_to_level(domain->agaw);
int offset;
@@ -1006,13 +932,12 @@
return pte;
}
-
/* return address's pte at specific level */
static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
unsigned long pfn,
int level, int *large_page)
{
- struct dma_pte *parent, *pte = NULL;
+ struct dma_pte *parent, *pte;
int total = agaw_to_level(domain->agaw);
int offset;
@@ -1044,7 +969,7 @@
unsigned long start_pfn,
unsigned long last_pfn)
{
- unsigned int large_page = 1;
+ unsigned int large_page;
struct dma_pte *first_pte, *pte;
BUG_ON(!domain_pfn_supported(domain, start_pfn));
@@ -1222,7 +1147,7 @@
unsigned long start_pfn,
unsigned long last_pfn)
{
- struct page *freelist = NULL;
+ struct page *freelist;
BUG_ON(!domain_pfn_supported(domain, start_pfn));
BUG_ON(!domain_pfn_supported(domain, last_pfn));
@@ -1290,8 +1215,8 @@
unsigned long flag;
addr = virt_to_phys(iommu->root_entry);
- if (ecs_enabled(iommu))
- addr |= DMA_RTADDR_RTT;
+ if (sm_supported(iommu))
+ addr |= DMA_RTADDR_SMT;
raw_spin_lock_irqsave(&iommu->register_lock, flag);
dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
@@ -1305,7 +1230,7 @@
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
-static void iommu_flush_write_buffer(struct intel_iommu *iommu)
+void iommu_flush_write_buffer(struct intel_iommu *iommu)
{
u32 val;
unsigned long flag;
@@ -1481,7 +1406,7 @@
/* pdev will be returned if device is not a vf */
pf_pdev = pci_physfn(pdev);
- info->pfsid = PCI_DEVID(pf_pdev->bus->number, pf_pdev->devfn);
+ info->pfsid = pci_dev_id(pf_pdev);
}
#ifdef CONFIG_INTEL_IOMMU_SVM
@@ -1493,10 +1418,14 @@
if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
info->pasid_enabled = 1;
- if (info->pri_supported && !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
+ if (info->pri_supported &&
+ (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1) &&
+ !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
info->pri_enabled = 1;
#endif
- if (info->ats_supported && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
+ if (!pdev->untrusted && info->ats_supported &&
+ pci_ats_page_aligned(pdev) &&
+ !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
info->ats_enabled = 1;
domain_update_iotlb(info->domain);
info->ats_qdep = pci_ats_queue_depth(pdev);
@@ -1624,6 +1553,9 @@
u32 pmen;
unsigned long flags;
+ if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
+ return;
+
raw_spin_lock_irqsave(&iommu->register_lock, flags);
pmen = readl(iommu->reg + DMAR_PMEN_REG);
pmen &= ~DMA_PMEN_EPM;
@@ -1668,7 +1600,6 @@
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
-
static int iommu_init_domains(struct intel_iommu *iommu)
{
u32 ndomains, nlongs;
@@ -1706,8 +1637,6 @@
return -ENOMEM;
}
-
-
/*
* If Caching mode is set, then invalid translations are tagged
* with domain-id 0, hence we need to pre-allocate it. We also
@@ -1716,6 +1645,16 @@
*/
set_bit(0, iommu->domain_ids);
+ /*
+ * VT-d spec rev3.0 (section 6.2.3.1) requires that each PASID
+ * entry for first-level or pass-through translation modes should
+ * be programmed with a domain id different from those used for
+ * second-level or nested translation. We reserve a domain id for
+ * this purpose.
+ */
+ if (sm_supported(iommu))
+ set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
+
return 0;
}
@@ -1727,32 +1666,15 @@
if (!iommu->domains || !iommu->domain_ids)
return;
-again:
spin_lock_irqsave(&device_domain_lock, flags);
list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
- struct dmar_domain *domain;
-
if (info->iommu != iommu)
continue;
if (!info->dev || !info->domain)
continue;
- domain = info->domain;
-
__dmar_remove_one_dev_info(info);
-
- if (!domain_type_is_vm_or_si(domain)) {
- /*
- * The domain_exit() function can't be called under
- * device_domain_lock, as it takes this lock itself.
- * So release the lock here and re-run the loop
- * afterwards.
- */
- spin_unlock_irqrestore(&device_domain_lock, flags);
- domain_exit(domain);
- goto again;
- }
}
spin_unlock_irqrestore(&device_domain_lock, flags);
@@ -1780,10 +1702,9 @@
free_context_table(iommu);
#ifdef CONFIG_INTEL_IOMMU_SVM
- if (pasid_enabled(iommu)) {
+ if (pasid_supported(iommu)) {
if (ecap_prs(iommu->ecap))
intel_svm_finish_prq(iommu);
- intel_svm_exit(iommu);
}
#endif
}
@@ -1797,7 +1718,7 @@
return NULL;
memset(domain, 0, sizeof(*domain));
- domain->nid = -1;
+ domain->nid = NUMA_NO_NODE;
domain->flags = flags;
domain->has_iotlb_device = false;
INIT_LIST_HEAD(&domain->devices);
@@ -1843,7 +1764,7 @@
static int domain_detach_iommu(struct dmar_domain *domain,
struct intel_iommu *iommu)
{
- int num, count = INT_MAX;
+ int num, count;
assert_spin_locked(&device_domain_lock);
assert_spin_locked(&iommu->lock);
@@ -1896,7 +1817,7 @@
IOVA_PFN(r->start),
IOVA_PFN(r->end));
if (!iova) {
- pr_err("Reserve iova failed\n");
+ pci_err(pdev, "Reserve iova for %pR failed\n", r);
return -ENODEV;
}
}
@@ -1982,29 +1903,76 @@
static void domain_exit(struct dmar_domain *domain)
{
- struct page *freelist = NULL;
-
- /* Domain 0 is reserved, so dont process it */
- if (!domain)
- return;
/* Remove associated devices and clear attached or cached domains */
- rcu_read_lock();
domain_remove_dev_info(domain);
- rcu_read_unlock();
/* destroy iovas */
put_iova_domain(&domain->iovad);
- freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
+ if (domain->pgd) {
+ struct page *freelist;
- dma_free_pagelist(freelist);
+ freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
+ dma_free_pagelist(freelist);
+ }
free_domain_mem(domain);
}
+/*
+ * Return the PDTS encoding X for @table: the PASID directory then holds
+ * 2^(X + 7) entries. X is derived from the lowest set bit of the directory
+ * entry count (max_pasid >> PASID_PDE_SHIFT), clamped at 0 for small tables.
+ */
+static inline unsigned long context_get_sm_pds(struct pasid_table *table)
+{
+	/* find_first_bit() reads whole unsigned longs, so max_pde must be one. */
+	unsigned long pds, max_pde;
+
+	max_pde = table->max_pasid >> PASID_PDE_SHIFT;
+	pds = find_first_bit(&max_pde, MAX_NR_PASID_BITS);
+	if (pds < 7)
+		return 0;
+
+	return pds - 7;
+}
+
+/*
+ * Program the RID_PASID field of a scalable mode context entry. DMA
+ * requests that carry no PASID are translated by the IOMMU hardware
+ * with the PASID value stored in this field.
+ */
+static inline void
+context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
+{
+	/* Low 20 bits carry the PASID; bit 20 is set alongside it. */
+	context->hi |= (pasid & ((1 << 20) - 1)) | (1 << 20);
+}
+
+/*
+ * Turn on DTE (Device-TLB Enable), bit 2 of ->lo in a scalable mode
+ * context entry.
+ */
+static inline void context_set_sm_dte(struct context_entry *context)
+{
+	context->lo |= 1ULL << 2;
+}
+
+/*
+ * Turn on PRE (Page Request Enable), bit 4 of ->lo in a scalable mode
+ * context entry.
+ */
+static inline void context_set_sm_pre(struct context_entry *context)
+{
+	context->lo |= 1ULL << 4;
+}
+
+/* Encode @pds (low 3 bits) as the PDTS field, bits 11:9 of a context entry. */
+#define context_pdts(pds) (((pds) & 0x7) << 9)
+
static int domain_context_mapping_one(struct dmar_domain *domain,
struct intel_iommu *iommu,
+ struct pasid_table *table,
u8 bus, u8 devfn)
{
u16 did = domain->iommu_did[iommu->seq_id];
@@ -2012,8 +1980,7 @@
struct device_domain_info *info = NULL;
struct context_entry *context;
unsigned long flags;
- struct dma_pte *pgd;
- int ret, agaw;
+ int ret;
WARN_ON(did == 0);
@@ -2059,41 +2026,68 @@
}
}
- pgd = domain->pgd;
-
context_clear_entry(context);
- context_set_domain_id(context, did);
- /*
- * Skip top levels of page tables for iommu which has less agaw
- * than default. Unnecessary for PT mode.
- */
- if (translation != CONTEXT_TT_PASS_THROUGH) {
- for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
- ret = -ENOMEM;
- pgd = phys_to_virt(dma_pte_addr(pgd));
- if (!dma_pte_present(pgd))
- goto out_unlock;
- }
+ if (sm_supported(iommu)) {
+ unsigned long pds;
+ WARN_ON(!table);
+
+ /* Setup the PASID DIR pointer: */
+ pds = context_get_sm_pds(table);
+ context->lo = (u64)virt_to_phys(table->table) |
+ context_pdts(pds);
+
+ /* Setup the RID_PASID field: */
+ context_set_sm_rid2pasid(context, PASID_RID2PASID);
+
+ /*
+ * Setup the Device-TLB enable bit and Page request
+ * Enable bit:
+ */
info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
if (info && info->ats_supported)
- translation = CONTEXT_TT_DEV_IOTLB;
- else
- translation = CONTEXT_TT_MULTI_LEVEL;
-
- context_set_address_root(context, virt_to_phys(pgd));
- context_set_address_width(context, iommu->agaw);
+ context_set_sm_dte(context);
+ if (info && info->pri_supported)
+ context_set_sm_pre(context);
} else {
- /*
- * In pass through mode, AW must be programmed to
- * indicate the largest AGAW value supported by
- * hardware. And ASR is ignored by hardware.
- */
- context_set_address_width(context, iommu->msagaw);
+ struct dma_pte *pgd = domain->pgd;
+ int agaw;
+
+ context_set_domain_id(context, did);
+
+ if (translation != CONTEXT_TT_PASS_THROUGH) {
+ /*
+ * Skip top levels of page tables for iommu which has
+ * less agaw than default. Unnecessary for PT mode.
+ */
+ for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
+ ret = -ENOMEM;
+ pgd = phys_to_virt(dma_pte_addr(pgd));
+ if (!dma_pte_present(pgd))
+ goto out_unlock;
+ }
+
+ info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
+ if (info && info->ats_supported)
+ translation = CONTEXT_TT_DEV_IOTLB;
+ else
+ translation = CONTEXT_TT_MULTI_LEVEL;
+
+ context_set_address_root(context, virt_to_phys(pgd));
+ context_set_address_width(context, agaw);
+ } else {
+ /*
+ * In pass through mode, AW must be programmed to
+ * indicate the largest AGAW value supported by
+ * hardware. And ASR is ignored by hardware.
+ */
+ context_set_address_width(context, iommu->msagaw);
+ }
+
+ context_set_translation_type(context, translation);
}
- context_set_translation_type(context, translation);
context_set_fault_enable(context);
context_set_present(context);
domain_flush_cache(domain, context, sizeof(*context));
@@ -2127,6 +2121,7 @@
struct domain_context_mapping_data {
struct dmar_domain *domain;
struct intel_iommu *iommu;
+ struct pasid_table *table;
};
static int domain_context_mapping_cb(struct pci_dev *pdev,
@@ -2135,25 +2130,31 @@
struct domain_context_mapping_data *data = opaque;
return domain_context_mapping_one(data->domain, data->iommu,
- PCI_BUS_NUM(alias), alias & 0xff);
+ data->table, PCI_BUS_NUM(alias),
+ alias & 0xff);
}
static int
domain_context_mapping(struct dmar_domain *domain, struct device *dev)
{
+ struct domain_context_mapping_data data;
+ struct pasid_table *table;
struct intel_iommu *iommu;
u8 bus, devfn;
- struct domain_context_mapping_data data;
iommu = device_to_iommu(dev, &bus, &devfn);
if (!iommu)
return -ENODEV;
+ table = intel_pasid_get_table(dev);
+
if (!dev_is_pci(dev))
- return domain_context_mapping_one(domain, iommu, bus, devfn);
+ return domain_context_mapping_one(domain, iommu, table,
+ bus, devfn);
data.domain = domain;
data.iommu = iommu;
+ data.table = table;
return pci_for_each_dma_alias(to_pci_dev(dev),
&domain_context_mapping_cb, &data);
@@ -2335,32 +2336,23 @@
}
static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
- struct scatterlist *sg, unsigned long phys_pfn,
- unsigned long nr_pages, int prot)
+ struct scatterlist *sg, unsigned long phys_pfn,
+ unsigned long nr_pages, int prot)
{
- int ret;
- struct intel_iommu *iommu;
+ int iommu_id, ret;
+ struct intel_iommu *iommu;
- /* Do the real mapping first */
- ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot);
- if (ret)
- return ret;
+ /* Do the real mapping first */
+ ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot);
+ if (ret)
+ return ret;
- /* Notify about the new mapping */
- if (domain_type_is_vm(domain)) {
- /* VM typed domains can have more than one IOMMUs */
- int iommu_id;
- for_each_domain_iommu(iommu_id, domain) {
- iommu = g_iommus[iommu_id];
- __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
- }
- } else {
- /* General domains only have one IOMMU */
- iommu = domain_get_iommu(domain);
- __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
- }
+ for_each_domain_iommu(iommu_id, domain) {
+ iommu = g_iommus[iommu_id];
+ __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
+ }
- return 0;
+ return 0;
}
static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
@@ -2436,8 +2428,18 @@
{
struct device_domain_info *info;
+ if (unlikely(dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO)) {
+ struct iommu_domain *domain;
+
+ dev->archdata.iommu = NULL;
+ domain = iommu_get_domain_for_dev(dev);
+ if (domain)
+ intel_iommu_attach_device(domain, dev);
+ }
+
/* No lock here, assumes no domain exit in normal case */
info = dev->archdata.iommu;
+
if (likely(info))
return info->domain;
return NULL;
@@ -2479,18 +2481,21 @@
info->domain = domain;
info->iommu = iommu;
info->pasid_table = NULL;
+ info->auxd_enabled = 0;
+ INIT_LIST_HEAD(&info->auxiliary_domains);
if (dev && dev_is_pci(dev)) {
struct pci_dev *pdev = to_pci_dev(info->dev);
- if (!pci_ats_disabled() &&
+ if (!pdev->untrusted &&
+ !pci_ats_disabled() &&
ecap_dev_iotlb_support(iommu->ecap) &&
pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
dmar_find_matched_atsr_unit(pdev))
info->ats_supported = 1;
- if (ecs_enabled(iommu)) {
- if (pasid_enabled(iommu)) {
+ if (sm_supported(iommu)) {
+ if (pasid_supported(iommu)) {
int features = pci_pasid_features(pdev);
if (features >= 0)
info->pasid_supported = features | 1;
@@ -2536,20 +2541,36 @@
list_add(&info->global, &device_domain_list);
if (dev)
dev->archdata.iommu = info;
-
- if (dev && dev_is_pci(dev) && info->pasid_supported) {
- ret = intel_pasid_alloc_table(dev);
- if (ret) {
- pr_warn("No pasid table for %s, pasid disabled\n",
- dev_name(dev));
- info->pasid_supported = 0;
- }
- }
spin_unlock_irqrestore(&device_domain_lock, flags);
+ /* PASID table is mandatory for a PCI device in scalable mode. */
+ if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
+ ret = intel_pasid_alloc_table(dev);
+ if (ret) {
+ dev_err(dev, "PASID table allocation failed\n");
+ dmar_remove_one_dev_info(dev);
+ return NULL;
+ }
+
+ /* Setup the PASID entry for requests without PASID: */
+ spin_lock(&iommu->lock);
+ if (hw_pass_through && domain_type_is_si(domain))
+ ret = intel_pasid_setup_pass_through(iommu, domain,
+ dev, PASID_RID2PASID);
+ else
+ ret = intel_pasid_setup_second_level(iommu, domain,
+ dev, PASID_RID2PASID);
+ spin_unlock(&iommu->lock);
+ if (ret) {
+ dev_err(dev, "Setup RID2PASID failed\n");
+ dmar_remove_one_dev_info(dev);
+ return NULL;
+ }
+ }
+
if (dev && domain_context_mapping(domain, dev)) {
- pr_err("Domain context map for %s failed\n", dev_name(dev));
- dmar_remove_one_dev_info(domain, dev);
+ dev_err(dev, "Domain context map failed\n");
+ dmar_remove_one_dev_info(dev);
return NULL;
}
@@ -2564,7 +2585,7 @@
static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
{
- struct device_domain_info *info = NULL;
+ struct device_domain_info *info;
struct dmar_domain *domain = NULL;
struct intel_iommu *iommu;
u16 dma_alias;
@@ -2605,7 +2626,6 @@
}
out:
-
return domain;
}
@@ -2645,29 +2665,6 @@
return domain;
}
-static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
-{
- struct dmar_domain *domain, *tmp;
-
- domain = find_domain(dev);
- if (domain)
- goto out;
-
- domain = find_or_alloc_domain(dev, gaw);
- if (!domain)
- goto out;
-
- tmp = set_domain_for_dev(dev, domain);
- if (!tmp || domain != tmp) {
- domain_exit(domain);
- domain = tmp;
- }
-
-out:
-
- return domain;
-}
-
static int iommu_domain_identity_map(struct dmar_domain *domain,
unsigned long long start,
unsigned long long end)
@@ -2703,13 +2700,12 @@
range which is reserved in E820, so which didn't get set
up to start with in si_domain */
if (domain == si_domain && hw_pass_through) {
- pr_warn("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
- dev_name(dev), start, end);
+ dev_warn(dev, "Ignoring identity map for HW passthrough [0x%Lx - 0x%Lx]\n",
+ start, end);
return 0;
}
- pr_info("Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
- dev_name(dev), start, end);
+ dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx]\n", start, end);
if (end < start) {
WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
@@ -2733,63 +2729,13 @@
return iommu_domain_identity_map(domain, start, end);
}
-static int iommu_prepare_identity_map(struct device *dev,
- unsigned long long start,
- unsigned long long end)
-{
- struct dmar_domain *domain;
- int ret;
-
- domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
- if (!domain)
- return -ENOMEM;
-
- ret = domain_prepare_identity_map(dev, domain, start, end);
- if (ret)
- domain_exit(domain);
-
- return ret;
-}
-
-static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
- struct device *dev)
-{
- if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
- return 0;
- return iommu_prepare_identity_map(dev, rmrr->base_address,
- rmrr->end_address);
-}
-
-#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
-static inline void iommu_prepare_isa(void)
-{
- struct pci_dev *pdev;
- int ret;
-
- pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
- if (!pdev)
- return;
-
- pr_info("Prepare 0-16MiB unity mapping for LPC\n");
- ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
-
- if (ret)
- pr_err("Failed to create 0-16MiB identity map - floppy might not work\n");
-
- pci_dev_put(pdev);
-}
-#else
-static inline void iommu_prepare_isa(void)
-{
- return;
-}
-#endif /* !CONFIG_INTEL_IOMMU_FLPY_WA */
-
static int md_domain_init(struct dmar_domain *domain, int guest_width);
static int __init si_domain_init(int hw)
{
- int nid, ret = 0;
+ struct dmar_rmrr_unit *rmrr;
+ struct device *dev;
+ int i, nid, ret;
si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
if (!si_domain)
@@ -2800,8 +2746,6 @@
return -EFAULT;
}
- pr_debug("Identity mapping domain allocated\n");
-
if (hw)
return 0;
@@ -2817,6 +2761,31 @@
}
}
+ /*
+ * Normally we use DMA domains for devices which have RMRRs. But we
+ * relax this requirement for graphics and USB devices. Identity map
+ * the RMRRs for graphics and USB devices so that they can use the
+ * si_domain.
+ */
+ for_each_rmrr_units(rmrr) {
+ for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
+ i, dev) {
+ unsigned long long start = rmrr->base_address;
+ unsigned long long end = rmrr->end_address;
+
+ if (device_is_rmrr_locked(dev))
+ continue;
+
+ if (WARN_ON(end < start ||
+ end >> agaw_to_width(si_domain->agaw)))
+ continue;
+
+ ret = iommu_domain_identity_map(si_domain, start, end);
+ if (ret)
+ return ret;
+ }
+ }
+
return 0;
}
@@ -2824,11 +2793,8 @@
{
struct device_domain_info *info;
- if (likely(!iommu_identity_mapping))
- return 0;
-
info = dev->archdata.iommu;
- if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
+ if (info && info != DUMMY_DEVICE_DOMAIN_INFO && info != DEFER_DEVICE_DOMAIN_INFO)
return (info->domain == si_domain);
return 0;
@@ -2865,7 +2831,8 @@
*/
for_each_active_dev_scope(rmrr->devices,
rmrr->devices_cnt, i, tmp)
- if (tmp == dev) {
+ if (tmp == dev ||
+ is_downstream_to_pci_bridge(dev, tmp)) {
rcu_read_unlock();
return true;
}
@@ -2874,6 +2841,35 @@
return false;
}
+/**
+ * device_rmrr_is_relaxable - Tell whether a device's RMRR may be relaxed
+ * (i.e. need not be enforced under some conditions)
+ * @dev: device handle
+ *
+ * PCI USB devices are assumed to have RMRRs largely for historical
+ * reasons, with the RMRR space not actively used post boot. This
+ * exclusion may change if vendors begin to abuse it.
+ *
+ * Graphics devices get the same exception, provided that any use of the
+ * RMRR regions is torn down before the device is assigned to a guest.
+ * Non-PCI devices never qualify.
+ *
+ * Return: true if the RMRR is relaxable, false otherwise
+ */
+static bool device_rmrr_is_relaxable(struct device *dev)
+{
+	struct pci_dev *pci;
+
+	/* Only PCI devices can carry a relaxable RMRR. */
+	if (!dev_is_pci(dev))
+		return false;
+
+	pci = to_pci_dev(dev);
+
+	/* USB controllers and graphics devices are the two exceptions. */
+	return IS_USB_DEVICE(pci) || IS_GFX_DEVICE(pci);
+}
+
/*
* There are a couple cases where we need to restrict the functionality of
* devices associated with RMRRs. The first is when evaluating a device for
@@ -2888,46 +2884,51 @@
* We therefore prevent devices associated with an RMRR from participating in
* the IOMMU API, which eliminates them from device assignment.
*
- * In both cases we assume that PCI USB devices with RMRRs have them largely
- * for historical reasons and that the RMRR space is not actively used post
- * boot. This exclusion may change if vendors begin to abuse it.
- *
- * The same exception is made for graphics devices, with the requirement that
- * any use of the RMRR regions will be torn down before assigning the device
- * to a guest.
+ * In both cases, devices which have relaxable RMRRs are not concerned by this
+ * restriction. See device_rmrr_is_relaxable comment.
*/
static bool device_is_rmrr_locked(struct device *dev)
{
if (!device_has_rmrr(dev))
return false;
- if (dev_is_pci(dev)) {
- struct pci_dev *pdev = to_pci_dev(dev);
-
- if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
- return false;
- }
+ if (device_rmrr_is_relaxable(dev))
+ return false;
return true;
}
-static int iommu_should_identity_map(struct device *dev, int startup)
+/*
+ * Return the required default domain type for a specific device.
+ *
+ * @dev: the device in query
+ *
+ * Returns:
+ *  - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain
+ *  - IOMMU_DOMAIN_IDENTITY: device requires an identity mapping domain
+ *  - 0: both identity and dynamic domains work for this device, so no
+ *    particular default domain type is required
+ */
+static int device_def_domain_type(struct device *dev)
{
-
if (dev_is_pci(dev)) {
struct pci_dev *pdev = to_pci_dev(dev);
if (device_is_rmrr_locked(dev))
- return 0;
+ return IOMMU_DOMAIN_DMA;
+
+ /*
+ * Prevent any device marked as untrusted from getting
+ * placed into the statically identity mapping domain.
+ */
+ if (pdev->untrusted)
+ return IOMMU_DOMAIN_DMA;
if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
- return 1;
+ return IOMMU_DOMAIN_IDENTITY;
if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
- return 1;
-
- if (!(iommu_identity_mapping & IDENTMAP_ALL))
- return 0;
+ return IOMMU_DOMAIN_IDENTITY;
/*
* We want to start off with all devices in the 1:1 domain, and
@@ -2948,93 +2949,18 @@
*/
if (!pci_is_pcie(pdev)) {
if (!pci_is_root_bus(pdev->bus))
- return 0;
+ return IOMMU_DOMAIN_DMA;
if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
- return 0;
+ return IOMMU_DOMAIN_DMA;
} else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
- return 0;
+ return IOMMU_DOMAIN_DMA;
} else {
if (device_has_rmrr(dev))
- return 0;
+ return IOMMU_DOMAIN_DMA;
}
- /*
- * At boot time, we don't yet know if devices will be 64-bit capable.
- * Assume that they will — if they turn out not to be, then we can
- * take them out of the 1:1 domain later.
- */
- if (!startup) {
- /*
- * If the device's dma_mask is less than the system's memory
- * size then this is not a candidate for identity mapping.
- */
- u64 dma_mask = *dev->dma_mask;
-
- if (dev->coherent_dma_mask &&
- dev->coherent_dma_mask < dma_mask)
- dma_mask = dev->coherent_dma_mask;
-
- return dma_mask >= dma_get_required_mask(dev);
- }
-
- return 1;
-}
-
-static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
-{
- int ret;
-
- if (!iommu_should_identity_map(dev, 1))
- return 0;
-
- ret = domain_add_dev_info(si_domain, dev);
- if (!ret)
- pr_info("%s identity mapping for device %s\n",
- hw ? "Hardware" : "Software", dev_name(dev));
- else if (ret == -ENODEV)
- /* device not associated with an iommu */
- ret = 0;
-
- return ret;
-}
-
-
-static int __init iommu_prepare_static_identity_mapping(int hw)
-{
- struct pci_dev *pdev = NULL;
- struct dmar_drhd_unit *drhd;
- struct intel_iommu *iommu;
- struct device *dev;
- int i;
- int ret = 0;
-
- for_each_pci_dev(pdev) {
- ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
- if (ret)
- return ret;
- }
-
- for_each_active_iommu(iommu, drhd)
- for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
- struct acpi_device_physical_node *pn;
- struct acpi_device *adev;
-
- if (dev->bus != &acpi_bus_type)
- continue;
-
- adev= to_acpi_device(dev);
- mutex_lock(&adev->physical_node_lock);
- list_for_each_entry(pn, &adev->physical_node_list, node) {
- ret = dev_prepare_static_identity_mapping(pn->dev, hw);
- if (ret)
- break;
- }
- mutex_unlock(&adev->physical_node_lock);
- if (ret)
- return ret;
- }
-
- return 0;
+ return (iommu_identity_mapping & IDENTMAP_ALL) ?
+ IOMMU_DOMAIN_IDENTITY : 0;
}
static void intel_iommu_init_qi(struct intel_iommu *iommu)
@@ -3259,11 +3185,8 @@
static int __init init_dmars(void)
{
struct dmar_drhd_unit *drhd;
- struct dmar_rmrr_unit *rmrr;
- bool copied_tables = false;
- struct device *dev;
struct intel_iommu *iommu;
- int i, ret;
+ int ret;
/*
* for each drhd
@@ -3296,13 +3219,18 @@
goto error;
}
- for_each_active_iommu(iommu, drhd) {
+ for_each_iommu(iommu, drhd) {
+ if (drhd->ignored) {
+ iommu_disable_translation(iommu);
+ continue;
+ }
+
/*
* Find the max pasid size of all IOMMU's in the system.
* We need to ensure the system pasid table is no bigger
* than the smallest supported.
*/
- if (pasid_enabled(iommu)) {
+ if (pasid_supported(iommu)) {
u32 temp = 2 << ecap_pss(iommu->ecap);
intel_pasid_max_id = min_t(u32, temp,
@@ -3356,14 +3284,13 @@
} else {
pr_info("Copied translation tables from previous kernel for %s\n",
iommu->name);
- copied_tables = true;
}
}
if (!ecap_pass_through(iommu->ecap))
hw_pass_through = 0;
#ifdef CONFIG_INTEL_IOMMU_SVM
- if (pasid_enabled(iommu))
+ if (pasid_supported(iommu))
intel_svm_init(iommu);
#endif
}
@@ -3380,71 +3307,21 @@
iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
}
- if (iommu_pass_through)
+ if (iommu_default_passthrough())
iommu_identity_mapping |= IDENTMAP_ALL;
#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
- iommu_identity_mapping |= IDENTMAP_GFX;
+ dmar_map_gfx = 0;
#endif
+ if (!dmar_map_gfx)
+ iommu_identity_mapping |= IDENTMAP_GFX;
+
check_tylersburg_isoch();
- if (iommu_identity_mapping) {
- ret = si_domain_init(hw_pass_through);
- if (ret)
- goto free_iommu;
- }
-
-
- /*
- * If we copied translations from a previous kernel in the kdump
- * case, we can not assign the devices to domains now, as that
- * would eliminate the old mappings. So skip this part and defer
- * the assignment to device driver initialization time.
- */
- if (copied_tables)
- goto domains_done;
-
- /*
- * If pass through is not set or not enabled, setup context entries for
- * identity mappings for rmrr, gfx, and isa and may fall back to static
- * identity mapping if iommu_identity_mapping is set.
- */
- if (iommu_identity_mapping) {
- ret = iommu_prepare_static_identity_mapping(hw_pass_through);
- if (ret) {
- pr_crit("Failed to setup IOMMU pass-through\n");
- goto free_iommu;
- }
- }
- /*
- * For each rmrr
- * for each dev attached to rmrr
- * do
- * locate drhd for dev, alloc domain for dev
- * allocate free domain
- * allocate page table entries for rmrr
- * if context not allocated for bus
- * allocate and init context
- * set present in root table for this bus
- * init context with domain, translation etc
- * endfor
- * endfor
- */
- pr_info("Setting RMRR:\n");
- for_each_rmrr_units(rmrr) {
- /* some BIOS lists non-exist devices in DMAR table. */
- for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
- i, dev) {
- ret = iommu_prepare_rmrr_dev(rmrr, dev);
- if (ret)
- pr_err("Mapping reserved region failed\n");
- }
- }
-
- iommu_prepare_isa();
-
-domains_done:
+ ret = si_domain_init(hw_pass_through);
+ if (ret)
+ goto free_iommu;
/*
* for each drhd
@@ -3467,8 +3344,14 @@
iommu_flush_write_buffer(iommu);
#ifdef CONFIG_INTEL_IOMMU_SVM
- if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
+ if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
+ /*
+ * Call dmar_alloc_hwirq() with dmar_global_lock held,
+ * could cause possible lock race condition.
+ */
+ up_write(&dmar_global_lock);
ret = intel_svm_enable_prq(iommu);
+ down_write(&dmar_global_lock);
if (ret)
goto free_iommu;
}
@@ -3476,11 +3359,6 @@
ret = dmar_set_interrupt(iommu);
if (ret)
goto free_iommu;
-
- if (!translation_pre_enabled(iommu))
- iommu_enable_translation(iommu);
-
- iommu_disable_protect_mem_regions(iommu);
}
return 0;
@@ -3502,7 +3380,7 @@
struct dmar_domain *domain,
unsigned long nrpages, uint64_t dma_mask)
{
- unsigned long iova_pfn = 0;
+ unsigned long iova_pfn;
/* Restrict dma_mask to the width that the iommu can handle */
dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
@@ -3523,24 +3401,24 @@
iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
IOVA_PFN(dma_mask), true);
if (unlikely(!iova_pfn)) {
- pr_err("Allocating %ld-page iova for %s failed",
- nrpages, dev_name(dev));
+ dev_err(dev, "Allocating %ld-page iova failed", nrpages);
return 0;
}
return iova_pfn;
}
-struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
+static struct dmar_domain *get_private_domain_for_dev(struct device *dev)
{
struct dmar_domain *domain, *tmp;
struct dmar_rmrr_unit *rmrr;
struct device *i_dev;
int i, ret;
+ /* Device shouldn't be attached by any domains. */
domain = find_domain(dev);
if (domain)
- goto out;
+ return NULL;
domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
if (!domain)
@@ -3570,56 +3448,55 @@
}
out:
-
if (!domain)
- pr_err("Allocating domain for %s failed\n", dev_name(dev));
-
+ dev_err(dev, "Allocating domain failed\n");
+ else
+ domain->domain.type = IOMMU_DOMAIN_DMA;
return domain;
}
/* Check if the dev needs to go through non-identity map and unmap process.*/
-static int iommu_no_mapping(struct device *dev)
+static bool iommu_need_mapping(struct device *dev)
{
- int found;
+ int ret;
if (iommu_dummy(dev))
- return 1;
+ return false;
- if (!iommu_identity_mapping)
- return 0;
+ ret = identity_mapping(dev);
+ if (ret) {
+ u64 dma_mask = *dev->dma_mask;
- found = identity_mapping(dev);
- if (found) {
- if (iommu_should_identity_map(dev, 0))
- return 1;
- else {
- /*
- * 32 bit DMA is removed from si_domain and fall back
- * to non-identity mapping.
- */
- dmar_remove_one_dev_info(si_domain, dev);
- pr_info("32bit %s uses non-identity mapping\n",
- dev_name(dev));
- return 0;
- }
- } else {
+ if (dev->coherent_dma_mask && dev->coherent_dma_mask < dma_mask)
+ dma_mask = dev->coherent_dma_mask;
+
+ if (dma_mask >= dma_direct_get_required_mask(dev))
+ return false;
+
/*
- * In case of a detached 64 bit DMA device from vm, the device
- * is put into si_domain for identity mapping.
+ * 32 bit DMA is removed from si_domain and fall back to
+ * non-identity mapping.
*/
- if (iommu_should_identity_map(dev, 0)) {
- int ret;
- ret = domain_add_dev_info(si_domain, dev);
- if (!ret) {
- pr_info("64bit %s uses identity mapping\n",
- dev_name(dev));
- return 1;
+ dmar_remove_one_dev_info(dev);
+ ret = iommu_request_dma_domain_for_dev(dev);
+ if (ret) {
+ struct iommu_domain *domain;
+ struct dmar_domain *dmar_domain;
+
+ domain = iommu_get_domain_for_dev(dev);
+ if (domain) {
+ dmar_domain = to_dmar_domain(domain);
+ dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
}
+ dmar_remove_one_dev_info(dev);
+ get_private_domain_for_dev(dev);
}
+
+ dev_info(dev, "32bit DMA uses non-identity mapping\n");
}
- return 0;
+ return true;
}
static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
@@ -3635,12 +3512,9 @@
BUG_ON(dir == DMA_NONE);
- if (iommu_no_mapping(dev))
- return paddr;
-
- domain = get_valid_domain_for_dev(dev);
+ domain = find_domain(dev);
if (!domain)
- return 0;
+ return DMA_MAPPING_ERROR;
iommu = domain_get_iommu(domain);
size = aligned_nrpages(paddr, size);
@@ -3671,14 +3545,17 @@
start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT;
start_paddr += paddr & ~PAGE_MASK;
+
+ trace_map_single(dev, start_paddr, paddr, size << VTD_PAGE_SHIFT);
+
return start_paddr;
error:
if (iova_pfn)
free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
- pr_err("Device %s request: %zx@%llx dir %d --- failed\n",
- dev_name(dev), size, (unsigned long long)paddr, dir);
- return 0;
+ dev_err(dev, "Device request: %zx@%llx dir %d --- failed\n",
+ size, (unsigned long long)paddr, dir);
+ return DMA_MAPPING_ERROR;
}
static dma_addr_t intel_map_page(struct device *dev, struct page *page,
@@ -3686,8 +3563,20 @@
enum dma_data_direction dir,
unsigned long attrs)
{
- return __intel_map_single(dev, page_to_phys(page) + offset, size,
- dir, *dev->dma_mask);
+ if (iommu_need_mapping(dev))
+ return __intel_map_single(dev, page_to_phys(page) + offset,
+ size, dir, *dev->dma_mask);
+ return dma_direct_map_page(dev, page, offset, size, dir, attrs);
+}
+
+static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr,
+ size_t size, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ if (iommu_need_mapping(dev))
+ return __intel_map_single(dev, phys_addr, size, dir,
+ *dev->dma_mask);
+ return dma_direct_map_resource(dev, phys_addr, size, dir, attrs);
}
static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
@@ -3698,9 +3587,7 @@
unsigned long iova_pfn;
struct intel_iommu *iommu;
struct page *freelist;
-
- if (iommu_no_mapping(dev))
- return;
+ struct pci_dev *pdev = NULL;
domain = find_domain(dev);
BUG_ON(!domain);
@@ -3713,12 +3600,12 @@
start_pfn = mm_to_dma_pfn(iova_pfn);
last_pfn = start_pfn + nrpages - 1;
- pr_debug("Device %s unmapping: pfn %lx-%lx\n",
- dev_name(dev), start_pfn, last_pfn);
+ if (dev_is_pci(dev))
+ pdev = to_pci_dev(dev);
freelist = domain_unmap(domain, start_pfn, last_pfn);
-
- if (intel_iommu_strict) {
+ if (intel_iommu_strict || (pdev && pdev->untrusted) ||
+ !has_iova_flush_queue(&domain->iovad)) {
iommu_flush_iotlb_psi(iommu, domain, start_pfn,
nrpages, !freelist, 0);
/* free iova */
@@ -3732,13 +3619,25 @@
* cpu used up by the iotlb flush operation...
*/
}
+
+ trace_unmap_single(dev, dev_addr, size);
}
static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
size_t size, enum dma_data_direction dir,
unsigned long attrs)
{
- intel_unmap(dev, dev_addr, size);
+ if (iommu_need_mapping(dev))
+ intel_unmap(dev, dev_addr, size);
+ else
+ dma_direct_unmap_page(dev, dev_addr, size, dir, attrs);
+}
+
+static void intel_unmap_resource(struct device *dev, dma_addr_t dev_addr,
+ size_t size, enum dma_data_direction dir, unsigned long attrs)
+{
+ if (iommu_need_mapping(dev))
+ intel_unmap(dev, dev_addr, size);
}
static void *intel_alloc_coherent(struct device *dev, size_t size,
@@ -3748,28 +3647,17 @@
struct page *page = NULL;
int order;
+ if (!iommu_need_mapping(dev))
+ return dma_direct_alloc(dev, size, dma_handle, flags, attrs);
+
size = PAGE_ALIGN(size);
order = get_order(size);
- if (!iommu_no_mapping(dev))
- flags &= ~(GFP_DMA | GFP_DMA32);
- else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
- if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
- flags |= GFP_DMA;
- else
- flags |= GFP_DMA32;
- }
-
if (gfpflags_allow_blocking(flags)) {
unsigned int count = size >> PAGE_SHIFT;
page = dma_alloc_from_contiguous(dev, count, order,
flags & __GFP_NOWARN);
- if (page && iommu_no_mapping(dev) &&
- page_to_phys(page) + size > dev->coherent_dma_mask) {
- dma_release_from_contiguous(dev, page, count);
- page = NULL;
- }
}
if (!page)
@@ -3781,7 +3669,7 @@
*dma_handle = __intel_map_single(dev, page_to_phys(page), size,
DMA_BIDIRECTIONAL,
dev->coherent_dma_mask);
- if (*dma_handle)
+ if (*dma_handle != DMA_MAPPING_ERROR)
return page_address(page);
if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
__free_pages(page, order);
@@ -3795,6 +3683,9 @@
int order;
struct page *page = virt_to_page(vaddr);
+ if (!iommu_need_mapping(dev))
+ return dma_direct_free(dev, size, vaddr, dma_handle, attrs);
+
size = PAGE_ALIGN(size);
order = get_order(size);
@@ -3812,25 +3703,16 @@
struct scatterlist *sg;
int i;
+ if (!iommu_need_mapping(dev))
+ return dma_direct_unmap_sg(dev, sglist, nelems, dir, attrs);
+
for_each_sg(sglist, sg, nelems, i) {
nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
}
intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
-}
-static int intel_nontranslate_map_sg(struct device *hddev,
- struct scatterlist *sglist, int nelems, int dir)
-{
- int i;
- struct scatterlist *sg;
-
- for_each_sg(sglist, sg, nelems, i) {
- BUG_ON(!sg_page(sg));
- sg->dma_address = sg_phys(sg);
- sg->dma_length = sg->length;
- }
- return nelems;
+ trace_unmap_sg(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
}
static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
@@ -3847,10 +3729,10 @@
struct intel_iommu *iommu;
BUG_ON(dir == DMA_NONE);
- if (iommu_no_mapping(dev))
- return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
+ if (!iommu_need_mapping(dev))
+ return dma_direct_map_sg(dev, sglist, nelems, dir, attrs);
- domain = get_valid_domain_for_dev(dev);
+ domain = find_domain(dev);
if (!domain)
return 0;
@@ -3887,25 +3769,278 @@
return 0;
}
+ trace_map_sg(dev, iova_pfn << PAGE_SHIFT,
+ sg_phys(sglist), size << VTD_PAGE_SHIFT);
+
return nelems;
}
-static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
+static u64 intel_get_required_mask(struct device *dev)
{
- return !dma_addr;
+ if (!iommu_need_mapping(dev))
+ return dma_direct_get_required_mask(dev);
+ return DMA_BIT_MASK(32);
}
-const struct dma_map_ops intel_dma_ops = {
+static const struct dma_map_ops intel_dma_ops = {
.alloc = intel_alloc_coherent,
.free = intel_free_coherent,
.map_sg = intel_map_sg,
.unmap_sg = intel_unmap_sg,
.map_page = intel_map_page,
.unmap_page = intel_unmap_page,
- .mapping_error = intel_mapping_error,
-#ifdef CONFIG_X86
+ .map_resource = intel_map_resource,
+ .unmap_resource = intel_unmap_resource,
.dma_supported = dma_direct_supported,
-#endif
+ .mmap = dma_common_mmap,
+ .get_sgtable = dma_common_get_sgtable,
+ .get_required_mask = intel_get_required_mask,
+};
+
+static void
+bounce_sync_single(struct device *dev, dma_addr_t addr, size_t size,
+ enum dma_data_direction dir, enum dma_sync_target target)
+{
+ struct dmar_domain *domain;
+ phys_addr_t tlb_addr;
+
+ domain = find_domain(dev);
+ if (WARN_ON(!domain))
+ return;
+
+ tlb_addr = intel_iommu_iova_to_phys(&domain->domain, addr);
+ if (is_swiotlb_buffer(tlb_addr))
+ swiotlb_tbl_sync_single(dev, tlb_addr, size, dir, target);
+}
+
+static dma_addr_t
+bounce_map_single(struct device *dev, phys_addr_t paddr, size_t size,
+ enum dma_data_direction dir, unsigned long attrs,
+ u64 dma_mask)
+{
+ size_t aligned_size = ALIGN(size, VTD_PAGE_SIZE);
+ struct dmar_domain *domain;
+ struct intel_iommu *iommu;
+ unsigned long iova_pfn;
+ unsigned long nrpages;
+ phys_addr_t tlb_addr;
+ int prot = 0;
+ int ret;
+
+ domain = find_domain(dev);
+ if (WARN_ON(dir == DMA_NONE || !domain))
+ return DMA_MAPPING_ERROR;
+
+ iommu = domain_get_iommu(domain);
+ if (WARN_ON(!iommu))
+ return DMA_MAPPING_ERROR;
+
+ nrpages = aligned_nrpages(0, size);
+ iova_pfn = intel_alloc_iova(dev, domain,
+ dma_to_mm_pfn(nrpages), dma_mask);
+ if (!iova_pfn)
+ return DMA_MAPPING_ERROR;
+
+ /*
+ * Check if DMAR supports zero-length reads on write only
+ * mappings..
+ */
+ if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
+ !cap_zlr(iommu->cap))
+ prot |= DMA_PTE_READ;
+ if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
+ prot |= DMA_PTE_WRITE;
+
+ /*
+ * If both the physical buffer start address and size are
+ * page aligned, we don't need to use a bounce page.
+ */
+ if (!IS_ALIGNED(paddr | size, VTD_PAGE_SIZE)) {
+ tlb_addr = swiotlb_tbl_map_single(dev,
+ __phys_to_dma(dev, io_tlb_start),
+ paddr, size, aligned_size, dir, attrs);
+ if (tlb_addr == DMA_MAPPING_ERROR) {
+ goto swiotlb_error;
+ } else {
+ /* Cleanup the padding area. */
+ void *padding_start = phys_to_virt(tlb_addr);
+ size_t padding_size = aligned_size;
+
+ if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+ (dir == DMA_TO_DEVICE ||
+ dir == DMA_BIDIRECTIONAL)) {
+ padding_start += size;
+ padding_size -= size;
+ }
+
+ memset(padding_start, 0, padding_size);
+ }
+ } else {
+ tlb_addr = paddr;
+ }
+
+ ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
+ tlb_addr >> VTD_PAGE_SHIFT, nrpages, prot);
+ if (ret)
+ goto mapping_error;
+
+ trace_bounce_map_single(dev, iova_pfn << PAGE_SHIFT, paddr, size);
+
+ return (phys_addr_t)iova_pfn << PAGE_SHIFT;
+
+mapping_error:
+ if (is_swiotlb_buffer(tlb_addr))
+ swiotlb_tbl_unmap_single(dev, tlb_addr, size,
+ aligned_size, dir, attrs);
+swiotlb_error:
+ free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
+ dev_err(dev, "Device bounce map: %zx@%llx dir %d --- failed\n",
+ size, (unsigned long long)paddr, dir);
+
+ return DMA_MAPPING_ERROR;
+}
+
+static void
+bounce_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
+ enum dma_data_direction dir, unsigned long attrs)
+{
+ size_t aligned_size = ALIGN(size, VTD_PAGE_SIZE);
+ struct dmar_domain *domain;
+ phys_addr_t tlb_addr;
+
+ domain = find_domain(dev);
+ if (WARN_ON(!domain))
+ return;
+
+ tlb_addr = intel_iommu_iova_to_phys(&domain->domain, dev_addr);
+ if (WARN_ON(!tlb_addr))
+ return;
+
+ intel_unmap(dev, dev_addr, size);
+ if (is_swiotlb_buffer(tlb_addr))
+ swiotlb_tbl_unmap_single(dev, tlb_addr, size,
+ aligned_size, dir, attrs);
+
+ trace_bounce_unmap_single(dev, dev_addr, size);
+}
+
+static dma_addr_t
+bounce_map_page(struct device *dev, struct page *page, unsigned long offset,
+ size_t size, enum dma_data_direction dir, unsigned long attrs)
+{
+ return bounce_map_single(dev, page_to_phys(page) + offset,
+ size, dir, attrs, *dev->dma_mask);
+}
+
+static dma_addr_t
+bounce_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size,
+ enum dma_data_direction dir, unsigned long attrs)
+{
+ return bounce_map_single(dev, phys_addr, size,
+ dir, attrs, *dev->dma_mask);
+}
+
+static void
+bounce_unmap_page(struct device *dev, dma_addr_t dev_addr, size_t size,
+ enum dma_data_direction dir, unsigned long attrs)
+{
+ bounce_unmap_single(dev, dev_addr, size, dir, attrs);
+}
+
+static void
+bounce_unmap_resource(struct device *dev, dma_addr_t dev_addr, size_t size,
+ enum dma_data_direction dir, unsigned long attrs)
+{
+ bounce_unmap_single(dev, dev_addr, size, dir, attrs);
+}
+
+static void
+bounce_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems,
+ enum dma_data_direction dir, unsigned long attrs)
+{
+ struct scatterlist *sg;
+ int i;
+
+ for_each_sg(sglist, sg, nelems, i)
+ bounce_unmap_page(dev, sg->dma_address,
+ sg_dma_len(sg), dir, attrs);
+}
+
+static int
+bounce_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
+ enum dma_data_direction dir, unsigned long attrs)
+{
+ int i;
+ struct scatterlist *sg;
+
+ for_each_sg(sglist, sg, nelems, i) {
+ sg->dma_address = bounce_map_page(dev, sg_page(sg),
+ sg->offset, sg->length,
+ dir, attrs);
+ if (sg->dma_address == DMA_MAPPING_ERROR)
+ goto out_unmap;
+ sg_dma_len(sg) = sg->length;
+ }
+
+ return nelems;
+
+out_unmap:
+ bounce_unmap_sg(dev, sglist, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
+ return 0;
+}
+
+static void
+bounce_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
+ size_t size, enum dma_data_direction dir)
+{
+ bounce_sync_single(dev, addr, size, dir, SYNC_FOR_CPU);
+}
+
+static void
+bounce_sync_single_for_device(struct device *dev, dma_addr_t addr,
+ size_t size, enum dma_data_direction dir)
+{
+ bounce_sync_single(dev, addr, size, dir, SYNC_FOR_DEVICE);
+}
+
+static void
+bounce_sync_sg_for_cpu(struct device *dev, struct scatterlist *sglist,
+ int nelems, enum dma_data_direction dir)
+{
+ struct scatterlist *sg;
+ int i;
+
+ for_each_sg(sglist, sg, nelems, i)
+ bounce_sync_single(dev, sg_dma_address(sg),
+ sg_dma_len(sg), dir, SYNC_FOR_CPU);
+}
+
+static void
+bounce_sync_sg_for_device(struct device *dev, struct scatterlist *sglist,
+ int nelems, enum dma_data_direction dir)
+{
+ struct scatterlist *sg;
+ int i;
+
+ for_each_sg(sglist, sg, nelems, i)
+ bounce_sync_single(dev, sg_dma_address(sg),
+ sg_dma_len(sg), dir, SYNC_FOR_DEVICE);
+}
+
+static const struct dma_map_ops bounce_dma_ops = {
+ .alloc = intel_alloc_coherent,
+ .free = intel_free_coherent,
+ .map_sg = bounce_map_sg,
+ .unmap_sg = bounce_unmap_sg,
+ .map_page = bounce_map_page,
+ .unmap_page = bounce_unmap_page,
+ .sync_single_for_cpu = bounce_sync_single_for_cpu,
+ .sync_single_for_device = bounce_sync_single_for_device,
+ .sync_sg_for_cpu = bounce_sync_sg_for_cpu,
+ .sync_sg_for_device = bounce_sync_sg_for_device,
+ .map_resource = bounce_map_resource,
+ .unmap_resource = bounce_unmap_resource,
+ .dma_supported = dma_direct_supported,
};
static inline int iommu_domain_cache_init(void)
@@ -4030,9 +4165,7 @@
/* This IOMMU has *only* gfx devices. Either bypass it or
set the gfx_mapped flag, as appropriate */
- if (dmar_map_gfx) {
- intel_iommu_gfx_mapped = 1;
- } else {
+ if (!dmar_map_gfx) {
drhd->ignored = 1;
for_each_active_dev_scope(drhd->devices,
drhd->devices_cnt, i, dev)
@@ -4061,7 +4194,7 @@
iommu_disable_protect_mem_regions(iommu);
continue;
}
-
+
iommu_flush_write_buffer(iommu);
iommu_set_root_entry(iommu);
@@ -4177,13 +4310,10 @@
static inline void init_iommu_pm_ops(void) {}
#endif /* CONFIG_PM */
-
int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
{
struct acpi_dmar_reserved_memory *rmrr;
- int prot = DMA_PTE_READ|DMA_PTE_WRITE;
struct dmar_rmrr_unit *rmrru;
- size_t length;
rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
if (!rmrru)
@@ -4194,23 +4324,15 @@
rmrru->base_address = rmrr->base_address;
rmrru->end_address = rmrr->end_address;
- length = rmrr->end_address - rmrr->base_address + 1;
- rmrru->resv = iommu_alloc_resv_region(rmrr->base_address, length, prot,
- IOMMU_RESV_DIRECT);
- if (!rmrru->resv)
- goto free_rmrru;
-
rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
((void *)rmrr) + rmrr->header.length,
&rmrru->devices_cnt);
if (rmrru->devices_cnt && rmrru->devices == NULL)
- goto free_all;
+ goto free_rmrru;
list_add(&rmrru->list, &dmar_rmrr_units);
return 0;
-free_all:
- kfree(rmrru->resv);
free_rmrru:
kfree(rmrru);
out:
@@ -4320,7 +4442,7 @@
static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
{
- int sp, ret = 0;
+ int sp, ret;
struct intel_iommu *iommu = dmaru->iommu;
if (g_iommus[iommu->seq_id])
@@ -4358,7 +4480,7 @@
goto out;
#ifdef CONFIG_INTEL_IOMMU_SVM
- if (pasid_enabled(iommu))
+ if (pasid_supported(iommu))
intel_svm_init(iommu);
#endif
@@ -4375,7 +4497,7 @@
iommu_flush_write_buffer(iommu);
#ifdef CONFIG_INTEL_IOMMU_SVM
- if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
+ if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
ret = intel_svm_enable_prq(iommu);
if (ret)
goto disable_iommu;
@@ -4428,7 +4550,6 @@
list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
list_del(&rmrru->list);
dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
- kfree(rmrru->resv);
kfree(rmrru);
}
@@ -4484,7 +4605,7 @@
int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
{
- int ret = 0;
+ int ret;
struct dmar_rmrr_unit *rmrru;
struct dmar_atsr_unit *atsru;
struct acpi_dmar_atsr *atsr;
@@ -4501,7 +4622,7 @@
((void *)rmrr) + rmrr->header.length,
rmrr->segment, rmrru->devices,
rmrru->devices_cnt);
- if(ret < 0)
+ if (ret < 0)
return ret;
} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
dmar_remove_dev_scope(info, rmrr->segment,
@@ -4521,7 +4642,7 @@
atsru->devices_cnt);
if (ret > 0)
break;
- else if(ret < 0)
+ else if (ret < 0)
return ret;
} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
if (dmar_remove_dev_scope(info, atsr->segment,
@@ -4533,39 +4654,6 @@
return 0;
}
-/*
- * Here we only respond to action of unbound device from driver.
- *
- * Added device is not attached to its DMAR domain here yet. That will happen
- * when mapping the device to iova.
- */
-static int device_notifier(struct notifier_block *nb,
- unsigned long action, void *data)
-{
- struct device *dev = data;
- struct dmar_domain *domain;
-
- if (iommu_dummy(dev))
- return 0;
-
- if (action != BUS_NOTIFY_REMOVED_DEVICE)
- return 0;
-
- domain = find_domain(dev);
- if (!domain)
- return 0;
-
- dmar_remove_one_dev_info(domain, dev);
- if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
- domain_exit(domain);
-
- return 0;
-}
-
-static struct notifier_block device_nb = {
- .notifier_call = device_notifier,
-};
-
static int intel_iommu_memory_notifier(struct notifier_block *nb,
unsigned long val, void *v)
{
@@ -4755,14 +4843,92 @@
NULL,
};
+static inline bool has_untrusted_dev(void)
+{
+ struct pci_dev *pdev = NULL;
+
+ for_each_pci_dev(pdev)
+ if (pdev->untrusted)
+ return true;
+
+ return false;
+}
+
+static int __init platform_optin_force_iommu(void)
+{
+ if (!dmar_platform_optin() || no_platform_optin || !has_untrusted_dev())
+ return 0;
+
+ if (no_iommu || dmar_disabled)
+ pr_info("Intel-IOMMU force enabled due to platform opt in\n");
+
+ /*
+ * If Intel-IOMMU is disabled by default, we will apply identity
+ * map for all devices except those marked as being untrusted.
+ */
+ if (dmar_disabled)
+ iommu_identity_mapping |= IDENTMAP_ALL;
+
+ dmar_disabled = 0;
+ no_iommu = 0;
+
+ return 1;
+}
+
+static int __init probe_acpi_namespace_devices(void)
+{
+ struct dmar_drhd_unit *drhd;
+ /* To avoid a -Wunused-but-set-variable warning. */
+ struct intel_iommu *iommu __maybe_unused;
+ struct device *dev;
+ int i, ret = 0;
+
+ for_each_active_iommu(iommu, drhd) {
+ for_each_active_dev_scope(drhd->devices,
+ drhd->devices_cnt, i, dev) {
+ struct acpi_device_physical_node *pn;
+ struct iommu_group *group;
+ struct acpi_device *adev;
+
+ if (dev->bus != &acpi_bus_type)
+ continue;
+
+ adev = to_acpi_device(dev);
+ mutex_lock(&adev->physical_node_lock);
+ list_for_each_entry(pn,
+ &adev->physical_node_list, node) {
+ group = iommu_group_get(pn->dev);
+ if (group) {
+ iommu_group_put(group);
+ continue;
+ }
+
+ pn->dev->bus->iommu_ops = &intel_iommu_ops;
+ ret = iommu_probe_device(pn->dev);
+ if (ret)
+ break;
+ }
+ mutex_unlock(&adev->physical_node_lock);
+
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
int __init intel_iommu_init(void)
{
int ret = -ENODEV;
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
- /* VT-d is required for a TXT/tboot launch, so enforce that */
- force_on = tboot_force_iommu();
+ /*
+ * Intel IOMMU is required for a TXT/tboot launch or platform
+ * opt in, so enforce that.
+ */
+ force_on = tboot_force_iommu() || platform_optin_force_iommu();
if (iommu_init_mempool()) {
if (force_on)
@@ -4828,6 +4994,9 @@
goto out_free_reserved_range;
}
+ if (dmar_map_gfx)
+ intel_iommu_gfx_mapped = 1;
+
init_no_remapping_devices();
ret = init_dmars();
@@ -4838,10 +5007,16 @@
goto out_free_reserved_range;
}
up_write(&dmar_global_lock);
- pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
- swiotlb = 0;
+ /*
+ * If the system has no untrusted device or the user has decided
+ * to disable the bounce page mechanisms, we don't need swiotlb.
+ * Mark this and the pre-allocated bounce pages will be released
+ * later.
+ */
+ if (!has_untrusted_dev() || intel_no_bounce)
+ swiotlb = 0;
#endif
dma_ops = &intel_dma_ops;
@@ -4856,12 +5031,27 @@
}
bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
- bus_register_notifier(&pci_bus_type, &device_nb);
if (si_domain && !hw_pass_through)
register_memory_notifier(&intel_iommu_memory_nb);
cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
intel_iommu_cpu_dead);
+
+ down_read(&dmar_global_lock);
+ if (probe_acpi_namespace_devices())
+ pr_warn("ACPI name space devices didn't probe correctly\n");
+ up_read(&dmar_global_lock);
+
+ /* Finally, we enable the DMA remapping hardware. */
+ for_each_iommu(iommu, drhd) {
+ if (!drhd->ignored && !translation_pre_enabled(iommu))
+ iommu_enable_translation(iommu);
+
+ iommu_disable_protect_mem_regions(iommu);
+ }
+ pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
+
intel_iommu_enabled = 1;
+ intel_iommu_debugfs_init();
return 0;
@@ -4898,6 +5088,7 @@
static void __dmar_remove_one_dev_info(struct device_domain_info *info)
{
+ struct dmar_domain *domain;
struct intel_iommu *iommu;
unsigned long flags;
@@ -4907,8 +5098,13 @@
return;
iommu = info->iommu;
+ domain = info->domain;
if (info->dev) {
+ if (dev_is_pci(info->dev) && sm_supported(iommu))
+ intel_pasid_tear_down_entry(iommu, info->dev,
+ PASID_RID2PASID);
+
iommu_disable_dev_iotlb(info);
domain_context_clear(iommu, info->dev);
intel_pasid_free_table(info->dev);
@@ -4917,21 +5113,27 @@
unlink_domain_info(info);
spin_lock_irqsave(&iommu->lock, flags);
- domain_detach_iommu(info->domain, iommu);
+ domain_detach_iommu(domain, iommu);
spin_unlock_irqrestore(&iommu->lock, flags);
+ /* free the private domain */
+ if (domain->flags & DOMAIN_FLAG_LOSE_CHILDREN &&
+ !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
+ list_empty(&domain->devices))
+ domain_exit(info->domain);
+
free_devinfo_mem(info);
}
-static void dmar_remove_one_dev_info(struct dmar_domain *domain,
- struct device *dev)
+static void dmar_remove_one_dev_info(struct device *dev)
{
struct device_domain_info *info;
unsigned long flags;
spin_lock_irqsave(&device_domain_lock, flags);
info = dev->archdata.iommu;
- __dmar_remove_one_dev_info(info);
+ if (info)
+ __dmar_remove_one_dev_info(info);
spin_unlock_irqrestore(&device_domain_lock, flags);
}
@@ -4965,63 +5167,185 @@
struct dmar_domain *dmar_domain;
struct iommu_domain *domain;
- if (type != IOMMU_DOMAIN_UNMANAGED)
- return NULL;
+ switch (type) {
+ case IOMMU_DOMAIN_DMA:
+ /* fallthrough */
+ case IOMMU_DOMAIN_UNMANAGED:
+ dmar_domain = alloc_domain(0);
+ if (!dmar_domain) {
+ pr_err("Can't allocate dmar_domain\n");
+ return NULL;
+ }
+ if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
+ pr_err("Domain initialization failed\n");
+ domain_exit(dmar_domain);
+ return NULL;
+ }
- dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
- if (!dmar_domain) {
- pr_err("Can't allocate dmar_domain\n");
+ if (type == IOMMU_DOMAIN_DMA &&
+ init_iova_flush_queue(&dmar_domain->iovad,
+ iommu_flush_iova, iova_entry_free)) {
+ pr_warn("iova flush queue initialization failed\n");
+ intel_iommu_strict = 1;
+ }
+
+ domain_update_iommu_cap(dmar_domain);
+
+ domain = &dmar_domain->domain;
+ domain->geometry.aperture_start = 0;
+ domain->geometry.aperture_end =
+ __DOMAIN_MAX_ADDR(dmar_domain->gaw);
+ domain->geometry.force_aperture = true;
+
+ return domain;
+ case IOMMU_DOMAIN_IDENTITY:
+ return &si_domain->domain;
+ default:
return NULL;
}
- if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
- pr_err("Domain initialization failed\n");
- domain_exit(dmar_domain);
- return NULL;
- }
- domain_update_iommu_cap(dmar_domain);
- domain = &dmar_domain->domain;
- domain->geometry.aperture_start = 0;
- domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
- domain->geometry.force_aperture = true;
-
- return domain;
+ return NULL;
}
static void intel_iommu_domain_free(struct iommu_domain *domain)
{
- domain_exit(to_dmar_domain(domain));
+ if (domain != &si_domain->domain)
+ domain_exit(to_dmar_domain(domain));
}
-static int intel_iommu_attach_device(struct iommu_domain *domain,
- struct device *dev)
+/*
+ * Check whether a @domain could be attached to the @dev through the
+ * aux-domain attach/detach APIs.
+ */
+static inline bool
+is_aux_domain(struct device *dev, struct iommu_domain *domain)
+{
+ struct device_domain_info *info = dev->archdata.iommu;
+
+ return info && info->auxd_enabled &&
+ domain->type == IOMMU_DOMAIN_UNMANAGED;
+}
+
+static void auxiliary_link_device(struct dmar_domain *domain,
+ struct device *dev)
+{
+ struct device_domain_info *info = dev->archdata.iommu;
+
+ assert_spin_locked(&device_domain_lock);
+ if (WARN_ON(!info))
+ return;
+
+ domain->auxd_refcnt++;
+ list_add(&domain->auxd, &info->auxiliary_domains);
+}
+
+static void auxiliary_unlink_device(struct dmar_domain *domain,
+ struct device *dev)
+{
+ struct device_domain_info *info = dev->archdata.iommu;
+
+ assert_spin_locked(&device_domain_lock);
+ if (WARN_ON(!info))
+ return;
+
+ list_del(&domain->auxd);
+ domain->auxd_refcnt--;
+
+ if (!domain->auxd_refcnt && domain->default_pasid > 0)
+ intel_pasid_free_id(domain->default_pasid);
+}
+
+static int aux_domain_add_dev(struct dmar_domain *domain,
+ struct device *dev)
+{
+ int ret;
+ u8 bus, devfn;
+ unsigned long flags;
+ struct intel_iommu *iommu;
+
+ iommu = device_to_iommu(dev, &bus, &devfn);
+ if (!iommu)
+ return -ENODEV;
+
+ if (domain->default_pasid <= 0) {
+ int pasid;
+
+ pasid = intel_pasid_alloc_id(domain, PASID_MIN,
+ pci_max_pasids(to_pci_dev(dev)),
+ GFP_KERNEL);
+ if (pasid <= 0) {
+ pr_err("Can't allocate default pasid\n");
+ return -ENODEV;
+ }
+ domain->default_pasid = pasid;
+ }
+
+ spin_lock_irqsave(&device_domain_lock, flags);
+ /*
+ * iommu->lock must be held to attach domain to iommu and setup the
+ * pasid entry for second level translation.
+ */
+ spin_lock(&iommu->lock);
+ ret = domain_attach_iommu(domain, iommu);
+ if (ret)
+ goto attach_failed;
+
+ /* Setup the PASID entry for mediated devices: */
+ ret = intel_pasid_setup_second_level(iommu, domain, dev,
+ domain->default_pasid);
+ if (ret)
+ goto table_failed;
+ spin_unlock(&iommu->lock);
+
+ auxiliary_link_device(domain, dev);
+
+ spin_unlock_irqrestore(&device_domain_lock, flags);
+
+ return 0;
+
+table_failed:
+ domain_detach_iommu(domain, iommu);
+attach_failed:
+ spin_unlock(&iommu->lock);
+ spin_unlock_irqrestore(&device_domain_lock, flags);
+ if (!domain->auxd_refcnt && domain->default_pasid > 0)
+ intel_pasid_free_id(domain->default_pasid);
+
+ return ret;
+}
+
+static void aux_domain_remove_dev(struct dmar_domain *domain,
+ struct device *dev)
+{
+ struct device_domain_info *info;
+ struct intel_iommu *iommu;
+ unsigned long flags;
+
+ if (!is_aux_domain(dev, &domain->domain))
+ return;
+
+ spin_lock_irqsave(&device_domain_lock, flags);
+ info = dev->archdata.iommu;
+ iommu = info->iommu;
+
+ auxiliary_unlink_device(domain, dev);
+
+ spin_lock(&iommu->lock);
+ intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid);
+ domain_detach_iommu(domain, iommu);
+ spin_unlock(&iommu->lock);
+
+ spin_unlock_irqrestore(&device_domain_lock, flags);
+}
+
+static int prepare_domain_attach_device(struct iommu_domain *domain,
+ struct device *dev)
{
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct intel_iommu *iommu;
int addr_width;
u8 bus, devfn;
- if (device_is_rmrr_locked(dev)) {
- dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
- return -EPERM;
- }
-
- /* normally dev is not mapped */
- if (unlikely(domain_context_mapped(dev))) {
- struct dmar_domain *old_domain;
-
- old_domain = find_domain(dev);
- if (old_domain) {
- rcu_read_lock();
- dmar_remove_one_dev_info(old_domain, dev);
- rcu_read_unlock();
-
- if (!domain_type_is_vm_or_si(old_domain) &&
- list_empty(&old_domain->devices))
- domain_exit(old_domain);
- }
- }
-
iommu = device_to_iommu(dev, &bus, &devfn);
if (!iommu)
return -ENODEV;
@@ -5032,9 +5356,9 @@
addr_width = cap_mgaw(iommu->cap);
if (dmar_domain->max_addr > (1LL << addr_width)) {
- pr_err("%s: iommu width (%d) is not "
- "sufficient for the mapped address (%llx)\n",
- __func__, addr_width, dmar_domain->max_addr);
+ dev_err(dev, "%s: iommu width (%d) is not "
+ "sufficient for the mapped address (%llx)\n",
+ __func__, addr_width, dmar_domain->max_addr);
return -EFAULT;
}
dmar_domain->gaw = addr_width;
@@ -5054,13 +5378,64 @@
dmar_domain->agaw--;
}
- return domain_add_dev_info(dmar_domain, dev);
+ return 0;
+}
+
+/*
+ * attach_dev callback: bind @dev to @domain.
+ *
+ * Returns -EPERM when @domain is unmanaged and the device is RMRR locked,
+ * or when @domain is an aux domain of @dev.  Otherwise returns the result
+ * of prepare_domain_attach_device() followed by domain_add_dev_info().
+ */
+static int intel_iommu_attach_device(struct iommu_domain *domain,
+				     struct device *dev)
+{
+	int err;
+
+	if (domain->type == IOMMU_DOMAIN_UNMANAGED &&
+	    device_is_rmrr_locked(dev)) {
+		dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement.  Contact your platform vendor.\n");
+		return -EPERM;
+	}
+
+	if (is_aux_domain(dev, domain))
+		return -EPERM;
+
+	/* normally dev is not mapped; drop the old domain binding if it is */
+	if (unlikely(domain_context_mapped(dev)) && find_domain(dev))
+		dmar_remove_one_dev_info(dev);
+
+	err = prepare_domain_attach_device(domain, dev);
+	if (!err)
+		err = domain_add_dev_info(to_dmar_domain(domain), dev);
+
+	return err;
+}
+
+/*
+ * aux_attach_dev callback: attach @domain to @dev as an auxiliary domain.
+ * Returns -EPERM unless is_aux_domain() accepts the pair.
+ */
+static int intel_iommu_aux_attach_device(struct iommu_domain *domain,
+					 struct device *dev)
+{
+	int err;
+
+	if (!is_aux_domain(dev, domain))
+		return -EPERM;
+
+	err = prepare_domain_attach_device(domain, dev);
+	if (!err)
+		err = aux_domain_add_dev(to_dmar_domain(domain), dev);
+
+	return err;
}
static void intel_iommu_detach_device(struct iommu_domain *domain,
struct device *dev)
{
- dmar_remove_one_dev_info(to_dmar_domain(domain), dev);
+ dmar_remove_one_dev_info(dev);
+}
+
+/* aux_detach_dev callback: forwards to aux_domain_remove_dev(). */
+static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
+					  struct device *dev)
+{
+	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+
+	aux_domain_remove_dev(dmar_domain, dev);
}
static int intel_iommu_map(struct iommu_domain *domain,
@@ -5072,6 +5447,9 @@
int prot = 0;
int ret;
+ if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN)
+ return -EINVAL;
+
if (iommu_prot & IOMMU_READ)
prot |= DMA_PTE_READ;
if (iommu_prot & IOMMU_WRITE)
@@ -5102,7 +5480,8 @@
}
static size_t intel_iommu_unmap(struct iommu_domain *domain,
- unsigned long iova, size_t size)
+ unsigned long iova, size_t size,
+ struct iommu_iotlb_gather *gather)
{
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct page *freelist = NULL;
@@ -5113,6 +5492,8 @@
/* Cope with horrid API which requires us to unmap more than the
size argument if it happens to be a large-page mapping. */
BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
+ if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN)
+ return 0;
if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
size = VTD_PAGE_SIZE << level_to_offset_bits(level);
@@ -5144,6 +5525,9 @@
int level = 0;
u64 phys = 0;
+ if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN)
+ return 0;
+
pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
if (pte)
phys = dma_pte_addr(pte);
@@ -5151,6 +5535,42 @@
return phys;
}
+/*
+ * Returns true when sm_supported() holds for every active IOMMU (also
+ * true when there is none), false as soon as one fails the check.
+ */
+static inline bool scalable_mode_support(void)
+{
+	struct dmar_drhd_unit *drhd;
+	struct intel_iommu *iommu;
+	bool all_support = true;
+
+	rcu_read_lock();
+	for_each_active_iommu(iommu, drhd) {
+		all_support = sm_supported(iommu);
+		if (!all_support)
+			break;
+	}
+	rcu_read_unlock();
+
+	return all_support;
+}
+
+/*
+ * Returns true when pasid_supported() holds for every active IOMMU (also
+ * true when there is none), false as soon as one fails the check.
+ */
+static inline bool iommu_pasid_support(void)
+{
+	struct dmar_drhd_unit *drhd;
+	struct intel_iommu *iommu;
+	bool all_support = true;
+
+	rcu_read_lock();
+	for_each_active_iommu(iommu, drhd) {
+		all_support = pasid_supported(iommu);
+		if (!all_support)
+			break;
+	}
+	rcu_read_unlock();
+
+	return all_support;
+}
+
static bool intel_iommu_capable(enum iommu_cap cap)
{
if (cap == IOMMU_CAP_CACHE_COHERENCY)
@@ -5163,9 +5583,12 @@
static int intel_iommu_add_device(struct device *dev)
{
+ struct dmar_domain *dmar_domain;
+ struct iommu_domain *domain;
struct intel_iommu *iommu;
struct iommu_group *group;
u8 bus, devfn;
+ int ret;
iommu = device_to_iommu(dev, &bus, &devfn);
if (!iommu)
@@ -5173,12 +5596,52 @@
iommu_device_link(&iommu->iommu, dev);
+ if (translation_pre_enabled(iommu))
+ dev->archdata.iommu = DEFER_DEVICE_DOMAIN_INFO;
+
group = iommu_group_get_for_dev(dev);
if (IS_ERR(group))
return PTR_ERR(group);
iommu_group_put(group);
+
+ domain = iommu_get_domain_for_dev(dev);
+ dmar_domain = to_dmar_domain(domain);
+ if (domain->type == IOMMU_DOMAIN_DMA) {
+ if (device_def_domain_type(dev) == IOMMU_DOMAIN_IDENTITY) {
+ ret = iommu_request_dm_for_dev(dev);
+ if (ret) {
+ dmar_remove_one_dev_info(dev);
+ dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
+ domain_add_dev_info(si_domain, dev);
+ dev_info(dev,
+ "Device uses a private identity domain.\n");
+ }
+ }
+ } else {
+ if (device_def_domain_type(dev) == IOMMU_DOMAIN_DMA) {
+ ret = iommu_request_dma_domain_for_dev(dev);
+ if (ret) {
+ dmar_remove_one_dev_info(dev);
+ dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
+ if (!get_private_domain_for_dev(dev)) {
+ dev_warn(dev,
+ "Failed to get a private domain.\n");
+ return -ENOMEM;
+ }
+
+ dev_info(dev,
+ "Device uses a private dma domain.\n");
+ }
+ }
+ }
+
+ if (device_needs_bounce(dev)) {
+ dev_info(dev, "Use Intel IOMMU bounce page dma_ops\n");
+ set_dma_ops(dev, &bounce_dma_ops);
+ }
+
return 0;
}
@@ -5191,30 +5654,64 @@
if (!iommu)
return;
+ dmar_remove_one_dev_info(dev);
+
iommu_group_remove_device(dev);
iommu_device_unlink(&iommu->iommu, dev);
+
+ if (device_needs_bounce(dev))
+ set_dma_ops(dev, NULL);
}
static void intel_iommu_get_resv_regions(struct device *device,
struct list_head *head)
{
+ int prot = DMA_PTE_READ | DMA_PTE_WRITE;
struct iommu_resv_region *reg;
struct dmar_rmrr_unit *rmrr;
struct device *i_dev;
int i;
- rcu_read_lock();
+ down_read(&dmar_global_lock);
for_each_rmrr_units(rmrr) {
for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
i, i_dev) {
- if (i_dev != device)
+ struct iommu_resv_region *resv;
+ enum iommu_resv_type type;
+ size_t length;
+
+ if (i_dev != device &&
+ !is_downstream_to_pci_bridge(device, i_dev))
continue;
- list_add_tail(&rmrr->resv->list, head);
+ length = rmrr->end_address - rmrr->base_address + 1;
+
+ type = device_rmrr_is_relaxable(device) ?
+ IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT;
+
+ resv = iommu_alloc_resv_region(rmrr->base_address,
+ length, prot, type);
+ if (!resv)
+ break;
+
+ list_add_tail(&resv->list, head);
}
}
- rcu_read_unlock();
+ up_read(&dmar_global_lock);
+
+#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
+ if (dev_is_pci(device)) {
+ struct pci_dev *pdev = to_pci_dev(device);
+
+ if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
+ reg = iommu_alloc_resv_region(0, 1UL << 24, 0,
+ IOMMU_RESV_DIRECT);
+ if (reg)
+ list_add_tail(&reg->list, head);
+ }
+ }
+#endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
@@ -5229,27 +5726,11 @@
{
struct iommu_resv_region *entry, *next;
- list_for_each_entry_safe(entry, next, head, list) {
- if (entry->type == IOMMU_RESV_RESERVED)
- kfree(entry);
- }
+ list_for_each_entry_safe(entry, next, head, list)
+ kfree(entry);
}
-#ifdef CONFIG_INTEL_IOMMU_SVM
-#define MAX_NR_PASID_BITS (20)
-static inline unsigned long intel_iommu_get_pts(struct device *dev)
-{
- int pts, max_pasid;
-
- max_pasid = intel_pasid_get_dev_max_id(dev);
- pts = find_first_bit((unsigned long *)&max_pasid, MAX_NR_PASID_BITS);
- if (pts < 5)
- return 0;
-
- return pts - 5;
-}
-
-int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
+int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
{
struct device_domain_info *info;
struct context_entry *context;
@@ -5258,7 +5739,7 @@
u64 ctx_lo;
int ret;
- domain = get_valid_domain_for_dev(sdev->dev);
+ domain = find_domain(dev);
if (!domain)
return -EINVAL;
@@ -5266,7 +5747,7 @@
spin_lock(&iommu->lock);
ret = -EINVAL;
- info = sdev->dev->archdata.iommu;
+ info = dev->archdata.iommu;
if (!info || !info->pasid_supported)
goto out;
@@ -5276,40 +5757,13 @@
ctx_lo = context[0].lo;
- sdev->did = domain->iommu_did[iommu->seq_id];
- sdev->sid = PCI_DEVID(info->bus, info->devfn);
-
if (!(ctx_lo & CONTEXT_PASIDE)) {
- if (iommu->pasid_state_table)
- context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
- context[1].lo = (u64)virt_to_phys(info->pasid_table->table) |
- intel_iommu_get_pts(sdev->dev);
-
- wmb();
- /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both
- * extended to permit requests-with-PASID if the PASIDE bit
- * is set. which makes sense. For CONTEXT_TT_PASS_THROUGH,
- * however, the PASIDE bit is ignored and requests-with-PASID
- * are unconditionally blocked. Which makes less sense.
- * So convert from CONTEXT_TT_PASS_THROUGH to one of the new
- * "guest mode" translation types depending on whether ATS
- * is available or not. Annoyingly, we can't use the new
- * modes *unless* PASIDE is set. */
- if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) {
- ctx_lo &= ~CONTEXT_TT_MASK;
- if (info->ats_supported)
- ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2;
- else
- ctx_lo |= CONTEXT_TT_PT_PASID << 2;
- }
ctx_lo |= CONTEXT_PASIDE;
- if (iommu->pasid_state_table)
- ctx_lo |= CONTEXT_DINVE;
- if (info->pri_supported)
- ctx_lo |= CONTEXT_PRS;
context[0].lo = ctx_lo;
wmb();
- iommu->flush.flush_context(iommu, sdev->did, sdev->sid,
+ iommu->flush.flush_context(iommu,
+ domain->iommu_did[iommu->seq_id],
+ PCI_DEVID(info->bus, info->devfn),
DMA_CCMD_MASK_NOBIT,
DMA_CCMD_DEVICE_INVL);
}
@@ -5318,12 +5772,6 @@
if (!info->pasid_enabled)
iommu_enable_dev_iotlb(info);
- if (info->ats_enabled) {
- sdev->dev_iotlb = 1;
- sdev->qdep = info->ats_qdep;
- if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
- sdev->qdep = 0;
- }
ret = 0;
out:
@@ -5333,6 +5781,20 @@
return ret;
}
+/*
+ * apply_resv_region callback: reserve the IOVA page frames spanned by
+ * @region in @domain's IOVA allocator.  Warns (once) if reserve_iova()
+ * fails.  @dev is not used.
+ */
+static void intel_iommu_apply_resv_region(struct device *dev,
+					  struct iommu_domain *domain,
+					  struct iommu_resv_region *region)
+{
+	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+	unsigned long first_pfn = IOVA_PFN(region->start);
+	/* Last byte of the region, hence the "- 1". */
+	unsigned long last_pfn = IOVA_PFN(region->start + region->length - 1);
+
+	WARN_ON_ONCE(!reserve_iova(&dmar_domain->iovad, first_pfn, last_pfn));
+}
+
+#ifdef CONFIG_INTEL_IOMMU_SVM
struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
{
struct intel_iommu *iommu;
@@ -5354,12 +5816,148 @@
}
#endif /* CONFIG_INTEL_IOMMU_SVM */
+/*
+ * Enable auxiliary domain support on @dev.
+ *
+ * Returns -EINVAL when no IOMMU covers the device, DMAR is disabled, or
+ * the IOMMU lacks scalable mode or PASID support; -ENODEV when
+ * intel_iommu_enable_pasid() fails; 0 once auxd_enabled has been set.
+ */
+static int intel_iommu_enable_auxd(struct device *dev)
+{
+	struct device_domain_info *info;
+	struct intel_iommu *iommu;
+	unsigned long irq_flags;
+	u8 bus, devfn;
+
+	iommu = device_to_iommu(dev, &bus, &devfn);
+	if (!iommu || dmar_disabled ||
+	    !sm_supported(iommu) || !pasid_supported(iommu))
+		return -EINVAL;
+
+	if (intel_iommu_enable_pasid(iommu, dev))
+		return -ENODEV;
+
+	/* The per-device info is updated under device_domain_lock. */
+	spin_lock_irqsave(&device_domain_lock, irq_flags);
+	info = dev->archdata.iommu;
+	info->auxd_enabled = 1;
+	spin_unlock_irqrestore(&device_domain_lock, irq_flags);
+
+	return 0;
+}
+
+/*
+ * Clear the auxd_enabled flag of @dev under device_domain_lock.  Warns
+ * when the device has no per-device info.  Always returns 0.
+ */
+static int intel_iommu_disable_auxd(struct device *dev)
+{
+	struct device_domain_info *info;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&device_domain_lock, irq_flags);
+	info = dev->archdata.iommu;
+	if (WARN_ON(!info))
+		goto unlock;
+
+	info->auxd_enabled = 0;
+unlock:
+	spin_unlock_irqrestore(&device_domain_lock, irq_flags);
+
+	return 0;
+}
+
+/*
+ * Section 3.7 of the Intel scalable I/O virtualization technical spec
+ * defines a PCI express designated vendor specific extended capability
+ * that lets system software and tools detect endpoint devices supporting
+ * Intel scalable IO virtualization without depending on the host driver.
+ *
+ * Walk the extended capabilities with ID 0x23 and return the config space
+ * offset of the first one whose vendor word (offset 4) is
+ * PCI_VENDOR_ID_INTEL and whose ID word (offset 8) is 5, or 0 if the
+ * device has no such capability.
+ */
+static int siov_find_pci_dvsec(struct pci_dev *pdev)
+{
+	u16 vendor, id;
+	int pos;
+
+	for (pos = pci_find_next_ext_capability(pdev, 0, 0x23); pos;
+	     pos = pci_find_next_ext_capability(pdev, pos, 0x23)) {
+		pci_read_config_word(pdev, pos + 4, &vendor);
+		pci_read_config_word(pdev, pos + 8, &id);
+		if (vendor == PCI_VENDOR_ID_INTEL && id == 5)
+			return pos;
+	}
+
+	return 0;
+}
+
+/*
+ * dev_has_feat callback.  Only IOMMU_DEV_FEAT_AUX can be reported as
+ * supported: @dev must be a PCI device, DMAR must be enabled, scalable
+ * mode and PASID must be supported by the IOMMUs, pci_pasid_features()
+ * must not fail and the device must expose the scalable IOV DVSEC.
+ */
+static bool
+intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
+{
+	if (feat != IOMMU_DEV_FEAT_AUX)
+		return false;
+
+	if (!dev_is_pci(dev) || dmar_disabled ||
+	    !scalable_mode_support() || !iommu_pasid_support())
+		return false;
+
+	if (pci_pasid_features(to_pci_dev(dev)) < 0)
+		return false;
+
+	return !!siov_find_pci_dvsec(to_pci_dev(dev));
+}
+
+/*
+ * dev_enable_feat callback: IOMMU_DEV_FEAT_AUX is handed to
+ * intel_iommu_enable_auxd(); any other feature yields -ENODEV.
+ */
+static int
+intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
+{
+	switch (feat) {
+	case IOMMU_DEV_FEAT_AUX:
+		return intel_iommu_enable_auxd(dev);
+	default:
+		return -ENODEV;
+	}
+}
+
+/*
+ * dev_disable_feat callback: IOMMU_DEV_FEAT_AUX is handed to
+ * intel_iommu_disable_auxd(); any other feature yields -ENODEV.
+ */
+static int
+intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
+{
+	switch (feat) {
+	case IOMMU_DEV_FEAT_AUX:
+		return intel_iommu_disable_auxd(dev);
+	default:
+		return -ENODEV;
+	}
+}
+
+/*
+ * dev_feat_enabled callback.  IOMMU_DEV_FEAT_AUX is enabled when scalable
+ * mode is supported and the device's info has auxd_enabled set; every
+ * other feature is reported as disabled.
+ */
+static bool
+intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
+{
+	struct device_domain_info *info = dev->archdata.iommu;
+
+	if (feat != IOMMU_DEV_FEAT_AUX)
+		return false;
+
+	return scalable_mode_support() && info && info->auxd_enabled;
+}
+
+/*
+ * aux_get_pasid callback: return @domain's default PASID when it is
+ * positive, -EINVAL otherwise.  @dev is not used.
+ */
+static int
+intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
+{
+	int pasid = to_dmar_domain(domain)->default_pasid;
+
+	if (pasid > 0)
+		return pasid;
+
+	return -EINVAL;
+}
+
+/*
+ * is_attach_deferred callback: true while the device's archdata still
+ * holds the DEFER_DEVICE_DOMAIN_INFO marker.  @domain is not used.
+ */
+static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain,
+					   struct device *dev)
+{
+	void *priv = dev->archdata.iommu;
+
+	return priv == DEFER_DEVICE_DOMAIN_INFO;
+}
+
const struct iommu_ops intel_iommu_ops = {
.capable = intel_iommu_capable,
.domain_alloc = intel_iommu_domain_alloc,
.domain_free = intel_iommu_domain_free,
.attach_dev = intel_iommu_attach_device,
.detach_dev = intel_iommu_detach_device,
+ .aux_attach_dev = intel_iommu_aux_attach_device,
+ .aux_detach_dev = intel_iommu_aux_detach_device,
+ .aux_get_pasid = intel_iommu_aux_get_pasid,
.map = intel_iommu_map,
.unmap = intel_iommu_unmap,
.iova_to_phys = intel_iommu_iova_to_phys,
@@ -5367,24 +5965,56 @@
.remove_device = intel_iommu_remove_device,
.get_resv_regions = intel_iommu_get_resv_regions,
.put_resv_regions = intel_iommu_put_resv_regions,
+ .apply_resv_region = intel_iommu_apply_resv_region,
.device_group = pci_device_group,
+ .dev_has_feat = intel_iommu_dev_has_feat,
+ .dev_feat_enabled = intel_iommu_dev_feat_enabled,
+ .dev_enable_feat = intel_iommu_dev_enable_feat,
+ .dev_disable_feat = intel_iommu_dev_disable_feat,
+ .is_attach_deferred = intel_iommu_is_attach_deferred,
.pgsize_bitmap = INTEL_IOMMU_PGSIZES,
};
-static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
+static void quirk_iommu_igfx(struct pci_dev *dev)
{
- /* G4x/GM45 integrated gfx dmar support is totally busted. */
- pr_info("Disabling IOMMU for graphics on this chipset\n");
+ pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
dmar_map_gfx = 0;
}
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
+/* G4x/GM45 integrated gfx dmar support is totally busted. */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx);
+
+/* Broadwell igfx malfunctions with dmar */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
static void quirk_iommu_rwbf(struct pci_dev *dev)
{
@@ -5392,7 +6022,7 @@
* Mobile 4 Series Chipset neglects to set RWBF capability,
* but needs it. Same seems to hold for the desktop versions.
*/
- pr_info("Forcing write-buffer flush capability\n");
+ pci_info(dev, "Forcing write-buffer flush capability\n");
rwbf_quirk = 1;
}
@@ -5422,11 +6052,11 @@
return;
if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
- pr_info("BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
+ pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
dmar_map_gfx = 0;
} else if (dmar_map_gfx) {
/* we have to ensure the gfx device is idle before we flush */
- pr_info("Disabling batched IOTLB flush on Ironlake\n");
+ pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
intel_iommu_strict = 1;
}
}
diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c
index fe95c9b..040a445 100644
--- a/drivers/iommu/intel-pasid.c
+++ b/drivers/iommu/intel-pasid.c
@@ -9,6 +9,8 @@
#define pr_fmt(fmt) "DMAR: " fmt
+#include <linux/bitops.h>
+#include <linux/cpufeature.h>
#include <linux/dmar.h>
#include <linux/intel-iommu.h>
#include <linux/iommu.h>
@@ -123,12 +125,13 @@
struct pasid_table *pasid_table;
struct pasid_table_opaque data;
struct page *pages;
- size_t size, count;
+ int max_pasid = 0;
int ret, order;
+ int size;
+ might_sleep();
info = dev->archdata.iommu;
- if (WARN_ON(!info || !dev_is_pci(dev) ||
- !info->pasid_supported || info->pasid_table))
+ if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table))
return -EINVAL;
/* DMA alias device already has a pasid table, use it: */
@@ -138,23 +141,27 @@
if (ret)
goto attach_out;
- pasid_table = kzalloc(sizeof(*pasid_table), GFP_ATOMIC);
+ pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL);
if (!pasid_table)
return -ENOMEM;
INIT_LIST_HEAD(&pasid_table->dev);
- size = sizeof(struct pasid_entry);
- count = min_t(int, pci_max_pasids(to_pci_dev(dev)), intel_pasid_max_id);
- order = get_order(size * count);
+ if (info->pasid_supported)
+ max_pasid = min_t(int, pci_max_pasids(to_pci_dev(dev)),
+ intel_pasid_max_id);
+
+ size = max_pasid >> (PASID_PDE_SHIFT - 3);
+ order = size ? get_order(size) : 0;
pages = alloc_pages_node(info->iommu->node,
- GFP_ATOMIC | __GFP_ZERO,
- order);
- if (!pages)
+ GFP_KERNEL | __GFP_ZERO, order);
+ if (!pages) {
+ kfree(pasid_table);
return -ENOMEM;
+ }
pasid_table->table = page_address(pages);
pasid_table->order = order;
- pasid_table->max_pasid = count;
+ pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);
attach_out:
device_attach_pasid_table(info, pasid_table);
@@ -166,10 +173,12 @@
{
struct device_domain_info *info;
struct pasid_table *pasid_table;
+ struct pasid_dir_entry *dir;
+ struct pasid_entry *table;
+ int i, max_pde;
info = dev->archdata.iommu;
- if (!info || !dev_is_pci(dev) ||
- !info->pasid_supported || !info->pasid_table)
+ if (!info || !dev_is_pci(dev) || !info->pasid_table)
return;
pasid_table = info->pasid_table;
@@ -178,6 +187,14 @@
if (!list_empty(&pasid_table->dev))
return;
+ /* Free scalable mode PASID directory tables: */
+ dir = pasid_table->table;
+ max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT;
+ for (i = 0; i < max_pde; i++) {
+ table = get_pasid_table_from_pde(&dir[i]);
+ free_pgtable_page(table);
+ }
+
free_pages((unsigned long)pasid_table->table, pasid_table->order);
kfree(pasid_table);
}
@@ -206,17 +223,37 @@
struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid)
{
+ struct device_domain_info *info;
struct pasid_table *pasid_table;
+ struct pasid_dir_entry *dir;
struct pasid_entry *entries;
+ int dir_index, index;
pasid_table = intel_pasid_get_table(dev);
if (WARN_ON(!pasid_table || pasid < 0 ||
pasid >= intel_pasid_get_dev_max_id(dev)))
return NULL;
- entries = pasid_table->table;
+ dir = pasid_table->table;
+ info = dev->archdata.iommu;
+ dir_index = pasid >> PASID_PDE_SHIFT;
+ index = pasid & PASID_PTE_MASK;
- return &entries[pasid];
+ spin_lock(&pasid_lock);
+ entries = get_pasid_table_from_pde(&dir[dir_index]);
+ if (!entries) {
+ entries = alloc_pgtable_page(info->iommu->node);
+ if (!entries) {
+ spin_unlock(&pasid_lock);
+ return NULL;
+ }
+
+ WRITE_ONCE(dir[dir_index].val,
+ (u64)virt_to_phys(entries) | PASID_PTE_PRESENT);
+ }
+ spin_unlock(&pasid_lock);
+
+ return &entries[index];
}
/*
@@ -224,10 +261,17 @@
*/
static inline void pasid_clear_entry(struct pasid_entry *pe)
{
- WRITE_ONCE(pe->val, 0);
+ WRITE_ONCE(pe->val[0], 0);
+ WRITE_ONCE(pe->val[1], 0);
+ WRITE_ONCE(pe->val[2], 0);
+ WRITE_ONCE(pe->val[3], 0);
+ WRITE_ONCE(pe->val[4], 0);
+ WRITE_ONCE(pe->val[5], 0);
+ WRITE_ONCE(pe->val[6], 0);
+ WRITE_ONCE(pe->val[7], 0);
}
-void intel_pasid_clear_entry(struct device *dev, int pasid)
+static void intel_pasid_clear_entry(struct device *dev, int pasid)
{
struct pasid_entry *pe;
@@ -237,3 +281,361 @@
pasid_clear_entry(pe);
}
+
+/*
+ * Clear the @mask bits of *@ptr and OR in @bits (which is not masked
+ * here), using one READ_ONCE() and one WRITE_ONCE().
+ */
+static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits)
+{
+	u64 cur = READ_ONCE(*ptr);
+
+	cur &= ~mask;
+	cur |= bits;
+	WRITE_ONCE(*ptr, cur);
+}
+
+/*
+ * Program the DID (Domain Identifier) field of a scalable mode PASID
+ * entry: bits 64~79, i.e. the low 16 bits of val[1].
+ */
+static inline void
+pasid_set_domain_id(struct pasid_entry *pe, u64 value)
+{
+	u64 *qw1 = &pe->val[1];
+
+	pasid_set_bits(qw1, GENMASK_ULL(15, 0), value);
+}
+
+/*
+ * Read back the domain ID (low 16 bits of val[1]) of a scalable mode
+ * PASID entry.
+ */
+static inline u16
+pasid_get_domain_id(struct pasid_entry *pe)
+{
+	u64 qw1 = READ_ONCE(pe->val[1]);
+
+	return (u16)(qw1 & GENMASK_ULL(15, 0));
+}
+
+/*
+ * Program the SLPTPTR (Second Level Page Table Pointer) field, bits
+ * 12~63 of a scalable mode PASID entry (the VTD_PAGE_MASK part of val[0]).
+ */
+static inline void
+pasid_set_slptr(struct pasid_entry *pe, u64 value)
+{
+	u64 *qw0 = &pe->val[0];
+
+	pasid_set_bits(qw0, VTD_PAGE_MASK, value);
+}
+
+/*
+ * Program the AW (Address Width) field, bits 2~4 of a scalable mode
+ * PASID entry.
+ */
+static inline void
+pasid_set_address_width(struct pasid_entry *pe, u64 value)
+{
+	u64 *qw0 = &pe->val[0];
+
+	pasid_set_bits(qw0, GENMASK_ULL(4, 2), value << 2);
+}
+
+/*
+ * Program the PGTT (PASID Granular Translation Type) field, bits 6~8 of
+ * a scalable mode PASID entry.
+ */
+static inline void
+pasid_set_translation_type(struct pasid_entry *pe, u64 value)
+{
+	u64 *qw0 = &pe->val[0];
+
+	pasid_set_bits(qw0, GENMASK_ULL(8, 6), value << 6);
+}
+
+/*
+ * Turn fault processing on for a scalable mode PASID entry by clearing
+ * its FPD (Fault Processing Disable) bit, bit 1.
+ */
+static inline void pasid_set_fault_enable(struct pasid_entry *pe)
+{
+	u64 *qw0 = &pe->val[0];
+
+	pasid_set_bits(qw0, BIT_ULL(1), 0);
+}
+
+/*
+ * Set the SRE (Supervisor Request Enable) bit of a scalable mode PASID
+ * entry: bit 128, i.e. bit 0 of val[2].
+ */
+static inline void pasid_set_sre(struct pasid_entry *pe)
+{
+	u64 *qw2 = &pe->val[2];
+
+	pasid_set_bits(qw2, BIT_ULL(0), 1);
+}
+
+/*
+ * Set the P (Present) bit, bit 0 of a scalable mode PASID entry.
+ */
+static inline void pasid_set_present(struct pasid_entry *pe)
+{
+	u64 *qw0 = &pe->val[0];
+
+	pasid_set_bits(qw0, BIT_ULL(0), 1);
+}
+
+/*
+ * Set or clear the Page Walk Snoop bit of a scalable mode PASID entry:
+ * bit 87, i.e. bit 23 of val[1].
+ */
+static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value)
+{
+	u64 snoop = value ? BIT_ULL(23) : 0;
+
+	pasid_set_bits(&pe->val[1], BIT_ULL(23), snoop);
+}
+
+/*
+ * Program the First Level Page table Pointer field of a scalable mode
+ * PASID entry: bits 140~191, i.e. the VTD_PAGE_MASK part of val[2].
+ */
+static inline void
+pasid_set_flptr(struct pasid_entry *pe, u64 value)
+{
+	u64 *qw2 = &pe->val[2];
+
+	pasid_set_bits(qw2, VTD_PAGE_MASK, value);
+}
+
+/*
+ * Program the First Level Paging Mode field of a scalable mode PASID
+ * entry: bits 130~131, i.e. bits 2~3 of val[2].
+ */
+static inline void
+pasid_set_flpm(struct pasid_entry *pe, u64 value)
+{
+	u64 *qw2 = &pe->val[2];
+
+	pasid_set_bits(qw2, GENMASK_ULL(3, 2), value << 2);
+}
+
+/*
+ * Build a PASID-cache invalidation descriptor selecting @did and @pasid
+ * (QI_PC_PASID_SEL) and submit it synchronously on @iommu's queue.
+ */
+static void
+pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu,
+				    u16 did, int pasid)
+{
+	struct qi_desc desc = {
+		.qw0 = QI_PC_DID(did) | QI_PC_PASID_SEL | QI_PC_PASID(pasid),
+		.qw1 = 0,
+		.qw2 = 0,
+		.qw3 = 0,
+	};
+
+	qi_submit_sync(&desc, iommu);
+}
+
+/*
+ * Build an extended IOTLB invalidation descriptor for @did and @pasid
+ * with QI_GRAN_NONG_PASID granularity and submit it synchronously on
+ * @iommu's queue.
+ */
+static void
+iotlb_invalidation_with_pasid(struct intel_iommu *iommu, u16 did, u32 pasid)
+{
+	struct qi_desc desc = {
+		.qw0 = QI_EIOTLB_PASID(pasid) | QI_EIOTLB_DID(did) |
+		       QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE,
+		.qw1 = 0,
+		.qw2 = 0,
+		.qw3 = 0,
+	};
+
+	qi_submit_sync(&desc, iommu);
+}
+
+/*
+ * Flush the device IOTLB of @dev when it has ATS enabled; otherwise do
+ * nothing.  The flush is issued for address 0 with a mask of
+ * 64 - VTD_PAGE_SHIFT.  @pasid is not used by this function.
+ */
+static void
+devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
+			       struct device *dev, int pasid)
+{
+	struct device_domain_info *info = dev->archdata.iommu;
+	u16 sid, qdep, pfsid;
+
+	if (!info || !info->ats_enabled)
+		return;
+
+	/* Source ID: bus number in the high byte, devfn in the low byte. */
+	sid = (info->bus << 8) | info->devfn;
+	pfsid = info->pfsid;
+	qdep = info->ats_qdep;
+
+	qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT);
+}
+
+/*
+ * Clear the PASID table entry of @pasid for @dev and invalidate the
+ * PASID cache and IOTLB (plus the device IOTLB outside caching mode)
+ * for it.  Warns and returns if the entry cannot be obtained.
+ */
+void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
+				 struct device *dev, int pasid)
+{
+	struct pasid_entry *entry = intel_pasid_get_entry(dev, pasid);
+	u16 did;
+
+	if (WARN_ON(!entry))
+		return;
+
+	/* The domain ID must be read before the entry is wiped. */
+	did = pasid_get_domain_id(entry);
+	intel_pasid_clear_entry(dev, pasid);
+
+	if (!ecap_coherent(iommu->ecap))
+		clflush_cache_range(entry, sizeof(*entry));
+
+	pasid_cache_invalidation_with_pasid(iommu, did, pasid);
+	iotlb_invalidation_with_pasid(iommu, did, pasid);
+
+	/* Device IOTLB doesn't need to be flushed in caching mode. */
+	if (cap_caching_mode(iommu->cap))
+		return;
+
+	devtlb_invalidation_with_pasid(iommu, dev, pasid);
+}
+
+/*
+ * Set up the scalable mode pasid table entry for first only
+ * translation type.
+ *
+ * @pgd is the first level page table, @did the domain ID written to the
+ * entry, and @flags may carry PASID_FLAG_SUPERVISOR_MODE.
+ *
+ * Returns 0 on success, or -EINVAL when the hardware lacks first level
+ * translation support, lacks supervisor request support while
+ * PASID_FLAG_SUPERVISOR_MODE is requested, or the PASID entry cannot be
+ * obtained.  All capability checks run before the entry is modified, so
+ * a rejected request leaves the existing entry untouched.
+ */
+int intel_pasid_setup_first_level(struct intel_iommu *iommu,
+				  struct device *dev, pgd_t *pgd,
+				  int pasid, u16 did, int flags)
+{
+	struct pasid_entry *pte;
+
+	if (!ecap_flts(iommu->ecap)) {
+		pr_err("No first level translation support on %s\n",
+		       iommu->name);
+		return -EINVAL;
+	}
+
+	/* Reject unsupported supervisor mode before clearing the entry. */
+	if ((flags & PASID_FLAG_SUPERVISOR_MODE) && !ecap_srs(iommu->ecap)) {
+		pr_err("No supervisor request support on %s\n",
+		       iommu->name);
+		return -EINVAL;
+	}
+
+	pte = intel_pasid_get_entry(dev, pasid);
+	if (WARN_ON(!pte))
+		return -EINVAL;
+
+	pasid_clear_entry(pte);
+
+	/* Setup the first level page table pointer: */
+	pasid_set_flptr(pte, (u64)__pa(pgd));
+	if (flags & PASID_FLAG_SUPERVISOR_MODE)
+		pasid_set_sre(pte);
+
+#ifdef CONFIG_X86
+	if (cpu_feature_enabled(X86_FEATURE_LA57))
+		pasid_set_flpm(pte, 1);
+#endif /* CONFIG_X86 */
+
+	pasid_set_domain_id(pte, did);
+	pasid_set_address_width(pte, iommu->agaw);
+	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
+
+	/* Setup Present and PASID Granular Translation Type: */
+	pasid_set_translation_type(pte, 1);
+	pasid_set_present(pte);
+
+	if (!ecap_coherent(iommu->ecap))
+		clflush_cache_range(pte, sizeof(*pte));
+
+	if (cap_caching_mode(iommu->cap)) {
+		pasid_cache_invalidation_with_pasid(iommu, did, pasid);
+		iotlb_invalidation_with_pasid(iommu, did, pasid);
+	} else {
+		iommu_flush_write_buffer(iommu);
+	}
+
+	return 0;
+}
+
+/*
+ * Set up the scalable mode pasid entry for second only translation type.
+ *
+ * Returns 0 on success, -EINVAL when the hardware lacks second level
+ * translation support or the domain page table is invalid, and -ENODEV
+ * when the PASID entry cannot be obtained.
+ */
+int intel_pasid_setup_second_level(struct intel_iommu *iommu,
+				   struct dmar_domain *domain,
+				   struct device *dev, int pasid)
+{
+	struct pasid_entry *entry;
+	struct dma_pte *top;
+	u64 top_phys;
+	int level;
+	u16 did;
+
+	/*
+	 * Bail out right away when the hardware advertises no support
+	 * for second level translation.
+	 */
+	if (!ecap_slts(iommu->ecap)) {
+		pr_err("No second level translation support on %s\n",
+			iommu->name);
+		return -EINVAL;
+	}
+
+	/*
+	 * Skip top levels of page tables for iommu which has less agaw
+	 * than default. Unnecessary for PT mode.
+	 */
+	top = domain->pgd;
+	level = domain->agaw;
+	while (level > iommu->agaw) {
+		top = phys_to_virt(dma_pte_addr(top));
+		if (!dma_pte_present(top)) {
+			dev_err(dev, "Invalid domain page table\n");
+			return -EINVAL;
+		}
+		level--;
+	}
+
+	top_phys = virt_to_phys(top);
+	did = domain->iommu_did[iommu->seq_id];
+
+	entry = intel_pasid_get_entry(dev, pasid);
+	if (!entry) {
+		dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid);
+		return -ENODEV;
+	}
+
+	pasid_clear_entry(entry);
+	pasid_set_domain_id(entry, did);
+	pasid_set_slptr(entry, top_phys);
+	pasid_set_address_width(entry, level);
+	pasid_set_translation_type(entry, 2);
+	pasid_set_fault_enable(entry);
+	pasid_set_page_snoop(entry, !!ecap_smpwc(iommu->ecap));
+
+	/*
+	 * Since it is a second level only translation setup, we should
+	 * set SRE bit as well (addresses are expected to be GPAs).
+	 */
+	pasid_set_sre(entry);
+	pasid_set_present(entry);
+
+	if (!ecap_coherent(iommu->ecap))
+		clflush_cache_range(entry, sizeof(*entry));
+
+	if (!cap_caching_mode(iommu->cap)) {
+		iommu_flush_write_buffer(iommu);
+	} else {
+		pasid_cache_invalidation_with_pasid(iommu, did, pasid);
+		iotlb_invalidation_with_pasid(iommu, did, pasid);
+	}
+
+	return 0;
+}
+
+/*
+ * Set up the scalable mode pasid entry for passthrough translation type.
+ *
+ * The entry is programmed with FLPT_DEFAULT_DID as its domain ID; @domain
+ * is not used.  Returns 0 on success or -ENODEV when the PASID entry
+ * cannot be obtained.
+ */
+int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
+				   struct dmar_domain *domain,
+				   struct device *dev, int pasid)
+{
+	struct pasid_entry *entry;
+	u16 did = FLPT_DEFAULT_DID;
+
+	entry = intel_pasid_get_entry(dev, pasid);
+	if (!entry) {
+		dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid);
+		return -ENODEV;
+	}
+
+	pasid_clear_entry(entry);
+	pasid_set_domain_id(entry, did);
+	pasid_set_address_width(entry, iommu->agaw);
+	pasid_set_translation_type(entry, 4);
+	pasid_set_fault_enable(entry);
+	pasid_set_page_snoop(entry, !!ecap_smpwc(iommu->ecap));
+
+	/*
+	 * We should set SRE bit as well since the addresses are expected
+	 * to be GPAs.
+	 */
+	pasid_set_sre(entry);
+	pasid_set_present(entry);
+
+	if (!ecap_coherent(iommu->ecap))
+		clflush_cache_range(entry, sizeof(*entry));
+
+	if (!cap_caching_mode(iommu->cap)) {
+		iommu_flush_write_buffer(iommu);
+	} else {
+		pasid_cache_invalidation_with_pasid(iommu, did, pasid);
+		iotlb_invalidation_with_pasid(iommu, did, pasid);
+	}
+
+	return 0;
+}
diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h
index 1fb5e12..fc8cd8f 100644
--- a/drivers/iommu/intel-pasid.h
+++ b/drivers/iommu/intel-pasid.h
@@ -10,11 +10,39 @@
#ifndef __INTEL_PASID_H
#define __INTEL_PASID_H
+#define PASID_RID2PASID 0x0
#define PASID_MIN 0x1
-#define PASID_MAX 0x20000
+#define PASID_MAX 0x100000
+#define PASID_PTE_MASK 0x3F
+#define PASID_PTE_PRESENT 1
+#define PDE_PFN_MASK PAGE_MASK
+#define PASID_PDE_SHIFT 6
+#define MAX_NR_PASID_BITS 20
+#define PASID_TBL_ENTRIES BIT(PASID_PDE_SHIFT)
+
+#define is_pasid_enabled(entry) (((entry)->lo >> 3) & 0x1)
+#define get_pasid_dir_size(entry) (1 << ((((entry)->lo >> 9) & 0x7) + 7))
+
+/*
+ * Domain ID reserved for pasid entries programmed for first-level
+ * only and pass-through transfer modes.
+ */
+#define FLPT_DEFAULT_DID 1
+
+/*
+ * The SUPERVISOR_MODE flag indicates a first level translation which
+ * can be used for access to kernel addresses. It is valid only for
+ * access to the kernel's static 1:1 mapping of physical memory — not
+ * to vmalloc or even module mappings.
+ */
+#define PASID_FLAG_SUPERVISOR_MODE BIT(0)
+
+struct pasid_dir_entry {
+ u64 val; /* bit 0 (PASID_PTE_PRESENT): present; PDE_PFN_MASK bits: physical address of the PASID table */
+};
struct pasid_entry {
- u64 val;
+ u64 val[8];
};
/* The representative of a PASID table */
@@ -25,6 +53,28 @@
struct list_head dev; /* device list */
};
+/* Report whether the PRESENT bit is set in a PASID directory entry. */
+static inline bool pasid_pde_is_present(struct pasid_dir_entry *pde)
+{
+	return (READ_ONCE(pde->val) & PASID_PTE_PRESENT) != 0;
+}
+
+/* Get PASID table from a PASID directory entry (NULL if not present). */
+static inline struct pasid_entry *
+get_pasid_table_from_pde(struct pasid_dir_entry *pde)
+{
+	u64 val = READ_ONCE(pde->val);	/* one snapshot for check and use */
+
+	return (val & PASID_PTE_PRESENT) ?
+		phys_to_virt(val & PDE_PFN_MASK) : NULL;
+}
+
+/* Report whether the PRESENT bit is set in a PASID table entry. */
+static inline bool pasid_pte_is_present(struct pasid_entry *pte)
+{
+	return (READ_ONCE(pte->val[0]) & PASID_PTE_PRESENT) != 0;
+}
+
extern u32 intel_pasid_max_id;
int intel_pasid_alloc_id(void *ptr, int start, int end, gfp_t gfp);
void intel_pasid_free_id(int pasid);
@@ -34,6 +84,16 @@
struct pasid_table *intel_pasid_get_table(struct device *dev);
int intel_pasid_get_dev_max_id(struct device *dev);
struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid);
-void intel_pasid_clear_entry(struct device *dev, int pasid);
+int intel_pasid_setup_first_level(struct intel_iommu *iommu,
+ struct device *dev, pgd_t *pgd,
+ int pasid, u16 did, int flags);
+int intel_pasid_setup_second_level(struct intel_iommu *iommu,
+ struct dmar_domain *domain,
+ struct device *dev, int pasid);
+int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
+ struct dmar_domain *domain,
+ struct device *dev, int pasid);
+void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
+ struct device *dev, int pasid);
#endif /* __INTEL_PASID_H */
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 188f4ea..9b15913 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -1,15 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright © 2015 Intel Corporation.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
* Authors: David Woodhouse <dwmw2@infradead.org>
*/
@@ -29,21 +21,10 @@
#include "intel-pasid.h"
-#define PASID_ENTRY_P BIT_ULL(0)
-#define PASID_ENTRY_FLPM_5LP BIT_ULL(9)
-#define PASID_ENTRY_SRE BIT_ULL(11)
-
static irqreturn_t prq_event_thread(int irq, void *d);
-struct pasid_state_entry {
- u64 val;
-};
-
int intel_svm_init(struct intel_iommu *iommu)
{
- struct page *pages;
- int order;
-
if (cpu_feature_enabled(X86_FEATURE_GBPAGES) &&
!cap_fl1gp_support(iommu->cap))
return -EINVAL;
@@ -52,41 +33,6 @@
!cap_5lp_support(iommu->cap))
return -EINVAL;
- /* Start at 2 because it's defined as 2^(1+PSS) */
- iommu->pasid_max = 2 << ecap_pss(iommu->ecap);
-
- /* Eventually I'm promised we will get a multi-level PASID table
- * and it won't have to be physically contiguous. Until then,
- * limit the size because 8MiB contiguous allocations can be hard
- * to come by. The limit of 0x20000, which is 1MiB for each of
- * the PASID and PASID-state tables, is somewhat arbitrary. */
- if (iommu->pasid_max > 0x20000)
- iommu->pasid_max = 0x20000;
-
- order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max);
- if (ecap_dis(iommu->ecap)) {
- /* Just making it explicit... */
- BUILD_BUG_ON(sizeof(struct pasid_entry) != sizeof(struct pasid_state_entry));
- pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
- if (pages)
- iommu->pasid_state_table = page_address(pages);
- else
- pr_warn("IOMMU: %s: Failed to allocate PASID state table\n",
- iommu->name);
- }
-
- return 0;
-}
-
-int intel_svm_exit(struct intel_iommu *iommu)
-{
- int order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max);
-
- if (iommu->pasid_state_table) {
- free_pages((unsigned long)iommu->pasid_state_table, order);
- iommu->pasid_state_table = NULL;
- }
-
return 0;
}
@@ -154,36 +100,43 @@
}
static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_dev *sdev,
- unsigned long address, unsigned long pages, int ih, int gl)
+ unsigned long address, unsigned long pages, int ih)
{
struct qi_desc desc;
- if (pages == -1) {
- /* For global kernel pages we have to flush them in *all* PASIDs
- * because that's the only option the hardware gives us. Despite
- * the fact that they are actually only accessible through one. */
- if (gl)
- desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
- QI_EIOTLB_GRAN(QI_GRAN_ALL_ALL) | QI_EIOTLB_TYPE;
- else
- desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
- QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE;
- desc.high = 0;
+ /*
+ * Do PASID-granularity IOTLB invalidation if the page-selective
+ * invalidation capability is not available.
+ */
+ if (pages == -1 || !cap_pgsel_inv(svm->iommu->cap)) {
+ desc.qw0 = QI_EIOTLB_PASID(svm->pasid) |
+ QI_EIOTLB_DID(sdev->did) |
+ QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
+ QI_EIOTLB_TYPE;
+ desc.qw1 = 0;
} else {
int mask = ilog2(__roundup_pow_of_two(pages));
- desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
- QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | QI_EIOTLB_TYPE;
- desc.high = QI_EIOTLB_ADDR(address) | QI_EIOTLB_GL(gl) |
- QI_EIOTLB_IH(ih) | QI_EIOTLB_AM(mask);
+ desc.qw0 = QI_EIOTLB_PASID(svm->pasid) |
+ QI_EIOTLB_DID(sdev->did) |
+ QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
+ QI_EIOTLB_TYPE;
+ desc.qw1 = QI_EIOTLB_ADDR(address) |
+ QI_EIOTLB_IH(ih) |
+ QI_EIOTLB_AM(mask);
}
+ desc.qw2 = 0;
+ desc.qw3 = 0;
qi_submit_sync(&desc, svm->iommu);
if (sdev->dev_iotlb) {
- desc.low = QI_DEV_EIOTLB_PASID(svm->pasid) | QI_DEV_EIOTLB_SID(sdev->sid) |
- QI_DEV_EIOTLB_QDEP(sdev->qdep) | QI_DEIOTLB_TYPE;
+ desc.qw0 = QI_DEV_EIOTLB_PASID(svm->pasid) |
+ QI_DEV_EIOTLB_SID(sdev->sid) |
+ QI_DEV_EIOTLB_QDEP(sdev->qdep) |
+ QI_DEIOTLB_TYPE;
if (pages == -1) {
- desc.high = QI_DEV_EIOTLB_ADDR(-1ULL >> 1) | QI_DEV_EIOTLB_SIZE;
+ desc.qw1 = QI_DEV_EIOTLB_ADDR(-1ULL >> 1) |
+ QI_DEV_EIOTLB_SIZE;
} else if (pages > 1) {
/* The least significant zero bit indicates the size. So,
* for example, an "address" value of 0x12345f000 will
@@ -191,38 +144,28 @@
unsigned long last = address + ((unsigned long)(pages - 1) << VTD_PAGE_SHIFT);
unsigned long mask = __rounddown_pow_of_two(address ^ last);
- desc.high = QI_DEV_EIOTLB_ADDR((address & ~mask) | (mask - 1)) | QI_DEV_EIOTLB_SIZE;
+ desc.qw1 = QI_DEV_EIOTLB_ADDR((address & ~mask) |
+ (mask - 1)) | QI_DEV_EIOTLB_SIZE;
} else {
- desc.high = QI_DEV_EIOTLB_ADDR(address);
+ desc.qw1 = QI_DEV_EIOTLB_ADDR(address);
}
+ desc.qw2 = 0;
+ desc.qw3 = 0;
qi_submit_sync(&desc, svm->iommu);
}
}
static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
- unsigned long pages, int ih, int gl)
+ unsigned long pages, int ih)
{
struct intel_svm_dev *sdev;
- /* Try deferred invalidate if available */
- if (svm->iommu->pasid_state_table &&
- !cmpxchg64(&svm->iommu->pasid_state_table[svm->pasid].val, 0, 1ULL << 63))
- return;
-
rcu_read_lock();
list_for_each_entry_rcu(sdev, &svm->devs, list)
- intel_flush_svm_range_dev(svm, sdev, address, pages, ih, gl);
+ intel_flush_svm_range_dev(svm, sdev, address, pages, ih);
rcu_read_unlock();
}
-static void intel_change_pte(struct mmu_notifier *mn, struct mm_struct *mm,
- unsigned long address, pte_t pte)
-{
- struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
-
- intel_flush_svm_range(svm, address, 1, 1, 0);
-}
-
/* Pages have been freed at this point */
static void intel_invalidate_range(struct mmu_notifier *mn,
struct mm_struct *mm,
@@ -231,18 +174,7 @@
struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
intel_flush_svm_range(svm, start,
- (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0, 0);
-}
-
-
-static void intel_flush_pasid_dev(struct intel_svm *svm, struct intel_svm_dev *sdev, int pasid)
-{
- struct qi_desc desc;
-
- desc.high = 0;
- desc.low = QI_PC_TYPE | QI_PC_DID(sdev->did) | QI_PC_PASID_SEL | QI_PC_PASID(pasid);
-
- qi_submit_sync(&desc, svm->iommu);
+ (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0);
}
static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
@@ -264,18 +196,15 @@
*/
rcu_read_lock();
list_for_each_entry_rcu(sdev, &svm->devs, list) {
- intel_pasid_clear_entry(sdev->dev, svm->pasid);
- intel_flush_pasid_dev(svm, sdev, svm->pasid);
- intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm);
+ intel_pasid_tear_down_entry(svm->iommu, sdev->dev, svm->pasid);
+ intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
}
rcu_read_unlock();
}
static const struct mmu_notifier_ops intel_mmuops = {
- .flags = MMU_INVALIDATE_DOES_NOT_BLOCK,
.release = intel_mm_release,
- .change_pte = intel_change_pte,
.invalidate_range = intel_invalidate_range,
};
@@ -285,15 +214,14 @@
int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops)
{
struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
- struct pasid_entry *entry;
+ struct device_domain_info *info;
struct intel_svm_dev *sdev;
struct intel_svm *svm = NULL;
struct mm_struct *mm = NULL;
- u64 pasid_entry_val;
int pasid_max;
int ret;
- if (!iommu)
+ if (!iommu || dmar_disabled)
return -EINVAL;
if (dev_is_pci(dev)) {
@@ -350,13 +278,29 @@
}
sdev->dev = dev;
- ret = intel_iommu_enable_pasid(iommu, sdev);
+ ret = intel_iommu_enable_pasid(iommu, dev);
if (ret || !pasid) {
/* If they don't actually want to assign a PASID, this is
* just an enabling check/preparation. */
kfree(sdev);
goto out;
}
+
+ info = dev->archdata.iommu;
+ if (!info || !info->pasid_supported) {
+ kfree(sdev);
+ goto out;
+ }
+
+ sdev->did = FLPT_DEFAULT_DID;
+ sdev->sid = PCI_DEVID(info->bus, info->devfn);
+ if (info->ats_enabled) {
+ sdev->dev_iotlb = 1;
+ sdev->qdep = info->ats_qdep;
+ if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
+ sdev->qdep = 0;
+ }
+
/* Finish the setup now we know we're keeping it */
sdev->users = 1;
sdev->ops = ops;
@@ -398,26 +342,39 @@
kfree(sdev);
goto out;
}
- pasid_entry_val = (u64)__pa(mm->pgd) | PASID_ENTRY_P;
- } else
- pasid_entry_val = (u64)__pa(init_mm.pgd) |
- PASID_ENTRY_P | PASID_ENTRY_SRE;
- if (cpu_feature_enabled(X86_FEATURE_LA57))
- pasid_entry_val |= PASID_ENTRY_FLPM_5LP;
+ }
- entry = intel_pasid_get_entry(dev, svm->pasid);
- entry->val = pasid_entry_val;
-
- wmb();
-
- /*
- * Flush PASID cache when a PASID table entry becomes
- * present.
- */
- if (cap_caching_mode(iommu->cap))
- intel_flush_pasid_dev(svm, sdev, svm->pasid);
+ spin_lock(&iommu->lock);
+ ret = intel_pasid_setup_first_level(iommu, dev,
+ mm ? mm->pgd : init_mm.pgd,
+ svm->pasid, FLPT_DEFAULT_DID,
+ mm ? 0 : PASID_FLAG_SUPERVISOR_MODE);
+ spin_unlock(&iommu->lock);
+ if (ret) {
+ if (mm)
+ mmu_notifier_unregister(&svm->notifier, mm);
+ intel_pasid_free_id(svm->pasid);
+ kfree(svm);
+ kfree(sdev);
+ goto out;
+ }
list_add_tail(&svm->list, &global_svm_list);
+ } else {
+ /*
+ * Binding a new device to an existing PASID: the PASID entry
+ * for this device still needs to be set up.
+ */
+ spin_lock(&iommu->lock);
+ ret = intel_pasid_setup_first_level(iommu, dev,
+ mm ? mm->pgd : init_mm.pgd,
+ svm->pasid, FLPT_DEFAULT_DID,
+ mm ? 0 : PASID_FLAG_SUPERVISOR_MODE);
+ spin_unlock(&iommu->lock);
+ if (ret) {
+ kfree(sdev);
+ goto out;
+ }
}
list_add_rcu(&sdev->list, &svm->devs);
@@ -461,10 +418,9 @@
* to use. We have a *shared* PASID table, because it's
* large and has to be physically contiguous. So it's
* hard to be as defensive as we might like. */
- intel_flush_pasid_dev(svm, sdev, svm->pasid);
- intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm);
+ intel_pasid_tear_down_entry(iommu, dev, svm->pasid);
+ intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
kfree_rcu(sdev, rcu);
- intel_pasid_clear_entry(dev, svm->pasid);
if (list_empty(&svm->devs)) {
intel_pasid_free_id(svm->pasid);
@@ -523,20 +479,31 @@
/* Page request queue descriptor */
struct page_req_dsc {
- u64 srr:1;
- u64 bof:1;
- u64 pasid_present:1;
- u64 lpig:1;
- u64 pasid:20;
- u64 bus:8;
- u64 private:23;
- u64 prg_index:9;
- u64 rd_req:1;
- u64 wr_req:1;
- u64 exe_req:1;
- u64 priv_req:1;
- u64 devfn:8;
- u64 addr:52;
+ union {
+ struct {
+ u64 type:8;
+ u64 pasid_present:1;
+ u64 priv_data_present:1;
+ u64 rsvd:6;
+ u64 rid:16;
+ u64 pasid:20;
+ u64 exe_req:1;
+ u64 pm_req:1;
+ u64 rsvd2:10;
+ };
+ u64 qw_0;
+ };
+ union {
+ struct {
+ u64 rd_req:1;
+ u64 wr_req:1;
+ u64 lpig:1;
+ u64 prg_index:9;
+ u64 addr:52;
+ };
+ u64 qw_1;
+ };
+ u64 priv_data[2];
};
#define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x10)
@@ -649,7 +616,7 @@
/* Accounting for major/minor faults? */
rcu_read_lock();
list_for_each_entry_rcu(sdev, &svm->devs, list) {
- if (sdev->sid == PCI_DEVID(req->bus, req->devfn))
+ if (sdev->sid == req->rid)
break;
}
/* Other devices can go away, but the drivers are not permitted
@@ -662,34 +629,39 @@
if (sdev && sdev->ops && sdev->ops->fault_cb) {
int rwxp = (req->rd_req << 3) | (req->wr_req << 2) |
- (req->exe_req << 1) | (req->priv_req);
- sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr, req->private, rwxp, result);
+ (req->exe_req << 1) | (req->pm_req);
+ sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr,
+ req->priv_data, rwxp, result);
}
/* We get here in the error case where the PASID lookup failed,
and these can be NULL. Do not use them below this point! */
sdev = NULL;
svm = NULL;
no_pasid:
- if (req->lpig) {
- /* Page Group Response */
- resp.low = QI_PGRP_PASID(req->pasid) |
- QI_PGRP_DID((req->bus << 8) | req->devfn) |
+ if (req->lpig || req->priv_data_present) {
+ /*
+ * Per VT-d spec. v3.0 ch7.7, system software must
+ * respond with page group response if private data
+ * is present (PDP) or last page in group (LPIG) bit
+ * is set. This is an additional VT-d feature beyond
+ * PCI ATS spec.
+ */
+ resp.qw0 = QI_PGRP_PASID(req->pasid) |
+ QI_PGRP_DID(req->rid) |
QI_PGRP_PASID_P(req->pasid_present) |
+ QI_PGRP_PDP(req->pasid_present) |
+ QI_PGRP_RESP_CODE(result) |
QI_PGRP_RESP_TYPE;
- resp.high = QI_PGRP_IDX(req->prg_index) |
- QI_PGRP_PRIV(req->private) | QI_PGRP_RESP_CODE(result);
+ resp.qw1 = QI_PGRP_IDX(req->prg_index) |
+ QI_PGRP_LPIG(req->lpig);
- qi_submit_sync(&resp, iommu);
- } else if (req->srr) {
- /* Page Stream Response */
- resp.low = QI_PSTRM_IDX(req->prg_index) |
- QI_PSTRM_PRIV(req->private) | QI_PSTRM_BUS(req->bus) |
- QI_PSTRM_PASID(req->pasid) | QI_PSTRM_RESP_TYPE;
- resp.high = QI_PSTRM_ADDR(address) | QI_PSTRM_DEVFN(req->devfn) |
- QI_PSTRM_RESP_CODE(result);
-
- qi_submit_sync(&resp, iommu);
+ if (req->priv_data_present)
+ memcpy(&resp.qw2, req->priv_data,
+ sizeof(req->priv_data));
}
+ resp.qw2 = 0;
+ resp.qw3 = 0;
+ qi_submit_sync(&resp, iommu);
head = (head + sizeof(*req)) & PRQ_RING_MASK;
}
diff --git a/drivers/iommu/intel-trace.c b/drivers/iommu/intel-trace.c
new file mode 100644
index 0000000..bfb6a6e
--- /dev/null
+++ b/drivers/iommu/intel-trace.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel IOMMU trace support
+ *
+ * Copyright (C) 2019 Intel Corporation
+ *
+ * Author: Lu Baolu <baolu.lu@linux.intel.com>
+ */
+
+#include <linux/string.h>
+#include <linux/types.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/intel_iommu.h>
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 967450b..81e43c1 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -76,7 +76,7 @@
* in single-threaded environment with interrupt disabled, so no need to tabke
* the dmar_global_lock.
*/
-static DEFINE_RAW_SPINLOCK(irq_2_ir_lock);
+DEFINE_RAW_SPINLOCK(irq_2_ir_lock);
static const struct irq_domain_ops intel_ir_domain_ops;
static void iommu_disable_irq_remapping(struct intel_iommu *iommu);
@@ -101,7 +101,7 @@
iommu->flags |= VTD_FLAG_IRQ_REMAP_PRE_ENABLED;
}
-static int alloc_irte(struct intel_iommu *iommu, int irq,
+static int alloc_irte(struct intel_iommu *iommu,
struct irq_2_iommu *irq_iommu, u16 count)
{
struct ir_table *table = iommu->ir_table;
@@ -145,9 +145,11 @@
{
struct qi_desc desc;
- desc.low = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask)
+ desc.qw0 = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask)
| QI_IEC_SELECTIVE;
- desc.high = 0;
+ desc.qw1 = 0;
+ desc.qw2 = 0;
+ desc.qw3 = 0;
return qi_submit_sync(&desc, iommu);
}
@@ -292,6 +294,18 @@
irte->sid = sid;
}
+/*
+ * Set an IRTE to match only the bus number: interrupt requests referencing it
+ * must carry a requester-id whose bus number lies in [start_bus, end_bus].
+ */
+static void set_irte_verify_bus(struct irte *irte, unsigned int start_bus,
+				unsigned int end_bus)
+{
+	unsigned int bus_range = (start_bus << 8) | end_bus;
+
+	set_irte_sid(irte, SVT_VERIFY_BUS, SQ_ALL_16, bus_range);
+}
+
static int set_ioapic_sid(struct irte *irte, int apic)
{
int i;
@@ -354,14 +368,20 @@
struct set_msi_sid_data {
struct pci_dev *pdev;
u16 alias;
+ int count;
+ int busmatch_count;
};
static int set_msi_sid_cb(struct pci_dev *pdev, u16 alias, void *opaque)
{
struct set_msi_sid_data *data = opaque;
+ if (data->count == 0 || PCI_BUS_NUM(alias) == PCI_BUS_NUM(data->alias))
+ data->busmatch_count++;
+
data->pdev = pdev;
data->alias = alias;
+ data->count++;
return 0;
}
@@ -373,6 +393,8 @@
if (!irte || !dev)
return -1;
+ data.count = 0;
+ data.busmatch_count = 0;
pci_for_each_dma_alias(dev, set_msi_sid_cb, &data);
/*
@@ -381,6 +403,11 @@
* device is the case of a PCIe-to-PCI bridge, where the alias is for
* the subordinate bus. In this case we can only verify the bus.
*
+ * If there are multiple aliases, all with the same bus number,
+ * then all we can do is verify the bus. This is typical in NTB
+ * hardware that uses proxy IDs, where the device will generate traffic
+ * from multiple devfn numbers on the same bus.
+ *
* If the alias device is on a different bus than our source device
* then we have a topology based alias, use it.
*
@@ -389,14 +416,15 @@
* original device.
*/
if (PCI_BUS_NUM(data.alias) != data.pdev->bus->number)
- set_irte_sid(irte, SVT_VERIFY_BUS, SQ_ALL_16,
- PCI_DEVID(PCI_BUS_NUM(data.alias),
- dev->bus->number));
+ set_irte_verify_bus(irte, PCI_BUS_NUM(data.alias),
+ dev->bus->number);
+ else if (data.count >= 2 && data.busmatch_count == data.count)
+ set_irte_verify_bus(irte, dev->bus->number, dev->bus->number);
else if (data.pdev->bus->number != dev->bus->number)
set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, data.alias);
else
set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16,
- PCI_DEVID(dev->bus->number, dev->devfn));
+ pci_dev_id(dev));
return 0;
}
@@ -520,8 +548,7 @@
goto out_free_table;
}
- bitmap = kcalloc(BITS_TO_LONGS(INTR_REMAP_TABLE_ENTRIES),
- sizeof(long), GFP_ATOMIC);
+ bitmap = bitmap_zalloc(INTR_REMAP_TABLE_ENTRIES, GFP_ATOMIC);
if (bitmap == NULL) {
pr_err("IR%d: failed to allocate bitmap\n", iommu->seq_id);
goto out_free_pages;
@@ -588,7 +615,7 @@
return 0;
out_free_bitmap:
- kfree(bitmap);
+ bitmap_free(bitmap);
out_free_pages:
__free_pages(pages, INTR_REMAP_PAGE_ORDER);
out_free_table:
@@ -612,7 +639,7 @@
}
free_pages((unsigned long)iommu->ir_table->base,
INTR_REMAP_PAGE_ORDER);
- kfree(iommu->ir_table->bitmap);
+ bitmap_free(iommu->ir_table->bitmap);
kfree(iommu->ir_table);
iommu->ir_table = NULL;
}
@@ -1347,7 +1374,7 @@
goto out_free_parent;
down_read(&dmar_global_lock);
- index = alloc_irte(iommu, virq, &data->irq_2_iommu, nr_irqs);
+ index = alloc_irte(iommu, &data->irq_2_iommu, nr_irqs);
up_read(&dmar_global_lock);
if (index < 0) {
pr_warn("Failed to allocate IRTE\n");
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index b5948ba..4cb3949 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* CPU-agnostic ARM page table allocator.
*
@@ -14,18 +15,6 @@
* - PXN
* - Domains
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
* Copyright (C) 2014-2015 ARM Limited
* Copyright (c) 2014-2015 MediaTek Inc.
*/
@@ -35,6 +24,7 @@
#include <linux/atomic.h>
#include <linux/dma-mapping.h>
#include <linux/gfp.h>
+#include <linux/io-pgtable.h>
#include <linux/iommu.h>
#include <linux/kernel.h>
#include <linux/kmemleak.h>
@@ -45,8 +35,6 @@
#include <asm/barrier.h>
-#include "io-pgtable.h"
-
/* Struct accessors */
#define io_pgtable_to_data(x) \
container_of((x), struct arm_v7s_io_pgtable, iop)
@@ -124,7 +112,9 @@
#define ARM_V7S_TEX_MASK 0x7
#define ARM_V7S_ATTR_TEX(val) (((val) & ARM_V7S_TEX_MASK) << ARM_V7S_TEX_SHIFT)
-#define ARM_V7S_ATTR_MTK_4GB BIT(9) /* MTK extend it for 4GB mode */
+/* MediaTek extension: these two bits hold PA bit 32 and PA bit 33 */
+#define ARM_V7S_ATTR_MTK_PA_BIT32 BIT(9)
+#define ARM_V7S_ATTR_MTK_PA_BIT33 BIT(4)
/* *well, except for TEX on level 2 large pages, of course :( */
#define ARM_V7S_CONT_PAGE_TEX_SHIFT 6
@@ -161,6 +151,14 @@
#define ARM_V7S_TCR_PD1 BIT(5)
+#ifdef CONFIG_ZONE_DMA32
+#define ARM_V7S_TABLE_GFP_DMA GFP_DMA32
+#define ARM_V7S_TABLE_SLAB_FLAGS SLAB_CACHE_DMA32
+#else
+#define ARM_V7S_TABLE_GFP_DMA GFP_DMA
+#define ARM_V7S_TABLE_SLAB_FLAGS SLAB_CACHE_DMA
+#endif
+
typedef u32 arm_v7s_iopte;
static bool selftest_running;
@@ -173,18 +171,62 @@
spinlock_t split_lock;
};
+static bool arm_v7s_pte_is_cont(arm_v7s_iopte pte, int lvl);
+
static dma_addr_t __arm_v7s_dma_addr(void *pages)
{
return (dma_addr_t)virt_to_phys(pages);
}
-static arm_v7s_iopte *iopte_deref(arm_v7s_iopte pte, int lvl)
+static bool arm_v7s_is_mtk_enabled(struct io_pgtable_cfg *cfg)
{
+ return IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT) &&
+ (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT);
+}
+
+static arm_v7s_iopte paddr_to_iopte(phys_addr_t paddr, int lvl,
+				    struct io_pgtable_cfg *cfg)
+{
+	/* Address bits that fit the level-@lvl PTE format. */
+	arm_v7s_iopte bits = paddr & ARM_V7S_LVL_MASK(lvl);
+
+	/* With the MTK extension, PA bits 32/33 go into dedicated PTE bits. */
+	if (arm_v7s_is_mtk_enabled(cfg)) {
+		if (paddr & BIT_ULL(32))
+			bits |= ARM_V7S_ATTR_MTK_PA_BIT32;
+		if (paddr & BIT_ULL(33))
+			bits |= ARM_V7S_ATTR_MTK_PA_BIT33;
+	}
+	return bits;
+}
+
+static phys_addr_t iopte_to_paddr(arm_v7s_iopte pte, int lvl,
+ struct io_pgtable_cfg *cfg)
+{
+ arm_v7s_iopte mask;
+ phys_addr_t paddr;
+
if (ARM_V7S_PTE_IS_TABLE(pte, lvl))
- pte &= ARM_V7S_TABLE_MASK;
+ mask = ARM_V7S_TABLE_MASK;
+ else if (arm_v7s_pte_is_cont(pte, lvl))
+ mask = ARM_V7S_LVL_MASK(lvl) * ARM_V7S_CONT_PAGES;
else
- pte &= ARM_V7S_LVL_MASK(lvl);
- return phys_to_virt(pte);
+ mask = ARM_V7S_LVL_MASK(lvl);
+
+ paddr = pte & mask;
+ if (!arm_v7s_is_mtk_enabled(cfg))
+ return paddr;
+
+ if (pte & ARM_V7S_ATTR_MTK_PA_BIT32)
+ paddr |= BIT_ULL(32);
+ if (pte & ARM_V7S_ATTR_MTK_PA_BIT33)
+ paddr |= BIT_ULL(33);
+ return paddr;
+}
+
+static arm_v7s_iopte *iopte_deref(arm_v7s_iopte pte, int lvl,
+ struct arm_v7s_io_pgtable *data)
+{
+ return phys_to_virt(iopte_to_paddr(pte, lvl, &data->iop.cfg));
}
static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp,
@@ -198,14 +240,17 @@
void *table = NULL;
if (lvl == 1)
- table = (void *)__get_dma_pages(__GFP_ZERO, get_order(size));
+ table = (void *)__get_free_pages(
+ __GFP_ZERO | ARM_V7S_TABLE_GFP_DMA, get_order(size));
else if (lvl == 2)
- table = kmem_cache_zalloc(data->l2_tables, gfp | GFP_DMA);
+ table = kmem_cache_zalloc(data->l2_tables, gfp);
phys = virt_to_phys(table);
- if (phys != (arm_v7s_iopte)phys)
+ if (phys != (arm_v7s_iopte)phys) {
/* Doesn't fit in PTE */
+ dev_err(dev, "Page table does not fit in PTE: %pa", &phys);
goto out_free;
- if (table && !(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) {
+ }
+ if (table && !cfg->coherent_walk) {
dma = dma_map_single(dev, table, size, DMA_TO_DEVICE);
if (dma_mapping_error(dev, dma))
goto out_free;
@@ -217,7 +262,8 @@
if (dma != phys)
goto out_unmap;
}
- kmemleak_ignore(table);
+ if (lvl == 2)
+ kmemleak_ignore(table);
return table;
out_unmap:
@@ -238,7 +284,7 @@
struct device *dev = cfg->iommu_dev;
size_t size = ARM_V7S_TABLE_SIZE(lvl);
- if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA))
+ if (!cfg->coherent_walk)
dma_unmap_single(dev, __arm_v7s_dma_addr(table), size,
DMA_TO_DEVICE);
if (lvl == 1)
@@ -250,7 +296,7 @@
static void __arm_v7s_pte_sync(arm_v7s_iopte *ptep, int num_entries,
struct io_pgtable_cfg *cfg)
{
- if (cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)
+ if (cfg->coherent_walk)
return;
dma_sync_single_for_device(cfg->iommu_dev, __arm_v7s_dma_addr(ptep),
@@ -295,9 +341,6 @@
if (lvl == 1 && (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS))
pte |= ARM_V7S_ATTR_NS_SECTION;
- if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB)
- pte |= ARM_V7S_ATTR_MTK_4GB;
-
return pte;
}
@@ -362,7 +405,8 @@
return false;
}
-static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *, unsigned long,
+static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *,
+ struct iommu_iotlb_gather *, unsigned long,
size_t, int, arm_v7s_iopte *);
static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data,
@@ -383,7 +427,7 @@
size_t sz = ARM_V7S_BLOCK_SIZE(lvl);
tblp = ptep - ARM_V7S_LVL_IDX(iova, lvl);
- if (WARN_ON(__arm_v7s_unmap(data, iova + i * sz,
+ if (WARN_ON(__arm_v7s_unmap(data, NULL, iova + i * sz,
sz, lvl, tblp) != sz))
return -EINVAL;
} else if (ptep[i]) {
@@ -396,7 +440,7 @@
if (num_entries > 1)
pte = arm_v7s_pte_to_cont(pte, lvl);
- pte |= paddr & ARM_V7S_LVL_MASK(lvl);
+ pte |= paddr_to_iopte(paddr, lvl, cfg);
__arm_v7s_set_pte(ptep, pte, num_entries, cfg);
return 0;
@@ -462,7 +506,7 @@
}
if (ARM_V7S_PTE_IS_TABLE(pte, lvl)) {
- cptep = iopte_deref(pte, lvl);
+ cptep = iopte_deref(pte, lvl, data);
} else if (pte) {
/* We require an unmap first */
WARN_ON(!selftest_running);
@@ -484,7 +528,8 @@
if (!(prot & (IOMMU_READ | IOMMU_WRITE)))
return 0;
- if (WARN_ON(upper_32_bits(iova) || upper_32_bits(paddr)))
+ if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias) ||
+ paddr >= (1ULL << data->iop.cfg.oas)))
return -ERANGE;
ret = __arm_v7s_map(data, iova, paddr, size, prot, 1, data->pgd);
@@ -493,9 +538,8 @@
* a chance for anything to kick off a table walk for the new iova.
*/
if (iop->cfg.quirks & IO_PGTABLE_QUIRK_TLBI_ON_MAP) {
- io_pgtable_tlb_add_flush(iop, iova, size,
- ARM_V7S_BLOCK_SIZE(2), false);
- io_pgtable_tlb_sync(iop);
+ io_pgtable_tlb_flush_walk(iop, iova, size,
+ ARM_V7S_BLOCK_SIZE(2));
} else {
wmb();
}
@@ -512,7 +556,8 @@
arm_v7s_iopte pte = data->pgd[i];
if (ARM_V7S_PTE_IS_TABLE(pte, 1))
- __arm_v7s_free_table(iopte_deref(pte, 1), 2, data);
+ __arm_v7s_free_table(iopte_deref(pte, 1, data),
+ 2, data);
}
__arm_v7s_free_table(data->pgd, 1, data);
kmem_cache_destroy(data->l2_tables);
@@ -541,12 +586,12 @@
__arm_v7s_pte_sync(ptep, ARM_V7S_CONT_PAGES, &iop->cfg);
size *= ARM_V7S_CONT_PAGES;
- io_pgtable_tlb_add_flush(iop, iova, size, size, true);
- io_pgtable_tlb_sync(iop);
+ io_pgtable_tlb_flush_leaf(iop, iova, size, size);
return pte;
}
static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
+ struct iommu_iotlb_gather *gather,
unsigned long iova, size_t size,
arm_v7s_iopte blk_pte,
arm_v7s_iopte *ptep)
@@ -582,15 +627,16 @@
if (!ARM_V7S_PTE_IS_TABLE(pte, 1))
return 0;
- tablep = iopte_deref(pte, 1);
- return __arm_v7s_unmap(data, iova, size, 2, tablep);
+ tablep = iopte_deref(pte, 1, data);
+ return __arm_v7s_unmap(data, gather, iova, size, 2, tablep);
}
- io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true);
+ io_pgtable_tlb_add_page(&data->iop, gather, iova, size);
return size;
}
static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
+ struct iommu_iotlb_gather *gather,
unsigned long iova, size_t size, int lvl,
arm_v7s_iopte *ptep)
{
@@ -637,14 +683,19 @@
for (i = 0; i < num_entries; i++) {
if (ARM_V7S_PTE_IS_TABLE(pte[i], lvl)) {
/* Also flush any partial walks */
- io_pgtable_tlb_add_flush(iop, iova, blk_size,
- ARM_V7S_BLOCK_SIZE(lvl + 1), false);
- io_pgtable_tlb_sync(iop);
- ptep = iopte_deref(pte[i], lvl);
+ io_pgtable_tlb_flush_walk(iop, iova, blk_size,
+ ARM_V7S_BLOCK_SIZE(lvl + 1));
+ ptep = iopte_deref(pte[i], lvl, data);
__arm_v7s_free_table(ptep, lvl + 1, data);
+ } else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) {
+ /*
+ * Order the PTE update against queueing the IOVA, to
+ * guarantee that a flush callback from a different CPU
+ * has observed it before the TLBIALL can be issued.
+ */
+ smp_wmb();
} else {
- io_pgtable_tlb_add_flush(iop, iova, blk_size,
- blk_size, true);
+ io_pgtable_tlb_add_page(iop, gather, iova, blk_size);
}
iova += blk_size;
}
@@ -654,23 +705,24 @@
* Insert a table at the next level to map the old region,
* minus the part we want to unmap
*/
- return arm_v7s_split_blk_unmap(data, iova, size, pte[0], ptep);
+ return arm_v7s_split_blk_unmap(data, gather, iova, size, pte[0],
+ ptep);
}
/* Keep on walkin' */
- ptep = iopte_deref(pte[0], lvl);
- return __arm_v7s_unmap(data, iova, size, lvl + 1, ptep);
+ ptep = iopte_deref(pte[0], lvl, data);
+ return __arm_v7s_unmap(data, gather, iova, size, lvl + 1, ptep);
}
static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
- size_t size)
+ size_t size, struct iommu_iotlb_gather *gather)
{
struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
if (WARN_ON(upper_32_bits(iova)))
return 0;
- return __arm_v7s_unmap(data, iova, size, 1, data->pgd);
+ return __arm_v7s_unmap(data, gather, iova, size, 1, data->pgd);
}
static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops,
@@ -684,7 +736,7 @@
do {
ptep += ARM_V7S_LVL_IDX(iova, ++lvl);
pte = READ_ONCE(*ptep);
- ptep = iopte_deref(pte, lvl);
+ ptep = iopte_deref(pte, lvl, data);
} while (ARM_V7S_PTE_IS_TABLE(pte, lvl));
if (!ARM_V7S_PTE_IS_VALID(pte))
@@ -693,7 +745,7 @@
mask = ARM_V7S_LVL_MASK(lvl);
if (arm_v7s_pte_is_cont(pte, lvl))
mask *= ARM_V7S_CONT_PAGES;
- return (pte & mask) | (iova & ~mask);
+ return iopte_to_paddr(pte, lvl, &data->iop.cfg) | (iova & ~mask);
}
static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
@@ -701,22 +753,21 @@
{
struct arm_v7s_io_pgtable *data;
-#ifdef PHYS_OFFSET
- if (upper_32_bits(PHYS_OFFSET))
+ if (cfg->ias > ARM_V7S_ADDR_BITS)
return NULL;
-#endif
- if (cfg->ias > ARM_V7S_ADDR_BITS || cfg->oas > ARM_V7S_ADDR_BITS)
+
+ if (cfg->oas > (arm_v7s_is_mtk_enabled(cfg) ? 34 : ARM_V7S_ADDR_BITS))
return NULL;
if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
IO_PGTABLE_QUIRK_NO_PERMS |
IO_PGTABLE_QUIRK_TLBI_ON_MAP |
- IO_PGTABLE_QUIRK_ARM_MTK_4GB |
- IO_PGTABLE_QUIRK_NO_DMA))
+ IO_PGTABLE_QUIRK_ARM_MTK_EXT |
+ IO_PGTABLE_QUIRK_NON_STRICT))
return NULL;
/* If ARM_MTK_4GB is enabled, the NO_PERMS is also expected. */
- if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB &&
+ if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT &&
!(cfg->quirks & IO_PGTABLE_QUIRK_NO_PERMS))
return NULL;
@@ -728,7 +779,7 @@
data->l2_tables = kmem_cache_create("io-pgtable_armv7s_l2",
ARM_V7S_TABLE_SIZE(2),
ARM_V7S_TABLE_SIZE(2),
- SLAB_CACHE_DMA, NULL);
+ ARM_V7S_TABLE_SLAB_FLAGS, NULL);
if (!data->l2_tables)
goto out_free_data;
@@ -774,8 +825,11 @@
/* TTBRs */
cfg->arm_v7s_cfg.ttbr[0] = virt_to_phys(data->pgd) |
ARM_V7S_TTBR_S | ARM_V7S_TTBR_NOS |
- ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_WBWA) |
- ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_WBWA);
+ (cfg->coherent_walk ?
+ (ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_WBWA) |
+ ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_WBWA)) :
+ (ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_NC) |
+ ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_NC)));
cfg->arm_v7s_cfg.ttbr[1] = 0;
return &data->iop;
@@ -799,22 +853,24 @@
WARN_ON(cookie != cfg_cookie);
}
-static void dummy_tlb_add_flush(unsigned long iova, size_t size,
- size_t granule, bool leaf, void *cookie)
+static void dummy_tlb_flush(unsigned long iova, size_t size, size_t granule,
+ void *cookie)
{
WARN_ON(cookie != cfg_cookie);
WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
}
-static void dummy_tlb_sync(void *cookie)
+static void dummy_tlb_add_page(struct iommu_iotlb_gather *gather,
+ unsigned long iova, size_t granule, void *cookie)
{
- WARN_ON(cookie != cfg_cookie);
+ dummy_tlb_flush(iova, granule, granule, cookie);
}
-static const struct iommu_gather_ops dummy_tlb_ops = {
+static const struct iommu_flush_ops dummy_tlb_ops = {
.tlb_flush_all = dummy_tlb_flush_all,
- .tlb_add_flush = dummy_tlb_add_flush,
- .tlb_sync = dummy_tlb_sync,
+ .tlb_flush_walk = dummy_tlb_flush,
+ .tlb_flush_leaf = dummy_tlb_flush,
+ .tlb_add_page = dummy_tlb_add_page,
};
#define __FAIL(ops) ({ \
@@ -830,7 +886,8 @@
.tlb = &dummy_tlb_ops,
.oas = 32,
.ias = 32,
- .quirks = IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA,
+ .coherent_walk = true,
+ .quirks = IO_PGTABLE_QUIRK_ARM_NS,
.pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M,
};
unsigned int iova, size, iova_start;
@@ -888,7 +945,7 @@
size = 1UL << __ffs(cfg.pgsize_bitmap);
while (i < loopnr) {
iova_start = i * SZ_16M;
- if (ops->unmap(ops, iova_start + size, size) != size)
+ if (ops->unmap(ops, iova_start + size, size, NULL) != size)
return __FAIL(ops);
/* Remap of partial unmap */
@@ -906,7 +963,7 @@
for_each_set_bit(i, &cfg.pgsize_bitmap, BITS_PER_LONG) {
size = 1UL << i;
- if (ops->unmap(ops, iova, size) != size)
+ if (ops->unmap(ops, iova, size, NULL) != size)
return __FAIL(ops);
if (ops->iova_to_phys(ops, iova + 42))
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 88641b4..ca51036 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -1,18 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* CPU-agnostic ARM page table allocator.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
* Copyright (C) 2014 ARM Limited
*
* Author: Will Deacon <will.deacon@arm.com>
@@ -22,7 +11,7 @@
#include <linux/atomic.h>
#include <linux/bitops.h>
-#include <linux/iommu.h>
+#include <linux/io-pgtable.h>
#include <linux/kernel.h>
#include <linux/sizes.h>
#include <linux/slab.h>
@@ -31,8 +20,6 @@
#include <asm/barrier.h>
-#include "io-pgtable.h"
-
#define ARM_LPAE_MAX_ADDR_BITS 52
#define ARM_LPAE_S2_MAX_CONCAT_PAGES 16
#define ARM_LPAE_MAX_LEVELS 4
@@ -168,10 +155,19 @@
#define ARM_LPAE_MAIR_ATTR_MASK 0xff
#define ARM_LPAE_MAIR_ATTR_DEVICE 0x04
#define ARM_LPAE_MAIR_ATTR_NC 0x44
+#define ARM_LPAE_MAIR_ATTR_INC_OWBRWA 0xf4
#define ARM_LPAE_MAIR_ATTR_WBRWA 0xff
#define ARM_LPAE_MAIR_ATTR_IDX_NC 0
#define ARM_LPAE_MAIR_ATTR_IDX_CACHE 1
#define ARM_LPAE_MAIR_ATTR_IDX_DEV 2
+#define ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE 3
+
+#define ARM_MALI_LPAE_TTBR_ADRMODE_TABLE (3u << 0)
+#define ARM_MALI_LPAE_TTBR_READ_INNER BIT(2)
+#define ARM_MALI_LPAE_TTBR_SHARE_OUTER BIT(4)
+
+#define ARM_MALI_LPAE_MEMATTR_IMP_DEF 0x88ULL
+#define ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC 0x8DULL
/* IOPTE accessors */
#define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d))
@@ -181,11 +177,6 @@
#define iopte_prot(pte) ((pte) & ARM_LPAE_PTE_ATTR_MASK)
-#define iopte_leaf(pte,l) \
- (l == (ARM_LPAE_MAX_LEVELS - 1) ? \
- (iopte_type(pte,l) == ARM_LPAE_PTE_TYPE_PAGE) : \
- (iopte_type(pte,l) == ARM_LPAE_PTE_TYPE_BLOCK))
-
struct arm_lpae_io_pgtable {
struct io_pgtable iop;
@@ -199,6 +190,15 @@
typedef u64 arm_lpae_iopte;
+static inline bool iopte_leaf(arm_lpae_iopte pte, int lvl,
+ enum io_pgtable_fmt fmt)
+{
+ if (lvl == (ARM_LPAE_MAX_LEVELS - 1) && fmt != ARM_MALI_LPAE)
+ return iopte_type(pte, lvl) == ARM_LPAE_PTE_TYPE_PAGE;
+
+ return iopte_type(pte, lvl) == ARM_LPAE_PTE_TYPE_BLOCK;
+}
+
static arm_lpae_iopte paddr_to_iopte(phys_addr_t paddr,
struct arm_lpae_io_pgtable *data)
{
@@ -243,7 +243,7 @@
return NULL;
pages = page_address(p);
- if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) {
+ if (!cfg->coherent_walk) {
dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE);
if (dma_mapping_error(dev, dma))
goto out_free;
@@ -269,7 +269,7 @@
static void __arm_lpae_free_pages(void *pages, size_t size,
struct io_pgtable_cfg *cfg)
{
- if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA))
+ if (!cfg->coherent_walk)
dma_unmap_single(cfg->iommu_dev, __arm_lpae_dma_addr(pages),
size, DMA_TO_DEVICE);
free_pages((unsigned long)pages, get_order(size));
@@ -287,11 +287,12 @@
{
*ptep = pte;
- if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA))
+ if (!cfg->coherent_walk)
__arm_lpae_sync_pte(ptep, cfg);
}
static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
+ struct iommu_iotlb_gather *gather,
unsigned long iova, size_t size, int lvl,
arm_lpae_iopte *ptep);
@@ -304,12 +305,14 @@
if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS)
pte |= ARM_LPAE_PTE_NS;
- if (lvl == ARM_LPAE_MAX_LEVELS - 1)
+ if (data->iop.fmt != ARM_MALI_LPAE && lvl == ARM_LPAE_MAX_LEVELS - 1)
pte |= ARM_LPAE_PTE_TYPE_PAGE;
else
pte |= ARM_LPAE_PTE_TYPE_BLOCK;
- pte |= ARM_LPAE_PTE_AF | ARM_LPAE_PTE_SH_IS;
+ if (data->iop.fmt != ARM_MALI_LPAE)
+ pte |= ARM_LPAE_PTE_AF;
+ pte |= ARM_LPAE_PTE_SH_IS;
pte |= paddr_to_iopte(paddr, data);
__arm_lpae_set_pte(ptep, pte, &data->iop.cfg);
@@ -322,7 +325,7 @@
{
arm_lpae_iopte pte = *ptep;
- if (iopte_leaf(pte, lvl)) {
+ if (iopte_leaf(pte, lvl, data->iop.fmt)) {
/* We require an unmap first */
WARN_ON(!selftest_running);
return -EEXIST;
@@ -335,8 +338,10 @@
size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data);
- if (WARN_ON(__arm_lpae_unmap(data, iova, sz, lvl, tblp) != sz))
+ if (__arm_lpae_unmap(data, NULL, iova, sz, lvl, tblp) != sz) {
+ WARN_ON(1);
return -EINVAL;
+ }
}
__arm_lpae_init_pte(data, paddr, prot, lvl, ptep);
@@ -363,8 +368,7 @@
old = cmpxchg64_relaxed(ptep, curr, new);
- if ((cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) ||
- (old & ARM_LPAE_PTE_SW_SYNC))
+ if (cfg->coherent_walk || (old & ARM_LPAE_PTE_SW_SYNC))
return old;
/* Even if it's not ours, there's no point waiting; just kick it */
@@ -405,12 +409,11 @@
pte = arm_lpae_install_table(cptep, ptep, 0, cfg);
if (pte)
__arm_lpae_free_pages(cptep, tblsz, cfg);
- } else if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) &&
- !(pte & ARM_LPAE_PTE_SW_SYNC)) {
+ } else if (!cfg->coherent_walk && !(pte & ARM_LPAE_PTE_SW_SYNC)) {
__arm_lpae_sync_pte(ptep, cfg);
}
- if (pte && !iopte_leaf(pte, lvl)) {
+ if (pte && !iopte_leaf(pte, lvl, data->iop.fmt)) {
cptep = iopte_deref(pte, data);
} else if (pte) {
/* We require an unmap first */
@@ -430,31 +433,40 @@
if (data->iop.fmt == ARM_64_LPAE_S1 ||
data->iop.fmt == ARM_32_LPAE_S1) {
pte = ARM_LPAE_PTE_nG;
-
if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
pte |= ARM_LPAE_PTE_AP_RDONLY;
-
if (!(prot & IOMMU_PRIV))
pte |= ARM_LPAE_PTE_AP_UNPRIV;
-
- if (prot & IOMMU_MMIO)
- pte |= (ARM_LPAE_MAIR_ATTR_IDX_DEV
- << ARM_LPAE_PTE_ATTRINDX_SHIFT);
- else if (prot & IOMMU_CACHE)
- pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE
- << ARM_LPAE_PTE_ATTRINDX_SHIFT);
} else {
pte = ARM_LPAE_PTE_HAP_FAULT;
if (prot & IOMMU_READ)
pte |= ARM_LPAE_PTE_HAP_READ;
if (prot & IOMMU_WRITE)
pte |= ARM_LPAE_PTE_HAP_WRITE;
+ }
+
+ /*
+ * Note that this logic is structured to accommodate Mali LPAE
+ * having stage-1-like attributes but stage-2-like permissions.
+ */
+ if (data->iop.fmt == ARM_64_LPAE_S2 ||
+ data->iop.fmt == ARM_32_LPAE_S2) {
if (prot & IOMMU_MMIO)
pte |= ARM_LPAE_PTE_MEMATTR_DEV;
else if (prot & IOMMU_CACHE)
pte |= ARM_LPAE_PTE_MEMATTR_OIWB;
else
pte |= ARM_LPAE_PTE_MEMATTR_NC;
+ } else {
+ if (prot & IOMMU_MMIO)
+ pte |= (ARM_LPAE_MAIR_ATTR_IDX_DEV
+ << ARM_LPAE_PTE_ATTRINDX_SHIFT);
+ else if (prot & IOMMU_CACHE)
+ pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE
+ << ARM_LPAE_PTE_ATTRINDX_SHIFT);
+ else if (prot & IOMMU_QCOM_SYS_CACHE)
+ pte |= (ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE
+ << ARM_LPAE_PTE_ATTRINDX_SHIFT);
}
if (prot & IOMMU_NOEXEC)
@@ -512,7 +524,7 @@
while (ptep != end) {
arm_lpae_iopte pte = *ptep++;
- if (!pte || iopte_leaf(pte, lvl))
+ if (!pte || iopte_leaf(pte, lvl, data->iop.fmt))
continue;
__arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data));
@@ -530,6 +542,7 @@
}
static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
+ struct iommu_iotlb_gather *gather,
unsigned long iova, size_t size,
arm_lpae_iopte blk_pte, int lvl,
arm_lpae_iopte *ptep)
@@ -574,16 +587,16 @@
return 0;
tablep = iopte_deref(pte, data);
+ } else if (unmap_idx >= 0) {
+ io_pgtable_tlb_add_page(&data->iop, gather, iova, size);
+ return size;
}
- if (unmap_idx < 0)
- return __arm_lpae_unmap(data, iova, size, lvl, tablep);
-
- io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true);
- return size;
+ return __arm_lpae_unmap(data, gather, iova, size, lvl, tablep);
}
static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
+ struct iommu_iotlb_gather *gather,
unsigned long iova, size_t size, int lvl,
arm_lpae_iopte *ptep)
{
@@ -603,34 +616,40 @@
if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) {
__arm_lpae_set_pte(ptep, 0, &iop->cfg);
- if (!iopte_leaf(pte, lvl)) {
+ if (!iopte_leaf(pte, lvl, iop->fmt)) {
/* Also flush any partial walks */
- io_pgtable_tlb_add_flush(iop, iova, size,
- ARM_LPAE_GRANULE(data), false);
- io_pgtable_tlb_sync(iop);
+ io_pgtable_tlb_flush_walk(iop, iova, size,
+ ARM_LPAE_GRANULE(data));
ptep = iopte_deref(pte, data);
__arm_lpae_free_pgtable(data, lvl + 1, ptep);
+ } else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) {
+ /*
+ * Order the PTE update against queueing the IOVA, to
+ * guarantee that a flush callback from a different CPU
+ * has observed it before the TLBIALL can be issued.
+ */
+ smp_wmb();
} else {
- io_pgtable_tlb_add_flush(iop, iova, size, size, true);
+ io_pgtable_tlb_add_page(iop, gather, iova, size);
}
return size;
- } else if (iopte_leaf(pte, lvl)) {
+ } else if (iopte_leaf(pte, lvl, iop->fmt)) {
/*
* Insert a table at the next level to map the old region,
* minus the part we want to unmap
*/
- return arm_lpae_split_blk_unmap(data, iova, size, pte,
+ return arm_lpae_split_blk_unmap(data, gather, iova, size, pte,
lvl + 1, ptep);
}
/* Keep on walkin' */
ptep = iopte_deref(pte, data);
- return __arm_lpae_unmap(data, iova, size, lvl + 1, ptep);
+ return __arm_lpae_unmap(data, gather, iova, size, lvl + 1, ptep);
}
static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
- size_t size)
+ size_t size, struct iommu_iotlb_gather *gather)
{
struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
arm_lpae_iopte *ptep = data->pgd;
@@ -639,7 +658,7 @@
if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias)))
return 0;
- return __arm_lpae_unmap(data, iova, size, lvl, ptep);
+ return __arm_lpae_unmap(data, gather, iova, size, lvl, ptep);
}
static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
@@ -663,7 +682,7 @@
return 0;
/* Leaf entry? */
- if (iopte_leaf(pte,lvl))
+ if (iopte_leaf(pte, lvl, data->iop.fmt))
goto found_translation;
/* Take it to the next level */
@@ -772,7 +791,8 @@
u64 reg;
struct arm_lpae_io_pgtable *data;
- if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA))
+ if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
+ IO_PGTABLE_QUIRK_NON_STRICT))
return NULL;
data = arm_lpae_alloc_pgtable(cfg);
@@ -780,9 +800,15 @@
return NULL;
/* TCR */
- reg = (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH0_SHIFT) |
- (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) |
- (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT);
+ if (cfg->coherent_walk) {
+ reg = (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH0_SHIFT) |
+ (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) |
+ (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT);
+ } else {
+ reg = (ARM_LPAE_TCR_SH_OS << ARM_LPAE_TCR_SH0_SHIFT) |
+ (ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_IRGN0_SHIFT) |
+ (ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_ORGN0_SHIFT);
+ }
switch (ARM_LPAE_GRANULE(data)) {
case SZ_4K:
@@ -834,7 +860,9 @@
(ARM_LPAE_MAIR_ATTR_WBRWA
<< ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) |
(ARM_LPAE_MAIR_ATTR_DEVICE
- << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV));
+ << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV)) |
+ (ARM_LPAE_MAIR_ATTR_INC_OWBRWA
+ << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE));
cfg->arm_lpae_s1_cfg.mair[0] = reg;
cfg->arm_lpae_s1_cfg.mair[1] = 0;
@@ -864,7 +892,7 @@
struct arm_lpae_io_pgtable *data;
/* The NS quirk doesn't apply at stage 2 */
- if (cfg->quirks & ~IO_PGTABLE_QUIRK_NO_DMA)
+ if (cfg->quirks & ~(IO_PGTABLE_QUIRK_NON_STRICT))
return NULL;
data = arm_lpae_alloc_pgtable(cfg);
@@ -987,6 +1015,61 @@
return iop;
}
+static struct io_pgtable *
+arm_mali_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
+{
+ struct arm_lpae_io_pgtable *data;
+
+ /* No quirks for Mali (hopefully) */
+ if (cfg->quirks)
+ return NULL;
+
+ if (cfg->ias > 48 || cfg->oas > 40)
+ return NULL;
+
+ cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);
+
+ data = arm_lpae_alloc_pgtable(cfg);
+ if (!data)
+ return NULL;
+
+ /* Mali seems to need a full 4-level table regardless of IAS */
+ if (data->levels < ARM_LPAE_MAX_LEVELS) {
+ data->levels = ARM_LPAE_MAX_LEVELS;
+ data->pgd_size = sizeof(arm_lpae_iopte);
+ }
+ /*
+ * MEMATTR: Mali has no actual notion of a non-cacheable type, so the
+ * best we can do is mimic the out-of-tree driver and hope that the
+ * "implementation-defined caching policy" is good enough. Similarly,
+ * we'll use it for the sake of a valid attribute for our 'device'
+ * index, although callers should never request that in practice.
+ */
+ cfg->arm_mali_lpae_cfg.memattr =
+ (ARM_MALI_LPAE_MEMATTR_IMP_DEF
+ << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_NC)) |
+ (ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC
+ << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) |
+ (ARM_MALI_LPAE_MEMATTR_IMP_DEF
+ << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV));
+
+ data->pgd = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL, cfg);
+ if (!data->pgd)
+ goto out_free_data;
+
+ /* Ensure the empty pgd is visible before TRANSTAB can be written */
+ wmb();
+
+ cfg->arm_mali_lpae_cfg.transtab = virt_to_phys(data->pgd) |
+ ARM_MALI_LPAE_TTBR_READ_INNER |
+ ARM_MALI_LPAE_TTBR_ADRMODE_TABLE;
+ return &data->iop;
+
+out_free_data:
+ kfree(data);
+ return NULL;
+}
+
struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = {
.alloc = arm_64_lpae_alloc_pgtable_s1,
.free = arm_lpae_free_pgtable,
@@ -1007,6 +1090,11 @@
.free = arm_lpae_free_pgtable,
};
+struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns = {
+ .alloc = arm_mali_lpae_alloc_pgtable,
+ .free = arm_lpae_free_pgtable,
+};
+
#ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST
static struct io_pgtable_cfg *cfg_cookie;
@@ -1016,22 +1104,24 @@
WARN_ON(cookie != cfg_cookie);
}
-static void dummy_tlb_add_flush(unsigned long iova, size_t size,
- size_t granule, bool leaf, void *cookie)
+static void dummy_tlb_flush(unsigned long iova, size_t size, size_t granule,
+ void *cookie)
{
WARN_ON(cookie != cfg_cookie);
WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
}
-static void dummy_tlb_sync(void *cookie)
+static void dummy_tlb_add_page(struct iommu_iotlb_gather *gather,
+ unsigned long iova, size_t granule, void *cookie)
{
- WARN_ON(cookie != cfg_cookie);
+ dummy_tlb_flush(iova, granule, granule, cookie);
}
-static const struct iommu_gather_ops dummy_tlb_ops __initconst = {
+static const struct iommu_flush_ops dummy_tlb_ops __initconst = {
.tlb_flush_all = dummy_tlb_flush_all,
- .tlb_add_flush = dummy_tlb_add_flush,
- .tlb_sync = dummy_tlb_sync,
+ .tlb_flush_walk = dummy_tlb_flush,
+ .tlb_flush_leaf = dummy_tlb_flush,
+ .tlb_add_page = dummy_tlb_add_page,
};
static void __init arm_lpae_dump_ops(struct io_pgtable_ops *ops)
@@ -1114,7 +1204,7 @@
/* Partial unmap */
size = 1UL << __ffs(cfg->pgsize_bitmap);
- if (ops->unmap(ops, SZ_1G + size, size) != size)
+ if (ops->unmap(ops, SZ_1G + size, size, NULL) != size)
return __FAIL(ops, i);
/* Remap of partial unmap */
@@ -1129,7 +1219,7 @@
for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) {
size = 1UL << j;
- if (ops->unmap(ops, iova, size) != size)
+ if (ops->unmap(ops, iova, size, NULL) != size)
return __FAIL(ops, i);
if (ops->iova_to_phys(ops, iova + 42))
@@ -1168,7 +1258,7 @@
struct io_pgtable_cfg cfg = {
.tlb = &dummy_tlb_ops,
.oas = 48,
- .quirks = IO_PGTABLE_QUIRK_NO_DMA,
+ .coherent_walk = true,
};
for (i = 0; i < ARRAY_SIZE(pgsize); ++i) {
diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c
index 127558d..ced53e5 100644
--- a/drivers/iommu/io-pgtable.c
+++ b/drivers/iommu/io-pgtable.c
@@ -1,29 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Generic page table allocator for IOMMUs.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
* Copyright (C) 2014 ARM Limited
*
* Author: Will Deacon <will.deacon@arm.com>
*/
#include <linux/bug.h>
+#include <linux/io-pgtable.h>
#include <linux/kernel.h>
#include <linux/types.h>
-#include "io-pgtable.h"
-
static const struct io_pgtable_init_fns *
io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = {
#ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE
@@ -31,6 +19,7 @@
[ARM_32_LPAE_S2] = &io_pgtable_arm_32_lpae_s2_init_fns,
[ARM_64_LPAE_S1] = &io_pgtable_arm_64_lpae_s1_init_fns,
[ARM_64_LPAE_S2] = &io_pgtable_arm_64_lpae_s2_init_fns,
+ [ARM_MALI_LPAE] = &io_pgtable_arm_mali_lpae_init_fns,
#endif
#ifdef CONFIG_IOMMU_IO_PGTABLE_ARMV7S
[ARM_V7S] = &io_pgtable_arm_v7s_init_fns,
@@ -61,6 +50,7 @@
return &iop->ops;
}
+EXPORT_SYMBOL_GPL(alloc_io_pgtable_ops);
/*
* It is the IOMMU driver's responsibility to ensure that the page table
@@ -77,3 +67,4 @@
io_pgtable_tlb_flush_all(iop);
io_pgtable_init_table[iop->fmt]->free(iop);
}
+EXPORT_SYMBOL_GPL(free_io_pgtable_ops);
diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h
deleted file mode 100644
index 2df7909..0000000
--- a/drivers/iommu/io-pgtable.h
+++ /dev/null
@@ -1,208 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __IO_PGTABLE_H
-#define __IO_PGTABLE_H
-#include <linux/bitops.h>
-
-/*
- * Public API for use by IOMMU drivers
- */
-enum io_pgtable_fmt {
- ARM_32_LPAE_S1,
- ARM_32_LPAE_S2,
- ARM_64_LPAE_S1,
- ARM_64_LPAE_S2,
- ARM_V7S,
- IO_PGTABLE_NUM_FMTS,
-};
-
-/**
- * struct iommu_gather_ops - IOMMU callbacks for TLB and page table management.
- *
- * @tlb_flush_all: Synchronously invalidate the entire TLB context.
- * @tlb_add_flush: Queue up a TLB invalidation for a virtual address range.
- * @tlb_sync: Ensure any queued TLB invalidation has taken effect, and
- * any corresponding page table updates are visible to the
- * IOMMU.
- *
- * Note that these can all be called in atomic context and must therefore
- * not block.
- */
-struct iommu_gather_ops {
- void (*tlb_flush_all)(void *cookie);
- void (*tlb_add_flush)(unsigned long iova, size_t size, size_t granule,
- bool leaf, void *cookie);
- void (*tlb_sync)(void *cookie);
-};
-
-/**
- * struct io_pgtable_cfg - Configuration data for a set of page tables.
- *
- * @quirks: A bitmap of hardware quirks that require some special
- * action by the low-level page table allocator.
- * @pgsize_bitmap: A bitmap of page sizes supported by this set of page
- * tables.
- * @ias: Input address (iova) size, in bits.
- * @oas: Output address (paddr) size, in bits.
- * @tlb: TLB management callbacks for this set of tables.
- * @iommu_dev: The device representing the DMA configuration for the
- * page table walker.
- */
-struct io_pgtable_cfg {
- /*
- * IO_PGTABLE_QUIRK_ARM_NS: (ARM formats) Set NS and NSTABLE bits in
- * stage 1 PTEs, for hardware which insists on validating them
- * even in non-secure state where they should normally be ignored.
- *
- * IO_PGTABLE_QUIRK_NO_PERMS: Ignore the IOMMU_READ, IOMMU_WRITE and
- * IOMMU_NOEXEC flags and map everything with full access, for
- * hardware which does not implement the permissions of a given
- * format, and/or requires some format-specific default value.
- *
- * IO_PGTABLE_QUIRK_TLBI_ON_MAP: If the format forbids caching invalid
- * (unmapped) entries but the hardware might do so anyway, perform
- * TLB maintenance when mapping as well as when unmapping.
- *
- * IO_PGTABLE_QUIRK_ARM_MTK_4GB: (ARM v7s format) Set bit 9 in all
- * PTEs, for Mediatek IOMMUs which treat it as a 33rd address bit
- * when the SoC is in "4GB mode" and they can only access the high
- * remap of DRAM (0x1_00000000 to 0x1_ffffffff).
- *
- * IO_PGTABLE_QUIRK_NO_DMA: Guarantees that the tables will only ever
- * be accessed by a fully cache-coherent IOMMU or CPU (e.g. for a
- * software-emulated IOMMU), such that pagetable updates need not
- * be treated as explicit DMA data.
- */
- #define IO_PGTABLE_QUIRK_ARM_NS BIT(0)
- #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1)
- #define IO_PGTABLE_QUIRK_TLBI_ON_MAP BIT(2)
- #define IO_PGTABLE_QUIRK_ARM_MTK_4GB BIT(3)
- #define IO_PGTABLE_QUIRK_NO_DMA BIT(4)
- unsigned long quirks;
- unsigned long pgsize_bitmap;
- unsigned int ias;
- unsigned int oas;
- const struct iommu_gather_ops *tlb;
- struct device *iommu_dev;
-
- /* Low-level data specific to the table format */
- union {
- struct {
- u64 ttbr[2];
- u64 tcr;
- u64 mair[2];
- } arm_lpae_s1_cfg;
-
- struct {
- u64 vttbr;
- u64 vtcr;
- } arm_lpae_s2_cfg;
-
- struct {
- u32 ttbr[2];
- u32 tcr;
- u32 nmrr;
- u32 prrr;
- } arm_v7s_cfg;
- };
-};
-
-/**
- * struct io_pgtable_ops - Page table manipulation API for IOMMU drivers.
- *
- * @map: Map a physically contiguous memory region.
- * @unmap: Unmap a physically contiguous memory region.
- * @iova_to_phys: Translate iova to physical address.
- *
- * These functions map directly onto the iommu_ops member functions with
- * the same names.
- */
-struct io_pgtable_ops {
- int (*map)(struct io_pgtable_ops *ops, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot);
- size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova,
- size_t size);
- phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops,
- unsigned long iova);
-};
-
-/**
- * alloc_io_pgtable_ops() - Allocate a page table allocator for use by an IOMMU.
- *
- * @fmt: The page table format.
- * @cfg: The page table configuration. This will be modified to represent
- * the configuration actually provided by the allocator (e.g. the
- * pgsize_bitmap may be restricted).
- * @cookie: An opaque token provided by the IOMMU driver and passed back to
- * the callback routines in cfg->tlb.
- */
-struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt,
- struct io_pgtable_cfg *cfg,
- void *cookie);
-
-/**
- * free_io_pgtable_ops() - Free an io_pgtable_ops structure. The caller
- * *must* ensure that the page table is no longer
- * live, but the TLB can be dirty.
- *
- * @ops: The ops returned from alloc_io_pgtable_ops.
- */
-void free_io_pgtable_ops(struct io_pgtable_ops *ops);
-
-
-/*
- * Internal structures for page table allocator implementations.
- */
-
-/**
- * struct io_pgtable - Internal structure describing a set of page tables.
- *
- * @fmt: The page table format.
- * @cookie: An opaque token provided by the IOMMU driver and passed back to
- * any callback routines.
- * @cfg: A copy of the page table configuration.
- * @ops: The page table operations in use for this set of page tables.
- */
-struct io_pgtable {
- enum io_pgtable_fmt fmt;
- void *cookie;
- struct io_pgtable_cfg cfg;
- struct io_pgtable_ops ops;
-};
-
-#define io_pgtable_ops_to_pgtable(x) container_of((x), struct io_pgtable, ops)
-
-static inline void io_pgtable_tlb_flush_all(struct io_pgtable *iop)
-{
- iop->cfg.tlb->tlb_flush_all(iop->cookie);
-}
-
-static inline void io_pgtable_tlb_add_flush(struct io_pgtable *iop,
- unsigned long iova, size_t size, size_t granule, bool leaf)
-{
- iop->cfg.tlb->tlb_add_flush(iova, size, granule, leaf, iop->cookie);
-}
-
-static inline void io_pgtable_tlb_sync(struct io_pgtable *iop)
-{
- iop->cfg.tlb->tlb_sync(iop->cookie);
-}
-
-/**
- * struct io_pgtable_init_fns - Alloc/free a set of page tables for a
- * particular format.
- *
- * @alloc: Allocate a set of page tables described by cfg.
- * @free: Free the page tables associated with iop.
- */
-struct io_pgtable_init_fns {
- struct io_pgtable *(*alloc)(struct io_pgtable_cfg *cfg, void *cookie);
- void (*free)(struct io_pgtable *iop);
-};
-
-extern struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s1_init_fns;
-extern struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s2_init_fns;
-extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns;
-extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns;
-extern struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns;
-
-#endif /* __IO_PGTABLE_H */
diff --git a/drivers/iommu/iommu-debugfs.c b/drivers/iommu/iommu-debugfs.c
index 3b1bf88..f035489 100644
--- a/drivers/iommu/iommu-debugfs.c
+++ b/drivers/iommu/iommu-debugfs.c
@@ -12,6 +12,7 @@
#include <linux/debugfs.h>
struct dentry *iommu_debugfs_dir;
+EXPORT_SYMBOL_GPL(iommu_debugfs_dir);
/**
* iommu_debugfs_setup - create the top-level iommu directory in debugfs
@@ -23,9 +24,9 @@
* Emit a strong warning at boot time to indicate that this feature is
* enabled.
*
- * This function is called from iommu_init; drivers may then call
- * iommu_debugfs_new_driver_dir() to instantiate a vendor-specific
- * directory to be used to expose internal data.
+ * This function is called from iommu_init; drivers may then use
+ * iommu_debugfs_dir to instantiate a vendor-specific directory to be used
+ * to expose internal data.
*/
void iommu_debugfs_setup(void)
{
@@ -48,19 +49,3 @@
pr_warn("*************************************************************\n");
}
}
-
-/**
- * iommu_debugfs_new_driver_dir - create a vendor directory under debugfs/iommu
- * @vendor: name of the vendor-specific subdirectory to create
- *
- * This function is called by an IOMMU driver to create the top-level debugfs
- * directory for that driver.
- *
- * Return: upon success, a pointer to the dentry for the new directory.
- * NULL in case of failure.
- */
-struct dentry *iommu_debugfs_new_driver_dir(const char *vendor)
-{
- return debugfs_create_dir(vendor, iommu_debugfs_dir);
-}
-EXPORT_SYMBOL_GPL(iommu_debugfs_new_driver_dir);
diff --git a/drivers/iommu/iommu-sysfs.c b/drivers/iommu/iommu-sysfs.c
index 36d1a7c..e436ff8 100644
--- a/drivers/iommu/iommu-sysfs.c
+++ b/drivers/iommu/iommu-sysfs.c
@@ -1,17 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* IOMMU sysfs class support
*
* Copyright (C) 2014 Red Hat, Inc. All rights reserved.
* Author: Alex Williamson <alex.williamson@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/device.h>
#include <linux/iommu.h>
-#include <linux/module.h>
+#include <linux/init.h>
#include <linux/slab.h>
/*
@@ -22,25 +19,25 @@
NULL,
};
-static const struct attribute_group iommu_devices_attr_group = {
+static const struct attribute_group devices_attr_group = {
.name = "devices",
.attrs = devices_attr,
};
-static const struct attribute_group *iommu_dev_groups[] = {
- &iommu_devices_attr_group,
+static const struct attribute_group *dev_groups[] = {
+ &devices_attr_group,
NULL,
};
-static void iommu_release_device(struct device *dev)
+static void release_device(struct device *dev)
{
kfree(dev);
}
static struct class iommu_class = {
.name = "iommu",
- .dev_release = iommu_release_device,
- .dev_groups = iommu_dev_groups,
+ .dev_release = release_device,
+ .dev_groups = dev_groups,
};
static int __init iommu_dev_init(void)
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 8c15c59..d658c7c 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1,19 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
* Author: Joerg Roedel <jroedel@suse.de>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#define pr_fmt(fmt) "iommu: " fmt
@@ -22,7 +10,8 @@
#include <linux/kernel.h>
#include <linux/bug.h>
#include <linux/types.h>
-#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/export.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/iommu.h>
@@ -32,19 +21,15 @@
#include <linux/pci.h>
#include <linux/bitops.h>
#include <linux/property.h>
+#include <linux/fsl/mc.h>
#include <trace/events/iommu.h>
static struct kset *iommu_group_kset;
static DEFINE_IDA(iommu_group_ida);
-#ifdef CONFIG_IOMMU_DEFAULT_PASSTHROUGH
-static unsigned int iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY;
-#else
-static unsigned int iommu_def_domain_type = IOMMU_DOMAIN_DMA;
-#endif
-struct iommu_callback_data {
- const struct iommu_ops *ops;
-};
+static unsigned int iommu_def_domain_type __read_mostly;
+static bool iommu_dma_strict __read_mostly = true;
+static u32 iommu_cmd_line __read_mostly;
struct iommu_group {
struct kobject kobj;
@@ -74,12 +59,25 @@
};
static const char * const iommu_group_resv_type_string[] = {
- [IOMMU_RESV_DIRECT] = "direct",
- [IOMMU_RESV_RESERVED] = "reserved",
- [IOMMU_RESV_MSI] = "msi",
- [IOMMU_RESV_SW_MSI] = "msi",
+ [IOMMU_RESV_DIRECT] = "direct",
+ [IOMMU_RESV_DIRECT_RELAXABLE] = "direct-relaxable",
+ [IOMMU_RESV_RESERVED] = "reserved",
+ [IOMMU_RESV_MSI] = "msi",
+ [IOMMU_RESV_SW_MSI] = "msi",
};
+#define IOMMU_CMD_LINE_DMA_API BIT(0)
+
+static void iommu_set_cmd_line_dma_api(void)
+{
+ iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API;
+}
+
+static bool iommu_cmd_line_dma_api(void)
+{
+ return !!(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API);
+}
+
#define IOMMU_GROUP_ATTR(_name, _mode, _show, _store) \
struct iommu_group_attribute iommu_group_attr_##_name = \
__ATTR(_name, _mode, _show, _store)
@@ -92,12 +90,55 @@
static LIST_HEAD(iommu_device_list);
static DEFINE_SPINLOCK(iommu_device_lock);
+/*
+ * Use a function instead of an array here because the domain-type is a
+ * bit-field, so an array would waste memory.
+ */
+static const char *iommu_domain_type_str(unsigned int t)
+{
+ switch (t) {
+ case IOMMU_DOMAIN_BLOCKED:
+ return "Blocked";
+ case IOMMU_DOMAIN_IDENTITY:
+ return "Passthrough";
+ case IOMMU_DOMAIN_UNMANAGED:
+ return "Unmanaged";
+ case IOMMU_DOMAIN_DMA:
+ return "Translated";
+ default:
+ return "Unknown";
+ }
+}
+
+static int __init iommu_subsys_init(void)
+{
+ bool cmd_line = iommu_cmd_line_dma_api();
+
+ if (!cmd_line) {
+ if (IS_ENABLED(CONFIG_IOMMU_DEFAULT_PASSTHROUGH))
+ iommu_set_default_passthrough(false);
+ else
+ iommu_set_default_translated(false);
+
+ if (iommu_default_passthrough() && mem_encrypt_active()) {
+ pr_info("Memory encryption detected - Disabling default IOMMU Passthrough\n");
+ iommu_set_default_translated(false);
+ }
+ }
+
+ pr_info("Default domain type: %s %s\n",
+ iommu_domain_type_str(iommu_def_domain_type),
+ cmd_line ? "(set via kernel command line)" : "");
+
+ return 0;
+}
+subsys_initcall(iommu_subsys_init);
+
int iommu_device_register(struct iommu_device *iommu)
{
spin_lock(&iommu_device_lock);
list_add_tail(&iommu->list, &iommu_device_list);
spin_unlock(&iommu_device_lock);
-
return 0;
}
@@ -108,6 +149,57 @@
spin_unlock(&iommu_device_lock);
}
+static struct iommu_param *iommu_get_dev_param(struct device *dev)
+{
+ struct iommu_param *param = dev->iommu_param;
+
+ if (param)
+ return param;
+
+ param = kzalloc(sizeof(*param), GFP_KERNEL);
+ if (!param)
+ return NULL;
+
+ mutex_init(&param->lock);
+ dev->iommu_param = param;
+ return param;
+}
+
+static void iommu_free_dev_param(struct device *dev)
+{
+ kfree(dev->iommu_param);
+ dev->iommu_param = NULL;
+}
+
+int iommu_probe_device(struct device *dev)
+{
+ const struct iommu_ops *ops = dev->bus->iommu_ops;
+ int ret;
+
+ WARN_ON(dev->iommu_group);
+ if (!ops)
+ return -EINVAL;
+
+ if (!iommu_get_dev_param(dev))
+ return -ENOMEM;
+
+ ret = ops->add_device(dev);
+ if (ret)
+ iommu_free_dev_param(dev);
+
+ return ret;
+}
+
+void iommu_release_device(struct device *dev)
+{
+ const struct iommu_ops *ops = dev->bus->iommu_ops;
+
+ if (dev->iommu_group)
+ ops->remove_device(dev);
+
+ iommu_free_dev_param(dev);
+}
+
static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus,
unsigned type);
static int __iommu_attach_device(struct iommu_domain *domain,
@@ -126,11 +218,21 @@
if (ret)
return ret;
- iommu_def_domain_type = pt ? IOMMU_DOMAIN_IDENTITY : IOMMU_DOMAIN_DMA;
+ if (pt)
+ iommu_set_default_passthrough(true);
+ else
+ iommu_set_default_translated(true);
+
return 0;
}
early_param("iommu.passthrough", iommu_set_def_domain_type);
+static int __init iommu_dma_setup(char *str)
+{
+ return kstrtobool(str, &iommu_dma_strict);
+}
+early_param("iommu.strict", iommu_dma_setup);
+
static ssize_t iommu_group_attr_show(struct kobject *kobj,
struct attribute *__attr, char *buf)
{
@@ -184,58 +286,58 @@
* @new: new region to insert
* @regions: list of regions
*
- * The new element is sorted by address with respect to the other
- * regions of the same type. In case it overlaps with another
- * region of the same type, regions are merged. In case it
- * overlaps with another region of different type, regions are
- * not merged.
+ * Elements are sorted by start address and overlapping segments
+ * of the same type are merged.
*/
-static int iommu_insert_resv_region(struct iommu_resv_region *new,
- struct list_head *regions)
+int iommu_insert_resv_region(struct iommu_resv_region *new,
+ struct list_head *regions)
{
- struct iommu_resv_region *region;
- phys_addr_t start = new->start;
- phys_addr_t end = new->start + new->length - 1;
- struct list_head *pos = regions->next;
+ struct iommu_resv_region *iter, *tmp, *nr, *top;
+ LIST_HEAD(stack);
- while (pos != regions) {
- struct iommu_resv_region *entry =
- list_entry(pos, struct iommu_resv_region, list);
- phys_addr_t a = entry->start;
- phys_addr_t b = entry->start + entry->length - 1;
- int type = entry->type;
-
- if (end < a) {
- goto insert;
- } else if (start > b) {
- pos = pos->next;
- } else if ((start >= a) && (end <= b)) {
- if (new->type == type)
- goto done;
- else
- pos = pos->next;
- } else {
- if (new->type == type) {
- phys_addr_t new_start = min(a, start);
- phys_addr_t new_end = max(b, end);
-
- list_del(&entry->list);
- entry->start = new_start;
- entry->length = new_end - new_start + 1;
- iommu_insert_resv_region(entry, regions);
- } else {
- pos = pos->next;
- }
- }
- }
-insert:
- region = iommu_alloc_resv_region(new->start, new->length,
- new->prot, new->type);
- if (!region)
+ nr = iommu_alloc_resv_region(new->start, new->length,
+ new->prot, new->type);
+ if (!nr)
return -ENOMEM;
- list_add_tail(&region->list, pos);
-done:
+ /* First add the new element based on start address sorting */
+ list_for_each_entry(iter, regions, list) {
+ if (nr->start < iter->start ||
+ (nr->start == iter->start && nr->type <= iter->type))
+ break;
+ }
+ list_add_tail(&nr->list, &iter->list);
+
+ /* Merge overlapping segments of type nr->type in @regions, if any */
+ list_for_each_entry_safe(iter, tmp, regions, list) {
+ phys_addr_t top_end, iter_end = iter->start + iter->length - 1;
+
+ /* no merge needed on elements of different types than @nr */
+ if (iter->type != nr->type) {
+ list_move_tail(&iter->list, &stack);
+ continue;
+ }
+
+ /* look for the last stack element of same type as @iter */
+ list_for_each_entry_reverse(top, &stack, list)
+ if (top->type == iter->type)
+ goto check_overlap;
+
+ list_move_tail(&iter->list, &stack);
+ continue;
+
+check_overlap:
+ top_end = top->start + top->length - 1;
+
+ if (iter->start > top_end + 1) {
+ list_move_tail(&iter->list, &stack);
+ } else {
+ top->length = max(top_end, iter_end) - top->start + 1;
+ list_del(&iter->list);
+ kfree(iter);
+ }
+ }
+ list_splice(&stack, regions);
return 0;
}
@@ -315,7 +417,7 @@
type = "unmanaged\n";
break;
case IOMMU_DOMAIN_DMA:
- type = "DMA";
+ type = "DMA\n";
break;
}
}
@@ -547,7 +649,8 @@
start = ALIGN(entry->start, pg_size);
end = ALIGN(entry->start + entry->length, pg_size);
- if (entry->type != IOMMU_RESV_DIRECT)
+ if (entry->type != IOMMU_RESV_DIRECT &&
+ entry->type != IOMMU_RESV_DIRECT_RELAXABLE)
continue;
for (addr = start; addr < end; addr += pg_size) {
@@ -638,7 +741,7 @@
trace_add_device_to_group(group->id, dev);
- pr_info("Adding device %s to group %d\n", dev_name(dev), group->id);
+ dev_info(dev, "Adding to iommu group %d\n", group->id);
return 0;
@@ -654,7 +757,7 @@
sysfs_remove_link(&dev->kobj, "iommu_group");
err_free_device:
kfree(device);
- pr_err("Failed to add device %s to group %d: %d\n", dev_name(dev), group->id, ret);
+ dev_err(dev, "Failed to add to iommu group %d: %d\n", group->id, ret);
return ret;
}
EXPORT_SYMBOL_GPL(iommu_group_add_device);
@@ -671,7 +774,7 @@
struct iommu_group *group = dev->iommu_group;
struct group_device *tmp_device, *device = NULL;
- pr_info("Removing device %s from group %d\n", dev_name(dev), group->id);
+ dev_info(dev, "Removing from iommu group %d\n", group->id);
/* Pre-notify listeners that a device is being removed. */
blocking_notifier_call_chain(&group->notifier,
@@ -829,6 +932,206 @@
EXPORT_SYMBOL_GPL(iommu_group_unregister_notifier);
/**
+ * iommu_register_device_fault_handler() - Register a device fault handler
+ * @dev: the device
+ * @handler: the fault handler
+ * @data: private data passed as argument to the handler
+ *
+ * When an IOMMU fault event is received, this handler gets called with the
+ * fault event and data as argument. The handler should return 0 on success. If
+ * the fault is recoverable (IOMMU_FAULT_PAGE_REQ), the consumer should also
+ * complete the fault by calling iommu_page_response() with one of the following
+ * response code:
+ * - IOMMU_PAGE_RESP_SUCCESS: retry the translation
+ * - IOMMU_PAGE_RESP_INVALID: terminate the fault
+ * - IOMMU_PAGE_RESP_FAILURE: terminate the fault and stop reporting
+ * page faults if possible.
+ *
+ * Return 0 if the fault handler was installed successfully, or an error.
+ */
+int iommu_register_device_fault_handler(struct device *dev,
+ iommu_dev_fault_handler_t handler,
+ void *data)
+{
+ struct iommu_param *param = dev->iommu_param;
+ int ret = 0;
+
+ if (!param)
+ return -EINVAL;
+
+ mutex_lock(&param->lock);
+ /* Only allow one fault handler registered for each device */
+ if (param->fault_param) {
+ ret = -EBUSY;
+ goto done_unlock;
+ }
+
+ get_device(dev);
+ param->fault_param = kzalloc(sizeof(*param->fault_param), GFP_KERNEL);
+ if (!param->fault_param) {
+ put_device(dev);
+ ret = -ENOMEM;
+ goto done_unlock;
+ }
+ param->fault_param->handler = handler;
+ param->fault_param->data = data;
+ mutex_init(&param->fault_param->lock);
+ INIT_LIST_HEAD(&param->fault_param->faults);
+
+done_unlock:
+ mutex_unlock(&param->lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_register_device_fault_handler);
+
+/**
+ * iommu_unregister_device_fault_handler() - Unregister the device fault handler
+ * @dev: the device
+ *
+ * Remove the device fault handler installed with
+ * iommu_register_device_fault_handler().
+ *
+ * Return 0 on success, or an error.
+ */
+int iommu_unregister_device_fault_handler(struct device *dev)
+{
+ struct iommu_param *param = dev->iommu_param;
+ int ret = 0;
+
+ if (!param)
+ return -EINVAL;
+
+ mutex_lock(&param->lock);
+
+ if (!param->fault_param)
+ goto unlock;
+
+ /* we cannot unregister handler if there are pending faults */
+ if (!list_empty(&param->fault_param->faults)) {
+ ret = -EBUSY;
+ goto unlock;
+ }
+
+ kfree(param->fault_param);
+ param->fault_param = NULL;
+ put_device(dev);
+unlock:
+ mutex_unlock(&param->lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_unregister_device_fault_handler);
+
+/**
+ * iommu_report_device_fault() - Report fault event to device driver
+ * @dev: the device
+ * @evt: fault event data
+ *
+ * Called by IOMMU drivers when a fault is detected, typically in a threaded IRQ
+ * handler. When this function fails and the fault is recoverable, it is the
+ * caller's responsibility to complete the fault.
+ *
+ * Return 0 on success, or an error.
+ */
+int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt)
+{
+ struct iommu_param *param = dev->iommu_param;
+ struct iommu_fault_event *evt_pending = NULL;
+ struct iommu_fault_param *fparam;
+ int ret = 0;
+
+ if (!param || !evt)
+ return -EINVAL;
+
+ /* we only report device fault if there is a handler registered */
+ mutex_lock(&param->lock);
+ fparam = param->fault_param;
+ if (!fparam || !fparam->handler) {
+ ret = -EINVAL;
+ goto done_unlock;
+ }
+
+ if (evt->fault.type == IOMMU_FAULT_PAGE_REQ &&
+ (evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
+ evt_pending = kmemdup(evt, sizeof(struct iommu_fault_event),
+ GFP_KERNEL);
+ if (!evt_pending) {
+ ret = -ENOMEM;
+ goto done_unlock;
+ }
+ mutex_lock(&fparam->lock);
+ list_add_tail(&evt_pending->list, &fparam->faults);
+ mutex_unlock(&fparam->lock);
+ }
+
+ ret = fparam->handler(&evt->fault, fparam->data);
+ if (ret && evt_pending) {
+ mutex_lock(&fparam->lock);
+ list_del(&evt_pending->list);
+ mutex_unlock(&fparam->lock);
+ kfree(evt_pending);
+ }
+done_unlock:
+ mutex_unlock(&param->lock);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_report_device_fault);
+
+int iommu_page_response(struct device *dev,
+ struct iommu_page_response *msg)
+{
+ bool pasid_valid;
+ int ret = -EINVAL;
+ struct iommu_fault_event *evt;
+ struct iommu_fault_page_request *prm;
+ struct iommu_param *param = dev->iommu_param;
+ struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+
+ if (!domain || !domain->ops->page_response)
+ return -ENODEV;
+
+ if (!param || !param->fault_param)
+ return -EINVAL;
+
+ if (msg->version != IOMMU_PAGE_RESP_VERSION_1 ||
+ msg->flags & ~IOMMU_PAGE_RESP_PASID_VALID)
+ return -EINVAL;
+
+ /* Only send response if there is a fault report pending */
+ mutex_lock(&param->fault_param->lock);
+ if (list_empty(&param->fault_param->faults)) {
+ dev_warn_ratelimited(dev, "no pending PRQ, drop response\n");
+ goto done_unlock;
+ }
+ /*
+ * Check if we have a matching page request pending to respond,
+ * otherwise return -EINVAL
+ */
+ list_for_each_entry(evt, &param->fault_param->faults, list) {
+ prm = &evt->fault.prm;
+ pasid_valid = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
+
+ if ((pasid_valid && prm->pasid != msg->pasid) ||
+ prm->grpid != msg->grpid)
+ continue;
+
+ /* Sanitize the reply */
+ msg->flags = pasid_valid ? IOMMU_PAGE_RESP_PASID_VALID : 0;
+
+ ret = domain->ops->page_response(dev, evt, msg);
+ list_del(&evt->list);
+ kfree(evt);
+ break;
+ }
+
+done_unlock:
+ mutex_unlock(&param->fault_param->lock);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_page_response);
+
+/**
* iommu_group_id - Return ID for a group
* @group: the group to ID
*
@@ -1024,6 +1327,18 @@
return iommu_group_alloc();
}
+/* Get the IOMMU group for device on fsl-mc bus */
+struct iommu_group *fsl_mc_device_group(struct device *dev)
+{
+ struct device *cont_dev = fsl_mc_cont_dev(dev);
+ struct iommu_group *group;
+
+ group = iommu_group_get(cont_dev);
+ if (!group)
+ group = iommu_group_alloc();
+ return group;
+}
+
/**
* iommu_group_get_for_dev - Find or create the IOMMU group for a device
* @dev: target device
@@ -1063,15 +1378,24 @@
dom = __iommu_domain_alloc(dev->bus, iommu_def_domain_type);
if (!dom && iommu_def_domain_type != IOMMU_DOMAIN_DMA) {
- dev_warn(dev,
- "failed to allocate default IOMMU domain of type %u; falling back to IOMMU_DOMAIN_DMA",
- iommu_def_domain_type);
dom = __iommu_domain_alloc(dev->bus, IOMMU_DOMAIN_DMA);
+ if (dom) {
+ dev_warn(dev,
+ "failed to allocate default IOMMU domain of type %u; falling back to IOMMU_DOMAIN_DMA",
+ iommu_def_domain_type);
+ }
}
group->default_domain = dom;
if (!group->domain)
group->domain = dom;
+
+ if (dom && !iommu_dma_strict) {
+ int attr = 1;
+ iommu_domain_set_attr(dom,
+ DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE,
+ &attr);
+ }
}
ret = iommu_group_add_device(group, dev);
@@ -1090,16 +1414,7 @@
static int add_iommu_group(struct device *dev, void *data)
{
- struct iommu_callback_data *cb = data;
- const struct iommu_ops *ops = cb->ops;
- int ret;
-
- if (!ops->add_device)
- return 0;
-
- WARN_ON(dev->iommu_group);
-
- ret = ops->add_device(dev);
+ int ret = iommu_probe_device(dev);
/*
* We ignore -ENODEV errors for now, as they just mean that the
@@ -1114,11 +1429,7 @@
static int remove_iommu_group(struct device *dev, void *data)
{
- struct iommu_callback_data *cb = data;
- const struct iommu_ops *ops = cb->ops;
-
- if (ops->remove_device && dev->iommu_group)
- ops->remove_device(dev);
+ iommu_release_device(dev);
return 0;
}
@@ -1126,27 +1437,22 @@
static int iommu_bus_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
- struct device *dev = data;
- const struct iommu_ops *ops = dev->bus->iommu_ops;
- struct iommu_group *group;
unsigned long group_action = 0;
+ struct device *dev = data;
+ struct iommu_group *group;
/*
* ADD/DEL call into iommu driver ops if provided, which may
* result in ADD/DEL notifiers to group->notifier
*/
if (action == BUS_NOTIFY_ADD_DEVICE) {
- if (ops->add_device) {
- int ret;
+ int ret;
- ret = ops->add_device(dev);
- return (ret) ? NOTIFY_DONE : NOTIFY_OK;
- }
+ ret = iommu_probe_device(dev);
+ return (ret) ? NOTIFY_DONE : NOTIFY_OK;
} else if (action == BUS_NOTIFY_REMOVED_DEVICE) {
- if (ops->remove_device && dev->iommu_group) {
- ops->remove_device(dev);
- return 0;
- }
+ iommu_release_device(dev);
+ return NOTIFY_OK;
}
/*
@@ -1184,9 +1490,6 @@
{
int err;
struct notifier_block *nb;
- struct iommu_callback_data cb = {
- .ops = ops,
- };
nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL);
if (!nb)
@@ -1198,7 +1501,7 @@
if (err)
goto out_free;
- err = bus_for_each_dev(bus, NULL, &cb, add_iommu_group);
+ err = bus_for_each_dev(bus, NULL, NULL, add_iommu_group);
if (err)
goto out_err;
@@ -1207,7 +1510,7 @@
out_err:
/* Clean up */
- bus_for_each_dev(bus, NULL, &cb, remove_iommu_group);
+ bus_for_each_dev(bus, NULL, NULL, remove_iommu_group);
bus_unregister_notifier(bus, nb);
out_free:
@@ -1416,7 +1719,16 @@
EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev);
/*
- * IOMMU groups are really the natrual working unit of the IOMMU, but
+ * For IOMMU_DOMAIN_DMA implementations which already provide their own
+ * guarantees that the group and its default domain are valid and correct.
+ */
+struct iommu_domain *iommu_get_dma_domain(struct device *dev)
+{
+ return dev->iommu_group->default_domain;
+}
+
+/*
+ * IOMMU groups are really the natural working unit of the IOMMU, but
* the IOMMU API works on domains and devices. Bridge that gap by
* iterating over the devices in a group. Ideally we'd have a single
* device which represents the requestor ID of the group, but we also
@@ -1545,13 +1857,14 @@
int iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot)
{
+ const struct iommu_ops *ops = domain->ops;
unsigned long orig_iova = iova;
unsigned int min_pagesz;
size_t orig_size = size;
phys_addr_t orig_paddr = paddr;
int ret = 0;
- if (unlikely(domain->ops->map == NULL ||
+ if (unlikely(ops->map == NULL ||
domain->pgsize_bitmap == 0UL))
return -ENODEV;
@@ -1580,7 +1893,7 @@
pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n",
iova, &paddr, pgsize);
- ret = domain->ops->map(domain, iova, paddr, pgsize, prot);
+ ret = ops->map(domain, iova, paddr, pgsize, prot);
if (ret)
break;
@@ -1589,6 +1902,9 @@
size -= pgsize;
}
+ if (ops->iotlb_sync_map)
+ ops->iotlb_sync_map(domain);
+
/* unroll mapping in case something went wrong */
if (ret)
iommu_unmap(domain, orig_iova, orig_size - size);
@@ -1601,7 +1917,7 @@
static size_t __iommu_unmap(struct iommu_domain *domain,
unsigned long iova, size_t size,
- bool sync)
+ struct iommu_iotlb_gather *iotlb_gather)
{
const struct iommu_ops *ops = domain->ops;
size_t unmapped_page, unmapped = 0;
@@ -1638,13 +1954,10 @@
while (unmapped < size) {
size_t pgsize = iommu_pgsize(domain, iova, size - unmapped);
- unmapped_page = ops->unmap(domain, iova, pgsize);
+ unmapped_page = ops->unmap(domain, iova, pgsize, iotlb_gather);
if (!unmapped_page)
break;
- if (sync && ops->iotlb_range_add)
- ops->iotlb_range_add(domain, iova, pgsize);
-
pr_debug("unmapped: iova 0x%lx size 0x%zx\n",
iova, unmapped_page);
@@ -1652,9 +1965,6 @@
unmapped += unmapped_page;
}
- if (sync && ops->iotlb_sync)
- ops->iotlb_sync(domain);
-
trace_unmap(orig_iova, size, unmapped);
return unmapped;
}
@@ -1662,47 +1972,54 @@
size_t iommu_unmap(struct iommu_domain *domain,
unsigned long iova, size_t size)
{
- return __iommu_unmap(domain, iova, size, true);
+ struct iommu_iotlb_gather iotlb_gather;
+ size_t ret;
+
+ iommu_iotlb_gather_init(&iotlb_gather);
+ ret = __iommu_unmap(domain, iova, size, &iotlb_gather);
+ iommu_tlb_sync(domain, &iotlb_gather);
+
+ return ret;
}
EXPORT_SYMBOL_GPL(iommu_unmap);
size_t iommu_unmap_fast(struct iommu_domain *domain,
- unsigned long iova, size_t size)
+ unsigned long iova, size_t size,
+ struct iommu_iotlb_gather *iotlb_gather)
{
- return __iommu_unmap(domain, iova, size, false);
+ return __iommu_unmap(domain, iova, size, iotlb_gather);
}
EXPORT_SYMBOL_GPL(iommu_unmap_fast);
size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
struct scatterlist *sg, unsigned int nents, int prot)
{
- struct scatterlist *s;
- size_t mapped = 0;
- unsigned int i, min_pagesz;
+ size_t len = 0, mapped = 0;
+ phys_addr_t start;
+ unsigned int i = 0;
int ret;
- if (unlikely(domain->pgsize_bitmap == 0UL))
- return 0;
+ while (i <= nents) {
+ phys_addr_t s_phys = sg_phys(sg);
- min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
+ if (len && s_phys != start + len) {
+ ret = iommu_map(domain, iova + mapped, start, len, prot);
+ if (ret)
+ goto out_err;
- for_each_sg(sg, s, nents, i) {
- phys_addr_t phys = page_to_phys(sg_page(s)) + s->offset;
+ mapped += len;
+ len = 0;
+ }
- /*
- * We are mapping on IOMMU page boundaries, so offset within
- * the page must be 0. However, the IOMMU may support pages
- * smaller than PAGE_SIZE, so s->offset may still represent
- * an offset of that boundary within the CPU page.
- */
- if (!IS_ALIGNED(s->offset, min_pagesz))
- goto out_err;
+ if (len) {
+ len += sg->length;
+ } else {
+ len = sg->length;
+ start = s_phys;
+ }
- ret = iommu_map(domain, iova + mapped, phys, s->length, prot);
- if (ret)
- goto out_err;
-
- mapped += s->length;
+ if (++i < nents)
+ sg = sg_next(sg);
}
return mapped;
@@ -1796,7 +2113,6 @@
struct iommu_domain_geometry *geometry;
bool *paging;
int ret = 0;
- u32 *count;
switch (attr) {
case DOMAIN_ATTR_GEOMETRY:
@@ -1808,15 +2124,6 @@
paging = data;
*paging = (domain->pgsize_bitmap != 0UL);
break;
- case DOMAIN_ATTR_WINDOWS:
- count = data;
-
- if (domain->ops->domain_get_windows != NULL)
- *count = domain->ops->domain_get_windows(domain);
- else
- ret = -ENODEV;
-
- break;
default:
if (!domain->ops->domain_get_attr)
return -EINVAL;
@@ -1832,18 +2139,8 @@
enum iommu_attr attr, void *data)
{
int ret = 0;
- u32 *count;
switch (attr) {
- case DOMAIN_ATTR_WINDOWS:
- count = data;
-
- if (domain->ops->domain_set_windows != NULL)
- ret = domain->ops->domain_set_windows(domain, *count);
- else
- ret = -ENODEV;
-
- break;
default:
if (domain->ops->domain_set_attr == NULL)
return -EINVAL;
@@ -1889,24 +2186,22 @@
return region;
}
-/* Request that a device is direct mapped by the IOMMU */
-int iommu_request_dm_for_dev(struct device *dev)
+static int
+request_default_domain_for_dev(struct device *dev, unsigned long type)
{
- struct iommu_domain *dm_domain;
+ struct iommu_domain *domain;
struct iommu_group *group;
int ret;
/* Device must already be in a group before calling this function */
- group = iommu_group_get_for_dev(dev);
- if (IS_ERR(group))
- return PTR_ERR(group);
+ group = iommu_group_get(dev);
+ if (!group)
+ return -EINVAL;
mutex_lock(&group->mutex);
- /* Check if the default domain is already direct mapped */
ret = 0;
- if (group->default_domain &&
- group->default_domain->type == IOMMU_DOMAIN_IDENTITY)
+ if (group->default_domain && group->default_domain->type == type)
goto out;
/* Don't change mappings of existing devices */
@@ -1914,25 +2209,27 @@
if (iommu_group_device_count(group) != 1)
goto out;
- /* Allocate a direct mapped domain */
ret = -ENOMEM;
- dm_domain = __iommu_domain_alloc(dev->bus, IOMMU_DOMAIN_IDENTITY);
- if (!dm_domain)
+ domain = __iommu_domain_alloc(dev->bus, type);
+ if (!domain)
goto out;
/* Attach the device to the domain */
- ret = __iommu_attach_group(dm_domain, group);
+ ret = __iommu_attach_group(domain, group);
if (ret) {
- iommu_domain_free(dm_domain);
+ iommu_domain_free(domain);
goto out;
}
- /* Make the direct mapped domain the default for this group */
+ iommu_group_create_direct_mappings(group, dev);
+
+ /* Make the domain the default for this group */
if (group->default_domain)
iommu_domain_free(group->default_domain);
- group->default_domain = dm_domain;
+ group->default_domain = domain;
- pr_info("Using direct mapping for device %s\n", dev_name(dev));
+ dev_info(dev, "Using iommu %s mapping\n",
+ type == IOMMU_DOMAIN_DMA ? "dma" : "direct");
ret = 0;
out:
@@ -1942,6 +2239,40 @@
return ret;
}
+/* Request that a device is direct mapped by the IOMMU */
+int iommu_request_dm_for_dev(struct device *dev)
+{
+ return request_default_domain_for_dev(dev, IOMMU_DOMAIN_IDENTITY);
+}
+
+/* Request that a device can't be direct mapped by the IOMMU */
+int iommu_request_dma_domain_for_dev(struct device *dev)
+{
+ return request_default_domain_for_dev(dev, IOMMU_DOMAIN_DMA);
+}
+
+void iommu_set_default_passthrough(bool cmd_line)
+{
+ if (cmd_line)
+ iommu_set_cmd_line_dma_api();
+
+ iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY;
+}
+
+void iommu_set_default_translated(bool cmd_line)
+{
+ if (cmd_line)
+ iommu_set_cmd_line_dma_api();
+
+ iommu_def_domain_type = IOMMU_DOMAIN_DMA;
+}
+
+bool iommu_default_passthrough(void)
+{
+ return iommu_def_domain_type == IOMMU_DOMAIN_IDENTITY;
+}
+EXPORT_SYMBOL_GPL(iommu_default_passthrough);
+
const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
{
const struct iommu_ops *ops = NULL;
@@ -1960,7 +2291,7 @@
int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
const struct iommu_ops *ops)
{
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
if (fwspec)
return ops == fwspec->ops ? 0 : -EINVAL;
@@ -1972,26 +2303,26 @@
of_node_get(to_of_node(iommu_fwnode));
fwspec->iommu_fwnode = iommu_fwnode;
fwspec->ops = ops;
- dev->iommu_fwspec = fwspec;
+ dev_iommu_fwspec_set(dev, fwspec);
return 0;
}
EXPORT_SYMBOL_GPL(iommu_fwspec_init);
void iommu_fwspec_free(struct device *dev)
{
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
if (fwspec) {
fwnode_handle_put(fwspec->iommu_fwnode);
kfree(fwspec);
- dev->iommu_fwspec = NULL;
+ dev_iommu_fwspec_set(dev, NULL);
}
}
EXPORT_SYMBOL_GPL(iommu_fwspec_free);
int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids)
{
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
size_t size;
int i;
@@ -2000,11 +2331,11 @@
size = offsetof(struct iommu_fwspec, ids[fwspec->num_ids + num_ids]);
if (size > sizeof(*fwspec)) {
- fwspec = krealloc(dev->iommu_fwspec, size, GFP_KERNEL);
+ fwspec = krealloc(fwspec, size, GFP_KERNEL);
if (!fwspec)
return -ENOMEM;
- dev->iommu_fwspec = fwspec;
+ dev_iommu_fwspec_set(dev, fwspec);
}
for (i = 0; i < num_ids; i++)
@@ -2014,3 +2345,203 @@
return 0;
}
EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids);
+
+/*
+ * Per device IOMMU features.
+ */
+bool iommu_dev_has_feature(struct device *dev, enum iommu_dev_features feat)
+{
+ const struct iommu_ops *ops = dev->bus->iommu_ops;
+
+ if (ops && ops->dev_has_feat)
+ return ops->dev_has_feat(dev, feat);
+
+ return false;
+}
+EXPORT_SYMBOL_GPL(iommu_dev_has_feature);
+
+int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat)
+{
+ const struct iommu_ops *ops = dev->bus->iommu_ops;
+
+ if (ops && ops->dev_enable_feat)
+ return ops->dev_enable_feat(dev, feat);
+
+ return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(iommu_dev_enable_feature);
+
+/*
+ * The device drivers should do the necessary cleanups before calling this.
+ * For example, before disabling the aux-domain feature, the device driver
+ * should detach all aux-domains. Otherwise, this will return -EBUSY.
+ */
+int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat)
+{
+ const struct iommu_ops *ops = dev->bus->iommu_ops;
+
+ if (ops && ops->dev_disable_feat)
+ return ops->dev_disable_feat(dev, feat);
+
+ return -EBUSY;
+}
+EXPORT_SYMBOL_GPL(iommu_dev_disable_feature);
+
+bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features feat)
+{
+ const struct iommu_ops *ops = dev->bus->iommu_ops;
+
+ if (ops && ops->dev_feat_enabled)
+ return ops->dev_feat_enabled(dev, feat);
+
+ return false;
+}
+EXPORT_SYMBOL_GPL(iommu_dev_feature_enabled);
+
+/*
+ * Aux-domain specific attach/detach.
+ *
+ * Only works if iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX) returns
+ * true. Also, as long as domains are attached to a device through this
+ * interface, any tries to call iommu_attach_device() should fail
+ * (iommu_detach_device() can't fail, so we fail when trying to re-attach).
+ * This should make us safe against a device being attached to a guest as a
+ * whole while there are still pasid users on it (aux and sva).
+ */
+int iommu_aux_attach_device(struct iommu_domain *domain, struct device *dev)
+{
+ int ret = -ENODEV;
+
+ if (domain->ops->aux_attach_dev)
+ ret = domain->ops->aux_attach_dev(domain, dev);
+
+ if (!ret)
+ trace_attach_device_to_domain(dev);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_aux_attach_device);
+
+void iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev)
+{
+ if (domain->ops->aux_detach_dev) {
+ domain->ops->aux_detach_dev(domain, dev);
+ trace_detach_device_from_domain(dev);
+ }
+}
+EXPORT_SYMBOL_GPL(iommu_aux_detach_device);
+
+int iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
+{
+ int ret = -ENODEV;
+
+ if (domain->ops->aux_get_pasid)
+ ret = domain->ops->aux_get_pasid(domain, dev);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_aux_get_pasid);
+
+/**
+ * iommu_sva_bind_device() - Bind a process address space to a device
+ * @dev: the device
+ * @mm: the mm to bind, caller must hold a reference to it
+ *
+ * Create a bond between device and address space, allowing the device to access
+ * the mm using the returned PASID. If a bond already exists between @device and
+ * @mm, it is returned and an additional reference is taken. Caller must call
+ * iommu_sva_unbind_device() to release each reference.
+ *
+ * iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA) must be called first, to
+ * initialize the required SVA features.
+ *
+ * On error, returns an ERR_PTR value.
+ */
+struct iommu_sva *
+iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata)
+{
+ struct iommu_group *group;
+ struct iommu_sva *handle = ERR_PTR(-EINVAL);
+ const struct iommu_ops *ops = dev->bus->iommu_ops;
+
+ if (!ops || !ops->sva_bind)
+ return ERR_PTR(-ENODEV);
+
+ group = iommu_group_get(dev);
+ if (!group)
+ return ERR_PTR(-ENODEV);
+
+ /* Ensure device count and domain don't change while we're binding */
+ mutex_lock(&group->mutex);
+
+ /*
+ * To keep things simple, SVA currently doesn't support IOMMU groups
+ * with more than one device. Existing SVA-capable systems are not
+ * affected by the problems that required IOMMU groups (lack of ACS
+ * isolation, device ID aliasing and other hardware issues).
+ */
+ if (iommu_group_device_count(group) != 1)
+ goto out_unlock;
+
+ handle = ops->sva_bind(dev, mm, drvdata);
+
+out_unlock:
+ mutex_unlock(&group->mutex);
+ iommu_group_put(group);
+
+ return handle;
+}
+EXPORT_SYMBOL_GPL(iommu_sva_bind_device);
+
+/**
+ * iommu_sva_unbind_device() - Remove a bond created with iommu_sva_bind_device
+ * @handle: the handle returned by iommu_sva_bind_device()
+ *
+ * Put reference to a bond between device and address space. The device should
+ * not be issuing any more transaction for this PASID. All outstanding page
+ * requests for this PASID must have been flushed to the IOMMU.
+ *
+ * Returns 0 on success, or an error value
+ */
+void iommu_sva_unbind_device(struct iommu_sva *handle)
+{
+ struct iommu_group *group;
+ struct device *dev = handle->dev;
+ const struct iommu_ops *ops = dev->bus->iommu_ops;
+
+ if (!ops || !ops->sva_unbind)
+ return;
+
+ group = iommu_group_get(dev);
+ if (!group)
+ return;
+
+ mutex_lock(&group->mutex);
+ ops->sva_unbind(handle);
+ mutex_unlock(&group->mutex);
+
+ iommu_group_put(group);
+}
+EXPORT_SYMBOL_GPL(iommu_sva_unbind_device);
+
+int iommu_sva_set_ops(struct iommu_sva *handle,
+ const struct iommu_sva_ops *sva_ops)
+{
+ if (handle->ops && handle->ops != sva_ops)
+ return -EEXIST;
+
+ handle->ops = sva_ops;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(iommu_sva_set_ops);
+
+int iommu_sva_get_pasid(struct iommu_sva *handle)
+{
+ const struct iommu_ops *ops = handle->dev->bus->iommu_ops;
+
+ if (!ops || !ops->sva_get_pasid)
+ return IOMMU_PASID_INVALID;
+
+ return ops->sva_get_pasid(handle);
+}
+EXPORT_SYMBOL_GPL(iommu_sva_get_pasid);
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 83fe262..41c605b 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -1,19 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright © 2006-2009, Intel Corporation.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
* Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
*/
@@ -56,6 +44,7 @@
iovad->granule = granule;
iovad->start_pfn = start_pfn;
iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad));
+ iovad->max32_alloc_size = iovad->dma_32bit_pfn;
iovad->flush_cb = NULL;
iovad->fq = NULL;
iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR;
@@ -65,9 +54,14 @@
}
EXPORT_SYMBOL_GPL(init_iova_domain);
+bool has_iova_flush_queue(struct iova_domain *iovad)
+{
+ return !!iovad->fq;
+}
+
static void free_iova_flush_queue(struct iova_domain *iovad)
{
- if (!iovad->fq)
+ if (!has_iova_flush_queue(iovad))
return;
if (timer_pending(&iovad->fq_timer))
@@ -85,13 +79,14 @@
int init_iova_flush_queue(struct iova_domain *iovad,
iova_flush_cb flush_cb, iova_entry_dtor entry_dtor)
{
+ struct iova_fq __percpu *queue;
int cpu;
atomic64_set(&iovad->fq_flush_start_cnt, 0);
atomic64_set(&iovad->fq_flush_finish_cnt, 0);
- iovad->fq = alloc_percpu(struct iova_fq);
- if (!iovad->fq)
+ queue = alloc_percpu(struct iova_fq);
+ if (!queue)
return -ENOMEM;
iovad->flush_cb = flush_cb;
@@ -100,13 +95,17 @@
for_each_possible_cpu(cpu) {
struct iova_fq *fq;
- fq = per_cpu_ptr(iovad->fq, cpu);
+ fq = per_cpu_ptr(queue, cpu);
fq->head = 0;
fq->tail = 0;
spin_lock_init(&fq->lock);
}
+ smp_wmb();
+
+ iovad->fq = queue;
+
timer_setup(&iovad->fq_timer, fq_flush_timeout, 0);
atomic_set(&iovad->fq_timer_on, 0);
@@ -138,9 +137,12 @@
struct iova *cached_iova;
cached_iova = rb_entry(iovad->cached32_node, struct iova, node);
- if (free->pfn_hi < iovad->dma_32bit_pfn &&
- free->pfn_lo >= cached_iova->pfn_lo)
+ if (free == cached_iova ||
+ (free->pfn_hi < iovad->dma_32bit_pfn &&
+ free->pfn_lo >= cached_iova->pfn_lo)) {
iovad->cached32_node = rb_next(&free->node);
+ iovad->max32_alloc_size = iovad->dma_32bit_pfn;
+ }
cached_iova = rb_entry(iovad->cached_node, struct iova, node);
if (free->pfn_lo >= cached_iova->pfn_lo)
@@ -190,6 +192,10 @@
/* Walk the tree backwards */
spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
+ if (limit_pfn <= iovad->dma_32bit_pfn &&
+ size >= iovad->max32_alloc_size)
+ goto iova32_full;
+
curr = __get_cached_rbnode(iovad, limit_pfn);
curr_iova = rb_entry(curr, struct iova, node);
do {
@@ -201,8 +207,8 @@
} while (curr && new_pfn <= curr_iova->pfn_hi);
if (limit_pfn < size || new_pfn < iovad->start_pfn) {
- spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
- return -ENOMEM;
+ iovad->max32_alloc_size = size;
+ goto iova32_full;
}
/* pfn_lo will point to size aligned address if size_aligned is set */
@@ -214,9 +220,11 @@
__cached_rbnode_insert_update(iovad, new);
spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
-
-
return 0;
+
+iova32_full:
+ spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+ return -ENOMEM;
}
static struct kmem_cache *iova_cache;
@@ -569,7 +577,9 @@
spin_unlock_irqrestore(&fq->lock, flags);
- if (atomic_cmpxchg(&iovad->fq_timer_on, 0, 1) == 0)
+ /* Avoid false sharing as much as possible. */
+ if (!atomic_read(&iovad->fq_timer_on) &&
+ !atomic_cmpxchg(&iovad->fq_timer_on, 0, 1))
mod_timer(&iovad->fq_timer,
jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
}
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index d8598e4..2639fc7 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -1,11 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
- * IPMMU VMSA
+ * IOMMU API for Renesas VMSA-compatible IPMMU
+ * Author: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
*
* Copyright (C) 2014 Renesas Electronics Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
*/
#include <linux/bitmap.h>
@@ -14,10 +12,11 @@
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/export.h>
+#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/io.h>
+#include <linux/io-pgtable.h>
#include <linux/iommu.h>
-#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/of_iommu.h>
@@ -37,17 +36,20 @@
#define arm_iommu_detach_device(...) do {} while (0)
#endif
-#include "io-pgtable.h"
+#define IPMMU_CTX_MAX 8U
+#define IPMMU_CTX_INVALID -1
-#define IPMMU_CTX_MAX 8
+#define IPMMU_UTLB_MAX 48U
struct ipmmu_features {
bool use_ns_alias_offset;
bool has_cache_leaf_nodes;
unsigned int number_of_contexts;
+ unsigned int num_utlbs;
bool setup_imbuscr;
bool twobit_imttbcr_sl0;
bool reserved_context;
+ bool cache_snoop;
};
struct ipmmu_vmsa_device {
@@ -56,11 +58,11 @@
struct iommu_device iommu;
struct ipmmu_vmsa_device *root;
const struct ipmmu_features *features;
- unsigned int num_utlbs;
unsigned int num_ctx;
spinlock_t lock; /* Protects ctx and domains[] */
DECLARE_BITMAP(ctx, IPMMU_CTX_MAX);
struct ipmmu_vmsa_domain *domains[IPMMU_CTX_MAX];
+ s8 utlb_ctx[IPMMU_UTLB_MAX];
struct iommu_group *group;
struct dma_iommu_mapping *mapping;
@@ -84,7 +86,9 @@
static struct ipmmu_vmsa_device *to_ipmmu(struct device *dev)
{
- return dev->iommu_fwspec ? dev->iommu_fwspec->iommu_priv : NULL;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+
+ return fwspec ? fwspec->iommu_priv : NULL;
}
#define TLB_LOOP_TIMEOUT 100 /* 100us */
@@ -112,45 +116,44 @@
#define IMTTBCR 0x0008
#define IMTTBCR_EAE (1 << 31)
#define IMTTBCR_PMB (1 << 30)
-#define IMTTBCR_SH1_NON_SHAREABLE (0 << 28)
-#define IMTTBCR_SH1_OUTER_SHAREABLE (2 << 28)
-#define IMTTBCR_SH1_INNER_SHAREABLE (3 << 28)
-#define IMTTBCR_SH1_MASK (3 << 28)
-#define IMTTBCR_ORGN1_NC (0 << 26)
-#define IMTTBCR_ORGN1_WB_WA (1 << 26)
-#define IMTTBCR_ORGN1_WT (2 << 26)
-#define IMTTBCR_ORGN1_WB (3 << 26)
-#define IMTTBCR_ORGN1_MASK (3 << 26)
-#define IMTTBCR_IRGN1_NC (0 << 24)
-#define IMTTBCR_IRGN1_WB_WA (1 << 24)
-#define IMTTBCR_IRGN1_WT (2 << 24)
-#define IMTTBCR_IRGN1_WB (3 << 24)
-#define IMTTBCR_IRGN1_MASK (3 << 24)
+#define IMTTBCR_SH1_NON_SHAREABLE (0 << 28) /* R-Car Gen2 only */
+#define IMTTBCR_SH1_OUTER_SHAREABLE (2 << 28) /* R-Car Gen2 only */
+#define IMTTBCR_SH1_INNER_SHAREABLE (3 << 28) /* R-Car Gen2 only */
+#define IMTTBCR_SH1_MASK (3 << 28) /* R-Car Gen2 only */
+#define IMTTBCR_ORGN1_NC (0 << 26) /* R-Car Gen2 only */
+#define IMTTBCR_ORGN1_WB_WA (1 << 26) /* R-Car Gen2 only */
+#define IMTTBCR_ORGN1_WT (2 << 26) /* R-Car Gen2 only */
+#define IMTTBCR_ORGN1_WB (3 << 26) /* R-Car Gen2 only */
+#define IMTTBCR_ORGN1_MASK (3 << 26) /* R-Car Gen2 only */
+#define IMTTBCR_IRGN1_NC (0 << 24) /* R-Car Gen2 only */
+#define IMTTBCR_IRGN1_WB_WA (1 << 24) /* R-Car Gen2 only */
+#define IMTTBCR_IRGN1_WT (2 << 24) /* R-Car Gen2 only */
+#define IMTTBCR_IRGN1_WB (3 << 24) /* R-Car Gen2 only */
+#define IMTTBCR_IRGN1_MASK (3 << 24) /* R-Car Gen2 only */
#define IMTTBCR_TSZ1_MASK (7 << 16)
#define IMTTBCR_TSZ1_SHIFT 16
-#define IMTTBCR_SH0_NON_SHAREABLE (0 << 12)
-#define IMTTBCR_SH0_OUTER_SHAREABLE (2 << 12)
-#define IMTTBCR_SH0_INNER_SHAREABLE (3 << 12)
-#define IMTTBCR_SH0_MASK (3 << 12)
-#define IMTTBCR_ORGN0_NC (0 << 10)
-#define IMTTBCR_ORGN0_WB_WA (1 << 10)
-#define IMTTBCR_ORGN0_WT (2 << 10)
-#define IMTTBCR_ORGN0_WB (3 << 10)
-#define IMTTBCR_ORGN0_MASK (3 << 10)
-#define IMTTBCR_IRGN0_NC (0 << 8)
-#define IMTTBCR_IRGN0_WB_WA (1 << 8)
-#define IMTTBCR_IRGN0_WT (2 << 8)
-#define IMTTBCR_IRGN0_WB (3 << 8)
-#define IMTTBCR_IRGN0_MASK (3 << 8)
+#define IMTTBCR_SH0_NON_SHAREABLE (0 << 12) /* R-Car Gen2 only */
+#define IMTTBCR_SH0_OUTER_SHAREABLE (2 << 12) /* R-Car Gen2 only */
+#define IMTTBCR_SH0_INNER_SHAREABLE (3 << 12) /* R-Car Gen2 only */
+#define IMTTBCR_SH0_MASK (3 << 12) /* R-Car Gen2 only */
+#define IMTTBCR_ORGN0_NC (0 << 10) /* R-Car Gen2 only */
+#define IMTTBCR_ORGN0_WB_WA (1 << 10) /* R-Car Gen2 only */
+#define IMTTBCR_ORGN0_WT (2 << 10) /* R-Car Gen2 only */
+#define IMTTBCR_ORGN0_WB (3 << 10) /* R-Car Gen2 only */
+#define IMTTBCR_ORGN0_MASK (3 << 10) /* R-Car Gen2 only */
+#define IMTTBCR_IRGN0_NC (0 << 8) /* R-Car Gen2 only */
+#define IMTTBCR_IRGN0_WB_WA (1 << 8) /* R-Car Gen2 only */
+#define IMTTBCR_IRGN0_WT (2 << 8) /* R-Car Gen2 only */
+#define IMTTBCR_IRGN0_WB (3 << 8) /* R-Car Gen2 only */
+#define IMTTBCR_IRGN0_MASK (3 << 8) /* R-Car Gen2 only */
+#define IMTTBCR_SL0_TWOBIT_LVL_3 (0 << 6) /* R-Car Gen3 only */
+#define IMTTBCR_SL0_TWOBIT_LVL_2 (1 << 6) /* R-Car Gen3 only */
+#define IMTTBCR_SL0_TWOBIT_LVL_1 (2 << 6) /* R-Car Gen3 only */
#define IMTTBCR_SL0_LVL_2 (0 << 4)
#define IMTTBCR_SL0_LVL_1 (1 << 4)
#define IMTTBCR_TSZ0_MASK (7 << 0)
#define IMTTBCR_TSZ0_SHIFT O
-#define IMTTBCR_SL0_TWOBIT_LVL_3 (0 << 6)
-#define IMTTBCR_SL0_TWOBIT_LVL_2 (1 << 6)
-#define IMTTBCR_SL0_TWOBIT_LVL_1 (2 << 6)
-
#define IMBUSCR 0x000c
#define IMBUSCR_DVM (1 << 2)
#define IMBUSCR_BUSSEL_SYS (0 << 0)
@@ -187,7 +190,8 @@
#define IMMAIR_ATTR_IDX_WBRWA 1
#define IMMAIR_ATTR_IDX_DEV 2
-#define IMEAR 0x0030
+#define IMELAR 0x0030 /* IMEAR on R-Car Gen2 */
+#define IMEUAR 0x0034 /* R-Car Gen3 only */
#define IMPCTR 0x0200
#define IMPSTR 0x0208
@@ -335,6 +339,7 @@
ipmmu_write(mmu, IMUCTR(utlb),
IMUCTR_TTSEL_MMU(domain->context_id) | IMUCTR_FLUSH |
IMUCTR_MMUEN);
+ mmu->utlb_ctx[utlb] = domain->context_id;
}
/*
@@ -346,6 +351,7 @@
struct ipmmu_vmsa_device *mmu = domain->mmu;
ipmmu_write(mmu, IMUCTR(utlb), 0);
+ mmu->utlb_ctx[utlb] = IPMMU_CTX_INVALID;
}
static void ipmmu_tlb_flush_all(void *cookie)
@@ -355,16 +361,16 @@
ipmmu_tlb_invalidate(domain);
}
-static void ipmmu_tlb_add_flush(unsigned long iova, size_t size,
- size_t granule, bool leaf, void *cookie)
+static void ipmmu_tlb_flush(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
{
- /* The hardware doesn't support selective TLB flush. */
+ ipmmu_tlb_flush_all(cookie);
}
-static const struct iommu_gather_ops ipmmu_gather_ops = {
+static const struct iommu_flush_ops ipmmu_flush_ops = {
.tlb_flush_all = ipmmu_tlb_flush_all,
- .tlb_add_flush = ipmmu_tlb_add_flush,
- .tlb_sync = ipmmu_tlb_flush_all,
+ .tlb_flush_walk = ipmmu_tlb_flush,
+ .tlb_flush_leaf = ipmmu_tlb_flush,
};
/* -----------------------------------------------------------------------------
@@ -404,52 +410,10 @@
spin_unlock_irqrestore(&mmu->lock, flags);
}
-static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
+static void ipmmu_domain_setup_context(struct ipmmu_vmsa_domain *domain)
{
u64 ttbr;
u32 tmp;
- int ret;
-
- /*
- * Allocate the page table operations.
- *
- * VMSA states in section B3.6.3 "Control of Secure or Non-secure memory
- * access, Long-descriptor format" that the NStable bit being set in a
- * table descriptor will result in the NStable and NS bits of all child
- * entries being ignored and considered as being set. The IPMMU seems
- * not to comply with this, as it generates a secure access page fault
- * if any of the NStable and NS bits isn't set when running in
- * non-secure mode.
- */
- domain->cfg.quirks = IO_PGTABLE_QUIRK_ARM_NS;
- domain->cfg.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K;
- domain->cfg.ias = 32;
- domain->cfg.oas = 40;
- domain->cfg.tlb = &ipmmu_gather_ops;
- domain->io_domain.geometry.aperture_end = DMA_BIT_MASK(32);
- domain->io_domain.geometry.force_aperture = true;
- /*
- * TODO: Add support for coherent walk through CCI with DVM and remove
- * cache handling. For now, delegate it to the io-pgtable code.
- */
- domain->cfg.iommu_dev = domain->mmu->root->dev;
-
- /*
- * Find an unused context.
- */
- ret = ipmmu_domain_allocate_context(domain->mmu->root, domain);
- if (ret < 0)
- return ret;
-
- domain->context_id = ret;
-
- domain->iop = alloc_io_pgtable_ops(ARM_32_LPAE_S1, &domain->cfg,
- domain);
- if (!domain->iop) {
- ipmmu_domain_free_context(domain->mmu->root,
- domain->context_id);
- return -EINVAL;
- }
/* TTBR0 */
ttbr = domain->cfg.arm_lpae_s1_cfg.ttbr[0];
@@ -458,17 +422,19 @@
/*
* TTBCR
- * We use long descriptors with inner-shareable WBWA tables and allocate
- * the whole 32-bit VA space to TTBR0.
+ * We use long descriptors and allocate the whole 32-bit VA space to
+ * TTBR0.
*/
if (domain->mmu->features->twobit_imttbcr_sl0)
tmp = IMTTBCR_SL0_TWOBIT_LVL_1;
else
tmp = IMTTBCR_SL0_LVL_1;
- ipmmu_ctx_write_root(domain, IMTTBCR, IMTTBCR_EAE |
- IMTTBCR_SH0_INNER_SHAREABLE | IMTTBCR_ORGN0_WB_WA |
- IMTTBCR_IRGN0_WB_WA | tmp);
+ if (domain->mmu->features->cache_snoop)
+ tmp |= IMTTBCR_SH0_INNER_SHAREABLE | IMTTBCR_ORGN0_WB_WA |
+ IMTTBCR_IRGN0_WB_WA;
+
+ ipmmu_ctx_write_root(domain, IMTTBCR, IMTTBCR_EAE | tmp);
/* MAIR0 */
ipmmu_ctx_write_root(domain, IMMAIR0,
@@ -495,7 +461,55 @@
*/
ipmmu_ctx_write_all(domain, IMCTR,
IMCTR_INTEN | IMCTR_FLUSH | IMCTR_MMUEN);
+}
+static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
+{
+ int ret;
+
+ /*
+ * Allocate the page table operations.
+ *
+ * VMSA states in section B3.6.3 "Control of Secure or Non-secure memory
+ * access, Long-descriptor format" that the NStable bit being set in a
+ * table descriptor will result in the NStable and NS bits of all child
+ * entries being ignored and considered as being set. The IPMMU seems
+ * not to comply with this, as it generates a secure access page fault
+ * if any of the NStable and NS bits isn't set when running in
+ * non-secure mode.
+ */
+ domain->cfg.quirks = IO_PGTABLE_QUIRK_ARM_NS;
+ domain->cfg.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K;
+ domain->cfg.ias = 32;
+ domain->cfg.oas = 40;
+ domain->cfg.tlb = &ipmmu_flush_ops;
+ domain->io_domain.geometry.aperture_end = DMA_BIT_MASK(32);
+ domain->io_domain.geometry.force_aperture = true;
+ /*
+ * TODO: Add support for coherent walk through CCI with DVM and remove
+ * cache handling. For now, delegate it to the io-pgtable code.
+ */
+ domain->cfg.coherent_walk = false;
+ domain->cfg.iommu_dev = domain->mmu->root->dev;
+
+ /*
+ * Find an unused context.
+ */
+ ret = ipmmu_domain_allocate_context(domain->mmu->root, domain);
+ if (ret < 0)
+ return ret;
+
+ domain->context_id = ret;
+
+ domain->iop = alloc_io_pgtable_ops(ARM_32_LPAE_S1, &domain->cfg,
+ domain);
+ if (!domain->iop) {
+ ipmmu_domain_free_context(domain->mmu->root,
+ domain->context_id);
+ return -EINVAL;
+ }
+
+ ipmmu_domain_setup_context(domain);
return 0;
}
@@ -523,14 +537,16 @@
{
const u32 err_mask = IMSTR_MHIT | IMSTR_ABORT | IMSTR_PF | IMSTR_TF;
struct ipmmu_vmsa_device *mmu = domain->mmu;
+ unsigned long iova;
u32 status;
- u32 iova;
status = ipmmu_ctx_read_root(domain, IMSTR);
if (!(status & err_mask))
return IRQ_NONE;
- iova = ipmmu_ctx_read_root(domain, IMEAR);
+ iova = ipmmu_ctx_read_root(domain, IMELAR);
+ if (IS_ENABLED(CONFIG_64BIT))
+ iova |= (u64)ipmmu_ctx_read_root(domain, IMEUAR) << 32;
/*
* Clear the error status flags. Unlike traditional interrupt flag
@@ -542,10 +558,10 @@
/* Log fatal errors. */
if (status & IMSTR_MHIT)
- dev_err_ratelimited(mmu->dev, "Multiple TLB hits @0x%08x\n",
+ dev_err_ratelimited(mmu->dev, "Multiple TLB hits @0x%lx\n",
iova);
if (status & IMSTR_ABORT)
- dev_err_ratelimited(mmu->dev, "Page Table Walk Abort @0x%08x\n",
+ dev_err_ratelimited(mmu->dev, "Page Table Walk Abort @0x%lx\n",
iova);
if (!(status & (IMSTR_PF | IMSTR_TF)))
@@ -561,7 +577,7 @@
return IRQ_HANDLED;
dev_err_ratelimited(mmu->dev,
- "Unhandled fault: status 0x%08x iova 0x%08x\n",
+ "Unhandled fault: status 0x%08x iova 0x%lx\n",
status, iova);
return IRQ_HANDLED;
@@ -646,7 +662,7 @@
static int ipmmu_attach_device(struct iommu_domain *io_domain,
struct device *dev)
{
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct ipmmu_vmsa_device *mmu = to_ipmmu(dev);
struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
unsigned int i;
@@ -695,7 +711,7 @@
static void ipmmu_detach_device(struct iommu_domain *io_domain,
struct device *dev)
{
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
unsigned int i;
@@ -719,14 +735,14 @@
}
static size_t ipmmu_unmap(struct iommu_domain *io_domain, unsigned long iova,
- size_t size)
+ size_t size, struct iommu_iotlb_gather *gather)
{
struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
- return domain->iop->unmap(domain->iop, iova, size);
+ return domain->iop->unmap(domain->iop, iova, size, gather);
}
-static void ipmmu_iotlb_sync(struct iommu_domain *io_domain)
+static void ipmmu_flush_iotlb_all(struct iommu_domain *io_domain)
{
struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
@@ -734,6 +750,12 @@
ipmmu_tlb_flush_all(domain);
}
+static void ipmmu_iotlb_sync(struct iommu_domain *io_domain,
+ struct iommu_iotlb_gather *gather)
+{
+ ipmmu_flush_iotlb_all(io_domain);
+}
+
static phys_addr_t ipmmu_iova_to_phys(struct iommu_domain *io_domain,
dma_addr_t iova)
{
@@ -747,36 +769,71 @@
static int ipmmu_init_platform_device(struct device *dev,
struct of_phandle_args *args)
{
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct platform_device *ipmmu_pdev;
ipmmu_pdev = of_find_device_by_node(args->np);
if (!ipmmu_pdev)
return -ENODEV;
- dev->iommu_fwspec->iommu_priv = platform_get_drvdata(ipmmu_pdev);
+ fwspec->iommu_priv = platform_get_drvdata(ipmmu_pdev);
+
return 0;
}
-static bool ipmmu_slave_whitelist(struct device *dev)
-{
- /* By default, do not allow use of IPMMU */
- return false;
-}
-
static const struct soc_device_attribute soc_rcar_gen3[] = {
+ { .soc_id = "r8a774a1", },
+ { .soc_id = "r8a774c0", },
{ .soc_id = "r8a7795", },
{ .soc_id = "r8a7796", },
{ .soc_id = "r8a77965", },
{ .soc_id = "r8a77970", },
+ { .soc_id = "r8a77990", },
{ .soc_id = "r8a77995", },
{ /* sentinel */ }
};
+static const struct soc_device_attribute soc_rcar_gen3_whitelist[] = {
+ { .soc_id = "r8a774c0", },
+ { .soc_id = "r8a7795", .revision = "ES3.*" },
+ { .soc_id = "r8a77965", },
+ { .soc_id = "r8a77990", },
+ { .soc_id = "r8a77995", },
+ { /* sentinel */ }
+};
+
+static const char * const rcar_gen3_slave_whitelist[] = {
+};
+
+static bool ipmmu_slave_whitelist(struct device *dev)
+{
+ unsigned int i;
+
+ /*
+ * On R-Car Gen3, slave devices must be opted in through a whitelist.
+ * On any other SoC, this returns true unconditionally.
+ */
+ if (!soc_device_match(soc_rcar_gen3))
+ return true;
+
+ /* Check whether this R-Car Gen3 can use the IPMMU correctly or not */
+ if (!soc_device_match(soc_rcar_gen3_whitelist))
+ return false;
+
+ /* Check whether this slave device can work with the IPMMU */
+ for (i = 0; i < ARRAY_SIZE(rcar_gen3_slave_whitelist); i++) {
+ if (!strcmp(dev_name(dev), rcar_gen3_slave_whitelist[i]))
+ return true;
+ }
+
+ /* Otherwise, do not allow use of IPMMU */
+ return false;
+}
+
static int ipmmu_of_xlate(struct device *dev,
struct of_phandle_args *spec)
{
- /* For R-Car Gen3 use a white list to opt-in slave devices */
- if (soc_device_match(soc_rcar_gen3) && !ipmmu_slave_whitelist(dev))
+ if (!ipmmu_slave_whitelist(dev))
return -ENODEV;
iommu_fwspec_add_ids(dev, spec->args, 1);
@@ -851,27 +908,37 @@
static int ipmmu_add_device(struct device *dev)
{
+ struct ipmmu_vmsa_device *mmu = to_ipmmu(dev);
struct iommu_group *group;
+ int ret;
/*
* Only let through devices that have been verified in xlate()
*/
- if (!to_ipmmu(dev))
+ if (!mmu)
return -ENODEV;
- if (IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_IOMMU_DMA))
- return ipmmu_init_arm_mapping(dev);
+ if (IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_IOMMU_DMA)) {
+ ret = ipmmu_init_arm_mapping(dev);
+ if (ret)
+ return ret;
+ } else {
+ group = iommu_group_get_for_dev(dev);
+ if (IS_ERR(group))
+ return PTR_ERR(group);
- group = iommu_group_get_for_dev(dev);
- if (IS_ERR(group))
- return PTR_ERR(group);
+ iommu_group_put(group);
+ }
- iommu_group_put(group);
+ iommu_device_link(&mmu->iommu, dev);
return 0;
}
static void ipmmu_remove_device(struct device *dev)
{
+ struct ipmmu_vmsa_device *mmu = to_ipmmu(dev);
+
+ iommu_device_unlink(&mmu->iommu, dev);
arm_iommu_detach_device(dev);
iommu_group_remove_device(dev);
}
@@ -898,7 +965,7 @@
.detach_dev = ipmmu_detach_device,
.map = ipmmu_map,
.unmap = ipmmu_unmap,
- .flush_iotlb_all = ipmmu_iotlb_sync,
+ .flush_iotlb_all = ipmmu_flush_iotlb_all,
.iotlb_sync = ipmmu_iotlb_sync,
.iova_to_phys = ipmmu_iova_to_phys,
.add_device = ipmmu_add_device,
@@ -925,18 +992,22 @@
.use_ns_alias_offset = true,
.has_cache_leaf_nodes = false,
.number_of_contexts = 1, /* software only tested with one context */
+ .num_utlbs = 32,
.setup_imbuscr = true,
.twobit_imttbcr_sl0 = false,
.reserved_context = false,
+ .cache_snoop = true,
};
static const struct ipmmu_features ipmmu_features_rcar_gen3 = {
.use_ns_alias_offset = false,
.has_cache_leaf_nodes = true,
.number_of_contexts = 8,
+ .num_utlbs = 48,
.setup_imbuscr = false,
.twobit_imttbcr_sl0 = true,
.reserved_context = true,
+ .cache_snoop = false,
};
static const struct of_device_id ipmmu_of_ids[] = {
@@ -944,6 +1015,12 @@
.compatible = "renesas,ipmmu-vmsa",
.data = &ipmmu_features_default,
}, {
+ .compatible = "renesas,ipmmu-r8a774a1",
+ .data = &ipmmu_features_rcar_gen3,
+ }, {
+ .compatible = "renesas,ipmmu-r8a774c0",
+ .data = &ipmmu_features_rcar_gen3,
+ }, {
.compatible = "renesas,ipmmu-r8a7795",
.data = &ipmmu_features_rcar_gen3,
}, {
@@ -956,6 +1033,9 @@
.compatible = "renesas,ipmmu-r8a77970",
.data = &ipmmu_features_rcar_gen3,
}, {
+ .compatible = "renesas,ipmmu-r8a77990",
+ .data = &ipmmu_features_rcar_gen3,
+ }, {
.compatible = "renesas,ipmmu-r8a77995",
.data = &ipmmu_features_rcar_gen3,
}, {
@@ -963,8 +1043,6 @@
},
};
-MODULE_DEVICE_TABLE(of, ipmmu_of_ids);
-
static int ipmmu_probe(struct platform_device *pdev)
{
struct ipmmu_vmsa_device *mmu;
@@ -979,10 +1057,10 @@
}
mmu->dev = &pdev->dev;
- mmu->num_utlbs = 48;
spin_lock_init(&mmu->lock);
bitmap_zero(mmu->ctx, IPMMU_CTX_MAX);
mmu->features = of_device_get_match_data(&pdev->dev);
+ memset(mmu->utlb_ctx, IPMMU_CTX_INVALID, mmu->features->num_utlbs);
dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40));
/* Map I/O memory and request IRQ. */
@@ -1006,10 +1084,7 @@
if (mmu->features->use_ns_alias_offset)
mmu->base += IM_NS_ALIAS_OFFSET;
- mmu->num_ctx = min_t(unsigned int, IPMMU_CTX_MAX,
- mmu->features->number_of_contexts);
-
- irq = platform_get_irq(pdev, 0);
+ mmu->num_ctx = min(IPMMU_CTX_MAX, mmu->features->number_of_contexts);
/*
* Determine if this IPMMU instance is a root device by checking for
@@ -1029,10 +1104,9 @@
/* Root devices have mandatory IRQs */
if (ipmmu_is_root(mmu)) {
- if (irq < 0) {
- dev_err(&pdev->dev, "no IRQ found\n");
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
return irq;
- }
ret = devm_request_irq(&pdev->dev, irq, ipmmu_irq, 0,
dev_name(&pdev->dev), mmu);
@@ -1099,10 +1173,48 @@
return 0;
}
+#ifdef CONFIG_PM_SLEEP
+static int ipmmu_resume_noirq(struct device *dev)
+{
+ struct ipmmu_vmsa_device *mmu = dev_get_drvdata(dev);
+ unsigned int i;
+
+ /* Reset root MMU and restore contexts */
+ if (ipmmu_is_root(mmu)) {
+ ipmmu_device_reset(mmu);
+
+ for (i = 0; i < mmu->num_ctx; i++) {
+ if (!mmu->domains[i])
+ continue;
+
+ ipmmu_domain_setup_context(mmu->domains[i]);
+ }
+ }
+
+ /* Re-enable active micro-TLBs */
+ for (i = 0; i < mmu->features->num_utlbs; i++) {
+ if (mmu->utlb_ctx[i] == IPMMU_CTX_INVALID)
+ continue;
+
+ ipmmu_utlb_enable(mmu->root->domains[mmu->utlb_ctx[i]], i);
+ }
+
+ return 0;
+}
+
+static const struct dev_pm_ops ipmmu_pm = {
+ SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(NULL, ipmmu_resume_noirq)
+};
+#define DEV_PM_OPS &ipmmu_pm
+#else
+#define DEV_PM_OPS NULL
+#endif /* CONFIG_PM_SLEEP */
+
static struct platform_driver ipmmu_driver = {
.driver = {
.name = "ipmmu-vmsa",
.of_match_table = of_match_ptr(ipmmu_of_ids),
+ .pm = DEV_PM_OPS,
},
.probe = ipmmu_probe,
.remove = ipmmu_remove,
@@ -1135,15 +1247,4 @@
setup_done = true;
return 0;
}
-
-static void __exit ipmmu_exit(void)
-{
- return platform_driver_unregister(&ipmmu_driver);
-}
-
subsys_initcall(ipmmu_init);
-module_exit(ipmmu_exit);
-
-MODULE_DESCRIPTION("IOMMU API for Renesas VMSA-compatible IPMMU");
-MODULE_AUTHOR("Laurent Pinchart <laurent.pinchart@ideasonboard.com>");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 7d0f307..83f36f6 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -1,4 +1,4 @@
-#include <linux/seq_file.h>
+// SPDX-License-Identifier: GPL-2.0-only
#include <linux/cpumask.h>
#include <linux/kernel.h>
#include <linux/string.h>
@@ -104,6 +104,9 @@
else if (IS_ENABLED(CONFIG_AMD_IOMMU) &&
amd_iommu_irq_ops.prepare() == 0)
remap_ops = &amd_iommu_irq_ops;
+ else if (IS_ENABLED(CONFIG_HYPERV_IOMMU) &&
+ hyperv_irq_remap_ops.prepare() == 0)
+ remap_ops = &hyperv_irq_remap_ops;
else
return -ENOSYS;
diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h
index 0afef6e..6a190d5 100644
--- a/drivers/iommu/irq_remapping.h
+++ b/drivers/iommu/irq_remapping.h
@@ -1,20 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 Advanced Micro Devices, Inc.
* Author: Joerg Roedel <jroedel@suse.de>
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
* This header file contains stuff that is shared between different interrupt
* remapping drivers but with no need to be visible outside of the IOMMU layer.
*/
@@ -64,6 +52,7 @@
extern struct irq_remap_ops intel_irq_remap_ops;
extern struct irq_remap_ops amd_iommu_irq_ops;
+extern struct irq_remap_ops hyperv_irq_remap_ops;
#else /* CONFIG_IRQ_REMAP */
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index fc5f0b5..be99d40 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -1,26 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
+ * Author: Stepan Moskovchenko <stepanm@codeaurora.org>
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/errno.h>
#include <linux/io.h>
+#include <linux/io-pgtable.h>
#include <linux/interrupt.h>
#include <linux/list.h>
#include <linux/spinlock.h>
@@ -31,11 +21,10 @@
#include <linux/of_iommu.h>
#include <asm/cacheflush.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
#include "msm_iommu_hw-8xxx.h"
#include "msm_iommu.h"
-#include "io-pgtable.h"
#define MRC(reg, processor, op1, crn, crm, op2) \
__asm__ __volatile__ ( \
@@ -179,20 +168,29 @@
return;
}
-static void __flush_iotlb_sync(void *cookie)
+static void __flush_iotlb_walk(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
{
- /*
- * Nothing is needed here, the barrier to guarantee
- * completion of the tlb sync operation is implicitly
- * taken care when the iommu client does a writel before
- * kick starting the other master.
- */
+ __flush_iotlb_range(iova, size, granule, false, cookie);
}
-static const struct iommu_gather_ops msm_iommu_gather_ops = {
+static void __flush_iotlb_leaf(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
+{
+ __flush_iotlb_range(iova, size, granule, true, cookie);
+}
+
+static void __flush_iotlb_page(struct iommu_iotlb_gather *gather,
+ unsigned long iova, size_t granule, void *cookie)
+{
+ __flush_iotlb_range(iova, granule, granule, true, cookie);
+}
+
+static const struct iommu_flush_ops msm_iommu_flush_ops = {
.tlb_flush_all = __flush_iotlb,
- .tlb_add_flush = __flush_iotlb_range,
- .tlb_sync = __flush_iotlb_sync,
+ .tlb_flush_walk = __flush_iotlb_walk,
+ .tlb_flush_leaf = __flush_iotlb_leaf,
+ .tlb_add_page = __flush_iotlb_page,
};
static int msm_iommu_alloc_ctx(unsigned long *map, int start, int end)
@@ -356,7 +354,7 @@
.pgsize_bitmap = msm_iommu_ops.pgsize_bitmap,
.ias = 32,
.oas = 32,
- .tlb = &msm_iommu_gather_ops,
+ .tlb = &msm_iommu_flush_ops,
.iommu_dev = priv->dev,
};
@@ -459,10 +457,10 @@
master->num =
msm_iommu_alloc_ctx(iommu->context_map,
0, iommu->ncb);
- if (IS_ERR_VALUE(master->num)) {
- ret = -ENODEV;
- goto fail;
- }
+ if (IS_ERR_VALUE(master->num)) {
+ ret = -ENODEV;
+ goto fail;
+ }
config_mids(iommu, master);
__program_context(iommu->base, master->num,
priv);
@@ -520,13 +518,13 @@
}
static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
- size_t len)
+ size_t len, struct iommu_iotlb_gather *gather)
{
struct msm_priv *priv = to_msm_priv(domain);
unsigned long flags;
spin_lock_irqsave(&priv->pgtlock, flags);
- len = priv->iop->unmap(priv->iop, iova, len);
+ len = priv->iop->unmap(priv->iop, iova, len, gather);
spin_unlock_irqrestore(&priv->pgtlock, flags);
return len;
@@ -702,6 +700,13 @@
.detach_dev = msm_iommu_detach_dev,
.map = msm_iommu_map,
.unmap = msm_iommu_unmap,
+ /*
+ * Nothing is needed here, the barrier to guarantee
+ * completion of the tlb sync operation is implicitly
+ * taken care of when the iommu client does a writel before
+ * kick starting the other master.
+ */
+ .iotlb_sync = NULL,
.iova_to_phys = msm_iommu_iova_to_phys,
.add_device = msm_iommu_add_device,
.remove_device = msm_iommu_remove_device,
@@ -761,7 +766,6 @@
iommu->irq = platform_get_irq(pdev, 0);
if (iommu->irq < 0) {
- dev_err(iommu->dev, "could not get iommu irq\n");
ret = -ENODEV;
goto fail;
}
@@ -861,14 +865,5 @@
return ret;
}
-
-static void __exit msm_iommu_driver_exit(void)
-{
- platform_driver_unregister(&msm_iommu_driver);
-}
-
subsys_initcall(msm_iommu_driver_init);
-module_exit(msm_iommu_driver_exit);
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Stepan Moskovchenko <stepanm@codeaurora.org>");
diff --git a/drivers/iommu/msm_iommu.h b/drivers/iommu/msm_iommu.h
index ae92d27..ddae378 100644
--- a/drivers/iommu/msm_iommu.h
+++ b/drivers/iommu/msm_iommu.h
@@ -1,18 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
*/
#ifndef MSM_IOMMU_H
diff --git a/drivers/iommu/msm_iommu_hw-8xxx.h b/drivers/iommu/msm_iommu_hw-8xxx.h
index fc16010..cd957c7 100644
--- a/drivers/iommu/msm_iommu_hw-8xxx.h
+++ b/drivers/iommu/msm_iommu_hw-8xxx.h
@@ -1,18 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
*/
#ifndef __ARCH_ARM_MACH_MSM_IOMMU_HW_8XXX_H
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index f9f69f7..67a483c 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -1,17 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2015-2016 MediaTek Inc.
* Author: Yong Wu <yong.wu@mediatek.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
*/
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/bug.h>
#include <linux/clk.h>
#include <linux/component.h>
@@ -36,6 +28,7 @@
#include "mtk_iommu.h"
#define REG_MMU_PT_BASE_ADDR 0x000
+#define MMU_PT_ADDR_MASK GENMASK(31, 7)
#define REG_MMU_INVALIDATE 0x020
#define F_ALL_INVLD 0x2
@@ -52,12 +45,9 @@
#define REG_MMU_DCM_DIS 0x050
#define REG_MMU_CTRL_REG 0x110
+#define F_MMU_TF_PROT_TO_PROGRAM_ADDR (2 << 4)
#define F_MMU_PREFETCH_RT_REPLACE_MOD BIT(4)
-#define F_MMU_TF_PROTECT_SEL_SHIFT(data) \
- ((data)->m4u_plat == M4U_MT2712 ? 4 : 5)
-/* It's named by F_MMU_TF_PROT_SEL in mt2712. */
-#define F_MMU_TF_PROTECT_SEL(prot, data) \
- (((prot) & 0x3) << F_MMU_TF_PROTECT_SEL_SHIFT(data))
+#define F_MMU_TF_PROT_TO_PROGRAM_ADDR_MT8173 (2 << 5)
#define REG_MMU_IVRP_PADDR 0x114
@@ -74,26 +64,32 @@
#define F_INT_CLR_BIT BIT(12)
#define REG_MMU_INT_MAIN_CONTROL 0x124
-#define F_INT_TRANSLATION_FAULT BIT(0)
-#define F_INT_MAIN_MULTI_HIT_FAULT BIT(1)
-#define F_INT_INVALID_PA_FAULT BIT(2)
-#define F_INT_ENTRY_REPLACEMENT_FAULT BIT(3)
-#define F_INT_TLB_MISS_FAULT BIT(4)
-#define F_INT_MISS_TRANSACTION_FIFO_FAULT BIT(5)
-#define F_INT_PRETETCH_TRANSATION_FIFO_FAULT BIT(6)
+ /* mmu0 | mmu1 */
+#define F_INT_TRANSLATION_FAULT (BIT(0) | BIT(7))
+#define F_INT_MAIN_MULTI_HIT_FAULT (BIT(1) | BIT(8))
+#define F_INT_INVALID_PA_FAULT (BIT(2) | BIT(9))
+#define F_INT_ENTRY_REPLACEMENT_FAULT (BIT(3) | BIT(10))
+#define F_INT_TLB_MISS_FAULT (BIT(4) | BIT(11))
+#define F_INT_MISS_TRANSACTION_FIFO_FAULT (BIT(5) | BIT(12))
+#define F_INT_PRETETCH_TRANSATION_FIFO_FAULT (BIT(6) | BIT(13))
#define REG_MMU_CPE_DONE 0x12C
#define REG_MMU_FAULT_ST1 0x134
+#define F_REG_MMU0_FAULT_MASK GENMASK(6, 0)
+#define F_REG_MMU1_FAULT_MASK GENMASK(13, 7)
-#define REG_MMU_FAULT_VA 0x13c
+#define REG_MMU0_FAULT_VA 0x13c
#define F_MMU_FAULT_VA_WRITE_BIT BIT(1)
#define F_MMU_FAULT_VA_LAYER_BIT BIT(0)
-#define REG_MMU_INVLD_PA 0x140
-#define REG_MMU_INT_ID 0x150
-#define F_MMU0_INT_ID_LARB_ID(a) (((a) >> 7) & 0x7)
-#define F_MMU0_INT_ID_PORT_ID(a) (((a) >> 2) & 0x1f)
+#define REG_MMU0_INVLD_PA 0x140
+#define REG_MMU1_FAULT_VA 0x144
+#define REG_MMU1_INVLD_PA 0x148
+#define REG_MMU0_INT_ID 0x150
+#define REG_MMU1_INT_ID 0x154
+#define F_MMU_INT_ID_LARB_ID(a) (((a) >> 7) & 0x7)
+#define F_MMU_INT_ID_PORT_ID(a) (((a) >> 2) & 0x1f)
#define MTK_PROTECT_PA_ALIGN 128
@@ -113,7 +109,31 @@
struct iommu_domain domain;
};
-static struct iommu_ops mtk_iommu_ops;
+static const struct iommu_ops mtk_iommu_ops;
+
+/*
+ * In M4U 4GB mode, the physical address is remapped as below:
+ *
+ * CPU Physical address:
+ * ====================
+ *
+ * 0 1G 2G 3G 4G 5G
+ * |---A---|---B---|---C---|---D---|---E---|
+ * +--I/O--+------------Memory-------------+
+ *
+ * IOMMU output physical address:
+ * =============================
+ *
+ * 4G 5G 6G 7G 8G
+ * |---E---|---B---|---C---|---D---|
+ * +------------Memory-------------+
+ *
+ * Region 'A' (I/O) can NOT be mapped by the M4U. For Regions 'B'/'C'/'D',
+ * bit 32 of the CPU physical address always needs to be set, and for Region
+ * 'E', the CPU physical address is kept as is.
+ * Additionally, the iommu consumers always use the CPU physical address.
+ */
+#define MTK_IOMMU_4GB_MODE_REMAP_BASE 0x140000000UL
static LIST_HEAD(m4ulist); /* List all the M4U HWs */
@@ -196,10 +216,32 @@
}
}
-static const struct iommu_gather_ops mtk_iommu_gather_ops = {
+static void mtk_iommu_tlb_flush_walk(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
+{
+ mtk_iommu_tlb_add_flush_nosync(iova, size, granule, false, cookie);
+ mtk_iommu_tlb_sync(cookie);
+}
+
+static void mtk_iommu_tlb_flush_leaf(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
+{
+ mtk_iommu_tlb_add_flush_nosync(iova, size, granule, true, cookie);
+ mtk_iommu_tlb_sync(cookie);
+}
+
+static void mtk_iommu_tlb_flush_page_nosync(struct iommu_iotlb_gather *gather,
+ unsigned long iova, size_t granule,
+ void *cookie)
+{
+ mtk_iommu_tlb_add_flush_nosync(iova, granule, granule, true, cookie);
+}
+
+static const struct iommu_flush_ops mtk_iommu_flush_ops = {
.tlb_flush_all = mtk_iommu_tlb_flush_all,
- .tlb_add_flush = mtk_iommu_tlb_add_flush_nosync,
- .tlb_sync = mtk_iommu_tlb_sync,
+ .tlb_flush_walk = mtk_iommu_tlb_flush_walk,
+ .tlb_flush_leaf = mtk_iommu_tlb_flush_leaf,
+ .tlb_add_page = mtk_iommu_tlb_flush_page_nosync,
};
static irqreturn_t mtk_iommu_isr(int irq, void *dev_id)
@@ -212,13 +254,21 @@
/* Read error info from registers */
int_state = readl_relaxed(data->base + REG_MMU_FAULT_ST1);
- fault_iova = readl_relaxed(data->base + REG_MMU_FAULT_VA);
+ if (int_state & F_REG_MMU0_FAULT_MASK) {
+ regval = readl_relaxed(data->base + REG_MMU0_INT_ID);
+ fault_iova = readl_relaxed(data->base + REG_MMU0_FAULT_VA);
+ fault_pa = readl_relaxed(data->base + REG_MMU0_INVLD_PA);
+ } else {
+ regval = readl_relaxed(data->base + REG_MMU1_INT_ID);
+ fault_iova = readl_relaxed(data->base + REG_MMU1_FAULT_VA);
+ fault_pa = readl_relaxed(data->base + REG_MMU1_INVLD_PA);
+ }
layer = fault_iova & F_MMU_FAULT_VA_LAYER_BIT;
write = fault_iova & F_MMU_FAULT_VA_WRITE_BIT;
- fault_pa = readl_relaxed(data->base + REG_MMU_INVLD_PA);
- regval = readl_relaxed(data->base + REG_MMU_INT_ID);
- fault_larb = F_MMU0_INT_ID_LARB_ID(regval);
- fault_port = F_MMU0_INT_ID_PORT_ID(regval);
+ fault_larb = F_MMU_INT_ID_LARB_ID(regval);
+ fault_port = F_MMU_INT_ID_PORT_ID(regval);
+
+ fault_larb = data->plat_data->larbid_remap[fault_larb];
if (report_iommu_fault(&dom->domain, data->dev, fault_iova,
write ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ)) {
@@ -244,13 +294,13 @@
{
struct mtk_smi_larb_iommu *larb_mmu;
unsigned int larbid, portid;
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
int i;
for (i = 0; i < fwspec->num_ids; ++i) {
larbid = MTK_M4U_TO_LARB(fwspec->ids[i]);
portid = MTK_M4U_TO_PORT(fwspec->ids[i]);
- larb_mmu = &data->smi_imu.larb_imu[larbid];
+ larb_mmu = &data->larb_imu[larbid];
dev_dbg(dev, "%s iommu port: %d\n",
enable ? "enable" : "disable", portid);
@@ -271,17 +321,15 @@
dom->cfg = (struct io_pgtable_cfg) {
.quirks = IO_PGTABLE_QUIRK_ARM_NS |
IO_PGTABLE_QUIRK_NO_PERMS |
- IO_PGTABLE_QUIRK_TLBI_ON_MAP,
+ IO_PGTABLE_QUIRK_TLBI_ON_MAP |
+ IO_PGTABLE_QUIRK_ARM_MTK_EXT,
.pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap,
.ias = 32,
- .oas = 32,
- .tlb = &mtk_iommu_gather_ops,
+ .oas = 34,
+ .tlb = &mtk_iommu_flush_ops,
.iommu_dev = data->dev,
};
- if (data->enable_4GB)
- dom->cfg.quirks |= IO_PGTABLE_QUIRK_ARM_MTK_4GB;
-
dom->iop = alloc_io_pgtable_ops(ARM_V7S, &dom->cfg, data);
if (!dom->iop) {
dev_err(data->dev, "Failed to alloc io pgtable\n");
@@ -336,7 +384,7 @@
struct device *dev)
{
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
- struct mtk_iommu_data *data = dev->iommu_fwspec->iommu_priv;
+ struct mtk_iommu_data *data = dev_iommu_fwspec_get(dev)->iommu_priv;
if (!data)
return -ENODEV;
@@ -344,7 +392,7 @@
/* Update the pgtable base address register of the M4U HW */
if (!data->m4u_dom) {
data->m4u_dom = dom;
- writel(dom->cfg.arm_v7s_cfg.ttbr[0],
+ writel(dom->cfg.arm_v7s_cfg.ttbr[0] & MMU_PT_ADDR_MASK,
data->base + REG_MMU_PT_BASE_ADDR);
}
@@ -355,7 +403,7 @@
static void mtk_iommu_detach_device(struct iommu_domain *domain,
struct device *dev)
{
- struct mtk_iommu_data *data = dev->iommu_fwspec->iommu_priv;
+ struct mtk_iommu_data *data = dev_iommu_fwspec_get(dev)->iommu_priv;
if (!data)
return;
@@ -367,32 +415,43 @@
phys_addr_t paddr, size_t size, int prot)
{
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
+ struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
unsigned long flags;
int ret;
+ /* The "4GB mode" M4U physically cannot use the lower remap of DRAM. */
+ if (data->enable_4GB)
+ paddr |= BIT_ULL(32);
+
spin_lock_irqsave(&dom->pgtlock, flags);
- ret = dom->iop->map(dom->iop, iova, paddr & DMA_BIT_MASK(32),
- size, prot);
+ ret = dom->iop->map(dom->iop, iova, paddr, size, prot);
spin_unlock_irqrestore(&dom->pgtlock, flags);
return ret;
}
static size_t mtk_iommu_unmap(struct iommu_domain *domain,
- unsigned long iova, size_t size)
+ unsigned long iova, size_t size,
+ struct iommu_iotlb_gather *gather)
{
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
unsigned long flags;
size_t unmapsz;
spin_lock_irqsave(&dom->pgtlock, flags);
- unmapsz = dom->iop->unmap(dom->iop, iova, size);
+ unmapsz = dom->iop->unmap(dom->iop, iova, size, gather);
spin_unlock_irqrestore(&dom->pgtlock, flags);
return unmapsz;
}
-static void mtk_iommu_iotlb_sync(struct iommu_domain *domain)
+static void mtk_iommu_flush_iotlb_all(struct iommu_domain *domain)
+{
+ mtk_iommu_tlb_sync(mtk_iommu_get_m4u_data());
+}
+
+static void mtk_iommu_iotlb_sync(struct iommu_domain *domain,
+ struct iommu_iotlb_gather *gather)
{
mtk_iommu_tlb_sync(mtk_iommu_get_m4u_data());
}
@@ -409,21 +468,22 @@
pa = dom->iop->iova_to_phys(dom->iop, iova);
spin_unlock_irqrestore(&dom->pgtlock, flags);
- if (data->enable_4GB)
- pa |= BIT_ULL(32);
+ if (data->enable_4GB && pa >= MTK_IOMMU_4GB_MODE_REMAP_BASE)
+ pa &= ~BIT_ULL(32);
return pa;
}
static int mtk_iommu_add_device(struct device *dev)
{
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct mtk_iommu_data *data;
struct iommu_group *group;
- if (!dev->iommu_fwspec || dev->iommu_fwspec->ops != &mtk_iommu_ops)
+ if (!fwspec || fwspec->ops != &mtk_iommu_ops)
return -ENODEV; /* Not a iommu client device */
- data = dev->iommu_fwspec->iommu_priv;
+ data = fwspec->iommu_priv;
iommu_device_link(&data->iommu, dev);
group = iommu_group_get_for_dev(dev);
@@ -436,12 +496,13 @@
static void mtk_iommu_remove_device(struct device *dev)
{
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct mtk_iommu_data *data;
- if (!dev->iommu_fwspec || dev->iommu_fwspec->ops != &mtk_iommu_ops)
+ if (!fwspec || fwspec->ops != &mtk_iommu_ops)
return;
- data = dev->iommu_fwspec->iommu_priv;
+ data = fwspec->iommu_priv;
iommu_device_unlink(&data->iommu, dev);
iommu_group_remove_device(dev);
@@ -468,6 +529,7 @@
static int mtk_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
{
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct platform_device *m4updev;
if (args->args_count != 1) {
@@ -476,26 +538,26 @@
return -EINVAL;
}
- if (!dev->iommu_fwspec->iommu_priv) {
+ if (!fwspec->iommu_priv) {
/* Get the m4u device */
m4updev = of_find_device_by_node(args->np);
if (WARN_ON(!m4updev))
return -EINVAL;
- dev->iommu_fwspec->iommu_priv = platform_get_drvdata(m4updev);
+ fwspec->iommu_priv = platform_get_drvdata(m4updev);
}
return iommu_fwspec_add_ids(dev, args->args, 1);
}
-static struct iommu_ops mtk_iommu_ops = {
+static const struct iommu_ops mtk_iommu_ops = {
.domain_alloc = mtk_iommu_domain_alloc,
.domain_free = mtk_iommu_domain_free,
.attach_dev = mtk_iommu_attach_device,
.detach_dev = mtk_iommu_detach_device,
.map = mtk_iommu_map,
.unmap = mtk_iommu_unmap,
- .flush_iotlb_all = mtk_iommu_iotlb_sync,
+ .flush_iotlb_all = mtk_iommu_flush_iotlb_all,
.iotlb_sync = mtk_iommu_iotlb_sync,
.iova_to_phys = mtk_iommu_iova_to_phys,
.add_device = mtk_iommu_add_device,
@@ -516,9 +578,11 @@
return ret;
}
- regval = F_MMU_TF_PROTECT_SEL(2, data);
- if (data->m4u_plat == M4U_MT8173)
- regval |= F_MMU_PREFETCH_RT_REPLACE_MOD;
+ if (data->plat_data->m4u_plat == M4U_MT8173)
+ regval = F_MMU_PREFETCH_RT_REPLACE_MOD |
+ F_MMU_TF_PROT_TO_PROGRAM_ADDR_MT8173;
+ else
+ regval = F_MMU_TF_PROT_TO_PROGRAM_ADDR;
writel_relaxed(regval, data->base + REG_MMU_CTRL_REG);
regval = F_L2_MULIT_HIT_EN |
@@ -538,14 +602,14 @@
F_INT_PRETETCH_TRANSATION_FIFO_FAULT;
writel_relaxed(regval, data->base + REG_MMU_INT_MAIN_CONTROL);
- if (data->m4u_plat == M4U_MT8173)
+ if (data->plat_data->m4u_plat == M4U_MT8173)
regval = (data->protect_base >> 1) | (data->enable_4GB << 31);
else
regval = lower_32_bits(data->protect_base) |
upper_32_bits(data->protect_base);
writel_relaxed(regval, data->base + REG_MMU_IVRP_PADDR);
- if (data->enable_4GB && data->m4u_plat != M4U_MT8173) {
+ if (data->enable_4GB && data->plat_data->has_vld_pa_rng) {
/*
* If 4GB mode is enabled, the validate PA range is from
* 0x1_0000_0000 to 0x1_ffff_ffff. here record bit[32:30].
@@ -555,8 +619,7 @@
}
writel_relaxed(0, data->base + REG_MMU_DCM_DIS);
- /* It's MISC control register whose default value is ok except mt8173.*/
- if (data->m4u_plat == M4U_MT8173)
+ if (data->plat_data->reset_axi)
writel_relaxed(0, data->base + REG_MMU_STANDARD_AXI_MODE);
if (devm_request_irq(data->dev, data->irq, mtk_iommu_isr, 0,
@@ -589,7 +652,7 @@
if (!data)
return -ENOMEM;
data->dev = dev;
- data->m4u_plat = (enum mtk_iommu_plat)of_device_get_match_data(dev);
+ data->plat_data = of_device_get_match_data(dev);
/* Protect memory. HW will access here while translation fault.*/
protect = devm_kzalloc(dev, MTK_PROTECT_PA_ALIGN * 2, GFP_KERNEL);
@@ -599,6 +662,8 @@
/* Whether the current dram is over 4GB */
data->enable_4GB = !!(max_pfn > (BIT_ULL(32) >> PAGE_SHIFT));
+ if (!data->plat_data->has_4gb_mode)
+ data->enable_4GB = false;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
data->base = devm_ioremap_resource(dev, res);
@@ -610,15 +675,16 @@
if (data->irq < 0)
return data->irq;
- data->bclk = devm_clk_get(dev, "bclk");
- if (IS_ERR(data->bclk))
- return PTR_ERR(data->bclk);
+ if (data->plat_data->has_bclk) {
+ data->bclk = devm_clk_get(dev, "bclk");
+ if (IS_ERR(data->bclk))
+ return PTR_ERR(data->bclk);
+ }
larb_nr = of_count_phandle_with_args(dev->of_node,
"mediatek,larbs", NULL);
if (larb_nr < 0)
return larb_nr;
- data->smi_imu.larb_nr = larb_nr;
for (i = 0; i < larb_nr; i++) {
struct device_node *larbnode;
@@ -629,17 +695,21 @@
if (!larbnode)
return -EINVAL;
- if (!of_device_is_available(larbnode))
+ if (!of_device_is_available(larbnode)) {
+ of_node_put(larbnode);
continue;
+ }
ret = of_property_read_u32(larbnode, "mediatek,larb-id", &id);
if (ret)/* The id is consecutive if there is no this property */
id = i;
plarbdev = of_find_device_by_node(larbnode);
- if (!plarbdev)
+ if (!plarbdev) {
+ of_node_put(larbnode);
return -EPROBE_DEFER;
- data->smi_imu.larb_imu[id].dev = &plarbdev->dev;
+ }
+ data->larb_imu[id].dev = &plarbdev->dev;
component_match_add_release(dev, &match, release_of,
compare_of, larbnode);
@@ -700,6 +770,7 @@
reg->int_control0 = readl_relaxed(base + REG_MMU_INT_CONTROL0);
reg->int_main_control = readl_relaxed(base + REG_MMU_INT_MAIN_CONTROL);
reg->ivrp_paddr = readl_relaxed(base + REG_MMU_IVRP_PADDR);
+ reg->vld_pa_rng = readl_relaxed(base + REG_MMU_VLD_PA_RNG);
clk_disable_unprepare(data->bclk);
return 0;
}
@@ -708,6 +779,7 @@
{
struct mtk_iommu_data *data = dev_get_drvdata(dev);
struct mtk_iommu_suspend_reg *reg = &data->reg;
+ struct mtk_iommu_domain *m4u_dom = data->m4u_dom;
void __iomem *base = data->base;
int ret;
@@ -723,8 +795,9 @@
writel_relaxed(reg->int_control0, base + REG_MMU_INT_CONTROL0);
writel_relaxed(reg->int_main_control, base + REG_MMU_INT_MAIN_CONTROL);
writel_relaxed(reg->ivrp_paddr, base + REG_MMU_IVRP_PADDR);
- if (data->m4u_dom)
- writel(data->m4u_dom->cfg.arm_v7s_cfg.ttbr[0],
+ writel_relaxed(reg->vld_pa_rng, base + REG_MMU_VLD_PA_RNG);
+ if (m4u_dom)
+ writel(m4u_dom->cfg.arm_v7s_cfg.ttbr[0] & MMU_PT_ADDR_MASK,
base + REG_MMU_PT_BASE_ADDR);
return 0;
}
@@ -733,9 +806,32 @@
SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(mtk_iommu_suspend, mtk_iommu_resume)
};
+static const struct mtk_iommu_plat_data mt2712_data = {
+ .m4u_plat = M4U_MT2712,
+ .has_4gb_mode = true,
+ .has_bclk = true,
+ .has_vld_pa_rng = true,
+ .larbid_remap = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
+};
+
+static const struct mtk_iommu_plat_data mt8173_data = {
+ .m4u_plat = M4U_MT8173,
+ .has_4gb_mode = true,
+ .has_bclk = true,
+ .reset_axi = true,
+ .larbid_remap = {0, 1, 2, 3, 4, 5}, /* Linear mapping. */
+};
+
+static const struct mtk_iommu_plat_data mt8183_data = {
+ .m4u_plat = M4U_MT8183,
+ .reset_axi = true,
+ .larbid_remap = {0, 4, 5, 6, 7, 2, 3, 1},
+};
+
static const struct of_device_id mtk_iommu_of_ids[] = {
- { .compatible = "mediatek,mt2712-m4u", .data = (void *)M4U_MT2712},
- { .compatible = "mediatek,mt8173-m4u", .data = (void *)M4U_MT8173},
+ { .compatible = "mediatek,mt2712-m4u", .data = &mt2712_data},
+ { .compatible = "mediatek,mt8173-m4u", .data = &mt8173_data},
+ { .compatible = "mediatek,mt8183-m4u", .data = &mt8183_data},
{}
};
diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
index 778498b..fc0f16e 100644
--- a/drivers/iommu/mtk_iommu.h
+++ b/drivers/iommu/mtk_iommu.h
@@ -1,15 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2015-2016 MediaTek Inc.
* Author: Honghui Zhang <honghui.zhang@mediatek.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
*/
#ifndef _MTK_IOMMU_H_
@@ -19,13 +11,12 @@
#include <linux/component.h>
#include <linux/device.h>
#include <linux/io.h>
+#include <linux/io-pgtable.h>
#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <soc/mediatek/smi.h>
-#include "io-pgtable.h"
-
struct mtk_iommu_suspend_reg {
u32 standard_axi_mode;
u32 dcm_dis;
@@ -33,12 +24,25 @@
u32 int_control0;
u32 int_main_control;
u32 ivrp_paddr;
+ u32 vld_pa_rng;
};
enum mtk_iommu_plat {
M4U_MT2701,
M4U_MT2712,
M4U_MT8173,
+ M4U_MT8183,
+};
+
+struct mtk_iommu_plat_data {
+ enum mtk_iommu_plat m4u_plat;
+ bool has_4gb_mode;
+
+ /* HW will use the EMI clock if there is no "bclk". */
+ bool has_bclk;
+ bool has_vld_pa_rng;
+ bool reset_axi;
+ unsigned char larbid_remap[MTK_LARB_NR_MAX];
};
struct mtk_iommu_domain;
@@ -52,14 +56,14 @@
struct mtk_iommu_suspend_reg reg;
struct mtk_iommu_domain *m4u_dom;
struct iommu_group *m4u_group;
- struct mtk_smi_iommu smi_imu; /* SMI larb iommu info */
bool enable_4GB;
bool tlb_flush_active;
struct iommu_device iommu;
- enum mtk_iommu_plat m4u_plat;
+ const struct mtk_iommu_plat_data *plat_data;
struct list_head list;
+ struct mtk_smi_larb_iommu larb_imu[MTK_LARB_NR_MAX];
};
static inline int compare_of(struct device *dev, void *data)
@@ -76,14 +80,14 @@
{
struct mtk_iommu_data *data = dev_get_drvdata(dev);
- return component_bind_all(dev, &data->smi_imu);
+ return component_bind_all(dev, &data->larb_imu);
}
static inline void mtk_iommu_unbind(struct device *dev)
{
struct mtk_iommu_data *data = dev_get_drvdata(dev);
- component_unbind_all(dev, &data->smi_imu);
+ component_unbind_all(dev, &data->larb_imu);
}
#endif
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index 676c029..b5efd6d 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -1,19 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
+ * IOMMU API for MTK architected m4u v1 implementations
+ *
* Copyright (c) 2015-2016 MediaTek Inc.
* Author: Honghui Zhang <honghui.zhang@mediatek.com>
*
* Based on driver/iommu/mtk_iommu.c
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
*/
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/bug.h>
#include <linux/clk.h>
#include <linux/component.h>
@@ -35,7 +29,7 @@
#include <linux/spinlock.h>
#include <asm/barrier.h>
#include <asm/dma-iommu.h>
-#include <linux/module.h>
+#include <linux/init.h>
#include <dt-bindings/memory/mt2701-larb-port.h>
#include <soc/mediatek/smi.h>
#include "mtk_iommu.h"
@@ -206,13 +200,13 @@
{
struct mtk_smi_larb_iommu *larb_mmu;
unsigned int larbid, portid;
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
int i;
for (i = 0; i < fwspec->num_ids; ++i) {
larbid = mt2701_m4u_to_larb(fwspec->ids[i]);
portid = mt2701_m4u_to_port(fwspec->ids[i]);
- larb_mmu = &data->smi_imu.larb_imu[larbid];
+ larb_mmu = &data->larb_imu[larbid];
dev_dbg(dev, "%s iommu port: %d\n",
enable ? "enable" : "disable", portid);
@@ -230,9 +224,8 @@
spin_lock_init(&dom->pgtlock);
- dom->pgt_va = dma_zalloc_coherent(data->dev,
- M2701_IOMMU_PGT_SIZE,
- &dom->pgt_pa, GFP_KERNEL);
+ dom->pgt_va = dma_alloc_coherent(data->dev, M2701_IOMMU_PGT_SIZE,
+ &dom->pgt_pa, GFP_KERNEL);
if (!dom->pgt_va)
return -ENOMEM;
@@ -271,7 +264,7 @@
struct device *dev)
{
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
- struct mtk_iommu_data *data = dev->iommu_fwspec->iommu_priv;
+ struct mtk_iommu_data *data = dev_iommu_fwspec_get(dev)->iommu_priv;
int ret;
if (!data)
@@ -293,7 +286,7 @@
static void mtk_iommu_detach_device(struct iommu_domain *domain,
struct device *dev)
{
- struct mtk_iommu_data *data = dev->iommu_fwspec->iommu_priv;
+ struct mtk_iommu_data *data = dev_iommu_fwspec_get(dev)->iommu_priv;
if (!data)
return;
@@ -331,7 +324,8 @@
}
static size_t mtk_iommu_unmap(struct iommu_domain *domain,
- unsigned long iova, size_t size)
+ unsigned long iova, size_t size,
+ struct iommu_iotlb_gather *gather)
{
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
unsigned long flags;
@@ -362,7 +356,7 @@
return pa;
}
-static struct iommu_ops mtk_iommu_ops;
+static const struct iommu_ops mtk_iommu_ops;
/*
* MTK generation one iommu HW only support one iommu domain, and all the client
@@ -371,6 +365,7 @@
static int mtk_iommu_create_mapping(struct device *dev,
struct of_phandle_args *args)
{
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct mtk_iommu_data *data;
struct platform_device *m4updev;
struct dma_iommu_mapping *mtk_mapping;
@@ -383,28 +378,29 @@
return -EINVAL;
}
- if (!dev->iommu_fwspec) {
+ if (!fwspec) {
ret = iommu_fwspec_init(dev, &args->np->fwnode, &mtk_iommu_ops);
if (ret)
return ret;
- } else if (dev->iommu_fwspec->ops != &mtk_iommu_ops) {
+ fwspec = dev_iommu_fwspec_get(dev);
+ } else if (dev_iommu_fwspec_get(dev)->ops != &mtk_iommu_ops) {
return -EINVAL;
}
- if (!dev->iommu_fwspec->iommu_priv) {
+ if (!fwspec->iommu_priv) {
/* Get the m4u device */
m4updev = of_find_device_by_node(args->np);
if (WARN_ON(!m4updev))
return -EINVAL;
- dev->iommu_fwspec->iommu_priv = platform_get_drvdata(m4updev);
+ fwspec->iommu_priv = platform_get_drvdata(m4updev);
}
ret = iommu_fwspec_add_ids(dev, args->args, 1);
if (ret)
return ret;
- data = dev->iommu_fwspec->iommu_priv;
+ data = fwspec->iommu_priv;
m4udev = data->dev;
mtk_mapping = m4udev->archdata.iommu;
if (!mtk_mapping) {
@@ -422,6 +418,7 @@
static int mtk_iommu_add_device(struct device *dev)
{
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct dma_iommu_mapping *mtk_mapping;
struct of_phandle_args iommu_spec;
struct of_phandle_iterator it;
@@ -430,17 +427,21 @@
int err;
of_for_each_phandle(&it, err, dev->of_node, "iommus",
- "#iommu-cells", 0) {
+ "#iommu-cells", -1) {
int count = of_phandle_iterator_args(&it, iommu_spec.args,
MAX_PHANDLE_ARGS);
iommu_spec.np = of_node_get(it.node);
iommu_spec.args_count = count;
mtk_iommu_create_mapping(dev, &iommu_spec);
+
+ /* dev->iommu_fwspec might have changed */
+ fwspec = dev_iommu_fwspec_get(dev);
+
of_node_put(iommu_spec.np);
}
- if (!dev->iommu_fwspec || dev->iommu_fwspec->ops != &mtk_iommu_ops)
+ if (!fwspec || fwspec->ops != &mtk_iommu_ops)
return -ENODEV; /* Not a iommu client device */
/*
@@ -458,7 +459,7 @@
if (err)
return err;
- data = dev->iommu_fwspec->iommu_priv;
+ data = fwspec->iommu_priv;
mtk_mapping = data->dev->archdata.iommu;
err = arm_iommu_attach_device(dev, mtk_mapping);
if (err) {
@@ -466,17 +467,18 @@
return err;
}
- return iommu_device_link(&data->iommu, dev);;
+ return iommu_device_link(&data->iommu, dev);
}
static void mtk_iommu_remove_device(struct device *dev)
{
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct mtk_iommu_data *data;
- if (!dev->iommu_fwspec || dev->iommu_fwspec->ops != &mtk_iommu_ops)
+ if (!fwspec || fwspec->ops != &mtk_iommu_ops)
return;
- data = dev->iommu_fwspec->iommu_priv;
+ data = fwspec->iommu_priv;
iommu_device_unlink(&data->iommu, dev);
iommu_group_remove_device(dev);
@@ -524,7 +526,7 @@
return 0;
}
-static struct iommu_ops mtk_iommu_ops = {
+static const struct iommu_ops mtk_iommu_ops = {
.domain_alloc = mtk_iommu_domain_alloc,
.domain_free = mtk_iommu_domain_free,
.attach_dev = mtk_iommu_attach_device,
@@ -609,14 +611,12 @@
}
}
- data->smi_imu.larb_imu[larb_nr].dev = &plarbdev->dev;
+ data->larb_imu[larb_nr].dev = &plarbdev->dev;
component_match_add_release(dev, &match, release_of,
compare_of, larb_spec.np);
larb_nr++;
}
- data->smi_imu.larb_nr = larb_nr;
-
platform_set_drvdata(pdev, data);
ret = mtk_iommu_hw_init(data);
@@ -704,15 +704,4 @@
{
return platform_driver_register(&mtk_iommu_driver);
}
-
-static void __exit m4u_exit(void)
-{
- return platform_driver_unregister(&mtk_iommu_driver);
-}
-
subsys_initcall(m4u_init);
-module_exit(m4u_exit);
-
-MODULE_DESCRIPTION("IOMMU API for MTK architected m4u v1 implementations");
-MODULE_AUTHOR("Honghui Zhang <honghui.zhang@mediatek.com>");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index f7787e7..614a93a 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -1,20 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* OF helpers for IOMMU
*
* Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <linux/export.h>
@@ -24,6 +12,7 @@
#include <linux/of_iommu.h>
#include <linux/of_pci.h>
#include <linux/slab.h>
+#include <linux/fsl/mc.h>
#define NO_IOMMU 1
@@ -132,9 +121,8 @@
struct of_phandle_args iommu_spec = { .args_count = 1 };
int err;
- err = of_pci_map_rid(info->np, alias, "iommu-map",
- "iommu-map-mask", &iommu_spec.np,
- iommu_spec.args);
+ err = of_map_rid(info->np, alias, "iommu-map", "iommu-map-mask",
+ &iommu_spec.np, iommu_spec.args);
if (err)
return err == -ENODEV ? NO_IOMMU : err;
@@ -143,11 +131,28 @@
return err;
}
+static int of_fsl_mc_iommu_init(struct fsl_mc_device *mc_dev,
+ struct device_node *master_np)
+{
+ struct of_phandle_args iommu_spec = { .args_count = 1 };
+ int err;
+
+ err = of_map_rid(master_np, mc_dev->icid, "iommu-map",
+ "iommu-map-mask", &iommu_spec.np,
+ iommu_spec.args);
+ if (err)
+ return err == -ENODEV ? NO_IOMMU : err;
+
+ err = of_iommu_xlate(&mc_dev->dev, &iommu_spec);
+ of_node_put(iommu_spec.np);
+ return err;
+}
+
const struct iommu_ops *of_iommu_configure(struct device *dev,
struct device_node *master_np)
{
const struct iommu_ops *ops = NULL;
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
int err = NO_IOMMU;
if (!master_np)
@@ -174,6 +179,8 @@
err = pci_for_each_dma_alias(to_pci_dev(dev),
of_pci_iommu_init, &info);
+ } else if (dev_is_fsl_mc(dev)) {
+ err = of_fsl_mc_iommu_init(to_fsl_mc_device(dev), master_np);
} else {
struct of_phandle_args iommu_spec;
int idx = 0;
@@ -189,20 +196,24 @@
}
}
+
/*
* Two success conditions can be represented by non-negative err here:
* >0 : there is no IOMMU, or one was unavailable for non-fatal reasons
* 0 : we found an IOMMU, and dev->fwspec is initialised appropriately
* <0 : any actual error
*/
- if (!err)
- ops = dev->iommu_fwspec->ops;
+ if (!err) {
+ /* The fwspec pointer changed, read it again */
+ fwspec = dev_iommu_fwspec_get(dev);
+ ops = fwspec->ops;
+ }
/*
* If we have reason to believe the IOMMU driver missed the initial
- * add_device callback for dev, replay it to get things in order.
+ * probe for dev, replay it to get things in order.
*/
- if (ops && ops->add_device && dev->bus && !dev->iommu_group)
- err = ops->add_device(dev);
+ if (!err && dev->bus && !device_iommu_mapped(dev))
+ err = iommu_probe_device(dev);
/* Ignore all other errors apart from EPROBE_DEFER */
if (err == -EPROBE_DEFER) {
diff --git a/drivers/iommu/omap-iommu-debug.c b/drivers/iommu/omap-iommu-debug.c
index 5021754..8e19bfa 100644
--- a/drivers/iommu/omap-iommu-debug.c
+++ b/drivers/iommu/omap-iommu-debug.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* omap iommu: debugfs interface
*
* Copyright (C) 2008-2009 Nokia Corporation
*
* Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/err.h>
@@ -159,7 +156,7 @@
return 0;
}
-static int debug_read_tlb(struct seq_file *s, void *data)
+static int tlb_show(struct seq_file *s, void *data)
{
struct omap_iommu *obj = s->private;
@@ -210,7 +207,7 @@
spin_unlock(&obj->page_table_lock);
}
-static int debug_read_pagetable(struct seq_file *s, void *data)
+static int pagetable_show(struct seq_file *s, void *data)
{
struct omap_iommu *obj = s->private;
@@ -228,40 +225,16 @@
return 0;
}
-#define DEBUG_SEQ_FOPS_RO(name) \
- static int debug_open_##name(struct inode *inode, struct file *file) \
- { \
- return single_open(file, debug_read_##name, inode->i_private); \
- } \
- \
- static const struct file_operations debug_##name##_fops = { \
- .open = debug_open_##name, \
- .read = seq_read, \
- .llseek = seq_lseek, \
- .release = single_release, \
- }
-
#define DEBUG_FOPS_RO(name) \
- static const struct file_operations debug_##name##_fops = { \
+ static const struct file_operations name##_fops = { \
.open = simple_open, \
.read = debug_read_##name, \
.llseek = generic_file_llseek, \
}
DEBUG_FOPS_RO(regs);
-DEBUG_SEQ_FOPS_RO(tlb);
-DEBUG_SEQ_FOPS_RO(pagetable);
-
-#define __DEBUG_ADD_FILE(attr, mode) \
- { \
- struct dentry *dent; \
- dent = debugfs_create_file(#attr, mode, obj->debug_dir, \
- obj, &debug_##attr##_fops); \
- if (!dent) \
- goto err; \
- }
-
-#define DEBUG_ADD_FILE_RO(name) __DEBUG_ADD_FILE(name, 0400)
+DEFINE_SHOW_ATTRIBUTE(tlb);
+DEFINE_SHOW_ATTRIBUTE(pagetable);
void omap_iommu_debugfs_add(struct omap_iommu *obj)
{
@@ -270,23 +243,13 @@
if (!iommu_debug_root)
return;
- obj->debug_dir = debugfs_create_dir(obj->name, iommu_debug_root);
- if (!obj->debug_dir)
- return;
+ d = debugfs_create_dir(obj->name, iommu_debug_root);
+ obj->debug_dir = d;
- d = debugfs_create_u32("nr_tlb_entries", 0400, obj->debug_dir,
- &obj->nr_tlb_entries);
- if (!d)
- return;
-
- DEBUG_ADD_FILE_RO(regs);
- DEBUG_ADD_FILE_RO(tlb);
- DEBUG_ADD_FILE_RO(pagetable);
-
- return;
-
-err:
- debugfs_remove_recursive(obj->debug_dir);
+ debugfs_create_u32("nr_tlb_entries", 0400, d, &obj->nr_tlb_entries);
+ debugfs_create_file("regs", 0400, d, obj, &regs_fops);
+ debugfs_create_file("tlb", 0400, d, obj, &tlb_fops);
+ debugfs_create_file("pagetable", 0400, d, obj, &pagetable_fops);
}
void omap_iommu_debugfs_remove(struct omap_iommu *obj)
@@ -300,8 +263,6 @@
void __init omap_iommu_debugfs_init(void)
{
iommu_debug_root = debugfs_create_dir("omap_iommu", NULL);
- if (!iommu_debug_root)
- pr_err("can't create debugfs dir\n");
}
void __exit omap_iommu_debugfs_exit(void)
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index d2fb347..09c6e1c 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* omap iommu: tlb and pagetable primitives
*
@@ -6,10 +7,6 @@
*
* Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>,
* Paul Mundt and Toshihiro Kobayashi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/dma-mapping.h>
@@ -38,8 +35,16 @@
static const struct iommu_ops omap_iommu_ops;
-#define to_iommu(dev) \
- ((struct omap_iommu *)platform_get_drvdata(to_platform_device(dev)))
+struct orphan_dev { /* client device whose IOMMU platform device could not be found yet */
+ struct device *dev; /* the client device to retry via _omap_iommu_add_device() */
+ struct list_head node; /* link in orphan_dev_list */
+};
+
+static LIST_HEAD(orphan_dev_list); /* entries are retried, and freed on success, after an IOMMU registers */
+
+static DEFINE_SPINLOCK(orphan_lock); /* held for list lookup/insert; NOTE(review): the retry loop deletes entries without it */
+
+#define to_iommu(dev) ((struct omap_iommu *)dev_get_drvdata(dev))
/* bitmap of the page sizes currently supported */
#define OMAP_IOMMU_PGSIZES (SZ_4K | SZ_64K | SZ_1M | SZ_16M)
@@ -57,6 +62,8 @@
static struct platform_driver omap_iommu_driver;
static struct kmem_cache *iopte_cachep;
+static int _omap_iommu_add_device(struct device *dev);
+
/**
* to_omap_domain - Get struct omap_iommu_domain from generic iommu_domain
* @dom: generic iommu domain handle
@@ -69,6 +76,9 @@
/**
* omap_iommu_save_ctx - Save registers for pm off-mode support
* @dev: client device
+ *
+ * This should be treated as a deprecated API. It is preserved only
+ * to maintain existing functionality for OMAP3 ISP driver.
**/
void omap_iommu_save_ctx(struct device *dev)
{
@@ -96,6 +106,9 @@
/**
* omap_iommu_restore_ctx - Restore registers for pm off-mode support
* @dev: client device
+ *
+ * This should be treated as a deprecated API. It is preserved only
+ * to maintain existing functionality for OMAP3 ISP driver.
**/
void omap_iommu_restore_ctx(struct device *dev)
{
@@ -190,36 +203,18 @@
static int iommu_enable(struct omap_iommu *obj)
{
- int err;
- struct platform_device *pdev = to_platform_device(obj->dev);
- struct iommu_platform_data *pdata = dev_get_platdata(&pdev->dev);
+ int ret;
- if (pdata && pdata->deassert_reset) {
- err = pdata->deassert_reset(pdev, pdata->reset_name);
- if (err) {
- dev_err(obj->dev, "deassert_reset failed: %d\n", err);
- return err;
- }
- }
+ ret = pm_runtime_get_sync(obj->dev);
+ if (ret < 0)
+ pm_runtime_put_noidle(obj->dev);
- pm_runtime_get_sync(obj->dev);
-
- err = omap2_iommu_enable(obj);
-
- return err;
+ return ret < 0 ? ret : 0;
}
static void iommu_disable(struct omap_iommu *obj)
{
- struct platform_device *pdev = to_platform_device(obj->dev);
- struct iommu_platform_data *pdata = dev_get_platdata(&pdev->dev);
-
- omap2_iommu_disable(obj);
-
pm_runtime_put_sync(obj->dev);
-
- if (pdata && pdata->assert_reset)
- pdata->assert_reset(pdev, pdata->reset_name);
}
/*
@@ -905,15 +900,219 @@
dma_unmap_single(obj->dev, obj->pd_dma, IOPGD_TABLE_SIZE,
DMA_TO_DEVICE);
- iommu_disable(obj);
obj->pd_dma = 0;
obj->iopgd = NULL;
+ iommu_disable(obj);
spin_unlock(&obj->iommu_lock);
dev_dbg(obj->dev, "%s: %s\n", __func__, obj->name);
}
+static void omap_iommu_save_tlb_entries(struct omap_iommu *obj)
+{
+ struct iotlb_lock lock;
+ struct cr_regs cr;
+ struct cr_regs *tmp; /* write cursor into obj->cr_ctx */
+ int i;
+
+ /* check if there are any locked tlbs to save; lock.base is recorded as their count */
+ iotlb_lock_get(obj, &lock);
+ obj->num_cr_ctx = lock.base;
+ if (!obj->num_cr_ctx)
+ return; /* nothing locked, nothing to save */
+
+ tmp = obj->cr_ctx; /* save area allocated with nr_tlb_entries elements */
+ for_each_iotlb_cr(obj, obj->num_cr_ctx, i, cr)
+ * tmp++ = cr; /* copy every entry the iterator yields into the save area */
+}
+
+static void omap_iommu_restore_tlb_entries(struct omap_iommu *obj)
+{
+ struct iotlb_lock l;
+ struct cr_regs *tmp; /* read cursor into obj->cr_ctx */
+ int i;
+
+ /* no locked tlbs to restore (num_cr_ctx is written by omap_iommu_save_tlb_entries()) */
+ if (!obj->num_cr_ctx)
+ return;
+
+ l.base = 0; /* base stays 0 while the saved entries are reloaded */
+ tmp = obj->cr_ctx;
+ for (i = 0; i < obj->num_cr_ctx; i++, tmp++) {
+ l.vict = i; /* presumably selects slot i for the load below - TODO confirm */
+ iotlb_lock_set(obj, &l);
+ iotlb_load_cr(obj, tmp);
+ }
+ l.base = obj->num_cr_ctx; /* finally set base to the number of reloaded entries */
+ l.vict = i; /* i == obj->num_cr_ctx here */
+ iotlb_lock_set(obj, &l);
+}
+
+/**
+ * omap_iommu_domain_deactivate - deactivate attached iommu devices
+ * @domain: iommu domain attached to the target iommu device
+ *
+ * This API allows the client devices of IOMMU devices to suspend
+ * the IOMMUs they control at runtime, after they have been idled and
+ * have suspended all activity. System Suspend will leverage the PM
+ * driver late callbacks. Always returns 0.
+ **/
+int omap_iommu_domain_deactivate(struct iommu_domain *domain)
+{
+ struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
+ struct omap_iommu_device *iommu;
+ struct omap_iommu *oiommu;
+ int i;
+
+ if (!omap_domain->dev) /* no device recorded in the domain: nothing to do */
+ return 0;
+
+ iommu = omap_domain->iommus;
+ iommu += (omap_domain->num_iommus - 1); /* start at the last entry and walk backwards */
+ for (i = 0; i < omap_domain->num_iommus; i++, iommu--) {
+ oiommu = iommu->iommu_dev;
+ pm_runtime_put_sync(oiommu->dev); /* return value is not checked */
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(omap_iommu_domain_deactivate);
+
+/**
+ * omap_iommu_domain_activate - activate attached iommu devices
+ * @domain: iommu domain attached to the target iommu device
+ *
+ * This API allows the client devices of IOMMU devices to resume the
+ * IOMMUs they control at runtime, before they can resume operations.
+ * System Resume will leverage the PM driver late callbacks. Always returns 0.
+ **/
+int omap_iommu_domain_activate(struct iommu_domain *domain)
+{
+ struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
+ struct omap_iommu_device *iommu;
+ struct omap_iommu *oiommu;
+ int i;
+
+ if (!omap_domain->dev) /* no device recorded in the domain: nothing to do */
+ return 0;
+
+ iommu = omap_domain->iommus; /* first entry; walked forwards */
+ for (i = 0; i < omap_domain->num_iommus; i++, iommu++) {
+ oiommu = iommu->iommu_dev;
+ pm_runtime_get_sync(oiommu->dev); /* NOTE(review): result ignored, a failed resume is not reported */
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(omap_iommu_domain_activate);
+
+/**
+ * omap_iommu_runtime_suspend - disable an iommu device
+ * @dev: iommu device
+ *
+ * This function performs all that is necessary to disable an
+ * IOMMU device, either during final detachment from a client
+ * device, or during system/runtime suspend of the device. This
+ * includes programming all the appropriate IOMMU registers, and
+ * managing the associated omap_hwmod's state and the device's
+ * reset line. This function also saves the context of any
+ * locked TLBs if suspending. Always returns 0.
+ **/
+static __maybe_unused int omap_iommu_runtime_suspend(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct iommu_platform_data *pdata = dev_get_platdata(dev);
+ struct omap_iommu *obj = to_iommu(dev);
+ int ret;
+
+ /* save the TLBs only during suspend, and not for power down */
+ if (obj->domain && obj->iopgd) /* i.e. only while a domain and a page table are set */
+ omap_iommu_save_tlb_entries(obj);
+
+ omap2_iommu_disable(obj);
+
+ if (pdata && pdata->device_idle) /* every platform-data hook below is optional */
+ pdata->device_idle(pdev);
+
+ if (pdata && pdata->assert_reset)
+ pdata->assert_reset(pdev, pdata->reset_name); /* return value is not checked */
+
+ if (pdata && pdata->set_pwrdm_constraint) {
+ ret = pdata->set_pwrdm_constraint(pdev, false, &obj->pwrst);
+ if (ret) {
+ dev_warn(obj->dev, "pwrdm_constraint failed to be reset, status = %d\n",
+ ret);
+ }
+ }
+
+ return 0; /* a set_pwrdm_constraint() failure is only warned about */
+}
+
+/**
+ * omap_iommu_runtime_resume - enable an iommu device
+ * @dev: iommu device
+ *
+ * This function performs all that is necessary to enable an
+ * IOMMU device, either during initial attachment to a client
+ * device, or during system/runtime resume of the device. This
+ * includes programming all the appropriate IOMMU registers, and
+ * managing the associated omap_hwmod's state and the device's
+ * reset line. The function also restores any locked TLBs if
+ * resuming after a suspend. Returns the deassert_reset() error, else the omap2_iommu_enable() result.
+ **/
+static __maybe_unused int omap_iommu_runtime_resume(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct iommu_platform_data *pdata = dev_get_platdata(dev);
+ struct omap_iommu *obj = to_iommu(dev);
+ int ret = 0;
+
+ if (pdata && pdata->set_pwrdm_constraint) {
+ ret = pdata->set_pwrdm_constraint(pdev, true, &obj->pwrst);
+ if (ret) { /* only warned about; resume continues */
+ dev_warn(obj->dev, "pwrdm_constraint failed to be set, status = %d\n",
+ ret);
+ }
+ }
+
+ if (pdata && pdata->deassert_reset) {
+ ret = pdata->deassert_reset(pdev, pdata->reset_name);
+ if (ret) {
+ dev_err(dev, "deassert_reset failed: %d\n", ret);
+ return ret; /* NOTE(review): the pwrdm constraint set above is not undone on this path */
+ }
+ }
+
+ if (pdata && pdata->device_enable)
+ pdata->device_enable(pdev);
+
+ /* restore the TLBs only during resume, and not for power up */
+ if (obj->domain) /* the restore itself is a no-op when num_cr_ctx is 0 */
+ omap_iommu_restore_tlb_entries(obj);
+
+ ret = omap2_iommu_enable(obj); /* overwrites any earlier warning status in ret */
+
+ return ret;
+}
+
+/**
+ * omap_iommu_prepare - prepare() dev_pm_ops implementation
+ * @dev: iommu device
+ *
+ * This function performs the necessary checks to determine if the IOMMU
+ * device needs suspending or not. The function checks if the runtime_pm
+ * status of the device is suspended, and returns 1 in that case. This
+ * causes the PM core to skip invoking any of the Sleep PM callbacks
+ * (suspend, suspend_late, resume, resume_early etc). Returns 0 otherwise.
+ */
+static int omap_iommu_prepare(struct device *dev)
+{
+ if (pm_runtime_status_suspended(dev))
+ return 1;
+ return 0;
+}
+
static bool omap_iommu_can_register(struct platform_device *pdev)
{
struct device_node *np = pdev->dev.of_node;
@@ -978,6 +1177,7 @@
struct omap_iommu *obj;
struct resource *res;
struct device_node *of = pdev->dev.of_node;
+ struct orphan_dev *orphan_dev, *tmp;
if (!of) {
pr_err("%s: only DT-based devices are supported\n", __func__);
@@ -988,6 +1188,15 @@
if (!obj)
return -ENOMEM;
+ /*
+ * self-manage the ordering dependencies between omap_device_enable/idle
+ * and omap_device_assert/deassert_hardreset API
+ */
+ if (pdev->dev.pm_domain) {
+ dev_dbg(&pdev->dev, "device pm_domain is being reset\n");
+ pdev->dev.pm_domain = NULL;
+ }
+
obj->name = dev_name(&pdev->dev);
obj->nr_tlb_entries = 32;
err = of_property_read_u32(of, "ti,#tlb-entries", &obj->nr_tlb_entries);
@@ -1000,6 +1209,11 @@
obj->dev = &pdev->dev;
obj->ctx = (void *)obj + sizeof(*obj);
+ obj->cr_ctx = devm_kzalloc(&pdev->dev,
+ sizeof(*obj->cr_ctx) * obj->nr_tlb_entries,
+ GFP_KERNEL);
+ if (!obj->cr_ctx)
+ return -ENOMEM;
spin_lock_init(&obj->iommu_lock);
spin_lock_init(&obj->page_table_lock);
@@ -1040,13 +1254,20 @@
goto out_sysfs;
}
- pm_runtime_irq_safe(obj->dev);
pm_runtime_enable(obj->dev);
omap_iommu_debugfs_add(obj);
dev_info(&pdev->dev, "%s registered\n", obj->name);
+ list_for_each_entry_safe(orphan_dev, tmp, &orphan_dev_list, node) {
+ err = _omap_iommu_add_device(orphan_dev->dev);
+ if (!err) {
+ list_del(&orphan_dev->node);
+ kfree(orphan_dev);
+ }
+ }
+
return 0;
out_sysfs:
@@ -1076,6 +1297,14 @@
return 0;
}
+static const struct dev_pm_ops omap_iommu_pm_ops = { /* PM callbacks of the omap-iommu platform driver */
+ .prepare = omap_iommu_prepare, /* returns 1 when the device is already runtime-suspended */
+ SET_LATE_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+ pm_runtime_force_resume) /* late system sleep goes through the generic runtime-PM force helpers */
+ SET_RUNTIME_PM_OPS(omap_iommu_runtime_suspend,
+ omap_iommu_runtime_resume, NULL) /* NULL: no runtime idle callback */
+};
+
static const struct of_device_id omap_iommu_of_match[] = {
{ .compatible = "ti,omap2-iommu" },
{ .compatible = "ti,omap4-iommu" },
@@ -1089,6 +1318,7 @@
.remove = omap_iommu_remove,
.driver = {
.name = "omap-iommu",
+ .pm = &omap_iommu_pm_ops,
.of_match_table = of_match_ptr(omap_iommu_of_match),
},
};
@@ -1153,7 +1383,7 @@
}
static size_t omap_iommu_unmap(struct iommu_domain *domain, unsigned long da,
- size_t size)
+ size_t size, struct iommu_iotlb_gather *gather)
{
struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
struct device *dev = omap_domain->dev;
@@ -1427,7 +1657,7 @@
return ret;
}
-static int omap_iommu_add_device(struct device *dev)
+static int _omap_iommu_add_device(struct device *dev)
{
struct omap_iommu_arch_data *arch_data, *tmp;
struct omap_iommu *oiommu;
@@ -1436,6 +1666,8 @@
struct platform_device *pdev;
int num_iommus, i;
int ret;
+ struct orphan_dev *orphan_dev;
+ unsigned long flags;
/*
* Allocate the archdata iommu structure for DT-based devices.
@@ -1467,10 +1699,26 @@
}
pdev = of_find_device_by_node(np);
- if (WARN_ON(!pdev)) {
+ if (!pdev) {
of_node_put(np);
kfree(arch_data);
- return -EINVAL;
+ spin_lock_irqsave(&orphan_lock, flags);
+ list_for_each_entry(orphan_dev, &orphan_dev_list,
+ node) {
+ if (orphan_dev->dev == dev)
+ break;
+ }
+ spin_unlock_irqrestore(&orphan_lock, flags);
+
+ if (orphan_dev && orphan_dev->dev == dev)
+ return -EPROBE_DEFER;
+
+ orphan_dev = kzalloc(sizeof(*orphan_dev), GFP_KERNEL);
+ orphan_dev->dev = dev;
+ spin_lock_irqsave(&orphan_lock, flags);
+ list_add(&orphan_dev->node, &orphan_dev_list);
+ spin_unlock_irqrestore(&orphan_lock, flags);
+ return -EPROBE_DEFER;
}
oiommu = platform_get_drvdata(pdev);
@@ -1481,6 +1729,7 @@
}
tmp->iommu_dev = oiommu;
+ tmp->dev = &pdev->dev;
of_node_put(np);
}
@@ -1515,6 +1764,17 @@
return 0;
}
+static int omap_iommu_add_device(struct device *dev) /* add_device wrapper that hides -EPROBE_DEFER */
+{
+ int ret;
+
+ ret = _omap_iommu_add_device(dev); /* queues dev as an orphan and returns -EPROBE_DEFER if its IOMMU is not found */
+ if (ret == -EPROBE_DEFER)
+ return 0; /* reported as success; the orphan is retried once an IOMMU registers */
+
+ return ret; /* any other result is passed through unchanged */
+}
+
static void omap_iommu_remove_device(struct device *dev)
{
struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
@@ -1558,7 +1818,7 @@
static int __init omap_iommu_init(void)
{
struct kmem_cache *p;
- const unsigned long flags = SLAB_HWCACHE_ALIGN;
+ const slab_flags_t flags = SLAB_HWCACHE_ALIGN;
size_t align = 1 << 10; /* L2 pagetable alignement */
struct device_node *np;
int ret;
diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h
index 1703159..18ee713 100644
--- a/drivers/iommu/omap-iommu.h
+++ b/drivers/iommu/omap-iommu.h
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* omap iommu: main structures
*
* Copyright (C) 2008-2009 Nokia Corporation
*
* Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#ifndef _OMAP_IOMMU_H
@@ -76,16 +73,22 @@
void *ctx; /* iommu context: registres saved area */
+ struct cr_regs *cr_ctx;
+ u32 num_cr_ctx;
+
int has_bus_err_back;
u32 id;
struct iommu_device iommu;
struct iommu_group *group;
+
+ u8 pwrst;
};
/**
* struct omap_iommu_arch_data - omap iommu private data
- * @iommu_dev: handle of the iommu device
+ * @iommu_dev: handle of the OMAP iommu device
+ * @dev: handle of the iommu device
*
* This is an omap iommu private data object, which binds an iommu user
* to its iommu device. This object should be placed at the iommu user's
@@ -94,6 +97,7 @@
*/
struct omap_iommu_arch_data {
struct omap_iommu *iommu_dev;
+ struct device *dev;
};
struct cr_regs {
diff --git a/drivers/iommu/omap-iopgtable.h b/drivers/iommu/omap-iopgtable.h
index 01a3152..1a4adb5 100644
--- a/drivers/iommu/omap-iopgtable.h
+++ b/drivers/iommu/omap-iopgtable.h
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* omap iommu: pagetable definitions
*
* Copyright (C) 2008-2010 Nokia Corporation
*
* Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#ifndef _OMAP_IOPGTABLE_H
diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c
index ee70e99..c31e7bc 100644
--- a/drivers/iommu/qcom_iommu.c
+++ b/drivers/iommu/qcom_iommu.c
@@ -1,23 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* IOMMU API for QCOM secure IOMMUs. Somewhat based on arm-smmu.c
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
* Copyright (C) 2013 ARM Limited
* Copyright (C) 2017 Red Hat
*/
#include <linux/atomic.h>
+#include <linux/bitfield.h>
#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/dma-iommu.h>
@@ -26,10 +16,11 @@
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/io-64-nonatomic-hi-lo.h>
+#include <linux/io-pgtable.h>
#include <linux/iommu.h>
#include <linux/iopoll.h>
#include <linux/kconfig.h>
-#include <linux/module.h>
+#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/of.h>
#include <linux/of_address.h>
@@ -42,8 +33,7 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
-#include "io-pgtable.h"
-#include "arm-smmu-regs.h"
+#include "arm-smmu.h"
#define SMMU_INTR_SEL_NS 0x2000
@@ -166,7 +156,7 @@
struct qcom_iommu_ctx *ctx = to_ctx(fwspec, fwspec->ids[i]);
size_t s = size;
- iova &= ~12UL;
+ iova = (iova >> 12) << 12;
iova |= ctx->asid;
do {
iommu_writel(ctx, reg, iova);
@@ -175,10 +165,32 @@
}
}
-static const struct iommu_gather_ops qcom_gather_ops = {
+static void qcom_iommu_tlb_flush_walk(unsigned long iova, size_t size,
+ size_t granule, void *cookie) /* .tlb_flush_walk hook of qcom_flush_ops */
+{
+ qcom_iommu_tlb_inv_range_nosync(iova, size, granule, false, cookie); /* 4th argument false; presumably the "leaf" flag - TODO confirm */
+ qcom_iommu_tlb_sync(cookie); /* the range call is the _nosync variant, so sync explicitly */
+}
+
+static void qcom_iommu_tlb_flush_leaf(unsigned long iova, size_t size,
+ size_t granule, void *cookie) /* .tlb_flush_leaf hook of qcom_flush_ops */
+{
+ qcom_iommu_tlb_inv_range_nosync(iova, size, granule, true, cookie); /* 4th argument true; presumably the "leaf" flag - TODO confirm */
+ qcom_iommu_tlb_sync(cookie); /* the range call is the _nosync variant, so sync explicitly */
+}
+
+static void qcom_iommu_tlb_add_page(struct iommu_iotlb_gather *gather,
+ unsigned long iova, size_t granule,
+ void *cookie) /* .tlb_add_page hook of qcom_flush_ops; gather is unused */
+{
+ qcom_iommu_tlb_inv_range_nosync(iova, granule, granule, true, cookie); /* one granule-sized range; no qcom_iommu_tlb_sync() here */
+}
+
+static const struct iommu_flush_ops qcom_flush_ops = {
.tlb_flush_all = qcom_iommu_tlb_inv_context,
- .tlb_add_flush = qcom_iommu_tlb_inv_range_nosync,
- .tlb_sync = qcom_iommu_tlb_sync,
+ .tlb_flush_walk = qcom_iommu_tlb_flush_walk,
+ .tlb_flush_leaf = qcom_iommu_tlb_flush_leaf,
+ .tlb_add_page = qcom_iommu_tlb_add_page,
};
static irqreturn_t qcom_iommu_fault(int irq, void *dev)
@@ -226,7 +238,7 @@
.pgsize_bitmap = qcom_iommu_ops.pgsize_bitmap,
.ias = 32,
.oas = 40,
- .tlb = &qcom_gather_ops,
+ .tlb = &qcom_flush_ops,
.iommu_dev = qcom_iommu->dev,
};
@@ -258,16 +270,16 @@
/* TTBRs */
iommu_writeq(ctx, ARM_SMMU_CB_TTBR0,
pgtbl_cfg.arm_lpae_s1_cfg.ttbr[0] |
- ((u64)ctx->asid << TTBRn_ASID_SHIFT));
+ FIELD_PREP(TTBRn_ASID, ctx->asid));
iommu_writeq(ctx, ARM_SMMU_CB_TTBR1,
pgtbl_cfg.arm_lpae_s1_cfg.ttbr[1] |
- ((u64)ctx->asid << TTBRn_ASID_SHIFT));
+ FIELD_PREP(TTBRn_ASID, ctx->asid));
- /* TTBCR */
- iommu_writel(ctx, ARM_SMMU_CB_TTBCR2,
+ /* TCR */
+ iommu_writel(ctx, ARM_SMMU_CB_TCR2,
(pgtbl_cfg.arm_lpae_s1_cfg.tcr >> 32) |
- TTBCR2_SEP_UPSTREAM);
- iommu_writel(ctx, ARM_SMMU_CB_TTBCR,
+ FIELD_PREP(TCR2_SEP, TCR2_SEP_UPSTREAM));
+ iommu_writel(ctx, ARM_SMMU_CB_TCR,
pgtbl_cfg.arm_lpae_s1_cfg.tcr);
/* MAIRs (stage-1 only) */
@@ -354,7 +366,8 @@
static int qcom_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
- struct qcom_iommu_dev *qcom_iommu = to_iommu(dev->iommu_fwspec);
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+ struct qcom_iommu_dev *qcom_iommu = to_iommu(fwspec);
struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain);
int ret;
@@ -365,7 +378,7 @@
/* Ensure that the domain is finalized */
pm_runtime_get_sync(qcom_iommu->dev);
- ret = qcom_iommu_init_domain(domain, qcom_iommu, dev->iommu_fwspec);
+ ret = qcom_iommu_init_domain(domain, qcom_iommu, fwspec);
pm_runtime_put_sync(qcom_iommu->dev);
if (ret < 0)
return ret;
@@ -387,7 +400,7 @@
static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *dev)
{
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct qcom_iommu_dev *qcom_iommu = to_iommu(fwspec);
struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain);
unsigned i;
@@ -427,7 +440,7 @@
}
static size_t qcom_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
- size_t size)
+ size_t size, struct iommu_iotlb_gather *gather)
{
size_t ret;
unsigned long flags;
@@ -444,14 +457,14 @@
*/
pm_runtime_get_sync(qcom_domain->iommu->dev);
spin_lock_irqsave(&qcom_domain->pgtbl_lock, flags);
- ret = ops->unmap(ops, iova, size);
+ ret = ops->unmap(ops, iova, size, gather);
spin_unlock_irqrestore(&qcom_domain->pgtbl_lock, flags);
pm_runtime_put_sync(qcom_domain->iommu->dev);
return ret;
}
-static void qcom_iommu_iotlb_sync(struct iommu_domain *domain)
+static void qcom_iommu_flush_iotlb_all(struct iommu_domain *domain)
{
struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain);
struct io_pgtable *pgtable = container_of(qcom_domain->pgtbl_ops,
@@ -464,6 +477,12 @@
pm_runtime_put_sync(qcom_domain->iommu->dev);
}
+static void qcom_iommu_iotlb_sync(struct iommu_domain *domain,
+ struct iommu_iotlb_gather *gather) /* .iotlb_sync hook; gather is unused */
+{
+ qcom_iommu_flush_iotlb_all(domain); /* delegates to the same routine used for .flush_iotlb_all */
+}
+
static phys_addr_t qcom_iommu_iova_to_phys(struct iommu_domain *domain,
dma_addr_t iova)
{
@@ -500,7 +519,7 @@
static int qcom_iommu_add_device(struct device *dev)
{
- struct qcom_iommu_dev *qcom_iommu = to_iommu(dev->iommu_fwspec);
+ struct qcom_iommu_dev *qcom_iommu = to_iommu(dev_iommu_fwspec_get(dev));
struct iommu_group *group;
struct device_link *link;
@@ -531,7 +550,7 @@
static void qcom_iommu_remove_device(struct device *dev)
{
- struct qcom_iommu_dev *qcom_iommu = to_iommu(dev->iommu_fwspec);
+ struct qcom_iommu_dev *qcom_iommu = to_iommu(dev_iommu_fwspec_get(dev));
if (!qcom_iommu)
return;
@@ -543,6 +562,7 @@
static int qcom_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
{
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct qcom_iommu_dev *qcom_iommu;
struct platform_device *iommu_pdev;
unsigned asid = args->args[0];
@@ -568,14 +588,14 @@
WARN_ON(asid > qcom_iommu->num_ctxs))
return -EINVAL;
- if (!dev->iommu_fwspec->iommu_priv) {
- dev->iommu_fwspec->iommu_priv = qcom_iommu;
+ if (!fwspec->iommu_priv) {
+ fwspec->iommu_priv = qcom_iommu;
} else {
/* make sure devices iommus dt node isn't referring to
* multiple different iommu devices. Multiple context
* banks are ok, but multiple devices are not:
*/
- if (WARN_ON(qcom_iommu != dev->iommu_fwspec->iommu_priv))
+ if (WARN_ON(qcom_iommu != fwspec->iommu_priv))
return -EINVAL;
}
@@ -590,7 +610,7 @@
.detach_dev = qcom_iommu_detach_dev,
.map = qcom_iommu_map,
.unmap = qcom_iommu_unmap,
- .flush_iotlb_all = qcom_iommu_iotlb_sync,
+ .flush_iotlb_all = qcom_iommu_flush_iotlb_all,
.iotlb_sync = qcom_iommu_iotlb_sync,
.iova_to_phys = qcom_iommu_iova_to_phys,
.add_device = qcom_iommu_add_device,
@@ -705,10 +725,8 @@
return PTR_ERR(ctx->base);
irq = platform_get_irq(pdev, 0);
- if (irq < 0) {
- dev_err(dev, "failed to get irq\n");
+ if (irq < 0)
return -ENODEV;
- }
/* clear IRQs before registering fault handler, just in case the
* boot-loader left us a surprise:
@@ -784,7 +802,7 @@
struct qcom_iommu_dev *qcom_iommu;
struct device *dev = &pdev->dev;
struct resource *res;
- int ret, sz, max_asid = 0;
+ int ret, max_asid = 0;
/* find the max asid (which is 1:1 to ctx bank idx), so we know how
* many child ctx devices we have:
@@ -792,9 +810,8 @@
for_each_child_of_node(dev->of_node, child)
max_asid = max(max_asid, get_asid(child));
- sz = sizeof(*qcom_iommu) + (max_asid * sizeof(qcom_iommu->ctxs[0]));
-
- qcom_iommu = devm_kzalloc(dev, sz, GFP_KERNEL);
+ qcom_iommu = devm_kzalloc(dev, struct_size(qcom_iommu, ctxs, max_asid),
+ GFP_KERNEL);
if (!qcom_iommu)
return -ENOMEM;
qcom_iommu->num_ctxs = max_asid;
@@ -908,7 +925,6 @@
{ .compatible = "qcom,msm-iommu-v1" },
{ /* sentinel */ }
};
-MODULE_DEVICE_TABLE(of, qcom_iommu_of_match);
static struct platform_driver qcom_iommu_driver = {
.driver = {
@@ -934,15 +950,4 @@
return ret;
}
-
-static void __exit qcom_iommu_exit(void)
-{
- platform_driver_unregister(&qcom_iommu_driver);
- platform_driver_unregister(&qcom_iommu_ctx_driver);
-}
-
-module_init(qcom_iommu_init);
-module_exit(qcom_iommu_exit);
-
-MODULE_DESCRIPTION("IOMMU API for QCOM IOMMU v1 implementations");
-MODULE_LICENSE("GPL v2");
+device_initcall(qcom_iommu_init);
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index ad3e2b9..4dcbf68 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -1,7 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * IOMMU API for Rockchip
+ *
+ * Module Authors: Simon Xue <xxm@rock-chips.com>
+ * Daniel Kurtz <djkurtz@chromium.org>
*/
#include <linux/clk.h>
@@ -17,7 +19,7 @@
#include <linux/iopoll.h>
#include <linux/list.h>
#include <linux/mm.h>
-#include <linux/module.h>
+#include <linux/init.h>
#include <linux/of.h>
#include <linux/of_iommu.h>
#include <linux/of_platform.h>
@@ -98,6 +100,7 @@
struct device *dev;
void __iomem **bases;
int num_mmu;
+ int num_irq;
struct clk_bulk_data *clocks;
int num_clocks;
bool reset_disabled;
@@ -792,7 +795,7 @@
}
static size_t rk_iommu_unmap(struct iommu_domain *domain, unsigned long _iova,
- size_t size)
+ size_t size, struct iommu_iotlb_gather *gather)
{
struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
unsigned long flags;
@@ -1066,7 +1069,8 @@
iommu_group_put(group);
iommu_device_link(&iommu->iommu, dev);
- data->link = device_link_add(dev, iommu->dev, DL_FLAG_PM_RUNTIME);
+ data->link = device_link_add(dev, iommu->dev,
+ DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME);
return 0;
}
@@ -1133,7 +1137,7 @@
struct rk_iommu *iommu;
struct resource *res;
int num_res = pdev->num_resources;
- int err, i, irq;
+ int err, i;
iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL);
if (!iommu)
@@ -1160,6 +1164,10 @@
if (iommu->num_mmu == 0)
return PTR_ERR(iommu->bases[0]);
+ iommu->num_irq = platform_irq_count(pdev);
+ if (iommu->num_irq < 0)
+ return iommu->num_irq;
+
iommu->reset_disabled = device_property_read_bool(dev,
"rockchip,disable-mmu-reset");
@@ -1216,8 +1224,9 @@
pm_runtime_enable(dev);
- i = 0;
- while ((irq = platform_get_irq(pdev, i++)) != -ENXIO) {
+ for (i = 0; i < iommu->num_irq; i++) {
+ int irq = platform_get_irq(pdev, i);
+
if (irq < 0)
return irq;
@@ -1242,10 +1251,13 @@
static void rk_iommu_shutdown(struct platform_device *pdev)
{
struct rk_iommu *iommu = platform_get_drvdata(pdev);
- int i = 0, irq;
+ int i;
- while ((irq = platform_get_irq(pdev, i++)) != -ENXIO)
+ for (i = 0; i < iommu->num_irq; i++) {
+ int irq = platform_get_irq(pdev, i);
+
devm_free_irq(iommu->dev, irq, iommu);
+ }
pm_runtime_force_suspend(&pdev->dev);
}
@@ -1281,7 +1293,6 @@
{ .compatible = "rockchip,iommu" },
{ /* sentinel */ }
};
-MODULE_DEVICE_TABLE(of, rk_iommu_dt_ids);
static struct platform_driver rk_iommu_driver = {
.probe = rk_iommu_probe,
@@ -1299,8 +1310,3 @@
return platform_driver_register(&rk_iommu_driver);
}
subsys_initcall(rk_iommu_init);
-
-MODULE_DESCRIPTION("IOMMU API for Rockchip");
-MODULE_AUTHOR("Simon Xue <xxm@rock-chips.com> and Daniel Kurtz <djkurtz@chromium.org>");
-MODULE_ALIAS("platform:rockchip-iommu");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 22d4db3..3b0b18e 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -314,7 +314,8 @@
}
static size_t s390_iommu_unmap(struct iommu_domain *domain,
- unsigned long iova, size_t size)
+ unsigned long iova, size_t size,
+ struct iommu_iotlb_gather *gather)
{
struct s390_domain *s390_domain = to_s390_domain(domain);
int flags = ZPCI_PTE_INVALID;
diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c
index 7b1361d..3924f7c 100644
--- a/drivers/iommu/tegra-gart.c
+++ b/drivers/iommu/tegra-gart.c
@@ -1,116 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
- * IOMMU API for GART in Tegra20
+ * IOMMU API for Graphics Address Relocation Table on Tegra20
*
* Copyright (c) 2010-2012, NVIDIA CORPORATION. All rights reserved.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ * Author: Hiroshi DOYU <hdoyu@nvidia.com>
*/
-#define pr_fmt(fmt) "%s(): " fmt, __func__
+#define dev_fmt(fmt) "gart: " fmt
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/spinlock.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/mm.h>
-#include <linux/list.h>
-#include <linux/device.h>
#include <linux/io.h>
#include <linux/iommu.h>
-#include <linux/of.h>
+#include <linux/moduleparam.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
-#include <asm/cacheflush.h>
-
-/* bitmap of the page sizes currently supported */
-#define GART_IOMMU_PGSIZES (SZ_4K)
+#include <soc/tegra/mc.h>
#define GART_REG_BASE 0x24
#define GART_CONFIG (0x24 - GART_REG_BASE)
#define GART_ENTRY_ADDR (0x28 - GART_REG_BASE)
#define GART_ENTRY_DATA (0x2c - GART_REG_BASE)
-#define GART_ENTRY_PHYS_ADDR_VALID (1 << 31)
+
+#define GART_ENTRY_PHYS_ADDR_VALID BIT(31)
#define GART_PAGE_SHIFT 12
#define GART_PAGE_SIZE (1 << GART_PAGE_SHIFT)
-#define GART_PAGE_MASK \
- (~(GART_PAGE_SIZE - 1) & ~GART_ENTRY_PHYS_ADDR_VALID)
+#define GART_PAGE_MASK GENMASK(30, GART_PAGE_SHIFT)
-struct gart_client {
- struct device *dev;
- struct list_head list;
-};
+/* bitmap of the page sizes currently supported */
+#define GART_IOMMU_PGSIZES (GART_PAGE_SIZE)
struct gart_device {
void __iomem *regs;
u32 *savedata;
- u32 page_count; /* total remappable size */
- dma_addr_t iovmm_base; /* offset to vmm_area */
+ unsigned long iovmm_base; /* offset to vmm_area start */
+ unsigned long iovmm_end; /* offset to vmm_area end */
spinlock_t pte_lock; /* for pagetable */
- struct list_head client;
- spinlock_t client_lock; /* for client list */
- struct device *dev;
-
+ spinlock_t dom_lock; /* for active domain */
+ unsigned int active_devices; /* number of active devices */
+ struct iommu_domain *active_domain; /* current active domain */
struct iommu_device iommu; /* IOMMU Core handle */
-};
-
-struct gart_domain {
- struct iommu_domain domain; /* generic domain handle */
- struct gart_device *gart; /* link to gart device */
+ struct device *dev;
};
static struct gart_device *gart_handle; /* unique for a system */
static bool gart_debug;
-#define GART_PTE(_pfn) \
- (GART_ENTRY_PHYS_ADDR_VALID | ((_pfn) << PAGE_SHIFT))
-
-static struct gart_domain *to_gart_domain(struct iommu_domain *dom)
-{
- return container_of(dom, struct gart_domain, domain);
-}
-
/*
* Any interaction between any block on PPSB and a block on APB or AHB
* must have these read-back to ensure the APB/AHB bus transaction is
* complete before initiating activity on the PPSB block.
*/
-#define FLUSH_GART_REGS(gart) ((void)readl((gart)->regs + GART_CONFIG))
+#define FLUSH_GART_REGS(gart) readl_relaxed((gart)->regs + GART_CONFIG)
#define for_each_gart_pte(gart, iova) \
for (iova = gart->iovmm_base; \
- iova < gart->iovmm_base + GART_PAGE_SIZE * gart->page_count; \
+ iova < gart->iovmm_end; \
iova += GART_PAGE_SIZE)
static inline void gart_set_pte(struct gart_device *gart,
- unsigned long offs, u32 pte)
+ unsigned long iova, unsigned long pte)
{
- writel(offs, gart->regs + GART_ENTRY_ADDR);
- writel(pte, gart->regs + GART_ENTRY_DATA);
-
- dev_dbg(gart->dev, "%s %08lx:%08x\n",
- pte ? "map" : "unmap", offs, pte & GART_PAGE_MASK);
+ writel_relaxed(iova, gart->regs + GART_ENTRY_ADDR);
+ writel_relaxed(pte, gart->regs + GART_ENTRY_DATA);
}
static inline unsigned long gart_read_pte(struct gart_device *gart,
- unsigned long offs)
+ unsigned long iova)
{
unsigned long pte;
- writel(offs, gart->regs + GART_ENTRY_ADDR);
- pte = readl(gart->regs + GART_ENTRY_DATA);
+ writel_relaxed(iova, gart->regs + GART_ENTRY_ADDR);
+ pte = readl_relaxed(gart->regs + GART_ENTRY_DATA);
return pte;
}
@@ -122,224 +87,155 @@
for_each_gart_pte(gart, iova)
gart_set_pte(gart, iova, data ? *(data++) : 0);
- writel(1, gart->regs + GART_CONFIG);
+ writel_relaxed(1, gart->regs + GART_CONFIG);
FLUSH_GART_REGS(gart);
}
-#ifdef DEBUG
-static void gart_dump_table(struct gart_device *gart)
+static inline bool gart_iova_range_invalid(struct gart_device *gart,
+ unsigned long iova, size_t bytes)
{
- unsigned long iova;
- unsigned long flags;
-
- spin_lock_irqsave(&gart->pte_lock, flags);
- for_each_gart_pte(gart, iova) {
- unsigned long pte;
-
- pte = gart_read_pte(gart, iova);
-
- dev_dbg(gart->dev, "%s %08lx:%08lx\n",
- (GART_ENTRY_PHYS_ADDR_VALID & pte) ? "v" : " ",
- iova, pte & GART_PAGE_MASK);
- }
- spin_unlock_irqrestore(&gart->pte_lock, flags);
+ return unlikely(iova < gart->iovmm_base || bytes != GART_PAGE_SIZE ||
+ iova + bytes > gart->iovmm_end);
}
-#else
-static inline void gart_dump_table(struct gart_device *gart)
+
+static inline bool gart_pte_valid(struct gart_device *gart, unsigned long iova)
{
-}
-#endif
-
-static inline bool gart_iova_range_valid(struct gart_device *gart,
- unsigned long iova, size_t bytes)
-{
- unsigned long iova_start, iova_end, gart_start, gart_end;
-
- iova_start = iova;
- iova_end = iova_start + bytes - 1;
- gart_start = gart->iovmm_base;
- gart_end = gart_start + gart->page_count * GART_PAGE_SIZE - 1;
-
- if (iova_start < gart_start)
- return false;
- if (iova_end > gart_end)
- return false;
- return true;
+ return !!(gart_read_pte(gart, iova) & GART_ENTRY_PHYS_ADDR_VALID);
}
static int gart_iommu_attach_dev(struct iommu_domain *domain,
struct device *dev)
{
- struct gart_domain *gart_domain = to_gart_domain(domain);
- struct gart_device *gart = gart_domain->gart;
- struct gart_client *client, *c;
- int err = 0;
+ struct gart_device *gart = gart_handle;
+ int ret = 0;
- client = devm_kzalloc(gart->dev, sizeof(*c), GFP_KERNEL);
- if (!client)
- return -ENOMEM;
- client->dev = dev;
+ spin_lock(&gart->dom_lock);
- spin_lock(&gart->client_lock);
- list_for_each_entry(c, &gart->client, list) {
- if (c->dev == dev) {
- dev_err(gart->dev,
- "%s is already attached\n", dev_name(dev));
- err = -EINVAL;
- goto fail;
- }
+ if (gart->active_domain && gart->active_domain != domain) {
+ ret = -EBUSY;
+ } else if (dev->archdata.iommu != domain) {
+ dev->archdata.iommu = domain;
+ gart->active_domain = domain;
+ gart->active_devices++;
}
- list_add(&client->list, &gart->client);
- spin_unlock(&gart->client_lock);
- dev_dbg(gart->dev, "Attached %s\n", dev_name(dev));
- return 0;
-fail:
- devm_kfree(gart->dev, client);
- spin_unlock(&gart->client_lock);
- return err;
+ spin_unlock(&gart->dom_lock);
+
+ return ret;
}
static void gart_iommu_detach_dev(struct iommu_domain *domain,
struct device *dev)
{
- struct gart_domain *gart_domain = to_gart_domain(domain);
- struct gart_device *gart = gart_domain->gart;
- struct gart_client *c;
+ struct gart_device *gart = gart_handle;
- spin_lock(&gart->client_lock);
+ spin_lock(&gart->dom_lock);
- list_for_each_entry(c, &gart->client, list) {
- if (c->dev == dev) {
- list_del(&c->list);
- devm_kfree(gart->dev, c);
- dev_dbg(gart->dev, "Detached %s\n", dev_name(dev));
- goto out;
- }
+ if (dev->archdata.iommu == domain) {
+ dev->archdata.iommu = NULL;
+
+ if (--gart->active_devices == 0)
+ gart->active_domain = NULL;
}
- dev_err(gart->dev, "Couldn't find\n");
-out:
- spin_unlock(&gart->client_lock);
+
+ spin_unlock(&gart->dom_lock);
}
static struct iommu_domain *gart_iommu_domain_alloc(unsigned type)
{
- struct gart_domain *gart_domain;
- struct gart_device *gart;
+ struct iommu_domain *domain;
if (type != IOMMU_DOMAIN_UNMANAGED)
return NULL;
- gart = gart_handle;
- if (!gart)
- return NULL;
+ domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+ if (domain) {
+ domain->geometry.aperture_start = gart_handle->iovmm_base;
+ domain->geometry.aperture_end = gart_handle->iovmm_end - 1;
+ domain->geometry.force_aperture = true;
+ }
- gart_domain = kzalloc(sizeof(*gart_domain), GFP_KERNEL);
- if (!gart_domain)
- return NULL;
-
- gart_domain->gart = gart;
- gart_domain->domain.geometry.aperture_start = gart->iovmm_base;
- gart_domain->domain.geometry.aperture_end = gart->iovmm_base +
- gart->page_count * GART_PAGE_SIZE - 1;
- gart_domain->domain.geometry.force_aperture = true;
-
- return &gart_domain->domain;
+ return domain;
}
static void gart_iommu_domain_free(struct iommu_domain *domain)
{
- struct gart_domain *gart_domain = to_gart_domain(domain);
- struct gart_device *gart = gart_domain->gart;
+ WARN_ON(gart_handle->active_domain == domain);
+ kfree(domain);
+}
- if (gart) {
- spin_lock(&gart->client_lock);
- if (!list_empty(&gart->client)) {
- struct gart_client *c;
-
- list_for_each_entry(c, &gart->client, list)
- gart_iommu_detach_dev(domain, c->dev);
- }
- spin_unlock(&gart->client_lock);
+static inline int __gart_iommu_map(struct gart_device *gart, unsigned long iova,
+ unsigned long pa)
+{
+ if (unlikely(gart_debug && gart_pte_valid(gart, iova))) {
+ dev_err(gart->dev, "Page entry is in-use\n");
+ return -EINVAL;
}
- kfree(gart_domain);
+ gart_set_pte(gart, iova, GART_ENTRY_PHYS_ADDR_VALID | pa);
+
+ return 0;
}
static int gart_iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t pa, size_t bytes, int prot)
{
- struct gart_domain *gart_domain = to_gart_domain(domain);
- struct gart_device *gart = gart_domain->gart;
- unsigned long flags;
- unsigned long pfn;
- unsigned long pte;
+ struct gart_device *gart = gart_handle;
+ int ret;
- if (!gart_iova_range_valid(gart, iova, bytes))
+ if (gart_iova_range_invalid(gart, iova, bytes))
return -EINVAL;
- spin_lock_irqsave(&gart->pte_lock, flags);
- pfn = __phys_to_pfn(pa);
- if (!pfn_valid(pfn)) {
- dev_err(gart->dev, "Invalid page: %pa\n", &pa);
- spin_unlock_irqrestore(&gart->pte_lock, flags);
+ spin_lock(&gart->pte_lock);
+ ret = __gart_iommu_map(gart, iova, (unsigned long)pa);
+ spin_unlock(&gart->pte_lock);
+
+ return ret;
+}
+
+static inline int __gart_iommu_unmap(struct gart_device *gart,
+ unsigned long iova)
+{
+ if (unlikely(gart_debug && !gart_pte_valid(gart, iova))) {
+ dev_err(gart->dev, "Page entry is invalid\n");
return -EINVAL;
}
- if (gart_debug) {
- pte = gart_read_pte(gart, iova);
- if (pte & GART_ENTRY_PHYS_ADDR_VALID) {
- spin_unlock_irqrestore(&gart->pte_lock, flags);
- dev_err(gart->dev, "Page entry is in-use\n");
- return -EBUSY;
- }
- }
- gart_set_pte(gart, iova, GART_PTE(pfn));
- FLUSH_GART_REGS(gart);
- spin_unlock_irqrestore(&gart->pte_lock, flags);
+
+ gart_set_pte(gart, iova, 0);
+
return 0;
}
static size_t gart_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
- size_t bytes)
+ size_t bytes, struct iommu_iotlb_gather *gather)
{
- struct gart_domain *gart_domain = to_gart_domain(domain);
- struct gart_device *gart = gart_domain->gart;
- unsigned long flags;
+ struct gart_device *gart = gart_handle;
+ int err;
- if (!gart_iova_range_valid(gart, iova, bytes))
+ if (gart_iova_range_invalid(gart, iova, bytes))
return 0;
- spin_lock_irqsave(&gart->pte_lock, flags);
- gart_set_pte(gart, iova, 0);
- FLUSH_GART_REGS(gart);
- spin_unlock_irqrestore(&gart->pte_lock, flags);
- return bytes;
+ spin_lock(&gart->pte_lock);
+ err = __gart_iommu_unmap(gart, iova);
+ spin_unlock(&gart->pte_lock);
+
+ return err ? 0 : bytes;
}
static phys_addr_t gart_iommu_iova_to_phys(struct iommu_domain *domain,
dma_addr_t iova)
{
- struct gart_domain *gart_domain = to_gart_domain(domain);
- struct gart_device *gart = gart_domain->gart;
+ struct gart_device *gart = gart_handle;
unsigned long pte;
- phys_addr_t pa;
- unsigned long flags;
- if (!gart_iova_range_valid(gart, iova, 0))
+ if (gart_iova_range_invalid(gart, iova, GART_PAGE_SIZE))
return -EINVAL;
- spin_lock_irqsave(&gart->pte_lock, flags);
+ spin_lock(&gart->pte_lock);
pte = gart_read_pte(gart, iova);
- spin_unlock_irqrestore(&gart->pte_lock, flags);
+ spin_unlock(&gart->pte_lock);
- pa = (pte & GART_PAGE_MASK);
- if (!pfn_valid(__phys_to_pfn(pa))) {
- dev_err(gart->dev, "No entry for %08llx:%pa\n",
- (unsigned long long)iova, &pa);
- gart_dump_table(gart);
- return -EINVAL;
- }
- return pa;
+ return pte & GART_PAGE_MASK;
}
static bool gart_iommu_capable(enum iommu_cap cap)
@@ -349,8 +245,12 @@
static int gart_iommu_add_device(struct device *dev)
{
- struct iommu_group *group = iommu_group_get_for_dev(dev);
+ struct iommu_group *group;
+ if (!dev->iommu_fwspec)
+ return -ENODEV;
+
+ group = iommu_group_get_for_dev(dev);
if (IS_ERR(group))
return PTR_ERR(group);
@@ -367,6 +267,23 @@
iommu_device_unlink(&gart_handle->iommu, dev);
}
+static int gart_iommu_of_xlate(struct device *dev,
+ struct of_phandle_args *args)
+{
+ return 0;
+}
+
+static void gart_iommu_sync_map(struct iommu_domain *domain)
+{
+ FLUSH_GART_REGS(gart_handle);
+}
+
+static void gart_iommu_sync(struct iommu_domain *domain,
+ struct iommu_iotlb_gather *gather)
+{
+ gart_iommu_sync_map(domain);
+}
+
static const struct iommu_ops gart_iommu_ops = {
.capable = gart_iommu_capable,
.domain_alloc = gart_iommu_domain_alloc,
@@ -380,155 +297,96 @@
.unmap = gart_iommu_unmap,
.iova_to_phys = gart_iommu_iova_to_phys,
.pgsize_bitmap = GART_IOMMU_PGSIZES,
+ .of_xlate = gart_iommu_of_xlate,
+ .iotlb_sync_map = gart_iommu_sync_map,
+ .iotlb_sync = gart_iommu_sync,
};
-static int tegra_gart_suspend(struct device *dev)
+int tegra_gart_suspend(struct gart_device *gart)
{
- struct gart_device *gart = dev_get_drvdata(dev);
- unsigned long iova;
u32 *data = gart->savedata;
- unsigned long flags;
+ unsigned long iova;
- spin_lock_irqsave(&gart->pte_lock, flags);
+ /*
+ * All GART users shall be suspended at this point. Disable
+ * address translation to trap all GART accesses as invalid
+ * memory accesses.
+ */
+ writel_relaxed(0, gart->regs + GART_CONFIG);
+ FLUSH_GART_REGS(gart);
+
for_each_gart_pte(gart, iova)
*(data++) = gart_read_pte(gart, iova);
- spin_unlock_irqrestore(&gart->pte_lock, flags);
+
return 0;
}
-static int tegra_gart_resume(struct device *dev)
+int tegra_gart_resume(struct gart_device *gart)
{
- struct gart_device *gart = dev_get_drvdata(dev);
- unsigned long flags;
-
- spin_lock_irqsave(&gart->pte_lock, flags);
do_gart_setup(gart, gart->savedata);
- spin_unlock_irqrestore(&gart->pte_lock, flags);
+
return 0;
}
-static int tegra_gart_probe(struct platform_device *pdev)
+struct gart_device *tegra_gart_probe(struct device *dev, struct tegra_mc *mc)
{
struct gart_device *gart;
- struct resource *res, *res_remap;
- void __iomem *gart_regs;
- struct device *dev = &pdev->dev;
- int ret;
-
- if (gart_handle)
- return -EIO;
+ struct resource *res;
+ int err;
BUILD_BUG_ON(PAGE_SHIFT != GART_PAGE_SHIFT);
/* the GART memory aperture is required */
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- res_remap = platform_get_resource(pdev, IORESOURCE_MEM, 1);
- if (!res || !res_remap) {
- dev_err(dev, "GART memory aperture expected\n");
- return -ENXIO;
+ res = platform_get_resource(to_platform_device(dev), IORESOURCE_MEM, 1);
+ if (!res) {
+ dev_err(dev, "Memory aperture resource unavailable\n");
+ return ERR_PTR(-ENXIO);
}
- gart = devm_kzalloc(dev, sizeof(*gart), GFP_KERNEL);
- if (!gart) {
- dev_err(dev, "failed to allocate gart_device\n");
- return -ENOMEM;
- }
-
- gart_regs = devm_ioremap(dev, res->start, resource_size(res));
- if (!gart_regs) {
- dev_err(dev, "failed to remap GART registers\n");
- return -ENXIO;
- }
-
- ret = iommu_device_sysfs_add(&gart->iommu, &pdev->dev, NULL,
- dev_name(&pdev->dev));
- if (ret) {
- dev_err(dev, "Failed to register IOMMU in sysfs\n");
- return ret;
- }
-
- iommu_device_set_ops(&gart->iommu, &gart_iommu_ops);
-
- ret = iommu_device_register(&gart->iommu);
- if (ret) {
- dev_err(dev, "Failed to register IOMMU\n");
- iommu_device_sysfs_remove(&gart->iommu);
- return ret;
- }
-
- gart->dev = &pdev->dev;
- spin_lock_init(&gart->pte_lock);
- spin_lock_init(&gart->client_lock);
- INIT_LIST_HEAD(&gart->client);
- gart->regs = gart_regs;
- gart->iovmm_base = (dma_addr_t)res_remap->start;
- gart->page_count = (resource_size(res_remap) >> GART_PAGE_SHIFT);
-
- gart->savedata = vmalloc(array_size(sizeof(u32), gart->page_count));
- if (!gart->savedata) {
- dev_err(dev, "failed to allocate context save area\n");
- return -ENOMEM;
- }
-
- platform_set_drvdata(pdev, gart);
- do_gart_setup(gart, NULL);
+ gart = kzalloc(sizeof(*gart), GFP_KERNEL);
+ if (!gart)
+ return ERR_PTR(-ENOMEM);
gart_handle = gart;
- return 0;
-}
+ gart->dev = dev;
+ gart->regs = mc->regs + GART_REG_BASE;
+ gart->iovmm_base = res->start;
+ gart->iovmm_end = res->end + 1;
+ spin_lock_init(&gart->pte_lock);
+ spin_lock_init(&gart->dom_lock);
-static int tegra_gart_remove(struct platform_device *pdev)
-{
- struct gart_device *gart = platform_get_drvdata(pdev);
+ do_gart_setup(gart, NULL);
+ err = iommu_device_sysfs_add(&gart->iommu, dev, NULL, "gart");
+ if (err)
+ goto free_gart;
+
+ iommu_device_set_ops(&gart->iommu, &gart_iommu_ops);
+ iommu_device_set_fwnode(&gart->iommu, dev->fwnode);
+
+ err = iommu_device_register(&gart->iommu);
+ if (err)
+ goto remove_sysfs;
+
+ gart->savedata = vmalloc(resource_size(res) / GART_PAGE_SIZE *
+ sizeof(u32));
+ if (!gart->savedata) {
+ err = -ENOMEM;
+ goto unregister_iommu;
+ }
+
+ return gart;
+
+unregister_iommu:
iommu_device_unregister(&gart->iommu);
+remove_sysfs:
iommu_device_sysfs_remove(&gart->iommu);
+free_gart:
+ kfree(gart);
- writel(0, gart->regs + GART_CONFIG);
- if (gart->savedata)
- vfree(gart->savedata);
- gart_handle = NULL;
- return 0;
+ return ERR_PTR(err);
}
-static const struct dev_pm_ops tegra_gart_pm_ops = {
- .suspend = tegra_gart_suspend,
- .resume = tegra_gart_resume,
-};
-
-static const struct of_device_id tegra_gart_of_match[] = {
- { .compatible = "nvidia,tegra20-gart", },
- { },
-};
-MODULE_DEVICE_TABLE(of, tegra_gart_of_match);
-
-static struct platform_driver tegra_gart_driver = {
- .probe = tegra_gart_probe,
- .remove = tegra_gart_remove,
- .driver = {
- .name = "tegra-gart",
- .pm = &tegra_gart_pm_ops,
- .of_match_table = tegra_gart_of_match,
- },
-};
-
-static int tegra_gart_init(void)
-{
- return platform_driver_register(&tegra_gart_driver);
-}
-
-static void __exit tegra_gart_exit(void)
-{
- platform_driver_unregister(&tegra_gart_driver);
-}
-
-subsys_initcall(tegra_gart_init);
-module_exit(tegra_gart_exit);
module_param(gart_debug, bool, 0644);
-
MODULE_PARM_DESC(gart_debug, "Enable GART debugging");
-MODULE_DESCRIPTION("IOMMU API for GART in Tegra20");
-MODULE_AUTHOR("Hiroshi DOYU <hdoyu@nvidia.com>");
-MODULE_ALIAS("platform:tegra-gart");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 0d03341..7293fc3 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2011-2014 NVIDIA CORPORATION. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/bitops.h>
@@ -102,7 +99,6 @@
#define SMMU_TLB_FLUSH_VA_MATCH_ALL (0 << 0)
#define SMMU_TLB_FLUSH_VA_MATCH_SECTION (2 << 0)
#define SMMU_TLB_FLUSH_VA_MATCH_GROUP (3 << 0)
-#define SMMU_TLB_FLUSH_ASID(x) (((x) & 0x7f) << 24)
#define SMMU_TLB_FLUSH_VA_SECTION(addr) ((((addr) & 0xffc00000) >> 12) | \
SMMU_TLB_FLUSH_VA_MATCH_SECTION)
#define SMMU_TLB_FLUSH_VA_GROUP(addr) ((((addr) & 0xffffc000) >> 12) | \
@@ -146,8 +142,6 @@
#define SMMU_PDE_ATTR (SMMU_PDE_READABLE | SMMU_PDE_WRITABLE | \
SMMU_PDE_NONSECURE)
-#define SMMU_PTE_ATTR (SMMU_PTE_READABLE | SMMU_PTE_WRITABLE | \
- SMMU_PTE_NONSECURE)
static unsigned int iova_pd_index(unsigned long iova)
{
@@ -205,8 +199,12 @@
{
u32 value;
- value = SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_ASID(asid) |
- SMMU_TLB_FLUSH_VA_MATCH_ALL;
+ if (smmu->soc->num_asids == 4)
+ value = (asid & 0x3) << 29;
+ else
+ value = (asid & 0x7f) << 24;
+
+ value |= SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_VA_MATCH_ALL;
smmu_writel(smmu, value, SMMU_TLB_FLUSH);
}
@@ -216,8 +214,12 @@
{
u32 value;
- value = SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_ASID(asid) |
- SMMU_TLB_FLUSH_VA_SECTION(iova);
+ if (smmu->soc->num_asids == 4)
+ value = (asid & 0x3) << 29;
+ else
+ value = (asid & 0x7f) << 24;
+
+ value |= SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_VA_SECTION(iova);
smmu_writel(smmu, value, SMMU_TLB_FLUSH);
}
@@ -227,8 +229,12 @@
{
u32 value;
- value = SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_ASID(asid) |
- SMMU_TLB_FLUSH_VA_GROUP(iova);
+ if (smmu->soc->num_asids == 4)
+ value = (asid & 0x3) << 29;
+ else
+ value = (asid & 0x7f) << 24;
+
+ value |= SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_VA_GROUP(iova);
smmu_writel(smmu, value, SMMU_TLB_FLUSH);
}
@@ -316,6 +322,9 @@
/* TODO: free page directory and page tables */
+ WARN_ON_ONCE(as->use_count);
+ kfree(as->count);
+ kfree(as->pts);
kfree(as);
}
@@ -645,6 +654,7 @@
{
struct tegra_smmu_as *as = to_smmu_as(domain);
dma_addr_t pte_dma;
+ u32 pte_attrs;
u32 *pte;
pte = as_get_pte(as, iova, &pte_dma);
@@ -655,14 +665,22 @@
if (*pte == 0)
tegra_smmu_pte_get_use(as, iova);
+ pte_attrs = SMMU_PTE_NONSECURE;
+
+ if (prot & IOMMU_READ)
+ pte_attrs |= SMMU_PTE_READABLE;
+
+ if (prot & IOMMU_WRITE)
+ pte_attrs |= SMMU_PTE_WRITABLE;
+
tegra_smmu_set_pte(as, iova, pte, pte_dma,
- __phys_to_pfn(paddr) | SMMU_PTE_ATTR);
+ __phys_to_pfn(paddr) | pte_attrs);
return 0;
}
static size_t tegra_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
- size_t size)
+ size_t size, struct iommu_iotlb_gather *gather)
{
struct tegra_smmu_as *as = to_smmu_as(domain);
dma_addr_t pte_dma;
@@ -846,7 +864,7 @@
static struct iommu_group *tegra_smmu_device_group(struct device *dev)
{
- struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct tegra_smmu *smmu = dev->archdata.iommu;
struct iommu_group *group;
@@ -926,17 +944,7 @@
return 0;
}
-static int tegra_smmu_swgroups_open(struct inode *inode, struct file *file)
-{
- return single_open(file, tegra_smmu_swgroups_show, inode->i_private);
-}
-
-static const struct file_operations tegra_smmu_swgroups_fops = {
- .open = tegra_smmu_swgroups_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(tegra_smmu_swgroups);
static int tegra_smmu_clients_show(struct seq_file *s, void *data)
{
@@ -964,17 +972,7 @@
return 0;
}
-static int tegra_smmu_clients_open(struct inode *inode, struct file *file)
-{
- return single_open(file, tegra_smmu_clients_show, inode->i_private);
-}
-
-static const struct file_operations tegra_smmu_clients_fops = {
- .open = tegra_smmu_clients_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(tegra_smmu_clients);
static void tegra_smmu_debugfs_init(struct tegra_smmu *smmu)
{
@@ -1002,10 +1000,6 @@
u32 value;
int err;
- /* This can happen on Tegra20 which doesn't have an SMMU */
- if (!soc)
- return NULL;
-
smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
if (!smmu)
return ERR_PTR(-ENOMEM);
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
new file mode 100644
index 0000000..3ea9d76
--- /dev/null
+++ b/drivers/iommu/virtio-iommu.c
@@ -0,0 +1,1177 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Virtio driver for the paravirtualized IOMMU
+ *
+ * Copyright (C) 2019 Arm Limited
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/amba/bus.h>
+#include <linux/delay.h>
+#include <linux/dma-iommu.h>
+#include <linux/freezer.h>
+#include <linux/interval_tree.h>
+#include <linux/iommu.h>
+#include <linux/module.h>
+#include <linux/of_iommu.h>
+#include <linux/of_platform.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_ids.h>
+#include <linux/wait.h>
+
+#include <uapi/linux/virtio_iommu.h>
+
+#define MSI_IOVA_BASE 0x8000000
+#define MSI_IOVA_LENGTH 0x100000
+
+#define VIOMMU_REQUEST_VQ 0
+#define VIOMMU_EVENT_VQ 1
+#define VIOMMU_NR_VQS 2
+
+struct viommu_dev {
+ struct iommu_device iommu;
+ struct device *dev;
+ struct virtio_device *vdev;
+
+ struct ida domain_ids;
+
+ struct virtqueue *vqs[VIOMMU_NR_VQS];
+ spinlock_t request_lock;
+ struct list_head requests;
+ void *evts;
+
+ /* Device configuration */
+ struct iommu_domain_geometry geometry;
+ u64 pgsize_bitmap;
+ u32 first_domain;
+ u32 last_domain;
+ /* Supported MAP flags */
+ u32 map_flags;
+ u32 probe_size;
+};
+
+struct viommu_mapping {
+ phys_addr_t paddr;
+ struct interval_tree_node iova;
+ u32 flags;
+};
+
+struct viommu_domain {
+ struct iommu_domain domain;
+ struct viommu_dev *viommu;
+ struct mutex mutex; /* protects viommu pointer */
+ unsigned int id;
+ u32 map_flags;
+
+ spinlock_t mappings_lock;
+ struct rb_root_cached mappings;
+
+ unsigned long nr_endpoints;
+};
+
+struct viommu_endpoint {
+ struct device *dev;
+ struct viommu_dev *viommu;
+ struct viommu_domain *vdomain;
+ struct list_head resv_regions;
+};
+
+struct viommu_request {
+ struct list_head list;
+ void *writeback;
+ unsigned int write_offset;
+ unsigned int len;
+ char buf[];
+};
+
+#define VIOMMU_FAULT_RESV_MASK 0xffffff00
+
+struct viommu_event {
+ union {
+ u32 head;
+ struct virtio_iommu_fault fault;
+ };
+};
+
+#define to_viommu_domain(domain) \
+ container_of(domain, struct viommu_domain, domain)
+
+static int viommu_get_req_errno(void *buf, size_t len)
+{
+ struct virtio_iommu_req_tail *tail = buf + len - sizeof(*tail);
+
+ switch (tail->status) {
+ case VIRTIO_IOMMU_S_OK:
+ return 0;
+ case VIRTIO_IOMMU_S_UNSUPP:
+ return -ENOSYS;
+ case VIRTIO_IOMMU_S_INVAL:
+ return -EINVAL;
+ case VIRTIO_IOMMU_S_RANGE:
+ return -ERANGE;
+ case VIRTIO_IOMMU_S_NOENT:
+ return -ENOENT;
+ case VIRTIO_IOMMU_S_FAULT:
+ return -EFAULT;
+ case VIRTIO_IOMMU_S_NOMEM:
+ return -ENOMEM;
+ case VIRTIO_IOMMU_S_IOERR:
+ case VIRTIO_IOMMU_S_DEVERR:
+ default:
+ return -EIO;
+ }
+}
+
+static void viommu_set_req_status(void *buf, size_t len, int status)
+{
+ struct virtio_iommu_req_tail *tail = buf + len - sizeof(*tail);
+
+ tail->status = status;
+}
+
+static off_t viommu_get_write_desc_offset(struct viommu_dev *viommu,
+ struct virtio_iommu_req_head *req,
+ size_t len)
+{
+ size_t tail_size = sizeof(struct virtio_iommu_req_tail);
+
+ if (req->type == VIRTIO_IOMMU_T_PROBE)
+ return len - viommu->probe_size - tail_size;
+
+ return len - tail_size;
+}
+
+/*
+ * __viommu_sync_req - Complete all in-flight requests
+ *
+ * Wait for all added requests to complete. When this function returns, all
+ * requests that were in-flight at the time of the call have completed.
+ */
+static int __viommu_sync_req(struct viommu_dev *viommu)
+{
+ int ret = 0;
+ unsigned int len;
+ size_t write_len;
+ struct viommu_request *req;
+ struct virtqueue *vq = viommu->vqs[VIOMMU_REQUEST_VQ];
+
+ assert_spin_locked(&viommu->request_lock);
+
+ virtqueue_kick(vq);
+
+ while (!list_empty(&viommu->requests)) {
+ len = 0;
+ req = virtqueue_get_buf(vq, &len);
+ if (!req)
+ continue;
+
+ if (!len)
+ viommu_set_req_status(req->buf, req->len,
+ VIRTIO_IOMMU_S_IOERR);
+
+ write_len = req->len - req->write_offset;
+ if (req->writeback && len == write_len)
+ memcpy(req->writeback, req->buf + req->write_offset,
+ write_len);
+
+ list_del(&req->list);
+ kfree(req);
+ }
+
+ return ret;
+}
+
+static int viommu_sync_req(struct viommu_dev *viommu)
+{
+ int ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&viommu->request_lock, flags);
+ ret = __viommu_sync_req(viommu);
+ if (ret)
+ dev_dbg(viommu->dev, "could not sync requests (%d)\n", ret);
+ spin_unlock_irqrestore(&viommu->request_lock, flags);
+
+ return ret;
+}
+
+/*
+ * __viommu_add_req - Add one request to the queue
+ * @buf: pointer to the request buffer
+ * @len: length of the request buffer
+ * @writeback: copy data back to the buffer when the request completes.
+ *
+ * Add a request to the queue. Only synchronize the queue if it's already full.
+ * Otherwise don't kick the queue nor wait for requests to complete.
+ *
+ * When @writeback is true, data written by the device, including the request
+ * status, is copied into @buf after the request completes. This is unsafe if
+ * the caller allocates @buf on stack and drops the lock between add_req() and
+ * sync_req().
+ *
+ * Return 0 if the request was successfully added to the queue.
+ */
+static int __viommu_add_req(struct viommu_dev *viommu, void *buf, size_t len,
+ bool writeback)
+{
+ int ret;
+ off_t write_offset;
+ struct viommu_request *req;
+ struct scatterlist top_sg, bottom_sg;
+ struct scatterlist *sg[2] = { &top_sg, &bottom_sg };
+ struct virtqueue *vq = viommu->vqs[VIOMMU_REQUEST_VQ];
+
+ assert_spin_locked(&viommu->request_lock);
+
+ write_offset = viommu_get_write_desc_offset(viommu, buf, len);
+ if (write_offset <= 0)
+ return -EINVAL;
+
+ req = kzalloc(sizeof(*req) + len, GFP_ATOMIC);
+ if (!req)
+ return -ENOMEM;
+
+ req->len = len;
+ if (writeback) {
+ req->writeback = buf + write_offset;
+ req->write_offset = write_offset;
+ }
+ memcpy(&req->buf, buf, write_offset);
+
+ sg_init_one(&top_sg, req->buf, write_offset);
+ sg_init_one(&bottom_sg, req->buf + write_offset, len - write_offset);
+
+ ret = virtqueue_add_sgs(vq, sg, 1, 1, req, GFP_ATOMIC);
+ if (ret == -ENOSPC) {
+ /* If the queue is full, sync and retry */
+ if (!__viommu_sync_req(viommu))
+ ret = virtqueue_add_sgs(vq, sg, 1, 1, req, GFP_ATOMIC);
+ }
+ if (ret)
+ goto err_free;
+
+ list_add_tail(&req->list, &viommu->requests);
+ return 0;
+
+err_free:
+ kfree(req);
+ return ret;
+}
+
+static int viommu_add_req(struct viommu_dev *viommu, void *buf, size_t len)
+{
+ int ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&viommu->request_lock, flags);
+ ret = __viommu_add_req(viommu, buf, len, false);
+ if (ret)
+ dev_dbg(viommu->dev, "could not add request: %d\n", ret);
+ spin_unlock_irqrestore(&viommu->request_lock, flags);
+
+ return ret;
+}
+
+/*
+ * Send a request and wait for it to complete. Return the request status (as an
+ * errno)
+ */
+static int viommu_send_req_sync(struct viommu_dev *viommu, void *buf,
+ size_t len)
+{
+ int ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&viommu->request_lock, flags);
+
+ ret = __viommu_add_req(viommu, buf, len, true);
+ if (ret) {
+ dev_dbg(viommu->dev, "could not add request (%d)\n", ret);
+ goto out_unlock;
+ }
+
+ ret = __viommu_sync_req(viommu);
+ if (ret) {
+ dev_dbg(viommu->dev, "could not sync requests (%d)\n", ret);
+ /* Fall-through (get the actual request status) */
+ }
+
+ ret = viommu_get_req_errno(buf, len);
+out_unlock:
+ spin_unlock_irqrestore(&viommu->request_lock, flags);
+ return ret;
+}
+
+/*
+ * viommu_add_mapping - add a mapping to the internal tree
+ *
+ * On success, return 0. Otherwise return -ENOMEM.
+ */
+static int viommu_add_mapping(struct viommu_domain *vdomain, unsigned long iova,
+ phys_addr_t paddr, size_t size, u32 flags)
+{
+ unsigned long irqflags;
+ struct viommu_mapping *mapping;
+
+ mapping = kzalloc(sizeof(*mapping), GFP_ATOMIC);
+ if (!mapping)
+ return -ENOMEM;
+
+ mapping->paddr = paddr;
+ mapping->iova.start = iova;
+ mapping->iova.last = iova + size - 1;
+ mapping->flags = flags;
+
+ spin_lock_irqsave(&vdomain->mappings_lock, irqflags);
+ interval_tree_insert(&mapping->iova, &vdomain->mappings);
+ spin_unlock_irqrestore(&vdomain->mappings_lock, irqflags);
+
+ return 0;
+}
+
+/*
+ * viommu_del_mappings - remove mappings from the internal tree
+ *
+ * @vdomain: the domain
+ * @iova: start of the range
+ * @size: size of the range. A size of 0 corresponds to the entire address
+ * space.
+ *
+ * On success, returns the number of unmapped bytes (>= size)
+ */
+static size_t viommu_del_mappings(struct viommu_domain *vdomain,
+ unsigned long iova, size_t size)
+{
+ size_t unmapped = 0;
+ unsigned long flags;
+ unsigned long last = iova + size - 1;
+ struct viommu_mapping *mapping = NULL;
+ struct interval_tree_node *node, *next;
+
+ spin_lock_irqsave(&vdomain->mappings_lock, flags);
+ next = interval_tree_iter_first(&vdomain->mappings, iova, last);
+ while (next) {
+ node = next;
+ mapping = container_of(node, struct viommu_mapping, iova);
+ next = interval_tree_iter_next(node, iova, last);
+
+ /* Trying to split a mapping? */
+ if (mapping->iova.start < iova)
+ break;
+
+ /*
+ * Virtio-iommu doesn't allow UNMAP to split a mapping created
+ * with a single MAP request, so remove the full mapping.
+ */
+ unmapped += mapping->iova.last - mapping->iova.start + 1;
+
+ interval_tree_remove(node, &vdomain->mappings);
+ kfree(mapping);
+ }
+ spin_unlock_irqrestore(&vdomain->mappings_lock, flags);
+
+ return unmapped;
+}
+
+/*
+ * viommu_replay_mappings - re-send MAP requests
+ *
+ * A domain that was detached from all endpoints has lost its mappings on the
+ * device side. Walk the internal tree and issue one MAP request per entry so
+ * the device matches the driver again. Stops at, and returns, the first
+ * error reported by viommu_send_req_sync(); returns 0 otherwise.
+ */
+static int viommu_replay_mappings(struct viommu_domain *vdomain)
+{
+	int err = 0;
+	unsigned long irqflags;
+	struct interval_tree_node *it;
+
+	spin_lock_irqsave(&vdomain->mappings_lock, irqflags);
+	for (it = interval_tree_iter_first(&vdomain->mappings, 0, -1UL); it;
+	     it = interval_tree_iter_next(it, 0, -1UL)) {
+		struct viommu_mapping *entry =
+			container_of(it, struct viommu_mapping, iova);
+		struct virtio_iommu_req_map req = {
+			.head.type	= VIRTIO_IOMMU_T_MAP,
+			.domain		= cpu_to_le32(vdomain->id),
+			.virt_start	= cpu_to_le64(entry->iova.start),
+			.virt_end	= cpu_to_le64(entry->iova.last),
+			.phys_start	= cpu_to_le64(entry->paddr),
+			.flags		= cpu_to_le32(entry->flags),
+		};
+
+		err = viommu_send_req_sync(vdomain->viommu, &req, sizeof(req));
+		if (err)
+			break;
+	}
+	spin_unlock_irqrestore(&vdomain->mappings_lock, irqflags);
+
+	return err;
+}
+
+/*
+ * viommu_add_resv_mem - record a RESV_MEM probe property for an endpoint
+ *
+ * @vdev: endpoint the property belongs to
+ * @mem: the property as returned by the device (little-endian fields)
+ * @len: number of bytes available for @mem
+ *
+ * Allocates a reserved region describing [start, end] and links it onto
+ * @vdev->resv_regions.
+ *
+ * Returns 0 on success, -EINVAL when @len is too short to hold the property,
+ * -EOVERFLOW when the range cannot be represented, -ENOMEM when the region
+ * cannot be allocated.
+ */
+static int viommu_add_resv_mem(struct viommu_endpoint *vdev,
+			       struct virtio_iommu_probe_resv_mem *mem,
+			       size_t len)
+{
+	size_t size;
+	u64 start64, end64;
+	phys_addr_t start, end;
+	struct iommu_resv_region *region = NULL;
+	unsigned long prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
+
+	/* Validate the length before touching any field of the property */
+	if (len < sizeof(*mem))
+		return -EINVAL;
+
+	start = start64 = le64_to_cpu(mem->start);
+	end = end64 = le64_to_cpu(mem->end);
+	size = end64 - start64 + 1;
+
+	/* Catch any overflow, including the unlikely end64 - start64 + 1 = 0 */
+	if (start != start64 || end != end64 || size < end64 - start64)
+		return -EOVERFLOW;
+
+	switch (mem->subtype) {
+	default:
+		dev_warn(vdev->dev, "unknown resv mem subtype 0x%x\n",
+			 mem->subtype);
+		/* Fall-through */
+	case VIRTIO_IOMMU_RESV_MEM_T_RESERVED:
+		region = iommu_alloc_resv_region(start, size, 0,
+						 IOMMU_RESV_RESERVED);
+		break;
+	case VIRTIO_IOMMU_RESV_MEM_T_MSI:
+		region = iommu_alloc_resv_region(start, size, prot,
+						 IOMMU_RESV_MSI);
+		break;
+	}
+	if (!region)
+		return -ENOMEM;
+
+	/*
+	 * list_add() takes the new entry first and the list head second.
+	 * Passing them the other way round would splice the head into the
+	 * new region and drop the regions recorded so far.
+	 */
+	list_add(&region->list, &vdev->resv_regions);
+	return 0;
+}
+
+/*
+ * viommu_probe_endpoint - ask the device for the properties of an endpoint
+ *
+ * @viommu: the IOMMU instance that manages the endpoint
+ * @dev: the endpoint
+ *
+ * Sends a PROBE request for the first ID of @dev and walks the property list
+ * written back by the device, handing RESV_MEM properties to
+ * viommu_add_resv_mem(). Unknown properties and parse failures are logged
+ * and the walk continues.
+ *
+ * Returns -EINVAL if @dev has no ID, -ENOMEM if the request buffer cannot be
+ * allocated, the error from viommu_send_req_sync() if the request fails;
+ * otherwise the status left by the last parsed RESV_MEM property (0 if none
+ * was parsed).
+ */
+static int viommu_probe_endpoint(struct viommu_dev *viommu, struct device *dev)
+{
+	int ret;
+	u16 type;
+	/*
+	 * Deliberately wider than the 16-bit length field: the header size is
+	 * added to it below, and a sum truncated to 16 bits could wrap to 0,
+	 * in which case "cur" would never advance and the loop would spin.
+	 */
+	size_t len;
+	size_t cur = 0;
+	size_t probe_len;
+	struct virtio_iommu_req_probe *probe;
+	struct virtio_iommu_probe_property *prop;
+	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+	struct viommu_endpoint *vdev = fwspec->iommu_priv;
+
+	if (!fwspec->num_ids)
+		return -EINVAL;
+
+	/* Request header, room for the properties, then the request tail */
+	probe_len = sizeof(*probe) + viommu->probe_size +
+		    sizeof(struct virtio_iommu_req_tail);
+	probe = kzalloc(probe_len, GFP_KERNEL);
+	if (!probe)
+		return -ENOMEM;
+
+	probe->head.type = VIRTIO_IOMMU_T_PROBE;
+	/*
+	 * For now, assume that properties of an endpoint that outputs multiple
+	 * IDs are consistent. Only probe the first one.
+	 */
+	probe->endpoint = cpu_to_le32(fwspec->ids[0]);
+
+	ret = viommu_send_req_sync(viommu, probe, probe_len);
+	if (ret)
+		goto out_free;
+
+	prop = (void *)probe->properties;
+	type = le16_to_cpu(prop->type) & VIRTIO_IOMMU_PROBE_T_MASK;
+
+	while (type != VIRTIO_IOMMU_PROBE_T_NONE &&
+	       cur < viommu->probe_size) {
+		/* Total size of this property: payload plus its header */
+		len = le16_to_cpu(prop->length) + sizeof(*prop);
+
+		switch (type) {
+		case VIRTIO_IOMMU_PROBE_T_RESV_MEM:
+			ret = viommu_add_resv_mem(vdev, (void *)prop, len);
+			break;
+		default:
+			dev_err(dev, "unknown viommu prop 0x%x\n", type);
+		}
+
+		if (ret)
+			dev_err(dev, "failed to parse viommu prop 0x%x\n", type);
+
+		cur += len;
+		if (cur >= viommu->probe_size)
+			break;
+
+		prop = (void *)probe->properties + cur;
+		type = le16_to_cpu(prop->type) & VIRTIO_IOMMU_PROBE_T_MASK;
+	}
+
+out_free:
+	kfree(probe);
+	return ret;
+}
+
+/*
+ * viommu_fault_handler - log a fault event reported by the device
+ *
+ * @viommu: device that reported the fault
+ * @fault: the fault record (little-endian fields)
+ *
+ * Prints a rate-limited error naming the reason and the endpoint, plus the
+ * address and access type when the fault carries an address. Always
+ * returns 0.
+ */
+static int viommu_fault_handler(struct viommu_dev *viommu,
+				struct virtio_iommu_fault *fault)
+{
+	char *why;
+	u8 reason = fault->reason;
+	u32 flags = le32_to_cpu(fault->flags);
+	u32 endpoint = le32_to_cpu(fault->endpoint);
+	u64 address = le64_to_cpu(fault->address);
+
+	if (reason == VIRTIO_IOMMU_FAULT_R_DOMAIN)
+		why = "domain";
+	else if (reason == VIRTIO_IOMMU_FAULT_R_MAPPING)
+		why = "page";
+	else	/* VIRTIO_IOMMU_FAULT_R_UNKNOWN and anything unrecognised */
+		why = "unknown";
+
+	/* TODO: find EP by ID and report_iommu_fault */
+	if (!(flags & VIRTIO_IOMMU_FAULT_F_ADDRESS)) {
+		dev_err_ratelimited(viommu->dev, "%s fault from EP %u\n",
+				    why, endpoint);
+		return 0;
+	}
+
+	dev_err_ratelimited(viommu->dev, "%s fault from EP %u at %#llx [%s%s%s]\n",
+			    why, endpoint, address,
+			    flags & VIRTIO_IOMMU_FAULT_F_READ ? "R" : "",
+			    flags & VIRTIO_IOMMU_FAULT_F_WRITE ? "W" : "",
+			    flags & VIRTIO_IOMMU_FAULT_F_EXEC ? "X" : "");
+	return 0;
+}
+
+/*
+ * viommu_event_handler - virtqueue callback for the event queue
+ *
+ * Drains every used buffer from @vq: oversized buffers are reported, buffers
+ * whose head has none of the VIOMMU_FAULT_RESV_MASK bits set are passed to
+ * viommu_fault_handler(). Each buffer is then handed back to the device, and
+ * the queue is kicked once at the end.
+ */
+static void viommu_event_handler(struct virtqueue *vq)
+{
+	unsigned int len;
+	struct scatterlist sg[1];
+	struct viommu_event *evt;
+	struct viommu_dev *viommu = vq->vdev->priv;
+
+	for (;;) {
+		evt = virtqueue_get_buf(vq, &len);
+		if (!evt)
+			break;
+
+		if (len > sizeof(*evt))
+			dev_err(viommu->dev,
+				"invalid event buffer (len %u != %zu)\n",
+				len, sizeof(*evt));
+		else if (!(evt->head & VIOMMU_FAULT_RESV_MASK))
+			viommu_fault_handler(viommu, &evt->fault);
+
+		/* Recycle the buffer so the device can report another event */
+		sg_init_one(sg, evt, sizeof(*evt));
+		if (virtqueue_add_inbuf(vq, sg, 1, evt, GFP_ATOMIC))
+			dev_err(viommu->dev, "could not add event buffer\n");
+	}
+
+	virtqueue_kick(vq);
+}
+
+/* IOMMU API */
+
+/*
+ * viommu_domain_alloc - allocate a domain of the requested type
+ *
+ * Only IOMMU_DOMAIN_UNMANAGED and IOMMU_DOMAIN_DMA are supported; a DMA
+ * domain additionally gets a DMA cookie. Returns NULL for any other type or
+ * on allocation failure. The owning viommu is not known yet and stays unset
+ * here.
+ */
+static struct iommu_domain *viommu_domain_alloc(unsigned type)
+{
+	struct viommu_domain *vdomain;
+
+	if (!(type == IOMMU_DOMAIN_UNMANAGED || type == IOMMU_DOMAIN_DMA))
+		return NULL;
+
+	vdomain = kzalloc(sizeof(*vdomain), GFP_KERNEL);
+	if (!vdomain)
+		return NULL;
+
+	mutex_init(&vdomain->mutex);
+	spin_lock_init(&vdomain->mappings_lock);
+	vdomain->mappings = RB_ROOT_CACHED;
+
+	if (type != IOMMU_DOMAIN_DMA)
+		return &vdomain->domain;
+
+	if (iommu_get_dma_cookie(&vdomain->domain)) {
+		kfree(vdomain);
+		return NULL;
+	}
+
+	return &vdomain->domain;
+}
+
+/*
+ * viommu_domain_finalise - bind a freshly allocated domain to its viommu
+ *
+ * @viommu: the device that will own the domain
+ * @domain: the domain to initialise
+ *
+ * Allocates a domain ID within [first_domain, last_domain] and copies the
+ * device's page sizes, geometry and supported map flags into the domain.
+ *
+ * Returns 0 on success or the negative error from ida_alloc_range(). On
+ * failure the domain is left untouched: in particular vdomain->viommu stays
+ * NULL, so a later attach retries the finalisation and viommu_domain_free()
+ * does not release an ID that was never allocated.
+ */
+static int viommu_domain_finalise(struct viommu_dev *viommu,
+				  struct iommu_domain *domain)
+{
+	int id;
+	struct viommu_domain *vdomain = to_viommu_domain(domain);
+
+	/* Do the only step that can fail first */
+	id = ida_alloc_range(&viommu->domain_ids, viommu->first_domain,
+			     viommu->last_domain, GFP_KERNEL);
+	if (id < 0)
+		return id;
+
+	vdomain->id		= (unsigned int)id;
+	vdomain->map_flags	= viommu->map_flags;
+
+	domain->pgsize_bitmap	= viommu->pgsize_bitmap;
+	domain->geometry	= viommu->geometry;
+
+	/* Setting the owner marks the domain as finalised */
+	vdomain->viommu		= viommu;
+
+	return 0;
+}
+
+/*
+ * viommu_domain_free - release a domain and everything it still tracks
+ *
+ * Drops the DMA cookie, deletes all remaining mappings from the internal
+ * tree, gives the domain ID back if the domain was bound to a viommu, and
+ * frees the structure.
+ */
+static void viommu_domain_free(struct iommu_domain *domain)
+{
+	struct viommu_domain *vdomain = to_viommu_domain(domain);
+	struct viommu_dev *owner;
+
+	iommu_put_dma_cookie(domain);
+
+	/* iova 0 with size 0 covers the whole address space (size 2^64) */
+	viommu_del_mappings(vdomain, 0, 0);
+
+	owner = vdomain->viommu;
+	if (owner)
+		ida_free(&owner->domain_ids, vdomain->id);
+
+	kfree(vdomain);
+}
+
+/*
+ * viommu_attach_dev - attach an endpoint to a domain
+ *
+ * Finalises the domain on first use, sends one ATTACH request per endpoint
+ * ID, replays the domain's mappings if it had no endpoint attached, and
+ * updates the endpoint counters.
+ *
+ * Returns 0 on success, -EXDEV if the domain already belongs to another
+ * viommu, or the error from finalisation, the ATTACH request or the replay.
+ */
+static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev)
+{
+ int i;
+ int ret = 0;
+ struct virtio_iommu_req_attach req;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+ struct viommu_endpoint *vdev = fwspec->iommu_priv;
+ struct viommu_domain *vdomain = to_viommu_domain(domain);
+
+ /* vdomain->mutex serialises the first-use initialisation below */
+ mutex_lock(&vdomain->mutex);
+ if (!vdomain->viommu) {
+ /*
+ * Properly initialize the domain now that we know which viommu
+ * owns it.
+ */
+ ret = viommu_domain_finalise(vdev->viommu, domain);
+ } else if (vdomain->viommu != vdev->viommu) {
+ dev_err(dev, "cannot attach to foreign vIOMMU\n");
+ ret = -EXDEV;
+ }
+ mutex_unlock(&vdomain->mutex);
+
+ if (ret)
+ return ret;
+
+ /*
+ * In the virtio-iommu device, when attaching the endpoint to a new
+ * domain, it is detached from the old one and, if as a result the
+ * old domain isn't attached to any endpoint, all mappings are removed
+ * from the old domain and it is freed.
+ *
+ * In the driver the old domain still exists, and its mappings will be
+ * recreated if it gets reattached to an endpoint. Otherwise it will be
+ * freed explicitly.
+ *
+ * vdev->vdomain is protected by group->mutex
+ */
+ /*
+ * NOTE(review): the old domain's count is dropped before the requests
+ * below are sent and is not restored on the error returns - confirm
+ * this is intended.
+ */
+ if (vdev->vdomain)
+ vdev->vdomain->nr_endpoints--;
+
+ req = (struct virtio_iommu_req_attach) {
+ .head.type = VIRTIO_IOMMU_T_ATTACH,
+ .domain = cpu_to_le32(vdomain->id),
+ };
+
+ /* One ATTACH request per ID of the endpoint */
+ for (i = 0; i < fwspec->num_ids; i++) {
+ req.endpoint = cpu_to_le32(fwspec->ids[i]);
+
+ ret = viommu_send_req_sync(vdomain->viommu, &req, sizeof(req));
+ if (ret)
+ return ret;
+ }
+
+ if (!vdomain->nr_endpoints) {
+ /*
+ * This endpoint is the first to be attached to the domain.
+ * Replay existing mappings (e.g. SW MSI).
+ */
+ ret = viommu_replay_mappings(vdomain);
+ if (ret)
+ return ret;
+ }
+
+ vdomain->nr_endpoints++;
+ vdev->vdomain = vdomain;
+
+ return 0;
+}
+
+/*
+ * viommu_map - create a mapping in a domain
+ *
+ * Translates @prot into virtio-iommu map flags, records the mapping in the
+ * internal tree and, if the domain has endpoints attached, sends a MAP
+ * request to the device. The tree entry is removed again when that request
+ * fails.
+ *
+ * Returns 0 on success, -EINVAL if the domain does not support one of the
+ * requested flags, or the error from viommu_add_mapping() or
+ * viommu_send_req_sync().
+ */
+static int viommu_map(struct iommu_domain *domain, unsigned long iova,
+		      phys_addr_t paddr, size_t size, int prot)
+{
+	int ret;
+	u32 flags = 0;
+	struct virtio_iommu_req_map map;
+	struct viommu_domain *vdomain = to_viommu_domain(domain);
+
+	if (prot & IOMMU_READ)
+		flags |= VIRTIO_IOMMU_MAP_F_READ;
+	if (prot & IOMMU_WRITE)
+		flags |= VIRTIO_IOMMU_MAP_F_WRITE;
+	if (prot & IOMMU_MMIO)
+		flags |= VIRTIO_IOMMU_MAP_F_MMIO;
+
+	if (flags & ~vdomain->map_flags)
+		return -EINVAL;
+
+	ret = viommu_add_mapping(vdomain, iova, paddr, size, flags);
+	if (ret)
+		return ret;
+
+	/* Nothing attached: the tree entry is all there is to do for now */
+	if (!vdomain->nr_endpoints)
+		return 0;
+
+	map = (struct virtio_iommu_req_map) {
+		.head.type	= VIRTIO_IOMMU_T_MAP,
+		.domain		= cpu_to_le32(vdomain->id),
+		.virt_start	= cpu_to_le64(iova),
+		.phys_start	= cpu_to_le64(paddr),
+		.virt_end	= cpu_to_le64(iova + size - 1),
+		.flags		= cpu_to_le32(flags),
+	};
+
+	ret = viommu_send_req_sync(vdomain->viommu, &map, sizeof(map));
+	if (ret)
+		viommu_del_mappings(vdomain, iova, size);
+
+	return ret;
+}
+
+/*
+ * viommu_unmap - remove mappings from a domain
+ *
+ * Deletes the range from the internal tree and, if the domain has endpoints
+ * attached, queues an UNMAP request with viommu_add_req() for the bytes that
+ * were removed. @gather is not used here.
+ *
+ * Returns the number of bytes unmapped, or 0 if fewer than @size bytes could
+ * be removed or the request could not be queued.
+ */
+static size_t viommu_unmap(struct iommu_domain *domain, unsigned long iova,
+			   size_t size, struct iommu_iotlb_gather *gather)
+{
+	size_t unmapped;
+	struct virtio_iommu_req_unmap unmap;
+	struct viommu_domain *vdomain = to_viommu_domain(domain);
+
+	unmapped = viommu_del_mappings(vdomain, iova, size);
+	if (unmapped < size)
+		return 0;
+
+	/* Device already removed all mappings after detach. */
+	if (!vdomain->nr_endpoints)
+		return unmapped;
+
+	unmap = (struct virtio_iommu_req_unmap) {
+		.head.type	= VIRTIO_IOMMU_T_UNMAP,
+		.domain		= cpu_to_le32(vdomain->id),
+		.virt_start	= cpu_to_le64(iova),
+		.virt_end	= cpu_to_le64(iova + unmapped - 1),
+	};
+
+	if (viommu_add_req(vdomain->viommu, &unmap, sizeof(unmap)))
+		return 0;
+
+	return unmapped;
+}
+
+/*
+ * viommu_iova_to_phys - translate an I/O virtual address
+ *
+ * Looks @iova up in the domain's interval tree under mappings_lock. Returns
+ * the matching physical address, or 0 when no mapping covers @iova.
+ */
+static phys_addr_t viommu_iova_to_phys(struct iommu_domain *domain,
+				       dma_addr_t iova)
+{
+	u64 phys = 0;
+	unsigned long irqflags;
+	struct interval_tree_node *hit;
+	struct viommu_domain *vdomain = to_viommu_domain(domain);
+
+	spin_lock_irqsave(&vdomain->mappings_lock, irqflags);
+	hit = interval_tree_iter_first(&vdomain->mappings, iova, iova);
+	if (hit) {
+		struct viommu_mapping *entry =
+			container_of(hit, struct viommu_mapping, iova);
+
+		/* Base of the mapping plus the offset of @iova within it */
+		phys = entry->paddr + (iova - entry->iova.start);
+	}
+	spin_unlock_irqrestore(&vdomain->mappings_lock, irqflags);
+
+	return phys;
+}
+
+/*
+ * viommu_iotlb_sync - .iotlb_sync callback
+ *
+ * Calls viommu_sync_req() on the viommu that owns @domain. @gather is not
+ * used.
+ */
+static void viommu_iotlb_sync(struct iommu_domain *domain,
+			      struct iommu_iotlb_gather *gather)
+{
+	viommu_sync_req(to_viommu_domain(domain)->viommu);
+}
+
+/*
+ * viommu_get_resv_regions - build the list of reserved regions for @dev
+ *
+ * Appends to @head a copy of every region recorded for the endpoint, then a
+ * software-mapped MSI window if none of those regions is of type
+ * IOMMU_RESV_MSI, and finally whatever iommu_dma_get_resv_regions() adds.
+ * Returns early, leaving @head partially filled, when an allocation fails.
+ */
+static void viommu_get_resv_regions(struct device *dev, struct list_head *head)
+{
+	struct iommu_resv_region *cur, *copy, *msi_window = NULL;
+	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+	struct viommu_endpoint *vdev = fwspec->iommu_priv;
+	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
+
+	list_for_each_entry(cur, &vdev->resv_regions, list) {
+		/* Remember that the device provided its own MSI window */
+		if (cur->type == IOMMU_RESV_MSI)
+			msi_window = cur;
+
+		copy = kmemdup(cur, sizeof(*cur), GFP_KERNEL);
+		if (!copy)
+			return;
+		list_add_tail(&copy->list, head);
+	}
+
+	/*
+	 * If the device didn't register any bypass MSI window, add a
+	 * software-mapped region.
+	 */
+	if (msi_window == NULL) {
+		msi_window = iommu_alloc_resv_region(MSI_IOVA_BASE,
+						     MSI_IOVA_LENGTH, prot,
+						     IOMMU_RESV_SW_MSI);
+		if (msi_window == NULL)
+			return;
+
+		list_add_tail(&msi_window->list, head);
+	}
+
+	iommu_dma_get_resv_regions(dev, head);
+}
+
+/*
+ * viommu_put_resv_regions - free every region on @head
+ *
+ * The entries are freed without being unlinked and @head itself is not
+ * reinitialised. @dev is not used.
+ */
+static void viommu_put_resv_regions(struct device *dev, struct list_head *head)
+{
+	struct iommu_resv_region *region, *tmp;
+
+	/* The _safe variant is needed because each entry is freed in turn */
+	list_for_each_entry_safe(region, tmp, head, list) {
+		kfree(region);
+	}
+}
+
+/*
+ * Forward declarations: both objects are defined further down in this file
+ * but are referenced by the functions in between.
+ */
+static struct iommu_ops viommu_ops;
+static struct virtio_driver virtio_iommu_drv;
+
+/*
+ * viommu_match_node - match callback used by viommu_get_by_fwnode()
+ *
+ * Returns non-zero when the parent of @dev has the firmware node passed in
+ * @data.
+ */
+static int viommu_match_node(struct device *dev, const void *data)
+{
+	const struct fwnode_handle *wanted = data;
+
+	return dev->parent->fwnode == wanted;
+}
+
+/*
+ * viommu_get_by_fwnode - find the viommu instance behind a firmware node
+ *
+ * Searches the devices bound to this driver for one whose parent has @fwnode
+ * (see viommu_match_node()). Returns the driver data stored in the matching
+ * virtio device's ->priv, or NULL when no device matches.
+ */
+static struct viommu_dev *viommu_get_by_fwnode(struct fwnode_handle *fwnode)
+{
+	struct viommu_dev *viommu;
+	struct device *dev = driver_find_device(&virtio_iommu_drv.driver, NULL,
+						fwnode, viommu_match_node);
+
+	if (!dev)
+		return NULL;
+
+	/*
+	 * Read ->priv while the reference obtained from driver_find_device()
+	 * is still held, and only then drop it.
+	 */
+	viommu = dev_to_virtio(dev)->priv;
+	put_device(dev);
+
+	return viommu;
+}
+
+/*
+ * viommu_add_device - .add_device callback
+ *
+ * Allocates the per-endpoint state for @dev, probes the endpoint's
+ * properties when the device advertises a probe size, links @dev to the
+ * IOMMU device and gets its IOMMU group.
+ *
+ * Returns 0 on success, -ENODEV if @dev is not handled by this driver or no
+ * viommu instance matches its firmware node, -ENOMEM on allocation failure,
+ * or the error from the failing step.
+ */
+static int viommu_add_device(struct device *dev)
+{
+ int ret;
+ struct iommu_group *group;
+ struct viommu_endpoint *vdev;
+ struct viommu_dev *viommu = NULL;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+
+ /* Only handle devices whose fwspec points at this driver's ops */
+ if (!fwspec || fwspec->ops != &viommu_ops)
+ return -ENODEV;
+
+ viommu = viommu_get_by_fwnode(fwspec->iommu_fwnode);
+ if (!viommu)
+ return -ENODEV;
+
+ vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
+ if (!vdev)
+ return -ENOMEM;
+
+ vdev->dev = dev;
+ vdev->viommu = viommu;
+ INIT_LIST_HEAD(&vdev->resv_regions);
+ /*
+ * NOTE(review): the error paths below free vdev but leave
+ * fwspec->iommu_priv pointing at it - confirm nothing reads it after
+ * a failed add.
+ */
+ fwspec->iommu_priv = vdev;
+
+ if (viommu->probe_size) {
+ /* Get additional information for this endpoint */
+ ret = viommu_probe_endpoint(viommu, dev);
+ if (ret)
+ goto err_free_dev;
+ }
+
+ ret = iommu_device_link(&viommu->iommu, dev);
+ if (ret)
+ goto err_free_dev;
+
+ /*
+ * Last step creates a default domain and attaches to it. Everything
+ * must be ready.
+ */
+ group = iommu_group_get_for_dev(dev);
+ if (IS_ERR(group)) {
+ ret = PTR_ERR(group);
+ goto err_unlink_dev;
+ }
+
+ iommu_group_put(group);
+
+ /* group is not an error pointer here, so this evaluates to 0 */
+ return PTR_ERR_OR_ZERO(group);
+
+err_unlink_dev:
+ iommu_device_unlink(&viommu->iommu, dev);
+err_free_dev:
+ /* Also releases any regions recorded by viommu_probe_endpoint() */
+ viommu_put_resv_regions(dev, &vdev->resv_regions);
+ kfree(vdev);
+
+ return ret;
+}
+
+/*
+ * viommu_remove_device - .remove_device callback
+ *
+ * Undoes viommu_add_device() for a device handled by this driver: removes
+ * it from its IOMMU group, unlinks it from the IOMMU device, frees its
+ * reserved regions and the per-endpoint state. Does nothing for devices
+ * whose fwspec does not point at viommu_ops.
+ */
+static void viommu_remove_device(struct device *dev)
+{
+	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+	struct viommu_endpoint *vdev;
+
+	if (fwspec && fwspec->ops == &viommu_ops) {
+		vdev = fwspec->iommu_priv;
+
+		iommu_group_remove_device(dev);
+		iommu_device_unlink(&vdev->viommu->iommu, dev);
+		viommu_put_resv_regions(dev, &vdev->resv_regions);
+		kfree(vdev);
+	}
+}
+
+/*
+ * viommu_device_group - .device_group callback
+ *
+ * PCI devices are grouped by pci_device_group(), everything else by
+ * generic_device_group().
+ */
+static struct iommu_group *viommu_device_group(struct device *dev)
+{
+	if (!dev_is_pci(dev))
+		return generic_device_group(dev);
+
+	return pci_device_group(dev);
+}
+
+/*
+ * viommu_of_xlate - .of_xlate callback
+ *
+ * Registers the first argument cell of @args as an ID of @dev and returns
+ * the result of iommu_fwspec_add_ids().
+ */
+static int viommu_of_xlate(struct device *dev, struct of_phandle_args *args)
+{
+	return iommu_fwspec_add_ids(dev, &args->args[0], 1);
+}
+
+/*
+ * IOMMU callbacks implemented by this driver. viommu_probe() fills in
+ * pgsize_bitmap and installs the table on the IOMMU device and on the bus
+ * types it supports.
+ */
+static struct iommu_ops viommu_ops = {
+ .domain_alloc = viommu_domain_alloc,
+ .domain_free = viommu_domain_free,
+ .attach_dev = viommu_attach_dev,
+ .map = viommu_map,
+ .unmap = viommu_unmap,
+ .iova_to_phys = viommu_iova_to_phys,
+ .iotlb_sync = viommu_iotlb_sync,
+ .add_device = viommu_add_device,
+ .remove_device = viommu_remove_device,
+ .device_group = viommu_device_group,
+ .get_resv_regions = viommu_get_resv_regions,
+ .put_resv_regions = viommu_put_resv_regions,
+ .of_xlate = viommu_of_xlate,
+};
+
+/*
+ * viommu_init_vqs - look up the driver's virtqueues
+ *
+ * Asks for VIOMMU_NR_VQS queues named "request" and "event" and stores them
+ * in viommu->vqs. Only the event queue gets a callback. Returns the result
+ * of virtio_find_vqs().
+ */
+static int viommu_init_vqs(struct viommu_dev *viommu)
+{
+	struct virtio_device *vdev = dev_to_virtio(viommu->dev);
+	const char *vq_names[] = { "request", "event" };
+	vq_callback_t *vq_callbacks[] = {
+		NULL,			/* request: no async requests */
+		viommu_event_handler,	/* event */
+	};
+
+	return virtio_find_vqs(vdev, VIOMMU_NR_VQS, viommu->vqs, vq_callbacks,
+			       vq_names, NULL);
+}
+
+/*
+ * viommu_fill_evtq - give the device buffers to report events into
+ *
+ * Allocates one event buffer per free slot of the event queue (device-managed
+ * allocation, kept in viommu->evts) and adds each of them to the queue as an
+ * input buffer. Returns 0 on success, -ENOMEM if the array cannot be
+ * allocated, or the error from virtqueue_add_inbuf().
+ */
+static int viommu_fill_evtq(struct viommu_dev *viommu)
+{
+	int i, ret;
+	struct scatterlist sg[1];
+	struct viommu_event *evts;
+	struct virtqueue *vq = viommu->vqs[VIOMMU_EVENT_VQ];
+	size_t nr_evts = vq->num_free;
+
+	evts = devm_kmalloc_array(viommu->dev, nr_evts, sizeof(*evts),
+				  GFP_KERNEL);
+	viommu->evts = evts;
+	if (!evts)
+		return -ENOMEM;
+
+	for (i = 0; i < nr_evts; i++) {
+		struct viommu_event *evt = &evts[i];
+
+		sg_init_one(sg, evt, sizeof(*evts));
+		ret = virtqueue_add_inbuf(vq, sg, 1, evt, GFP_KERNEL);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * viommu_probe - .probe callback of the virtio driver
+ *
+ * Checks the mandatory features, allocates the driver state, sets up the
+ * virtqueues, reads the device configuration, fills the event queue and
+ * registers the IOMMU with the IOMMU core and the supported bus types.
+ *
+ * Returns 0 on success, -ENODEV if a mandatory feature is missing, -ENOMEM
+ * on allocation failure, -EINVAL if the device reports an empty page size
+ * mask, or the error from the failing step.
+ */
+static int viommu_probe(struct virtio_device *vdev)
+{
+ struct device *parent_dev = vdev->dev.parent;
+ struct viommu_dev *viommu = NULL;
+ struct device *dev = &vdev->dev;
+ /* Defaults, kept when the device does not offer INPUT_RANGE */
+ u64 input_start = 0;
+ u64 input_end = -1UL;
+ int ret;
+
+ /* Both features are mandatory for this driver */
+ if (!virtio_has_feature(vdev, VIRTIO_F_VERSION_1) ||
+ !virtio_has_feature(vdev, VIRTIO_IOMMU_F_MAP_UNMAP))
+ return -ENODEV;
+
+ viommu = devm_kzalloc(dev, sizeof(*viommu), GFP_KERNEL);
+ if (!viommu)
+ return -ENOMEM;
+
+ spin_lock_init(&viommu->request_lock);
+ ida_init(&viommu->domain_ids);
+ viommu->dev = dev;
+ viommu->vdev = vdev;
+ INIT_LIST_HEAD(&viommu->requests);
+
+ ret = viommu_init_vqs(viommu);
+ if (ret)
+ return ret;
+
+ virtio_cread(vdev, struct virtio_iommu_config, page_size_mask,
+ &viommu->pgsize_bitmap);
+
+ if (!viommu->pgsize_bitmap) {
+ ret = -EINVAL;
+ goto err_free_vqs;
+ }
+
+ /* Defaults, overridden below when the matching feature is offered */
+ viommu->map_flags = VIRTIO_IOMMU_MAP_F_READ | VIRTIO_IOMMU_MAP_F_WRITE;
+ viommu->last_domain = ~0U;
+
+ /* Optional features */
+ virtio_cread_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE,
+ struct virtio_iommu_config, input_range.start,
+ &input_start);
+
+ virtio_cread_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE,
+ struct virtio_iommu_config, input_range.end,
+ &input_end);
+
+ virtio_cread_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_RANGE,
+ struct virtio_iommu_config, domain_range.start,
+ &viommu->first_domain);
+
+ virtio_cread_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_RANGE,
+ struct virtio_iommu_config, domain_range.end,
+ &viommu->last_domain);
+
+ virtio_cread_feature(vdev, VIRTIO_IOMMU_F_PROBE,
+ struct virtio_iommu_config, probe_size,
+ &viommu->probe_size);
+
+ viommu->geometry = (struct iommu_domain_geometry) {
+ .aperture_start = input_start,
+ .aperture_end = input_end,
+ .force_aperture = true,
+ };
+
+ if (virtio_has_feature(vdev, VIRTIO_IOMMU_F_MMIO))
+ viommu->map_flags |= VIRTIO_IOMMU_MAP_F_MMIO;
+
+ /* viommu_ops is a single file-scope object; each probe overwrites this */
+ viommu_ops.pgsize_bitmap = viommu->pgsize_bitmap;
+
+ virtio_device_ready(vdev);
+
+ /* Populate the event queue with buffers */
+ ret = viommu_fill_evtq(viommu);
+ if (ret)
+ goto err_free_vqs;
+
+ ret = iommu_device_sysfs_add(&viommu->iommu, dev, NULL, "%s",
+ virtio_bus_name(vdev));
+ if (ret)
+ goto err_free_vqs;
+
+ iommu_device_set_ops(&viommu->iommu, &viommu_ops);
+ iommu_device_set_fwnode(&viommu->iommu, parent_dev->fwnode);
+
+ iommu_device_register(&viommu->iommu);
+
+ /* Install the ops on each bus type unless they are already in place */
+#ifdef CONFIG_PCI
+ if (pci_bus_type.iommu_ops != &viommu_ops) {
+ pci_request_acs();
+ ret = bus_set_iommu(&pci_bus_type, &viommu_ops);
+ if (ret)
+ goto err_unregister;
+ }
+#endif
+#ifdef CONFIG_ARM_AMBA
+ if (amba_bustype.iommu_ops != &viommu_ops) {
+ ret = bus_set_iommu(&amba_bustype, &viommu_ops);
+ if (ret)
+ goto err_unregister;
+ }
+#endif
+ if (platform_bus_type.iommu_ops != &viommu_ops) {
+ ret = bus_set_iommu(&platform_bus_type, &viommu_ops);
+ if (ret)
+ goto err_unregister;
+ }
+
+ /*
+ * NOTE(review): priv is published only here, after the IOMMU has been
+ * registered; viommu_get_by_fwnode() and viommu_event_handler() read
+ * it - confirm neither can run before this point.
+ */
+ vdev->priv = viommu;
+
+ dev_info(dev, "input address: %u bits\n",
+ order_base_2(viommu->geometry.aperture_end));
+ dev_info(dev, "page mask: %#llx\n", viommu->pgsize_bitmap);
+
+ return 0;
+
+err_unregister:
+ iommu_device_sysfs_remove(&viommu->iommu);
+ iommu_device_unregister(&viommu->iommu);
+err_free_vqs:
+ /*
+ * NOTE(review): unlike viommu_remove(), this path deletes the queues
+ * without resetting the device first, even when it is reached after
+ * virtio_device_ready() - confirm that is safe.
+ */
+ vdev->config->del_vqs(vdev);
+
+ return ret;
+}
+
+/*
+ * viommu_remove - .remove callback of the virtio driver
+ *
+ * Unregisters the IOMMU device and its sysfs entry, then resets the virtio
+ * device and deletes its virtqueues.
+ */
+static void viommu_remove(struct virtio_device *vdev)
+{
+	struct device *dev = &vdev->dev;
+	struct viommu_dev *viommu = vdev->priv;
+
+	iommu_device_sysfs_remove(&viommu->iommu);
+	iommu_device_unregister(&viommu->iommu);
+
+	/* Reset first so that all virtqueues are stopped, then delete them */
+	vdev->config->reset(vdev);
+	vdev->config->del_vqs(vdev);
+
+	dev_info(dev, "device removed\n");
+}
+
+/*
+ * viommu_config_changed - .config_changed callback
+ *
+ * Configuration changes are not acted upon; they are only logged.
+ */
+static void viommu_config_changed(struct virtio_device *vdev)
+{
+	struct device *dev = &vdev->dev;
+
+	dev_warn(dev, "config changed\n");
+}
+
+/*
+ * Feature bits listed in the driver's feature table. viommu_probe() refuses
+ * devices without MAP_UNMAP and treats the remaining ones as optional.
+ */
+static unsigned int features[] = {
+ VIRTIO_IOMMU_F_MAP_UNMAP,
+ VIRTIO_IOMMU_F_INPUT_RANGE,
+ VIRTIO_IOMMU_F_DOMAIN_RANGE,
+ VIRTIO_IOMMU_F_PROBE,
+ VIRTIO_IOMMU_F_MMIO,
+};
+
+/*
+ * Devices handled by this driver; the table ends with a zeroed entry.
+ * VIRTIO_DEV_ANY_ID presumably wildcards the vendor field - confirm against
+ * struct virtio_device_id.
+ */
+static struct virtio_device_id id_table[] = {
+ { VIRTIO_ID_IOMMU, VIRTIO_DEV_ANY_ID },
+ { 0 },
+};
+
+/*
+ * The virtio driver itself: ties the ID and feature tables to the probe,
+ * remove and config-change callbacks defined above.
+ */
+static struct virtio_driver virtio_iommu_drv = {
+ .driver.name = KBUILD_MODNAME,
+ .driver.owner = THIS_MODULE,
+ .id_table = id_table,
+ .feature_table = features,
+ .feature_table_size = ARRAY_SIZE(features),
+ .probe = viommu_probe,
+ .remove = viommu_remove,
+ .config_changed = viommu_config_changed,
+};
+
+/* Module entry points for the driver declared above */
+module_virtio_driver(virtio_iommu_drv);
+
+MODULE_DESCRIPTION("Virtio IOMMU driver");
+MODULE_AUTHOR("Jean-Philippe Brucker <jean-philippe.brucker@arm.com>");
+MODULE_LICENSE("GPL v2");