Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index fc0160e..04878ca 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -3,6 +3,10 @@
config IOMMU_IOVA
tristate
+# The IOASID library may also be used by non-IOMMU_API users
+config IOASID
+ tristate
+
# IOMMU_API always gets selected by whoever wants it.
config IOMMU_API
bool
@@ -11,7 +15,7 @@
bool "IOMMU Hardware Support"
depends on MMU
default y
- ---help---
+ help
Say Y here if you want to compile device drivers for IO Memory
Management Units into the kernel. These devices usually allow to
remap DMA requests and/or remap interrupts from other devices on the
@@ -78,7 +82,7 @@
config IOMMU_DEFAULT_PASSTHROUGH
bool "IOMMU passthrough by default"
depends on IOMMU_API
- help
+ help
Enable passthrough by default, removing the need to pass in
iommu.passthrough=on or iommu=pt through command line. If this
is enabled, you can still disable with iommu.passthrough=off
@@ -87,12 +91,13 @@
If unsure, say N here.
config OF_IOMMU
- def_bool y
- depends on OF && IOMMU_API
+ def_bool y
+ depends on OF && IOMMU_API
# IOMMU-agnostic DMA-mapping layer
config IOMMU_DMA
bool
+ select DMA_OPS
select IOMMU_API
select IOMMU_IOVA
select IRQ_MSI_IOMMU
@@ -124,129 +129,14 @@
If unsure, say N here.
-config IOMMU_PGTABLES_L2
- def_bool y
- depends on MSM_IOMMU && MMU && SMP && CPU_DCACHE_DISABLE=n
-
-# AMD IOMMU support
-config AMD_IOMMU
- bool "AMD IOMMU support"
- select SWIOTLB
- select PCI_MSI
- select PCI_ATS
- select PCI_PRI
- select PCI_PASID
- select IOMMU_API
- select IOMMU_IOVA
- depends on X86_64 && PCI && ACPI && HAVE_CMPXCHG_DOUBLE
- ---help---
- With this option you can enable support for AMD IOMMU hardware in
- your system. An IOMMU is a hardware component which provides
- remapping of DMA memory accesses from devices. With an AMD IOMMU you
- can isolate the DMA memory of different devices and protect the
- system from misbehaving device drivers or hardware.
-
- You can find out if your system has an AMD IOMMU if you look into
- your BIOS for an option to enable it or if you have an IVRS ACPI
- table.
-
-config AMD_IOMMU_V2
- tristate "AMD IOMMU Version 2 driver"
- depends on AMD_IOMMU
- select MMU_NOTIFIER
- ---help---
- This option enables support for the AMD IOMMUv2 features of the IOMMU
- hardware. Select this option if you want to use devices that support
- the PCI PRI and PASID interface.
-
-config AMD_IOMMU_DEBUGFS
- bool "Enable AMD IOMMU internals in DebugFS"
- depends on AMD_IOMMU && IOMMU_DEBUGFS
- ---help---
- !!!WARNING!!! !!!WARNING!!! !!!WARNING!!! !!!WARNING!!!
-
- DO NOT ENABLE THIS OPTION UNLESS YOU REALLY, -REALLY- KNOW WHAT YOU ARE DOING!!!
- Exposes AMD IOMMU device internals in DebugFS.
-
- This option is -NOT- intended for production environments, and should
- not generally be enabled.
-
-# Intel IOMMU support
-config DMAR_TABLE
- bool
-
-config INTEL_IOMMU
- bool "Support for Intel IOMMU using DMA Remapping Devices"
- depends on PCI_MSI && ACPI && (X86 || IA64)
- select IOMMU_API
- select IOMMU_IOVA
- select NEED_DMA_MAP_STATE
- select DMAR_TABLE
- select SWIOTLB
- help
- DMA remapping (DMAR) devices support enables independent address
- translations for Direct Memory Access (DMA) from devices.
- These DMA remapping devices are reported via ACPI tables
- and include PCI device scope covered by these DMA
- remapping devices.
-
-config INTEL_IOMMU_DEBUGFS
- bool "Export Intel IOMMU internals in Debugfs"
- depends on INTEL_IOMMU && IOMMU_DEBUGFS
- help
- !!!WARNING!!!
-
- DO NOT ENABLE THIS OPTION UNLESS YOU REALLY KNOW WHAT YOU ARE DOING!!!
-
- Expose Intel IOMMU internals in Debugfs.
-
- This option is -NOT- intended for production environments, and should
- only be enabled for debugging Intel IOMMU.
-
-config INTEL_IOMMU_SVM
- bool "Support for Shared Virtual Memory with Intel IOMMU"
- depends on INTEL_IOMMU && X86_64
- select PCI_PASID
- select MMU_NOTIFIER
- help
- Shared Virtual Memory (SVM) provides a facility for devices
- to access DMA resources through process address space by
- means of a Process Address Space ID (PASID).
-
-config INTEL_IOMMU_DEFAULT_ON
- def_bool y
- prompt "Enable Intel DMA Remapping Devices by default"
- depends on INTEL_IOMMU
- help
- Selecting this option will enable a DMAR device at boot time if
- one is found. If this option is not selected, DMAR support can
- be enabled by passing intel_iommu=on to the kernel.
-
-config INTEL_IOMMU_BROKEN_GFX_WA
- bool "Workaround broken graphics drivers (going away soon)"
- depends on INTEL_IOMMU && BROKEN && X86
- ---help---
- Current Graphics drivers tend to use physical address
- for DMA and avoid using DMA APIs. Setting this config
- option permits the IOMMU driver to set a unity map for
- all the OS-visible memory. Hence the driver can continue
- to use physical addresses for DMA, at least until this
- option is removed in the 2.6.32 kernel.
-
-config INTEL_IOMMU_FLOPPY_WA
- def_bool y
- depends on INTEL_IOMMU && X86
- ---help---
- Floppy disk drivers are known to bypass DMA API calls
- thereby failing to work when IOMMU is enabled. This
- workaround will setup a 1:1 mapping for the first
- 16MiB to make floppy (an ISA device) work.
+source "drivers/iommu/amd/Kconfig"
+source "drivers/iommu/intel/Kconfig"
config IRQ_REMAP
bool "Support for Interrupt Remapping"
depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI
select DMAR_TABLE
- ---help---
+ help
Supports Interrupt remapping for IO-APIC and MSI devices.
To use x2apic mode in the CPU's which support x2APIC enhancements or
to support platforms with CPU's having > 8 bit APIC ID, say Y.
@@ -254,17 +144,16 @@
# OMAP IOMMU support
config OMAP_IOMMU
bool "OMAP IOMMU Support"
- depends on ARM && MMU
depends on ARCH_OMAP2PLUS || COMPILE_TEST
select IOMMU_API
- ---help---
+ help
The OMAP3 media platform drivers depend on iommu support,
if you need them say Y here.
config OMAP_IOMMU_DEBUG
bool "Export OMAP IOMMU internals in DebugFS"
depends on OMAP_IOMMU && DEBUG_FS
- ---help---
+ help
Select this to see extensive information about
the internal state of OMAP IOMMU in debugfs.
@@ -272,7 +161,6 @@
config ROCKCHIP_IOMMU
bool "Rockchip IOMMU Support"
- depends on ARM || ARM64
depends on ARCH_ROCKCHIP || COMPILE_TEST
select IOMMU_API
select ARM_DMA_USE_IOMMU
@@ -283,6 +171,15 @@
Say Y here if you are using a Rockchip SoC that includes an IOMMU
device.
+config SUN50I_IOMMU
+ bool "Allwinner H6 IOMMU Support"
+ depends on HAS_DMA
+ depends on ARCH_SUNXI || COMPILE_TEST
+ select ARM_DMA_USE_IOMMU
+ select IOMMU_API
+ help
+ Support for the IOMMU introduced in the Allwinner H6 SoCs.
+
config TEGRA_IOMMU_GART
bool "Tegra GART IOMMU Support"
depends on ARCH_TEGRA_2x_SOC
@@ -306,7 +203,7 @@
config EXYNOS_IOMMU
bool "Exynos IOMMU Support"
- depends on ARCH_EXYNOS && MMU
+ depends on ARCH_EXYNOS || COMPILE_TEST
depends on !CPU_BIG_ENDIAN # revisit driver if we can enable big-endian ptes
select IOMMU_API
select ARM_DMA_USE_IOMMU
@@ -329,14 +226,13 @@
config IPMMU_VMSA
bool "Renesas VMSA-compatible IPMMU"
- depends on ARM || IOMMU_DMA
depends on ARCH_RENESAS || (COMPILE_TEST && !GENERIC_ATOMIC64)
select IOMMU_API
select IOMMU_IO_PGTABLE_LPAE
select ARM_DMA_USE_IOMMU
help
Support for the Renesas VMSA-compatible IPMMU found in the R-Mobile
- APE6, R-Car Gen2, and R-Car Gen3 SoCs.
+ APE6, R-Car Gen{2,3} and RZ/G{1,2} SoCs.
If unsure, say N.
@@ -350,8 +246,8 @@
# ARM IOMMU support
config ARM_SMMU
- bool "ARM Ltd. System MMU (SMMU) Support"
- depends on (ARM64 || ARM) && MMU
+ tristate "ARM Ltd. System MMU (SMMU) Support"
+ depends on ARM64 || ARM || (COMPILE_TEST && !GENERIC_ATOMIC64)
select IOMMU_API
select IOMMU_IO_PGTABLE_LPAE
select ARM_DMA_USE_IOMMU if ARM
@@ -362,6 +258,18 @@
Say Y here if your SoC includes an IOMMU device implementing
the ARM SMMU architecture.
+config ARM_SMMU_LEGACY_DT_BINDINGS
+ bool "Support the legacy \"mmu-masters\" devicetree bindings"
+ depends on ARM_SMMU=y && OF
+ help
+ Support for the badly designed and deprecated "mmu-masters"
+ devicetree bindings. This allows some DMA masters to attach
+ to the SMMU but does not provide any support via the DMA API.
+ If you're lucky, you might be able to get VFIO up and running.
+
+ If you say Y here then you'll make me very sad. Instead, say N
+ and move your firmware to the utopian future that was 2016.
+
config ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT
bool "Default to disabling bypass on ARM SMMU v1 and v2"
depends on ARM_SMMU
@@ -388,7 +296,7 @@
config.
config ARM_SMMU_V3
- bool "ARM Ltd. System MMU Version 3 (SMMUv3) Support"
+ tristate "ARM Ltd. System MMU Version 3 (SMMUv3) Support"
depends on ARM64
select IOMMU_API
select IOMMU_IO_PGTABLE_LPAE
@@ -400,6 +308,16 @@
Say Y here if your system includes an IOMMU device implementing
the ARM SMMUv3 architecture.
+config ARM_SMMU_V3_SVA
+ bool "Shared Virtual Addressing support for the ARM SMMUv3"
+ depends on ARM_SMMU_V3
+ help
+ Support for sharing process address spaces with devices using the
+ SMMUv3.
+
+ Say Y here if your system supports SVA extensions such as PCIe PASID
+ and PRI.
+
config S390_IOMMU
def_bool y if S390 && PCI
depends on S390 && PCI
@@ -409,7 +327,7 @@
config S390_CCW_IOMMU
bool "S390 CCW IOMMU Support"
- depends on S390 && CCW
+ depends on S390 && CCW || COMPILE_TEST
select IOMMU_API
help
Enables bits of IOMMU API required by VFIO. The iommu_ops
@@ -417,7 +335,7 @@
config S390_AP_IOMMU
bool "S390 AP IOMMU Support"
- depends on S390 && ZCRYPT
+ depends on S390 && ZCRYPT || COMPILE_TEST
select IOMMU_API
help
Enables bits of IOMMU API required by VFIO. The iommu_ops
@@ -425,11 +343,9 @@
config MTK_IOMMU
bool "MTK IOMMU Support"
- depends on ARM || ARM64
depends on ARCH_MEDIATEK || COMPILE_TEST
select ARM_DMA_USE_IOMMU
select IOMMU_API
- select IOMMU_DMA
select IOMMU_IO_PGTABLE_ARMV7S
select MEMORY
select MTK_SMI
@@ -467,7 +383,7 @@
config HYPERV_IOMMU
bool "Hyper-V x2APIC IRQ Handling"
- depends on HYPERV
+ depends on HYPERV && X86
select IOMMU_API
default HYPERV
help
@@ -475,8 +391,8 @@
guests to run with x2APIC mode enabled.
config VIRTIO_IOMMU
- bool "Virtio IOMMU driver"
- depends on VIRTIO=y
+ tristate "Virtio IOMMU driver"
+ depends on VIRTIO
depends on ARM64
select IOMMU_API
select INTERVAL_TREE
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 4f405f9..11f1771 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -1,4 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
+obj-y += amd/ intel/ arm/
obj-$(CONFIG_IOMMU_API) += iommu.o
obj-$(CONFIG_IOMMU_API) += iommu-traces.o
obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o
@@ -7,31 +8,22 @@
obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o
obj-$(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) += io-pgtable-arm-v7s.o
obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o
+obj-$(CONFIG_IOASID) += ioasid.o
obj-$(CONFIG_IOMMU_IOVA) += iova.o
obj-$(CONFIG_OF_IOMMU) += of_iommu.o
obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o
-obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o amd_iommu_quirks.o
-obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += amd_iommu_debugfs.o
-obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o
-obj-$(CONFIG_ARM_SMMU) += arm-smmu.o arm-smmu-impl.o
-obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o
-obj-$(CONFIG_DMAR_TABLE) += dmar.o
-obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o intel-pasid.o
-obj-$(CONFIG_INTEL_IOMMU) += intel-trace.o
-obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += intel-iommu-debugfs.o
-obj-$(CONFIG_INTEL_IOMMU_SVM) += intel-svm.o
obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o
-obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o
+obj-$(CONFIG_IRQ_REMAP) += irq_remapping.o
obj-$(CONFIG_MTK_IOMMU) += mtk_iommu.o
obj-$(CONFIG_MTK_IOMMU_V1) += mtk_iommu_v1.o
obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o
obj-$(CONFIG_OMAP_IOMMU_DEBUG) += omap-iommu-debug.o
obj-$(CONFIG_ROCKCHIP_IOMMU) += rockchip-iommu.o
+obj-$(CONFIG_SUN50I_IOMMU) += sun50i-iommu.o
obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o
obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o
obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o
obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
-obj-$(CONFIG_QCOM_IOMMU) += qcom_iommu.o
obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o
obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig
new file mode 100644
index 0000000..626b97d
--- /dev/null
+++ b/drivers/iommu/amd/Kconfig
@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: GPL-2.0-only
+# AMD IOMMU support
+config AMD_IOMMU
+ bool "AMD IOMMU support"
+ select SWIOTLB
+ select PCI_MSI
+ select PCI_ATS
+ select PCI_PRI
+ select PCI_PASID
+ select IOMMU_API
+ select IOMMU_IOVA
+ select IOMMU_DMA
+ depends on X86_64 && PCI && ACPI && HAVE_CMPXCHG_DOUBLE
+ help
+ With this option you can enable support for AMD IOMMU hardware in
+ your system. An IOMMU is a hardware component which provides
+ remapping of DMA memory accesses from devices. With an AMD IOMMU you
+ can isolate the DMA memory of different devices and protect the
+ system from misbehaving device drivers or hardware.
+
+ You can find out if your system has an AMD IOMMU if you look into
+ your BIOS for an option to enable it or if you have an IVRS ACPI
+ table.
+
+config AMD_IOMMU_V2
+ tristate "AMD IOMMU Version 2 driver"
+ depends on AMD_IOMMU
+ select MMU_NOTIFIER
+ help
+ This option enables support for the AMD IOMMUv2 features of the IOMMU
+ hardware. Select this option if you want to use devices that support
+ the PCI PRI and PASID interface.
+
+config AMD_IOMMU_DEBUGFS
+ bool "Enable AMD IOMMU internals in DebugFS"
+ depends on AMD_IOMMU && IOMMU_DEBUGFS
+ help
+ !!!WARNING!!! !!!WARNING!!! !!!WARNING!!! !!!WARNING!!!
+
+ DO NOT ENABLE THIS OPTION UNLESS YOU REALLY, -REALLY- KNOW WHAT YOU ARE DOING!!!
+ Exposes AMD IOMMU device internals in DebugFS.
+
+ This option is -NOT- intended for production environments, and should
+ not generally be enabled.
diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile
new file mode 100644
index 0000000..dc5a2fa
--- /dev/null
+++ b/drivers/iommu/amd/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o
+obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o
+obj-$(CONFIG_AMD_IOMMU_V2) += iommu_v2.o
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd/amd_iommu.h
similarity index 65%
rename from drivers/iommu/amd_iommu_proto.h
rename to drivers/iommu/amd/amd_iommu.h
index 92c2ba6..0c40d22 100644
--- a/drivers/iommu/amd_iommu_proto.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -4,8 +4,10 @@
* Author: Joerg Roedel <jroedel@suse.de>
*/
-#ifndef _ASM_X86_AMD_IOMMU_PROTO_H
-#define _ASM_X86_AMD_IOMMU_PROTO_H
+#ifndef AMD_IOMMU_H
+#define AMD_IOMMU_H
+
+#include <linux/iommu.h>
#include "amd_iommu_types.h"
@@ -15,6 +17,7 @@
extern irqreturn_t amd_iommu_int_thread(int irq, void *data);
extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
extern void amd_iommu_apply_erratum_63(u16 devid);
+extern void amd_iommu_restart_event_logging(struct amd_iommu *iommu);
extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu);
extern int amd_iommu_init_devices(void);
extern void amd_iommu_uninit_devices(void);
@@ -39,16 +42,25 @@
struct iommu_domain;
extern bool amd_iommu_v2_supported(void);
+extern struct amd_iommu *get_amd_iommu(unsigned int idx);
+extern u8 amd_iommu_pc_get_max_banks(unsigned int idx);
+extern bool amd_iommu_pc_supported(void);
+extern u8 amd_iommu_pc_get_max_counters(unsigned int idx);
+extern int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
+ u8 fxn, u64 *value);
+extern int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
+ u8 fxn, u64 *value);
+
extern int amd_iommu_register_ppr_notifier(struct notifier_block *nb);
extern int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb);
extern void amd_iommu_domain_direct_map(struct iommu_domain *dom);
extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids);
-extern int amd_iommu_flush_page(struct iommu_domain *dom, int pasid,
+extern int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid,
u64 address);
-extern int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid);
-extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
+extern int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid);
+extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid,
unsigned long cr3);
-extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid);
+extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid);
extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev);
#ifdef CONFIG_IRQ_REMAP
@@ -64,7 +76,7 @@
#define PPR_INVALID 0x1
#define PPR_FAILURE 0xf
-extern int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid,
+extern int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid,
int status, int tag);
static inline bool is_rd890_iommu(struct pci_dev *pdev)
@@ -73,12 +85,9 @@
(pdev->device == PCI_DEVICE_ID_RD890_IOMMU);
}
-static inline bool iommu_feature(struct amd_iommu *iommu, u64 f)
+static inline bool iommu_feature(struct amd_iommu *iommu, u64 mask)
{
- if (!(iommu->cap & (1 << IOMMU_CAP_EFR)))
- return false;
-
- return !!(iommu->features & f);
+ return !!(iommu->features & mask);
}
static inline u64 iommu_virt_to_phys(void *vaddr)
@@ -92,5 +101,15 @@
}
extern bool translation_pre_enabled(struct amd_iommu *iommu);
-extern struct iommu_dev_data *get_dev_data(struct device *dev);
-#endif /* _ASM_X86_AMD_IOMMU_PROTO_H */
+extern bool amd_iommu_is_attach_deferred(struct iommu_domain *domain,
+ struct device *dev);
+extern int __init add_special_device(u8 type, u8 id, u16 *devid,
+ bool cmd_line);
+
+#ifdef CONFIG_DMI
+void amd_iommu_apply_ivrs_quirks(void);
+#else
+static inline void amd_iommu_apply_ivrs_quirks(void) { }
+#endif
+
+#endif
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
similarity index 97%
rename from drivers/iommu/amd_iommu_types.h
rename to drivers/iommu/amd/amd_iommu_types.h
index 76e9d3e..690c597 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -93,6 +93,7 @@
#define FEATURE_PC (1ULL<<9)
#define FEATURE_GAM_VAPIC (1ULL<<21)
#define FEATURE_EPHSUP (1ULL<<50)
+#define FEATURE_SNP (1ULL<<63)
#define FEATURE_PASID_SHIFT 32
#define FEATURE_PASID_MASK (0x1fULL << FEATURE_PASID_SHIFT)
@@ -108,6 +109,7 @@
#define PASID_MASK 0x0000ffff
/* MMIO status bits */
+#define MMIO_STATUS_EVT_OVERFLOW_INT_MASK (1 << 0)
#define MMIO_STATUS_EVT_INT_MASK (1 << 1)
#define MMIO_STATUS_COM_WAIT_INT_MASK (1 << 2)
#define MMIO_STATUS_PPR_INT_MASK (1 << 6)
@@ -128,6 +130,8 @@
#define EVENT_TYPE_IOTLB_INV_TO 0x7
#define EVENT_TYPE_INV_DEV_REQ 0x8
#define EVENT_TYPE_INV_PPR_REQ 0x9
+#define EVENT_TYPE_RMP_FAULT 0xd
+#define EVENT_TYPE_RMP_HW_ERR 0xe
#define EVENT_DEVID_MASK 0xffff
#define EVENT_DEVID_SHIFT 0
#define EVENT_DOMID_MASK_LO 0xffff
@@ -147,8 +151,8 @@
#define CONTROL_COHERENT_EN 0x0aULL
#define CONTROL_ISOC_EN 0x0bULL
#define CONTROL_CMDBUF_EN 0x0cULL
-#define CONTROL_PPFLOG_EN 0x0dULL
-#define CONTROL_PPFINT_EN 0x0eULL
+#define CONTROL_PPRLOG_EN 0x0dULL
+#define CONTROL_PPRINT_EN 0x0eULL
#define CONTROL_PPR_EN 0x0fULL
#define CONTROL_GT_EN 0x10ULL
#define CONTROL_GA_EN 0x11ULL
@@ -376,6 +380,10 @@
#define IOMMU_CAP_NPCACHE 26
#define IOMMU_CAP_EFR 27
+/* IOMMU IVINFO */
+#define IOMMU_IVINFO_OFFSET 36
+#define IOMMU_IVINFO_EFRSUP BIT(0)
+
/* IOMMU Feature Reporting Field (for IVHD type 10h */
#define IOMMU_FEAT_GASUP_SHIFT 6
@@ -395,10 +403,10 @@
#define PD_IOMMUV2_MASK (1UL << 3) /* domain has gcr3 table */
extern bool amd_iommu_dump;
-#define DUMP_printk(format, arg...) \
- do { \
- if (amd_iommu_dump) \
- printk(KERN_INFO "AMD-Vi: " format, ## arg); \
+#define DUMP_printk(format, arg...) \
+ do { \
+ if (amd_iommu_dump) \
+ pr_info("AMD-Vi: " format, ## arg); \
} while(0);
/* global flag if IOMMUs cache non-present entries */
@@ -467,15 +475,12 @@
* independent of their use.
*/
struct protection_domain {
- struct list_head list; /* for list of all protection domains */
struct list_head dev_list; /* List of all devices in this domain */
struct iommu_domain domain; /* generic domain handle used by
iommu core code */
spinlock_t lock; /* mostly used to lock the page table*/
- struct mutex api_lock; /* protect page tables in the iommu-api path */
u16 id; /* the domain id written to the device table */
- int mode; /* paging mode (0-6 levels) */
- u64 *pt_root; /* page table root pointer */
+ atomic64_t pt_root; /* pgtable root and pgtable mode */
int glx; /* Number of levels for GCR3 table */
u64 *gcr3_tbl; /* Guest CR3 table */
unsigned long flags; /* flags to find out type of domain */
@@ -483,6 +488,12 @@
unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
};
+/* For decocded pt_root */
+struct domain_pgtable {
+ int mode;
+ u64 *root;
+};
+
/*
* Structure where we save information about one hardware AMD IOMMU in the
* system.
@@ -596,7 +607,8 @@
#endif
u32 flags;
- volatile u64 __aligned(8) cmd_sem;
+ volatile u64 *cmd_sem;
+ u64 cmd_sem_val;
#ifdef CONFIG_AMD_IOMMU_DEBUGFS
/* DebugFS Info */
@@ -646,7 +658,6 @@
struct pci_dev *pdev;
u16 devid; /* PCI Device ID */
bool iommu_v2; /* Device can make use of IOMMUv2 */
- bool passthrough; /* Device is identity mapped */
struct {
bool enabled;
int qdep;
diff --git a/drivers/iommu/amd_iommu_debugfs.c b/drivers/iommu/amd/debugfs.c
similarity index 89%
rename from drivers/iommu/amd_iommu_debugfs.c
rename to drivers/iommu/amd/debugfs.c
index c6a5c73..545372f 100644
--- a/drivers/iommu/amd_iommu_debugfs.c
+++ b/drivers/iommu/amd/debugfs.c
@@ -8,10 +8,9 @@
*/
#include <linux/debugfs.h>
-#include <linux/iommu.h>
#include <linux/pci.h>
-#include "amd_iommu_proto.h"
-#include "amd_iommu_types.h"
+
+#include "amd_iommu.h"
static struct dentry *amd_iommu_debugfs;
static DEFINE_MUTEX(amd_iommu_debugfs_lock);
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd/init.c
similarity index 93%
rename from drivers/iommu/amd_iommu_init.c
rename to drivers/iommu/amd/init.c
index 692401e..6eaefc9 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd/init.c
@@ -18,9 +18,9 @@
#include <linux/msi.h>
#include <linux/amd-iommu.h>
#include <linux/export.h>
-#include <linux/iommu.h>
#include <linux/kmemleak.h>
#include <linux/mem_encrypt.h>
+#include <linux/iopoll.h>
#include <asm/pci-direct.h>
#include <asm/iommu.h>
#include <asm/apic.h>
@@ -30,12 +30,12 @@
#include <asm/iommu_table.h>
#include <asm/io_apic.h>
#include <asm/irq_remapping.h>
+#include <asm/set_memory.h>
#include <linux/crash_dump.h>
+
#include "amd_iommu.h"
-#include "amd_iommu_proto.h"
-#include "amd_iommu_types.h"
-#include "irq_remapping.h"
+#include "../irq_remapping.h"
/*
* definitions for the ACPI scanning code
@@ -71,6 +71,8 @@
#define IVHD_FLAG_ISOC_EN_MASK 0x08
#define IVMD_FLAG_EXCL_RANGE 0x08
+#define IVMD_FLAG_IW 0x04
+#define IVMD_FLAG_IR 0x02
#define IVMD_FLAG_UNITY_MAP 0x01
#define ACPI_DEVFLAG_INITPASS 0x01
@@ -256,6 +258,8 @@
static bool amd_iommu_pre_enabled = true;
+static u32 amd_iommu_ivinfo __initdata;
+
bool translation_pre_enabled(struct amd_iommu *iommu)
{
return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
@@ -295,6 +299,34 @@
return amd_iommus_present;
}
+#ifdef CONFIG_IRQ_REMAP
+static bool check_feature_on_all_iommus(u64 mask)
+{
+ bool ret = false;
+ struct amd_iommu *iommu;
+
+ for_each_iommu(iommu) {
+ ret = iommu_feature(iommu, mask);
+ if (!ret)
+ return false;
+ }
+
+ return true;
+}
+#endif
+
+/*
+ * For IVHD type 0x11/0x40, EFR is also available via IVHD.
+ * Default to IVHD EFR since it is available sooner
+ * (i.e. before PCI init).
+ */
+static void __init early_iommu_features_init(struct amd_iommu *iommu,
+ struct ivhd_header *h)
+{
+ if (amd_iommu_ivinfo & IOMMU_IVINFO_EFRSUP)
+ iommu->features = h->efr_reg;
+}
+
/* Access to l1 and l2 indexed register spaces */
static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
@@ -359,6 +391,29 @@
&entry, sizeof(entry));
}
+static void iommu_set_cwwb_range(struct amd_iommu *iommu)
+{
+ u64 start = iommu_virt_to_phys((void *)iommu->cmd_sem);
+ u64 entry = start & PM_ADDR_MASK;
+
+ if (!iommu_feature(iommu, FEATURE_SNP))
+ return;
+
+ /* Note:
+ * Re-purpose Exclusion base/limit registers for Completion wait
+ * write-back base/limit.
+ */
+ memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
+ &entry, sizeof(entry));
+
+ /* Note:
+ * Default to 4 Kbytes, which can be specified by setting base
+ * address equal to the limit address.
+ */
+ memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
+ &entry, sizeof(entry));
+}
+
/* Programs the physical address of the device table into the IOMMU hardware */
static void iommu_set_device_table(struct amd_iommu *iommu)
{
@@ -440,7 +495,7 @@
return NULL;
}
- return (u8 __iomem *)ioremap_nocache(address, end);
+ return (u8 __iomem *)ioremap(address, end);
}
static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
@@ -602,6 +657,16 @@
}
/*
+ * This function restarts event logging in case the IOMMU experienced
+ * an event log buffer overflow.
+ */
+void amd_iommu_restart_event_logging(struct amd_iommu *iommu)
+{
+ iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
+ iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
+}
+
+/*
* This function resets the command buffer if the IOMMU stopped fetching
* commands from it.
*/
@@ -649,11 +714,27 @@
free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
}
+static void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu,
+ gfp_t gfp, size_t size)
+{
+ int order = get_order(size);
+ void *buf = (void *)__get_free_pages(gfp, order);
+
+ if (buf &&
+ iommu_feature(iommu, FEATURE_SNP) &&
+ set_memory_4k((unsigned long)buf, (1 << order))) {
+ free_pages((unsigned long)buf, order);
+ buf = NULL;
+ }
+
+ return buf;
+}
+
/* allocates the memory where the IOMMU will log its events to */
static int __init alloc_event_buffer(struct amd_iommu *iommu)
{
- iommu->evt_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
- get_order(EVT_BUFFER_SIZE));
+ iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO,
+ EVT_BUFFER_SIZE);
return iommu->evt_buf ? 0 : -ENOMEM;
}
@@ -692,8 +773,8 @@
/* allocates the memory where the IOMMU will log its events to */
static int __init alloc_ppr_log(struct amd_iommu *iommu)
{
- iommu->ppr_log = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
- get_order(PPR_LOG_SIZE));
+ iommu->ppr_log = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO,
+ PPR_LOG_SIZE);
return iommu->ppr_log ? 0 : -ENOMEM;
}
@@ -714,27 +795,20 @@
writel(0x00, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
- iommu_feature_enable(iommu, CONTROL_PPFLOG_EN);
+ iommu_feature_enable(iommu, CONTROL_PPRLOG_EN);
iommu_feature_enable(iommu, CONTROL_PPR_EN);
}
static void __init free_ppr_log(struct amd_iommu *iommu)
{
- if (iommu->ppr_log == NULL)
- return;
-
free_pages((unsigned long)iommu->ppr_log, get_order(PPR_LOG_SIZE));
}
static void free_ga_log(struct amd_iommu *iommu)
{
#ifdef CONFIG_IRQ_REMAP
- if (iommu->ga_log)
- free_pages((unsigned long)iommu->ga_log,
- get_order(GA_LOG_SIZE));
- if (iommu->ga_log_tail)
- free_pages((unsigned long)iommu->ga_log_tail,
- get_order(8));
+ free_pages((unsigned long)iommu->ga_log, get_order(GA_LOG_SIZE));
+ free_pages((unsigned long)iommu->ga_log_tail, get_order(8));
#endif
}
@@ -742,16 +816,27 @@
{
#ifdef CONFIG_IRQ_REMAP
u32 status, i;
+ u64 entry;
if (!iommu->ga_log)
return -EINVAL;
- status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
-
/* Check if already running */
- if (status & (MMIO_STATUS_GALOG_RUN_MASK))
+ status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
+ if (WARN_ON(status & (MMIO_STATUS_GALOG_RUN_MASK)))
return 0;
+ entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512;
+ memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET,
+ &entry, sizeof(entry));
+ entry = (iommu_virt_to_phys(iommu->ga_log_tail) &
+ (BIT_ULL(52)-1)) & ~7ULL;
+ memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET,
+ &entry, sizeof(entry));
+ writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET);
+ writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET);
+
+
iommu_feature_enable(iommu, CONTROL_GAINT_EN);
iommu_feature_enable(iommu, CONTROL_GALOG_EN);
@@ -759,19 +844,18 @@
status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
if (status & (MMIO_STATUS_GALOG_RUN_MASK))
break;
+ udelay(10);
}
- if (i >= LOOP_TIMEOUT)
+ if (WARN_ON(i >= LOOP_TIMEOUT))
return -EINVAL;
#endif /* CONFIG_IRQ_REMAP */
return 0;
}
-#ifdef CONFIG_IRQ_REMAP
static int iommu_init_ga_log(struct amd_iommu *iommu)
{
- u64 entry;
-
+#ifdef CONFIG_IRQ_REMAP
if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
return 0;
@@ -785,39 +869,26 @@
if (!iommu->ga_log_tail)
goto err_out;
- entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512;
- memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET,
- &entry, sizeof(entry));
- entry = (iommu_virt_to_phys(iommu->ga_log_tail) &
- (BIT_ULL(52)-1)) & ~7ULL;
- memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET,
- &entry, sizeof(entry));
- writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET);
- writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET);
-
return 0;
err_out:
free_ga_log(iommu);
return -EINVAL;
+#else
+ return 0;
+#endif /* CONFIG_IRQ_REMAP */
}
-#endif /* CONFIG_IRQ_REMAP */
-static int iommu_init_ga(struct amd_iommu *iommu)
+static int __init alloc_cwwb_sem(struct amd_iommu *iommu)
{
- int ret = 0;
+ iommu->cmd_sem = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, 1);
-#ifdef CONFIG_IRQ_REMAP
- /* Note: We have already checked GASup from IVRS table.
- * Now, we need to make sure that GAMSup is set.
- */
- if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
- !iommu_feature(iommu, FEATURE_GAM_VAPIC))
- amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
+ return iommu->cmd_sem ? 0 : -ENOMEM;
+}
- ret = iommu_init_ga_log(iommu);
-#endif /* CONFIG_IRQ_REMAP */
-
- return ret;
+static void __init free_cwwb_sem(struct amd_iommu *iommu)
+{
+ if (iommu->cmd_sem)
+ free_page((unsigned long)iommu->cmd_sem);
}
static void iommu_enable_xt(struct amd_iommu *iommu)
@@ -1111,29 +1182,6 @@
}
/*
- * Reads the device exclusion range from ACPI and initializes the IOMMU with
- * it
- */
-static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
-{
- struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
-
- if (!(m->flags & IVMD_FLAG_EXCL_RANGE))
- return;
-
- if (iommu) {
- /*
- * We only can configure exclusion ranges per IOMMU, not
- * per device. But we can enable the exclusion range per
- * device. This is done here
- */
- set_dev_entry_bit(devid, DEV_ENTRY_EX);
- iommu->exclusion_start = m->range_start;
- iommu->exclusion_length = m->range_length;
- }
-}
-
-/*
* Takes a pointer to an AMD IOMMU entry in the ACPI table and
* initializes the hardware and our data structures with it.
*/
@@ -1406,6 +1454,7 @@
static void __init free_iommu_one(struct amd_iommu *iommu)
{
+ free_cwwb_sem(iommu);
free_command_buffer(iommu);
free_event_buffer(iommu);
free_ppr_log(iommu);
@@ -1492,6 +1541,7 @@
int ret;
raw_spin_lock_init(&iommu->lock);
+ iommu->cmd_sem_val = 0;
/* Add IOMMU to internal data structures */
list_add_tail(&iommu->list, &amd_iommu_list);
@@ -1559,6 +1609,9 @@
if ((h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT)) &&
(h->efr_reg & BIT(IOMMU_EFR_MSICAPMMIOSUP_SHIFT)))
amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE;
+
+ early_iommu_features_init(iommu, h);
+
break;
default:
return -EINVAL;
@@ -1569,6 +1622,9 @@
if (!iommu->mmio_base)
return -ENOMEM;
+ if (alloc_cwwb_sem(iommu))
+ return -ENOMEM;
+
if (alloc_command_buffer(iommu))
return -ENOMEM;
@@ -1606,7 +1662,7 @@
/**
* get_highest_supported_ivhd_type - Look up the appropriate IVHD type
- * @ivrs Pointer to the IVRS header
+ * @ivrs: Pointer to the IVRS header
*
* This function search through all IVDB of the maximum supported IVHD
*/
@@ -1725,10 +1781,38 @@
NULL,
};
+/*
+ * Note: IVHD 0x11 and 0x40 also contains exact copy
+ * of the IOMMU Extended Feature Register [MMIO Offset 0030h].
+ * Default to EFR in IVHD since it is available sooner (i.e. before PCI init).
+ */
+static void __init late_iommu_features_init(struct amd_iommu *iommu)
+{
+ u64 features;
+
+ if (!(iommu->cap & (1 << IOMMU_CAP_EFR)))
+ return;
+
+ /* read extended feature bits */
+ features = readq(iommu->mmio_base + MMIO_EXT_FEATURES);
+
+ if (!iommu->features) {
+ iommu->features = features;
+ return;
+ }
+
+ /*
+ * Sanity check and warn if EFR values from
+ * IVHD and MMIO conflict.
+ */
+ if (features != iommu->features)
+ pr_warn(FW_WARN "EFR mismatch. Use IVHD EFR (%#llx : %#llx).\n",
+ features, iommu->features);
+}
+
static int __init iommu_init_pci(struct amd_iommu *iommu)
{
int cap_ptr = iommu->cap_ptr;
- u32 range, misc, low, high;
int ret;
iommu->dev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(iommu->devid),
@@ -1741,19 +1825,11 @@
pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
&iommu->cap);
- pci_read_config_dword(iommu->dev, cap_ptr + MMIO_RANGE_OFFSET,
- &range);
- pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET,
- &misc);
if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
amd_iommu_iotlb_sup = false;
- /* read extended feature bits */
- low = readl(iommu->mmio_base + MMIO_EXT_FEATURES);
- high = readl(iommu->mmio_base + MMIO_EXT_FEATURES + 4);
-
- iommu->features = ((u64)high << 32) | low;
+ late_iommu_features_init(iommu);
if (iommu_feature(iommu, FEATURE_GT)) {
int glxval;
@@ -1786,7 +1862,7 @@
if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu))
return -ENOMEM;
- ret = iommu_init_ga(iommu);
+ ret = iommu_init_ga_log(iommu);
if (ret)
return ret;
@@ -1849,8 +1925,8 @@
pci_info(pdev, "Found IOMMU cap 0x%hx\n", iommu->cap_ptr);
if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
- pci_info(pdev, "Extended features (%#llx):\n",
- iommu->features);
+ pr_info("Extended features (%#llx):", iommu->features);
+
for (i = 0; i < ARRAY_SIZE(feat_str); ++i) {
if (iommu_feature(iommu, (1ULL << i)))
pr_cont(" %s", feat_str[i]);
@@ -1880,6 +1956,9 @@
ret = iommu_init_pci(iommu);
if (ret)
break;
+
+ /* Need to setup range after PCI init */
+ iommu_set_cwwb_range(iommu);
}
/*
@@ -1943,7 +2022,7 @@
#define XT_INT_VEC(x) (((x) & 0xFFULL) << 32)
#define XT_INT_DEST_HI(x) ((((x) >> 24) & 0xFFULL) << 56)
-/**
+/*
* Setup the IntCapXT registers with interrupt routing information
* based on the PCI MSI capability block registers, accessed via
* MMIO MSI address low/hi and MSI data registers.
@@ -2045,7 +2124,7 @@
iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
if (iommu->ppr_log != NULL)
- iommu_feature_enable(iommu, CONTROL_PPFINT_EN);
+ iommu_feature_enable(iommu, CONTROL_PPRINT_EN);
iommu_ga_log_enable(iommu);
@@ -2070,30 +2149,6 @@
}
}
-/* called when we find an exclusion range definition in ACPI */
-static int __init init_exclusion_range(struct ivmd_header *m)
-{
- int i;
-
- switch (m->type) {
- case ACPI_IVMD_TYPE:
- set_device_exclusion_range(m->devid, m);
- break;
- case ACPI_IVMD_TYPE_ALL:
- for (i = 0; i <= amd_iommu_last_bdf; ++i)
- set_device_exclusion_range(i, m);
- break;
- case ACPI_IVMD_TYPE_RANGE:
- for (i = m->devid; i <= m->aux; ++i)
- set_device_exclusion_range(i, m);
- break;
- default:
- break;
- }
-
- return 0;
-}
-
/* called for unity map ACPI definition */
static int __init init_unity_map_range(struct ivmd_header *m)
{
@@ -2104,9 +2159,6 @@
if (e == NULL)
return -ENOMEM;
- if (m->flags & IVMD_FLAG_EXCL_RANGE)
- init_exclusion_range(m);
-
switch (m->type) {
default:
kfree(e);
@@ -2130,6 +2182,16 @@
e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
e->prot = m->flags >> 1;
+ /*
+ * Treat per-device exclusion ranges as r/w unity-mapped regions
+ * since some buggy BIOSes might lead to the overwritten exclusion
+ * range (exclusion_start and exclusion_length members). This
+ * happens when there are multiple exclusion ranges (IVMD entries)
+ * defined in ACPI table.
+ */
+ if (m->flags & IVMD_FLAG_EXCL_RANGE)
+ e->prot = (IVMD_FLAG_IW | IVMD_FLAG_IR) >> 1;
+
DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x"
" range_start: %016llx range_end: %016llx flags: %x\n", s,
PCI_BUS_NUM(e->devid_start), PCI_SLOT(e->devid_start),
@@ -2272,7 +2334,7 @@
switch (amd_iommu_guest_ir) {
case AMD_IOMMU_GUEST_IR_VAPIC:
iommu_feature_enable(iommu, CONTROL_GAM_EN);
- /* Fall through */
+ fallthrough;
case AMD_IOMMU_GUEST_IR_LEGACY_GA:
iommu_feature_enable(iommu, CONTROL_GA_EN);
iommu->irte_ops = &irte_128_ops;
@@ -2345,6 +2407,14 @@
}
#ifdef CONFIG_IRQ_REMAP
+ /*
+ * Note: We have already checked GASup from IVRS table.
+ * Now, we need to make sure that GAMSup is set.
+ */
+ if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
+ !check_feature_on_all_iommus(FEATURE_GAM_VAPIC))
+ amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
+
if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP);
#endif
@@ -2497,6 +2567,11 @@
free_unity_maps();
}
+static void __init ivinfo_init(void *ivrs)
+{
+ amd_iommu_ivinfo = *((u32 *)(ivrs + IOMMU_IVINFO_OFFSET));
+}
+
/*
* This is the hardware init function for AMD IOMMU in the system.
* This function is called either from amd_iommu_init or from the interrupt
@@ -2551,6 +2626,8 @@
if (ret)
goto out;
+ ivinfo_init(ivrs_base);
+
amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base);
DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type);
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd/iommu.c
similarity index 77%
rename from drivers/iommu/amd_iommu.c
rename to drivers/iommu/amd/iommu.c
index c392930..200cf5d 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -18,17 +18,16 @@
#include <linux/slab.h>
#include <linux/debugfs.h>
#include <linux/scatterlist.h>
-#include <linux/dma-mapping.h>
+#include <linux/dma-map-ops.h>
#include <linux/dma-direct.h>
+#include <linux/dma-iommu.h>
#include <linux/iommu-helper.h>
-#include <linux/iommu.h>
#include <linux/delay.h>
#include <linux/amd-iommu.h>
#include <linux/notifier.h>
#include <linux/export.h>
#include <linux/irq.h>
#include <linux/msi.h>
-#include <linux/dma-contiguous.h>
#include <linux/irqdomain.h>
#include <linux/percpu.h>
#include <linux/iova.h>
@@ -42,9 +41,8 @@
#include <asm/gart.h>
#include <asm/dma.h>
-#include "amd_iommu_proto.h"
-#include "amd_iommu_types.h"
-#include "irq_remapping.h"
+#include "amd_iommu.h"
+#include "../irq_remapping.h"
#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
@@ -70,6 +68,8 @@
*/
#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38))
+#define DEFAULT_PGTABLE_LEVEL PAGE_MODE_3_LEVEL
+
static DEFINE_SPINLOCK(pd_bitmap_lock);
/* List of all available dev_data structures */
@@ -88,8 +88,6 @@
static ATOMIC_NOTIFIER_HEAD(ppr_notifier);
int amd_iommu_max_glx_val = -1;
-static const struct dma_map_ops amd_iommu_dma_ops;
-
/*
* general struct to manage commands send to an IOMMU
*/
@@ -100,23 +98,9 @@
struct kmem_cache *amd_iommu_irq_cache;
static void update_domain(struct protection_domain *domain);
-static int protection_domain_init(struct protection_domain *domain);
static void detach_device(struct device *dev);
-static void iova_domain_flush_tlb(struct iova_domain *iovad);
-
-/*
- * Data container for a dma_ops specific protection domain
- */
-struct dma_ops_domain {
- /* generic protection domain information */
- struct protection_domain domain;
-
- /* IOVA RB-Tree */
- struct iova_domain iovad;
-};
-
-static struct iova_domain reserved_iova_ranges;
-static struct lock_class_key reserved_rbtree_key;
+static void update_and_flush_device_table(struct protection_domain *domain,
+ struct domain_pgtable *pgtable);
/****************************************************************************
*
@@ -124,30 +108,6 @@
*
****************************************************************************/
-static inline int match_hid_uid(struct device *dev,
- struct acpihid_map_entry *entry)
-{
- struct acpi_device *adev = ACPI_COMPANION(dev);
- const char *hid, *uid;
-
- if (!adev)
- return -ENODEV;
-
- hid = acpi_device_hid(adev);
- uid = acpi_device_uid(adev);
-
- if (!hid || !(*hid))
- return -ENODEV;
-
- if (!uid || !(*uid))
- return strcmp(hid, entry->hid);
-
- if (!(*entry->uid))
- return strcmp(hid, entry->hid);
-
- return (strcmp(hid, entry->hid) || strcmp(uid, entry->uid));
-}
-
static inline u16 get_pci_device_id(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
@@ -158,10 +118,15 @@
static inline int get_acpihid_device_id(struct device *dev,
struct acpihid_map_entry **entry)
{
+ struct acpi_device *adev = ACPI_COMPANION(dev);
struct acpihid_map_entry *p;
+ if (!adev)
+ return -ENODEV;
+
list_for_each_entry(p, &acpihid_map, list) {
- if (!match_hid_uid(dev, p)) {
+ if (acpi_dev_hid_uid_match(adev, p->hid,
+ p->uid[0] ? p->uid : NULL)) {
if (entry)
*entry = p;
return p->devid;
@@ -187,10 +152,35 @@
return container_of(dom, struct protection_domain, domain);
}
-static struct dma_ops_domain* to_dma_ops_domain(struct protection_domain *domain)
+static void amd_iommu_domain_get_pgtable(struct protection_domain *domain,
+ struct domain_pgtable *pgtable)
{
- BUG_ON(domain->flags != PD_DMA_OPS_MASK);
- return container_of(domain, struct dma_ops_domain, domain);
+ u64 pt_root = atomic64_read(&domain->pt_root);
+
+ pgtable->root = (u64 *)(pt_root & PAGE_MASK);
+ pgtable->mode = pt_root & 7; /* lowest 3 bits encode pgtable mode */
+}
+
+static void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root)
+{
+ atomic64_set(&domain->pt_root, root);
+}
+
+static void amd_iommu_domain_clr_pt_root(struct protection_domain *domain)
+{
+ amd_iommu_domain_set_pt_root(domain, 0);
+}
+
+static void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
+ u64 *root, int mode)
+{
+ u64 pt_root;
+
+ /* lowest 3 bits encode pgtable mode */
+ pt_root = mode & 7;
+ pt_root |= (u64)root;
+
+ amd_iommu_domain_set_pt_root(domain, pt_root);
}
static struct iommu_dev_data *alloc_dev_data(u16 devid)
@@ -299,12 +289,6 @@
return dev_data;
}
-struct iommu_dev_data *get_dev_data(struct device *dev)
-{
- return dev->archdata.iommu;
-}
-EXPORT_SYMBOL(get_dev_data);
-
/*
* Find or create an IOMMU group for a acpihid device.
*/
@@ -333,16 +317,15 @@
static bool pci_iommuv2_capable(struct pci_dev *pdev)
{
static const int caps[] = {
- PCI_EXT_CAP_ID_ATS,
PCI_EXT_CAP_ID_PRI,
PCI_EXT_CAP_ID_PASID,
};
int i, pos;
- if (pci_ats_disabled())
+ if (!pci_ats_supported(pdev))
return false;
- for (i = 0; i < 3; ++i) {
+ for (i = 0; i < 2; ++i) {
pos = pci_find_ext_capability(pdev, caps[i]);
if (pos == 0)
return false;
@@ -355,7 +338,7 @@
{
struct iommu_dev_data *dev_data;
- dev_data = get_dev_data(&pdev->dev);
+ dev_data = dev_iommu_priv_get(&pdev->dev);
return dev_data->errata & (1 << erratum) ? true : false;
}
@@ -368,7 +351,7 @@
{
int devid;
- if (!dev || !dev->dma_mask)
+ if (!dev)
return false;
devid = get_device_id(dev);
@@ -385,32 +368,18 @@
return true;
}
-static void init_iommu_group(struct device *dev)
-{
- struct iommu_group *group;
-
- group = iommu_group_get_for_dev(dev);
- if (IS_ERR(group))
- return;
-
- iommu_group_put(group);
-}
-
static int iommu_init_device(struct device *dev)
{
struct iommu_dev_data *dev_data;
- struct amd_iommu *iommu;
int devid;
- if (dev->archdata.iommu)
+ if (dev_iommu_priv_get(dev))
return 0;
devid = get_device_id(dev);
if (devid < 0)
return devid;
- iommu = amd_iommu_rlookup_table[devid];
-
dev_data = find_dev_data(devid);
if (!dev_data)
return -ENOMEM;
@@ -431,9 +400,7 @@
dev_data->iommu_v2 = iommu->is_iommu_v2;
}
- dev->archdata.iommu = dev_data;
-
- iommu_device_link(&iommu->iommu, dev);
+ dev_iommu_priv_set(dev, dev_data);
return 0;
}
@@ -452,31 +419,18 @@
setup_aliases(dev);
}
-static void iommu_uninit_device(struct device *dev)
+static void amd_iommu_uninit_device(struct device *dev)
{
struct iommu_dev_data *dev_data;
- struct amd_iommu *iommu;
- int devid;
- devid = get_device_id(dev);
- if (devid < 0)
- return;
-
- iommu = amd_iommu_rlookup_table[devid];
-
- dev_data = search_dev_data(devid);
+ dev_data = dev_iommu_priv_get(dev);
if (!dev_data)
return;
if (dev_data->domain)
detach_device(dev);
- iommu_device_unlink(&iommu->iommu, dev);
-
- iommu_group_remove_device(dev);
-
- /* Remove dma-ops */
- dev->dma_ops = NULL;
+ dev_iommu_priv_set(dev, NULL);
/*
* We keep dev_data around for unplugged devices and reuse it when the
@@ -531,6 +485,67 @@
pr_err("CMD[%d]: %08x\n", i, cmd->data[i]);
}
+static void amd_iommu_report_rmp_hw_error(volatile u32 *event)
+{
+ struct iommu_dev_data *dev_data = NULL;
+ int devid, vmg_tag, flags;
+ struct pci_dev *pdev;
+ u64 spa;
+
+ devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
+ vmg_tag = (event[1]) & 0xFFFF;
+ flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
+ spa = ((u64)event[3] << 32) | (event[2] & 0xFFFFFFF8);
+
+ pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid),
+ devid & 0xff);
+ if (pdev)
+ dev_data = dev_iommu_priv_get(&pdev->dev);
+
+ if (dev_data && __ratelimit(&dev_data->rs)) {
+ pci_err(pdev, "Event logged [RMP_HW_ERROR vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n",
+ vmg_tag, spa, flags);
+ } else {
+ pr_err_ratelimited("Event logged [RMP_HW_ERROR device=%02x:%02x.%x, vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n",
+ PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+ vmg_tag, spa, flags);
+ }
+
+ if (pdev)
+ pci_dev_put(pdev);
+}
+
+static void amd_iommu_report_rmp_fault(volatile u32 *event)
+{
+ struct iommu_dev_data *dev_data = NULL;
+ int devid, flags_rmp, vmg_tag, flags;
+ struct pci_dev *pdev;
+ u64 gpa;
+
+ devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
+ flags_rmp = (event[0] >> EVENT_FLAGS_SHIFT) & 0xFF;
+ vmg_tag = (event[1]) & 0xFFFF;
+ flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
+ gpa = ((u64)event[3] << 32) | event[2];
+
+ pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid),
+ devid & 0xff);
+ if (pdev)
+ dev_data = dev_iommu_priv_get(&pdev->dev);
+
+ if (dev_data && __ratelimit(&dev_data->rs)) {
+ pci_err(pdev, "Event logged [RMP_PAGE_FAULT vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n",
+ vmg_tag, gpa, flags_rmp, flags);
+ } else {
+ pr_err_ratelimited("Event logged [RMP_PAGE_FAULT device=%02x:%02x.%x, vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n",
+ PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+ vmg_tag, gpa, flags_rmp, flags);
+ }
+
+ if (pdev)
+ pci_dev_put(pdev);
+}
+
static void amd_iommu_report_page_fault(u16 devid, u16 domain_id,
u64 address, int flags)
{
@@ -540,7 +555,7 @@
pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid),
devid & 0xff);
if (pdev)
- dev_data = get_dev_data(&pdev->dev);
+ dev_data = dev_iommu_priv_get(&pdev->dev);
if (dev_data && __ratelimit(&dev_data->rs)) {
pci_err(pdev, "Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%llx flags=0x%04x]\n",
@@ -558,10 +573,11 @@
static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
{
struct device *dev = iommu->iommu.dev;
- int type, devid, pasid, flags, tag;
+ int type, devid, flags, tag;
volatile u32 *event = __evt;
int count = 0;
u64 address;
+ u32 pasid;
retry:
type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK;
@@ -622,9 +638,14 @@
PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
pasid, address, flags);
break;
+ case EVENT_TYPE_RMP_FAULT:
+ amd_iommu_report_rmp_fault(event);
+ break;
+ case EVENT_TYPE_RMP_HW_ERR:
+ amd_iommu_report_rmp_hw_error(event);
+ break;
case EVENT_TYPE_INV_PPR_REQ:
- pasid = ((event[0] >> 16) & 0xFFFF)
- | ((event[1] << 6) & 0xF0000);
+ pasid = PPR_PASID(*((u64 *)__evt));
tag = event[1] & 0x03FF;
dev_err(dev, "Event logged [INVALID_PPR_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x tag=0x%03x]\n",
PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
@@ -775,10 +796,25 @@
}
}
}
-#endif /* CONFIG_IRQ_REMAP */
+
+static void
+amd_iommu_set_pci_msi_domain(struct device *dev, struct amd_iommu *iommu)
+{
+ if (!irq_remapping_enabled || !dev_is_pci(dev) ||
+ pci_dev_has_special_msi_domain(to_pci_dev(dev)))
+ return;
+
+ dev_set_msi_domain(dev, iommu->msi_domain);
+}
+
+#else /* CONFIG_IRQ_REMAP */
+static inline void
+amd_iommu_set_pci_msi_domain(struct device *dev, struct amd_iommu *iommu) { }
+#endif /* !CONFIG_IRQ_REMAP */
#define AMD_IOMMU_INT_MASK \
- (MMIO_STATUS_EVT_INT_MASK | \
+ (MMIO_STATUS_EVT_OVERFLOW_INT_MASK | \
+ MMIO_STATUS_EVT_INT_MASK | \
MMIO_STATUS_PPR_INT_MASK | \
MMIO_STATUS_GALOG_INT_MASK)
@@ -788,7 +824,7 @@
u32 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
while (status & AMD_IOMMU_INT_MASK) {
- /* Enable EVT and PPR and GA interrupts again */
+ /* Enable interrupt sources again */
writel(AMD_IOMMU_INT_MASK,
iommu->mmio_base + MMIO_STATUS_OFFSET);
@@ -809,6 +845,11 @@
}
#endif
+ if (status & MMIO_STATUS_EVT_OVERFLOW_INT_MASK) {
+ pr_info_ratelimited("IOMMU event log overflow\n");
+ amd_iommu_restart_event_logging(iommu);
+ }
+
/*
* Hardware bug: ERBT1312
* When re-enabling interrupt (by writing 1
@@ -838,11 +879,11 @@
*
****************************************************************************/
-static int wait_on_sem(volatile u64 *sem)
+static int wait_on_sem(struct amd_iommu *iommu, u64 data)
{
int i = 0;
- while (*sem == 0 && i < LOOP_TIMEOUT) {
+ while (*iommu->cmd_sem != data && i < LOOP_TIMEOUT) {
udelay(1);
i += 1;
}
@@ -859,29 +900,30 @@
struct iommu_cmd *cmd)
{
u8 *target;
-
- target = iommu->cmd_buf + iommu->cmd_buf_tail;
-
- iommu->cmd_buf_tail += sizeof(*cmd);
- iommu->cmd_buf_tail %= CMD_BUFFER_SIZE;
+ u32 tail;
/* Copy command to buffer */
+ tail = iommu->cmd_buf_tail;
+ target = iommu->cmd_buf + tail;
memcpy(target, cmd, sizeof(*cmd));
+ tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE;
+ iommu->cmd_buf_tail = tail;
+
/* Tell the IOMMU about it */
- writel(iommu->cmd_buf_tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
+ writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
}
-static void build_completion_wait(struct iommu_cmd *cmd, u64 address)
+static void build_completion_wait(struct iommu_cmd *cmd,
+ struct amd_iommu *iommu,
+ u64 data)
{
- u64 paddr = iommu_virt_to_phys((void *)address);
-
- WARN_ON(address & 0x7ULL);
+ u64 paddr = iommu_virt_to_phys((void *)iommu->cmd_sem);
memset(cmd, 0, sizeof(*cmd));
cmd->data[0] = lower_32_bits(paddr) | CMD_COMPL_WAIT_STORE_MASK;
cmd->data[1] = upper_32_bits(paddr);
- cmd->data[2] = 1;
+ cmd->data[2] = data;
CMD_SET_TYPE(cmd, CMD_COMPL_WAIT);
}
@@ -954,7 +996,7 @@
cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
}
-static void build_inv_iommu_pasid(struct iommu_cmd *cmd, u16 domid, int pasid,
+static void build_inv_iommu_pasid(struct iommu_cmd *cmd, u16 domid, u32 pasid,
u64 address, bool size)
{
memset(cmd, 0, sizeof(*cmd));
@@ -972,7 +1014,7 @@
CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
}
-static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, int pasid,
+static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, u32 pasid,
int qdep, u64 address, bool size)
{
memset(cmd, 0, sizeof(*cmd));
@@ -992,7 +1034,7 @@
CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES);
}
-static void build_complete_ppr(struct iommu_cmd *cmd, u16 devid, int pasid,
+static void build_complete_ppr(struct iommu_cmd *cmd, u16 devid, u32 pasid,
int status, int tag, bool gn)
{
memset(cmd, 0, sizeof(*cmd));
@@ -1090,22 +1132,21 @@
struct iommu_cmd cmd;
unsigned long flags;
int ret;
+ u64 data;
if (!iommu->need_sync)
return 0;
-
- build_completion_wait(&cmd, (u64)&iommu->cmd_sem);
-
raw_spin_lock_irqsave(&iommu->lock, flags);
- iommu->cmd_sem = 0;
+ data = ++iommu->cmd_sem_val;
+ build_completion_wait(&cmd, iommu, data);
ret = __iommu_queue_command_sync(iommu, &cmd, false);
if (ret)
goto out_unlock;
- ret = wait_on_sem(&iommu->cmd_sem);
+ ret = wait_on_sem(iommu, data);
out_unlock:
raw_spin_unlock_irqrestore(&iommu->lock, flags);
@@ -1300,12 +1341,6 @@
__domain_flush_pages(domain, address, size, 0);
}
-/* Flush the whole IO/TLB for a given protection domain */
-static void domain_flush_tlb(struct protection_domain *domain)
-{
- __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0);
-}
-
/* Flush the whole IO/TLB for a given protection domain - including PDE */
static void domain_flush_tlb_pde(struct protection_domain *domain)
{
@@ -1443,15 +1478,19 @@
return freelist;
}
-static void free_pagetable(struct protection_domain *domain)
+static void free_pagetable(struct domain_pgtable *pgtable)
{
- unsigned long root = (unsigned long)domain->pt_root;
struct page *freelist = NULL;
+ unsigned long root;
- BUG_ON(domain->mode < PAGE_MODE_NONE ||
- domain->mode > PAGE_MODE_6_LEVEL);
+ if (pgtable->mode == PAGE_MODE_NONE)
+ return;
- freelist = free_sub_pt(root, domain->mode, freelist);
+ BUG_ON(pgtable->mode < PAGE_MODE_NONE ||
+ pgtable->mode > PAGE_MODE_6_LEVEL);
+
+ root = (unsigned long)pgtable->root;
+ freelist = free_sub_pt(root, pgtable->mode, freelist);
free_page_list(freelist);
}
@@ -1465,8 +1504,9 @@
unsigned long address,
gfp_t gfp)
{
+ struct domain_pgtable pgtable;
unsigned long flags;
- bool ret = false;
+ bool ret = true;
u64 *pte;
pte = (void *)get_zeroed_page(gfp);
@@ -1475,14 +1515,27 @@
spin_lock_irqsave(&domain->lock, flags);
- if (address <= PM_LEVEL_SIZE(domain->mode) ||
- WARN_ON_ONCE(domain->mode == PAGE_MODE_6_LEVEL))
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+
+ if (address <= PM_LEVEL_SIZE(pgtable.mode))
goto out;
- *pte = PM_LEVEL_PDE(domain->mode,
- iommu_virt_to_phys(domain->pt_root));
- domain->pt_root = pte;
- domain->mode += 1;
+ ret = false;
+ if (WARN_ON_ONCE(pgtable.mode == PAGE_MODE_6_LEVEL))
+ goto out;
+
+ *pte = PM_LEVEL_PDE(pgtable.mode, iommu_virt_to_phys(pgtable.root));
+
+ pgtable.root = pte;
+ pgtable.mode += 1;
+ update_and_flush_device_table(domain, &pgtable);
+ domain_flush_complete(domain);
+
+ /*
+ * Device Table needs to be updated and flushed before the new root can
+ * be published.
+ */
+ amd_iommu_domain_set_pgtable(domain, pte, pgtable.mode);
pte = NULL;
ret = true;
@@ -1501,16 +1554,29 @@
gfp_t gfp,
bool *updated)
{
+ struct domain_pgtable pgtable;
int level, end_lvl;
u64 *pte, *page;
BUG_ON(!is_power_of_2(page_size));
- while (address > PM_LEVEL_SIZE(domain->mode))
- *updated = increase_address_space(domain, address, gfp) || *updated;
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
- level = domain->mode - 1;
- pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
+ while (address > PM_LEVEL_SIZE(pgtable.mode)) {
+ /*
+ * Return an error if there is no memory to update the
+ * page-table.
+ */
+ if (!increase_address_space(domain, address, gfp))
+ return NULL;
+
+ /* Read new values to check if update was successful */
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ }
+
+
+ level = pgtable.mode - 1;
+ pte = &pgtable.root[PM_LEVEL_INDEX(level, address)];
address = PAGE_SIZE_ALIGN(address, page_size);
end_lvl = PAGE_SIZE_LEVEL(page_size);
@@ -1586,16 +1652,19 @@
unsigned long address,
unsigned long *page_size)
{
+ struct domain_pgtable pgtable;
int level;
u64 *pte;
*page_size = 0;
- if (address > PM_LEVEL_SIZE(domain->mode))
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+
+ if (address > PM_LEVEL_SIZE(pgtable.mode))
return NULL;
- level = domain->mode - 1;
- pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
+ level = pgtable.mode - 1;
+ pte = &pgtable.root[PM_LEVEL_INDEX(level, address)];
*page_size = PTE_LEVEL_PAGE_SIZE(level);
while (level > 0) {
@@ -1710,7 +1779,13 @@
unsigned long flags;
spin_lock_irqsave(&dom->lock, flags);
- update_domain(dom);
+ /*
+ * Flush domain TLB(s) and wait for completion. Any Device-Table
+ * Updates and flushing already happened in
+ * increase_address_space().
+ */
+ domain_flush_tlb_pde(dom);
+ domain_flush_complete(dom);
spin_unlock_irqrestore(&dom->lock, flags);
}
@@ -1755,43 +1830,6 @@
/****************************************************************************
*
- * The next functions belong to the address allocator for the dma_ops
- * interface functions.
- *
- ****************************************************************************/
-
-
-static unsigned long dma_ops_alloc_iova(struct device *dev,
- struct dma_ops_domain *dma_dom,
- unsigned int pages, u64 dma_mask)
-{
- unsigned long pfn = 0;
-
- pages = __roundup_pow_of_two(pages);
-
- if (dma_mask > DMA_BIT_MASK(32))
- pfn = alloc_iova_fast(&dma_dom->iovad, pages,
- IOVA_PFN(DMA_BIT_MASK(32)), false);
-
- if (!pfn)
- pfn = alloc_iova_fast(&dma_dom->iovad, pages,
- IOVA_PFN(dma_mask), true);
-
- return (pfn << PAGE_SHIFT);
-}
-
-static void dma_ops_free_iova(struct dma_ops_domain *dma_dom,
- unsigned long address,
- unsigned int pages)
-{
- pages = __roundup_pow_of_two(pages);
- address >>= PAGE_SHIFT;
-
- free_iova_fast(&dma_dom->iovad, address, pages);
-}
-
-/****************************************************************************
- *
* The next functions belong to the domain allocation. A domain is
* allocated for every IOMMU as the default domain. If device isolation
* is enabled, every device get its own domain. The most important thing
@@ -1866,102 +1904,18 @@
free_page((unsigned long)domain->gcr3_tbl);
}
-static void dma_ops_domain_flush_tlb(struct dma_ops_domain *dom)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&dom->domain.lock, flags);
- domain_flush_tlb(&dom->domain);
- domain_flush_complete(&dom->domain);
- spin_unlock_irqrestore(&dom->domain.lock, flags);
-}
-
-static void iova_domain_flush_tlb(struct iova_domain *iovad)
-{
- struct dma_ops_domain *dom;
-
- dom = container_of(iovad, struct dma_ops_domain, iovad);
-
- dma_ops_domain_flush_tlb(dom);
-}
-
-/*
- * Free a domain, only used if something went wrong in the
- * allocation path and we need to free an already allocated page table
- */
-static void dma_ops_domain_free(struct dma_ops_domain *dom)
-{
- if (!dom)
- return;
-
- put_iova_domain(&dom->iovad);
-
- free_pagetable(&dom->domain);
-
- if (dom->domain.id)
- domain_id_free(dom->domain.id);
-
- kfree(dom);
-}
-
-/*
- * Allocates a new protection domain usable for the dma_ops functions.
- * It also initializes the page table and the address allocator data
- * structures required for the dma_ops interface
- */
-static struct dma_ops_domain *dma_ops_domain_alloc(void)
-{
- struct dma_ops_domain *dma_dom;
-
- dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
- if (!dma_dom)
- return NULL;
-
- if (protection_domain_init(&dma_dom->domain))
- goto free_dma_dom;
-
- dma_dom->domain.mode = PAGE_MODE_3_LEVEL;
- dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
- dma_dom->domain.flags = PD_DMA_OPS_MASK;
- if (!dma_dom->domain.pt_root)
- goto free_dma_dom;
-
- init_iova_domain(&dma_dom->iovad, PAGE_SIZE, IOVA_START_PFN);
-
- if (init_iova_flush_queue(&dma_dom->iovad, iova_domain_flush_tlb, NULL))
- goto free_dma_dom;
-
- /* Initialize reserved ranges */
- copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad);
-
- return dma_dom;
-
-free_dma_dom:
- dma_ops_domain_free(dma_dom);
-
- return NULL;
-}
-
-/*
- * little helper function to check whether a given protection domain is a
- * dma_ops domain
- */
-static bool dma_ops_domain(struct protection_domain *domain)
-{
- return domain->flags & PD_DMA_OPS_MASK;
-}
-
static void set_dte_entry(u16 devid, struct protection_domain *domain,
+ struct domain_pgtable *pgtable,
bool ats, bool ppr)
{
u64 pte_root = 0;
u64 flags = 0;
u32 old_domid;
- if (domain->mode != PAGE_MODE_NONE)
- pte_root = iommu_virt_to_phys(domain->pt_root);
+ if (pgtable->mode != PAGE_MODE_NONE)
+ pte_root = iommu_virt_to_phys(pgtable->root);
- pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
+ pte_root |= (pgtable->mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV;
@@ -2034,6 +1988,7 @@
static void do_attach(struct iommu_dev_data *dev_data,
struct protection_domain *domain)
{
+ struct domain_pgtable pgtable;
struct amd_iommu *iommu;
bool ats;
@@ -2049,7 +2004,9 @@
domain->dev_cnt += 1;
/* Update device table */
- set_dte_entry(dev_data->devid, domain, ats, dev_data->iommu_v2);
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ set_dte_entry(dev_data->devid, domain, &pgtable,
+ ats, dev_data->iommu_v2);
clone_aliases(dev_data->pdev);
device_flush_dte(dev_data);
@@ -2158,14 +2115,14 @@
static int attach_device(struct device *dev,
struct protection_domain *domain)
{
- struct pci_dev *pdev;
struct iommu_dev_data *dev_data;
+ struct pci_dev *pdev;
unsigned long flags;
int ret;
spin_lock_irqsave(&domain->lock, flags);
- dev_data = get_dev_data(dev);
+ dev_data = dev_iommu_priv_get(dev);
spin_lock(&dev_data->lock);
@@ -2178,8 +2135,10 @@
pdev = to_pci_dev(dev);
if (domain->flags & PD_IOMMUV2_MASK) {
+ struct iommu_domain *def_domain = iommu_get_dma_domain(dev);
+
ret = -EINVAL;
- if (!dev_data->passthrough)
+ if (def_domain->type != IOMMU_DOMAIN_IDENTITY)
goto out;
if (dev_data->iommu_v2) {
@@ -2227,7 +2186,7 @@
struct iommu_dev_data *dev_data;
unsigned long flags;
- dev_data = get_dev_data(dev);
+ dev_data = dev_iommu_priv_get(dev);
domain = dev_data->domain;
spin_lock_irqsave(&domain->lock, flags);
@@ -2261,68 +2220,61 @@
spin_unlock_irqrestore(&domain->lock, flags);
}
-static int amd_iommu_add_device(struct device *dev)
+static struct iommu_device *amd_iommu_probe_device(struct device *dev)
{
- struct iommu_dev_data *dev_data;
- struct iommu_domain *domain;
+ struct iommu_device *iommu_dev;
struct amd_iommu *iommu;
int ret, devid;
- if (!check_device(dev) || get_dev_data(dev))
- return 0;
+ if (!check_device(dev))
+ return ERR_PTR(-ENODEV);
devid = get_device_id(dev);
if (devid < 0)
- return devid;
+ return ERR_PTR(devid);
iommu = amd_iommu_rlookup_table[devid];
+ if (dev_iommu_priv_get(dev))
+ return &iommu->iommu;
+
ret = iommu_init_device(dev);
if (ret) {
if (ret != -ENOTSUPP)
dev_err(dev, "Failed to initialize - trying to proceed anyway\n");
-
+ iommu_dev = ERR_PTR(ret);
iommu_ignore_device(dev);
- dev->dma_ops = NULL;
- goto out;
+ } else {
+ amd_iommu_set_pci_msi_domain(dev, iommu);
+ iommu_dev = &iommu->iommu;
}
- init_iommu_group(dev);
- dev_data = get_dev_data(dev);
+ iommu_completion_wait(iommu);
- BUG_ON(!dev_data);
+ return iommu_dev;
+}
- if (dev_data->iommu_v2)
- iommu_request_dm_for_dev(dev);
+static void amd_iommu_probe_finalize(struct device *dev)
+{
+ struct iommu_domain *domain;
/* Domains are initialized for this device - have a look what we ended up with */
domain = iommu_get_domain_for_dev(dev);
- if (domain->type == IOMMU_DOMAIN_IDENTITY)
- dev_data->passthrough = true;
- else
- dev->dma_ops = &amd_iommu_dma_ops;
-
-out:
- iommu_completion_wait(iommu);
-
- return 0;
+ if (domain->type == IOMMU_DOMAIN_DMA)
+ iommu_setup_dma_ops(dev, IOVA_START_PFN << PAGE_SHIFT, 0);
}
-static void amd_iommu_remove_device(struct device *dev)
+static void amd_iommu_release_device(struct device *dev)
{
+ int devid = get_device_id(dev);
struct amd_iommu *iommu;
- int devid;
if (!check_device(dev))
return;
- devid = get_device_id(dev);
- if (devid < 0)
- return;
-
iommu = amd_iommu_rlookup_table[devid];
- iommu_uninit_device(dev);
+ amd_iommu_uninit_device(dev);
iommu_completion_wait(iommu);
}
@@ -2334,515 +2286,64 @@
return acpihid_device_group(dev);
}
+static int amd_iommu_domain_get_attr(struct iommu_domain *domain,
+ enum iommu_attr attr, void *data)
+{
+ switch (domain->type) {
+ case IOMMU_DOMAIN_UNMANAGED:
+ return -ENODEV;
+ case IOMMU_DOMAIN_DMA:
+ switch (attr) {
+ case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
+ *(int *)data = !amd_iommu_unmap_flush;
+ return 0;
+ default:
+ return -ENODEV;
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+}
+
/*****************************************************************************
*
* The next functions belong to the dma_ops mapping/unmapping code.
*
*****************************************************************************/
-/*
- * In the dma_ops path we only have the struct device. This function
- * finds the corresponding IOMMU, the protection domain and the
- * requestor id for a given device.
- * If the device is not yet associated with a domain this is also done
- * in this function.
- */
-static struct protection_domain *get_domain(struct device *dev)
-{
- struct protection_domain *domain;
- struct iommu_domain *io_domain;
-
- if (!check_device(dev))
- return ERR_PTR(-EINVAL);
-
- domain = get_dev_data(dev)->domain;
- if (domain == NULL && get_dev_data(dev)->defer_attach) {
- get_dev_data(dev)->defer_attach = false;
- io_domain = iommu_get_domain_for_dev(dev);
- domain = to_pdomain(io_domain);
- attach_device(dev, domain);
- }
- if (domain == NULL)
- return ERR_PTR(-EBUSY);
-
- if (!dma_ops_domain(domain))
- return ERR_PTR(-EBUSY);
-
- return domain;
-}
-
-static void update_device_table(struct protection_domain *domain)
+static void update_device_table(struct protection_domain *domain,
+ struct domain_pgtable *pgtable)
{
struct iommu_dev_data *dev_data;
list_for_each_entry(dev_data, &domain->dev_list, list) {
- set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled,
- dev_data->iommu_v2);
+ set_dte_entry(dev_data->devid, domain, pgtable,
+ dev_data->ats.enabled, dev_data->iommu_v2);
clone_aliases(dev_data->pdev);
}
}
+static void update_and_flush_device_table(struct protection_domain *domain,
+ struct domain_pgtable *pgtable)
+{
+ update_device_table(domain, pgtable);
+ domain_flush_devices(domain);
+}
+
static void update_domain(struct protection_domain *domain)
{
- update_device_table(domain);
+ struct domain_pgtable pgtable;
- domain_flush_devices(domain);
+ /* Update device table */
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ update_and_flush_device_table(domain, &pgtable);
+
+ /* Flush domain TLB(s) and wait for completion */
domain_flush_tlb_pde(domain);
domain_flush_complete(domain);
}
-static int dir2prot(enum dma_data_direction direction)
-{
- if (direction == DMA_TO_DEVICE)
- return IOMMU_PROT_IR;
- else if (direction == DMA_FROM_DEVICE)
- return IOMMU_PROT_IW;
- else if (direction == DMA_BIDIRECTIONAL)
- return IOMMU_PROT_IW | IOMMU_PROT_IR;
- else
- return 0;
-}
-
-/*
- * This function contains common code for mapping of a physically
- * contiguous memory region into DMA address space. It is used by all
- * mapping functions provided with this IOMMU driver.
- * Must be called with the domain lock held.
- */
-static dma_addr_t __map_single(struct device *dev,
- struct dma_ops_domain *dma_dom,
- phys_addr_t paddr,
- size_t size,
- enum dma_data_direction direction,
- u64 dma_mask)
-{
- dma_addr_t offset = paddr & ~PAGE_MASK;
- dma_addr_t address, start, ret;
- unsigned long flags;
- unsigned int pages;
- int prot = 0;
- int i;
-
- pages = iommu_num_pages(paddr, size, PAGE_SIZE);
- paddr &= PAGE_MASK;
-
- address = dma_ops_alloc_iova(dev, dma_dom, pages, dma_mask);
- if (!address)
- goto out;
-
- prot = dir2prot(direction);
-
- start = address;
- for (i = 0; i < pages; ++i) {
- ret = iommu_map_page(&dma_dom->domain, start, paddr,
- PAGE_SIZE, prot, GFP_ATOMIC);
- if (ret)
- goto out_unmap;
-
- paddr += PAGE_SIZE;
- start += PAGE_SIZE;
- }
- address += offset;
-
- domain_flush_np_cache(&dma_dom->domain, address, size);
-
-out:
- return address;
-
-out_unmap:
-
- for (--i; i >= 0; --i) {
- start -= PAGE_SIZE;
- iommu_unmap_page(&dma_dom->domain, start, PAGE_SIZE);
- }
-
- spin_lock_irqsave(&dma_dom->domain.lock, flags);
- domain_flush_tlb(&dma_dom->domain);
- domain_flush_complete(&dma_dom->domain);
- spin_unlock_irqrestore(&dma_dom->domain.lock, flags);
-
- dma_ops_free_iova(dma_dom, address, pages);
-
- return DMA_MAPPING_ERROR;
-}
-
-/*
- * Does the reverse of the __map_single function. Must be called with
- * the domain lock held too
- */
-static void __unmap_single(struct dma_ops_domain *dma_dom,
- dma_addr_t dma_addr,
- size_t size,
- int dir)
-{
- dma_addr_t i, start;
- unsigned int pages;
-
- pages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
- dma_addr &= PAGE_MASK;
- start = dma_addr;
-
- for (i = 0; i < pages; ++i) {
- iommu_unmap_page(&dma_dom->domain, start, PAGE_SIZE);
- start += PAGE_SIZE;
- }
-
- if (amd_iommu_unmap_flush) {
- unsigned long flags;
-
- spin_lock_irqsave(&dma_dom->domain.lock, flags);
- domain_flush_tlb(&dma_dom->domain);
- domain_flush_complete(&dma_dom->domain);
- spin_unlock_irqrestore(&dma_dom->domain.lock, flags);
- dma_ops_free_iova(dma_dom, dma_addr, pages);
- } else {
- pages = __roundup_pow_of_two(pages);
- queue_iova(&dma_dom->iovad, dma_addr >> PAGE_SHIFT, pages, 0);
- }
-}
-
-/*
- * The exported map_single function for dma_ops.
- */
-static dma_addr_t map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction dir,
- unsigned long attrs)
-{
- phys_addr_t paddr = page_to_phys(page) + offset;
- struct protection_domain *domain;
- struct dma_ops_domain *dma_dom;
- u64 dma_mask;
-
- domain = get_domain(dev);
- if (PTR_ERR(domain) == -EINVAL)
- return (dma_addr_t)paddr;
- else if (IS_ERR(domain))
- return DMA_MAPPING_ERROR;
-
- dma_mask = *dev->dma_mask;
- dma_dom = to_dma_ops_domain(domain);
-
- return __map_single(dev, dma_dom, paddr, size, dir, dma_mask);
-}
-
-/*
- * The exported unmap_single function for dma_ops.
- */
-static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
- enum dma_data_direction dir, unsigned long attrs)
-{
- struct protection_domain *domain;
- struct dma_ops_domain *dma_dom;
-
- domain = get_domain(dev);
- if (IS_ERR(domain))
- return;
-
- dma_dom = to_dma_ops_domain(domain);
-
- __unmap_single(dma_dom, dma_addr, size, dir);
-}
-
-static int sg_num_pages(struct device *dev,
- struct scatterlist *sglist,
- int nelems)
-{
- unsigned long mask, boundary_size;
- struct scatterlist *s;
- int i, npages = 0;
-
- mask = dma_get_seg_boundary(dev);
- boundary_size = mask + 1 ? ALIGN(mask + 1, PAGE_SIZE) >> PAGE_SHIFT :
- 1UL << (BITS_PER_LONG - PAGE_SHIFT);
-
- for_each_sg(sglist, s, nelems, i) {
- int p, n;
-
- s->dma_address = npages << PAGE_SHIFT;
- p = npages % boundary_size;
- n = iommu_num_pages(sg_phys(s), s->length, PAGE_SIZE);
- if (p + n > boundary_size)
- npages += boundary_size - p;
- npages += n;
- }
-
- return npages;
-}
-
-/*
- * The exported map_sg function for dma_ops (handles scatter-gather
- * lists).
- */
-static int map_sg(struct device *dev, struct scatterlist *sglist,
- int nelems, enum dma_data_direction direction,
- unsigned long attrs)
-{
- int mapped_pages = 0, npages = 0, prot = 0, i;
- struct protection_domain *domain;
- struct dma_ops_domain *dma_dom;
- struct scatterlist *s;
- unsigned long address;
- u64 dma_mask;
- int ret;
-
- domain = get_domain(dev);
- if (IS_ERR(domain))
- return 0;
-
- dma_dom = to_dma_ops_domain(domain);
- dma_mask = *dev->dma_mask;
-
- npages = sg_num_pages(dev, sglist, nelems);
-
- address = dma_ops_alloc_iova(dev, dma_dom, npages, dma_mask);
- if (!address)
- goto out_err;
-
- prot = dir2prot(direction);
-
- /* Map all sg entries */
- for_each_sg(sglist, s, nelems, i) {
- int j, pages = iommu_num_pages(sg_phys(s), s->length, PAGE_SIZE);
-
- for (j = 0; j < pages; ++j) {
- unsigned long bus_addr, phys_addr;
-
- bus_addr = address + s->dma_address + (j << PAGE_SHIFT);
- phys_addr = (sg_phys(s) & PAGE_MASK) + (j << PAGE_SHIFT);
- ret = iommu_map_page(domain, bus_addr, phys_addr,
- PAGE_SIZE, prot,
- GFP_ATOMIC | __GFP_NOWARN);
- if (ret)
- goto out_unmap;
-
- mapped_pages += 1;
- }
- }
-
- /* Everything is mapped - write the right values into s->dma_address */
- for_each_sg(sglist, s, nelems, i) {
- /*
- * Add in the remaining piece of the scatter-gather offset that
- * was masked out when we were determining the physical address
- * via (sg_phys(s) & PAGE_MASK) earlier.
- */
- s->dma_address += address + (s->offset & ~PAGE_MASK);
- s->dma_length = s->length;
- }
-
- if (s)
- domain_flush_np_cache(domain, s->dma_address, s->dma_length);
-
- return nelems;
-
-out_unmap:
- dev_err(dev, "IOMMU mapping error in map_sg (io-pages: %d reason: %d)\n",
- npages, ret);
-
- for_each_sg(sglist, s, nelems, i) {
- int j, pages = iommu_num_pages(sg_phys(s), s->length, PAGE_SIZE);
-
- for (j = 0; j < pages; ++j) {
- unsigned long bus_addr;
-
- bus_addr = address + s->dma_address + (j << PAGE_SHIFT);
- iommu_unmap_page(domain, bus_addr, PAGE_SIZE);
-
- if (--mapped_pages == 0)
- goto out_free_iova;
- }
- }
-
-out_free_iova:
- free_iova_fast(&dma_dom->iovad, address >> PAGE_SHIFT, npages);
-
-out_err:
- return 0;
-}
-
-/*
- * The exported map_sg function for dma_ops (handles scatter-gather
- * lists).
- */
-static void unmap_sg(struct device *dev, struct scatterlist *sglist,
- int nelems, enum dma_data_direction dir,
- unsigned long attrs)
-{
- struct protection_domain *domain;
- struct dma_ops_domain *dma_dom;
- unsigned long startaddr;
- int npages;
-
- domain = get_domain(dev);
- if (IS_ERR(domain))
- return;
-
- startaddr = sg_dma_address(sglist) & PAGE_MASK;
- dma_dom = to_dma_ops_domain(domain);
- npages = sg_num_pages(dev, sglist, nelems);
-
- __unmap_single(dma_dom, startaddr, npages << PAGE_SHIFT, dir);
-}
-
-/*
- * The exported alloc_coherent function for dma_ops.
- */
-static void *alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_addr, gfp_t flag,
- unsigned long attrs)
-{
- u64 dma_mask = dev->coherent_dma_mask;
- struct protection_domain *domain;
- struct dma_ops_domain *dma_dom;
- struct page *page;
-
- domain = get_domain(dev);
- if (PTR_ERR(domain) == -EINVAL) {
- page = alloc_pages(flag, get_order(size));
- *dma_addr = page_to_phys(page);
- return page_address(page);
- } else if (IS_ERR(domain))
- return NULL;
-
- dma_dom = to_dma_ops_domain(domain);
- size = PAGE_ALIGN(size);
- dma_mask = dev->coherent_dma_mask;
- flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
- flag |= __GFP_ZERO;
-
- page = alloc_pages(flag | __GFP_NOWARN, get_order(size));
- if (!page) {
- if (!gfpflags_allow_blocking(flag))
- return NULL;
-
- page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
- get_order(size), flag & __GFP_NOWARN);
- if (!page)
- return NULL;
- }
-
- if (!dma_mask)
- dma_mask = *dev->dma_mask;
-
- *dma_addr = __map_single(dev, dma_dom, page_to_phys(page),
- size, DMA_BIDIRECTIONAL, dma_mask);
-
- if (*dma_addr == DMA_MAPPING_ERROR)
- goto out_free;
-
- return page_address(page);
-
-out_free:
-
- if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
- __free_pages(page, get_order(size));
-
- return NULL;
-}
-
-/*
- * The exported free_coherent function for dma_ops.
- */
-static void free_coherent(struct device *dev, size_t size,
- void *virt_addr, dma_addr_t dma_addr,
- unsigned long attrs)
-{
- struct protection_domain *domain;
- struct dma_ops_domain *dma_dom;
- struct page *page;
-
- page = virt_to_page(virt_addr);
- size = PAGE_ALIGN(size);
-
- domain = get_domain(dev);
- if (IS_ERR(domain))
- goto free_mem;
-
- dma_dom = to_dma_ops_domain(domain);
-
- __unmap_single(dma_dom, dma_addr, size, DMA_BIDIRECTIONAL);
-
-free_mem:
- if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
- __free_pages(page, get_order(size));
-}
-
-/*
- * This function is called by the DMA layer to find out if we can handle a
- * particular device. It is part of the dma_ops.
- */
-static int amd_iommu_dma_supported(struct device *dev, u64 mask)
-{
- if (!dma_direct_supported(dev, mask))
- return 0;
- return check_device(dev);
-}
-
-static const struct dma_map_ops amd_iommu_dma_ops = {
- .alloc = alloc_coherent,
- .free = free_coherent,
- .map_page = map_page,
- .unmap_page = unmap_page,
- .map_sg = map_sg,
- .unmap_sg = unmap_sg,
- .dma_supported = amd_iommu_dma_supported,
- .mmap = dma_common_mmap,
- .get_sgtable = dma_common_get_sgtable,
-};
-
-static int init_reserved_iova_ranges(void)
-{
- struct pci_dev *pdev = NULL;
- struct iova *val;
-
- init_iova_domain(&reserved_iova_ranges, PAGE_SIZE, IOVA_START_PFN);
-
- lockdep_set_class(&reserved_iova_ranges.iova_rbtree_lock,
- &reserved_rbtree_key);
-
- /* MSI memory range */
- val = reserve_iova(&reserved_iova_ranges,
- IOVA_PFN(MSI_RANGE_START), IOVA_PFN(MSI_RANGE_END));
- if (!val) {
- pr_err("Reserving MSI range failed\n");
- return -ENOMEM;
- }
-
- /* HT memory range */
- val = reserve_iova(&reserved_iova_ranges,
- IOVA_PFN(HT_RANGE_START), IOVA_PFN(HT_RANGE_END));
- if (!val) {
- pr_err("Reserving HT range failed\n");
- return -ENOMEM;
- }
-
- /*
- * Memory used for PCI resources
- * FIXME: Check whether we can reserve the PCI-hole completly
- */
- for_each_pci_dev(pdev) {
- int i;
-
- for (i = 0; i < PCI_NUM_RESOURCES; ++i) {
- struct resource *r = &pdev->resource[i];
-
- if (!(r->flags & IORESOURCE_MEM))
- continue;
-
- val = reserve_iova(&reserved_iova_ranges,
- IOVA_PFN(r->start),
- IOVA_PFN(r->end));
- if (!val) {
- pci_err(pdev, "Reserve pci-resource range %pR failed\n", r);
- return -ENOMEM;
- }
- }
- }
-
- return 0;
-}
-
int __init amd_iommu_init_api(void)
{
int ret, err = 0;
@@ -2851,10 +2352,6 @@
if (ret)
return ret;
- ret = init_reserved_iova_ranges();
- if (ret)
- return ret;
-
err = bus_set_iommu(&pci_bus_type, &amd_iommu_ops);
if (err)
return err;
@@ -2873,7 +2370,6 @@
int __init amd_iommu_init_dma_ops(void)
{
swiotlb = (iommu_default_passthrough() || sme_me_mask) ? 1 : 0;
- iommu_detected = 1;
if (amd_iommu_unmap_flush)
pr_info("IO/TLB flush on unmap enabled\n");
@@ -2913,28 +2409,45 @@
static void protection_domain_free(struct protection_domain *domain)
{
+ struct domain_pgtable pgtable;
+
if (!domain)
return;
if (domain->id)
domain_id_free(domain->id);
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ amd_iommu_domain_clr_pt_root(domain);
+ free_pagetable(&pgtable);
+
kfree(domain);
}
-static int protection_domain_init(struct protection_domain *domain)
+static int protection_domain_init(struct protection_domain *domain, int mode)
{
+ u64 *pt_root = NULL;
+
+ BUG_ON(mode < PAGE_MODE_NONE || mode > PAGE_MODE_6_LEVEL);
+
spin_lock_init(&domain->lock);
- mutex_init(&domain->api_lock);
domain->id = domain_id_alloc();
if (!domain->id)
return -ENOMEM;
INIT_LIST_HEAD(&domain->dev_list);
+ if (mode != PAGE_MODE_NONE) {
+ pt_root = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!pt_root)
+ return -ENOMEM;
+ }
+
+ amd_iommu_domain_set_pgtable(domain, pt_root, mode);
+
return 0;
}
-static struct protection_domain *protection_domain_alloc(void)
+static struct protection_domain *protection_domain_alloc(int mode)
{
struct protection_domain *domain;
@@ -2942,7 +2455,7 @@
if (!domain)
return NULL;
- if (protection_domain_init(domain))
+ if (protection_domain_init(domain, mode))
goto out_err;
return domain;
@@ -2955,53 +2468,35 @@
static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
{
- struct protection_domain *pdomain;
- struct dma_ops_domain *dma_domain;
+ struct protection_domain *domain;
+ int mode = DEFAULT_PGTABLE_LEVEL;
- switch (type) {
- case IOMMU_DOMAIN_UNMANAGED:
- pdomain = protection_domain_alloc();
- if (!pdomain)
- return NULL;
+ if (type == IOMMU_DOMAIN_IDENTITY)
+ mode = PAGE_MODE_NONE;
- pdomain->mode = PAGE_MODE_3_LEVEL;
- pdomain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
- if (!pdomain->pt_root) {
- protection_domain_free(pdomain);
- return NULL;
- }
-
- pdomain->domain.geometry.aperture_start = 0;
- pdomain->domain.geometry.aperture_end = ~0ULL;
- pdomain->domain.geometry.force_aperture = true;
-
- break;
- case IOMMU_DOMAIN_DMA:
- dma_domain = dma_ops_domain_alloc();
- if (!dma_domain) {
- pr_err("Failed to allocate\n");
- return NULL;
- }
- pdomain = &dma_domain->domain;
- break;
- case IOMMU_DOMAIN_IDENTITY:
- pdomain = protection_domain_alloc();
- if (!pdomain)
- return NULL;
-
- pdomain->mode = PAGE_MODE_NONE;
- break;
- default:
+ domain = protection_domain_alloc(mode);
+ if (!domain)
return NULL;
- }
- return &pdomain->domain;
+ domain->domain.geometry.aperture_start = 0;
+ domain->domain.geometry.aperture_end = ~0ULL;
+ domain->domain.geometry.force_aperture = true;
+
+ if (type == IOMMU_DOMAIN_DMA &&
+ iommu_get_dma_cookie(&domain->domain) == -ENOMEM)
+ goto free_domain;
+
+ return &domain->domain;
+
+free_domain:
+ protection_domain_free(domain);
+
+ return NULL;
}
static void amd_iommu_domain_free(struct iommu_domain *dom)
{
struct protection_domain *domain;
- struct dma_ops_domain *dma_dom;
domain = to_pdomain(dom);
@@ -3013,28 +2508,19 @@
if (!dom)
return;
- switch (dom->type) {
- case IOMMU_DOMAIN_DMA:
- /* Now release the domain */
- dma_dom = to_dma_ops_domain(domain);
- dma_ops_domain_free(dma_dom);
- break;
- default:
- if (domain->mode != PAGE_MODE_NONE)
- free_pagetable(domain);
+ if (dom->type == IOMMU_DOMAIN_DMA)
+ iommu_put_dma_cookie(&domain->domain);
- if (domain->flags & PD_IOMMUV2_MASK)
- free_gcr3_table(domain);
+ if (domain->flags & PD_IOMMUV2_MASK)
+ free_gcr3_table(domain);
- protection_domain_free(domain);
- break;
- }
+ protection_domain_free(domain);
}
static void amd_iommu_detach_device(struct iommu_domain *dom,
struct device *dev)
{
- struct iommu_dev_data *dev_data = dev->archdata.iommu;
+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
struct amd_iommu *iommu;
int devid;
@@ -3072,7 +2558,8 @@
if (!check_device(dev))
return -EINVAL;
- dev_data = dev->archdata.iommu;
+ dev_data = dev_iommu_priv_get(dev);
+ dev_data->defer_attach = false;
iommu = amd_iommu_rlookup_table[dev_data->devid];
if (!iommu)
@@ -3098,13 +2585,16 @@
}
static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
- phys_addr_t paddr, size_t page_size, int iommu_prot)
+ phys_addr_t paddr, size_t page_size, int iommu_prot,
+ gfp_t gfp)
{
struct protection_domain *domain = to_pdomain(dom);
+ struct domain_pgtable pgtable;
int prot = 0;
int ret;
- if (domain->mode == PAGE_MODE_NONE)
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ if (pgtable.mode == PAGE_MODE_NONE)
return -EINVAL;
if (iommu_prot & IOMMU_READ)
@@ -3112,9 +2602,7 @@
if (iommu_prot & IOMMU_WRITE)
prot |= IOMMU_PROT_IW;
- mutex_lock(&domain->api_lock);
- ret = iommu_map_page(domain, iova, paddr, page_size, prot, GFP_KERNEL);
- mutex_unlock(&domain->api_lock);
+ ret = iommu_map_page(domain, iova, paddr, page_size, prot, gfp);
domain_flush_np_cache(domain, iova, page_size);
@@ -3126,16 +2614,13 @@
struct iommu_iotlb_gather *gather)
{
struct protection_domain *domain = to_pdomain(dom);
- size_t unmap_size;
+ struct domain_pgtable pgtable;
- if (domain->mode == PAGE_MODE_NONE)
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ if (pgtable.mode == PAGE_MODE_NONE)
return 0;
- mutex_lock(&domain->api_lock);
- unmap_size = iommu_unmap_page(domain, iova, page_size);
- mutex_unlock(&domain->api_lock);
-
- return unmap_size;
+ return iommu_unmap_page(domain, iova, page_size);
}
static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
@@ -3143,9 +2628,11 @@
{
struct protection_domain *domain = to_pdomain(dom);
unsigned long offset_mask, pte_pgsize;
+ struct domain_pgtable pgtable;
u64 *pte, __pte;
- if (domain->mode == PAGE_MODE_NONE)
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ if (pgtable.mode == PAGE_MODE_NONE)
return iova;
pte = fetch_pte(domain, iova, &pte_pgsize);
@@ -3227,34 +2714,14 @@
list_add_tail(®ion->list, head);
}
-static void amd_iommu_put_resv_regions(struct device *dev,
- struct list_head *head)
+bool amd_iommu_is_attach_deferred(struct iommu_domain *domain,
+ struct device *dev)
{
- struct iommu_resv_region *entry, *next;
+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
- list_for_each_entry_safe(entry, next, head, list)
- kfree(entry);
-}
-
-static void amd_iommu_apply_resv_region(struct device *dev,
- struct iommu_domain *domain,
- struct iommu_resv_region *region)
-{
- struct dma_ops_domain *dma_dom = to_dma_ops_domain(to_pdomain(domain));
- unsigned long start, end;
-
- start = IOVA_PFN(region->start);
- end = IOVA_PFN(region->start + region->length - 1);
-
- WARN_ON_ONCE(reserve_iova(&dma_dom->iovad, start, end) == NULL);
-}
-
-static bool amd_iommu_is_attach_deferred(struct iommu_domain *domain,
- struct device *dev)
-{
- struct iommu_dev_data *dev_data = dev->archdata.iommu;
return dev_data->defer_attach;
}
+EXPORT_SYMBOL_GPL(amd_iommu_is_attach_deferred);
static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
{
@@ -3273,6 +2740,25 @@
amd_iommu_flush_iotlb_all(domain);
}
+static int amd_iommu_def_domain_type(struct device *dev)
+{
+ struct iommu_dev_data *dev_data;
+
+ dev_data = dev_iommu_priv_get(dev);
+ if (!dev_data)
+ return 0;
+
+ /*
+ * Do not identity map IOMMUv2 capable devices when memory encryption is
+ * active, because some of those devices (AMD GPUs) don't have the
+ * encryption bit in their DMA-mask and require remapping.
+ */
+ if (!mem_encrypt_active() && dev_data->iommu_v2)
+ return IOMMU_DOMAIN_IDENTITY;
+
+ return 0;
+}
+
const struct iommu_ops amd_iommu_ops = {
.capable = amd_iommu_capable,
.domain_alloc = amd_iommu_domain_alloc,
@@ -3282,16 +2768,18 @@
.map = amd_iommu_map,
.unmap = amd_iommu_unmap,
.iova_to_phys = amd_iommu_iova_to_phys,
- .add_device = amd_iommu_add_device,
- .remove_device = amd_iommu_remove_device,
+ .probe_device = amd_iommu_probe_device,
+ .release_device = amd_iommu_release_device,
+ .probe_finalize = amd_iommu_probe_finalize,
.device_group = amd_iommu_device_group,
+ .domain_get_attr = amd_iommu_domain_get_attr,
.get_resv_regions = amd_iommu_get_resv_regions,
- .put_resv_regions = amd_iommu_put_resv_regions,
- .apply_resv_region = amd_iommu_apply_resv_region,
+ .put_resv_regions = generic_iommu_put_resv_regions,
.is_attach_deferred = amd_iommu_is_attach_deferred,
.pgsize_bitmap = AMD_IOMMU_PGSIZES,
.flush_iotlb_all = amd_iommu_flush_iotlb_all,
.iotlb_sync = amd_iommu_iotlb_sync,
+ .def_domain_type = amd_iommu_def_domain_type,
};
/*****************************************************************************
@@ -3320,18 +2808,22 @@
void amd_iommu_domain_direct_map(struct iommu_domain *dom)
{
struct protection_domain *domain = to_pdomain(dom);
+ struct domain_pgtable pgtable;
unsigned long flags;
spin_lock_irqsave(&domain->lock, flags);
- /* Update data structure */
- domain->mode = PAGE_MODE_NONE;
+ /* First save pgtable configuration*/
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+
+ /* Remove page-table from domain */
+ amd_iommu_domain_clr_pt_root(domain);
/* Make changes visible to IOMMUs */
update_domain(domain);
/* Page-table is not visible to IOMMU anymore, so free it */
- free_pagetable(domain);
+ free_pagetable(&pgtable);
spin_unlock_irqrestore(&domain->lock, flags);
}
@@ -3383,7 +2875,7 @@
}
EXPORT_SYMBOL(amd_iommu_domain_enable_v2);
-static int __flush_pasid(struct protection_domain *domain, int pasid,
+static int __flush_pasid(struct protection_domain *domain, u32 pasid,
u64 address, bool size)
{
struct iommu_dev_data *dev_data;
@@ -3444,13 +2936,13 @@
return ret;
}
-static int __amd_iommu_flush_page(struct protection_domain *domain, int pasid,
+static int __amd_iommu_flush_page(struct protection_domain *domain, u32 pasid,
u64 address)
{
return __flush_pasid(domain, pasid, address, false);
}
-int amd_iommu_flush_page(struct iommu_domain *dom, int pasid,
+int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid,
u64 address)
{
struct protection_domain *domain = to_pdomain(dom);
@@ -3465,13 +2957,13 @@
}
EXPORT_SYMBOL(amd_iommu_flush_page);
-static int __amd_iommu_flush_tlb(struct protection_domain *domain, int pasid)
+static int __amd_iommu_flush_tlb(struct protection_domain *domain, u32 pasid)
{
return __flush_pasid(domain, pasid, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
true);
}
-int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid)
+int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid)
{
struct protection_domain *domain = to_pdomain(dom);
unsigned long flags;
@@ -3485,7 +2977,7 @@
}
EXPORT_SYMBOL(amd_iommu_flush_tlb);
-static u64 *__get_gcr3_pte(u64 *root, int level, int pasid, bool alloc)
+static u64 *__get_gcr3_pte(u64 *root, int level, u32 pasid, bool alloc)
{
int index;
u64 *pte;
@@ -3517,12 +3009,14 @@
return pte;
}
-static int __set_gcr3(struct protection_domain *domain, int pasid,
+static int __set_gcr3(struct protection_domain *domain, u32 pasid,
unsigned long cr3)
{
+ struct domain_pgtable pgtable;
u64 *pte;
- if (domain->mode != PAGE_MODE_NONE)
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ if (pgtable.mode != PAGE_MODE_NONE)
return -EINVAL;
pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, true);
@@ -3534,11 +3028,13 @@
return __amd_iommu_flush_tlb(domain, pasid);
}
-static int __clear_gcr3(struct protection_domain *domain, int pasid)
+static int __clear_gcr3(struct protection_domain *domain, u32 pasid)
{
+ struct domain_pgtable pgtable;
u64 *pte;
- if (domain->mode != PAGE_MODE_NONE)
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ if (pgtable.mode != PAGE_MODE_NONE)
return -EINVAL;
pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, false);
@@ -3550,7 +3046,7 @@
return __amd_iommu_flush_tlb(domain, pasid);
}
-int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
+int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid,
unsigned long cr3)
{
struct protection_domain *domain = to_pdomain(dom);
@@ -3565,7 +3061,7 @@
}
EXPORT_SYMBOL(amd_iommu_domain_set_gcr3);
-int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid)
+int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid)
{
struct protection_domain *domain = to_pdomain(dom);
unsigned long flags;
@@ -3579,14 +3075,14 @@
}
EXPORT_SYMBOL(amd_iommu_domain_clear_gcr3);
-int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid,
+int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid,
int status, int tag)
{
struct iommu_dev_data *dev_data;
struct amd_iommu *iommu;
struct iommu_cmd cmd;
- dev_data = get_dev_data(&pdev->dev);
+ dev_data = dev_iommu_priv_get(&pdev->dev);
iommu = amd_iommu_rlookup_table[dev_data->devid];
build_complete_ppr(&cmd, dev_data->devid, pasid, status,
@@ -3599,9 +3095,27 @@
struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev)
{
struct protection_domain *pdomain;
+ struct iommu_dev_data *dev_data;
+ struct device *dev = &pdev->dev;
+ struct iommu_domain *io_domain;
- pdomain = get_domain(&pdev->dev);
- if (IS_ERR(pdomain))
+ if (!check_device(dev))
+ return NULL;
+
+ dev_data = dev_iommu_priv_get(&pdev->dev);
+ pdomain = dev_data->domain;
+ io_domain = iommu_get_domain_for_dev(dev);
+
+ if (pdomain == NULL && dev_data->defer_attach) {
+ dev_data->defer_attach = false;
+ pdomain = to_pdomain(io_domain);
+ attach_device(dev, pdomain);
+ }
+
+ if (pdomain == NULL)
+ return NULL;
+
+ if (io_domain->type != IOMMU_DOMAIN_DMA)
return NULL;
/* Only return IOMMUv2 domains */
@@ -3619,7 +3133,7 @@
if (!amd_iommu_v2_supported())
return;
- dev_data = get_dev_data(&pdev->dev);
+ dev_data = dev_iommu_priv_get(&pdev->dev);
dev_data->errata |= (1 << erratum);
}
EXPORT_SYMBOL(amd_iommu_enable_device_erratum);
@@ -3638,11 +3152,8 @@
memset(info, 0, sizeof(*info));
- if (!pci_ats_disabled()) {
- pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS);
- if (pos)
- info->flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP;
- }
+ if (pci_ats_supported(pdev))
+ info->flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP;
pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
if (pos)
@@ -4097,69 +3608,51 @@
static int get_devid(struct irq_alloc_info *info)
{
- int devid = -1;
-
switch (info->type) {
case X86_IRQ_ALLOC_TYPE_IOAPIC:
- devid = get_ioapic_devid(info->ioapic_id);
- break;
+ case X86_IRQ_ALLOC_TYPE_IOAPIC_GET_PARENT:
+ return get_ioapic_devid(info->devid);
case X86_IRQ_ALLOC_TYPE_HPET:
- devid = get_hpet_devid(info->hpet_id);
- break;
- case X86_IRQ_ALLOC_TYPE_MSI:
- case X86_IRQ_ALLOC_TYPE_MSIX:
- devid = get_device_id(&info->msi_dev->dev);
- break;
+ case X86_IRQ_ALLOC_TYPE_HPET_GET_PARENT:
+ return get_hpet_devid(info->devid);
+ case X86_IRQ_ALLOC_TYPE_PCI_MSI:
+ case X86_IRQ_ALLOC_TYPE_PCI_MSIX:
+ return get_device_id(msi_desc_to_dev(info->desc));
default:
- BUG_ON(1);
- break;
+ WARN_ON_ONCE(1);
+ return -1;
}
-
- return devid;
}
-static struct irq_domain *get_ir_irq_domain(struct irq_alloc_info *info)
+static struct irq_domain *get_irq_domain_for_devid(struct irq_alloc_info *info,
+ int devid)
{
- struct amd_iommu *iommu;
+ struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
+
+ if (!iommu)
+ return NULL;
+
+ switch (info->type) {
+ case X86_IRQ_ALLOC_TYPE_IOAPIC_GET_PARENT:
+ case X86_IRQ_ALLOC_TYPE_HPET_GET_PARENT:
+ return iommu->ir_domain;
+ default:
+ WARN_ON_ONCE(1);
+ return NULL;
+ }
+}
+
+static struct irq_domain *get_irq_domain(struct irq_alloc_info *info)
+{
int devid;
if (!info)
return NULL;
devid = get_devid(info);
- if (devid >= 0) {
- iommu = amd_iommu_rlookup_table[devid];
- if (iommu)
- return iommu->ir_domain;
- }
-
- return NULL;
-}
-
-static struct irq_domain *get_irq_domain(struct irq_alloc_info *info)
-{
- struct amd_iommu *iommu;
- int devid;
-
- if (!info)
+ if (devid < 0)
return NULL;
-
- switch (info->type) {
- case X86_IRQ_ALLOC_TYPE_MSI:
- case X86_IRQ_ALLOC_TYPE_MSIX:
- devid = get_device_id(&info->msi_dev->dev);
- if (devid < 0)
- return NULL;
-
- iommu = amd_iommu_rlookup_table[devid];
- if (iommu)
- return iommu->msi_domain;
- break;
- default:
- break;
- }
-
- return NULL;
+ return get_irq_domain_for_devid(info, devid);
}
struct irq_remap_ops amd_iommu_irq_ops = {
@@ -4168,7 +3661,6 @@
.disable = amd_iommu_disable,
.reenable = amd_iommu_reenable,
.enable_faulting = amd_iommu_enable_faulting,
- .get_ir_irq_domain = get_ir_irq_domain,
.get_irq_domain = get_irq_domain,
};
@@ -4194,21 +3686,21 @@
switch (info->type) {
case X86_IRQ_ALLOC_TYPE_IOAPIC:
/* Setup IOAPIC entry */
- entry = info->ioapic_entry;
- info->ioapic_entry = NULL;
+ entry = info->ioapic.entry;
+ info->ioapic.entry = NULL;
memset(entry, 0, sizeof(*entry));
entry->vector = index;
entry->mask = 0;
- entry->trigger = info->ioapic_trigger;
- entry->polarity = info->ioapic_polarity;
+ entry->trigger = info->ioapic.trigger;
+ entry->polarity = info->ioapic.polarity;
/* Mask level triggered irqs. */
- if (info->ioapic_trigger)
+ if (info->ioapic.trigger)
entry->mask = 1;
break;
case X86_IRQ_ALLOC_TYPE_HPET:
- case X86_IRQ_ALLOC_TYPE_MSI:
- case X86_IRQ_ALLOC_TYPE_MSIX:
+ case X86_IRQ_ALLOC_TYPE_PCI_MSI:
+ case X86_IRQ_ALLOC_TYPE_PCI_MSIX:
msg->address_hi = MSI_ADDR_BASE_HI;
msg->address_lo = MSI_ADDR_BASE_LO;
msg->data = irte_info->index;
@@ -4252,15 +3744,15 @@
if (!info)
return -EINVAL;
- if (nr_irqs > 1 && info->type != X86_IRQ_ALLOC_TYPE_MSI &&
- info->type != X86_IRQ_ALLOC_TYPE_MSIX)
+ if (nr_irqs > 1 && info->type != X86_IRQ_ALLOC_TYPE_PCI_MSI &&
+ info->type != X86_IRQ_ALLOC_TYPE_PCI_MSIX)
return -EINVAL;
/*
* With IRQ remapping enabled, don't need contiguous CPU vectors
* to support multiple MSI interrupts.
*/
- if (info->type == X86_IRQ_ALLOC_TYPE_MSI)
+ if (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI)
info->flags &= ~X86_IRQ_ALLOC_CONTIGUOUS_VECTORS;
devid = get_devid(info);
@@ -4288,15 +3780,16 @@
iommu->irte_ops->set_allocated(table, i);
}
WARN_ON(table->min_index != 32);
- index = info->ioapic_pin;
+ index = info->ioapic.pin;
} else {
index = -ENOMEM;
}
- } else if (info->type == X86_IRQ_ALLOC_TYPE_MSI ||
- info->type == X86_IRQ_ALLOC_TYPE_MSIX) {
- bool align = (info->type == X86_IRQ_ALLOC_TYPE_MSI);
+ } else if (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI ||
+ info->type == X86_IRQ_ALLOC_TYPE_PCI_MSIX) {
+ bool align = (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI);
- index = alloc_irq_index(devid, nr_irqs, align, info->msi_dev);
+ index = alloc_irq_index(devid, nr_irqs, align,
+ msi_desc_to_pci_dev(info->desc));
} else {
index = alloc_irq_index(devid, nr_irqs, false, NULL);
}
@@ -4309,8 +3802,8 @@
for (i = 0; i < nr_irqs; i++) {
irq_data = irq_domain_get_irq_data(domain, virq + i);
- cfg = irqd_cfg(irq_data);
- if (!irq_data || !cfg) {
+ cfg = irq_data ? irqd_cfg(irq_data) : NULL;
+ if (!cfg) {
ret = -EINVAL;
goto out_free_data;
}
@@ -4418,14 +3911,18 @@
{
struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
+ u64 valid;
if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) ||
!entry || entry->lo.fields_vapic.guest_mode)
return 0;
+ valid = entry->lo.fields_vapic.valid;
+
entry->lo.val = 0;
entry->hi.val = 0;
+ entry->lo.fields_vapic.valid = valid;
entry->lo.fields_vapic.guest_mode = 1;
entry->lo.fields_vapic.ga_log_intr = 1;
entry->hi.fields.ga_root_ptr = ir_data->ga_root_ptr;
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd/iommu_v2.c
similarity index 95%
rename from drivers/iommu/amd_iommu_v2.c
rename to drivers/iommu/amd/iommu_v2.c
index 05f3d93..fb61bdc 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd/iommu_v2.c
@@ -13,13 +13,11 @@
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
-#include <linux/iommu.h>
#include <linux/wait.h>
#include <linux/pci.h>
#include <linux/gfp.h>
-#include "amd_iommu_types.h"
-#include "amd_iommu_proto.h"
+#include "amd_iommu.h"
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Joerg Roedel <jroedel@suse.de>");
@@ -42,7 +40,7 @@
struct mmu_notifier mn; /* mmu_notifier handle */
struct pri_queue pri[PRI_QUEUE_SIZE]; /* PRI tag states */
struct device_state *device_state; /* Link to our device_state */
- int pasid; /* PASID index */
+ u32 pasid; /* PASID index */
bool invalid; /* Used during setup and
teardown of the pasid */
spinlock_t lock; /* Protect pri_queues and
@@ -72,7 +70,7 @@
struct mm_struct *mm;
u64 address;
u16 devid;
- u16 pasid;
+ u32 pasid;
u16 tag;
u16 finish;
u16 flags;
@@ -152,7 +150,7 @@
/* Must be called under dev_state->lock */
static struct pasid_state **__get_pasid_state_ptr(struct device_state *dev_state,
- int pasid, bool alloc)
+ u32 pasid, bool alloc)
{
struct pasid_state **root, **ptr;
int level, index;
@@ -186,7 +184,7 @@
static int set_pasid_state(struct device_state *dev_state,
struct pasid_state *pasid_state,
- int pasid)
+ u32 pasid)
{
struct pasid_state **ptr;
unsigned long flags;
@@ -213,7 +211,7 @@
return ret;
}
-static void clear_pasid_state(struct device_state *dev_state, int pasid)
+static void clear_pasid_state(struct device_state *dev_state, u32 pasid)
{
struct pasid_state **ptr;
unsigned long flags;
@@ -231,7 +229,7 @@
}
static struct pasid_state *get_pasid_state(struct device_state *dev_state,
- int pasid)
+ u32 pasid)
{
struct pasid_state **ptr, *ret = NULL;
unsigned long flags;
@@ -487,7 +485,7 @@
flags |= FAULT_FLAG_WRITE;
flags |= FAULT_FLAG_REMOTE;
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
vma = find_extend_vma(mm, address);
if (!vma || address < vma->vm_start)
/* failed to get a vma in the right range */
@@ -497,9 +495,9 @@
if (access_error(vma, fault))
goto out;
- ret = handle_mm_fault(vma, address, flags);
+ ret = handle_mm_fault(vma, address, flags, NULL);
out:
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
if (ret & VM_FAULT_ERROR)
/* failed to service fault */
@@ -517,13 +515,12 @@
struct amd_iommu_fault *iommu_fault;
struct pasid_state *pasid_state;
struct device_state *dev_state;
+ struct pci_dev *pdev = NULL;
unsigned long flags;
struct fault *fault;
bool finish;
u16 tag, devid;
int ret;
- struct iommu_dev_data *dev_data;
- struct pci_dev *pdev = NULL;
iommu_fault = data;
tag = iommu_fault->tag & 0x1ff;
@@ -534,12 +531,11 @@
devid & 0xff);
if (!pdev)
return -ENODEV;
- dev_data = get_dev_data(&pdev->dev);
+
+ ret = NOTIFY_DONE;
/* In kdump kernel pci dev is not initialized yet -> send INVALID */
- ret = NOTIFY_DONE;
- if (translation_pre_enabled(amd_iommu_rlookup_table[devid])
- && dev_data->defer_attach) {
+ if (amd_iommu_is_attach_deferred(NULL, &pdev->dev)) {
amd_iommu_complete_ppr(pdev, iommu_fault->pasid,
PPR_INVALID, tag);
goto out;
@@ -598,7 +594,7 @@
.notifier_call = ppr_notifier,
};
-int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
+int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid,
struct task_struct *task)
{
struct pasid_state *pasid_state;
@@ -619,7 +615,7 @@
return -EINVAL;
ret = -EINVAL;
- if (pasid < 0 || pasid >= dev_state->max_pasids)
+ if (pasid >= dev_state->max_pasids)
goto out;
ret = -ENOMEM;
@@ -683,7 +679,7 @@
}
EXPORT_SYMBOL(amd_iommu_bind_pasid);
-void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid)
+void amd_iommu_unbind_pasid(struct pci_dev *pdev, u32 pasid)
{
struct pasid_state *pasid_state;
struct device_state *dev_state;
@@ -699,7 +695,7 @@
if (dev_state == NULL)
return;
- if (pasid < 0 || pasid >= dev_state->max_pasids)
+ if (pasid >= dev_state->max_pasids)
goto out;
pasid_state = get_pasid_state(dev_state, pasid);
@@ -931,10 +927,8 @@
{
int ret;
- pr_info("AMD IOMMUv2 driver by Joerg Roedel <jroedel@suse.de>\n");
-
if (!amd_iommu_v2_supported()) {
- pr_info("AMD IOMMUv2 functionality not available on this system\n");
+ pr_info("AMD IOMMUv2 functionality not available on this system - This is not a bug.\n");
/*
* Load anyway to provide the symbols to other modules
* which may use AMD IOMMUv2 optionally.
@@ -951,6 +945,8 @@
amd_iommu_register_ppr_notifier(&ppr_nb);
+ pr_info("AMD IOMMUv2 loaded and initialized\n");
+
return 0;
out:
diff --git a/drivers/iommu/amd_iommu_quirks.c b/drivers/iommu/amd/quirks.c
similarity index 100%
rename from drivers/iommu/amd_iommu_quirks.c
rename to drivers/iommu/amd/quirks.c
diff --git a/drivers/iommu/amd_iommu.h b/drivers/iommu/amd_iommu.h
deleted file mode 100644
index 12d540d..0000000
--- a/drivers/iommu/amd_iommu.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-
-#ifndef AMD_IOMMU_H
-#define AMD_IOMMU_H
-
-int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line);
-
-#ifdef CONFIG_DMI
-void amd_iommu_apply_ivrs_quirks(void);
-#else
-static void amd_iommu_apply_ivrs_quirks(void) { }
-#endif
-
-#endif
diff --git a/drivers/iommu/arm-smmu.h b/drivers/iommu/arm-smmu.h
deleted file mode 100644
index b19b6ca..0000000
--- a/drivers/iommu/arm-smmu.h
+++ /dev/null
@@ -1,402 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * IOMMU API for ARM architected SMMU implementations.
- *
- * Copyright (C) 2013 ARM Limited
- *
- * Author: Will Deacon <will.deacon@arm.com>
- */
-
-#ifndef _ARM_SMMU_H
-#define _ARM_SMMU_H
-
-#include <linux/atomic.h>
-#include <linux/bits.h>
-#include <linux/clk.h>
-#include <linux/device.h>
-#include <linux/io-64-nonatomic-hi-lo.h>
-#include <linux/io-pgtable.h>
-#include <linux/iommu.h>
-#include <linux/mutex.h>
-#include <linux/spinlock.h>
-#include <linux/types.h>
-
-/* Configuration registers */
-#define ARM_SMMU_GR0_sCR0 0x0
-#define sCR0_VMID16EN BIT(31)
-#define sCR0_BSU GENMASK(15, 14)
-#define sCR0_FB BIT(13)
-#define sCR0_PTM BIT(12)
-#define sCR0_VMIDPNE BIT(11)
-#define sCR0_USFCFG BIT(10)
-#define sCR0_GCFGFIE BIT(5)
-#define sCR0_GCFGFRE BIT(4)
-#define sCR0_EXIDENABLE BIT(3)
-#define sCR0_GFIE BIT(2)
-#define sCR0_GFRE BIT(1)
-#define sCR0_CLIENTPD BIT(0)
-
-/* Auxiliary Configuration register */
-#define ARM_SMMU_GR0_sACR 0x10
-
-/* Identification registers */
-#define ARM_SMMU_GR0_ID0 0x20
-#define ID0_S1TS BIT(30)
-#define ID0_S2TS BIT(29)
-#define ID0_NTS BIT(28)
-#define ID0_SMS BIT(27)
-#define ID0_ATOSNS BIT(26)
-#define ID0_PTFS_NO_AARCH32 BIT(25)
-#define ID0_PTFS_NO_AARCH32S BIT(24)
-#define ID0_NUMIRPT GENMASK(23, 16)
-#define ID0_CTTW BIT(14)
-#define ID0_NUMSIDB GENMASK(12, 9)
-#define ID0_EXIDS BIT(8)
-#define ID0_NUMSMRG GENMASK(7, 0)
-
-#define ARM_SMMU_GR0_ID1 0x24
-#define ID1_PAGESIZE BIT(31)
-#define ID1_NUMPAGENDXB GENMASK(30, 28)
-#define ID1_NUMS2CB GENMASK(23, 16)
-#define ID1_NUMCB GENMASK(7, 0)
-
-#define ARM_SMMU_GR0_ID2 0x28
-#define ID2_VMID16 BIT(15)
-#define ID2_PTFS_64K BIT(14)
-#define ID2_PTFS_16K BIT(13)
-#define ID2_PTFS_4K BIT(12)
-#define ID2_UBS GENMASK(11, 8)
-#define ID2_OAS GENMASK(7, 4)
-#define ID2_IAS GENMASK(3, 0)
-
-#define ARM_SMMU_GR0_ID3 0x2c
-#define ARM_SMMU_GR0_ID4 0x30
-#define ARM_SMMU_GR0_ID5 0x34
-#define ARM_SMMU_GR0_ID6 0x38
-
-#define ARM_SMMU_GR0_ID7 0x3c
-#define ID7_MAJOR GENMASK(7, 4)
-#define ID7_MINOR GENMASK(3, 0)
-
-#define ARM_SMMU_GR0_sGFSR 0x48
-#define ARM_SMMU_GR0_sGFSYNR0 0x50
-#define ARM_SMMU_GR0_sGFSYNR1 0x54
-#define ARM_SMMU_GR0_sGFSYNR2 0x58
-
-/* Global TLB invalidation */
-#define ARM_SMMU_GR0_TLBIVMID 0x64
-#define ARM_SMMU_GR0_TLBIALLNSNH 0x68
-#define ARM_SMMU_GR0_TLBIALLH 0x6c
-#define ARM_SMMU_GR0_sTLBGSYNC 0x70
-
-#define ARM_SMMU_GR0_sTLBGSTATUS 0x74
-#define sTLBGSTATUS_GSACTIVE BIT(0)
-
-/* Stream mapping registers */
-#define ARM_SMMU_GR0_SMR(n) (0x800 + ((n) << 2))
-#define SMR_VALID BIT(31)
-#define SMR_MASK GENMASK(31, 16)
-#define SMR_ID GENMASK(15, 0)
-
-#define ARM_SMMU_GR0_S2CR(n) (0xc00 + ((n) << 2))
-#define S2CR_PRIVCFG GENMASK(25, 24)
-enum arm_smmu_s2cr_privcfg {
- S2CR_PRIVCFG_DEFAULT,
- S2CR_PRIVCFG_DIPAN,
- S2CR_PRIVCFG_UNPRIV,
- S2CR_PRIVCFG_PRIV,
-};
-#define S2CR_TYPE GENMASK(17, 16)
-enum arm_smmu_s2cr_type {
- S2CR_TYPE_TRANS,
- S2CR_TYPE_BYPASS,
- S2CR_TYPE_FAULT,
-};
-#define S2CR_EXIDVALID BIT(10)
-#define S2CR_CBNDX GENMASK(7, 0)
-
-/* Context bank attribute registers */
-#define ARM_SMMU_GR1_CBAR(n) (0x0 + ((n) << 2))
-#define CBAR_IRPTNDX GENMASK(31, 24)
-#define CBAR_TYPE GENMASK(17, 16)
-enum arm_smmu_cbar_type {
- CBAR_TYPE_S2_TRANS,
- CBAR_TYPE_S1_TRANS_S2_BYPASS,
- CBAR_TYPE_S1_TRANS_S2_FAULT,
- CBAR_TYPE_S1_TRANS_S2_TRANS,
-};
-#define CBAR_S1_MEMATTR GENMASK(15, 12)
-#define CBAR_S1_MEMATTR_WB 0xf
-#define CBAR_S1_BPSHCFG GENMASK(9, 8)
-#define CBAR_S1_BPSHCFG_NSH 3
-#define CBAR_VMID GENMASK(7, 0)
-
-#define ARM_SMMU_GR1_CBFRSYNRA(n) (0x400 + ((n) << 2))
-
-#define ARM_SMMU_GR1_CBA2R(n) (0x800 + ((n) << 2))
-#define CBA2R_VMID16 GENMASK(31, 16)
-#define CBA2R_VA64 BIT(0)
-
-#define ARM_SMMU_CB_SCTLR 0x0
-#define SCTLR_S1_ASIDPNE BIT(12)
-#define SCTLR_CFCFG BIT(7)
-#define SCTLR_CFIE BIT(6)
-#define SCTLR_CFRE BIT(5)
-#define SCTLR_E BIT(4)
-#define SCTLR_AFE BIT(2)
-#define SCTLR_TRE BIT(1)
-#define SCTLR_M BIT(0)
-
-#define ARM_SMMU_CB_ACTLR 0x4
-
-#define ARM_SMMU_CB_RESUME 0x8
-#define RESUME_TERMINATE BIT(0)
-
-#define ARM_SMMU_CB_TCR2 0x10
-#define TCR2_SEP GENMASK(17, 15)
-#define TCR2_SEP_UPSTREAM 0x7
-#define TCR2_AS BIT(4)
-
-#define ARM_SMMU_CB_TTBR0 0x20
-#define ARM_SMMU_CB_TTBR1 0x28
-#define TTBRn_ASID GENMASK_ULL(63, 48)
-
-#define ARM_SMMU_CB_TCR 0x30
-#define ARM_SMMU_CB_CONTEXTIDR 0x34
-#define ARM_SMMU_CB_S1_MAIR0 0x38
-#define ARM_SMMU_CB_S1_MAIR1 0x3c
-
-#define ARM_SMMU_CB_PAR 0x50
-#define CB_PAR_F BIT(0)
-
-#define ARM_SMMU_CB_FSR 0x58
-#define FSR_MULTI BIT(31)
-#define FSR_SS BIT(30)
-#define FSR_UUT BIT(8)
-#define FSR_ASF BIT(7)
-#define FSR_TLBLKF BIT(6)
-#define FSR_TLBMCF BIT(5)
-#define FSR_EF BIT(4)
-#define FSR_PF BIT(3)
-#define FSR_AFF BIT(2)
-#define FSR_TF BIT(1)
-
-#define FSR_IGN (FSR_AFF | FSR_ASF | \
- FSR_TLBMCF | FSR_TLBLKF)
-#define FSR_FAULT (FSR_MULTI | FSR_SS | FSR_UUT | \
- FSR_EF | FSR_PF | FSR_TF | FSR_IGN)
-
-#define ARM_SMMU_CB_FAR 0x60
-
-#define ARM_SMMU_CB_FSYNR0 0x68
-#define FSYNR0_WNR BIT(4)
-
-#define ARM_SMMU_CB_S1_TLBIVA 0x600
-#define ARM_SMMU_CB_S1_TLBIASID 0x610
-#define ARM_SMMU_CB_S1_TLBIVAL 0x620
-#define ARM_SMMU_CB_S2_TLBIIPAS2 0x630
-#define ARM_SMMU_CB_S2_TLBIIPAS2L 0x638
-#define ARM_SMMU_CB_TLBSYNC 0x7f0
-#define ARM_SMMU_CB_TLBSTATUS 0x7f4
-#define ARM_SMMU_CB_ATS1PR 0x800
-
-#define ARM_SMMU_CB_ATSR 0x8f0
-#define ATSR_ACTIVE BIT(0)
-
-
-/* Maximum number of context banks per SMMU */
-#define ARM_SMMU_MAX_CBS 128
-
-
-/* Shared driver definitions */
-enum arm_smmu_arch_version {
- ARM_SMMU_V1,
- ARM_SMMU_V1_64K,
- ARM_SMMU_V2,
-};
-
-enum arm_smmu_implementation {
- GENERIC_SMMU,
- ARM_MMU500,
- CAVIUM_SMMUV2,
- QCOM_SMMUV2,
-};
-
-struct arm_smmu_device {
- struct device *dev;
-
- void __iomem *base;
- unsigned int numpage;
- unsigned int pgshift;
-
-#define ARM_SMMU_FEAT_COHERENT_WALK (1 << 0)
-#define ARM_SMMU_FEAT_STREAM_MATCH (1 << 1)
-#define ARM_SMMU_FEAT_TRANS_S1 (1 << 2)
-#define ARM_SMMU_FEAT_TRANS_S2 (1 << 3)
-#define ARM_SMMU_FEAT_TRANS_NESTED (1 << 4)
-#define ARM_SMMU_FEAT_TRANS_OPS (1 << 5)
-#define ARM_SMMU_FEAT_VMID16 (1 << 6)
-#define ARM_SMMU_FEAT_FMT_AARCH64_4K (1 << 7)
-#define ARM_SMMU_FEAT_FMT_AARCH64_16K (1 << 8)
-#define ARM_SMMU_FEAT_FMT_AARCH64_64K (1 << 9)
-#define ARM_SMMU_FEAT_FMT_AARCH32_L (1 << 10)
-#define ARM_SMMU_FEAT_FMT_AARCH32_S (1 << 11)
-#define ARM_SMMU_FEAT_EXIDS (1 << 12)
- u32 features;
-
- enum arm_smmu_arch_version version;
- enum arm_smmu_implementation model;
- const struct arm_smmu_impl *impl;
-
- u32 num_context_banks;
- u32 num_s2_context_banks;
- DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
- struct arm_smmu_cb *cbs;
- atomic_t irptndx;
-
- u32 num_mapping_groups;
- u16 streamid_mask;
- u16 smr_mask_mask;
- struct arm_smmu_smr *smrs;
- struct arm_smmu_s2cr *s2crs;
- struct mutex stream_map_mutex;
-
- unsigned long va_size;
- unsigned long ipa_size;
- unsigned long pa_size;
- unsigned long pgsize_bitmap;
-
- u32 num_global_irqs;
- u32 num_context_irqs;
- unsigned int *irqs;
- struct clk_bulk_data *clks;
- int num_clks;
-
- spinlock_t global_sync_lock;
-
- /* IOMMU core code handle */
- struct iommu_device iommu;
-};
-
-enum arm_smmu_context_fmt {
- ARM_SMMU_CTX_FMT_NONE,
- ARM_SMMU_CTX_FMT_AARCH64,
- ARM_SMMU_CTX_FMT_AARCH32_L,
- ARM_SMMU_CTX_FMT_AARCH32_S,
-};
-
-struct arm_smmu_cfg {
- u8 cbndx;
- u8 irptndx;
- union {
- u16 asid;
- u16 vmid;
- };
- enum arm_smmu_cbar_type cbar;
- enum arm_smmu_context_fmt fmt;
-};
-#define INVALID_IRPTNDX 0xff
-
-enum arm_smmu_domain_stage {
- ARM_SMMU_DOMAIN_S1 = 0,
- ARM_SMMU_DOMAIN_S2,
- ARM_SMMU_DOMAIN_NESTED,
- ARM_SMMU_DOMAIN_BYPASS,
-};
-
-struct arm_smmu_flush_ops {
- struct iommu_flush_ops tlb;
- void (*tlb_inv_range)(unsigned long iova, size_t size, size_t granule,
- bool leaf, void *cookie);
- void (*tlb_sync)(void *cookie);
-};
-
-struct arm_smmu_domain {
- struct arm_smmu_device *smmu;
- struct io_pgtable_ops *pgtbl_ops;
- const struct arm_smmu_flush_ops *flush_ops;
- struct arm_smmu_cfg cfg;
- enum arm_smmu_domain_stage stage;
- bool non_strict;
- struct mutex init_mutex; /* Protects smmu pointer */
- spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */
- struct iommu_domain domain;
-};
-
-
-/* Implementation details, yay! */
-struct arm_smmu_impl {
- u32 (*read_reg)(struct arm_smmu_device *smmu, int page, int offset);
- void (*write_reg)(struct arm_smmu_device *smmu, int page, int offset,
- u32 val);
- u64 (*read_reg64)(struct arm_smmu_device *smmu, int page, int offset);
- void (*write_reg64)(struct arm_smmu_device *smmu, int page, int offset,
- u64 val);
- int (*cfg_probe)(struct arm_smmu_device *smmu);
- int (*reset)(struct arm_smmu_device *smmu);
- int (*init_context)(struct arm_smmu_domain *smmu_domain);
-};
-
-static inline void __iomem *arm_smmu_page(struct arm_smmu_device *smmu, int n)
-{
- return smmu->base + (n << smmu->pgshift);
-}
-
-static inline u32 arm_smmu_readl(struct arm_smmu_device *smmu, int page, int offset)
-{
- if (smmu->impl && unlikely(smmu->impl->read_reg))
- return smmu->impl->read_reg(smmu, page, offset);
- return readl_relaxed(arm_smmu_page(smmu, page) + offset);
-}
-
-static inline void arm_smmu_writel(struct arm_smmu_device *smmu, int page,
- int offset, u32 val)
-{
- if (smmu->impl && unlikely(smmu->impl->write_reg))
- smmu->impl->write_reg(smmu, page, offset, val);
- else
- writel_relaxed(val, arm_smmu_page(smmu, page) + offset);
-}
-
-static inline u64 arm_smmu_readq(struct arm_smmu_device *smmu, int page, int offset)
-{
- if (smmu->impl && unlikely(smmu->impl->read_reg64))
- return smmu->impl->read_reg64(smmu, page, offset);
- return readq_relaxed(arm_smmu_page(smmu, page) + offset);
-}
-
-static inline void arm_smmu_writeq(struct arm_smmu_device *smmu, int page,
- int offset, u64 val)
-{
- if (smmu->impl && unlikely(smmu->impl->write_reg64))
- smmu->impl->write_reg64(smmu, page, offset, val);
- else
- writeq_relaxed(val, arm_smmu_page(smmu, page) + offset);
-}
-
-#define ARM_SMMU_GR0 0
-#define ARM_SMMU_GR1 1
-#define ARM_SMMU_CB(s, n) ((s)->numpage + (n))
-
-#define arm_smmu_gr0_read(s, o) \
- arm_smmu_readl((s), ARM_SMMU_GR0, (o))
-#define arm_smmu_gr0_write(s, o, v) \
- arm_smmu_writel((s), ARM_SMMU_GR0, (o), (v))
-
-#define arm_smmu_gr1_read(s, o) \
- arm_smmu_readl((s), ARM_SMMU_GR1, (o))
-#define arm_smmu_gr1_write(s, o, v) \
- arm_smmu_writel((s), ARM_SMMU_GR1, (o), (v))
-
-#define arm_smmu_cb_read(s, n, o) \
- arm_smmu_readl((s), ARM_SMMU_CB((s), (n)), (o))
-#define arm_smmu_cb_write(s, n, o, v) \
- arm_smmu_writel((s), ARM_SMMU_CB((s), (n)), (o), (v))
-#define arm_smmu_cb_readq(s, n, o) \
- arm_smmu_readq((s), ARM_SMMU_CB((s), (n)), (o))
-#define arm_smmu_cb_writeq(s, n, o, v) \
- arm_smmu_writeq((s), ARM_SMMU_CB((s), (n)), (o), (v))
-
-struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu);
-
-#endif /* _ARM_SMMU_H */
diff --git a/drivers/iommu/arm/Makefile b/drivers/iommu/arm/Makefile
new file mode 100644
index 0000000..0f9efea
--- /dev/null
+++ b/drivers/iommu/arm/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-y += arm-smmu/ arm-smmu-v3/
diff --git a/drivers/iommu/arm/arm-smmu-v3/Makefile b/drivers/iommu/arm/arm-smmu-v3/Makefile
new file mode 100644
index 0000000..54feb1e
--- /dev/null
+++ b/drivers/iommu/arm/arm-smmu-v3/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_ARM_SMMU_V3) += arm_smmu_v3.o
+arm_smmu_v3-objs-y += arm-smmu-v3.o
+arm_smmu_v3-objs-$(CONFIG_ARM_SMMU_V3_SVA) += arm-smmu-v3-sva.o
+arm_smmu_v3-objs := $(arm_smmu_v3-objs-y)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
new file mode 100644
index 0000000..9255c96
--- /dev/null
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Implementation of the IOMMU SVA API for the ARM SMMUv3
+ */
+
+#include <linux/mm.h>
+#include <linux/mmu_context.h>
+#include <linux/slab.h>
+
+#include "arm-smmu-v3.h"
+#include "../../io-pgtable-arm.h"
+
+static DEFINE_MUTEX(sva_lock);
+
+/*
+ * Check if the CPU ASID is available on the SMMU side. If a private context
+ * descriptor is using it, try to replace it.
+ */
+static struct arm_smmu_ctx_desc *
+arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
+{
+ int ret;
+ u32 new_asid;
+ struct arm_smmu_ctx_desc *cd;
+ struct arm_smmu_device *smmu;
+ struct arm_smmu_domain *smmu_domain;
+
+ cd = xa_load(&arm_smmu_asid_xa, asid);
+ if (!cd)
+ return NULL;
+
+ if (cd->mm) {
+ if (WARN_ON(cd->mm != mm))
+ return ERR_PTR(-EINVAL);
+ /* All devices bound to this mm use the same cd struct. */
+ refcount_inc(&cd->refs);
+ return cd;
+ }
+
+ smmu_domain = container_of(cd, struct arm_smmu_domain, s1_cfg.cd);
+ smmu = smmu_domain->smmu;
+
+ ret = xa_alloc(&arm_smmu_asid_xa, &new_asid, cd,
+ XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
+ if (ret)
+ return ERR_PTR(-ENOSPC);
+ /*
+ * Race with unmap: TLB invalidations will start targeting the new ASID,
+ * which isn't assigned yet. We'll do an invalidate-all on the old ASID
+ * later, so it doesn't matter.
+ */
+ cd->asid = new_asid;
+ /*
+ * Update ASID and invalidate CD in all associated masters. There will
+ * be some overlap between use of both ASIDs, until we invalidate the
+ * TLB.
+ */
+ arm_smmu_write_ctx_desc(smmu_domain, 0, cd);
+
+ /* Invalidate TLB entries previously associated with that context */
+ arm_smmu_tlb_inv_asid(smmu, asid);
+
+ xa_erase(&arm_smmu_asid_xa, asid);
+ return NULL;
+}
+
+__maybe_unused
+static struct arm_smmu_ctx_desc *arm_smmu_alloc_shared_cd(struct mm_struct *mm)
+{
+ u16 asid;
+ int err = 0;
+ u64 tcr, par, reg;
+ struct arm_smmu_ctx_desc *cd;
+ struct arm_smmu_ctx_desc *ret = NULL;
+
+ asid = arm64_mm_context_get(mm);
+ if (!asid)
+ return ERR_PTR(-ESRCH);
+
+ cd = kzalloc(sizeof(*cd), GFP_KERNEL);
+ if (!cd) {
+ err = -ENOMEM;
+ goto out_put_context;
+ }
+
+ refcount_set(&cd->refs, 1);
+
+ mutex_lock(&arm_smmu_asid_lock);
+ ret = arm_smmu_share_asid(mm, asid);
+ if (ret) {
+ mutex_unlock(&arm_smmu_asid_lock);
+ goto out_free_cd;
+ }
+
+ err = xa_insert(&arm_smmu_asid_xa, asid, cd, GFP_KERNEL);
+ mutex_unlock(&arm_smmu_asid_lock);
+
+ if (err)
+ goto out_free_asid;
+
+ tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, 64ULL - vabits_actual) |
+ FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, ARM_LPAE_TCR_RGN_WBWA) |
+ FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, ARM_LPAE_TCR_RGN_WBWA) |
+ FIELD_PREP(CTXDESC_CD_0_TCR_SH0, ARM_LPAE_TCR_SH_IS) |
+ CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
+
+ switch (PAGE_SIZE) {
+ case SZ_4K:
+ tcr |= FIELD_PREP(CTXDESC_CD_0_TCR_TG0, ARM_LPAE_TCR_TG0_4K);
+ break;
+ case SZ_16K:
+ tcr |= FIELD_PREP(CTXDESC_CD_0_TCR_TG0, ARM_LPAE_TCR_TG0_16K);
+ break;
+ case SZ_64K:
+ tcr |= FIELD_PREP(CTXDESC_CD_0_TCR_TG0, ARM_LPAE_TCR_TG0_64K);
+ break;
+ default:
+ WARN_ON(1);
+ err = -EINVAL;
+ goto out_free_asid;
+ }
+
+ reg = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+ par = cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR0_PARANGE_SHIFT);
+ tcr |= FIELD_PREP(CTXDESC_CD_0_TCR_IPS, par);
+
+ cd->ttbr = virt_to_phys(mm->pgd);
+ cd->tcr = tcr;
+ /*
+ * MAIR value is pretty much constant and global, so we can just get it
+ * from the current CPU register
+ */
+ cd->mair = read_sysreg(mair_el1);
+ cd->asid = asid;
+ cd->mm = mm;
+
+ return cd;
+
+out_free_asid:
+ arm_smmu_free_asid(cd);
+out_free_cd:
+ kfree(cd);
+out_put_context:
+ arm64_mm_context_put(mm);
+ return err < 0 ? ERR_PTR(err) : ret;
+}
+
+__maybe_unused
+static void arm_smmu_free_shared_cd(struct arm_smmu_ctx_desc *cd)
+{
+ if (arm_smmu_free_asid(cd)) {
+ /* Unpin ASID */
+ arm64_mm_context_put(cd->mm);
+ kfree(cd);
+ }
+}
+
+bool arm_smmu_sva_supported(struct arm_smmu_device *smmu)
+{
+ unsigned long reg, fld;
+ unsigned long oas;
+ unsigned long asid_bits;
+ u32 feat_mask = ARM_SMMU_FEAT_BTM | ARM_SMMU_FEAT_COHERENCY;
+
+ if (vabits_actual == 52)
+ feat_mask |= ARM_SMMU_FEAT_VAX;
+
+ if ((smmu->features & feat_mask) != feat_mask)
+ return false;
+
+ if (!(smmu->pgsize_bitmap & PAGE_SIZE))
+ return false;
+
+ /*
+ * Get the smallest PA size of all CPUs (sanitized by cpufeature). We're
+ * not even pretending to support AArch32 here. Abort if the MMU outputs
+ * addresses larger than what we support.
+ */
+ reg = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+ fld = cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR0_PARANGE_SHIFT);
+ oas = id_aa64mmfr0_parange_to_phys_shift(fld);
+ if (smmu->oas < oas)
+ return false;
+
+ /* We can support bigger ASIDs than the CPU, but not smaller */
+ fld = cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR0_ASID_SHIFT);
+ asid_bits = fld ? 16 : 8;
+ if (smmu->asid_bits < asid_bits)
+ return false;
+
+ /*
+ * See max_pinned_asids in arch/arm64/mm/context.c. The following is
+ * generally the maximum number of bindable processes.
+ */
+ if (arm64_kernel_unmapped_at_el0())
+ asid_bits--;
+ dev_dbg(smmu->dev, "%d shared contexts\n", (1 << asid_bits) -
+ num_possible_cpus() - 2);
+
+ return true;
+}
+
+static bool arm_smmu_iopf_supported(struct arm_smmu_master *master)
+{
+ return false;
+}
+
+bool arm_smmu_master_sva_supported(struct arm_smmu_master *master)
+{
+ if (!(master->smmu->features & ARM_SMMU_FEAT_SVA))
+ return false;
+
+ /* SSID and IOPF support are mandatory for the moment */
+ return master->ssid_bits && arm_smmu_iopf_supported(master);
+}
+
+bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master)
+{
+ bool enabled;
+
+ mutex_lock(&sva_lock);
+ enabled = master->sva_enabled;
+ mutex_unlock(&sva_lock);
+ return enabled;
+}
+
+int arm_smmu_master_enable_sva(struct arm_smmu_master *master)
+{
+ mutex_lock(&sva_lock);
+ master->sva_enabled = true;
+ mutex_unlock(&sva_lock);
+
+ return 0;
+}
+
+int arm_smmu_master_disable_sva(struct arm_smmu_master *master)
+{
+ mutex_lock(&sva_lock);
+ if (!list_empty(&master->bonds)) {
+ dev_err(master->dev, "cannot disable SVA, device is bound\n");
+ mutex_unlock(&sva_lock);
+ return -EBUSY;
+ }
+ master->sva_enabled = false;
+ mutex_unlock(&sva_lock);
+
+ return 0;
+}
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
similarity index 77%
rename from drivers/iommu/arm-smmu-v3.c
rename to drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index ef6af71..7067b7c 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -11,7 +11,6 @@
#include <linux/acpi.h>
#include <linux/acpi_iort.h>
-#include <linux/bitfield.h>
#include <linux/bitops.h>
#include <linux/crash_dump.h>
#include <linux/delay.h>
@@ -19,10 +18,8 @@
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/io-pgtable.h>
-#include <linux/iommu.h>
#include <linux/iopoll.h>
-#include <linux/init.h>
-#include <linux/moduleparam.h>
+#include <linux/module.h>
#include <linux/msi.h>
#include <linux/of.h>
#include <linux/of_address.h>
@@ -34,370 +31,17 @@
#include <linux/amba/bus.h>
-/* MMIO registers */
-#define ARM_SMMU_IDR0 0x0
-#define IDR0_ST_LVL GENMASK(28, 27)
-#define IDR0_ST_LVL_2LVL 1
-#define IDR0_STALL_MODEL GENMASK(25, 24)
-#define IDR0_STALL_MODEL_STALL 0
-#define IDR0_STALL_MODEL_FORCE 2
-#define IDR0_TTENDIAN GENMASK(22, 21)
-#define IDR0_TTENDIAN_MIXED 0
-#define IDR0_TTENDIAN_LE 2
-#define IDR0_TTENDIAN_BE 3
-#define IDR0_CD2L (1 << 19)
-#define IDR0_VMID16 (1 << 18)
-#define IDR0_PRI (1 << 16)
-#define IDR0_SEV (1 << 14)
-#define IDR0_MSI (1 << 13)
-#define IDR0_ASID16 (1 << 12)
-#define IDR0_ATS (1 << 10)
-#define IDR0_HYP (1 << 9)
-#define IDR0_COHACC (1 << 4)
-#define IDR0_TTF GENMASK(3, 2)
-#define IDR0_TTF_AARCH64 2
-#define IDR0_TTF_AARCH32_64 3
-#define IDR0_S1P (1 << 1)
-#define IDR0_S2P (1 << 0)
+#include "arm-smmu-v3.h"
-#define ARM_SMMU_IDR1 0x4
-#define IDR1_TABLES_PRESET (1 << 30)
-#define IDR1_QUEUES_PRESET (1 << 29)
-#define IDR1_REL (1 << 28)
-#define IDR1_CMDQS GENMASK(25, 21)
-#define IDR1_EVTQS GENMASK(20, 16)
-#define IDR1_PRIQS GENMASK(15, 11)
-#define IDR1_SSIDSIZE GENMASK(10, 6)
-#define IDR1_SIDSIZE GENMASK(5, 0)
-
-#define ARM_SMMU_IDR5 0x14
-#define IDR5_STALL_MAX GENMASK(31, 16)
-#define IDR5_GRAN64K (1 << 6)
-#define IDR5_GRAN16K (1 << 5)
-#define IDR5_GRAN4K (1 << 4)
-#define IDR5_OAS GENMASK(2, 0)
-#define IDR5_OAS_32_BIT 0
-#define IDR5_OAS_36_BIT 1
-#define IDR5_OAS_40_BIT 2
-#define IDR5_OAS_42_BIT 3
-#define IDR5_OAS_44_BIT 4
-#define IDR5_OAS_48_BIT 5
-#define IDR5_OAS_52_BIT 6
-#define IDR5_VAX GENMASK(11, 10)
-#define IDR5_VAX_52_BIT 1
-
-#define ARM_SMMU_CR0 0x20
-#define CR0_ATSCHK (1 << 4)
-#define CR0_CMDQEN (1 << 3)
-#define CR0_EVTQEN (1 << 2)
-#define CR0_PRIQEN (1 << 1)
-#define CR0_SMMUEN (1 << 0)
-
-#define ARM_SMMU_CR0ACK 0x24
-
-#define ARM_SMMU_CR1 0x28
-#define CR1_TABLE_SH GENMASK(11, 10)
-#define CR1_TABLE_OC GENMASK(9, 8)
-#define CR1_TABLE_IC GENMASK(7, 6)
-#define CR1_QUEUE_SH GENMASK(5, 4)
-#define CR1_QUEUE_OC GENMASK(3, 2)
-#define CR1_QUEUE_IC GENMASK(1, 0)
-/* CR1 cacheability fields don't quite follow the usual TCR-style encoding */
-#define CR1_CACHE_NC 0
-#define CR1_CACHE_WB 1
-#define CR1_CACHE_WT 2
-
-#define ARM_SMMU_CR2 0x2c
-#define CR2_PTM (1 << 2)
-#define CR2_RECINVSID (1 << 1)
-#define CR2_E2H (1 << 0)
-
-#define ARM_SMMU_GBPA 0x44
-#define GBPA_UPDATE (1 << 31)
-#define GBPA_ABORT (1 << 20)
-
-#define ARM_SMMU_IRQ_CTRL 0x50
-#define IRQ_CTRL_EVTQ_IRQEN (1 << 2)
-#define IRQ_CTRL_PRIQ_IRQEN (1 << 1)
-#define IRQ_CTRL_GERROR_IRQEN (1 << 0)
-
-#define ARM_SMMU_IRQ_CTRLACK 0x54
-
-#define ARM_SMMU_GERROR 0x60
-#define GERROR_SFM_ERR (1 << 8)
-#define GERROR_MSI_GERROR_ABT_ERR (1 << 7)
-#define GERROR_MSI_PRIQ_ABT_ERR (1 << 6)
-#define GERROR_MSI_EVTQ_ABT_ERR (1 << 5)
-#define GERROR_MSI_CMDQ_ABT_ERR (1 << 4)
-#define GERROR_PRIQ_ABT_ERR (1 << 3)
-#define GERROR_EVTQ_ABT_ERR (1 << 2)
-#define GERROR_CMDQ_ERR (1 << 0)
-#define GERROR_ERR_MASK 0xfd
-
-#define ARM_SMMU_GERRORN 0x64
-
-#define ARM_SMMU_GERROR_IRQ_CFG0 0x68
-#define ARM_SMMU_GERROR_IRQ_CFG1 0x70
-#define ARM_SMMU_GERROR_IRQ_CFG2 0x74
-
-#define ARM_SMMU_STRTAB_BASE 0x80
-#define STRTAB_BASE_RA (1UL << 62)
-#define STRTAB_BASE_ADDR_MASK GENMASK_ULL(51, 6)
-
-#define ARM_SMMU_STRTAB_BASE_CFG 0x88
-#define STRTAB_BASE_CFG_FMT GENMASK(17, 16)
-#define STRTAB_BASE_CFG_FMT_LINEAR 0
-#define STRTAB_BASE_CFG_FMT_2LVL 1
-#define STRTAB_BASE_CFG_SPLIT GENMASK(10, 6)
-#define STRTAB_BASE_CFG_LOG2SIZE GENMASK(5, 0)
-
-#define ARM_SMMU_CMDQ_BASE 0x90
-#define ARM_SMMU_CMDQ_PROD 0x98
-#define ARM_SMMU_CMDQ_CONS 0x9c
-
-#define ARM_SMMU_EVTQ_BASE 0xa0
-#define ARM_SMMU_EVTQ_PROD 0x100a8
-#define ARM_SMMU_EVTQ_CONS 0x100ac
-#define ARM_SMMU_EVTQ_IRQ_CFG0 0xb0
-#define ARM_SMMU_EVTQ_IRQ_CFG1 0xb8
-#define ARM_SMMU_EVTQ_IRQ_CFG2 0xbc
-
-#define ARM_SMMU_PRIQ_BASE 0xc0
-#define ARM_SMMU_PRIQ_PROD 0x100c8
-#define ARM_SMMU_PRIQ_CONS 0x100cc
-#define ARM_SMMU_PRIQ_IRQ_CFG0 0xd0
-#define ARM_SMMU_PRIQ_IRQ_CFG1 0xd8
-#define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc
-
-/* Common MSI config fields */
-#define MSI_CFG0_ADDR_MASK GENMASK_ULL(51, 2)
-#define MSI_CFG2_SH GENMASK(5, 4)
-#define MSI_CFG2_MEMATTR GENMASK(3, 0)
-
-/* Common memory attribute values */
-#define ARM_SMMU_SH_NSH 0
-#define ARM_SMMU_SH_OSH 2
-#define ARM_SMMU_SH_ISH 3
-#define ARM_SMMU_MEMATTR_DEVICE_nGnRE 0x1
-#define ARM_SMMU_MEMATTR_OIWB 0xf
-
-#define Q_IDX(llq, p) ((p) & ((1 << (llq)->max_n_shift) - 1))
-#define Q_WRP(llq, p) ((p) & (1 << (llq)->max_n_shift))
-#define Q_OVERFLOW_FLAG (1U << 31)
-#define Q_OVF(p) ((p) & Q_OVERFLOW_FLAG)
-#define Q_ENT(q, p) ((q)->base + \
- Q_IDX(&((q)->llq), p) * \
- (q)->ent_dwords)
-
-#define Q_BASE_RWA (1UL << 62)
-#define Q_BASE_ADDR_MASK GENMASK_ULL(51, 5)
-#define Q_BASE_LOG2SIZE GENMASK(4, 0)
-
-/* Ensure DMA allocations are naturally aligned */
-#ifdef CONFIG_CMA_ALIGNMENT
-#define Q_MAX_SZ_SHIFT (PAGE_SHIFT + CONFIG_CMA_ALIGNMENT)
-#else
-#define Q_MAX_SZ_SHIFT (PAGE_SHIFT + MAX_ORDER - 1)
-#endif
-
-/*
- * Stream table.
- *
- * Linear: Enough to cover 1 << IDR1.SIDSIZE entries
- * 2lvl: 128k L1 entries,
- * 256 lazy entries per table (each table covers a PCI bus)
- */
-#define STRTAB_L1_SZ_SHIFT 20
-#define STRTAB_SPLIT 8
-
-#define STRTAB_L1_DESC_DWORDS 1
-#define STRTAB_L1_DESC_SPAN GENMASK_ULL(4, 0)
-#define STRTAB_L1_DESC_L2PTR_MASK GENMASK_ULL(51, 6)
-
-#define STRTAB_STE_DWORDS 8
-#define STRTAB_STE_0_V (1UL << 0)
-#define STRTAB_STE_0_CFG GENMASK_ULL(3, 1)
-#define STRTAB_STE_0_CFG_ABORT 0
-#define STRTAB_STE_0_CFG_BYPASS 4
-#define STRTAB_STE_0_CFG_S1_TRANS 5
-#define STRTAB_STE_0_CFG_S2_TRANS 6
-
-#define STRTAB_STE_0_S1FMT GENMASK_ULL(5, 4)
-#define STRTAB_STE_0_S1FMT_LINEAR 0
-#define STRTAB_STE_0_S1CTXPTR_MASK GENMASK_ULL(51, 6)
-#define STRTAB_STE_0_S1CDMAX GENMASK_ULL(63, 59)
-
-#define STRTAB_STE_1_S1C_CACHE_NC 0UL
-#define STRTAB_STE_1_S1C_CACHE_WBRA 1UL
-#define STRTAB_STE_1_S1C_CACHE_WT 2UL
-#define STRTAB_STE_1_S1C_CACHE_WB 3UL
-#define STRTAB_STE_1_S1CIR GENMASK_ULL(3, 2)
-#define STRTAB_STE_1_S1COR GENMASK_ULL(5, 4)
-#define STRTAB_STE_1_S1CSH GENMASK_ULL(7, 6)
-
-#define STRTAB_STE_1_S1STALLD (1UL << 27)
-
-#define STRTAB_STE_1_EATS GENMASK_ULL(29, 28)
-#define STRTAB_STE_1_EATS_ABT 0UL
-#define STRTAB_STE_1_EATS_TRANS 1UL
-#define STRTAB_STE_1_EATS_S1CHK 2UL
-
-#define STRTAB_STE_1_STRW GENMASK_ULL(31, 30)
-#define STRTAB_STE_1_STRW_NSEL1 0UL
-#define STRTAB_STE_1_STRW_EL2 2UL
-
-#define STRTAB_STE_1_SHCFG GENMASK_ULL(45, 44)
-#define STRTAB_STE_1_SHCFG_INCOMING 1UL
-
-#define STRTAB_STE_2_S2VMID GENMASK_ULL(15, 0)
-#define STRTAB_STE_2_VTCR GENMASK_ULL(50, 32)
-#define STRTAB_STE_2_S2AA64 (1UL << 51)
-#define STRTAB_STE_2_S2ENDI (1UL << 52)
-#define STRTAB_STE_2_S2PTW (1UL << 54)
-#define STRTAB_STE_2_S2R (1UL << 58)
-
-#define STRTAB_STE_3_S2TTB_MASK GENMASK_ULL(51, 4)
-
-/* Context descriptor (stage-1 only) */
-#define CTXDESC_CD_DWORDS 8
-#define CTXDESC_CD_0_TCR_T0SZ GENMASK_ULL(5, 0)
-#define ARM64_TCR_T0SZ GENMASK_ULL(5, 0)
-#define CTXDESC_CD_0_TCR_TG0 GENMASK_ULL(7, 6)
-#define ARM64_TCR_TG0 GENMASK_ULL(15, 14)
-#define CTXDESC_CD_0_TCR_IRGN0 GENMASK_ULL(9, 8)
-#define ARM64_TCR_IRGN0 GENMASK_ULL(9, 8)
-#define CTXDESC_CD_0_TCR_ORGN0 GENMASK_ULL(11, 10)
-#define ARM64_TCR_ORGN0 GENMASK_ULL(11, 10)
-#define CTXDESC_CD_0_TCR_SH0 GENMASK_ULL(13, 12)
-#define ARM64_TCR_SH0 GENMASK_ULL(13, 12)
-#define CTXDESC_CD_0_TCR_EPD0 (1ULL << 14)
-#define ARM64_TCR_EPD0 (1ULL << 7)
-#define CTXDESC_CD_0_TCR_EPD1 (1ULL << 30)
-#define ARM64_TCR_EPD1 (1ULL << 23)
-
-#define CTXDESC_CD_0_ENDI (1UL << 15)
-#define CTXDESC_CD_0_V (1UL << 31)
-
-#define CTXDESC_CD_0_TCR_IPS GENMASK_ULL(34, 32)
-#define ARM64_TCR_IPS GENMASK_ULL(34, 32)
-#define CTXDESC_CD_0_TCR_TBI0 (1ULL << 38)
-#define ARM64_TCR_TBI0 (1ULL << 37)
-
-#define CTXDESC_CD_0_AA64 (1UL << 41)
-#define CTXDESC_CD_0_S (1UL << 44)
-#define CTXDESC_CD_0_R (1UL << 45)
-#define CTXDESC_CD_0_A (1UL << 46)
-#define CTXDESC_CD_0_ASET (1UL << 47)
-#define CTXDESC_CD_0_ASID GENMASK_ULL(63, 48)
-
-#define CTXDESC_CD_1_TTB0_MASK GENMASK_ULL(51, 4)
-
-/* Convert between AArch64 (CPU) TCR format and SMMU CD format */
-#define ARM_SMMU_TCR2CD(tcr, fld) FIELD_PREP(CTXDESC_CD_0_TCR_##fld, \
- FIELD_GET(ARM64_TCR_##fld, tcr))
-
-/* Command queue */
-#define CMDQ_ENT_SZ_SHIFT 4
-#define CMDQ_ENT_DWORDS ((1 << CMDQ_ENT_SZ_SHIFT) >> 3)
-#define CMDQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - CMDQ_ENT_SZ_SHIFT)
-
-#define CMDQ_CONS_ERR GENMASK(30, 24)
-#define CMDQ_ERR_CERROR_NONE_IDX 0
-#define CMDQ_ERR_CERROR_ILL_IDX 1
-#define CMDQ_ERR_CERROR_ABT_IDX 2
-#define CMDQ_ERR_CERROR_ATC_INV_IDX 3
-
-#define CMDQ_PROD_OWNED_FLAG Q_OVERFLOW_FLAG
-
-/*
- * This is used to size the command queue and therefore must be at least
- * BITS_PER_LONG so that the valid_map works correctly (it relies on the
- * total number of queue entries being a multiple of BITS_PER_LONG).
- */
-#define CMDQ_BATCH_ENTRIES BITS_PER_LONG
-
-#define CMDQ_0_OP GENMASK_ULL(7, 0)
-#define CMDQ_0_SSV (1UL << 11)
-
-#define CMDQ_PREFETCH_0_SID GENMASK_ULL(63, 32)
-#define CMDQ_PREFETCH_1_SIZE GENMASK_ULL(4, 0)
-#define CMDQ_PREFETCH_1_ADDR_MASK GENMASK_ULL(63, 12)
-
-#define CMDQ_CFGI_0_SID GENMASK_ULL(63, 32)
-#define CMDQ_CFGI_1_LEAF (1UL << 0)
-#define CMDQ_CFGI_1_RANGE GENMASK_ULL(4, 0)
-
-#define CMDQ_TLBI_0_VMID GENMASK_ULL(47, 32)
-#define CMDQ_TLBI_0_ASID GENMASK_ULL(63, 48)
-#define CMDQ_TLBI_1_LEAF (1UL << 0)
-#define CMDQ_TLBI_1_VA_MASK GENMASK_ULL(63, 12)
-#define CMDQ_TLBI_1_IPA_MASK GENMASK_ULL(51, 12)
-
-#define CMDQ_ATC_0_SSID GENMASK_ULL(31, 12)
-#define CMDQ_ATC_0_SID GENMASK_ULL(63, 32)
-#define CMDQ_ATC_0_GLOBAL (1UL << 9)
-#define CMDQ_ATC_1_SIZE GENMASK_ULL(5, 0)
-#define CMDQ_ATC_1_ADDR_MASK GENMASK_ULL(63, 12)
-
-#define CMDQ_PRI_0_SSID GENMASK_ULL(31, 12)
-#define CMDQ_PRI_0_SID GENMASK_ULL(63, 32)
-#define CMDQ_PRI_1_GRPID GENMASK_ULL(8, 0)
-#define CMDQ_PRI_1_RESP GENMASK_ULL(13, 12)
-
-#define CMDQ_SYNC_0_CS GENMASK_ULL(13, 12)
-#define CMDQ_SYNC_0_CS_NONE 0
-#define CMDQ_SYNC_0_CS_IRQ 1
-#define CMDQ_SYNC_0_CS_SEV 2
-#define CMDQ_SYNC_0_MSH GENMASK_ULL(23, 22)
-#define CMDQ_SYNC_0_MSIATTR GENMASK_ULL(27, 24)
-#define CMDQ_SYNC_0_MSIDATA GENMASK_ULL(63, 32)
-#define CMDQ_SYNC_1_MSIADDR_MASK GENMASK_ULL(51, 2)
-
-/* Event queue */
-#define EVTQ_ENT_SZ_SHIFT 5
-#define EVTQ_ENT_DWORDS ((1 << EVTQ_ENT_SZ_SHIFT) >> 3)
-#define EVTQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - EVTQ_ENT_SZ_SHIFT)
-
-#define EVTQ_0_ID GENMASK_ULL(7, 0)
-
-/* PRI queue */
-#define PRIQ_ENT_SZ_SHIFT 4
-#define PRIQ_ENT_DWORDS ((1 << PRIQ_ENT_SZ_SHIFT) >> 3)
-#define PRIQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - PRIQ_ENT_SZ_SHIFT)
-
-#define PRIQ_0_SID GENMASK_ULL(31, 0)
-#define PRIQ_0_SSID GENMASK_ULL(51, 32)
-#define PRIQ_0_PERM_PRIV (1UL << 58)
-#define PRIQ_0_PERM_EXEC (1UL << 59)
-#define PRIQ_0_PERM_READ (1UL << 60)
-#define PRIQ_0_PERM_WRITE (1UL << 61)
-#define PRIQ_0_PRG_LAST (1UL << 62)
-#define PRIQ_0_SSID_V (1UL << 63)
-
-#define PRIQ_1_PRG_IDX GENMASK_ULL(8, 0)
-#define PRIQ_1_ADDR_MASK GENMASK_ULL(63, 12)
-
-/* High-level queue structures */
-#define ARM_SMMU_POLL_TIMEOUT_US 1000000 /* 1s! */
-#define ARM_SMMU_POLL_SPIN_COUNT 10
-
-#define MSI_IOVA_BASE 0x8000000
-#define MSI_IOVA_LENGTH 0x100000
-
-/*
- * not really modular, but the easiest way to keep compat with existing
- * bootargs behaviour is to continue using module_param_named here.
- */
static bool disable_bypass = 1;
-module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
+module_param(disable_bypass, bool, 0444);
MODULE_PARM_DESC(disable_bypass,
"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
-enum pri_resp {
- PRI_RESP_DENY = 0,
- PRI_RESP_FAIL = 1,
- PRI_RESP_SUCC = 2,
-};
+static bool disable_msipolling;
+module_param(disable_msipolling, bool, 0444);
+MODULE_PARM_DESC(disable_msipolling,
+ "Disable MSI-based polling for CMD_SYNC completion.");
enum arm_smmu_msi_index {
EVTQ_MSI_INDEX,
@@ -424,255 +68,14 @@
},
};
-struct arm_smmu_cmdq_ent {
- /* Common fields */
- u8 opcode;
- bool substream_valid;
-
- /* Command-specific fields */
- union {
- #define CMDQ_OP_PREFETCH_CFG 0x1
- struct {
- u32 sid;
- u8 size;
- u64 addr;
- } prefetch;
-
- #define CMDQ_OP_CFGI_STE 0x3
- #define CMDQ_OP_CFGI_ALL 0x4
- struct {
- u32 sid;
- union {
- bool leaf;
- u8 span;
- };
- } cfgi;
-
- #define CMDQ_OP_TLBI_NH_ASID 0x11
- #define CMDQ_OP_TLBI_NH_VA 0x12
- #define CMDQ_OP_TLBI_EL2_ALL 0x20
- #define CMDQ_OP_TLBI_S12_VMALL 0x28
- #define CMDQ_OP_TLBI_S2_IPA 0x2a
- #define CMDQ_OP_TLBI_NSNH_ALL 0x30
- struct {
- u16 asid;
- u16 vmid;
- bool leaf;
- u64 addr;
- } tlbi;
-
- #define CMDQ_OP_ATC_INV 0x40
- #define ATC_INV_SIZE_ALL 52
- struct {
- u32 sid;
- u32 ssid;
- u64 addr;
- u8 size;
- bool global;
- } atc;
-
- #define CMDQ_OP_PRI_RESP 0x41
- struct {
- u32 sid;
- u32 ssid;
- u16 grpid;
- enum pri_resp resp;
- } pri;
-
- #define CMDQ_OP_CMD_SYNC 0x46
- struct {
- u64 msiaddr;
- } sync;
- };
-};
-
-struct arm_smmu_ll_queue {
- union {
- u64 val;
- struct {
- u32 prod;
- u32 cons;
- };
- struct {
- atomic_t prod;
- atomic_t cons;
- } atomic;
- u8 __pad[SMP_CACHE_BYTES];
- } ____cacheline_aligned_in_smp;
- u32 max_n_shift;
-};
-
-struct arm_smmu_queue {
- struct arm_smmu_ll_queue llq;
- int irq; /* Wired interrupt */
-
- __le64 *base;
- dma_addr_t base_dma;
- u64 q_base;
-
- size_t ent_dwords;
-
- u32 __iomem *prod_reg;
- u32 __iomem *cons_reg;
-};
-
-struct arm_smmu_queue_poll {
- ktime_t timeout;
- unsigned int delay;
- unsigned int spin_cnt;
- bool wfe;
-};
-
-struct arm_smmu_cmdq {
- struct arm_smmu_queue q;
- atomic_long_t *valid_map;
- atomic_t owner_prod;
- atomic_t lock;
-};
-
-struct arm_smmu_evtq {
- struct arm_smmu_queue q;
- u32 max_stalls;
-};
-
-struct arm_smmu_priq {
- struct arm_smmu_queue q;
-};
-
-/* High-level stream table and context descriptor structures */
-struct arm_smmu_strtab_l1_desc {
- u8 span;
-
- __le64 *l2ptr;
- dma_addr_t l2ptr_dma;
-};
-
-struct arm_smmu_s1_cfg {
- __le64 *cdptr;
- dma_addr_t cdptr_dma;
-
- struct arm_smmu_ctx_desc {
- u16 asid;
- u64 ttbr;
- u64 tcr;
- u64 mair;
- } cd;
-};
-
-struct arm_smmu_s2_cfg {
- u16 vmid;
- u64 vttbr;
- u64 vtcr;
-};
-
-struct arm_smmu_strtab_cfg {
- __le64 *strtab;
- dma_addr_t strtab_dma;
- struct arm_smmu_strtab_l1_desc *l1_desc;
- unsigned int num_l1_ents;
-
- u64 strtab_base;
- u32 strtab_base_cfg;
-};
-
-/* An SMMUv3 instance */
-struct arm_smmu_device {
- struct device *dev;
- void __iomem *base;
-
-#define ARM_SMMU_FEAT_2_LVL_STRTAB (1 << 0)
-#define ARM_SMMU_FEAT_2_LVL_CDTAB (1 << 1)
-#define ARM_SMMU_FEAT_TT_LE (1 << 2)
-#define ARM_SMMU_FEAT_TT_BE (1 << 3)
-#define ARM_SMMU_FEAT_PRI (1 << 4)
-#define ARM_SMMU_FEAT_ATS (1 << 5)
-#define ARM_SMMU_FEAT_SEV (1 << 6)
-#define ARM_SMMU_FEAT_MSI (1 << 7)
-#define ARM_SMMU_FEAT_COHERENCY (1 << 8)
-#define ARM_SMMU_FEAT_TRANS_S1 (1 << 9)
-#define ARM_SMMU_FEAT_TRANS_S2 (1 << 10)
-#define ARM_SMMU_FEAT_STALLS (1 << 11)
-#define ARM_SMMU_FEAT_HYP (1 << 12)
-#define ARM_SMMU_FEAT_STALL_FORCE (1 << 13)
-#define ARM_SMMU_FEAT_VAX (1 << 14)
- u32 features;
-
-#define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)
-#define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1)
- u32 options;
-
- struct arm_smmu_cmdq cmdq;
- struct arm_smmu_evtq evtq;
- struct arm_smmu_priq priq;
-
- int gerr_irq;
- int combined_irq;
-
- unsigned long ias; /* IPA */
- unsigned long oas; /* PA */
- unsigned long pgsize_bitmap;
-
-#define ARM_SMMU_MAX_ASIDS (1 << 16)
- unsigned int asid_bits;
- DECLARE_BITMAP(asid_map, ARM_SMMU_MAX_ASIDS);
-
-#define ARM_SMMU_MAX_VMIDS (1 << 16)
- unsigned int vmid_bits;
- DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);
-
- unsigned int ssid_bits;
- unsigned int sid_bits;
-
- struct arm_smmu_strtab_cfg strtab_cfg;
-
- /* IOMMU core code handle */
- struct iommu_device iommu;
-};
-
-/* SMMU private data for each master */
-struct arm_smmu_master {
- struct arm_smmu_device *smmu;
- struct device *dev;
- struct arm_smmu_domain *domain;
- struct list_head domain_head;
- u32 *sids;
- unsigned int num_sids;
- bool ats_enabled;
-};
-
-/* SMMU private data for an IOMMU domain */
-enum arm_smmu_domain_stage {
- ARM_SMMU_DOMAIN_S1 = 0,
- ARM_SMMU_DOMAIN_S2,
- ARM_SMMU_DOMAIN_NESTED,
- ARM_SMMU_DOMAIN_BYPASS,
-};
-
-struct arm_smmu_domain {
- struct arm_smmu_device *smmu;
- struct mutex init_mutex; /* Protects smmu pointer */
-
- struct io_pgtable_ops *pgtbl_ops;
- bool non_strict;
- atomic_t nr_ats_masters;
-
- enum arm_smmu_domain_stage stage;
- union {
- struct arm_smmu_s1_cfg s1_cfg;
- struct arm_smmu_s2_cfg s2_cfg;
- };
-
- struct iommu_domain domain;
-
- struct list_head devices;
- spinlock_t devices_lock;
-};
-
struct arm_smmu_option_prop {
u32 opt;
const char *prop;
};
+DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
+DEFINE_MUTEX(arm_smmu_asid_lock);
+
static struct arm_smmu_option_prop arm_smmu_options[] = {
{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
@@ -682,9 +85,8 @@
static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
struct arm_smmu_device *smmu)
{
- if ((offset > SZ_64K) &&
- (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY))
- offset -= SZ_64K;
+ if (offset > SZ_64K)
+ return smmu->page1 + offset - SZ_64K;
return smmu->base + offset;
}
@@ -750,7 +152,7 @@
* Ensure that all CPU accesses (reads and writes) to the queue
* are complete before we update the cons pointer.
*/
- mb();
+ __iomb();
writel_relaxed(q->llq.cons, q->cons_reg);
}
@@ -762,8 +164,15 @@
static int queue_sync_prod_in(struct arm_smmu_queue *q)
{
+ u32 prod;
int ret = 0;
- u32 prod = readl_relaxed(q->prod_reg);
+
+ /*
+ * We can't use the _relaxed() variant here, as we must prevent
+ * speculative reads of the queue before we have determined that
+ * prod has indeed moved.
+ */
+ prod = readl(q->prod_reg);
if (Q_OVF(prod) != Q_OVF(q->llq.prod))
ret = -EOVERFLOW;
@@ -813,7 +222,7 @@
*dst++ = cpu_to_le64(*src++);
}
-static void queue_read(__le64 *dst, u64 *src, size_t n_dwords)
+static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
{
int i;
@@ -847,28 +256,42 @@
cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
break;
+ case CMDQ_OP_CFGI_CD:
+ cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
+ fallthrough;
case CMDQ_OP_CFGI_STE:
cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
break;
+ case CMDQ_OP_CFGI_CD_ALL:
+ cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
+ break;
case CMDQ_OP_CFGI_ALL:
/* Cover the entire SID range */
cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
break;
case CMDQ_OP_TLBI_NH_VA:
+ cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
+ cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
+ cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
+ cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
break;
case CMDQ_OP_TLBI_S2_IPA:
+ cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
+ cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
+ cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
+ cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
break;
case CMDQ_OP_TLBI_NH_ASID:
cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
- /* Fallthrough */
+ fallthrough;
case CMDQ_OP_TLBI_S12_VMALL:
cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
break;
@@ -924,8 +347,7 @@
* Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
* payload, so the write will zero the entire command on that platform.
*/
- if (smmu->features & ARM_SMMU_FEAT_MSI &&
- smmu->features & ARM_SMMU_FEAT_COHERENCY) {
+ if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
q->ent_dwords * 8;
}
@@ -968,7 +390,6 @@
*/
return;
case CMDQ_ERR_CERROR_ILL_IDX:
- /* Fallthrough */
default:
break;
}
@@ -1264,8 +685,7 @@
static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
struct arm_smmu_ll_queue *llq)
{
- if (smmu->features & ARM_SMMU_FEAT_MSI &&
- smmu->features & ARM_SMMU_FEAT_COHERENCY)
+ if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
@@ -1411,7 +831,7 @@
}
/*
- * Try to unlock the cmq lock. This will fail if we're the last
+ * Try to unlock the cmdq lock. This will fail if we're the last
* reader, in which case we can safely update cmdq->q.llq.cons
*/
if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
@@ -1443,51 +863,287 @@
return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
}
-/* Context descriptor manipulation functions */
-static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
+static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq_batch *cmds,
+ struct arm_smmu_cmdq_ent *cmd)
{
- u64 val = 0;
-
- /* Repack the TCR. Just care about TTBR0 for now */
- val |= ARM_SMMU_TCR2CD(tcr, T0SZ);
- val |= ARM_SMMU_TCR2CD(tcr, TG0);
- val |= ARM_SMMU_TCR2CD(tcr, IRGN0);
- val |= ARM_SMMU_TCR2CD(tcr, ORGN0);
- val |= ARM_SMMU_TCR2CD(tcr, SH0);
- val |= ARM_SMMU_TCR2CD(tcr, EPD0);
- val |= ARM_SMMU_TCR2CD(tcr, EPD1);
- val |= ARM_SMMU_TCR2CD(tcr, IPS);
-
- return val;
+ if (cmds->num == CMDQ_BATCH_ENTRIES) {
+ arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
+ cmds->num = 0;
+ }
+ arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd);
+ cmds->num++;
}
-static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
- struct arm_smmu_s1_cfg *cfg)
+static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq_batch *cmds)
{
+ return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
+}
+
+/* Context descriptor manipulation functions */
+void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
+{
+ struct arm_smmu_cmdq_ent cmd = {
+ .opcode = CMDQ_OP_TLBI_NH_ASID,
+ .tlbi.asid = asid,
+ };
+
+ arm_smmu_cmdq_issue_cmd(smmu, &cmd);
+ arm_smmu_cmdq_issue_sync(smmu);
+}
+
+static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
+ int ssid, bool leaf)
+{
+ size_t i;
+ unsigned long flags;
+ struct arm_smmu_master *master;
+ struct arm_smmu_cmdq_batch cmds = {};
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+ struct arm_smmu_cmdq_ent cmd = {
+ .opcode = CMDQ_OP_CFGI_CD,
+ .cfgi = {
+ .ssid = ssid,
+ .leaf = leaf,
+ },
+ };
+
+ spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+ list_for_each_entry(master, &smmu_domain->devices, domain_head) {
+ for (i = 0; i < master->num_sids; i++) {
+ cmd.cfgi.sid = master->sids[i];
+ arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
+ }
+ }
+ spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
+
+ arm_smmu_cmdq_batch_submit(smmu, &cmds);
+}
+
+static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
+ struct arm_smmu_l1_ctx_desc *l1_desc)
+{
+ size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
+
+ l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
+ &l1_desc->l2ptr_dma, GFP_KERNEL);
+ if (!l1_desc->l2ptr) {
+ dev_warn(smmu->dev,
+ "failed to allocate context descriptor table\n");
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static void arm_smmu_write_cd_l1_desc(__le64 *dst,
+ struct arm_smmu_l1_ctx_desc *l1_desc)
+{
+ u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
+ CTXDESC_L1_DESC_V;
+
+ /* See comment in arm_smmu_write_ctx_desc() */
+ WRITE_ONCE(*dst, cpu_to_le64(val));
+}
+
+static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
+ u32 ssid)
+{
+ __le64 *l1ptr;
+ unsigned int idx;
+ struct arm_smmu_l1_ctx_desc *l1_desc;
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+ struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
+
+ if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
+ return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
+
+ idx = ssid >> CTXDESC_SPLIT;
+ l1_desc = &cdcfg->l1_desc[idx];
+ if (!l1_desc->l2ptr) {
+ if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
+ return NULL;
+
+ l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
+ arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
+ /* An invalid L1CD can be cached */
+ arm_smmu_sync_cd(smmu_domain, ssid, false);
+ }
+ idx = ssid & (CTXDESC_L2_ENTRIES - 1);
+ return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
+}
+
+int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
+ struct arm_smmu_ctx_desc *cd)
+{
+ /*
+ * This function handles the following cases:
+ *
+ * (1) Install primary CD, for normal DMA traffic (SSID = 0).
+ * (2) Install a secondary CD, for SID+SSID traffic.
+ * (3) Update ASID of a CD. Atomically write the first 64 bits of the
+ * CD, then invalidate the old entry and mappings.
+ * (4) Remove a secondary CD.
+ */
u64 val;
+ bool cd_live;
+ __le64 *cdptr;
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+
+ if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
+ return -E2BIG;
+
+ cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
+ if (!cdptr)
+ return -ENOMEM;
+
+ val = le64_to_cpu(cdptr[0]);
+ cd_live = !!(val & CTXDESC_CD_0_V);
+
+ if (!cd) { /* (4) */
+ val = 0;
+ } else if (cd_live) { /* (3) */
+ val &= ~CTXDESC_CD_0_ASID;
+ val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
+ /*
+ * Until CD+TLB invalidation, both ASIDs may be used for tagging
+ * this substream's traffic
+ */
+ } else { /* (1) and (2) */
+ cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
+ cdptr[2] = 0;
+ cdptr[3] = cpu_to_le64(cd->mair);
+
+ /*
+ * STE is live, and the SMMU might read dwords of this CD in any
+ * order. Ensure that it observes valid values before reading
+ * V=1.
+ */
+ arm_smmu_sync_cd(smmu_domain, ssid, true);
+
+ val = cd->tcr |
+#ifdef __BIG_ENDIAN
+ CTXDESC_CD_0_ENDI |
+#endif
+ CTXDESC_CD_0_R | CTXDESC_CD_0_A |
+ (cd->mm ? 0 : CTXDESC_CD_0_ASET) |
+ CTXDESC_CD_0_AA64 |
+ FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
+ CTXDESC_CD_0_V;
+
+ /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
+ if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
+ val |= CTXDESC_CD_0_S;
+ }
/*
- * We don't need to issue any invalidation here, as we'll invalidate
- * the STE when installing the new entry anyway.
+ * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
+ * "Configuration structures and configuration invalidation completion"
+ *
+ * The size of single-copy atomic reads made by the SMMU is
+ * IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
+ * field within an aligned 64-bit span of a structure can be altered
+ * without first making the structure invalid.
*/
- val = arm_smmu_cpu_tcr_to_cd(cfg->cd.tcr) |
-#ifdef __BIG_ENDIAN
- CTXDESC_CD_0_ENDI |
-#endif
- CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
- CTXDESC_CD_0_AA64 | FIELD_PREP(CTXDESC_CD_0_ASID, cfg->cd.asid) |
- CTXDESC_CD_0_V;
+ WRITE_ONCE(cdptr[0], cpu_to_le64(val));
+ arm_smmu_sync_cd(smmu_domain, ssid, true);
+ return 0;
+}
- /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
- if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
- val |= CTXDESC_CD_0_S;
+static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
+{
+ int ret;
+ size_t l1size;
+ size_t max_contexts;
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+ struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
+ struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
- cfg->cdptr[0] = cpu_to_le64(val);
+ max_contexts = 1 << cfg->s1cdmax;
- val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK;
- cfg->cdptr[1] = cpu_to_le64(val);
+ if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
+ max_contexts <= CTXDESC_L2_ENTRIES) {
+ cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
+ cdcfg->num_l1_ents = max_contexts;
- cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair);
+ l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
+ } else {
+ cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
+ cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
+ CTXDESC_L2_ENTRIES);
+
+ cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
+ sizeof(*cdcfg->l1_desc),
+ GFP_KERNEL);
+ if (!cdcfg->l1_desc)
+ return -ENOMEM;
+
+ l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
+ }
+
+ cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
+ GFP_KERNEL);
+ if (!cdcfg->cdtab) {
+ dev_warn(smmu->dev, "failed to allocate context descriptor\n");
+ ret = -ENOMEM;
+ goto err_free_l1;
+ }
+
+ return 0;
+
+err_free_l1:
+ if (cdcfg->l1_desc) {
+ devm_kfree(smmu->dev, cdcfg->l1_desc);
+ cdcfg->l1_desc = NULL;
+ }
+ return ret;
+}
+
+static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
+{
+ int i;
+ size_t size, l1size;
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+ struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
+
+ if (cdcfg->l1_desc) {
+ size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
+
+ for (i = 0; i < cdcfg->num_l1_ents; i++) {
+ if (!cdcfg->l1_desc[i].l2ptr)
+ continue;
+
+ dmam_free_coherent(smmu->dev, size,
+ cdcfg->l1_desc[i].l2ptr,
+ cdcfg->l1_desc[i].l2ptr_dma);
+ }
+ devm_kfree(smmu->dev, cdcfg->l1_desc);
+ cdcfg->l1_desc = NULL;
+
+ l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
+ } else {
+ l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
+ }
+
+ dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
+ cdcfg->cdtab_dma = 0;
+ cdcfg->cdtab = NULL;
+}
+
+bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
+{
+ bool free;
+ struct arm_smmu_ctx_desc *old_cd;
+
+ if (!cd->asid)
+ return false;
+
+ free = refcount_dec_and_test(&cd->refs);
+ if (free) {
+ old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
+ WARN_ON(old_cd != cd);
+ }
+ return free;
}
/* Stream table manipulation functions */
@@ -1499,7 +1155,8 @@
val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
- *dst = cpu_to_le64(val);
+ /* See comment in arm_smmu_write_ctx_desc() */
+ WRITE_ONCE(*dst, cpu_to_le64(val));
}
static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
@@ -1609,6 +1266,7 @@
if (s1_cfg) {
BUG_ON(ste_live);
dst[1] = cpu_to_le64(
+ FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
@@ -1618,8 +1276,10 @@
!(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
- val |= (s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
- FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS);
+ val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
+ FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
+ FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
+ FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
}
if (s2_cfg) {
@@ -1652,7 +1312,7 @@
arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
}
-static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent)
+static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent)
{
unsigned int i;
@@ -1677,7 +1337,7 @@
desc->span = STRTAB_SPLIT + 1;
desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
- GFP_KERNEL | __GFP_ZERO);
+ GFP_KERNEL);
if (!desc->l2ptr) {
dev_err(smmu->dev,
"failed to allocate l2 stream table for SID %u\n",
@@ -1902,17 +1562,16 @@
cmd->atc.size = log2_span;
}
-static int arm_smmu_atc_inv_master(struct arm_smmu_master *master,
- struct arm_smmu_cmdq_ent *cmd)
+static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
{
int i;
+ struct arm_smmu_cmdq_ent cmd;
- if (!master->ats_enabled)
- return 0;
+ arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
for (i = 0; i < master->num_sids; i++) {
- cmd->atc.sid = master->sids[i];
- arm_smmu_cmdq_issue_cmd(master->smmu, cmd);
+ cmd.atc.sid = master->sids[i];
+ arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
}
return arm_smmu_cmdq_issue_sync(master->smmu);
@@ -1921,10 +1580,11 @@
static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
int ssid, unsigned long iova, size_t size)
{
- int ret = 0;
+ int i;
unsigned long flags;
struct arm_smmu_cmdq_ent cmd;
struct arm_smmu_master *master;
+ struct arm_smmu_cmdq_batch cmds = {};
if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
return 0;
@@ -1949,11 +1609,18 @@
arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
- list_for_each_entry(master, &smmu_domain->devices, domain_head)
- ret |= arm_smmu_atc_inv_master(master, &cmd);
+ list_for_each_entry(master, &smmu_domain->devices, domain_head) {
+ if (!master->ats_enabled)
+ continue;
+
+ for (i = 0; i < master->num_sids; i++) {
+ cmd.atc.sid = master->sids[i];
+ arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
+ }
+ }
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
- return ret ? -ETIMEDOUT : 0;
+ return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
}
/* IO_PGTABLE API */
@@ -1963,15 +1630,6 @@
struct arm_smmu_device *smmu = smmu_domain->smmu;
struct arm_smmu_cmdq_ent cmd;
- if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
- cmd.opcode = CMDQ_OP_TLBI_NH_ASID;
- cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
- cmd.tlbi.vmid = 0;
- } else {
- cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
- cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
- }
-
/*
* NOTE: when io-pgtable is in non-strict mode, we may get here with
* PTEs previously cleared by unmaps on the current CPU not yet visible
@@ -1979,8 +1637,14 @@
* insertion to guarantee those are observed before the TLBI. Do be
* careful, 007.
*/
- arm_smmu_cmdq_issue_cmd(smmu, &cmd);
- arm_smmu_cmdq_issue_sync(smmu);
+ if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
+ arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
+ } else {
+ cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
+ cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
+ arm_smmu_cmdq_issue_cmd(smmu, &cmd);
+ arm_smmu_cmdq_issue_sync(smmu);
+ }
arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
}
@@ -1988,10 +1652,10 @@
size_t granule, bool leaf,
struct arm_smmu_domain *smmu_domain)
{
- u64 cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS];
struct arm_smmu_device *smmu = smmu_domain->smmu;
- unsigned long start = iova, end = iova + size;
- int i = 0;
+ unsigned long start = iova, end = iova + size, num_pages = 0, tg = 0;
+ size_t inv_range = granule;
+ struct arm_smmu_cmdq_batch cmds = {};
struct arm_smmu_cmdq_ent cmd = {
.tlbi = {
.leaf = leaf,
@@ -2009,19 +1673,50 @@
cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
}
+ if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
+ /* Get the leaf page size */
+ tg = __ffs(smmu_domain->domain.pgsize_bitmap);
+
+ /* Convert page size of 12,14,16 (log2) to 1,2,3 */
+ cmd.tlbi.tg = (tg - 10) / 2;
+
+ /* Determine what level the granule is at */
+ cmd.tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
+
+ num_pages = size >> tg;
+ }
+
while (iova < end) {
- if (i == CMDQ_BATCH_ENTRIES) {
- arm_smmu_cmdq_issue_cmdlist(smmu, cmds, i, false);
- i = 0;
+ if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
+ /*
+ * On each iteration of the loop, the range is 5 bits
+ * worth of the aligned size remaining.
+ * The range in pages is:
+ *
+ * range = (num_pages & (0x1f << __ffs(num_pages)))
+ */
+ unsigned long scale, num;
+
+ /* Determine the power of 2 multiple number of pages */
+ scale = __ffs(num_pages);
+ cmd.tlbi.scale = scale;
+
+ /* Determine how many chunks of 2^scale size we have */
+ num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
+ cmd.tlbi.num = num - 1;
+
+ /* range is num * 2^scale * pgsize */
+ inv_range = num << (scale + tg);
+
+ /* Clear out the lower order bits for the next iteration */
+ num_pages -= num << scale;
}
cmd.tlbi.addr = iova;
- arm_smmu_cmdq_build_cmd(&cmds[i * CMDQ_ENT_DWORDS], &cmd);
- iova += granule;
- i++;
+ arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
+ iova += inv_range;
}
-
- arm_smmu_cmdq_issue_cmdlist(smmu, cmds, i, true);
+ arm_smmu_cmdq_batch_submit(smmu, &cmds);
/*
* Unfortunately, this can't be leaf-only since we may have
@@ -2133,14 +1828,12 @@
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
- if (cfg->cdptr) {
- dmam_free_coherent(smmu_domain->smmu->dev,
- CTXDESC_CD_DWORDS << 3,
- cfg->cdptr,
- cfg->cdptr_dma);
-
- arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
- }
+ /* Prevent SVA from touching the CD while we're freeing it */
+ mutex_lock(&arm_smmu_asid_lock);
+ if (cfg->cdcfg.cdtab)
+ arm_smmu_free_cd_tables(smmu_domain);
+ arm_smmu_free_asid(&cfg->cd);
+ mutex_unlock(&arm_smmu_asid_lock);
} else {
struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
if (cfg->vmid)
@@ -2151,55 +1844,90 @@
}
static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
+ struct arm_smmu_master *master,
struct io_pgtable_cfg *pgtbl_cfg)
{
int ret;
- int asid;
+ u32 asid;
struct arm_smmu_device *smmu = smmu_domain->smmu;
struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
+ typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
- asid = arm_smmu_bitmap_alloc(smmu->asid_map, smmu->asid_bits);
- if (asid < 0)
- return asid;
+ refcount_set(&cfg->cd.refs, 1);
- cfg->cdptr = dmam_alloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3,
- &cfg->cdptr_dma,
- GFP_KERNEL | __GFP_ZERO);
- if (!cfg->cdptr) {
- dev_warn(smmu->dev, "failed to allocate context descriptor\n");
- ret = -ENOMEM;
+ /* Prevent SVA from modifying the ASID until it is written to the CD */
+ mutex_lock(&arm_smmu_asid_lock);
+ ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
+ XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
+ if (ret)
+ goto out_unlock;
+
+ cfg->s1cdmax = master->ssid_bits;
+
+ ret = arm_smmu_alloc_cd_tables(smmu_domain);
+ if (ret)
goto out_free_asid;
- }
cfg->cd.asid = (u16)asid;
- cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
- cfg->cd.tcr = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
- cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
+ cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
+ cfg->cd.tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
+ FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
+ FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
+ FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
+ FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
+ FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
+ CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
+ cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair;
+
+ /*
+ * Note that this will end up calling arm_smmu_sync_cd() before
+ * the master has been added to the devices list for this domain.
+ * This isn't an issue because the STE hasn't been installed yet.
+ */
+ ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
+ if (ret)
+ goto out_free_cd_tables;
+
+ mutex_unlock(&arm_smmu_asid_lock);
return 0;
+out_free_cd_tables:
+ arm_smmu_free_cd_tables(smmu_domain);
out_free_asid:
- arm_smmu_bitmap_free(smmu->asid_map, asid);
+ arm_smmu_free_asid(&cfg->cd);
+out_unlock:
+ mutex_unlock(&arm_smmu_asid_lock);
return ret;
}
static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
+ struct arm_smmu_master *master,
struct io_pgtable_cfg *pgtbl_cfg)
{
int vmid;
struct arm_smmu_device *smmu = smmu_domain->smmu;
struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
+ typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
if (vmid < 0)
return vmid;
+ vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
cfg->vmid = (u16)vmid;
cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
- cfg->vtcr = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
+ cfg->vtcr = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
+ FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
+ FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
+ FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
+ FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
+ FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
+ FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
return 0;
}
-static int arm_smmu_domain_finalise(struct iommu_domain *domain)
+static int arm_smmu_domain_finalise(struct iommu_domain *domain,
+ struct arm_smmu_master *master)
{
int ret;
unsigned long ias, oas;
@@ -2207,6 +1935,7 @@
struct io_pgtable_cfg pgtbl_cfg;
struct io_pgtable_ops *pgtbl_ops;
int (*finalise_stage_fn)(struct arm_smmu_domain *,
+ struct arm_smmu_master *,
struct io_pgtable_cfg *);
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_device *smmu = smmu_domain->smmu;
@@ -2261,7 +1990,7 @@
domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
domain->geometry.force_aperture = true;
- ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
+ ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
if (ret < 0) {
free_io_pgtable_ops(pgtbl_ops);
return ret;
@@ -2313,26 +2042,20 @@
}
}
-#ifdef CONFIG_PCI_ATS
static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
{
- struct pci_dev *pdev;
+ struct device *dev = master->dev;
struct arm_smmu_device *smmu = master->smmu;
- struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
- if (!(smmu->features & ARM_SMMU_FEAT_ATS) || !dev_is_pci(master->dev) ||
- !(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS) || pci_ats_disabled())
+ if (!(smmu->features & ARM_SMMU_FEAT_ATS))
return false;
- pdev = to_pci_dev(master->dev);
- return !pdev->untrusted && pdev->ats_cap;
+ if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
+ return false;
+
+ return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
}
-#else
-static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
-{
- return false;
-}
-#endif
static void arm_smmu_enable_ats(struct arm_smmu_master *master)
{
@@ -2357,7 +2080,6 @@
static void arm_smmu_disable_ats(struct arm_smmu_master *master)
{
- struct arm_smmu_cmdq_ent cmd;
struct arm_smmu_domain *smmu_domain = master->domain;
if (!master->ats_enabled)
@@ -2369,11 +2091,57 @@
* ATC invalidation via the SMMU.
*/
wmb();
- arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
- arm_smmu_atc_inv_master(master, &cmd);
+ arm_smmu_atc_inv_master(master);
atomic_dec(&smmu_domain->nr_ats_masters);
}
+static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
+{
+ int ret;
+ int features;
+ int num_pasids;
+ struct pci_dev *pdev;
+
+ if (!dev_is_pci(master->dev))
+ return -ENODEV;
+
+ pdev = to_pci_dev(master->dev);
+
+ features = pci_pasid_features(pdev);
+ if (features < 0)
+ return features;
+
+ num_pasids = pci_max_pasids(pdev);
+ if (num_pasids <= 0)
+ return num_pasids;
+
+ ret = pci_enable_pasid(pdev, features);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to enable PASID\n");
+ return ret;
+ }
+
+ master->ssid_bits = min_t(u8, ilog2(num_pasids),
+ master->smmu->ssid_bits);
+ return 0;
+}
+
+static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
+{
+ struct pci_dev *pdev;
+
+ if (!dev_is_pci(master->dev))
+ return;
+
+ pdev = to_pci_dev(master->dev);
+
+ if (!pdev->pasid_enabled)
+ return;
+
+ master->ssid_bits = 0;
+ pci_disable_pasid(pdev);
+}
+
static void arm_smmu_detach_dev(struct arm_smmu_master *master)
{
unsigned long flags;
@@ -2405,16 +2173,26 @@
if (!fwspec)
return -ENOENT;
- master = fwspec->iommu_priv;
+ master = dev_iommu_priv_get(dev);
smmu = master->smmu;
+ /*
+ * Checking that SVA is disabled ensures that this device isn't bound to
+ * any mm, and can be safely detached from its old domain. Bonds cannot
+ * be removed concurrently since we're holding the group mutex.
+ */
+ if (arm_smmu_master_sva_enabled(master)) {
+ dev_err(dev, "cannot attach - SVA enabled\n");
+ return -EBUSY;
+ }
+
arm_smmu_detach_dev(master);
mutex_lock(&smmu_domain->init_mutex);
if (!smmu_domain->smmu) {
smmu_domain->smmu = smmu;
- ret = arm_smmu_domain_finalise(domain);
+ ret = arm_smmu_domain_finalise(domain, master);
if (ret) {
smmu_domain->smmu = NULL;
goto out_unlock;
@@ -2426,6 +2204,13 @@
dev_name(smmu->dev));
ret = -ENXIO;
goto out_unlock;
+ } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
+ master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
+ dev_err(dev,
+ "cannot attach to incompatible domain (%u SSID bits != %u)\n",
+ smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
+ ret = -EINVAL;
+ goto out_unlock;
}
master->domain = smmu_domain;
@@ -2433,9 +2218,6 @@
if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
master->ats_enabled = arm_smmu_ats_supported(master);
- if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
- arm_smmu_write_ctx_desc(smmu, &smmu_domain->s1_cfg);
-
arm_smmu_install_ste_for_dev(master);
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
@@ -2450,14 +2232,14 @@
}
static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot)
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
if (!ops)
return -ENODEV;
- return ops->map(ops, iova, paddr, size, prot);
+ return ops->map(ops, iova, paddr, size, prot, gfp);
}
static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
@@ -2485,7 +2267,7 @@
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
- arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start,
+ arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start + 1,
gather->pgsize, true, smmu_domain);
}
@@ -2526,77 +2308,87 @@
static struct iommu_ops arm_smmu_ops;
-static int arm_smmu_add_device(struct device *dev)
+static struct iommu_device *arm_smmu_probe_device(struct device *dev)
{
int i, ret;
struct arm_smmu_device *smmu;
struct arm_smmu_master *master;
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
- struct iommu_group *group;
if (!fwspec || fwspec->ops != &arm_smmu_ops)
- return -ENODEV;
- /*
- * We _can_ actually withstand dodgy bus code re-calling add_device()
- * without an intervening remove_device()/of_xlate() sequence, but
- * we're not going to do so quietly...
- */
- if (WARN_ON_ONCE(fwspec->iommu_priv)) {
- master = fwspec->iommu_priv;
- smmu = master->smmu;
- } else {
- smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
- if (!smmu)
- return -ENODEV;
- master = kzalloc(sizeof(*master), GFP_KERNEL);
- if (!master)
- return -ENOMEM;
+ return ERR_PTR(-ENODEV);
- master->dev = dev;
- master->smmu = smmu;
- master->sids = fwspec->ids;
- master->num_sids = fwspec->num_ids;
- fwspec->iommu_priv = master;
- }
+ if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
+ return ERR_PTR(-EBUSY);
+
+ smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
+ if (!smmu)
+ return ERR_PTR(-ENODEV);
+
+ master = kzalloc(sizeof(*master), GFP_KERNEL);
+ if (!master)
+ return ERR_PTR(-ENOMEM);
+
+ master->dev = dev;
+ master->smmu = smmu;
+ master->sids = fwspec->ids;
+ master->num_sids = fwspec->num_ids;
+ INIT_LIST_HEAD(&master->bonds);
+ dev_iommu_priv_set(dev, master);
/* Check the SIDs are in range of the SMMU and our stream table */
for (i = 0; i < master->num_sids; i++) {
u32 sid = master->sids[i];
- if (!arm_smmu_sid_in_range(smmu, sid))
- return -ERANGE;
+ if (!arm_smmu_sid_in_range(smmu, sid)) {
+ ret = -ERANGE;
+ goto err_free_master;
+ }
/* Ensure l2 strtab is initialised */
if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
ret = arm_smmu_init_l2_strtab(smmu, sid);
if (ret)
- return ret;
+ goto err_free_master;
}
}
- group = iommu_group_get_for_dev(dev);
- if (!IS_ERR(group)) {
- iommu_group_put(group);
- iommu_device_link(&smmu->iommu, dev);
- }
+ master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits);
- return PTR_ERR_OR_ZERO(group);
+ /*
+ * Note that PASID must be enabled before, and disabled after ATS:
+ * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
+ *
+ * Behavior is undefined if this bit is Set and the value of the PASID
+ * Enable, Execute Requested Enable, or Privileged Mode Requested bits
+ * are changed.
+ */
+ arm_smmu_enable_pasid(master);
+
+ if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
+ master->ssid_bits = min_t(u8, master->ssid_bits,
+ CTXDESC_LINEAR_CDMAX);
+
+ return &smmu->iommu;
+
+err_free_master:
+ kfree(master);
+ dev_iommu_priv_set(dev, NULL);
+ return ERR_PTR(ret);
}
-static void arm_smmu_remove_device(struct device *dev)
+static void arm_smmu_release_device(struct device *dev)
{
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct arm_smmu_master *master;
- struct arm_smmu_device *smmu;
if (!fwspec || fwspec->ops != &arm_smmu_ops)
return;
- master = fwspec->iommu_priv;
- smmu = master->smmu;
+ master = dev_iommu_priv_get(dev);
+ WARN_ON(arm_smmu_master_sva_enabled(master));
arm_smmu_detach_dev(master);
- iommu_group_remove_device(dev);
- iommu_device_unlink(&smmu->iommu, dev);
+ arm_smmu_disable_pasid(master);
kfree(master);
iommu_fwspec_free(dev);
}
@@ -2712,13 +2504,67 @@
iommu_dma_get_resv_regions(dev, head);
}
-static void arm_smmu_put_resv_regions(struct device *dev,
- struct list_head *head)
+static bool arm_smmu_dev_has_feature(struct device *dev,
+ enum iommu_dev_features feat)
{
- struct iommu_resv_region *entry, *next;
+ struct arm_smmu_master *master = dev_iommu_priv_get(dev);
- list_for_each_entry_safe(entry, next, head, list)
- kfree(entry);
+ if (!master)
+ return false;
+
+ switch (feat) {
+ case IOMMU_DEV_FEAT_SVA:
+ return arm_smmu_master_sva_supported(master);
+ default:
+ return false;
+ }
+}
+
+static bool arm_smmu_dev_feature_enabled(struct device *dev,
+ enum iommu_dev_features feat)
+{
+ struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+
+ if (!master)
+ return false;
+
+ switch (feat) {
+ case IOMMU_DEV_FEAT_SVA:
+ return arm_smmu_master_sva_enabled(master);
+ default:
+ return false;
+ }
+}
+
+static int arm_smmu_dev_enable_feature(struct device *dev,
+ enum iommu_dev_features feat)
+{
+ if (!arm_smmu_dev_has_feature(dev, feat))
+ return -ENODEV;
+
+ if (arm_smmu_dev_feature_enabled(dev, feat))
+ return -EBUSY;
+
+ switch (feat) {
+ case IOMMU_DEV_FEAT_SVA:
+ return arm_smmu_master_enable_sva(dev_iommu_priv_get(dev));
+ default:
+ return -EINVAL;
+ }
+}
+
+static int arm_smmu_dev_disable_feature(struct device *dev,
+ enum iommu_dev_features feat)
+{
+ if (!arm_smmu_dev_feature_enabled(dev, feat))
+ return -EINVAL;
+
+ switch (feat) {
+ case IOMMU_DEV_FEAT_SVA:
+ return arm_smmu_master_disable_sva(dev_iommu_priv_get(dev));
+ default:
+ return -EINVAL;
+ }
}
static struct iommu_ops arm_smmu_ops = {
@@ -2731,14 +2577,18 @@
.flush_iotlb_all = arm_smmu_flush_iotlb_all,
.iotlb_sync = arm_smmu_iotlb_sync,
.iova_to_phys = arm_smmu_iova_to_phys,
- .add_device = arm_smmu_add_device,
- .remove_device = arm_smmu_remove_device,
+ .probe_device = arm_smmu_probe_device,
+ .release_device = arm_smmu_release_device,
.device_group = arm_smmu_device_group,
.domain_get_attr = arm_smmu_domain_get_attr,
.domain_set_attr = arm_smmu_domain_set_attr,
.of_xlate = arm_smmu_of_xlate,
.get_resv_regions = arm_smmu_get_resv_regions,
- .put_resv_regions = arm_smmu_put_resv_regions,
+ .put_resv_regions = generic_iommu_put_resv_regions,
+ .dev_has_feat = arm_smmu_dev_has_feature,
+ .dev_feat_enabled = arm_smmu_dev_feature_enabled,
+ .dev_enable_feat = arm_smmu_dev_enable_feature,
+ .dev_disable_feat = arm_smmu_dev_disable_feature,
.pgsize_bitmap = -1UL, /* Restricted during device attach */
};
@@ -2885,11 +2735,11 @@
l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
- GFP_KERNEL | __GFP_ZERO);
+ GFP_KERNEL);
if (!strtab) {
dev_err(smmu->dev,
"failed to allocate l1 stream table (%u bytes)\n",
- size);
+ l1size);
return -ENOMEM;
}
cfg->strtab = strtab;
@@ -2912,7 +2762,7 @@
size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
- GFP_KERNEL | __GFP_ZERO);
+ GFP_KERNEL);
if (!strtab) {
dev_err(smmu->dev,
"failed to allocate linear stream table (%u bytes)\n",
@@ -3349,8 +3199,11 @@
if (reg & IDR0_SEV)
smmu->features |= ARM_SMMU_FEAT_SEV;
- if (reg & IDR0_MSI)
+ if (reg & IDR0_MSI) {
smmu->features |= ARM_SMMU_FEAT_MSI;
+ if (coherent && !disable_msipolling)
+ smmu->options |= ARM_SMMU_OPT_MSIPOLL;
+ }
if (reg & IDR0_HYP)
smmu->features |= ARM_SMMU_FEAT_HYP;
@@ -3366,7 +3219,7 @@
switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
case IDR0_STALL_MODEL_FORCE:
smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
- /* Fallthrough */
+ fallthrough;
case IDR0_STALL_MODEL_STALL:
smmu->features |= ARM_SMMU_FEAT_STALLS;
}
@@ -3386,7 +3239,7 @@
switch (FIELD_GET(IDR0_TTF, reg)) {
case IDR0_TTF_AARCH32_64:
smmu->ias = 40;
- /* Fallthrough */
+ fallthrough;
case IDR0_TTF_AARCH64:
break;
default:
@@ -3436,6 +3289,11 @@
if (smmu->sid_bits <= STRTAB_SPLIT)
smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
+ /* IDR3 */
+ reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
+ if (FIELD_GET(IDR3_RIL, reg))
+ smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
+
/* IDR5 */
reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
@@ -3478,7 +3336,7 @@
default:
dev_info(smmu->dev,
"unknown output address size. Truncating to 48-bit\n");
- /* Fallthrough */
+ fallthrough;
case IDR5_OAS_48_BIT:
smmu->oas = 48;
}
@@ -3495,6 +3353,9 @@
smmu->ias = max(smmu->ias, smmu->oas);
+ if (arm_smmu_sva_supported(smmu))
+ smmu->features |= ARM_SMMU_FEAT_SVA;
+
dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
smmu->ias, smmu->oas, smmu->features);
return 0;
@@ -3572,6 +3433,55 @@
return SZ_128K;
}
+static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
+{
+ int err;
+
+#ifdef CONFIG_PCI
+ if (pci_bus_type.iommu_ops != ops) {
+ err = bus_set_iommu(&pci_bus_type, ops);
+ if (err)
+ return err;
+ }
+#endif
+#ifdef CONFIG_ARM_AMBA
+ if (amba_bustype.iommu_ops != ops) {
+ err = bus_set_iommu(&amba_bustype, ops);
+ if (err)
+ goto err_reset_pci_ops;
+ }
+#endif
+ if (platform_bus_type.iommu_ops != ops) {
+ err = bus_set_iommu(&platform_bus_type, ops);
+ if (err)
+ goto err_reset_amba_ops;
+ }
+
+ return 0;
+
+err_reset_amba_ops:
+#ifdef CONFIG_ARM_AMBA
+ bus_set_iommu(&amba_bustype, NULL);
+#endif
+err_reset_pci_ops: __maybe_unused;
+#ifdef CONFIG_PCI
+ bus_set_iommu(&pci_bus_type, NULL);
+#endif
+ return err;
+}
+
+static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
+ resource_size_t size)
+{
+ struct resource res = {
+ .flags = IORESOURCE_MEM,
+ .start = start,
+ .end = start + size - 1,
+ };
+
+ return devm_ioremap_resource(dev, &res);
+}
+
static int arm_smmu_device_probe(struct platform_device *pdev)
{
int irq, ret;
@@ -3601,16 +3511,29 @@
/* Base address */
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (resource_size(res) + 1 < arm_smmu_resource_size(smmu)) {
+ if (resource_size(res) < arm_smmu_resource_size(smmu)) {
dev_err(dev, "MMIO region too small (%pr)\n", res);
return -EINVAL;
}
ioaddr = res->start;
- smmu->base = devm_ioremap_resource(dev, res);
+ /*
+ * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
+ * the PMCG registers which are reserved by the PMU driver.
+ */
+ smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
if (IS_ERR(smmu->base))
return PTR_ERR(smmu->base);
+ if (arm_smmu_resource_size(smmu) > SZ_64K) {
+ smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
+ ARM_SMMU_REG_SZ);
+ if (IS_ERR(smmu->page1))
+ return PTR_ERR(smmu->page1);
+ } else {
+ smmu->page1 = smmu->base;
+ }
+
/* Interrupt lines */
irq = platform_get_irq_byname_optional(pdev, "combined");
@@ -3662,48 +3585,45 @@
return ret;
}
-#ifdef CONFIG_PCI
- if (pci_bus_type.iommu_ops != &arm_smmu_ops) {
- pci_request_acs();
- ret = bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
- if (ret)
- return ret;
- }
-#endif
-#ifdef CONFIG_ARM_AMBA
- if (amba_bustype.iommu_ops != &arm_smmu_ops) {
- ret = bus_set_iommu(&amba_bustype, &arm_smmu_ops);
- if (ret)
- return ret;
- }
-#endif
- if (platform_bus_type.iommu_ops != &arm_smmu_ops) {
- ret = bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
- if (ret)
- return ret;
- }
+ return arm_smmu_set_bus_ops(&arm_smmu_ops);
+}
+
+static int arm_smmu_device_remove(struct platform_device *pdev)
+{
+ struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
+
+ arm_smmu_set_bus_ops(NULL);
+ iommu_device_unregister(&smmu->iommu);
+ iommu_device_sysfs_remove(&smmu->iommu);
+ arm_smmu_device_disable(smmu);
+
return 0;
}
static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
- struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
-
- arm_smmu_device_disable(smmu);
+ arm_smmu_device_remove(pdev);
}
static const struct of_device_id arm_smmu_of_match[] = {
{ .compatible = "arm,smmu-v3", },
{ },
};
+MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
static struct platform_driver arm_smmu_driver = {
.driver = {
- .name = "arm-smmu-v3",
- .of_match_table = of_match_ptr(arm_smmu_of_match),
- .suppress_bind_attrs = true,
+ .name = "arm-smmu-v3",
+ .of_match_table = arm_smmu_of_match,
+ .suppress_bind_attrs = true,
},
.probe = arm_smmu_device_probe,
+ .remove = arm_smmu_device_remove,
.shutdown = arm_smmu_device_shutdown,
};
-builtin_platform_driver(arm_smmu_driver);
+module_platform_driver(arm_smmu_driver);
+
+MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
+MODULE_AUTHOR("Will Deacon <will@kernel.org>");
+MODULE_ALIAS("platform:arm-smmu-v3");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
new file mode 100644
index 0000000..57e5d22
--- /dev/null
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -0,0 +1,723 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * IOMMU API for ARM architected SMMUv3 implementations.
+ *
+ * Copyright (C) 2015 ARM Limited
+ */
+
+#ifndef _ARM_SMMU_V3_H
+#define _ARM_SMMU_V3_H
+
+#include <linux/bitfield.h>
+#include <linux/iommu.h>
+#include <linux/kernel.h>
+#include <linux/mmzone.h>
+#include <linux/sizes.h>
+
+/* MMIO registers */
+#define ARM_SMMU_IDR0 0x0
+#define IDR0_ST_LVL GENMASK(28, 27)
+#define IDR0_ST_LVL_2LVL 1
+#define IDR0_STALL_MODEL GENMASK(25, 24)
+#define IDR0_STALL_MODEL_STALL 0
+#define IDR0_STALL_MODEL_FORCE 2
+#define IDR0_TTENDIAN GENMASK(22, 21)
+#define IDR0_TTENDIAN_MIXED 0
+#define IDR0_TTENDIAN_LE 2
+#define IDR0_TTENDIAN_BE 3
+#define IDR0_CD2L (1 << 19)
+#define IDR0_VMID16 (1 << 18)
+#define IDR0_PRI (1 << 16)
+#define IDR0_SEV (1 << 14)
+#define IDR0_MSI (1 << 13)
+#define IDR0_ASID16 (1 << 12)
+#define IDR0_ATS (1 << 10)
+#define IDR0_HYP (1 << 9)
+#define IDR0_COHACC (1 << 4)
+#define IDR0_TTF GENMASK(3, 2)
+#define IDR0_TTF_AARCH64 2
+#define IDR0_TTF_AARCH32_64 3
+#define IDR0_S1P (1 << 1)
+#define IDR0_S2P (1 << 0)
+
+#define ARM_SMMU_IDR1 0x4
+#define IDR1_TABLES_PRESET (1 << 30)
+#define IDR1_QUEUES_PRESET (1 << 29)
+#define IDR1_REL (1 << 28)
+#define IDR1_CMDQS GENMASK(25, 21)
+#define IDR1_EVTQS GENMASK(20, 16)
+#define IDR1_PRIQS GENMASK(15, 11)
+#define IDR1_SSIDSIZE GENMASK(10, 6)
+#define IDR1_SIDSIZE GENMASK(5, 0)
+
+#define ARM_SMMU_IDR3 0xc
+#define IDR3_RIL (1 << 10)
+
+#define ARM_SMMU_IDR5 0x14
+#define IDR5_STALL_MAX GENMASK(31, 16)
+#define IDR5_GRAN64K (1 << 6)
+#define IDR5_GRAN16K (1 << 5)
+#define IDR5_GRAN4K (1 << 4)
+#define IDR5_OAS GENMASK(2, 0)
+#define IDR5_OAS_32_BIT 0
+#define IDR5_OAS_36_BIT 1
+#define IDR5_OAS_40_BIT 2
+#define IDR5_OAS_42_BIT 3
+#define IDR5_OAS_44_BIT 4
+#define IDR5_OAS_48_BIT 5
+#define IDR5_OAS_52_BIT 6
+#define IDR5_VAX GENMASK(11, 10)
+#define IDR5_VAX_52_BIT 1
+
+#define ARM_SMMU_CR0 0x20
+#define CR0_ATSCHK (1 << 4)
+#define CR0_CMDQEN (1 << 3)
+#define CR0_EVTQEN (1 << 2)
+#define CR0_PRIQEN (1 << 1)
+#define CR0_SMMUEN (1 << 0)
+
+#define ARM_SMMU_CR0ACK 0x24
+
+#define ARM_SMMU_CR1 0x28
+#define CR1_TABLE_SH GENMASK(11, 10)
+#define CR1_TABLE_OC GENMASK(9, 8)
+#define CR1_TABLE_IC GENMASK(7, 6)
+#define CR1_QUEUE_SH GENMASK(5, 4)
+#define CR1_QUEUE_OC GENMASK(3, 2)
+#define CR1_QUEUE_IC GENMASK(1, 0)
+/* CR1 cacheability fields don't quite follow the usual TCR-style encoding */
+#define CR1_CACHE_NC 0
+#define CR1_CACHE_WB 1
+#define CR1_CACHE_WT 2
+
+#define ARM_SMMU_CR2 0x2c
+#define CR2_PTM (1 << 2)
+#define CR2_RECINVSID (1 << 1)
+#define CR2_E2H (1 << 0)
+
+#define ARM_SMMU_GBPA 0x44
+#define GBPA_UPDATE (1 << 31)
+#define GBPA_ABORT (1 << 20)
+
+#define ARM_SMMU_IRQ_CTRL 0x50
+#define IRQ_CTRL_EVTQ_IRQEN (1 << 2)
+#define IRQ_CTRL_PRIQ_IRQEN (1 << 1)
+#define IRQ_CTRL_GERROR_IRQEN (1 << 0)
+
+#define ARM_SMMU_IRQ_CTRLACK 0x54
+
+#define ARM_SMMU_GERROR 0x60
+#define GERROR_SFM_ERR (1 << 8)
+#define GERROR_MSI_GERROR_ABT_ERR (1 << 7)
+#define GERROR_MSI_PRIQ_ABT_ERR (1 << 6)
+#define GERROR_MSI_EVTQ_ABT_ERR (1 << 5)
+#define GERROR_MSI_CMDQ_ABT_ERR (1 << 4)
+#define GERROR_PRIQ_ABT_ERR (1 << 3)
+#define GERROR_EVTQ_ABT_ERR (1 << 2)
+#define GERROR_CMDQ_ERR (1 << 0)
+#define GERROR_ERR_MASK 0x1fd
+
+#define ARM_SMMU_GERRORN 0x64
+
+#define ARM_SMMU_GERROR_IRQ_CFG0 0x68
+#define ARM_SMMU_GERROR_IRQ_CFG1 0x70
+#define ARM_SMMU_GERROR_IRQ_CFG2 0x74
+
+#define ARM_SMMU_STRTAB_BASE 0x80
+#define STRTAB_BASE_RA (1UL << 62)
+#define STRTAB_BASE_ADDR_MASK GENMASK_ULL(51, 6)
+
+#define ARM_SMMU_STRTAB_BASE_CFG 0x88
+#define STRTAB_BASE_CFG_FMT GENMASK(17, 16)
+#define STRTAB_BASE_CFG_FMT_LINEAR 0
+#define STRTAB_BASE_CFG_FMT_2LVL 1
+#define STRTAB_BASE_CFG_SPLIT GENMASK(10, 6)
+#define STRTAB_BASE_CFG_LOG2SIZE GENMASK(5, 0)
+
+#define ARM_SMMU_CMDQ_BASE 0x90
+#define ARM_SMMU_CMDQ_PROD 0x98
+#define ARM_SMMU_CMDQ_CONS 0x9c
+
+#define ARM_SMMU_EVTQ_BASE 0xa0
+#define ARM_SMMU_EVTQ_PROD 0x100a8
+#define ARM_SMMU_EVTQ_CONS 0x100ac
+#define ARM_SMMU_EVTQ_IRQ_CFG0 0xb0
+#define ARM_SMMU_EVTQ_IRQ_CFG1 0xb8
+#define ARM_SMMU_EVTQ_IRQ_CFG2 0xbc
+
+#define ARM_SMMU_PRIQ_BASE 0xc0
+#define ARM_SMMU_PRIQ_PROD 0x100c8
+#define ARM_SMMU_PRIQ_CONS 0x100cc
+#define ARM_SMMU_PRIQ_IRQ_CFG0 0xd0
+#define ARM_SMMU_PRIQ_IRQ_CFG1 0xd8
+#define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc
+
+#define ARM_SMMU_REG_SZ 0xe00
+
+/* Common MSI config fields */
+#define MSI_CFG0_ADDR_MASK GENMASK_ULL(51, 2)
+#define MSI_CFG2_SH GENMASK(5, 4)
+#define MSI_CFG2_MEMATTR GENMASK(3, 0)
+
+/* Common memory attribute values */
+#define ARM_SMMU_SH_NSH 0
+#define ARM_SMMU_SH_OSH 2
+#define ARM_SMMU_SH_ISH 3
+#define ARM_SMMU_MEMATTR_DEVICE_nGnRE 0x1
+#define ARM_SMMU_MEMATTR_OIWB 0xf
+
+#define Q_IDX(llq, p) ((p) & ((1 << (llq)->max_n_shift) - 1))
+#define Q_WRP(llq, p) ((p) & (1 << (llq)->max_n_shift))
+#define Q_OVERFLOW_FLAG (1U << 31)
+#define Q_OVF(p) ((p) & Q_OVERFLOW_FLAG)
+#define Q_ENT(q, p) ((q)->base + \
+ Q_IDX(&((q)->llq), p) * \
+ (q)->ent_dwords)
+
+#define Q_BASE_RWA (1UL << 62)
+#define Q_BASE_ADDR_MASK GENMASK_ULL(51, 5)
+#define Q_BASE_LOG2SIZE GENMASK(4, 0)
+
+/* Ensure DMA allocations are naturally aligned */
+#ifdef CONFIG_CMA_ALIGNMENT
+#define Q_MAX_SZ_SHIFT (PAGE_SHIFT + CONFIG_CMA_ALIGNMENT)
+#else
+#define Q_MAX_SZ_SHIFT (PAGE_SHIFT + MAX_ORDER - 1)
+#endif
+
+/*
+ * Stream table.
+ *
+ * Linear: Enough to cover 1 << IDR1.SIDSIZE entries
+ * 2lvl: 128k L1 entries,
+ * 256 lazy entries per table (each table covers a PCI bus)
+ */
+#define STRTAB_L1_SZ_SHIFT 20
+#define STRTAB_SPLIT 8
+
+#define STRTAB_L1_DESC_DWORDS 1
+#define STRTAB_L1_DESC_SPAN GENMASK_ULL(4, 0)
+#define STRTAB_L1_DESC_L2PTR_MASK GENMASK_ULL(51, 6)
+
+#define STRTAB_STE_DWORDS 8
+#define STRTAB_STE_0_V (1UL << 0)
+#define STRTAB_STE_0_CFG GENMASK_ULL(3, 1)
+#define STRTAB_STE_0_CFG_ABORT 0
+#define STRTAB_STE_0_CFG_BYPASS 4
+#define STRTAB_STE_0_CFG_S1_TRANS 5
+#define STRTAB_STE_0_CFG_S2_TRANS 6
+
+#define STRTAB_STE_0_S1FMT GENMASK_ULL(5, 4)
+#define STRTAB_STE_0_S1FMT_LINEAR 0
+#define STRTAB_STE_0_S1FMT_64K_L2 2
+#define STRTAB_STE_0_S1CTXPTR_MASK GENMASK_ULL(51, 6)
+#define STRTAB_STE_0_S1CDMAX GENMASK_ULL(63, 59)
+
+#define STRTAB_STE_1_S1DSS GENMASK_ULL(1, 0)
+#define STRTAB_STE_1_S1DSS_TERMINATE 0x0
+#define STRTAB_STE_1_S1DSS_BYPASS 0x1
+#define STRTAB_STE_1_S1DSS_SSID0 0x2
+
+#define STRTAB_STE_1_S1C_CACHE_NC 0UL
+#define STRTAB_STE_1_S1C_CACHE_WBRA 1UL
+#define STRTAB_STE_1_S1C_CACHE_WT 2UL
+#define STRTAB_STE_1_S1C_CACHE_WB 3UL
+#define STRTAB_STE_1_S1CIR GENMASK_ULL(3, 2)
+#define STRTAB_STE_1_S1COR GENMASK_ULL(5, 4)
+#define STRTAB_STE_1_S1CSH GENMASK_ULL(7, 6)
+
+#define STRTAB_STE_1_S1STALLD (1UL << 27)
+
+#define STRTAB_STE_1_EATS GENMASK_ULL(29, 28)
+#define STRTAB_STE_1_EATS_ABT 0UL
+#define STRTAB_STE_1_EATS_TRANS 1UL
+#define STRTAB_STE_1_EATS_S1CHK 2UL
+
+#define STRTAB_STE_1_STRW GENMASK_ULL(31, 30)
+#define STRTAB_STE_1_STRW_NSEL1 0UL
+#define STRTAB_STE_1_STRW_EL2 2UL
+
+#define STRTAB_STE_1_SHCFG GENMASK_ULL(45, 44)
+#define STRTAB_STE_1_SHCFG_INCOMING 1UL
+
+#define STRTAB_STE_2_S2VMID GENMASK_ULL(15, 0)
+#define STRTAB_STE_2_VTCR GENMASK_ULL(50, 32)
+#define STRTAB_STE_2_VTCR_S2T0SZ GENMASK_ULL(5, 0)
+#define STRTAB_STE_2_VTCR_S2SL0 GENMASK_ULL(7, 6)
+#define STRTAB_STE_2_VTCR_S2IR0 GENMASK_ULL(9, 8)
+#define STRTAB_STE_2_VTCR_S2OR0 GENMASK_ULL(11, 10)
+#define STRTAB_STE_2_VTCR_S2SH0 GENMASK_ULL(13, 12)
+#define STRTAB_STE_2_VTCR_S2TG GENMASK_ULL(15, 14)
+#define STRTAB_STE_2_VTCR_S2PS GENMASK_ULL(18, 16)
+#define STRTAB_STE_2_S2AA64 (1UL << 51)
+#define STRTAB_STE_2_S2ENDI (1UL << 52)
+#define STRTAB_STE_2_S2PTW (1UL << 54)
+#define STRTAB_STE_2_S2R (1UL << 58)
+
+#define STRTAB_STE_3_S2TTB_MASK GENMASK_ULL(51, 4)
+
+/*
+ * Context descriptors.
+ *
+ * Linear: when less than 1024 SSIDs are supported
+ * 2lvl: at most 1024 L1 entries,
+ * 1024 lazy entries per table.
+ */
+#define CTXDESC_SPLIT 10
+#define CTXDESC_L2_ENTRIES (1 << CTXDESC_SPLIT)
+
+#define CTXDESC_L1_DESC_DWORDS 1
+#define CTXDESC_L1_DESC_V (1UL << 0)
+#define CTXDESC_L1_DESC_L2PTR_MASK GENMASK_ULL(51, 12)
+
+#define CTXDESC_CD_DWORDS 8
+#define CTXDESC_CD_0_TCR_T0SZ GENMASK_ULL(5, 0)
+#define CTXDESC_CD_0_TCR_TG0 GENMASK_ULL(7, 6)
+#define CTXDESC_CD_0_TCR_IRGN0 GENMASK_ULL(9, 8)
+#define CTXDESC_CD_0_TCR_ORGN0 GENMASK_ULL(11, 10)
+#define CTXDESC_CD_0_TCR_SH0 GENMASK_ULL(13, 12)
+#define CTXDESC_CD_0_TCR_EPD0 (1ULL << 14)
+#define CTXDESC_CD_0_TCR_EPD1 (1ULL << 30)
+
+#define CTXDESC_CD_0_ENDI (1UL << 15)
+#define CTXDESC_CD_0_V (1UL << 31)
+
+#define CTXDESC_CD_0_TCR_IPS GENMASK_ULL(34, 32)
+#define CTXDESC_CD_0_TCR_TBI0 (1ULL << 38)
+
+#define CTXDESC_CD_0_AA64 (1UL << 41)
+#define CTXDESC_CD_0_S (1UL << 44)
+#define CTXDESC_CD_0_R (1UL << 45)
+#define CTXDESC_CD_0_A (1UL << 46)
+#define CTXDESC_CD_0_ASET (1UL << 47)
+#define CTXDESC_CD_0_ASID GENMASK_ULL(63, 48)
+
+#define CTXDESC_CD_1_TTB0_MASK GENMASK_ULL(51, 4)
+
+/*
+ * When the SMMU only supports linear context descriptor tables, pick a
+ * reasonable size limit (64kB).
+ */
+#define CTXDESC_LINEAR_CDMAX ilog2(SZ_64K / (CTXDESC_CD_DWORDS << 3))
+
+/* Command queue */
+#define CMDQ_ENT_SZ_SHIFT 4
+#define CMDQ_ENT_DWORDS ((1 << CMDQ_ENT_SZ_SHIFT) >> 3)
+#define CMDQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - CMDQ_ENT_SZ_SHIFT)
+
+#define CMDQ_CONS_ERR GENMASK(30, 24)
+#define CMDQ_ERR_CERROR_NONE_IDX 0
+#define CMDQ_ERR_CERROR_ILL_IDX 1
+#define CMDQ_ERR_CERROR_ABT_IDX 2
+#define CMDQ_ERR_CERROR_ATC_INV_IDX 3
+
+#define CMDQ_PROD_OWNED_FLAG Q_OVERFLOW_FLAG
+
+/*
+ * This is used to size the command queue and therefore must be at least
+ * BITS_PER_LONG so that the valid_map works correctly (it relies on the
+ * total number of queue entries being a multiple of BITS_PER_LONG).
+ */
+#define CMDQ_BATCH_ENTRIES BITS_PER_LONG
+
+#define CMDQ_0_OP GENMASK_ULL(7, 0)
+#define CMDQ_0_SSV (1UL << 11)
+
+#define CMDQ_PREFETCH_0_SID GENMASK_ULL(63, 32)
+#define CMDQ_PREFETCH_1_SIZE GENMASK_ULL(4, 0)
+#define CMDQ_PREFETCH_1_ADDR_MASK GENMASK_ULL(63, 12)
+
+#define CMDQ_CFGI_0_SSID GENMASK_ULL(31, 12)
+#define CMDQ_CFGI_0_SID GENMASK_ULL(63, 32)
+#define CMDQ_CFGI_1_LEAF (1UL << 0)
+#define CMDQ_CFGI_1_RANGE GENMASK_ULL(4, 0)
+
+#define CMDQ_TLBI_0_NUM GENMASK_ULL(16, 12)
+#define CMDQ_TLBI_RANGE_NUM_MAX 31
+#define CMDQ_TLBI_0_SCALE GENMASK_ULL(24, 20)
+#define CMDQ_TLBI_0_VMID GENMASK_ULL(47, 32)
+#define CMDQ_TLBI_0_ASID GENMASK_ULL(63, 48)
+#define CMDQ_TLBI_1_LEAF (1UL << 0)
+#define CMDQ_TLBI_1_TTL GENMASK_ULL(9, 8)
+#define CMDQ_TLBI_1_TG GENMASK_ULL(11, 10)
+#define CMDQ_TLBI_1_VA_MASK GENMASK_ULL(63, 12)
+#define CMDQ_TLBI_1_IPA_MASK GENMASK_ULL(51, 12)
+
+#define CMDQ_ATC_0_SSID GENMASK_ULL(31, 12)
+#define CMDQ_ATC_0_SID GENMASK_ULL(63, 32)
+#define CMDQ_ATC_0_GLOBAL (1UL << 9)
+#define CMDQ_ATC_1_SIZE GENMASK_ULL(5, 0)
+#define CMDQ_ATC_1_ADDR_MASK GENMASK_ULL(63, 12)
+
+#define CMDQ_PRI_0_SSID GENMASK_ULL(31, 12)
+#define CMDQ_PRI_0_SID GENMASK_ULL(63, 32)
+#define CMDQ_PRI_1_GRPID GENMASK_ULL(8, 0)
+#define CMDQ_PRI_1_RESP GENMASK_ULL(13, 12)
+
+#define CMDQ_SYNC_0_CS GENMASK_ULL(13, 12)
+#define CMDQ_SYNC_0_CS_NONE 0
+#define CMDQ_SYNC_0_CS_IRQ 1
+#define CMDQ_SYNC_0_CS_SEV 2
+#define CMDQ_SYNC_0_MSH GENMASK_ULL(23, 22)
+#define CMDQ_SYNC_0_MSIATTR GENMASK_ULL(27, 24)
+#define CMDQ_SYNC_0_MSIDATA GENMASK_ULL(63, 32)
+#define CMDQ_SYNC_1_MSIADDR_MASK GENMASK_ULL(51, 2)
+
+/* Event queue */
+#define EVTQ_ENT_SZ_SHIFT 5
+#define EVTQ_ENT_DWORDS ((1 << EVTQ_ENT_SZ_SHIFT) >> 3)
+#define EVTQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - EVTQ_ENT_SZ_SHIFT)
+
+#define EVTQ_0_ID GENMASK_ULL(7, 0)
+
+/* PRI queue */
+#define PRIQ_ENT_SZ_SHIFT 4
+#define PRIQ_ENT_DWORDS ((1 << PRIQ_ENT_SZ_SHIFT) >> 3)
+#define PRIQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - PRIQ_ENT_SZ_SHIFT)
+
+#define PRIQ_0_SID GENMASK_ULL(31, 0)
+#define PRIQ_0_SSID GENMASK_ULL(51, 32)
+#define PRIQ_0_PERM_PRIV (1UL << 58)
+#define PRIQ_0_PERM_EXEC (1UL << 59)
+#define PRIQ_0_PERM_READ (1UL << 60)
+#define PRIQ_0_PERM_WRITE (1UL << 61)
+#define PRIQ_0_PRG_LAST (1UL << 62)
+#define PRIQ_0_SSID_V (1UL << 63)
+
+#define PRIQ_1_PRG_IDX GENMASK_ULL(8, 0)
+#define PRIQ_1_ADDR_MASK GENMASK_ULL(63, 12)
+
+/* High-level queue structures */
+#define ARM_SMMU_POLL_TIMEOUT_US 1000000 /* 1s! */
+#define ARM_SMMU_POLL_SPIN_COUNT 10
+
+#define MSI_IOVA_BASE 0x8000000
+#define MSI_IOVA_LENGTH 0x100000
+
+enum pri_resp {
+ PRI_RESP_DENY = 0,
+ PRI_RESP_FAIL = 1,
+ PRI_RESP_SUCC = 2,
+};
+
+struct arm_smmu_cmdq_ent {
+ /* Common fields */
+ u8 opcode;
+ bool substream_valid;
+
+ /* Command-specific fields */
+ union {
+ #define CMDQ_OP_PREFETCH_CFG 0x1
+ struct {
+ u32 sid;
+ u8 size;
+ u64 addr;
+ } prefetch;
+
+ #define CMDQ_OP_CFGI_STE 0x3
+ #define CMDQ_OP_CFGI_ALL 0x4
+ #define CMDQ_OP_CFGI_CD 0x5
+ #define CMDQ_OP_CFGI_CD_ALL 0x6
+ struct {
+ u32 sid;
+ u32 ssid;
+ union {
+ bool leaf;
+ u8 span;
+ };
+ } cfgi;
+
+ #define CMDQ_OP_TLBI_NH_ASID 0x11
+ #define CMDQ_OP_TLBI_NH_VA 0x12
+ #define CMDQ_OP_TLBI_EL2_ALL 0x20
+ #define CMDQ_OP_TLBI_S12_VMALL 0x28
+ #define CMDQ_OP_TLBI_S2_IPA 0x2a
+ #define CMDQ_OP_TLBI_NSNH_ALL 0x30
+ struct {
+ u8 num;
+ u8 scale;
+ u16 asid;
+ u16 vmid;
+ bool leaf;
+ u8 ttl;
+ u8 tg;
+ u64 addr;
+ } tlbi;
+
+ #define CMDQ_OP_ATC_INV 0x40
+ #define ATC_INV_SIZE_ALL 52
+ struct {
+ u32 sid;
+ u32 ssid;
+ u64 addr;
+ u8 size;
+ bool global;
+ } atc;
+
+ #define CMDQ_OP_PRI_RESP 0x41
+ struct {
+ u32 sid;
+ u32 ssid;
+ u16 grpid;
+ enum pri_resp resp;
+ } pri;
+
+ #define CMDQ_OP_CMD_SYNC 0x46
+ struct {
+ u64 msiaddr;
+ } sync;
+ };
+};
+
+struct arm_smmu_ll_queue {
+ union {
+ u64 val;
+ struct {
+ u32 prod;
+ u32 cons;
+ };
+ struct {
+ atomic_t prod;
+ atomic_t cons;
+ } atomic;
+ u8 __pad[SMP_CACHE_BYTES];
+ } ____cacheline_aligned_in_smp;
+ u32 max_n_shift;
+};
+
+struct arm_smmu_queue {
+ struct arm_smmu_ll_queue llq;
+ int irq; /* Wired interrupt */
+
+ __le64 *base;
+ dma_addr_t base_dma;
+ u64 q_base;
+
+ size_t ent_dwords;
+
+ u32 __iomem *prod_reg;
+ u32 __iomem *cons_reg;
+};
+
+struct arm_smmu_queue_poll {
+ ktime_t timeout;
+ unsigned int delay;
+ unsigned int spin_cnt;
+ bool wfe;
+};
+
+struct arm_smmu_cmdq {
+ struct arm_smmu_queue q;
+ atomic_long_t *valid_map;
+ atomic_t owner_prod;
+ atomic_t lock;
+};
+
+struct arm_smmu_cmdq_batch {
+ u64 cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS];
+ int num;
+};
+
+struct arm_smmu_evtq {
+ struct arm_smmu_queue q;
+ u32 max_stalls;
+};
+
+struct arm_smmu_priq {
+ struct arm_smmu_queue q;
+};
+
+/* High-level stream table and context descriptor structures */
+struct arm_smmu_strtab_l1_desc {
+ u8 span;
+
+ __le64 *l2ptr;
+ dma_addr_t l2ptr_dma;
+};
+
+struct arm_smmu_ctx_desc {
+ u16 asid;
+ u64 ttbr;
+ u64 tcr;
+ u64 mair;
+
+ refcount_t refs;
+ struct mm_struct *mm;
+};
+
+struct arm_smmu_l1_ctx_desc {
+ __le64 *l2ptr;
+ dma_addr_t l2ptr_dma;
+};
+
+struct arm_smmu_ctx_desc_cfg {
+ __le64 *cdtab;
+ dma_addr_t cdtab_dma;
+ struct arm_smmu_l1_ctx_desc *l1_desc;
+ unsigned int num_l1_ents;
+};
+
+struct arm_smmu_s1_cfg {
+ struct arm_smmu_ctx_desc_cfg cdcfg;
+ struct arm_smmu_ctx_desc cd;
+ u8 s1fmt;
+ u8 s1cdmax;
+};
+
+struct arm_smmu_s2_cfg {
+ u16 vmid;
+ u64 vttbr;
+ u64 vtcr;
+};
+
+struct arm_smmu_strtab_cfg {
+ __le64 *strtab;
+ dma_addr_t strtab_dma;
+ struct arm_smmu_strtab_l1_desc *l1_desc;
+ unsigned int num_l1_ents;
+
+ u64 strtab_base;
+ u32 strtab_base_cfg;
+};
+
+/* An SMMUv3 instance */
+struct arm_smmu_device {
+ struct device *dev;
+ void __iomem *base;
+ void __iomem *page1;
+
+#define ARM_SMMU_FEAT_2_LVL_STRTAB (1 << 0)
+#define ARM_SMMU_FEAT_2_LVL_CDTAB (1 << 1)
+#define ARM_SMMU_FEAT_TT_LE (1 << 2)
+#define ARM_SMMU_FEAT_TT_BE (1 << 3)
+#define ARM_SMMU_FEAT_PRI (1 << 4)
+#define ARM_SMMU_FEAT_ATS (1 << 5)
+#define ARM_SMMU_FEAT_SEV (1 << 6)
+#define ARM_SMMU_FEAT_MSI (1 << 7)
+#define ARM_SMMU_FEAT_COHERENCY (1 << 8)
+#define ARM_SMMU_FEAT_TRANS_S1 (1 << 9)
+#define ARM_SMMU_FEAT_TRANS_S2 (1 << 10)
+#define ARM_SMMU_FEAT_STALLS (1 << 11)
+#define ARM_SMMU_FEAT_HYP (1 << 12)
+#define ARM_SMMU_FEAT_STALL_FORCE (1 << 13)
+#define ARM_SMMU_FEAT_VAX (1 << 14)
+#define ARM_SMMU_FEAT_RANGE_INV (1 << 15)
+#define ARM_SMMU_FEAT_BTM (1 << 16)
+#define ARM_SMMU_FEAT_SVA (1 << 17)
+ u32 features;
+
+#define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)
+#define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1)
+#define ARM_SMMU_OPT_MSIPOLL (1 << 2)
+ u32 options;
+
+ struct arm_smmu_cmdq cmdq;
+ struct arm_smmu_evtq evtq;
+ struct arm_smmu_priq priq;
+
+ int gerr_irq;
+ int combined_irq;
+
+ unsigned long ias; /* IPA */
+ unsigned long oas; /* PA */
+ unsigned long pgsize_bitmap;
+
+#define ARM_SMMU_MAX_ASIDS (1 << 16)
+ unsigned int asid_bits;
+
+#define ARM_SMMU_MAX_VMIDS (1 << 16)
+ unsigned int vmid_bits;
+ DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);
+
+ unsigned int ssid_bits;
+ unsigned int sid_bits;
+
+ struct arm_smmu_strtab_cfg strtab_cfg;
+
+ /* IOMMU core code handle */
+ struct iommu_device iommu;
+};
+
+/* SMMU private data for each master */
+struct arm_smmu_master {
+ struct arm_smmu_device *smmu;
+ struct device *dev;
+ struct arm_smmu_domain *domain;
+ struct list_head domain_head;
+ u32 *sids;
+ unsigned int num_sids;
+ bool ats_enabled;
+ bool sva_enabled;
+ struct list_head bonds;
+ unsigned int ssid_bits;
+};
+
+/* SMMU private data for an IOMMU domain */
+enum arm_smmu_domain_stage {
+ ARM_SMMU_DOMAIN_S1 = 0,
+ ARM_SMMU_DOMAIN_S2,
+ ARM_SMMU_DOMAIN_NESTED,
+ ARM_SMMU_DOMAIN_BYPASS,
+};
+
+struct arm_smmu_domain {
+ struct arm_smmu_device *smmu;
+ struct mutex init_mutex; /* Protects smmu pointer */
+
+ struct io_pgtable_ops *pgtbl_ops;
+ bool non_strict;
+ atomic_t nr_ats_masters;
+
+ enum arm_smmu_domain_stage stage;
+ union {
+ struct arm_smmu_s1_cfg s1_cfg;
+ struct arm_smmu_s2_cfg s2_cfg;
+ };
+
+ struct iommu_domain domain;
+
+ struct list_head devices;
+ spinlock_t devices_lock;
+};
+
+extern struct xarray arm_smmu_asid_xa;
+extern struct mutex arm_smmu_asid_lock;
+
+int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
+ struct arm_smmu_ctx_desc *cd);
+void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid);
+bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd);
+
+#ifdef CONFIG_ARM_SMMU_V3_SVA
+bool arm_smmu_sva_supported(struct arm_smmu_device *smmu);
+bool arm_smmu_master_sva_supported(struct arm_smmu_master *master);
+bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master);
+int arm_smmu_master_enable_sva(struct arm_smmu_master *master);
+int arm_smmu_master_disable_sva(struct arm_smmu_master *master);
+#else /* CONFIG_ARM_SMMU_V3_SVA */
+static inline bool arm_smmu_sva_supported(struct arm_smmu_device *smmu)
+{
+ return false;
+}
+
+static inline bool arm_smmu_master_sva_supported(struct arm_smmu_master *master)
+{
+ return false;
+}
+
+static inline bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master)
+{
+ return false;
+}
+
+static inline int arm_smmu_master_enable_sva(struct arm_smmu_master *master)
+{
+ return -ENODEV;
+}
+
+static inline int arm_smmu_master_disable_sva(struct arm_smmu_master *master)
+{
+ return -ENODEV;
+}
+#endif /* CONFIG_ARM_SMMU_V3_SVA */
+#endif /* _ARM_SMMU_V3_H */
diff --git a/drivers/iommu/arm/arm-smmu/Makefile b/drivers/iommu/arm/arm-smmu/Makefile
new file mode 100644
index 0000000..e240a7b
--- /dev/null
+++ b/drivers/iommu/arm/arm-smmu/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_QCOM_IOMMU) += qcom_iommu.o
+obj-$(CONFIG_ARM_SMMU) += arm_smmu.o
+arm_smmu-objs += arm-smmu.o arm-smmu-impl.o arm-smmu-nvidia.o arm-smmu-qcom.o
diff --git a/drivers/iommu/arm-smmu-impl.c b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
similarity index 66%
rename from drivers/iommu/arm-smmu-impl.c
rename to drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
index 5c87a38..88f17cc 100644
--- a/drivers/iommu/arm-smmu-impl.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
@@ -68,7 +68,8 @@
return 0;
}
-static int cavium_init_context(struct arm_smmu_domain *smmu_domain)
+static int cavium_init_context(struct arm_smmu_domain *smmu_domain,
+ struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
{
struct cavium_smmu *cs = container_of(smmu_domain->smmu,
struct cavium_smmu, smmu);
@@ -109,7 +110,7 @@
#define ARM_MMU500_ACR_S2CRB_TLBEN (1 << 10)
#define ARM_MMU500_ACR_SMTNMB_TLBEN (1 << 8)
-static int arm_mmu500_reset(struct arm_smmu_device *smmu)
+int arm_mmu500_reset(struct arm_smmu_device *smmu)
{
u32 reg, major;
int i;
@@ -119,7 +120,7 @@
* Secure has also cleared SACR.CACHE_LOCK for this to take effect...
*/
reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID7);
- major = FIELD_GET(ID7_MAJOR, reg);
+ major = FIELD_GET(ARM_SMMU_ID7_MAJOR, reg);
reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sACR);
if (major >= 2)
reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
@@ -147,14 +148,57 @@
.reset = arm_mmu500_reset,
};
+static u64 mrvl_mmu500_readq(struct arm_smmu_device *smmu, int page, int off)
+{
+ /*
+ * Marvell Armada-AP806 erratum #582743.
+ * Split all the readq to double readl
+ */
+ return hi_lo_readq_relaxed(arm_smmu_page(smmu, page) + off);
+}
+
+static void mrvl_mmu500_writeq(struct arm_smmu_device *smmu, int page, int off,
+ u64 val)
+{
+ /*
+ * Marvell Armada-AP806 erratum #582743.
+ * Split all the writeq to double writel
+ */
+ hi_lo_writeq_relaxed(val, arm_smmu_page(smmu, page) + off);
+}
+
+static int mrvl_mmu500_cfg_probe(struct arm_smmu_device *smmu)
+{
+
+ /*
+ * Armada-AP806 erratum #582743.
+ * Hide the SMMU_IDR2.PTFSv8 fields to sidestep the AArch64
+ * formats altogether and allow using 32 bits access on the
+ * interconnect.
+ */
+ smmu->features &= ~(ARM_SMMU_FEAT_FMT_AARCH64_4K |
+ ARM_SMMU_FEAT_FMT_AARCH64_16K |
+ ARM_SMMU_FEAT_FMT_AARCH64_64K);
+
+ return 0;
+}
+
+static const struct arm_smmu_impl mrvl_mmu500_impl = {
+ .read_reg64 = mrvl_mmu500_readq,
+ .write_reg64 = mrvl_mmu500_writeq,
+ .cfg_probe = mrvl_mmu500_cfg_probe,
+ .reset = arm_mmu500_reset,
+};
+
struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu)
{
+ const struct device_node *np = smmu->dev->of_node;
+
/*
- * We will inevitably have to combine model-specific implementation
- * quirks with platform-specific integration quirks, but everything
- * we currently support happens to work out as straightforward
- * mutually-exclusive assignments.
+ * Set the impl for model-specific implementation quirks first,
+ * such that platform integration quirks can pick it up and
+ * inherit from it if necessary.
*/
switch (smmu->model) {
case ARM_MMU500:
@@ -166,9 +210,21 @@
break;
}
- if (of_property_read_bool(smmu->dev->of_node,
- "calxeda,smmu-secure-config-access"))
+ /* This is implicitly MMU-400 */
+ if (of_property_read_bool(np, "calxeda,smmu-secure-config-access"))
smmu->impl = &calxeda_impl;
+ if (of_device_is_compatible(np, "nvidia,tegra194-smmu"))
+ return nvidia_smmu_impl_init(smmu);
+
+ if (of_device_is_compatible(np, "qcom,sdm845-smmu-500") ||
+ of_device_is_compatible(np, "qcom,sc7180-smmu-500") ||
+ of_device_is_compatible(np, "qcom,sm8150-smmu-500") ||
+ of_device_is_compatible(np, "qcom,sm8250-smmu-500"))
+ return qcom_smmu_impl_init(smmu);
+
+ if (of_device_is_compatible(np, "marvell,ap806-smmu-500"))
+ smmu->impl = &mrvl_mmu500_impl;
+
return smmu;
}
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
new file mode 100644
index 0000000..3136805
--- /dev/null
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
@@ -0,0 +1,278 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2019-2020 NVIDIA CORPORATION. All rights reserved.
+
+#include <linux/bitfield.h>
+#include <linux/delay.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "arm-smmu.h"
+
+/*
+ * Tegra194 has three ARM MMU-500 Instances.
+ * Two of them are used together and must be programmed identically for
+ * interleaved IOVA accesses across them and translates accesses from
+ * non-isochronous HW devices.
+ * Third one is used for translating accesses from isochronous HW devices.
+ * This implementation supports programming of the two instances that must
+ * be programmed identically.
+ * The third instance usage is through standard arm-smmu driver itself and
+ * is out of scope of this implementation.
+ */
+#define NUM_SMMU_INSTANCES 2
+
+struct nvidia_smmu {
+ struct arm_smmu_device smmu;
+ void __iomem *bases[NUM_SMMU_INSTANCES];
+};
+
+static inline void __iomem *nvidia_smmu_page(struct arm_smmu_device *smmu,
+ unsigned int inst, int page)
+{
+ struct nvidia_smmu *nvidia_smmu;
+
+ nvidia_smmu = container_of(smmu, struct nvidia_smmu, smmu);
+ return nvidia_smmu->bases[inst] + (page << smmu->pgshift);
+}
+
+static u32 nvidia_smmu_read_reg(struct arm_smmu_device *smmu,
+ int page, int offset)
+{
+ void __iomem *reg = nvidia_smmu_page(smmu, 0, page) + offset;
+
+ return readl_relaxed(reg);
+}
+
+static void nvidia_smmu_write_reg(struct arm_smmu_device *smmu,
+ int page, int offset, u32 val)
+{
+ unsigned int i;
+
+ for (i = 0; i < NUM_SMMU_INSTANCES; i++) {
+ void __iomem *reg = nvidia_smmu_page(smmu, i, page) + offset;
+
+ writel_relaxed(val, reg);
+ }
+}
+
+static u64 nvidia_smmu_read_reg64(struct arm_smmu_device *smmu,
+ int page, int offset)
+{
+ void __iomem *reg = nvidia_smmu_page(smmu, 0, page) + offset;
+
+ return readq_relaxed(reg);
+}
+
+static void nvidia_smmu_write_reg64(struct arm_smmu_device *smmu,
+ int page, int offset, u64 val)
+{
+ unsigned int i;
+
+ for (i = 0; i < NUM_SMMU_INSTANCES; i++) {
+ void __iomem *reg = nvidia_smmu_page(smmu, i, page) + offset;
+
+ writeq_relaxed(val, reg);
+ }
+}
+
+static void nvidia_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
+ int sync, int status)
+{
+ unsigned int delay;
+
+ arm_smmu_writel(smmu, page, sync, 0);
+
+ for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
+ unsigned int spin_cnt;
+
+ for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
+ u32 val = 0;
+ unsigned int i;
+
+ for (i = 0; i < NUM_SMMU_INSTANCES; i++) {
+ void __iomem *reg;
+
+ reg = nvidia_smmu_page(smmu, i, page) + status;
+ val |= readl_relaxed(reg);
+ }
+
+ if (!(val & ARM_SMMU_sTLBGSTATUS_GSACTIVE))
+ return;
+
+ cpu_relax();
+ }
+
+ udelay(delay);
+ }
+
+ dev_err_ratelimited(smmu->dev,
+ "TLB sync timed out -- SMMU may be deadlocked\n");
+}
+
+static int nvidia_smmu_reset(struct arm_smmu_device *smmu)
+{
+ unsigned int i;
+
+ for (i = 0; i < NUM_SMMU_INSTANCES; i++) {
+ u32 val;
+ void __iomem *reg = nvidia_smmu_page(smmu, i, ARM_SMMU_GR0) +
+ ARM_SMMU_GR0_sGFSR;
+
+ /* clear global FSR */
+ val = readl_relaxed(reg);
+ writel_relaxed(val, reg);
+ }
+
+ return 0;
+}
+
+static irqreturn_t nvidia_smmu_global_fault_inst(int irq,
+ struct arm_smmu_device *smmu,
+ int inst)
+{
+ u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
+ void __iomem *gr0_base = nvidia_smmu_page(smmu, inst, 0);
+
+ gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
+ if (!gfsr)
+ return IRQ_NONE;
+
+ gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
+ gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
+ gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);
+
+ dev_err_ratelimited(smmu->dev,
+ "Unexpected global fault, this could be serious\n");
+ dev_err_ratelimited(smmu->dev,
+ "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
+ gfsr, gfsynr0, gfsynr1, gfsynr2);
+
+ writel_relaxed(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t nvidia_smmu_global_fault(int irq, void *dev)
+{
+ unsigned int inst;
+ irqreturn_t ret = IRQ_NONE;
+ struct arm_smmu_device *smmu = dev;
+
+ for (inst = 0; inst < NUM_SMMU_INSTANCES; inst++) {
+ irqreturn_t irq_ret;
+
+ irq_ret = nvidia_smmu_global_fault_inst(irq, smmu, inst);
+ if (irq_ret == IRQ_HANDLED)
+ ret = IRQ_HANDLED;
+ }
+
+ return ret;
+}
+
+static irqreturn_t nvidia_smmu_context_fault_bank(int irq,
+ struct arm_smmu_device *smmu,
+ int idx, int inst)
+{
+ u32 fsr, fsynr, cbfrsynra;
+ unsigned long iova;
+ void __iomem *gr1_base = nvidia_smmu_page(smmu, inst, 1);
+ void __iomem *cb_base = nvidia_smmu_page(smmu, inst, smmu->numpage + idx);
+
+ fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
+ if (!(fsr & ARM_SMMU_FSR_FAULT))
+ return IRQ_NONE;
+
+ fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
+ iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
+ cbfrsynra = readl_relaxed(gr1_base + ARM_SMMU_GR1_CBFRSYNRA(idx));
+
+ dev_err_ratelimited(smmu->dev,
+ "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
+ fsr, iova, fsynr, cbfrsynra, idx);
+
+ writel_relaxed(fsr, cb_base + ARM_SMMU_CB_FSR);
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t nvidia_smmu_context_fault(int irq, void *dev)
+{
+ int idx;
+ unsigned int inst;
+ irqreturn_t ret = IRQ_NONE;
+ struct arm_smmu_device *smmu;
+ struct iommu_domain *domain = dev;
+ struct arm_smmu_domain *smmu_domain;
+
+ smmu_domain = container_of(domain, struct arm_smmu_domain, domain);
+ smmu = smmu_domain->smmu;
+
+ for (inst = 0; inst < NUM_SMMU_INSTANCES; inst++) {
+ irqreturn_t irq_ret;
+
+ /*
+ * Interrupt line is shared between all contexts.
+ * Check for faults across all contexts.
+ */
+ for (idx = 0; idx < smmu->num_context_banks; idx++) {
+ irq_ret = nvidia_smmu_context_fault_bank(irq, smmu,
+ idx, inst);
+ if (irq_ret == IRQ_HANDLED)
+ ret = IRQ_HANDLED;
+ }
+ }
+
+ return ret;
+}
+
+static const struct arm_smmu_impl nvidia_smmu_impl = {
+ .read_reg = nvidia_smmu_read_reg,
+ .write_reg = nvidia_smmu_write_reg,
+ .read_reg64 = nvidia_smmu_read_reg64,
+ .write_reg64 = nvidia_smmu_write_reg64,
+ .reset = nvidia_smmu_reset,
+ .tlb_sync = nvidia_smmu_tlb_sync,
+ .global_fault = nvidia_smmu_global_fault,
+ .context_fault = nvidia_smmu_context_fault,
+};
+
+struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu)
+{
+ struct resource *res;
+ struct device *dev = smmu->dev;
+ struct nvidia_smmu *nvidia_smmu;
+ struct platform_device *pdev = to_platform_device(dev);
+
+ nvidia_smmu = devm_kzalloc(dev, sizeof(*nvidia_smmu), GFP_KERNEL);
+ if (!nvidia_smmu)
+ return ERR_PTR(-ENOMEM);
+
+ /*
+ * Copy the data from struct arm_smmu_device *smmu allocated in
+ * arm-smmu.c. The smmu from struct nvidia_smmu replaces the smmu
+ * pointer used in arm-smmu.c once this function returns.
+ * This is necessary to derive nvidia_smmu from smmu pointer passed
+ * through arm_smmu_impl function calls subsequently.
+ */
+ nvidia_smmu->smmu = *smmu;
+ /* Instance 0 is ioremapped by arm-smmu.c. */
+ nvidia_smmu->bases[0] = smmu->base;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ if (!res)
+ return ERR_PTR(-ENODEV);
+
+ nvidia_smmu->bases[1] = devm_ioremap_resource(dev, res);
+ if (IS_ERR(nvidia_smmu->bases[1]))
+ return ERR_CAST(nvidia_smmu->bases[1]);
+
+ nvidia_smmu->smmu.impl = &nvidia_smmu_impl;
+
+ /*
+ * Free the struct arm_smmu_device *smmu allocated in arm-smmu.c.
+ * Once this function returns, arm-smmu.c would use arm_smmu_device
+ * allocated as part of struct nvidia_smmu.
+ */
+ devm_kfree(dev, smmu);
+
+ return &nvidia_smmu->smmu;
+}
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
new file mode 100644
index 0000000..63f7173
--- /dev/null
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2019, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/of_device.h>
+#include <linux/qcom_scm.h>
+
+#include "arm-smmu.h"
+
+struct qcom_smmu {
+ struct arm_smmu_device smmu;
+ bool bypass_quirk;
+ u8 bypass_cbndx;
+};
+
+static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
+{
+ return container_of(smmu, struct qcom_smmu, smmu);
+}
+
+static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = {
+ { .compatible = "qcom,adreno" },
+ { .compatible = "qcom,mdp4" },
+ { .compatible = "qcom,mdss" },
+ { .compatible = "qcom,sc7180-mdss" },
+ { .compatible = "qcom,sc7180-mss-pil" },
+ { .compatible = "qcom,sdm845-mdss" },
+ { .compatible = "qcom,sdm845-mss-pil" },
+ { }
+};
+
+static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu)
+{
+ unsigned int last_s2cr = ARM_SMMU_GR0_S2CR(smmu->num_mapping_groups - 1);
+ struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
+ u32 reg;
+ u32 smr;
+ int i;
+
+ /*
+ * With some firmware versions writes to S2CR of type FAULT are
+ * ignored, and writing BYPASS will end up written as FAULT in the
+ * register. Perform a write to S2CR to detect if this is the case and
+ * if so reserve a context bank to emulate bypass streams.
+ */
+ reg = FIELD_PREP(ARM_SMMU_S2CR_TYPE, S2CR_TYPE_BYPASS) |
+ FIELD_PREP(ARM_SMMU_S2CR_CBNDX, 0xff) |
+ FIELD_PREP(ARM_SMMU_S2CR_PRIVCFG, S2CR_PRIVCFG_DEFAULT);
+ arm_smmu_gr0_write(smmu, last_s2cr, reg);
+ reg = arm_smmu_gr0_read(smmu, last_s2cr);
+ if (FIELD_GET(ARM_SMMU_S2CR_TYPE, reg) != S2CR_TYPE_BYPASS) {
+ qsmmu->bypass_quirk = true;
+ qsmmu->bypass_cbndx = smmu->num_context_banks - 1;
+
+ set_bit(qsmmu->bypass_cbndx, smmu->context_map);
+
+ arm_smmu_cb_write(smmu, qsmmu->bypass_cbndx, ARM_SMMU_CB_SCTLR, 0);
+
+ reg = FIELD_PREP(ARM_SMMU_CBAR_TYPE, CBAR_TYPE_S1_TRANS_S2_BYPASS);
+ arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(qsmmu->bypass_cbndx), reg);
+ }
+
+ for (i = 0; i < smmu->num_mapping_groups; i++) {
+ smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
+
+ if (FIELD_GET(ARM_SMMU_SMR_VALID, smr)) {
+ /* Ignore valid bit for SMR mask extraction. */
+ smr &= ~ARM_SMMU_SMR_VALID;
+ smmu->smrs[i].id = FIELD_GET(ARM_SMMU_SMR_ID, smr);
+ smmu->smrs[i].mask = FIELD_GET(ARM_SMMU_SMR_MASK, smr);
+ smmu->smrs[i].valid = true;
+
+ smmu->s2crs[i].type = S2CR_TYPE_BYPASS;
+ smmu->s2crs[i].privcfg = S2CR_PRIVCFG_DEFAULT;
+ smmu->s2crs[i].cbndx = 0xff;
+ }
+ }
+
+ return 0;
+}
+
+static void qcom_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
+{
+ struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
+ struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
+ u32 cbndx = s2cr->cbndx;
+ u32 type = s2cr->type;
+ u32 reg;
+
+ if (qsmmu->bypass_quirk) {
+ if (type == S2CR_TYPE_BYPASS) {
+ /*
+ * Firmware with quirky S2CR handling will substitute
+ * BYPASS writes with FAULT, so point the stream to the
+ * reserved context bank and ask for translation on the
+ * stream
+ */
+ type = S2CR_TYPE_TRANS;
+ cbndx = qsmmu->bypass_cbndx;
+ } else if (type == S2CR_TYPE_FAULT) {
+ /*
+ * Firmware with quirky S2CR handling will ignore FAULT
+ * writes, so trick it to write FAULT by asking for a
+ * BYPASS.
+ */
+ type = S2CR_TYPE_BYPASS;
+ cbndx = 0xff;
+ }
+ }
+
+ reg = FIELD_PREP(ARM_SMMU_S2CR_TYPE, type) |
+ FIELD_PREP(ARM_SMMU_S2CR_CBNDX, cbndx) |
+ FIELD_PREP(ARM_SMMU_S2CR_PRIVCFG, s2cr->privcfg);
+ arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_S2CR(idx), reg);
+}
+
+static int qcom_smmu_def_domain_type(struct device *dev)
+{
+ const struct of_device_id *match =
+ of_match_device(qcom_smmu_client_of_match, dev);
+
+ return match ? IOMMU_DOMAIN_IDENTITY : 0;
+}
+
+static int qcom_sdm845_smmu500_reset(struct arm_smmu_device *smmu)
+{
+ int ret;
+
+ /*
+ * To address performance degradation in non-real time clients,
+ * such as USB and UFS, turn off wait-for-safe on sdm845 based boards,
+ * such as MTP and db845, whose firmwares implement secure monitor
+ * call handlers to turn on/off the wait-for-safe logic.
+ */
+ ret = qcom_scm_qsmmu500_wait_safe_toggle(0);
+ if (ret)
+ dev_warn(smmu->dev, "Failed to turn off SAFE logic\n");
+
+ return ret;
+}
+
+static int qcom_smmu500_reset(struct arm_smmu_device *smmu)
+{
+ const struct device_node *np = smmu->dev->of_node;
+
+ arm_mmu500_reset(smmu);
+
+ if (of_device_is_compatible(np, "qcom,sdm845-smmu-500"))
+ return qcom_sdm845_smmu500_reset(smmu);
+
+ return 0;
+}
+
+static const struct arm_smmu_impl qcom_smmu_impl = {
+ .cfg_probe = qcom_smmu_cfg_probe,
+ .def_domain_type = qcom_smmu_def_domain_type,
+ .reset = qcom_smmu500_reset,
+ .write_s2cr = qcom_smmu_write_s2cr,
+};
+
+struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu)
+{
+ struct qcom_smmu *qsmmu;
+
+ /* Check to make sure qcom_scm has finished probing */
+ if (!qcom_scm_is_available())
+ return ERR_PTR(-EPROBE_DEFER);
+
+ qsmmu = devm_kzalloc(smmu->dev, sizeof(*qsmmu), GFP_KERNEL);
+ if (!qsmmu)
+ return ERR_PTR(-ENOMEM);
+
+ qsmmu->smmu = *smmu;
+
+ qsmmu->smmu.impl = &qcom_smmu_impl;
+ devm_kfree(smmu->dev, smmu);
+
+ return &qsmmu->smmu;
+}
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
similarity index 76%
rename from drivers/iommu/arm-smmu.c
rename to drivers/iommu/arm/arm-smmu/arm-smmu.c
index 2185ea5..df24bbe 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -27,8 +27,7 @@
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/iopoll.h>
-#include <linux/init.h>
-#include <linux/moduleparam.h>
+#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_device.h>
@@ -36,6 +35,7 @@
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
+#include <linux/ratelimit.h>
#include <linux/slab.h>
#include <linux/amba/bus.h>
@@ -52,17 +52,10 @@
*/
#define QCOM_DUMMY_VAL -1
-#define TLB_LOOP_TIMEOUT 1000000 /* 1s! */
-#define TLB_SPIN_COUNT 10
-
#define MSI_IOVA_BASE 0x8000000
#define MSI_IOVA_LENGTH 0x100000
static int force_stage;
-/*
- * not really modular, but the easiest way to keep compat with existing
- * bootargs behaviour is to continue using module_param() here.
- */
module_param(force_stage, int, S_IRUGO);
MODULE_PARM_DESC(force_stage,
"Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
@@ -72,43 +65,10 @@
MODULE_PARM_DESC(disable_bypass,
"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
-struct arm_smmu_s2cr {
- struct iommu_group *group;
- int count;
- enum arm_smmu_s2cr_type type;
- enum arm_smmu_s2cr_privcfg privcfg;
- u8 cbndx;
-};
-
#define s2cr_init_val (struct arm_smmu_s2cr){ \
.type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS, \
}
-struct arm_smmu_smr {
- u16 mask;
- u16 id;
- bool valid;
-};
-
-struct arm_smmu_cb {
- u64 ttbr[2];
- u32 tcr[2];
- u32 mair[2];
- struct arm_smmu_cfg *cfg;
-};
-
-struct arm_smmu_master_cfg {
- struct arm_smmu_device *smmu;
- s16 smendx[];
-};
-#define INVALID_SMENDX -1
-#define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
-#define fwspec_smmu(fw) (__fwspec_cfg(fw)->smmu)
-#define fwspec_smendx(fw, i) \
- (i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
-#define for_each_cfg_sme(fw, i, idx) \
- for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
-
static bool using_legacy_binding, using_generic_binding;
static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
@@ -122,7 +82,7 @@
static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
{
if (pm_runtime_enabled(smmu->dev))
- pm_runtime_put(smmu->dev);
+ pm_runtime_put_autosuspend(smmu->dev);
}
static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
@@ -130,6 +90,12 @@
return container_of(dom, struct arm_smmu_domain, domain);
}
+static struct platform_driver arm_smmu_driver;
+static struct iommu_ops arm_smmu_ops;
+
+#ifdef CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS
+static int arm_smmu_bus_init(struct iommu_ops *ops);
+
static struct device_node *dev_get_dev_node(struct device *dev)
{
if (dev_is_pci(dev)) {
@@ -165,9 +131,6 @@
return err == -ENOENT ? 0 : err;
}
-static struct platform_driver arm_smmu_driver;
-static struct iommu_ops arm_smmu_ops;
-
static int arm_smmu_register_legacy_master(struct device *dev,
struct arm_smmu_device **smmu)
{
@@ -219,18 +182,26 @@
return err;
}
-static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
+/*
+ * With the legacy DT binding in play, we have no guarantees about
+ * probe order, but then we're also not doing default domains, so we can
+ * delay setting bus ops until we're sure every possible SMMU is ready,
+ * and that way ensure that no probe_device() calls get missed.
+ */
+static int arm_smmu_legacy_bus_init(void)
{
- int idx;
-
- do {
- idx = find_next_zero_bit(map, end, start);
- if (idx == end)
- return -ENOSPC;
- } while (test_and_set_bit(idx, map));
-
- return idx;
+ if (using_legacy_binding)
+ return arm_smmu_bus_init(&arm_smmu_ops);
+ return 0;
}
+device_initcall_sync(arm_smmu_legacy_bus_init);
+#else
+static int arm_smmu_register_legacy_master(struct device *dev,
+ struct arm_smmu_device **smmu)
+{
+ return -ENODEV;
+}
+#endif /* CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS */
static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
{
@@ -244,11 +215,14 @@
unsigned int spin_cnt, delay;
u32 reg;
+ if (smmu->impl && unlikely(smmu->impl->tlb_sync))
+ return smmu->impl->tlb_sync(smmu, page, sync, status);
+
arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
reg = arm_smmu_readl(smmu, page, status);
- if (!(reg & sTLBGSTATUS_GSACTIVE))
+ if (!(reg & ARM_SMMU_sTLBGSTATUS_GSACTIVE))
return;
cpu_relax();
}
@@ -268,9 +242,8 @@
spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
}
-static void arm_smmu_tlb_sync_context(void *cookie)
+static void arm_smmu_tlb_sync_context(struct arm_smmu_domain *smmu_domain)
{
- struct arm_smmu_domain *smmu_domain = cookie;
struct arm_smmu_device *smmu = smmu_domain->smmu;
unsigned long flags;
@@ -280,13 +253,6 @@
spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
}
-static void arm_smmu_tlb_sync_vmid(void *cookie)
-{
- struct arm_smmu_domain *smmu_domain = cookie;
-
- arm_smmu_tlb_sync_global(smmu_domain->smmu);
-}
-
static void arm_smmu_tlb_inv_context_s1(void *cookie)
{
struct arm_smmu_domain *smmu_domain = cookie;
@@ -297,7 +263,7 @@
wmb();
arm_smmu_cb_write(smmu_domain->smmu, smmu_domain->cfg.cbndx,
ARM_SMMU_CB_S1_TLBIASID, smmu_domain->cfg.asid);
- arm_smmu_tlb_sync_context(cookie);
+ arm_smmu_tlb_sync_context(smmu_domain);
}
static void arm_smmu_tlb_inv_context_s2(void *cookie)
@@ -312,18 +278,16 @@
}
static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
- size_t granule, bool leaf, void *cookie)
+ size_t granule, void *cookie, int reg)
{
struct arm_smmu_domain *smmu_domain = cookie;
struct arm_smmu_device *smmu = smmu_domain->smmu;
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
- int reg, idx = cfg->cbndx;
+ int idx = cfg->cbndx;
if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
wmb();
- reg = leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
-
if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
iova = (iova >> 12) << 12;
iova |= cfg->asid;
@@ -342,16 +306,15 @@
}
static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
- size_t granule, bool leaf, void *cookie)
+ size_t granule, void *cookie, int reg)
{
struct arm_smmu_domain *smmu_domain = cookie;
struct arm_smmu_device *smmu = smmu_domain->smmu;
- int reg, idx = smmu_domain->cfg.cbndx;
+ int idx = smmu_domain->cfg.cbndx;
if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
wmb();
- reg = leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L : ARM_SMMU_CB_S2_TLBIIPAS2;
iova >>= 12;
do {
if (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64)
@@ -362,14 +325,69 @@
} while (size -= granule);
}
+static void arm_smmu_tlb_inv_walk_s1(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
+{
+ arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
+ ARM_SMMU_CB_S1_TLBIVA);
+ arm_smmu_tlb_sync_context(cookie);
+}
+
+static void arm_smmu_tlb_inv_leaf_s1(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
+{
+ arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
+ ARM_SMMU_CB_S1_TLBIVAL);
+ arm_smmu_tlb_sync_context(cookie);
+}
+
+static void arm_smmu_tlb_add_page_s1(struct iommu_iotlb_gather *gather,
+ unsigned long iova, size_t granule,
+ void *cookie)
+{
+ arm_smmu_tlb_inv_range_s1(iova, granule, granule, cookie,
+ ARM_SMMU_CB_S1_TLBIVAL);
+}
+
+static void arm_smmu_tlb_inv_walk_s2(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
+{
+ arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie,
+ ARM_SMMU_CB_S2_TLBIIPAS2);
+ arm_smmu_tlb_sync_context(cookie);
+}
+
+static void arm_smmu_tlb_inv_leaf_s2(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
+{
+ arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie,
+ ARM_SMMU_CB_S2_TLBIIPAS2L);
+ arm_smmu_tlb_sync_context(cookie);
+}
+
+static void arm_smmu_tlb_add_page_s2(struct iommu_iotlb_gather *gather,
+ unsigned long iova, size_t granule,
+ void *cookie)
+{
+ arm_smmu_tlb_inv_range_s2(iova, granule, granule, cookie,
+ ARM_SMMU_CB_S2_TLBIIPAS2L);
+}
+
+static void arm_smmu_tlb_inv_any_s2_v1(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
+{
+ arm_smmu_tlb_inv_context_s2(cookie);
+}
/*
* On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
* almost negligible, but the benefit of getting the first one in as far ahead
* of the sync as possible is significant, hence we don't just make this a
- * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think.
+ * no-op and call arm_smmu_tlb_inv_context_s2() from .iotlb_sync as you might
+ * think.
*/
-static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
- size_t granule, bool leaf, void *cookie)
+static void arm_smmu_tlb_add_page_s2_v1(struct iommu_iotlb_gather *gather,
+ unsigned long iova, size_t granule,
+ void *cookie)
{
struct arm_smmu_domain *smmu_domain = cookie;
struct arm_smmu_device *smmu = smmu_domain->smmu;
@@ -380,67 +398,25 @@
arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
}
-static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
- size_t granule, void *cookie)
-{
- struct arm_smmu_domain *smmu_domain = cookie;
- const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops;
-
- ops->tlb_inv_range(iova, size, granule, false, cookie);
- ops->tlb_sync(cookie);
-}
-
-static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size,
- size_t granule, void *cookie)
-{
- struct arm_smmu_domain *smmu_domain = cookie;
- const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops;
-
- ops->tlb_inv_range(iova, size, granule, true, cookie);
- ops->tlb_sync(cookie);
-}
-
-static void arm_smmu_tlb_add_page(struct iommu_iotlb_gather *gather,
- unsigned long iova, size_t granule,
- void *cookie)
-{
- struct arm_smmu_domain *smmu_domain = cookie;
- const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops;
-
- ops->tlb_inv_range(iova, granule, granule, true, cookie);
-}
-
-static const struct arm_smmu_flush_ops arm_smmu_s1_tlb_ops = {
- .tlb = {
- .tlb_flush_all = arm_smmu_tlb_inv_context_s1,
- .tlb_flush_walk = arm_smmu_tlb_inv_walk,
- .tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
- .tlb_add_page = arm_smmu_tlb_add_page,
- },
- .tlb_inv_range = arm_smmu_tlb_inv_range_s1,
- .tlb_sync = arm_smmu_tlb_sync_context,
+static const struct iommu_flush_ops arm_smmu_s1_tlb_ops = {
+ .tlb_flush_all = arm_smmu_tlb_inv_context_s1,
+ .tlb_flush_walk = arm_smmu_tlb_inv_walk_s1,
+ .tlb_flush_leaf = arm_smmu_tlb_inv_leaf_s1,
+ .tlb_add_page = arm_smmu_tlb_add_page_s1,
};
-static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
- .tlb = {
- .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
- .tlb_flush_walk = arm_smmu_tlb_inv_walk,
- .tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
- .tlb_add_page = arm_smmu_tlb_add_page,
- },
- .tlb_inv_range = arm_smmu_tlb_inv_range_s2,
- .tlb_sync = arm_smmu_tlb_sync_context,
+static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
+ .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
+ .tlb_flush_walk = arm_smmu_tlb_inv_walk_s2,
+ .tlb_flush_leaf = arm_smmu_tlb_inv_leaf_s2,
+ .tlb_add_page = arm_smmu_tlb_add_page_s2,
};
-static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
- .tlb = {
- .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
- .tlb_flush_walk = arm_smmu_tlb_inv_walk,
- .tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
- .tlb_add_page = arm_smmu_tlb_add_page,
- },
- .tlb_inv_range = arm_smmu_tlb_inv_vmid_nosync,
- .tlb_sync = arm_smmu_tlb_sync_vmid,
+static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
+ .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
+ .tlb_flush_walk = arm_smmu_tlb_inv_any_s2_v1,
+ .tlb_flush_leaf = arm_smmu_tlb_inv_any_s2_v1,
+ .tlb_add_page = arm_smmu_tlb_add_page_s2_v1,
};
static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
@@ -453,7 +429,7 @@
int idx = smmu_domain->cfg.cbndx;
fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
- if (!(fsr & FSR_FAULT))
+ if (!(fsr & ARM_SMMU_FSR_FAULT))
return IRQ_NONE;
fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
@@ -472,6 +448,8 @@
{
u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
struct arm_smmu_device *smmu = dev;
+ static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0);
@@ -481,11 +459,19 @@
if (!gfsr)
return IRQ_NONE;
- dev_err_ratelimited(smmu->dev,
- "Unexpected global fault, this could be serious\n");
- dev_err_ratelimited(smmu->dev,
- "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
- gfsr, gfsynr0, gfsynr1, gfsynr2);
+ if (__ratelimit(&rs)) {
+ if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) &&
+ (gfsr & ARM_SMMU_sGFSR_USF))
+ dev_err(smmu->dev,
+ "Blocked unknown Stream ID 0x%hx; boot with \"arm-smmu.disable_bypass=0\" to allow, but this may have security implications\n",
+ (u16)gfsynr1);
+ else
+ dev_err(smmu->dev,
+ "Unexpected global fault, this could be serious\n");
+ dev_err(smmu->dev,
+ "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
+ gfsr, gfsynr0, gfsynr1, gfsynr2);
+ }
arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr);
return IRQ_HANDLED;
@@ -505,26 +491,32 @@
if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
} else {
- cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
- cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
- cb->tcr[1] |= FIELD_PREP(TCR2_SEP, TCR2_SEP_UPSTREAM);
+ cb->tcr[0] = arm_smmu_lpae_tcr(pgtbl_cfg);
+ cb->tcr[1] = arm_smmu_lpae_tcr2(pgtbl_cfg);
if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
- cb->tcr[1] |= TCR2_AS;
+ cb->tcr[1] |= ARM_SMMU_TCR2_AS;
+ else
+ cb->tcr[0] |= ARM_SMMU_TCR_EAE;
}
} else {
- cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
+ cb->tcr[0] = arm_smmu_lpae_vtcr(pgtbl_cfg);
}
/* TTBRs */
if (stage1) {
if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
- cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
- cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
+ cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr;
+ cb->ttbr[1] = 0;
} else {
- cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
- cb->ttbr[0] |= FIELD_PREP(TTBRn_ASID, cfg->asid);
- cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
- cb->ttbr[1] |= FIELD_PREP(TTBRn_ASID, cfg->asid);
+ cb->ttbr[0] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
+ cfg->asid);
+ cb->ttbr[1] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
+ cfg->asid);
+
+ if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
+ cb->ttbr[1] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
+ else
+ cb->ttbr[0] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
}
} else {
cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
@@ -536,13 +528,13 @@
cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
} else {
- cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
- cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
+ cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair;
+ cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair >> 32;
}
}
}
-static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
+void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
{
u32 reg;
bool stage1;
@@ -560,31 +552,33 @@
/* CBA2R */
if (smmu->version > ARM_SMMU_V1) {
if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
- reg = CBA2R_VA64;
+ reg = ARM_SMMU_CBA2R_VA64;
else
reg = 0;
/* 16-bit VMIDs live in CBA2R */
if (smmu->features & ARM_SMMU_FEAT_VMID16)
- reg |= FIELD_PREP(CBA2R_VMID16, cfg->vmid);
+ reg |= FIELD_PREP(ARM_SMMU_CBA2R_VMID16, cfg->vmid);
arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBA2R(idx), reg);
}
/* CBAR */
- reg = FIELD_PREP(CBAR_TYPE, cfg->cbar);
+ reg = FIELD_PREP(ARM_SMMU_CBAR_TYPE, cfg->cbar);
if (smmu->version < ARM_SMMU_V2)
- reg |= FIELD_PREP(CBAR_IRPTNDX, cfg->irptndx);
+ reg |= FIELD_PREP(ARM_SMMU_CBAR_IRPTNDX, cfg->irptndx);
/*
* Use the weakest shareability/memory types, so they are
* overridden by the ttbcr/pte.
*/
if (stage1) {
- reg |= FIELD_PREP(CBAR_S1_BPSHCFG, CBAR_S1_BPSHCFG_NSH) |
- FIELD_PREP(CBAR_S1_MEMATTR, CBAR_S1_MEMATTR_WB);
+ reg |= FIELD_PREP(ARM_SMMU_CBAR_S1_BPSHCFG,
+ ARM_SMMU_CBAR_S1_BPSHCFG_NSH) |
+ FIELD_PREP(ARM_SMMU_CBAR_S1_MEMATTR,
+ ARM_SMMU_CBAR_S1_MEMATTR_WB);
} else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
/* 8-bit VMIDs live in CBAR */
- reg |= FIELD_PREP(CBAR_VMID, cfg->vmid);
+ reg |= FIELD_PREP(ARM_SMMU_CBAR_VMID, cfg->vmid);
}
arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(idx), reg);
@@ -616,17 +610,29 @@
}
/* SCTLR */
- reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
+ reg = ARM_SMMU_SCTLR_CFIE | ARM_SMMU_SCTLR_CFRE | ARM_SMMU_SCTLR_AFE |
+ ARM_SMMU_SCTLR_TRE | ARM_SMMU_SCTLR_M;
if (stage1)
- reg |= SCTLR_S1_ASIDPNE;
+ reg |= ARM_SMMU_SCTLR_S1_ASIDPNE;
if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
- reg |= SCTLR_E;
+ reg |= ARM_SMMU_SCTLR_E;
arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg);
}
+static int arm_smmu_alloc_context_bank(struct arm_smmu_domain *smmu_domain,
+ struct arm_smmu_device *smmu,
+ struct device *dev, unsigned int start)
+{
+ if (smmu->impl && smmu->impl->alloc_context_bank)
+ return smmu->impl->alloc_context_bank(smmu_domain, smmu, dev, start);
+
+ return __arm_smmu_alloc_bitmap(smmu->context_map, start, smmu->num_context_banks);
+}
+
static int arm_smmu_init_domain_context(struct iommu_domain *domain,
- struct arm_smmu_device *smmu)
+ struct arm_smmu_device *smmu,
+ struct device *dev)
{
int irq, start, ret = 0;
unsigned long ias, oas;
@@ -635,6 +641,7 @@
enum io_pgtable_fmt fmt;
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+ irqreturn_t (*context_fault)(int irq, void *dev);
mutex_lock(&smmu_domain->init_mutex);
if (smmu_domain->smmu)
@@ -740,10 +747,13 @@
ret = -EINVAL;
goto out_unlock;
}
- ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
- smmu->num_context_banks);
- if (ret < 0)
+
+ ret = arm_smmu_alloc_context_bank(smmu_domain, smmu, dev, start);
+ if (ret < 0) {
goto out_unlock;
+ }
+
+ smmu_domain->smmu = smmu;
cfg->cbndx = ret;
if (smmu->version < ARM_SMMU_V2) {
@@ -758,22 +768,21 @@
else
cfg->asid = cfg->cbndx;
- smmu_domain->smmu = smmu;
- if (smmu->impl && smmu->impl->init_context) {
- ret = smmu->impl->init_context(smmu_domain);
- if (ret)
- goto out_unlock;
- }
-
pgtbl_cfg = (struct io_pgtable_cfg) {
.pgsize_bitmap = smmu->pgsize_bitmap,
.ias = ias,
.oas = oas,
.coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
- .tlb = &smmu_domain->flush_ops->tlb,
+ .tlb = smmu_domain->flush_ops,
.iommu_dev = smmu->dev,
};
+ if (smmu->impl && smmu->impl->init_context) {
+ ret = smmu->impl->init_context(smmu_domain, &pgtbl_cfg, dev);
+ if (ret)
+ goto out_clear_smmu;
+ }
+
if (smmu_domain->non_strict)
pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
@@ -785,7 +794,14 @@
/* Update the domain's page sizes to reflect the page table format */
domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
- domain->geometry.aperture_end = (1UL << ias) - 1;
+
+ if (pgtbl_cfg.quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) {
+ domain->geometry.aperture_start = ~0UL << ias;
+ domain->geometry.aperture_end = ~0UL;
+ } else {
+ domain->geometry.aperture_end = (1UL << ias) - 1;
+ }
+
domain->geometry.force_aperture = true;
/* Initialise the context bank with our page table cfg */
@@ -797,12 +813,18 @@
* handler seeing a half-initialised domain state.
*/
irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
- ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
+
+ if (smmu->impl && smmu->impl->context_fault)
+ context_fault = smmu->impl->context_fault;
+ else
+ context_fault = arm_smmu_context_fault;
+
+ ret = devm_request_irq(smmu->dev, irq, context_fault,
IRQF_SHARED, "arm-smmu-context-fault", domain);
if (ret < 0) {
dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
cfg->irptndx, irq);
- cfg->irptndx = INVALID_IRPTNDX;
+ cfg->irptndx = ARM_SMMU_INVALID_IRPTNDX;
}
mutex_unlock(&smmu_domain->init_mutex);
@@ -840,7 +862,7 @@
smmu->cbs[cfg->cbndx].cfg = NULL;
arm_smmu_write_context_bank(smmu, cfg->cbndx);
- if (cfg->irptndx != INVALID_IRPTNDX) {
+ if (cfg->irptndx != ARM_SMMU_INVALID_IRPTNDX) {
irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
devm_free_irq(smmu->dev, irq, domain);
}
@@ -896,23 +918,31 @@
static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
{
struct arm_smmu_smr *smr = smmu->smrs + idx;
- u32 reg = FIELD_PREP(SMR_ID, smr->id) | FIELD_PREP(SMR_MASK, smr->mask);
+ u32 reg = FIELD_PREP(ARM_SMMU_SMR_ID, smr->id) |
+ FIELD_PREP(ARM_SMMU_SMR_MASK, smr->mask);
if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
- reg |= SMR_VALID;
+ reg |= ARM_SMMU_SMR_VALID;
arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(idx), reg);
}
static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
{
struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
- u32 reg = FIELD_PREP(S2CR_TYPE, s2cr->type) |
- FIELD_PREP(S2CR_CBNDX, s2cr->cbndx) |
- FIELD_PREP(S2CR_PRIVCFG, s2cr->privcfg);
+ u32 reg;
+
+ if (smmu->impl && smmu->impl->write_s2cr) {
+ smmu->impl->write_s2cr(smmu, idx);
+ return;
+ }
+
+ reg = FIELD_PREP(ARM_SMMU_S2CR_TYPE, s2cr->type) |
+ FIELD_PREP(ARM_SMMU_S2CR_CBNDX, s2cr->cbndx) |
+ FIELD_PREP(ARM_SMMU_S2CR_PRIVCFG, s2cr->privcfg);
if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
smmu->smrs[idx].valid)
- reg |= S2CR_EXIDVALID;
+ reg |= ARM_SMMU_S2CR_EXIDVALID;
arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_S2CR(idx), reg);
}
@@ -930,24 +960,37 @@
static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
{
u32 smr;
+ int i;
if (!smmu->smrs)
return;
-
+ /*
+ * If we've had to accommodate firmware memory regions, we may
+ * have live SMRs by now; tread carefully...
+ *
+ * Somewhat perversely, not having a free SMR for this test implies we
+ * can get away without it anyway, as we'll only be able to 'allocate'
+ * these SMRs for the ID/mask values we're already trusting to be OK.
+ */
+ for (i = 0; i < smmu->num_mapping_groups; i++)
+ if (!smmu->smrs[i].valid)
+ goto smr_ok;
+ return;
+smr_ok:
/*
* SMR.ID bits may not be preserved if the corresponding MASK
* bits are set, so check each one separately. We can reject
* masters later if they try to claim IDs outside these masks.
*/
- smr = FIELD_PREP(SMR_ID, smmu->streamid_mask);
- arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(0), smr);
- smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(0));
- smmu->streamid_mask = FIELD_GET(SMR_ID, smr);
+ smr = FIELD_PREP(ARM_SMMU_SMR_ID, smmu->streamid_mask);
+ arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
+ smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
+ smmu->streamid_mask = FIELD_GET(ARM_SMMU_SMR_ID, smr);
- smr = FIELD_PREP(SMR_MASK, smmu->streamid_mask);
- arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(0), smr);
- smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(0));
- smmu->smr_mask_mask = FIELD_GET(SMR_MASK, smr);
+ smr = FIELD_PREP(ARM_SMMU_SMR_MASK, smmu->streamid_mask);
+ arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
+ smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
+ smmu->smr_mask_mask = FIELD_GET(ARM_SMMU_SMR_MASK, smr);
}
static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
@@ -1007,17 +1050,16 @@
static int arm_smmu_master_alloc_smes(struct device *dev)
{
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
- struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
+ struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
struct arm_smmu_device *smmu = cfg->smmu;
struct arm_smmu_smr *smrs = smmu->smrs;
- struct iommu_group *group;
int i, idx, ret;
mutex_lock(&smmu->stream_map_mutex);
/* Figure out a viable stream map entry allocation */
- for_each_cfg_sme(fwspec, i, idx) {
- u16 sid = FIELD_GET(SMR_ID, fwspec->ids[i]);
- u16 mask = FIELD_GET(SMR_MASK, fwspec->ids[i]);
+ for_each_cfg_sme(cfg, fwspec, i, idx) {
+ u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
+ u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);
if (idx != INVALID_SMENDX) {
ret = -EEXIST;
@@ -1038,20 +1080,9 @@
cfg->smendx[i] = (s16)idx;
}
- group = iommu_group_get_for_dev(dev);
- if (!group)
- group = ERR_PTR(-ENOMEM);
- if (IS_ERR(group)) {
- ret = PTR_ERR(group);
- goto out_err;
- }
- iommu_group_put(group);
-
/* It worked! Now, poke the actual hardware */
- for_each_cfg_sme(fwspec, i, idx) {
+ for_each_cfg_sme(cfg, fwspec, i, idx)
arm_smmu_write_sme(smmu, idx);
- smmu->s2crs[idx].group = group;
- }
mutex_unlock(&smmu->stream_map_mutex);
return 0;
@@ -1065,14 +1096,14 @@
return ret;
}
-static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
+static void arm_smmu_master_free_smes(struct arm_smmu_master_cfg *cfg,
+ struct iommu_fwspec *fwspec)
{
- struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
- struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
+ struct arm_smmu_device *smmu = cfg->smmu;
int i, idx;
mutex_lock(&smmu->stream_map_mutex);
- for_each_cfg_sme(fwspec, i, idx) {
+ for_each_cfg_sme(cfg, fwspec, i, idx) {
if (arm_smmu_free_sme(smmu, idx))
arm_smmu_write_sme(smmu, idx);
cfg->smendx[i] = INVALID_SMENDX;
@@ -1081,6 +1112,7 @@
}
static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
+ struct arm_smmu_master_cfg *cfg,
struct iommu_fwspec *fwspec)
{
struct arm_smmu_device *smmu = smmu_domain->smmu;
@@ -1094,7 +1126,7 @@
else
type = S2CR_TYPE_TRANS;
- for_each_cfg_sme(fwspec, i, idx) {
+ for_each_cfg_sme(cfg, fwspec, i, idx) {
if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
continue;
@@ -1108,10 +1140,11 @@
static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
- int ret;
- struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
- struct arm_smmu_device *smmu;
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+ struct arm_smmu_master_cfg *cfg;
+ struct arm_smmu_device *smmu;
+ int ret;
if (!fwspec || fwspec->ops != &arm_smmu_ops) {
dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
@@ -1120,22 +1153,23 @@
/*
* FIXME: The arch/arm DMA API code tries to attach devices to its own
- * domains between of_xlate() and add_device() - we have no way to cope
+ * domains between of_xlate() and probe_device() - we have no way to cope
* with that, so until ARM gets converted to rely on groups and default
* domains, just say no (but more politely than by dereferencing NULL).
* This should be at least a WARN_ON once that's sorted.
*/
- if (!fwspec->iommu_priv)
+ cfg = dev_iommu_priv_get(dev);
+ if (!cfg)
return -ENODEV;
- smmu = fwspec_smmu(fwspec);
+ smmu = cfg->smmu;
ret = arm_smmu_rpm_get(smmu);
if (ret < 0)
return ret;
/* Ensure that the domain is finalised */
- ret = arm_smmu_init_domain_context(domain, smmu);
+ ret = arm_smmu_init_domain_context(domain, smmu, dev);
if (ret < 0)
goto rpm_put;
@@ -1152,7 +1186,21 @@
}
/* Looks ok, so add the device to the domain */
- ret = arm_smmu_domain_add_master(smmu_domain, fwspec);
+ ret = arm_smmu_domain_add_master(smmu_domain, cfg, fwspec);
+
+ /*
+ * Setup an autosuspend delay to avoid bouncing runpm state.
+ * Otherwise, if a driver for a suspended consumer device
+ * unmaps buffers, it will runpm resume/suspend for each one.
+ *
+ * For example, when used by a GPU device, when an application
+ * or game exits, it can trigger unmapping 100s or 1000s of
+ * buffers. With a runpm cycle for each buffer, that adds up
+ * to 5-10sec worth of reprogramming the context bank, while
+ * the system appears to be locked up to the user.
+ */
+ pm_runtime_set_autosuspend_delay(smmu->dev, 20);
+ pm_runtime_use_autosuspend(smmu->dev);
rpm_put:
arm_smmu_rpm_put(smmu);
@@ -1160,7 +1208,7 @@
}
static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot)
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
@@ -1170,7 +1218,7 @@
return -ENODEV;
arm_smmu_rpm_get(smmu);
- ret = ops->map(ops, iova, paddr, size, prot);
+ ret = ops->map(ops, iova, paddr, size, prot, gfp);
arm_smmu_rpm_put(smmu);
return ret;
@@ -1200,7 +1248,7 @@
if (smmu_domain->flush_ops) {
arm_smmu_rpm_get(smmu);
- smmu_domain->flush_ops->tlb.tlb_flush_all(smmu_domain);
+ smmu_domain->flush_ops->tlb_flush_all(smmu_domain);
arm_smmu_rpm_put(smmu);
}
}
@@ -1211,11 +1259,16 @@
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_device *smmu = smmu_domain->smmu;
- if (smmu_domain->flush_ops) {
- arm_smmu_rpm_get(smmu);
- smmu_domain->flush_ops->tlb_sync(smmu_domain);
- arm_smmu_rpm_put(smmu);
- }
+ if (!smmu)
+ return;
+
+ arm_smmu_rpm_get(smmu);
+ if (smmu->version == ARM_SMMU_V2 ||
+ smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
+ arm_smmu_tlb_sync_context(smmu_domain);
+ else
+ arm_smmu_tlb_sync_global(smmu);
+ arm_smmu_rpm_put(smmu);
}
static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
@@ -1245,7 +1298,8 @@
arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
reg = arm_smmu_page(smmu, ARM_SMMU_CB(smmu, idx)) + ARM_SMMU_CB_ATSR;
- if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ATSR_ACTIVE), 5, 50)) {
+ if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ARM_SMMU_ATSR_ACTIVE),
+ 5, 50)) {
spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
dev_err(dev,
"iova to phys timed out on %pad. Falling back to software table walk.\n",
@@ -1256,7 +1310,7 @@
phys = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_PAR);
spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
- if (phys & CB_PAR_F) {
+ if (phys & ARM_SMMU_CB_PAR_F) {
dev_err(dev, "translation fault!\n");
dev_err(dev, "PAR = 0x%llx\n", phys);
goto out;
@@ -1313,9 +1367,9 @@
return dev ? dev_get_drvdata(dev) : NULL;
}
-static int arm_smmu_add_device(struct device *dev)
+static struct iommu_device *arm_smmu_probe_device(struct device *dev)
{
- struct arm_smmu_device *smmu;
+ struct arm_smmu_device *smmu = NULL;
struct arm_smmu_master_cfg *cfg;
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
int i, ret;
@@ -1334,13 +1388,13 @@
} else if (fwspec && fwspec->ops == &arm_smmu_ops) {
smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
} else {
- return -ENODEV;
+ return ERR_PTR(-ENODEV);
}
ret = -EINVAL;
for (i = 0; i < fwspec->num_ids; i++) {
- u16 sid = FIELD_GET(SMR_ID, fwspec->ids[i]);
- u16 mask = FIELD_GET(SMR_MASK, fwspec->ids[i]);
+ u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
+ u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);
if (sid & ~smmu->streamid_mask) {
dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
@@ -1361,7 +1415,7 @@
goto out_free;
cfg->smmu = smmu;
- fwspec->iommu_priv = cfg;
+ dev_iommu_priv_set(dev, cfg);
while (i--)
cfg->smendx[i] = INVALID_SMENDX;
@@ -1375,21 +1429,19 @@
if (ret)
goto out_cfg_free;
- iommu_device_link(&smmu->iommu, dev);
-
device_link_add(dev, smmu->dev,
DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);
- return 0;
+ return &smmu->iommu;
out_cfg_free:
kfree(cfg);
out_free:
iommu_fwspec_free(dev);
- return ret;
+ return ERR_PTR(ret);
}
-static void arm_smmu_remove_device(struct device *dev)
+static void arm_smmu_release_device(struct device *dev)
{
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct arm_smmu_master_cfg *cfg;
@@ -1399,31 +1451,31 @@
if (!fwspec || fwspec->ops != &arm_smmu_ops)
return;
- cfg = fwspec->iommu_priv;
+ cfg = dev_iommu_priv_get(dev);
smmu = cfg->smmu;
ret = arm_smmu_rpm_get(smmu);
if (ret < 0)
return;
- iommu_device_unlink(&smmu->iommu, dev);
- arm_smmu_master_free_smes(fwspec);
+ arm_smmu_master_free_smes(cfg, fwspec);
arm_smmu_rpm_put(smmu);
- iommu_group_remove_device(dev);
- kfree(fwspec->iommu_priv);
+ dev_iommu_priv_set(dev, NULL);
+ kfree(cfg);
iommu_fwspec_free(dev);
}
static struct iommu_group *arm_smmu_device_group(struct device *dev)
{
+ struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
- struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
+ struct arm_smmu_device *smmu = cfg->smmu;
struct iommu_group *group = NULL;
int i, idx;
- for_each_cfg_sme(fwspec, i, idx) {
+ for_each_cfg_sme(cfg, fwspec, i, idx) {
if (group && smmu->s2crs[idx].group &&
group != smmu->s2crs[idx].group)
return ERR_PTR(-EINVAL);
@@ -1441,6 +1493,11 @@
else
group = generic_device_group(dev);
+ /* Remember group for faster lookups */
+ if (!IS_ERR(group))
+ for_each_cfg_sme(cfg, fwspec, i, idx)
+ smmu->s2crs[idx].group = group;
+
return group;
}
@@ -1521,12 +1578,12 @@
u32 mask, fwid = 0;
if (args->args_count > 0)
- fwid |= FIELD_PREP(SMR_ID, args->args[0]);
+ fwid |= FIELD_PREP(ARM_SMMU_SMR_ID, args->args[0]);
if (args->args_count > 1)
- fwid |= FIELD_PREP(SMR_MASK, args->args[1]);
+ fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, args->args[1]);
else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
- fwid |= FIELD_PREP(SMR_MASK, mask);
+ fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, mask);
return iommu_fwspec_add_ids(dev, &fwid, 1);
}
@@ -1547,13 +1604,15 @@
iommu_dma_get_resv_regions(dev, head);
}
-static void arm_smmu_put_resv_regions(struct device *dev,
- struct list_head *head)
+static int arm_smmu_def_domain_type(struct device *dev)
{
- struct iommu_resv_region *entry, *next;
+ struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
+ const struct arm_smmu_impl *impl = cfg->smmu->impl;
- list_for_each_entry_safe(entry, next, head, list)
- kfree(entry);
+ if (impl && impl->def_domain_type)
+ return impl->def_domain_type(dev);
+
+ return 0;
}
static struct iommu_ops arm_smmu_ops = {
@@ -1566,14 +1625,15 @@
.flush_iotlb_all = arm_smmu_flush_iotlb_all,
.iotlb_sync = arm_smmu_iotlb_sync,
.iova_to_phys = arm_smmu_iova_to_phys,
- .add_device = arm_smmu_add_device,
- .remove_device = arm_smmu_remove_device,
+ .probe_device = arm_smmu_probe_device,
+ .release_device = arm_smmu_release_device,
.device_group = arm_smmu_device_group,
.domain_get_attr = arm_smmu_domain_get_attr,
.domain_set_attr = arm_smmu_domain_set_attr,
.of_xlate = arm_smmu_of_xlate,
.get_resv_regions = arm_smmu_get_resv_regions,
- .put_resv_regions = arm_smmu_put_resv_regions,
+ .put_resv_regions = generic_iommu_put_resv_regions,
+ .def_domain_type = arm_smmu_def_domain_type,
.pgsize_bitmap = -1UL, /* Restricted during device attach */
};
@@ -1596,7 +1656,7 @@
/* Make sure all context banks are disabled and clear CB_FSR */
for (i = 0; i < smmu->num_context_banks; ++i) {
arm_smmu_write_context_bank(smmu, i);
- arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, FSR_FAULT);
+ arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, ARM_SMMU_FSR_FAULT);
}
/* Invalidate the TLB, just in case */
@@ -1606,29 +1666,30 @@
reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);
/* Enable fault reporting */
- reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
+ reg |= (ARM_SMMU_sCR0_GFRE | ARM_SMMU_sCR0_GFIE |
+ ARM_SMMU_sCR0_GCFGFRE | ARM_SMMU_sCR0_GCFGFIE);
/* Disable TLB broadcasting. */
- reg |= (sCR0_VMIDPNE | sCR0_PTM);
+ reg |= (ARM_SMMU_sCR0_VMIDPNE | ARM_SMMU_sCR0_PTM);
/* Enable client access, handling unmatched streams as appropriate */
- reg &= ~sCR0_CLIENTPD;
+ reg &= ~ARM_SMMU_sCR0_CLIENTPD;
if (disable_bypass)
- reg |= sCR0_USFCFG;
+ reg |= ARM_SMMU_sCR0_USFCFG;
else
- reg &= ~sCR0_USFCFG;
+ reg &= ~ARM_SMMU_sCR0_USFCFG;
/* Disable forced broadcasting */
- reg &= ~sCR0_FB;
+ reg &= ~ARM_SMMU_sCR0_FB;
/* Don't upgrade barriers */
- reg &= ~(sCR0_BSU);
+ reg &= ~(ARM_SMMU_sCR0_BSU);
if (smmu->features & ARM_SMMU_FEAT_VMID16)
- reg |= sCR0_VMID16EN;
+ reg |= ARM_SMMU_sCR0_VMID16EN;
if (smmu->features & ARM_SMMU_FEAT_EXIDS)
- reg |= sCR0_EXIDENABLE;
+ reg |= ARM_SMMU_sCR0_EXIDENABLE;
if (smmu->impl && smmu->impl->reset)
smmu->impl->reset(smmu);
@@ -1662,7 +1723,7 @@
unsigned int size;
u32 id;
bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
- int i;
+ int i, ret;
dev_notice(smmu->dev, "probing hardware configuration...\n");
dev_notice(smmu->dev, "SMMUv%d with:\n",
@@ -1673,21 +1734,21 @@
/* Restrict available stages based on module parameter */
if (force_stage == 1)
- id &= ~(ID0_S2TS | ID0_NTS);
+ id &= ~(ARM_SMMU_ID0_S2TS | ARM_SMMU_ID0_NTS);
else if (force_stage == 2)
- id &= ~(ID0_S1TS | ID0_NTS);
+ id &= ~(ARM_SMMU_ID0_S1TS | ARM_SMMU_ID0_NTS);
- if (id & ID0_S1TS) {
+ if (id & ARM_SMMU_ID0_S1TS) {
smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
dev_notice(smmu->dev, "\tstage 1 translation\n");
}
- if (id & ID0_S2TS) {
+ if (id & ARM_SMMU_ID0_S2TS) {
smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
dev_notice(smmu->dev, "\tstage 2 translation\n");
}
- if (id & ID0_NTS) {
+ if (id & ARM_SMMU_ID0_NTS) {
smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
dev_notice(smmu->dev, "\tnested translation\n");
}
@@ -1698,8 +1759,8 @@
return -ENODEV;
}
- if ((id & ID0_S1TS) &&
- ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
+ if ((id & ARM_SMMU_ID0_S1TS) &&
+ ((smmu->version < ARM_SMMU_V2) || !(id & ARM_SMMU_ID0_ATOSNS))) {
smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
dev_notice(smmu->dev, "\taddress translation ops\n");
}
@@ -1710,7 +1771,7 @@
* Fortunately, this also opens up a workaround for systems where the
* ID register value has ended up configured incorrectly.
*/
- cttw_reg = !!(id & ID0_CTTW);
+ cttw_reg = !!(id & ARM_SMMU_ID0_CTTW);
if (cttw_fw || cttw_reg)
dev_notice(smmu->dev, "\t%scoherent table walk\n",
cttw_fw ? "" : "non-");
@@ -1719,16 +1780,16 @@
"\t(IDR0.CTTW overridden by FW configuration)\n");
/* Max. number of entries we have for stream matching/indexing */
- if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
+ if (smmu->version == ARM_SMMU_V2 && id & ARM_SMMU_ID0_EXIDS) {
smmu->features |= ARM_SMMU_FEAT_EXIDS;
size = 1 << 16;
} else {
- size = 1 << FIELD_GET(ID0_NUMSIDB, id);
+ size = 1 << FIELD_GET(ARM_SMMU_ID0_NUMSIDB, id);
}
smmu->streamid_mask = size - 1;
- if (id & ID0_SMS) {
+ if (id & ARM_SMMU_ID0_SMS) {
smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
- size = FIELD_GET(ID0_NUMSMRG, id);
+ size = FIELD_GET(ARM_SMMU_ID0_NUMSMRG, id);
if (size == 0) {
dev_err(smmu->dev,
"stream-matching supported, but no SMRs present!\n");
@@ -1756,18 +1817,19 @@
mutex_init(&smmu->stream_map_mutex);
spin_lock_init(&smmu->global_sync_lock);
- if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
+ if (smmu->version < ARM_SMMU_V2 ||
+ !(id & ARM_SMMU_ID0_PTFS_NO_AARCH32)) {
smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
- if (!(id & ID0_PTFS_NO_AARCH32S))
+ if (!(id & ARM_SMMU_ID0_PTFS_NO_AARCH32S))
smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
}
/* ID1 */
id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID1);
- smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
+ smmu->pgshift = (id & ARM_SMMU_ID1_PAGESIZE) ? 16 : 12;
/* Check for size mismatch of SMMU address space from mapped region */
- size = 1 << (FIELD_GET(ID1_NUMPAGENDXB, id) + 1);
+ size = 1 << (FIELD_GET(ARM_SMMU_ID1_NUMPAGENDXB, id) + 1);
if (smmu->numpage != 2 * size << smmu->pgshift)
dev_warn(smmu->dev,
"SMMU address space size (0x%x) differs from mapped region size (0x%x)!\n",
@@ -1775,8 +1837,8 @@
/* Now properly encode NUMPAGE to subsequently derive SMMU_CB_BASE */
smmu->numpage = size;
- smmu->num_s2_context_banks = FIELD_GET(ID1_NUMS2CB, id);
- smmu->num_context_banks = FIELD_GET(ID1_NUMCB, id);
+ smmu->num_s2_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMS2CB, id);
+ smmu->num_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMCB, id);
if (smmu->num_s2_context_banks > smmu->num_context_banks) {
dev_err(smmu->dev, "impossible number of S2 context banks!\n");
return -ENODEV;
@@ -1790,14 +1852,14 @@
/* ID2 */
id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID2);
- size = arm_smmu_id_size_to_bits(FIELD_GET(ID2_IAS, id));
+ size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_IAS, id));
smmu->ipa_size = size;
/* The output mask is also applied for bypass */
- size = arm_smmu_id_size_to_bits(FIELD_GET(ID2_OAS, id));
+ size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_OAS, id));
smmu->pa_size = size;
- if (id & ID2_VMID16)
+ if (id & ARM_SMMU_ID2_VMID16)
smmu->features |= ARM_SMMU_FEAT_VMID16;
/*
@@ -1814,16 +1876,22 @@
if (smmu->version == ARM_SMMU_V1_64K)
smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
} else {
- size = FIELD_GET(ID2_UBS, id);
+ size = FIELD_GET(ARM_SMMU_ID2_UBS, id);
smmu->va_size = arm_smmu_id_size_to_bits(size);
- if (id & ID2_PTFS_4K)
+ if (id & ARM_SMMU_ID2_PTFS_4K)
smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
- if (id & ID2_PTFS_16K)
+ if (id & ARM_SMMU_ID2_PTFS_16K)
smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
- if (id & ID2_PTFS_64K)
+ if (id & ARM_SMMU_ID2_PTFS_64K)
smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
}
+ if (smmu->impl && smmu->impl->cfg_probe) {
+ ret = smmu->impl->cfg_probe(smmu);
+ if (ret)
+ return ret;
+ }
+
/* Now we've corralled the various formats, what'll it do? */
if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
@@ -1851,9 +1919,6 @@
dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
smmu->ipa_size, smmu->pa_size);
- if (smmu->impl && smmu->impl->cfg_probe)
- return smmu->impl->cfg_probe(smmu);
-
return 0;
}
@@ -1879,9 +1944,11 @@
{ .compatible = "arm,mmu-401", .data = &arm_mmu401 },
{ .compatible = "arm,mmu-500", .data = &arm_mmu500 },
{ .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
+ { .compatible = "nvidia,smmu-500", .data = &arm_mmu500 },
{ .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
{ },
};
+MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
#ifdef CONFIG_ACPI
static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
@@ -1968,8 +2035,10 @@
legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
if (legacy_binding && !using_generic_binding) {
- if (!using_legacy_binding)
- pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
+ if (!using_legacy_binding) {
+ pr_notice("deprecated \"mmu-masters\" DT property in use; %s support unavailable\n",
+ IS_ENABLED(CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS) ? "DMA API" : "SMMU");
+ }
using_legacy_binding = true;
} else if (!legacy_binding && !using_legacy_binding) {
using_generic_binding = true;
@@ -1984,25 +2053,50 @@
return 0;
}
-static void arm_smmu_bus_init(void)
+static int arm_smmu_bus_init(struct iommu_ops *ops)
{
+ int err;
+
/* Oh, for a proper bus abstraction */
- if (!iommu_present(&platform_bus_type))
- bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
+ if (!iommu_present(&platform_bus_type)) {
+ err = bus_set_iommu(&platform_bus_type, ops);
+ if (err)
+ return err;
+ }
#ifdef CONFIG_ARM_AMBA
- if (!iommu_present(&amba_bustype))
- bus_set_iommu(&amba_bustype, &arm_smmu_ops);
+ if (!iommu_present(&amba_bustype)) {
+ err = bus_set_iommu(&amba_bustype, ops);
+ if (err)
+ goto err_reset_platform_ops;
+ }
#endif
#ifdef CONFIG_PCI
if (!iommu_present(&pci_bus_type)) {
- pci_request_acs();
- bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
+ err = bus_set_iommu(&pci_bus_type, ops);
+ if (err)
+ goto err_reset_amba_ops;
}
#endif
#ifdef CONFIG_FSL_MC_BUS
- if (!iommu_present(&fsl_mc_bus_type))
- bus_set_iommu(&fsl_mc_bus_type, &arm_smmu_ops);
+ if (!iommu_present(&fsl_mc_bus_type)) {
+ err = bus_set_iommu(&fsl_mc_bus_type, ops);
+ if (err)
+ goto err_reset_pci_ops;
+ }
#endif
+ return 0;
+
+err_reset_pci_ops: __maybe_unused;
+#ifdef CONFIG_PCI
+ bus_set_iommu(&pci_bus_type, NULL);
+#endif
+err_reset_amba_ops: __maybe_unused;
+#ifdef CONFIG_ARM_AMBA
+ bus_set_iommu(&amba_bustype, NULL);
+#endif
+err_reset_platform_ops: __maybe_unused;
+ bus_set_iommu(&platform_bus_type, NULL);
+ return err;
}
static int arm_smmu_device_probe(struct platform_device *pdev)
@@ -2012,6 +2106,7 @@
struct arm_smmu_device *smmu;
struct device *dev = &pdev->dev;
int num_irqs, i, err;
+ irqreturn_t (*global_fault)(int irq, void *dev);
smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
if (!smmu) {
@@ -2028,10 +2123,6 @@
if (err)
return err;
- smmu = arm_smmu_impl_init(smmu);
- if (IS_ERR(smmu))
- return PTR_ERR(smmu);
-
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
ioaddr = res->start;
smmu->base = devm_ioremap_resource(dev, res);
@@ -2043,6 +2134,10 @@
*/
smmu->numpage = resource_size(res);
+ smmu = arm_smmu_impl_init(smmu);
+ if (IS_ERR(smmu))
+ return PTR_ERR(smmu);
+
num_irqs = 0;
while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
num_irqs++;
@@ -2066,10 +2161,8 @@
for (i = 0; i < num_irqs; ++i) {
int irq = platform_get_irq(pdev, i);
- if (irq < 0) {
- dev_err(dev, "failed to get irq index %d\n", i);
+ if (irq < 0)
return -ENODEV;
- }
smmu->irqs[i] = irq;
}
@@ -2100,9 +2193,14 @@
smmu->num_context_irqs = smmu->num_context_banks;
}
+ if (smmu->impl && smmu->impl->global_fault)
+ global_fault = smmu->impl->global_fault;
+ else
+ global_fault = arm_smmu_global_fault;
+
for (i = 0; i < smmu->num_global_irqs; ++i) {
err = devm_request_irq(smmu->dev, smmu->irqs[i],
- arm_smmu_global_fault,
+ global_fault,
IRQF_SHARED,
"arm-smmu global fault",
smmu);
@@ -2150,38 +2248,28 @@
* ready to handle default domain setup as soon as any SMMU exists.
*/
if (!using_legacy_binding)
- arm_smmu_bus_init();
+ return arm_smmu_bus_init(&arm_smmu_ops);
return 0;
}
-/*
- * With the legacy DT binding in play, though, we have no guarantees about
- * probe order, but then we're also not doing default domains, so we can
- * delay setting bus ops until we're sure every possible SMMU is ready,
- * and that way ensure that no add_device() calls get missed.
- */
-static int arm_smmu_legacy_bus_init(void)
-{
- if (using_legacy_binding)
- arm_smmu_bus_init();
- return 0;
-}
-device_initcall_sync(arm_smmu_legacy_bus_init);
-
-static void arm_smmu_device_shutdown(struct platform_device *pdev)
+static int arm_smmu_device_remove(struct platform_device *pdev)
{
struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
if (!smmu)
- return;
+ return -ENODEV;
if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
- dev_err(&pdev->dev, "removing device with active domains!\n");
+ dev_notice(&pdev->dev, "disabling translation\n");
+
+ arm_smmu_bus_init(NULL);
+ iommu_device_unregister(&smmu->iommu);
+ iommu_device_sysfs_remove(&smmu->iommu);
arm_smmu_rpm_get(smmu);
/* Turn the thing off */
- arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, sCR0_CLIENTPD);
+ arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, ARM_SMMU_sCR0_CLIENTPD);
arm_smmu_rpm_put(smmu);
if (pm_runtime_enabled(smmu->dev))
@@ -2190,6 +2278,12 @@
clk_bulk_disable(smmu->num_clks, smmu->clks);
clk_bulk_unprepare(smmu->num_clks, smmu->clks);
+ return 0;
+}
+
+static void arm_smmu_device_shutdown(struct platform_device *pdev)
+{
+ arm_smmu_device_remove(pdev);
}
static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
@@ -2240,11 +2334,17 @@
static struct platform_driver arm_smmu_driver = {
.driver = {
.name = "arm-smmu",
- .of_match_table = of_match_ptr(arm_smmu_of_match),
+ .of_match_table = arm_smmu_of_match,
.pm = &arm_smmu_pm_ops,
- .suppress_bind_attrs = true,
+ .suppress_bind_attrs = true,
},
.probe = arm_smmu_device_probe,
+ .remove = arm_smmu_device_remove,
.shutdown = arm_smmu_device_shutdown,
};
-builtin_platform_driver(arm_smmu_driver);
+module_platform_driver(arm_smmu_driver);
+
+MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
+MODULE_AUTHOR("Will Deacon <will@kernel.org>");
+MODULE_ALIAS("platform:arm-smmu");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
new file mode 100644
index 0000000..b71647e
--- /dev/null
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
@@ -0,0 +1,528 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * IOMMU API for ARM architected SMMU implementations.
+ *
+ * Copyright (C) 2013 ARM Limited
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ */
+
+#ifndef _ARM_SMMU_H
+#define _ARM_SMMU_H
+
+#include <linux/atomic.h>
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/io-64-nonatomic-hi-lo.h>
+#include <linux/io-pgtable.h>
+#include <linux/iommu.h>
+#include <linux/irqreturn.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+/* Configuration registers */
+#define ARM_SMMU_GR0_sCR0 0x0
+#define ARM_SMMU_sCR0_VMID16EN BIT(31)
+#define ARM_SMMU_sCR0_BSU GENMASK(15, 14)
+#define ARM_SMMU_sCR0_FB BIT(13)
+#define ARM_SMMU_sCR0_PTM BIT(12)
+#define ARM_SMMU_sCR0_VMIDPNE BIT(11)
+#define ARM_SMMU_sCR0_USFCFG BIT(10)
+#define ARM_SMMU_sCR0_GCFGFIE BIT(5)
+#define ARM_SMMU_sCR0_GCFGFRE BIT(4)
+#define ARM_SMMU_sCR0_EXIDENABLE BIT(3)
+#define ARM_SMMU_sCR0_GFIE BIT(2)
+#define ARM_SMMU_sCR0_GFRE BIT(1)
+#define ARM_SMMU_sCR0_CLIENTPD BIT(0)
+
+/* Auxiliary Configuration register */
+#define ARM_SMMU_GR0_sACR 0x10
+
+/* Identification registers */
+#define ARM_SMMU_GR0_ID0 0x20
+#define ARM_SMMU_ID0_S1TS BIT(30)
+#define ARM_SMMU_ID0_S2TS BIT(29)
+#define ARM_SMMU_ID0_NTS BIT(28)
+#define ARM_SMMU_ID0_SMS BIT(27)
+#define ARM_SMMU_ID0_ATOSNS BIT(26)
+#define ARM_SMMU_ID0_PTFS_NO_AARCH32 BIT(25)
+#define ARM_SMMU_ID0_PTFS_NO_AARCH32S BIT(24)
+#define ARM_SMMU_ID0_NUMIRPT GENMASK(23, 16)
+#define ARM_SMMU_ID0_CTTW BIT(14)
+#define ARM_SMMU_ID0_NUMSIDB GENMASK(12, 9)
+#define ARM_SMMU_ID0_EXIDS BIT(8)
+#define ARM_SMMU_ID0_NUMSMRG GENMASK(7, 0)
+
+#define ARM_SMMU_GR0_ID1 0x24
+#define ARM_SMMU_ID1_PAGESIZE BIT(31)
+#define ARM_SMMU_ID1_NUMPAGENDXB GENMASK(30, 28)
+#define ARM_SMMU_ID1_NUMS2CB GENMASK(23, 16)
+#define ARM_SMMU_ID1_NUMCB GENMASK(7, 0)
+
+#define ARM_SMMU_GR0_ID2 0x28
+#define ARM_SMMU_ID2_VMID16 BIT(15)
+#define ARM_SMMU_ID2_PTFS_64K BIT(14)
+#define ARM_SMMU_ID2_PTFS_16K BIT(13)
+#define ARM_SMMU_ID2_PTFS_4K BIT(12)
+#define ARM_SMMU_ID2_UBS GENMASK(11, 8)
+#define ARM_SMMU_ID2_OAS GENMASK(7, 4)
+#define ARM_SMMU_ID2_IAS GENMASK(3, 0)
+
+#define ARM_SMMU_GR0_ID3 0x2c
+#define ARM_SMMU_GR0_ID4 0x30
+#define ARM_SMMU_GR0_ID5 0x34
+#define ARM_SMMU_GR0_ID6 0x38
+
+#define ARM_SMMU_GR0_ID7 0x3c
+#define ARM_SMMU_ID7_MAJOR GENMASK(7, 4)
+#define ARM_SMMU_ID7_MINOR GENMASK(3, 0)
+
+#define ARM_SMMU_GR0_sGFSR 0x48
+#define ARM_SMMU_sGFSR_USF BIT(1)
+
+#define ARM_SMMU_GR0_sGFSYNR0 0x50
+#define ARM_SMMU_GR0_sGFSYNR1 0x54
+#define ARM_SMMU_GR0_sGFSYNR2 0x58
+
+/* Global TLB invalidation */
+#define ARM_SMMU_GR0_TLBIVMID 0x64
+#define ARM_SMMU_GR0_TLBIALLNSNH 0x68
+#define ARM_SMMU_GR0_TLBIALLH 0x6c
+#define ARM_SMMU_GR0_sTLBGSYNC 0x70
+
+#define ARM_SMMU_GR0_sTLBGSTATUS 0x74
+#define ARM_SMMU_sTLBGSTATUS_GSACTIVE BIT(0)
+
+/* Stream mapping registers */
+#define ARM_SMMU_GR0_SMR(n) (0x800 + ((n) << 2))
+#define ARM_SMMU_SMR_VALID BIT(31)
+#define ARM_SMMU_SMR_MASK GENMASK(31, 16)
+#define ARM_SMMU_SMR_ID GENMASK(15, 0)
+
+#define ARM_SMMU_GR0_S2CR(n) (0xc00 + ((n) << 2))
+#define ARM_SMMU_S2CR_PRIVCFG GENMASK(25, 24)
+enum arm_smmu_s2cr_privcfg {
+ S2CR_PRIVCFG_DEFAULT,
+ S2CR_PRIVCFG_DIPAN,
+ S2CR_PRIVCFG_UNPRIV,
+ S2CR_PRIVCFG_PRIV,
+};
+#define ARM_SMMU_S2CR_TYPE GENMASK(17, 16)
+enum arm_smmu_s2cr_type {
+ S2CR_TYPE_TRANS,
+ S2CR_TYPE_BYPASS,
+ S2CR_TYPE_FAULT,
+};
+#define ARM_SMMU_S2CR_EXIDVALID BIT(10)
+#define ARM_SMMU_S2CR_CBNDX GENMASK(7, 0)
+
+/* Context bank attribute registers */
+#define ARM_SMMU_GR1_CBAR(n) (0x0 + ((n) << 2))
+#define ARM_SMMU_CBAR_IRPTNDX GENMASK(31, 24)
+#define ARM_SMMU_CBAR_TYPE GENMASK(17, 16)
+enum arm_smmu_cbar_type {
+ CBAR_TYPE_S2_TRANS,
+ CBAR_TYPE_S1_TRANS_S2_BYPASS,
+ CBAR_TYPE_S1_TRANS_S2_FAULT,
+ CBAR_TYPE_S1_TRANS_S2_TRANS,
+};
+#define ARM_SMMU_CBAR_S1_MEMATTR GENMASK(15, 12)
+#define ARM_SMMU_CBAR_S1_MEMATTR_WB 0xf
+#define ARM_SMMU_CBAR_S1_BPSHCFG GENMASK(9, 8)
+#define ARM_SMMU_CBAR_S1_BPSHCFG_NSH 3
+#define ARM_SMMU_CBAR_VMID GENMASK(7, 0)
+
+#define ARM_SMMU_GR1_CBFRSYNRA(n) (0x400 + ((n) << 2))
+
+#define ARM_SMMU_GR1_CBA2R(n) (0x800 + ((n) << 2))
+#define ARM_SMMU_CBA2R_VMID16 GENMASK(31, 16)
+#define ARM_SMMU_CBA2R_VA64 BIT(0)
+
+#define ARM_SMMU_CB_SCTLR 0x0
+#define ARM_SMMU_SCTLR_S1_ASIDPNE BIT(12)
+#define ARM_SMMU_SCTLR_CFCFG BIT(7)
+#define ARM_SMMU_SCTLR_CFIE BIT(6)
+#define ARM_SMMU_SCTLR_CFRE BIT(5)
+#define ARM_SMMU_SCTLR_E BIT(4)
+#define ARM_SMMU_SCTLR_AFE BIT(2)
+#define ARM_SMMU_SCTLR_TRE BIT(1)
+#define ARM_SMMU_SCTLR_M BIT(0)
+
+#define ARM_SMMU_CB_ACTLR 0x4
+
+#define ARM_SMMU_CB_RESUME 0x8
+#define ARM_SMMU_RESUME_TERMINATE BIT(0)
+
+#define ARM_SMMU_CB_TCR2 0x10
+#define ARM_SMMU_TCR2_SEP GENMASK(17, 15)
+#define ARM_SMMU_TCR2_SEP_UPSTREAM 0x7
+#define ARM_SMMU_TCR2_AS BIT(4)
+#define ARM_SMMU_TCR2_PASIZE GENMASK(3, 0)
+
+#define ARM_SMMU_CB_TTBR0 0x20
+#define ARM_SMMU_CB_TTBR1 0x28
+#define ARM_SMMU_TTBRn_ASID GENMASK_ULL(63, 48)
+
+#define ARM_SMMU_CB_TCR 0x30
+#define ARM_SMMU_TCR_EAE BIT(31)
+#define ARM_SMMU_TCR_EPD1 BIT(23)
+#define ARM_SMMU_TCR_A1 BIT(22)
+#define ARM_SMMU_TCR_TG0 GENMASK(15, 14)
+#define ARM_SMMU_TCR_SH0 GENMASK(13, 12)
+#define ARM_SMMU_TCR_ORGN0 GENMASK(11, 10)
+#define ARM_SMMU_TCR_IRGN0 GENMASK(9, 8)
+#define ARM_SMMU_TCR_EPD0 BIT(7)
+#define ARM_SMMU_TCR_T0SZ GENMASK(5, 0)
+
+#define ARM_SMMU_VTCR_RES1 BIT(31)
+#define ARM_SMMU_VTCR_PS GENMASK(18, 16)
+#define ARM_SMMU_VTCR_TG0 ARM_SMMU_TCR_TG0
+#define ARM_SMMU_VTCR_SH0 ARM_SMMU_TCR_SH0
+#define ARM_SMMU_VTCR_ORGN0 ARM_SMMU_TCR_ORGN0
+#define ARM_SMMU_VTCR_IRGN0 ARM_SMMU_TCR_IRGN0
+#define ARM_SMMU_VTCR_SL0 GENMASK(7, 6)
+#define ARM_SMMU_VTCR_T0SZ ARM_SMMU_TCR_T0SZ
+
+#define ARM_SMMU_CB_CONTEXTIDR 0x34
+#define ARM_SMMU_CB_S1_MAIR0 0x38
+#define ARM_SMMU_CB_S1_MAIR1 0x3c
+
+#define ARM_SMMU_CB_PAR 0x50
+#define ARM_SMMU_CB_PAR_F BIT(0)
+
+#define ARM_SMMU_CB_FSR 0x58
+#define ARM_SMMU_FSR_MULTI BIT(31)
+#define ARM_SMMU_FSR_SS BIT(30)
+#define ARM_SMMU_FSR_UUT BIT(8)
+#define ARM_SMMU_FSR_ASF BIT(7)
+#define ARM_SMMU_FSR_TLBLKF BIT(6)
+#define ARM_SMMU_FSR_TLBMCF BIT(5)
+#define ARM_SMMU_FSR_EF BIT(4)
+#define ARM_SMMU_FSR_PF BIT(3)
+#define ARM_SMMU_FSR_AFF BIT(2)
+#define ARM_SMMU_FSR_TF BIT(1)
+
+#define ARM_SMMU_FSR_IGN (ARM_SMMU_FSR_AFF | \
+ ARM_SMMU_FSR_ASF | \
+ ARM_SMMU_FSR_TLBMCF | \
+ ARM_SMMU_FSR_TLBLKF)
+
+#define ARM_SMMU_FSR_FAULT (ARM_SMMU_FSR_MULTI | \
+ ARM_SMMU_FSR_SS | \
+ ARM_SMMU_FSR_UUT | \
+ ARM_SMMU_FSR_EF | \
+ ARM_SMMU_FSR_PF | \
+ ARM_SMMU_FSR_TF | \
+ ARM_SMMU_FSR_IGN)
+
+#define ARM_SMMU_CB_FAR 0x60
+
+#define ARM_SMMU_CB_FSYNR0 0x68
+#define ARM_SMMU_FSYNR0_WNR BIT(4)
+
+#define ARM_SMMU_CB_S1_TLBIVA 0x600
+#define ARM_SMMU_CB_S1_TLBIASID 0x610
+#define ARM_SMMU_CB_S1_TLBIVAL 0x620
+#define ARM_SMMU_CB_S2_TLBIIPAS2 0x630
+#define ARM_SMMU_CB_S2_TLBIIPAS2L 0x638
+#define ARM_SMMU_CB_TLBSYNC 0x7f0
+#define ARM_SMMU_CB_TLBSTATUS 0x7f4
+#define ARM_SMMU_CB_ATS1PR 0x800
+
+#define ARM_SMMU_CB_ATSR 0x8f0
+#define ARM_SMMU_ATSR_ACTIVE BIT(0)
+
+
+/* Maximum number of context banks per SMMU */
+#define ARM_SMMU_MAX_CBS 128
+
+#define TLB_LOOP_TIMEOUT 1000000 /* 1s! */
+#define TLB_SPIN_COUNT 10
+
+/* Shared driver definitions */
+enum arm_smmu_arch_version {
+ ARM_SMMU_V1,
+ ARM_SMMU_V1_64K,
+ ARM_SMMU_V2,
+};
+
+enum arm_smmu_implementation {
+ GENERIC_SMMU,
+ ARM_MMU500,
+ CAVIUM_SMMUV2,
+ QCOM_SMMUV2,
+};
+
+struct arm_smmu_s2cr {
+ struct iommu_group *group;
+ int count;
+ enum arm_smmu_s2cr_type type;
+ enum arm_smmu_s2cr_privcfg privcfg;
+ u8 cbndx;
+};
+
+struct arm_smmu_smr {
+ u16 mask;
+ u16 id;
+ bool valid;
+ bool pinned;
+};
+
+struct arm_smmu_device {
+ struct device *dev;
+
+ void __iomem *base;
+ unsigned int numpage;
+ unsigned int pgshift;
+
+#define ARM_SMMU_FEAT_COHERENT_WALK (1 << 0)
+#define ARM_SMMU_FEAT_STREAM_MATCH (1 << 1)
+#define ARM_SMMU_FEAT_TRANS_S1 (1 << 2)
+#define ARM_SMMU_FEAT_TRANS_S2 (1 << 3)
+#define ARM_SMMU_FEAT_TRANS_NESTED (1 << 4)
+#define ARM_SMMU_FEAT_TRANS_OPS (1 << 5)
+#define ARM_SMMU_FEAT_VMID16 (1 << 6)
+#define ARM_SMMU_FEAT_FMT_AARCH64_4K (1 << 7)
+#define ARM_SMMU_FEAT_FMT_AARCH64_16K (1 << 8)
+#define ARM_SMMU_FEAT_FMT_AARCH64_64K (1 << 9)
+#define ARM_SMMU_FEAT_FMT_AARCH32_L (1 << 10)
+#define ARM_SMMU_FEAT_FMT_AARCH32_S (1 << 11)
+#define ARM_SMMU_FEAT_EXIDS (1 << 12)
+ u32 features;
+
+ enum arm_smmu_arch_version version;
+ enum arm_smmu_implementation model;
+ const struct arm_smmu_impl *impl;
+
+ u32 num_context_banks;
+ u32 num_s2_context_banks;
+ DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
+ struct arm_smmu_cb *cbs;
+ atomic_t irptndx;
+
+ u32 num_mapping_groups;
+ u16 streamid_mask;
+ u16 smr_mask_mask;
+ struct arm_smmu_smr *smrs;
+ struct arm_smmu_s2cr *s2crs;
+ struct mutex stream_map_mutex;
+
+ unsigned long va_size;
+ unsigned long ipa_size;
+ unsigned long pa_size;
+ unsigned long pgsize_bitmap;
+
+ u32 num_global_irqs;
+ u32 num_context_irqs;
+ unsigned int *irqs;
+ struct clk_bulk_data *clks;
+ int num_clks;
+
+ spinlock_t global_sync_lock;
+
+ /* IOMMU core code handle */
+ struct iommu_device iommu;
+};
+
+enum arm_smmu_context_fmt {
+ ARM_SMMU_CTX_FMT_NONE,
+ ARM_SMMU_CTX_FMT_AARCH64,
+ ARM_SMMU_CTX_FMT_AARCH32_L,
+ ARM_SMMU_CTX_FMT_AARCH32_S,
+};
+
+struct arm_smmu_cfg {
+ u8 cbndx;
+ u8 irptndx;
+ union {
+ u16 asid;
+ u16 vmid;
+ };
+ enum arm_smmu_cbar_type cbar;
+ enum arm_smmu_context_fmt fmt;
+};
+#define ARM_SMMU_INVALID_IRPTNDX 0xff
+
+struct arm_smmu_cb {
+ u64 ttbr[2];
+ u32 tcr[2];
+ u32 mair[2];
+ struct arm_smmu_cfg *cfg;
+};
+
+enum arm_smmu_domain_stage {
+ ARM_SMMU_DOMAIN_S1 = 0,
+ ARM_SMMU_DOMAIN_S2,
+ ARM_SMMU_DOMAIN_NESTED,
+ ARM_SMMU_DOMAIN_BYPASS,
+};
+
+struct arm_smmu_domain {
+ struct arm_smmu_device *smmu;
+ struct io_pgtable_ops *pgtbl_ops;
+ const struct iommu_flush_ops *flush_ops;
+ struct arm_smmu_cfg cfg;
+ enum arm_smmu_domain_stage stage;
+ bool non_strict;
+ struct mutex init_mutex; /* Protects smmu pointer */
+ spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */
+ struct iommu_domain domain;
+};
+
+struct arm_smmu_master_cfg {
+ struct arm_smmu_device *smmu;
+ s16 smendx[];
+};
+
+static inline u32 arm_smmu_lpae_tcr(const struct io_pgtable_cfg *cfg)
+{
+ u32 tcr = FIELD_PREP(ARM_SMMU_TCR_TG0, cfg->arm_lpae_s1_cfg.tcr.tg) |
+ FIELD_PREP(ARM_SMMU_TCR_SH0, cfg->arm_lpae_s1_cfg.tcr.sh) |
+ FIELD_PREP(ARM_SMMU_TCR_ORGN0, cfg->arm_lpae_s1_cfg.tcr.orgn) |
+ FIELD_PREP(ARM_SMMU_TCR_IRGN0, cfg->arm_lpae_s1_cfg.tcr.irgn) |
+ FIELD_PREP(ARM_SMMU_TCR_T0SZ, cfg->arm_lpae_s1_cfg.tcr.tsz);
+
+ /*
+ * When TTBR1 is selected shift the TCR fields by 16 bits and disable
+ * translation in TTBR0
+ */
+ if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) {
+ tcr = (tcr << 16) & ~ARM_SMMU_TCR_A1;
+ tcr |= ARM_SMMU_TCR_EPD0;
+ } else
+ tcr |= ARM_SMMU_TCR_EPD1;
+
+ return tcr;
+}
+
+static inline u32 arm_smmu_lpae_tcr2(const struct io_pgtable_cfg *cfg)
+{
+ return FIELD_PREP(ARM_SMMU_TCR2_PASIZE, cfg->arm_lpae_s1_cfg.tcr.ips) |
+ FIELD_PREP(ARM_SMMU_TCR2_SEP, ARM_SMMU_TCR2_SEP_UPSTREAM);
+}
+
+static inline u32 arm_smmu_lpae_vtcr(const struct io_pgtable_cfg *cfg)
+{
+ return ARM_SMMU_VTCR_RES1 |
+ FIELD_PREP(ARM_SMMU_VTCR_PS, cfg->arm_lpae_s2_cfg.vtcr.ps) |
+ FIELD_PREP(ARM_SMMU_VTCR_TG0, cfg->arm_lpae_s2_cfg.vtcr.tg) |
+ FIELD_PREP(ARM_SMMU_VTCR_SH0, cfg->arm_lpae_s2_cfg.vtcr.sh) |
+ FIELD_PREP(ARM_SMMU_VTCR_ORGN0, cfg->arm_lpae_s2_cfg.vtcr.orgn) |
+ FIELD_PREP(ARM_SMMU_VTCR_IRGN0, cfg->arm_lpae_s2_cfg.vtcr.irgn) |
+ FIELD_PREP(ARM_SMMU_VTCR_SL0, cfg->arm_lpae_s2_cfg.vtcr.sl) |
+ FIELD_PREP(ARM_SMMU_VTCR_T0SZ, cfg->arm_lpae_s2_cfg.vtcr.tsz);
+}
+
+/* Implementation details, yay! */
+struct arm_smmu_impl {
+ u32 (*read_reg)(struct arm_smmu_device *smmu, int page, int offset);
+ void (*write_reg)(struct arm_smmu_device *smmu, int page, int offset,
+ u32 val);
+ u64 (*read_reg64)(struct arm_smmu_device *smmu, int page, int offset);
+ void (*write_reg64)(struct arm_smmu_device *smmu, int page, int offset,
+ u64 val);
+ int (*cfg_probe)(struct arm_smmu_device *smmu);
+ int (*reset)(struct arm_smmu_device *smmu);
+ int (*init_context)(struct arm_smmu_domain *smmu_domain,
+ struct io_pgtable_cfg *cfg, struct device *dev);
+ void (*tlb_sync)(struct arm_smmu_device *smmu, int page, int sync,
+ int status);
+ int (*def_domain_type)(struct device *dev);
+ irqreturn_t (*global_fault)(int irq, void *dev);
+ irqreturn_t (*context_fault)(int irq, void *dev);
+ int (*alloc_context_bank)(struct arm_smmu_domain *smmu_domain,
+ struct arm_smmu_device *smmu,
+ struct device *dev, int start);
+ void (*write_s2cr)(struct arm_smmu_device *smmu, int idx);
+};
+
+#define INVALID_SMENDX -1
+#define cfg_smendx(cfg, fw, i) \
+ (i >= fw->num_ids ? INVALID_SMENDX : cfg->smendx[i])
+#define for_each_cfg_sme(cfg, fw, i, idx) \
+ for (i = 0; idx = cfg_smendx(cfg, fw, i), i < fw->num_ids; ++i)
+
+static inline int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
+{
+ int idx;
+
+ do {
+ idx = find_next_zero_bit(map, end, start);
+ if (idx == end)
+ return -ENOSPC;
+ } while (test_and_set_bit(idx, map));
+
+ return idx;
+}
+
+static inline void __iomem *arm_smmu_page(struct arm_smmu_device *smmu, int n)
+{
+ return smmu->base + (n << smmu->pgshift);
+}
+
+static inline u32 arm_smmu_readl(struct arm_smmu_device *smmu, int page, int offset)
+{
+ if (smmu->impl && unlikely(smmu->impl->read_reg))
+ return smmu->impl->read_reg(smmu, page, offset);
+ return readl_relaxed(arm_smmu_page(smmu, page) + offset);
+}
+
+static inline void arm_smmu_writel(struct arm_smmu_device *smmu, int page,
+ int offset, u32 val)
+{
+ if (smmu->impl && unlikely(smmu->impl->write_reg))
+ smmu->impl->write_reg(smmu, page, offset, val);
+ else
+ writel_relaxed(val, arm_smmu_page(smmu, page) + offset);
+}
+
+static inline u64 arm_smmu_readq(struct arm_smmu_device *smmu, int page, int offset)
+{
+ if (smmu->impl && unlikely(smmu->impl->read_reg64))
+ return smmu->impl->read_reg64(smmu, page, offset);
+ return readq_relaxed(arm_smmu_page(smmu, page) + offset);
+}
+
+static inline void arm_smmu_writeq(struct arm_smmu_device *smmu, int page,
+ int offset, u64 val)
+{
+ if (smmu->impl && unlikely(smmu->impl->write_reg64))
+ smmu->impl->write_reg64(smmu, page, offset, val);
+ else
+ writeq_relaxed(val, arm_smmu_page(smmu, page) + offset);
+}
+
+#define ARM_SMMU_GR0 0
+#define ARM_SMMU_GR1 1
+#define ARM_SMMU_CB(s, n) ((s)->numpage + (n))
+
+#define arm_smmu_gr0_read(s, o) \
+ arm_smmu_readl((s), ARM_SMMU_GR0, (o))
+#define arm_smmu_gr0_write(s, o, v) \
+ arm_smmu_writel((s), ARM_SMMU_GR0, (o), (v))
+
+#define arm_smmu_gr1_read(s, o) \
+ arm_smmu_readl((s), ARM_SMMU_GR1, (o))
+#define arm_smmu_gr1_write(s, o, v) \
+ arm_smmu_writel((s), ARM_SMMU_GR1, (o), (v))
+
+#define arm_smmu_cb_read(s, n, o) \
+ arm_smmu_readl((s), ARM_SMMU_CB((s), (n)), (o))
+#define arm_smmu_cb_write(s, n, o, v) \
+ arm_smmu_writel((s), ARM_SMMU_CB((s), (n)), (o), (v))
+#define arm_smmu_cb_readq(s, n, o) \
+ arm_smmu_readq((s), ARM_SMMU_CB((s), (n)), (o))
+#define arm_smmu_cb_writeq(s, n, o, v) \
+ arm_smmu_writeq((s), ARM_SMMU_CB((s), (n)), (o), (v))
+
+struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu);
+struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu);
+struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu);
+
+void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx);
+int arm_mmu500_reset(struct arm_smmu_device *smmu);
+
+#endif /* _ARM_SMMU_H */
diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
similarity index 84%
rename from drivers/iommu/qcom_iommu.c
rename to drivers/iommu/arm/arm-smmu/qcom_iommu.c
index 280de92..b30d6c9 100644
--- a/drivers/iommu/qcom_iommu.c
+++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
@@ -37,18 +37,24 @@
#define SMMU_INTR_SEL_NS 0x2000
+enum qcom_iommu_clk {
+ CLK_IFACE,
+ CLK_BUS,
+ CLK_TBU,
+ CLK_NUM,
+};
+
struct qcom_iommu_ctx;
struct qcom_iommu_dev {
/* IOMMU core code handle */
struct iommu_device iommu;
struct device *dev;
- struct clk *iface_clk;
- struct clk *bus_clk;
+ struct clk_bulk_data clks[CLK_NUM];
void __iomem *local_base;
u32 sec_id;
u8 num_ctxs;
- struct qcom_iommu_ctx *ctxs[0]; /* indexed by asid-1 */
+ struct qcom_iommu_ctx *ctxs[]; /* indexed by asid-1 */
};
struct qcom_iommu_ctx {
@@ -65,6 +71,7 @@
struct mutex init_mutex; /* Protects iommu pointer */
struct iommu_domain domain;
struct qcom_iommu_dev *iommu;
+ struct iommu_fwspec *fwspec;
};
static struct qcom_iommu_domain *to_qcom_iommu_domain(struct iommu_domain *dom)
@@ -74,16 +81,19 @@
static const struct iommu_ops qcom_iommu_ops;
-static struct qcom_iommu_dev * to_iommu(struct iommu_fwspec *fwspec)
+static struct qcom_iommu_dev * to_iommu(struct device *dev)
{
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+
if (!fwspec || fwspec->ops != &qcom_iommu_ops)
return NULL;
- return fwspec->iommu_priv;
+
+ return dev_iommu_priv_get(dev);
}
-static struct qcom_iommu_ctx * to_ctx(struct iommu_fwspec *fwspec, unsigned asid)
+static struct qcom_iommu_ctx * to_ctx(struct qcom_iommu_domain *d, unsigned asid)
{
- struct qcom_iommu_dev *qcom_iommu = to_iommu(fwspec);
+ struct qcom_iommu_dev *qcom_iommu = d->iommu;
if (!qcom_iommu)
return NULL;
return qcom_iommu->ctxs[asid - 1];
@@ -115,11 +125,12 @@
static void qcom_iommu_tlb_sync(void *cookie)
{
- struct iommu_fwspec *fwspec = cookie;
+ struct qcom_iommu_domain *qcom_domain = cookie;
+ struct iommu_fwspec *fwspec = qcom_domain->fwspec;
unsigned i;
for (i = 0; i < fwspec->num_ids; i++) {
- struct qcom_iommu_ctx *ctx = to_ctx(fwspec, fwspec->ids[i]);
+ struct qcom_iommu_ctx *ctx = to_ctx(qcom_domain, fwspec->ids[i]);
unsigned int val, ret;
iommu_writel(ctx, ARM_SMMU_CB_TLBSYNC, 0);
@@ -133,11 +144,12 @@
static void qcom_iommu_tlb_inv_context(void *cookie)
{
- struct iommu_fwspec *fwspec = cookie;
+ struct qcom_iommu_domain *qcom_domain = cookie;
+ struct iommu_fwspec *fwspec = qcom_domain->fwspec;
unsigned i;
for (i = 0; i < fwspec->num_ids; i++) {
- struct qcom_iommu_ctx *ctx = to_ctx(fwspec, fwspec->ids[i]);
+ struct qcom_iommu_ctx *ctx = to_ctx(qcom_domain, fwspec->ids[i]);
iommu_writel(ctx, ARM_SMMU_CB_S1_TLBIASID, ctx->asid);
}
@@ -147,13 +159,14 @@
static void qcom_iommu_tlb_inv_range_nosync(unsigned long iova, size_t size,
size_t granule, bool leaf, void *cookie)
{
- struct iommu_fwspec *fwspec = cookie;
+ struct qcom_iommu_domain *qcom_domain = cookie;
+ struct iommu_fwspec *fwspec = qcom_domain->fwspec;
unsigned i, reg;
reg = leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
for (i = 0; i < fwspec->num_ids; i++) {
- struct qcom_iommu_ctx *ctx = to_ctx(fwspec, fwspec->ids[i]);
+ struct qcom_iommu_ctx *ctx = to_ctx(qcom_domain, fwspec->ids[i]);
size_t s = size;
iova = (iova >> 12) << 12;
@@ -201,7 +214,7 @@
fsr = iommu_readl(ctx, ARM_SMMU_CB_FSR);
- if (!(fsr & FSR_FAULT))
+ if (!(fsr & ARM_SMMU_FSR_FAULT))
return IRQ_NONE;
fsynr = iommu_readl(ctx, ARM_SMMU_CB_FSYNR0);
@@ -215,16 +228,17 @@
}
iommu_writel(ctx, ARM_SMMU_CB_FSR, fsr);
- iommu_writel(ctx, ARM_SMMU_CB_RESUME, RESUME_TERMINATE);
+ iommu_writel(ctx, ARM_SMMU_CB_RESUME, ARM_SMMU_RESUME_TERMINATE);
return IRQ_HANDLED;
}
static int qcom_iommu_init_domain(struct iommu_domain *domain,
struct qcom_iommu_dev *qcom_iommu,
- struct iommu_fwspec *fwspec)
+ struct device *dev)
{
struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain);
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct io_pgtable_ops *pgtbl_ops;
struct io_pgtable_cfg pgtbl_cfg;
int i, ret = 0;
@@ -243,7 +257,9 @@
};
qcom_domain->iommu = qcom_iommu;
- pgtbl_ops = alloc_io_pgtable_ops(ARM_32_LPAE_S1, &pgtbl_cfg, fwspec);
+ qcom_domain->fwspec = fwspec;
+
+ pgtbl_ops = alloc_io_pgtable_ops(ARM_32_LPAE_S1, &pgtbl_cfg, qcom_domain);
if (!pgtbl_ops) {
dev_err(qcom_iommu->dev, "failed to allocate pagetable ops\n");
ret = -ENOMEM;
@@ -256,7 +272,7 @@
domain->geometry.force_aperture = true;
for (i = 0; i < fwspec->num_ids; i++) {
- struct qcom_iommu_ctx *ctx = to_ctx(fwspec, fwspec->ids[i]);
+ struct qcom_iommu_ctx *ctx = to_ctx(qcom_domain, fwspec->ids[i]);
if (!ctx->secure_init) {
ret = qcom_scm_restore_sec_cfg(qcom_iommu->sec_id, ctx->asid);
@@ -269,31 +285,30 @@
/* TTBRs */
iommu_writeq(ctx, ARM_SMMU_CB_TTBR0,
- pgtbl_cfg.arm_lpae_s1_cfg.ttbr[0] |
- FIELD_PREP(TTBRn_ASID, ctx->asid));
- iommu_writeq(ctx, ARM_SMMU_CB_TTBR1,
- pgtbl_cfg.arm_lpae_s1_cfg.ttbr[1] |
- FIELD_PREP(TTBRn_ASID, ctx->asid));
+ pgtbl_cfg.arm_lpae_s1_cfg.ttbr |
+ FIELD_PREP(ARM_SMMU_TTBRn_ASID, ctx->asid));
+ iommu_writeq(ctx, ARM_SMMU_CB_TTBR1, 0);
/* TCR */
iommu_writel(ctx, ARM_SMMU_CB_TCR2,
- (pgtbl_cfg.arm_lpae_s1_cfg.tcr >> 32) |
- FIELD_PREP(TCR2_SEP, TCR2_SEP_UPSTREAM));
+ arm_smmu_lpae_tcr2(&pgtbl_cfg));
iommu_writel(ctx, ARM_SMMU_CB_TCR,
- pgtbl_cfg.arm_lpae_s1_cfg.tcr);
+ arm_smmu_lpae_tcr(&pgtbl_cfg) | ARM_SMMU_TCR_EAE);
/* MAIRs (stage-1 only) */
iommu_writel(ctx, ARM_SMMU_CB_S1_MAIR0,
- pgtbl_cfg.arm_lpae_s1_cfg.mair[0]);
+ pgtbl_cfg.arm_lpae_s1_cfg.mair);
iommu_writel(ctx, ARM_SMMU_CB_S1_MAIR1,
- pgtbl_cfg.arm_lpae_s1_cfg.mair[1]);
+ pgtbl_cfg.arm_lpae_s1_cfg.mair >> 32);
/* SCTLR */
- reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE |
- SCTLR_M | SCTLR_S1_ASIDPNE | SCTLR_CFCFG;
+ reg = ARM_SMMU_SCTLR_CFIE | ARM_SMMU_SCTLR_CFRE |
+ ARM_SMMU_SCTLR_AFE | ARM_SMMU_SCTLR_TRE |
+ ARM_SMMU_SCTLR_M | ARM_SMMU_SCTLR_S1_ASIDPNE |
+ ARM_SMMU_SCTLR_CFCFG;
- if (IS_ENABLED(CONFIG_BIG_ENDIAN))
- reg |= SCTLR_E;
+ if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
+ reg |= ARM_SMMU_SCTLR_E;
iommu_writel(ctx, ARM_SMMU_CB_SCTLR, reg);
@@ -364,8 +379,7 @@
static int qcom_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
- struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
- struct qcom_iommu_dev *qcom_iommu = to_iommu(fwspec);
+ struct qcom_iommu_dev *qcom_iommu = to_iommu(dev);
struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain);
int ret;
@@ -376,7 +390,7 @@
/* Ensure that the domain is finalized */
pm_runtime_get_sync(qcom_iommu->dev);
- ret = qcom_iommu_init_domain(domain, qcom_iommu, fwspec);
+ ret = qcom_iommu_init_domain(domain, qcom_iommu, dev);
pm_runtime_put_sync(qcom_iommu->dev);
if (ret < 0)
return ret;
@@ -398,9 +412,9 @@
static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *dev)
{
- struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
- struct qcom_iommu_dev *qcom_iommu = to_iommu(fwspec);
struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain);
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+ struct qcom_iommu_dev *qcom_iommu = to_iommu(dev);
unsigned i;
if (WARN_ON(!qcom_domain->iommu))
@@ -408,7 +422,7 @@
pm_runtime_get_sync(qcom_iommu->dev);
for (i = 0; i < fwspec->num_ids; i++) {
- struct qcom_iommu_ctx *ctx = to_ctx(fwspec, fwspec->ids[i]);
+ struct qcom_iommu_ctx *ctx = to_ctx(qcom_domain, fwspec->ids[i]);
/* Disable the context bank: */
iommu_writel(ctx, ARM_SMMU_CB_SCTLR, 0);
@@ -419,7 +433,7 @@
}
static int qcom_iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot)
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
int ret;
unsigned long flags;
@@ -430,7 +444,7 @@
return -ENODEV;
spin_lock_irqsave(&qcom_domain->pgtbl_lock, flags);
- ret = ops->map(ops, iova, paddr, size, prot);
+ ret = ops->map(ops, iova, paddr, size, prot, GFP_ATOMIC);
spin_unlock_irqrestore(&qcom_domain->pgtbl_lock, flags);
return ret;
}
@@ -513,14 +527,13 @@
}
}
-static int qcom_iommu_add_device(struct device *dev)
+static struct iommu_device *qcom_iommu_probe_device(struct device *dev)
{
- struct qcom_iommu_dev *qcom_iommu = to_iommu(dev_iommu_fwspec_get(dev));
- struct iommu_group *group;
+ struct qcom_iommu_dev *qcom_iommu = to_iommu(dev);
struct device_link *link;
if (!qcom_iommu)
- return -ENODEV;
+ return ERR_PTR(-ENODEV);
/*
* Establish the link between iommu and master, so that the
@@ -531,34 +544,24 @@
if (!link) {
dev_err(qcom_iommu->dev, "Unable to create device link between %s and %s\n",
dev_name(qcom_iommu->dev), dev_name(dev));
- return -ENODEV;
+ return ERR_PTR(-ENODEV);
}
- group = iommu_group_get_for_dev(dev);
- if (IS_ERR_OR_NULL(group))
- return PTR_ERR_OR_ZERO(group);
-
- iommu_group_put(group);
- iommu_device_link(&qcom_iommu->iommu, dev);
-
- return 0;
+ return &qcom_iommu->iommu;
}
-static void qcom_iommu_remove_device(struct device *dev)
+static void qcom_iommu_release_device(struct device *dev)
{
- struct qcom_iommu_dev *qcom_iommu = to_iommu(dev_iommu_fwspec_get(dev));
+ struct qcom_iommu_dev *qcom_iommu = to_iommu(dev);
if (!qcom_iommu)
return;
- iommu_device_unlink(&qcom_iommu->iommu, dev);
- iommu_group_remove_device(dev);
iommu_fwspec_free(dev);
}
static int qcom_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
{
- struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct qcom_iommu_dev *qcom_iommu;
struct platform_device *iommu_pdev;
unsigned asid = args->args[0];
@@ -581,18 +584,22 @@
* index into qcom_iommu->ctxs:
*/
if (WARN_ON(asid < 1) ||
- WARN_ON(asid > qcom_iommu->num_ctxs))
+ WARN_ON(asid > qcom_iommu->num_ctxs)) {
+ put_device(&iommu_pdev->dev);
return -EINVAL;
+ }
- if (!fwspec->iommu_priv) {
- fwspec->iommu_priv = qcom_iommu;
+ if (!dev_iommu_priv_get(dev)) {
+ dev_iommu_priv_set(dev, qcom_iommu);
} else {
/* make sure devices iommus dt node isn't referring to
* multiple different iommu devices. Multiple context
* banks are ok, but multiple devices are not:
*/
- if (WARN_ON(qcom_iommu != fwspec->iommu_priv))
+ if (WARN_ON(qcom_iommu != dev_iommu_priv_get(dev))) {
+ put_device(&iommu_pdev->dev);
return -EINVAL;
+ }
}
return iommu_fwspec_add_ids(dev, &asid, 1);
@@ -609,39 +616,13 @@
.flush_iotlb_all = qcom_iommu_flush_iotlb_all,
.iotlb_sync = qcom_iommu_iotlb_sync,
.iova_to_phys = qcom_iommu_iova_to_phys,
- .add_device = qcom_iommu_add_device,
- .remove_device = qcom_iommu_remove_device,
+ .probe_device = qcom_iommu_probe_device,
+ .release_device = qcom_iommu_release_device,
.device_group = generic_device_group,
.of_xlate = qcom_iommu_of_xlate,
.pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M,
};
-static int qcom_iommu_enable_clocks(struct qcom_iommu_dev *qcom_iommu)
-{
- int ret;
-
- ret = clk_prepare_enable(qcom_iommu->iface_clk);
- if (ret) {
- dev_err(qcom_iommu->dev, "Couldn't enable iface_clk\n");
- return ret;
- }
-
- ret = clk_prepare_enable(qcom_iommu->bus_clk);
- if (ret) {
- dev_err(qcom_iommu->dev, "Couldn't enable bus_clk\n");
- clk_disable_unprepare(qcom_iommu->iface_clk);
- return ret;
- }
-
- return 0;
-}
-
-static void qcom_iommu_disable_clocks(struct qcom_iommu_dev *qcom_iommu)
-{
- clk_disable_unprepare(qcom_iommu->bus_clk);
- clk_disable_unprepare(qcom_iommu->iface_clk);
-}
-
static int qcom_iommu_sec_ptbl_init(struct device *dev)
{
size_t psize = 0;
@@ -775,7 +756,7 @@
static struct platform_driver qcom_iommu_ctx_driver = {
.driver = {
.name = "qcom-iommu-ctx",
- .of_match_table = of_match_ptr(ctx_of_match),
+ .of_match_table = ctx_of_match,
},
.probe = qcom_iommu_ctx_probe,
.remove = qcom_iommu_ctx_remove,
@@ -798,6 +779,7 @@
struct qcom_iommu_dev *qcom_iommu;
struct device *dev = &pdev->dev;
struct resource *res;
+ struct clk *clk;
int ret, max_asid = 0;
/* find the max asid (which is 1:1 to ctx bank idx), so we know how
@@ -820,17 +802,26 @@
return PTR_ERR(qcom_iommu->local_base);
}
- qcom_iommu->iface_clk = devm_clk_get(dev, "iface");
- if (IS_ERR(qcom_iommu->iface_clk)) {
+ clk = devm_clk_get(dev, "iface");
+ if (IS_ERR(clk)) {
dev_err(dev, "failed to get iface clock\n");
- return PTR_ERR(qcom_iommu->iface_clk);
+ return PTR_ERR(clk);
}
+ qcom_iommu->clks[CLK_IFACE].clk = clk;
- qcom_iommu->bus_clk = devm_clk_get(dev, "bus");
- if (IS_ERR(qcom_iommu->bus_clk)) {
+ clk = devm_clk_get(dev, "bus");
+ if (IS_ERR(clk)) {
dev_err(dev, "failed to get bus clock\n");
- return PTR_ERR(qcom_iommu->bus_clk);
+ return PTR_ERR(clk);
}
+ qcom_iommu->clks[CLK_BUS].clk = clk;
+
+ clk = devm_clk_get_optional(dev, "tbu");
+ if (IS_ERR(clk)) {
+ dev_err(dev, "failed to get tbu clock\n");
+ return PTR_ERR(clk);
+ }
+ qcom_iommu->clks[CLK_TBU].clk = clk;
if (of_property_read_u32(dev->of_node, "qcom,iommu-secure-id",
&qcom_iommu->sec_id)) {
@@ -902,14 +893,14 @@
{
struct qcom_iommu_dev *qcom_iommu = dev_get_drvdata(dev);
- return qcom_iommu_enable_clocks(qcom_iommu);
+ return clk_bulk_prepare_enable(CLK_NUM, qcom_iommu->clks);
}
static int __maybe_unused qcom_iommu_suspend(struct device *dev)
{
struct qcom_iommu_dev *qcom_iommu = dev_get_drvdata(dev);
- qcom_iommu_disable_clocks(qcom_iommu);
+ clk_bulk_disable_unprepare(CLK_NUM, qcom_iommu->clks);
return 0;
}
@@ -928,7 +919,7 @@
static struct platform_driver qcom_iommu_driver = {
.driver = {
.name = "qcom-iommu",
- .of_match_table = of_match_ptr(qcom_iommu_of_match),
+ .of_match_table = qcom_iommu_of_match,
.pm = &qcom_iommu_pm_ops,
},
.probe = qcom_iommu_device_probe,
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 9c3e630..d1539b7 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -10,9 +10,8 @@
#include <linux/acpi_iort.h>
#include <linux/device.h>
-#include <linux/dma-contiguous.h>
+#include <linux/dma-map-ops.h>
#include <linux/dma-iommu.h>
-#include <linux/dma-noncoherent.h>
#include <linux/gfp.h>
#include <linux/huge_mm.h>
#include <linux/iommu.h>
@@ -23,6 +22,7 @@
#include <linux/pci.h>
#include <linux/scatterlist.h>
#include <linux/vmalloc.h>
+#include <linux/crash_dump.h>
struct iommu_dma_msi_page {
struct list_head list;
@@ -344,8 +344,11 @@
if (!cookie->fq_domain && !iommu_domain_get_attr(domain,
DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, &attr) && attr) {
- cookie->fq_domain = domain;
- init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all, NULL);
+ if (init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all,
+ NULL))
+ pr_warn("iova flush queue initialization failed\n");
+ else
+ cookie->fq_domain = domain;
}
if (!dev)
@@ -354,6 +357,21 @@
return iova_reserve_iommu_regions(dev, domain);
}
+static int iommu_dma_deferred_attach(struct device *dev,
+ struct iommu_domain *domain)
+{
+ const struct iommu_ops *ops = domain->ops;
+
+ if (!is_kdump_kernel())
+ return 0;
+
+ if (unlikely(ops->is_attach_deferred &&
+ ops->is_attach_deferred(domain, dev)))
+ return iommu_attach_device(domain, dev);
+
+ return 0;
+}
+
/**
* dma_info_to_prot - Translate DMA API directions and attributes to IOMMU API
* page flags.
@@ -384,7 +402,7 @@
}
static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
- size_t size, dma_addr_t dma_limit, struct device *dev)
+ size_t size, u64 dma_limit, struct device *dev)
{
struct iommu_dma_cookie *cookie = domain->iova_cookie;
struct iova_domain *iovad = &cookie->iovad;
@@ -406,11 +424,10 @@
if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
iova_len = roundup_pow_of_two(iova_len);
- if (dev->bus_dma_mask)
- dma_limit &= dev->bus_dma_mask;
+ dma_limit = min_not_zero(dma_limit, dev->bus_dma_limit);
if (domain->geometry.force_aperture)
- dma_limit = min(dma_limit, domain->geometry.aperture_end);
+ dma_limit = min(dma_limit, (u64)domain->geometry.aperture_end);
/* Try to get PCI devices a SAC address */
if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev))
@@ -458,12 +475,12 @@
WARN_ON(unmapped != size);
if (!cookie->fq_domain)
- iommu_tlb_sync(domain, &iotlb_gather);
+ iommu_iotlb_sync(domain, &iotlb_gather);
iommu_dma_free_iova(cookie, dma_addr, size);
}
static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
- size_t size, int prot)
+ size_t size, int prot, u64 dma_mask)
{
struct iommu_domain *domain = iommu_get_dma_domain(dev);
struct iommu_dma_cookie *cookie = domain->iova_cookie;
@@ -471,13 +488,16 @@
size_t iova_off = iova_offset(iovad, phys);
dma_addr_t iova;
+ if (unlikely(iommu_dma_deferred_attach(dev, domain)))
+ return DMA_MAPPING_ERROR;
+
size = iova_align(iovad, size + iova_off);
- iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);
+ iova = iommu_dma_alloc_iova(domain, size, dma_mask, dev);
if (!iova)
return DMA_MAPPING_ERROR;
- if (iommu_map(domain, iova, phys - iova_off, size, prot)) {
+ if (iommu_map_atomic(domain, iova, phys - iova_off, size, prot)) {
iommu_dma_free_iova(cookie, iova, size);
return DMA_MAPPING_ERROR;
}
@@ -508,6 +528,9 @@
/* IOMMU can map any pages, so himem can also be used here */
gfp |= __GFP_NOWARN | __GFP_HIGHMEM;
+ /* It makes no sense to muck about with huge pages */
+ gfp &= ~__GFP_COMP;
+
while (count) {
struct page *page = NULL;
unsigned int order_size;
@@ -528,15 +551,9 @@
page = alloc_pages_node(nid, alloc_flags, order);
if (!page)
continue;
- if (!order)
- break;
- if (!PageCompound(page)) {
+ if (order)
split_page(page, order);
- break;
- } else if (!split_huge_page(page)) {
- break;
- }
- __free_pages(page, order);
+ break;
}
if (!page) {
__iommu_dma_free_pages(pages, i);
@@ -556,6 +573,7 @@
* @size: Size of buffer in bytes
* @dma_handle: Out argument for allocated DMA handle
* @gfp: Allocation flags
+ * @prot: pgprot_t to use for the remapped mapping
* @attrs: DMA attributes for this allocation
*
* If @size is less than PAGE_SIZE, then a full CPU page will be allocated,
@@ -564,14 +582,14 @@
* Return: Mapped virtual address, or NULL on failure.
*/
static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
+ dma_addr_t *dma_handle, gfp_t gfp, pgprot_t prot,
+ unsigned long attrs)
{
struct iommu_domain *domain = iommu_get_dma_domain(dev);
struct iommu_dma_cookie *cookie = domain->iova_cookie;
struct iova_domain *iovad = &cookie->iovad;
bool coherent = dev_is_dma_coherent(dev);
int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
- pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs);
unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap;
struct page **pages;
struct sg_table sgt;
@@ -580,6 +598,9 @@
*dma_handle = DMA_MAPPING_ERROR;
+ if (unlikely(iommu_dma_deferred_attach(dev, domain)))
+ return NULL;
+
min_size = alloc_sizes & -alloc_sizes;
if (min_size < PAGE_SIZE) {
min_size = PAGE_SIZE;
@@ -612,7 +633,7 @@
arch_dma_prep_coherent(sg_page(sg), sg->length);
}
- if (iommu_map_sg(domain, iova, sgt.sgl, sgt.orig_nents, ioprot)
+ if (iommu_map_sg_atomic(domain, iova, sgt.sgl, sgt.orig_nents, ioprot)
< size)
goto out_free_sg;
@@ -660,7 +681,7 @@
return;
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
- arch_sync_dma_for_cpu(dev, phys, size, dir);
+ arch_sync_dma_for_cpu(phys, size, dir);
}
static void iommu_dma_sync_single_for_device(struct device *dev,
@@ -672,7 +693,7 @@
return;
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
- arch_sync_dma_for_device(dev, phys, size, dir);
+ arch_sync_dma_for_device(phys, size, dir);
}
static void iommu_dma_sync_sg_for_cpu(struct device *dev,
@@ -686,7 +707,7 @@
return;
for_each_sg(sgl, sg, nelems, i)
- arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir);
+ arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
}
static void iommu_dma_sync_sg_for_device(struct device *dev,
@@ -700,7 +721,7 @@
return;
for_each_sg(sgl, sg, nelems, i)
- arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir);
+ arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
}
static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
@@ -712,10 +733,10 @@
int prot = dma_info_to_prot(dir, coherent, attrs);
dma_addr_t dma_handle;
- dma_handle =__iommu_dma_map(dev, phys, size, prot);
+ dma_handle = __iommu_dma_map(dev, phys, size, prot, dma_get_mask(dev));
if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
dma_handle != DMA_MAPPING_ERROR)
- arch_sync_dma_for_device(dev, phys, size, dir);
+ arch_sync_dma_for_device(phys, size, dir);
return dma_handle;
}
@@ -822,6 +843,9 @@
unsigned long mask = dma_get_seg_boundary(dev);
int i;
+ if (unlikely(iommu_dma_deferred_attach(dev, domain)))
+ return 0;
+
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
iommu_dma_sync_sg_for_device(dev, sg, nents, dir);
@@ -872,7 +896,7 @@
* We'll leave any physical concatenation to the IOMMU driver's
* implementation - it knows better than we do.
*/
- if (iommu_map_sg(domain, iova, sg, nents, prot) < iova_len)
+ if (iommu_map_sg_atomic(domain, iova, sg, nents, prot) < iova_len)
goto out_free_iova;
return __finalise_sg(dev, sg, nents, iova);
@@ -912,7 +936,8 @@
size_t size, enum dma_data_direction dir, unsigned long attrs)
{
return __iommu_dma_map(dev, phys, size,
- dma_info_to_prot(dir, false, attrs) | IOMMU_MMIO);
+ dma_info_to_prot(dir, false, attrs) | IOMMU_MMIO,
+ dma_get_mask(dev));
}
static void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle,
@@ -929,7 +954,7 @@
/* Non-coherent atomic allocation? Easy */
if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
- dma_free_from_pool(cpu_addr, alloc_size))
+ dma_free_from_pool(dev, cpu_addr, alloc_size))
return;
if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) {
@@ -1007,18 +1032,22 @@
gfp |= __GFP_ZERO;
if (IS_ENABLED(CONFIG_DMA_REMAP) && gfpflags_allow_blocking(gfp) &&
- !(attrs & DMA_ATTR_FORCE_CONTIGUOUS))
- return iommu_dma_alloc_remap(dev, size, handle, gfp, attrs);
+ !(attrs & DMA_ATTR_FORCE_CONTIGUOUS)) {
+ return iommu_dma_alloc_remap(dev, size, handle, gfp,
+ dma_pgprot(dev, PAGE_KERNEL, attrs), attrs);
+ }
if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
!gfpflags_allow_blocking(gfp) && !coherent)
- cpu_addr = dma_alloc_from_pool(PAGE_ALIGN(size), &page, gfp);
+ page = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &cpu_addr,
+ gfp, NULL);
else
cpu_addr = iommu_dma_alloc_pages(dev, size, &page, gfp, attrs);
if (!cpu_addr)
return NULL;
- *handle = __iommu_dma_map(dev, page_to_phys(page), size, ioprot);
+ *handle = __iommu_dma_map(dev, page_to_phys(page), size, ioprot,
+ dev->coherent_dma_mask);
if (*handle == DMA_MAPPING_ERROR) {
__iommu_dma_free(dev, size, cpu_addr);
return NULL;
@@ -1027,6 +1056,34 @@
return cpu_addr;
}
+#ifdef CONFIG_DMA_REMAP
+static void *iommu_dma_alloc_noncoherent(struct device *dev, size_t size,
+ dma_addr_t *handle, enum dma_data_direction dir, gfp_t gfp)
+{
+ if (!gfpflags_allow_blocking(gfp)) {
+ struct page *page;
+
+ page = dma_common_alloc_pages(dev, size, handle, dir, gfp);
+ if (!page)
+ return NULL;
+ return page_address(page);
+ }
+
+ return iommu_dma_alloc_remap(dev, size, handle, gfp | __GFP_ZERO,
+ PAGE_KERNEL, 0);
+}
+
+static void iommu_dma_free_noncoherent(struct device *dev, size_t size,
+ void *cpu_addr, dma_addr_t handle, enum dma_data_direction dir)
+{
+ __iommu_dma_unmap(dev, handle, size);
+ __iommu_dma_free(dev, size, cpu_addr);
+}
+#else
+#define iommu_dma_alloc_noncoherent NULL
+#define iommu_dma_free_noncoherent NULL
+#endif /* CONFIG_DMA_REMAP */
+
static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
void *cpu_addr, dma_addr_t dma_addr, size_t size,
unsigned long attrs)
@@ -1095,6 +1152,10 @@
static const struct dma_map_ops iommu_dma_ops = {
.alloc = iommu_dma_alloc,
.free = iommu_dma_free,
+ .alloc_pages = dma_common_alloc_pages,
+ .free_pages = dma_common_free_pages,
+ .alloc_noncoherent = iommu_dma_alloc_noncoherent,
+ .free_noncoherent = iommu_dma_free_noncoherent,
.mmap = iommu_dma_mmap,
.get_sgtable = iommu_dma_get_sgtable,
.map_page = iommu_dma_map_page,
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 55ed857..de324b4 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -173,7 +173,7 @@
#define REG_V5_FAULT_AR_VA 0x070
#define REG_V5_FAULT_AW_VA 0x080
-#define has_sysmmu(dev) (dev->archdata.iommu != NULL)
+#define has_sysmmu(dev) (dev_iommu_priv_get(dev) != NULL)
static struct device *dma_dev;
static struct kmem_cache *lv2table_kmem_cache;
@@ -226,7 +226,7 @@
};
/*
- * This structure is attached to dev.archdata.iommu of the master device
+ * This structure is attached to dev->iommu->priv of the master device
* on device add, contains a list of SYSMMU controllers defined by device tree,
* which are bound to given master device. It is usually referenced by 'owner'
* pointer.
@@ -670,7 +670,7 @@
struct device *master = data->master;
if (master) {
- struct exynos_iommu_owner *owner = master->archdata.iommu;
+ struct exynos_iommu_owner *owner = dev_iommu_priv_get(master);
mutex_lock(&owner->rpm_lock);
if (data->domain) {
@@ -688,7 +688,7 @@
struct device *master = data->master;
if (master) {
- struct exynos_iommu_owner *owner = master->archdata.iommu;
+ struct exynos_iommu_owner *owner = dev_iommu_priv_get(master);
mutex_lock(&owner->rpm_lock);
if (data->domain) {
@@ -721,7 +721,7 @@
}
};
-static inline void update_pte(sysmmu_pte_t *ent, sysmmu_pte_t val)
+static inline void exynos_iommu_set_pte(sysmmu_pte_t *ent, sysmmu_pte_t val)
{
dma_sync_single_for_cpu(dma_dev, virt_to_phys(ent), sizeof(*ent),
DMA_TO_DEVICE);
@@ -837,8 +837,8 @@
static void exynos_iommu_detach_device(struct iommu_domain *iommu_domain,
struct device *dev)
{
- struct exynos_iommu_owner *owner = dev->archdata.iommu;
struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain);
+ struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev);
phys_addr_t pagetable = virt_to_phys(domain->pgtable);
struct sysmmu_drvdata *data, *next;
unsigned long flags;
@@ -875,8 +875,8 @@
static int exynos_iommu_attach_device(struct iommu_domain *iommu_domain,
struct device *dev)
{
- struct exynos_iommu_owner *owner = dev->archdata.iommu;
struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain);
+ struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev);
struct sysmmu_drvdata *data;
phys_addr_t pagetable = virt_to_phys(domain->pgtable);
unsigned long flags;
@@ -933,7 +933,7 @@
if (!pent)
return ERR_PTR(-ENOMEM);
- update_pte(sent, mk_lv1ent_page(virt_to_phys(pent)));
+ exynos_iommu_set_pte(sent, mk_lv1ent_page(virt_to_phys(pent)));
kmemleak_ignore(pent);
*pgcounter = NUM_LV2ENTRIES;
handle = dma_map_single(dma_dev, pent, LV2TABLE_SIZE,
@@ -994,7 +994,7 @@
*pgcnt = 0;
}
- update_pte(sent, mk_lv1ent_sect(paddr, prot));
+ exynos_iommu_set_pte(sent, mk_lv1ent_sect(paddr, prot));
spin_lock(&domain->lock);
if (lv1ent_page_zero(sent)) {
@@ -1018,7 +1018,7 @@
if (WARN_ON(!lv2ent_fault(pent)))
return -EADDRINUSE;
- update_pte(pent, mk_lv2ent_spage(paddr, prot));
+ exynos_iommu_set_pte(pent, mk_lv2ent_spage(paddr, prot));
*pgcnt -= 1;
} else { /* size == LPAGE_SIZE */
int i;
@@ -1073,7 +1073,7 @@
*/
static int exynos_iommu_map(struct iommu_domain *iommu_domain,
unsigned long l_iova, phys_addr_t paddr, size_t size,
- int prot)
+ int prot, gfp_t gfp)
{
struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain);
sysmmu_pte_t *entry;
@@ -1150,7 +1150,7 @@
}
/* workaround for h/w bug in System MMU v3.3 */
- update_pte(ent, ZERO_LV2LINK);
+ exynos_iommu_set_pte(ent, ZERO_LV2LINK);
size = SECT_SIZE;
goto done;
}
@@ -1171,7 +1171,7 @@
}
if (lv2ent_small(ent)) {
- update_pte(ent, 0);
+ exynos_iommu_set_pte(ent, 0);
size = SPAGE_SIZE;
domain->lv2entcnt[lv1ent_offset(iova)] += 1;
goto done;
@@ -1235,19 +1235,13 @@
return phys;
}
-static int exynos_iommu_add_device(struct device *dev)
+static struct iommu_device *exynos_iommu_probe_device(struct device *dev)
{
- struct exynos_iommu_owner *owner = dev->archdata.iommu;
+ struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev);
struct sysmmu_drvdata *data;
- struct iommu_group *group;
if (!has_sysmmu(dev))
- return -ENODEV;
-
- group = iommu_group_get_for_dev(dev);
-
- if (IS_ERR(group))
- return PTR_ERR(group);
+ return ERR_PTR(-ENODEV);
list_for_each_entry(data, &owner->controllers, owner_node) {
/*
@@ -1259,14 +1253,17 @@
DL_FLAG_STATELESS |
DL_FLAG_PM_RUNTIME);
}
- iommu_group_put(group);
- return 0;
+ /* There is always at least one entry, see exynos_iommu_of_xlate() */
+ data = list_first_entry(&owner->controllers,
+ struct sysmmu_drvdata, owner_node);
+
+ return &data->iommu;
}
-static void exynos_iommu_remove_device(struct device *dev)
+static void exynos_iommu_release_device(struct device *dev)
{
- struct exynos_iommu_owner *owner = dev->archdata.iommu;
+ struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev);
struct sysmmu_drvdata *data;
if (!has_sysmmu(dev))
@@ -1282,7 +1279,6 @@
iommu_group_put(group);
}
}
- iommu_group_remove_device(dev);
list_for_each_entry(data, &owner->controllers, owner_node)
device_link_del(data->link);
@@ -1291,8 +1287,8 @@
static int exynos_iommu_of_xlate(struct device *dev,
struct of_phandle_args *spec)
{
- struct exynos_iommu_owner *owner = dev->archdata.iommu;
struct platform_device *sysmmu = of_find_device_by_node(spec->np);
+ struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev);
struct sysmmu_drvdata *data, *entry;
if (!sysmmu)
@@ -1313,7 +1309,7 @@
INIT_LIST_HEAD(&owner->controllers);
mutex_init(&owner->rpm_lock);
- dev->archdata.iommu = owner;
+ dev_iommu_priv_set(dev, owner);
}
list_for_each_entry(entry, &owner->controllers, owner_node)
@@ -1335,8 +1331,8 @@
.unmap = exynos_iommu_unmap,
.iova_to_phys = exynos_iommu_iova_to_phys,
.device_group = generic_device_group,
- .add_device = exynos_iommu_add_device,
- .remove_device = exynos_iommu_remove_device,
+ .probe_device = exynos_iommu_probe_device,
+ .release_device = exynos_iommu_release_device,
.pgsize_bitmap = SECT_SIZE | LPAGE_SIZE | SPAGE_SIZE,
.of_xlate = exynos_iommu_of_xlate,
};
diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c
index cde281b..b9a974d 100644
--- a/drivers/iommu/fsl_pamu.c
+++ b/drivers/iommu/fsl_pamu.c
@@ -1174,10 +1174,7 @@
if (irq != NO_IRQ)
free_irq(irq, data);
- if (data) {
- memset(data, 0, sizeof(struct pamu_isr_data));
- kfree(data);
- }
+ kfree_sensitive(data);
if (pamu_regs)
iounmap(pamu_regs);
diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c
index 06828e2..b211076 100644
--- a/drivers/iommu/fsl_pamu_domain.c
+++ b/drivers/iommu/fsl_pamu_domain.c
@@ -323,7 +323,7 @@
pamu_disable_liodn(info->liodn);
spin_unlock_irqrestore(&iommu_lock, flags);
spin_lock_irqsave(&device_domain_lock, flags);
- info->dev->archdata.iommu_domain = NULL;
+ dev_iommu_priv_set(info->dev, NULL);
kmem_cache_free(iommu_devinfo_cache, info);
spin_unlock_irqrestore(&device_domain_lock, flags);
}
@@ -352,7 +352,7 @@
* Check here if the device is already attached to domain or not.
* If the device is already attached to a domain detach it.
*/
- old_domain_info = dev->archdata.iommu_domain;
+ old_domain_info = dev_iommu_priv_get(dev);
if (old_domain_info && old_domain_info->domain != dma_domain) {
spin_unlock_irqrestore(&device_domain_lock, flags);
detach_device(dev, old_domain_info->domain);
@@ -371,8 +371,8 @@
* the info for the first LIODN as all
* LIODNs share the same domain
*/
- if (!dev->archdata.iommu_domain)
- dev->archdata.iommu_domain = info;
+ if (!dev_iommu_priv_get(dev))
+ dev_iommu_priv_set(dev, info);
spin_unlock_irqrestore(&device_domain_lock, flags);
}
@@ -1016,25 +1016,13 @@
return group;
}
-static int fsl_pamu_add_device(struct device *dev)
+static struct iommu_device *fsl_pamu_probe_device(struct device *dev)
{
- struct iommu_group *group;
-
- group = iommu_group_get_for_dev(dev);
- if (IS_ERR(group))
- return PTR_ERR(group);
-
- iommu_group_put(group);
-
- iommu_device_link(&pamu_iommu, dev);
-
- return 0;
+ return &pamu_iommu;
}
-static void fsl_pamu_remove_device(struct device *dev)
+static void fsl_pamu_release_device(struct device *dev)
{
- iommu_device_unlink(&pamu_iommu, dev);
- iommu_group_remove_device(dev);
}
static const struct iommu_ops fsl_pamu_ops = {
@@ -1048,8 +1036,8 @@
.iova_to_phys = fsl_pamu_iova_to_phys,
.domain_set_attr = fsl_pamu_set_domain_attr,
.domain_get_attr = fsl_pamu_get_domain_attr,
- .add_device = fsl_pamu_add_device,
- .remove_device = fsl_pamu_remove_device,
+ .probe_device = fsl_pamu_probe_device,
+ .release_device = fsl_pamu_release_device,
.device_group = fsl_pamu_device_group,
};
diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c
index f0fe503..e09e2d7 100644
--- a/drivers/iommu/hyperv-iommu.c
+++ b/drivers/iommu/hyperv-iommu.c
@@ -101,7 +101,7 @@
* in the chip_data and hyperv_irq_remapping_activate()/hyperv_ir_set_
* affinity() set vector and dest_apicid directly into IO-APIC entry.
*/
- irq_data->chip_data = info->ioapic_entry;
+ irq_data->chip_data = info->ioapic.entry;
/*
* Hypver-V IO APIC irq affinity should be in the scope of
@@ -131,7 +131,7 @@
return 0;
}
-static struct irq_domain_ops hyperv_ir_domain_ops = {
+static const struct irq_domain_ops hyperv_ir_domain_ops = {
.alloc = hyperv_irq_remapping_alloc,
.free = hyperv_irq_remapping_free,
.activate = hyperv_irq_remapping_activate,
@@ -182,9 +182,9 @@
return IRQ_REMAP_X2APIC_MODE;
}
-static struct irq_domain *hyperv_get_ir_irq_domain(struct irq_alloc_info *info)
+static struct irq_domain *hyperv_get_irq_domain(struct irq_alloc_info *info)
{
- if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC)
+ if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC_GET_PARENT)
return ioapic_ir_domain;
else
return NULL;
@@ -193,7 +193,7 @@
struct irq_remap_ops hyperv_irq_remap_ops = {
.prepare = hyperv_prepare_irq_remapping,
.enable = hyperv_enable_irq_remapping,
- .get_ir_irq_domain = hyperv_get_ir_irq_domain,
+ .get_irq_domain = hyperv_get_irq_domain,
};
#endif
diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c
deleted file mode 100644
index e7cb0b8..0000000
--- a/drivers/iommu/intel-pasid.c
+++ /dev/null
@@ -1,649 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/**
- * intel-pasid.c - PASID idr, table and entry manipulation
- *
- * Copyright (C) 2018 Intel Corporation
- *
- * Author: Lu Baolu <baolu.lu@linux.intel.com>
- */
-
-#define pr_fmt(fmt) "DMAR: " fmt
-
-#include <linux/bitops.h>
-#include <linux/cpufeature.h>
-#include <linux/dmar.h>
-#include <linux/intel-iommu.h>
-#include <linux/iommu.h>
-#include <linux/memory.h>
-#include <linux/pci.h>
-#include <linux/pci-ats.h>
-#include <linux/spinlock.h>
-
-#include "intel-pasid.h"
-
-/*
- * Intel IOMMU system wide PASID name space:
- */
-static DEFINE_SPINLOCK(pasid_lock);
-u32 intel_pasid_max_id = PASID_MAX;
-static DEFINE_IDR(pasid_idr);
-
-int intel_pasid_alloc_id(void *ptr, int start, int end, gfp_t gfp)
-{
- int ret, min, max;
-
- min = max_t(int, start, PASID_MIN);
- max = min_t(int, end, intel_pasid_max_id);
-
- WARN_ON(in_interrupt());
- idr_preload(gfp);
- spin_lock(&pasid_lock);
- ret = idr_alloc(&pasid_idr, ptr, min, max, GFP_ATOMIC);
- spin_unlock(&pasid_lock);
- idr_preload_end();
-
- return ret;
-}
-
-void intel_pasid_free_id(int pasid)
-{
- spin_lock(&pasid_lock);
- idr_remove(&pasid_idr, pasid);
- spin_unlock(&pasid_lock);
-}
-
-void *intel_pasid_lookup_id(int pasid)
-{
- void *p;
-
- spin_lock(&pasid_lock);
- p = idr_find(&pasid_idr, pasid);
- spin_unlock(&pasid_lock);
-
- return p;
-}
-
-/*
- * Per device pasid table management:
- */
-static inline void
-device_attach_pasid_table(struct device_domain_info *info,
- struct pasid_table *pasid_table)
-{
- info->pasid_table = pasid_table;
- list_add(&info->table, &pasid_table->dev);
-}
-
-static inline void
-device_detach_pasid_table(struct device_domain_info *info,
- struct pasid_table *pasid_table)
-{
- info->pasid_table = NULL;
- list_del(&info->table);
-}
-
-struct pasid_table_opaque {
- struct pasid_table **pasid_table;
- int segment;
- int bus;
- int devfn;
-};
-
-static int search_pasid_table(struct device_domain_info *info, void *opaque)
-{
- struct pasid_table_opaque *data = opaque;
-
- if (info->iommu->segment == data->segment &&
- info->bus == data->bus &&
- info->devfn == data->devfn &&
- info->pasid_table) {
- *data->pasid_table = info->pasid_table;
- return 1;
- }
-
- return 0;
-}
-
-static int get_alias_pasid_table(struct pci_dev *pdev, u16 alias, void *opaque)
-{
- struct pasid_table_opaque *data = opaque;
-
- data->segment = pci_domain_nr(pdev->bus);
- data->bus = PCI_BUS_NUM(alias);
- data->devfn = alias & 0xff;
-
- return for_each_device_domain(&search_pasid_table, data);
-}
-
-/*
- * Allocate a pasid table for @dev. It should be called in a
- * single-thread context.
- */
-int intel_pasid_alloc_table(struct device *dev)
-{
- struct device_domain_info *info;
- struct pasid_table *pasid_table;
- struct pasid_table_opaque data;
- struct page *pages;
- int max_pasid = 0;
- int ret, order;
- int size;
-
- might_sleep();
- info = dev->archdata.iommu;
- if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table))
- return -EINVAL;
-
- /* DMA alias device already has a pasid table, use it: */
- data.pasid_table = &pasid_table;
- ret = pci_for_each_dma_alias(to_pci_dev(dev),
- &get_alias_pasid_table, &data);
- if (ret)
- goto attach_out;
-
- pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL);
- if (!pasid_table)
- return -ENOMEM;
- INIT_LIST_HEAD(&pasid_table->dev);
-
- if (info->pasid_supported)
- max_pasid = min_t(int, pci_max_pasids(to_pci_dev(dev)),
- intel_pasid_max_id);
-
- size = max_pasid >> (PASID_PDE_SHIFT - 3);
- order = size ? get_order(size) : 0;
- pages = alloc_pages_node(info->iommu->node,
- GFP_KERNEL | __GFP_ZERO, order);
- if (!pages) {
- kfree(pasid_table);
- return -ENOMEM;
- }
-
- pasid_table->table = page_address(pages);
- pasid_table->order = order;
- pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);
-
-attach_out:
- device_attach_pasid_table(info, pasid_table);
-
- return 0;
-}
-
-void intel_pasid_free_table(struct device *dev)
-{
- struct device_domain_info *info;
- struct pasid_table *pasid_table;
- struct pasid_dir_entry *dir;
- struct pasid_entry *table;
- int i, max_pde;
-
- info = dev->archdata.iommu;
- if (!info || !dev_is_pci(dev) || !info->pasid_table)
- return;
-
- pasid_table = info->pasid_table;
- device_detach_pasid_table(info, pasid_table);
-
- if (!list_empty(&pasid_table->dev))
- return;
-
- /* Free scalable mode PASID directory tables: */
- dir = pasid_table->table;
- max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT;
- for (i = 0; i < max_pde; i++) {
- table = get_pasid_table_from_pde(&dir[i]);
- free_pgtable_page(table);
- }
-
- free_pages((unsigned long)pasid_table->table, pasid_table->order);
- kfree(pasid_table);
-}
-
-struct pasid_table *intel_pasid_get_table(struct device *dev)
-{
- struct device_domain_info *info;
-
- info = dev->archdata.iommu;
- if (!info)
- return NULL;
-
- return info->pasid_table;
-}
-
-int intel_pasid_get_dev_max_id(struct device *dev)
-{
- struct device_domain_info *info;
-
- info = dev->archdata.iommu;
- if (!info || !info->pasid_table)
- return 0;
-
- return info->pasid_table->max_pasid;
-}
-
-struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid)
-{
- struct device_domain_info *info;
- struct pasid_table *pasid_table;
- struct pasid_dir_entry *dir;
- struct pasid_entry *entries;
- int dir_index, index;
-
- pasid_table = intel_pasid_get_table(dev);
- if (WARN_ON(!pasid_table || pasid < 0 ||
- pasid >= intel_pasid_get_dev_max_id(dev)))
- return NULL;
-
- dir = pasid_table->table;
- info = dev->archdata.iommu;
- dir_index = pasid >> PASID_PDE_SHIFT;
- index = pasid & PASID_PTE_MASK;
-
- spin_lock(&pasid_lock);
- entries = get_pasid_table_from_pde(&dir[dir_index]);
- if (!entries) {
- entries = alloc_pgtable_page(info->iommu->node);
- if (!entries) {
- spin_unlock(&pasid_lock);
- return NULL;
- }
-
- WRITE_ONCE(dir[dir_index].val,
- (u64)virt_to_phys(entries) | PASID_PTE_PRESENT);
- }
- spin_unlock(&pasid_lock);
-
- return &entries[index];
-}
-
-/*
- * Interfaces for PASID table entry manipulation:
- */
-static inline void pasid_clear_entry(struct pasid_entry *pe)
-{
- WRITE_ONCE(pe->val[0], 0);
- WRITE_ONCE(pe->val[1], 0);
- WRITE_ONCE(pe->val[2], 0);
- WRITE_ONCE(pe->val[3], 0);
- WRITE_ONCE(pe->val[4], 0);
- WRITE_ONCE(pe->val[5], 0);
- WRITE_ONCE(pe->val[6], 0);
- WRITE_ONCE(pe->val[7], 0);
-}
-
-static void intel_pasid_clear_entry(struct device *dev, int pasid)
-{
- struct pasid_entry *pe;
-
- pe = intel_pasid_get_entry(dev, pasid);
- if (WARN_ON(!pe))
- return;
-
- pasid_clear_entry(pe);
-}
-
-static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits)
-{
- u64 old;
-
- old = READ_ONCE(*ptr);
- WRITE_ONCE(*ptr, (old & ~mask) | bits);
-}
-
-/*
- * Setup the DID(Domain Identifier) field (Bit 64~79) of scalable mode
- * PASID entry.
- */
-static inline void
-pasid_set_domain_id(struct pasid_entry *pe, u64 value)
-{
- pasid_set_bits(&pe->val[1], GENMASK_ULL(15, 0), value);
-}
-
-/*
- * Get domain ID value of a scalable mode PASID entry.
- */
-static inline u16
-pasid_get_domain_id(struct pasid_entry *pe)
-{
- return (u16)(READ_ONCE(pe->val[1]) & GENMASK_ULL(15, 0));
-}
-
-/*
- * Setup the SLPTPTR(Second Level Page Table Pointer) field (Bit 12~63)
- * of a scalable mode PASID entry.
- */
-static inline void
-pasid_set_slptr(struct pasid_entry *pe, u64 value)
-{
- pasid_set_bits(&pe->val[0], VTD_PAGE_MASK, value);
-}
-
-/*
- * Setup the AW(Address Width) field (Bit 2~4) of a scalable mode PASID
- * entry.
- */
-static inline void
-pasid_set_address_width(struct pasid_entry *pe, u64 value)
-{
- pasid_set_bits(&pe->val[0], GENMASK_ULL(4, 2), value << 2);
-}
-
-/*
- * Setup the PGTT(PASID Granular Translation Type) field (Bit 6~8)
- * of a scalable mode PASID entry.
- */
-static inline void
-pasid_set_translation_type(struct pasid_entry *pe, u64 value)
-{
- pasid_set_bits(&pe->val[0], GENMASK_ULL(8, 6), value << 6);
-}
-
-/*
- * Enable fault processing by clearing the FPD(Fault Processing
- * Disable) field (Bit 1) of a scalable mode PASID entry.
- */
-static inline void pasid_set_fault_enable(struct pasid_entry *pe)
-{
- pasid_set_bits(&pe->val[0], 1 << 1, 0);
-}
-
-/*
- * Setup the SRE(Supervisor Request Enable) field (Bit 128) of a
- * scalable mode PASID entry.
- */
-static inline void pasid_set_sre(struct pasid_entry *pe)
-{
- pasid_set_bits(&pe->val[2], 1 << 0, 1);
-}
-
-/*
- * Setup the P(Present) field (Bit 0) of a scalable mode PASID
- * entry.
- */
-static inline void pasid_set_present(struct pasid_entry *pe)
-{
- pasid_set_bits(&pe->val[0], 1 << 0, 1);
-}
-
-/*
- * Setup Page Walk Snoop bit (Bit 87) of a scalable mode PASID
- * entry.
- */
-static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value)
-{
- pasid_set_bits(&pe->val[1], 1 << 23, value << 23);
-}
-
-/*
- * Setup the First Level Page table Pointer field (Bit 140~191)
- * of a scalable mode PASID entry.
- */
-static inline void
-pasid_set_flptr(struct pasid_entry *pe, u64 value)
-{
- pasid_set_bits(&pe->val[2], VTD_PAGE_MASK, value);
-}
-
-/*
- * Setup the First Level Paging Mode field (Bit 130~131) of a
- * scalable mode PASID entry.
- */
-static inline void
-pasid_set_flpm(struct pasid_entry *pe, u64 value)
-{
- pasid_set_bits(&pe->val[2], GENMASK_ULL(3, 2), value << 2);
-}
-
-static void
-pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu,
- u16 did, int pasid)
-{
- struct qi_desc desc;
-
- desc.qw0 = QI_PC_DID(did) | QI_PC_PASID_SEL | QI_PC_PASID(pasid);
- desc.qw1 = 0;
- desc.qw2 = 0;
- desc.qw3 = 0;
-
- qi_submit_sync(&desc, iommu);
-}
-
-static void
-iotlb_invalidation_with_pasid(struct intel_iommu *iommu, u16 did, u32 pasid)
-{
- struct qi_desc desc;
-
- desc.qw0 = QI_EIOTLB_PASID(pasid) | QI_EIOTLB_DID(did) |
- QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE;
- desc.qw1 = 0;
- desc.qw2 = 0;
- desc.qw3 = 0;
-
- qi_submit_sync(&desc, iommu);
-}
-
-static void
-devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
- struct device *dev, int pasid)
-{
- struct device_domain_info *info;
- u16 sid, qdep, pfsid;
-
- info = dev->archdata.iommu;
- if (!info || !info->ats_enabled)
- return;
-
- sid = info->bus << 8 | info->devfn;
- qdep = info->ats_qdep;
- pfsid = info->pfsid;
-
- qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT);
-}
-
-void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
- struct device *dev, int pasid)
-{
- struct pasid_entry *pte;
- u16 did;
-
- pte = intel_pasid_get_entry(dev, pasid);
- if (WARN_ON(!pte))
- return;
-
- did = pasid_get_domain_id(pte);
- intel_pasid_clear_entry(dev, pasid);
-
- if (!ecap_coherent(iommu->ecap))
- clflush_cache_range(pte, sizeof(*pte));
-
- pasid_cache_invalidation_with_pasid(iommu, did, pasid);
- iotlb_invalidation_with_pasid(iommu, did, pasid);
-
- /* Device IOTLB doesn't need to be flushed in caching mode. */
- if (!cap_caching_mode(iommu->cap))
- devtlb_invalidation_with_pasid(iommu, dev, pasid);
-}
-
-/*
- * Set up the scalable mode pasid table entry for first only
- * translation type.
- */
-int intel_pasid_setup_first_level(struct intel_iommu *iommu,
- struct device *dev, pgd_t *pgd,
- int pasid, u16 did, int flags)
-{
- struct pasid_entry *pte;
-
- if (!ecap_flts(iommu->ecap)) {
- pr_err("No first level translation support on %s\n",
- iommu->name);
- return -EINVAL;
- }
-
- pte = intel_pasid_get_entry(dev, pasid);
- if (WARN_ON(!pte))
- return -EINVAL;
-
- pasid_clear_entry(pte);
-
- /* Setup the first level page table pointer: */
- pasid_set_flptr(pte, (u64)__pa(pgd));
- if (flags & PASID_FLAG_SUPERVISOR_MODE) {
- if (!ecap_srs(iommu->ecap)) {
- pr_err("No supervisor request support on %s\n",
- iommu->name);
- return -EINVAL;
- }
- pasid_set_sre(pte);
- }
-
-#ifdef CONFIG_X86
- /* Both CPU and IOMMU paging mode need to match */
- if (cpu_feature_enabled(X86_FEATURE_LA57)) {
- if (cap_5lp_support(iommu->cap)) {
- pasid_set_flpm(pte, 1);
- } else {
- pr_err("VT-d has no 5-level paging support for CPU\n");
- pasid_clear_entry(pte);
- return -EINVAL;
- }
- }
-#endif /* CONFIG_X86 */
-
- pasid_set_domain_id(pte, did);
- pasid_set_address_width(pte, iommu->agaw);
- pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
-
- /* Setup Present and PASID Granular Transfer Type: */
- pasid_set_translation_type(pte, 1);
- pasid_set_present(pte);
-
- if (!ecap_coherent(iommu->ecap))
- clflush_cache_range(pte, sizeof(*pte));
-
- if (cap_caching_mode(iommu->cap)) {
- pasid_cache_invalidation_with_pasid(iommu, did, pasid);
- iotlb_invalidation_with_pasid(iommu, did, pasid);
- } else {
- iommu_flush_write_buffer(iommu);
- }
-
- return 0;
-}
-
-/*
- * Set up the scalable mode pasid entry for second only translation type.
- */
-int intel_pasid_setup_second_level(struct intel_iommu *iommu,
- struct dmar_domain *domain,
- struct device *dev, int pasid)
-{
- struct pasid_entry *pte;
- struct dma_pte *pgd;
- u64 pgd_val;
- int agaw;
- u16 did;
-
- /*
- * If hardware advertises no support for second level
- * translation, return directly.
- */
- if (!ecap_slts(iommu->ecap)) {
- pr_err("No second level translation support on %s\n",
- iommu->name);
- return -EINVAL;
- }
-
- /*
- * Skip top levels of page tables for iommu which has less agaw
- * than default. Unnecessary for PT mode.
- */
- pgd = domain->pgd;
- for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
- pgd = phys_to_virt(dma_pte_addr(pgd));
- if (!dma_pte_present(pgd)) {
- dev_err(dev, "Invalid domain page table\n");
- return -EINVAL;
- }
- }
-
- pgd_val = virt_to_phys(pgd);
- did = domain->iommu_did[iommu->seq_id];
-
- pte = intel_pasid_get_entry(dev, pasid);
- if (!pte) {
- dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid);
- return -ENODEV;
- }
-
- pasid_clear_entry(pte);
- pasid_set_domain_id(pte, did);
- pasid_set_slptr(pte, pgd_val);
- pasid_set_address_width(pte, agaw);
- pasid_set_translation_type(pte, 2);
- pasid_set_fault_enable(pte);
- pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
-
- /*
- * Since it is a second level only translation setup, we should
- * set SRE bit as well (addresses are expected to be GPAs).
- */
- pasid_set_sre(pte);
- pasid_set_present(pte);
-
- if (!ecap_coherent(iommu->ecap))
- clflush_cache_range(pte, sizeof(*pte));
-
- if (cap_caching_mode(iommu->cap)) {
- pasid_cache_invalidation_with_pasid(iommu, did, pasid);
- iotlb_invalidation_with_pasid(iommu, did, pasid);
- } else {
- iommu_flush_write_buffer(iommu);
- }
-
- return 0;
-}
-
-/*
- * Set up the scalable mode pasid entry for passthrough translation type.
- */
-int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
- struct dmar_domain *domain,
- struct device *dev, int pasid)
-{
- u16 did = FLPT_DEFAULT_DID;
- struct pasid_entry *pte;
-
- pte = intel_pasid_get_entry(dev, pasid);
- if (!pte) {
- dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid);
- return -ENODEV;
- }
-
- pasid_clear_entry(pte);
- pasid_set_domain_id(pte, did);
- pasid_set_address_width(pte, iommu->agaw);
- pasid_set_translation_type(pte, 4);
- pasid_set_fault_enable(pte);
- pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
-
- /*
- * We should set SRE bit as well since the addresses are expected
- * to be GPAs.
- */
- pasid_set_sre(pte);
- pasid_set_present(pte);
-
- if (!ecap_coherent(iommu->ecap))
- clflush_cache_range(pte, sizeof(*pte));
-
- if (cap_caching_mode(iommu->cap)) {
- pasid_cache_invalidation_with_pasid(iommu, did, pasid);
- iotlb_invalidation_with_pasid(iommu, did, pasid);
- } else {
- iommu_flush_write_buffer(iommu);
- }
-
- return 0;
-}
diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h
deleted file mode 100644
index fc8cd8f..0000000
--- a/drivers/iommu/intel-pasid.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * intel-pasid.h - PASID idr, table and entry header
- *
- * Copyright (C) 2018 Intel Corporation
- *
- * Author: Lu Baolu <baolu.lu@linux.intel.com>
- */
-
-#ifndef __INTEL_PASID_H
-#define __INTEL_PASID_H
-
-#define PASID_RID2PASID 0x0
-#define PASID_MIN 0x1
-#define PASID_MAX 0x100000
-#define PASID_PTE_MASK 0x3F
-#define PASID_PTE_PRESENT 1
-#define PDE_PFN_MASK PAGE_MASK
-#define PASID_PDE_SHIFT 6
-#define MAX_NR_PASID_BITS 20
-#define PASID_TBL_ENTRIES BIT(PASID_PDE_SHIFT)
-
-#define is_pasid_enabled(entry) (((entry)->lo >> 3) & 0x1)
-#define get_pasid_dir_size(entry) (1 << ((((entry)->lo >> 9) & 0x7) + 7))
-
-/*
- * Domain ID reserved for pasid entries programmed for first-level
- * only and pass-through transfer modes.
- */
-#define FLPT_DEFAULT_DID 1
-
-/*
- * The SUPERVISOR_MODE flag indicates a first level translation which
- * can be used for access to kernel addresses. It is valid only for
- * access to the kernel's static 1:1 mapping of physical memory — not
- * to vmalloc or even module mappings.
- */
-#define PASID_FLAG_SUPERVISOR_MODE BIT(0)
-
-struct pasid_dir_entry {
- u64 val;
-};
-
-struct pasid_entry {
- u64 val[8];
-};
-
-/* The representative of a PASID table */
-struct pasid_table {
- void *table; /* pasid table pointer */
- int order; /* page order of pasid table */
- int max_pasid; /* max pasid */
- struct list_head dev; /* device list */
-};
-
-/* Get PRESENT bit of a PASID directory entry. */
-static inline bool pasid_pde_is_present(struct pasid_dir_entry *pde)
-{
- return READ_ONCE(pde->val) & PASID_PTE_PRESENT;
-}
-
-/* Get PASID table from a PASID directory entry. */
-static inline struct pasid_entry *
-get_pasid_table_from_pde(struct pasid_dir_entry *pde)
-{
- if (!pasid_pde_is_present(pde))
- return NULL;
-
- return phys_to_virt(READ_ONCE(pde->val) & PDE_PFN_MASK);
-}
-
-/* Get PRESENT bit of a PASID table entry. */
-static inline bool pasid_pte_is_present(struct pasid_entry *pte)
-{
- return READ_ONCE(pte->val[0]) & PASID_PTE_PRESENT;
-}
-
-extern u32 intel_pasid_max_id;
-int intel_pasid_alloc_id(void *ptr, int start, int end, gfp_t gfp);
-void intel_pasid_free_id(int pasid);
-void *intel_pasid_lookup_id(int pasid);
-int intel_pasid_alloc_table(struct device *dev);
-void intel_pasid_free_table(struct device *dev);
-struct pasid_table *intel_pasid_get_table(struct device *dev);
-int intel_pasid_get_dev_max_id(struct device *dev);
-struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid);
-int intel_pasid_setup_first_level(struct intel_iommu *iommu,
- struct device *dev, pgd_t *pgd,
- int pasid, u16 did, int flags);
-int intel_pasid_setup_second_level(struct intel_iommu *iommu,
- struct dmar_domain *domain,
- struct device *dev, int pasid);
-int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
- struct dmar_domain *domain,
- struct device *dev, int pasid);
-void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
- struct device *dev, int pasid);
-
-#endif /* __INTEL_PASID_H */
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
deleted file mode 100644
index a3739f6..0000000
--- a/drivers/iommu/intel-svm.c
+++ /dev/null
@@ -1,686 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright © 2015 Intel Corporation.
- *
- * Authors: David Woodhouse <dwmw2@infradead.org>
- */
-
-#include <linux/intel-iommu.h>
-#include <linux/mmu_notifier.h>
-#include <linux/sched.h>
-#include <linux/sched/mm.h>
-#include <linux/slab.h>
-#include <linux/intel-svm.h>
-#include <linux/rculist.h>
-#include <linux/pci.h>
-#include <linux/pci-ats.h>
-#include <linux/dmar.h>
-#include <linux/interrupt.h>
-#include <linux/mm_types.h>
-#include <asm/page.h>
-
-#include "intel-pasid.h"
-
-static irqreturn_t prq_event_thread(int irq, void *d);
-
-int intel_svm_init(struct intel_iommu *iommu)
-{
- if (cpu_feature_enabled(X86_FEATURE_GBPAGES) &&
- !cap_fl1gp_support(iommu->cap))
- return -EINVAL;
-
- if (cpu_feature_enabled(X86_FEATURE_LA57) &&
- !cap_5lp_support(iommu->cap))
- return -EINVAL;
-
- return 0;
-}
-
-#define PRQ_ORDER 0
-
-int intel_svm_enable_prq(struct intel_iommu *iommu)
-{
- struct page *pages;
- int irq, ret;
-
- pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
- if (!pages) {
- pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
- iommu->name);
- return -ENOMEM;
- }
- iommu->prq = page_address(pages);
-
- irq = dmar_alloc_hwirq(DMAR_UNITS_SUPPORTED + iommu->seq_id, iommu->node, iommu);
- if (irq <= 0) {
- pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
- iommu->name);
- ret = -EINVAL;
- err:
- free_pages((unsigned long)iommu->prq, PRQ_ORDER);
- iommu->prq = NULL;
- return ret;
- }
- iommu->pr_irq = irq;
-
- snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);
-
- ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
- iommu->prq_name, iommu);
- if (ret) {
- pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
- iommu->name);
- dmar_free_hwirq(irq);
- iommu->pr_irq = 0;
- goto err;
- }
- dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
- dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
- dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);
-
- return 0;
-}
-
-int intel_svm_finish_prq(struct intel_iommu *iommu)
-{
- dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
- dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
- dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);
-
- if (iommu->pr_irq) {
- free_irq(iommu->pr_irq, iommu);
- dmar_free_hwirq(iommu->pr_irq);
- iommu->pr_irq = 0;
- }
-
- free_pages((unsigned long)iommu->prq, PRQ_ORDER);
- iommu->prq = NULL;
-
- return 0;
-}
-
-static void __flush_svm_range_dev(struct intel_svm *svm,
- struct intel_svm_dev *sdev,
- unsigned long address,
- unsigned long pages, int ih)
-{
- struct qi_desc desc;
-
- if (pages == -1) {
- desc.qw0 = QI_EIOTLB_PASID(svm->pasid) |
- QI_EIOTLB_DID(sdev->did) |
- QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
- QI_EIOTLB_TYPE;
- desc.qw1 = 0;
- } else {
- int mask = ilog2(__roundup_pow_of_two(pages));
-
- desc.qw0 = QI_EIOTLB_PASID(svm->pasid) |
- QI_EIOTLB_DID(sdev->did) |
- QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
- QI_EIOTLB_TYPE;
- desc.qw1 = QI_EIOTLB_ADDR(address) |
- QI_EIOTLB_IH(ih) |
- QI_EIOTLB_AM(mask);
- }
- desc.qw2 = 0;
- desc.qw3 = 0;
- qi_submit_sync(&desc, svm->iommu);
-
- if (sdev->dev_iotlb) {
- desc.qw0 = QI_DEV_EIOTLB_PASID(svm->pasid) |
- QI_DEV_EIOTLB_SID(sdev->sid) |
- QI_DEV_EIOTLB_QDEP(sdev->qdep) |
- QI_DEIOTLB_TYPE;
- if (pages == -1) {
- desc.qw1 = QI_DEV_EIOTLB_ADDR(-1ULL >> 1) |
- QI_DEV_EIOTLB_SIZE;
- } else if (pages > 1) {
- /* The least significant zero bit indicates the size. So,
- * for example, an "address" value of 0x12345f000 will
- * flush from 0x123440000 to 0x12347ffff (256KiB). */
- unsigned long last = address + ((unsigned long)(pages - 1) << VTD_PAGE_SHIFT);
- unsigned long mask = __rounddown_pow_of_two(address ^ last);
-
- desc.qw1 = QI_DEV_EIOTLB_ADDR((address & ~mask) |
- (mask - 1)) | QI_DEV_EIOTLB_SIZE;
- } else {
- desc.qw1 = QI_DEV_EIOTLB_ADDR(address);
- }
- desc.qw2 = 0;
- desc.qw3 = 0;
- qi_submit_sync(&desc, svm->iommu);
- }
-}
-
-static void intel_flush_svm_range_dev(struct intel_svm *svm,
- struct intel_svm_dev *sdev,
- unsigned long address,
- unsigned long pages, int ih)
-{
- unsigned long shift = ilog2(__roundup_pow_of_two(pages));
- unsigned long align = (1ULL << (VTD_PAGE_SHIFT + shift));
- unsigned long start = ALIGN_DOWN(address, align);
- unsigned long end = ALIGN(address + (pages << VTD_PAGE_SHIFT), align);
-
- while (start < end) {
- __flush_svm_range_dev(svm, sdev, start, align >> VTD_PAGE_SHIFT, ih);
- start += align;
- }
-}
-
-static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
- unsigned long pages, int ih)
-{
- struct intel_svm_dev *sdev;
-
- rcu_read_lock();
- list_for_each_entry_rcu(sdev, &svm->devs, list)
- intel_flush_svm_range_dev(svm, sdev, address, pages, ih);
- rcu_read_unlock();
-}
-
-/* Pages have been freed at this point */
-static void intel_invalidate_range(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start, unsigned long end)
-{
- struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
-
- intel_flush_svm_range(svm, start,
- (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0);
-}
-
-static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
-{
- struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
- struct intel_svm_dev *sdev;
-
- /* This might end up being called from exit_mmap(), *before* the page
- * tables are cleared. And __mmu_notifier_release() will delete us from
- * the list of notifiers so that our invalidate_range() callback doesn't
- * get called when the page tables are cleared. So we need to protect
- * against hardware accessing those page tables.
- *
- * We do it by clearing the entry in the PASID table and then flushing
- * the IOTLB and the PASID table caches. This might upset hardware;
- * perhaps we'll want to point the PASID to a dummy PGD (like the zero
- * page) so that we end up taking a fault that the hardware really
- * *has* to handle gracefully without affecting other processes.
- */
- rcu_read_lock();
- list_for_each_entry_rcu(sdev, &svm->devs, list) {
- intel_pasid_tear_down_entry(svm->iommu, sdev->dev, svm->pasid);
- intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
- }
- rcu_read_unlock();
-
-}
-
-static const struct mmu_notifier_ops intel_mmuops = {
- .release = intel_mm_release,
- .invalidate_range = intel_invalidate_range,
-};
-
-static DEFINE_MUTEX(pasid_mutex);
-static LIST_HEAD(global_svm_list);
-
-int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops)
-{
- struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
- struct device_domain_info *info;
- struct intel_svm_dev *sdev;
- struct intel_svm *svm = NULL;
- struct mm_struct *mm = NULL;
- int pasid_max;
- int ret;
-
- if (!iommu || dmar_disabled)
- return -EINVAL;
-
- if (dev_is_pci(dev)) {
- pasid_max = pci_max_pasids(to_pci_dev(dev));
- if (pasid_max < 0)
- return -EINVAL;
- } else
- pasid_max = 1 << 20;
-
- if (flags & SVM_FLAG_SUPERVISOR_MODE) {
- if (!ecap_srs(iommu->ecap))
- return -EINVAL;
- } else if (pasid) {
- mm = get_task_mm(current);
- BUG_ON(!mm);
- }
-
- mutex_lock(&pasid_mutex);
- if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) {
- struct intel_svm *t;
-
- list_for_each_entry(t, &global_svm_list, list) {
- if (t->mm != mm || (t->flags & SVM_FLAG_PRIVATE_PASID))
- continue;
-
- svm = t;
- if (svm->pasid >= pasid_max) {
- dev_warn(dev,
- "Limited PASID width. Cannot use existing PASID %d\n",
- svm->pasid);
- ret = -ENOSPC;
- goto out;
- }
-
- list_for_each_entry(sdev, &svm->devs, list) {
- if (dev == sdev->dev) {
- if (sdev->ops != ops) {
- ret = -EBUSY;
- goto out;
- }
- sdev->users++;
- goto success;
- }
- }
-
- break;
- }
- }
-
- sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
- if (!sdev) {
- ret = -ENOMEM;
- goto out;
- }
- sdev->dev = dev;
-
- ret = intel_iommu_enable_pasid(iommu, dev);
- if (ret || !pasid) {
- /* If they don't actually want to assign a PASID, this is
- * just an enabling check/preparation. */
- kfree(sdev);
- goto out;
- }
-
- info = dev->archdata.iommu;
- if (!info || !info->pasid_supported) {
- kfree(sdev);
- goto out;
- }
-
- sdev->did = FLPT_DEFAULT_DID;
- sdev->sid = PCI_DEVID(info->bus, info->devfn);
- if (info->ats_enabled) {
- sdev->dev_iotlb = 1;
- sdev->qdep = info->ats_qdep;
- if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
- sdev->qdep = 0;
- }
-
- /* Finish the setup now we know we're keeping it */
- sdev->users = 1;
- sdev->ops = ops;
- init_rcu_head(&sdev->rcu);
-
- if (!svm) {
- svm = kzalloc(sizeof(*svm), GFP_KERNEL);
- if (!svm) {
- ret = -ENOMEM;
- kfree(sdev);
- goto out;
- }
- svm->iommu = iommu;
-
- if (pasid_max > intel_pasid_max_id)
- pasid_max = intel_pasid_max_id;
-
- /* Do not use PASID 0 in caching mode (virtualised IOMMU) */
- ret = intel_pasid_alloc_id(svm,
- !!cap_caching_mode(iommu->cap),
- pasid_max, GFP_KERNEL);
- if (ret < 0) {
- kfree(svm);
- kfree(sdev);
- goto out;
- }
- svm->pasid = ret;
- svm->notifier.ops = &intel_mmuops;
- svm->mm = mm;
- svm->flags = flags;
- INIT_LIST_HEAD_RCU(&svm->devs);
- INIT_LIST_HEAD(&svm->list);
- ret = -ENOMEM;
- if (mm) {
- ret = mmu_notifier_register(&svm->notifier, mm);
- if (ret) {
- intel_pasid_free_id(svm->pasid);
- kfree(svm);
- kfree(sdev);
- goto out;
- }
- }
-
- spin_lock(&iommu->lock);
- ret = intel_pasid_setup_first_level(iommu, dev,
- mm ? mm->pgd : init_mm.pgd,
- svm->pasid, FLPT_DEFAULT_DID,
- mm ? 0 : PASID_FLAG_SUPERVISOR_MODE);
- spin_unlock(&iommu->lock);
- if (ret) {
- if (mm)
- mmu_notifier_unregister(&svm->notifier, mm);
- intel_pasid_free_id(svm->pasid);
- kfree(svm);
- kfree(sdev);
- goto out;
- }
-
- list_add_tail(&svm->list, &global_svm_list);
- } else {
- /*
- * Binding a new device with existing PASID, need to setup
- * the PASID entry.
- */
- spin_lock(&iommu->lock);
- ret = intel_pasid_setup_first_level(iommu, dev,
- mm ? mm->pgd : init_mm.pgd,
- svm->pasid, FLPT_DEFAULT_DID,
- mm ? 0 : PASID_FLAG_SUPERVISOR_MODE);
- spin_unlock(&iommu->lock);
- if (ret) {
- kfree(sdev);
- goto out;
- }
- }
- list_add_rcu(&sdev->list, &svm->devs);
-
- success:
- *pasid = svm->pasid;
- ret = 0;
- out:
- mutex_unlock(&pasid_mutex);
- if (mm)
- mmput(mm);
- return ret;
-}
-EXPORT_SYMBOL_GPL(intel_svm_bind_mm);
-
-int intel_svm_unbind_mm(struct device *dev, int pasid)
-{
- struct intel_svm_dev *sdev;
- struct intel_iommu *iommu;
- struct intel_svm *svm;
- int ret = -EINVAL;
-
- mutex_lock(&pasid_mutex);
- iommu = intel_svm_device_to_iommu(dev);
- if (!iommu)
- goto out;
-
- svm = intel_pasid_lookup_id(pasid);
- if (!svm)
- goto out;
-
- list_for_each_entry(sdev, &svm->devs, list) {
- if (dev == sdev->dev) {
- ret = 0;
- sdev->users--;
- if (!sdev->users) {
- list_del_rcu(&sdev->list);
- /* Flush the PASID cache and IOTLB for this device.
- * Note that we do depend on the hardware *not* using
- * the PASID any more. Just as we depend on other
- * devices never using PASIDs that they have no right
- * to use. We have a *shared* PASID table, because it's
- * large and has to be physically contiguous. So it's
- * hard to be as defensive as we might like. */
- intel_pasid_tear_down_entry(iommu, dev, svm->pasid);
- intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
- kfree_rcu(sdev, rcu);
-
- if (list_empty(&svm->devs)) {
- intel_pasid_free_id(svm->pasid);
- if (svm->mm)
- mmu_notifier_unregister(&svm->notifier, svm->mm);
-
- list_del(&svm->list);
-
- /* We mandate that no page faults may be outstanding
- * for the PASID when intel_svm_unbind_mm() is called.
- * If that is not obeyed, subtle errors will happen.
- * Let's make them less subtle... */
- memset(svm, 0x6b, sizeof(*svm));
- kfree(svm);
- }
- }
- break;
- }
- }
- out:
- mutex_unlock(&pasid_mutex);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(intel_svm_unbind_mm);
-
-int intel_svm_is_pasid_valid(struct device *dev, int pasid)
-{
- struct intel_iommu *iommu;
- struct intel_svm *svm;
- int ret = -EINVAL;
-
- mutex_lock(&pasid_mutex);
- iommu = intel_svm_device_to_iommu(dev);
- if (!iommu)
- goto out;
-
- svm = intel_pasid_lookup_id(pasid);
- if (!svm)
- goto out;
-
- /* init_mm is used in this case */
- if (!svm->mm)
- ret = 1;
- else if (atomic_read(&svm->mm->mm_users) > 0)
- ret = 1;
- else
- ret = 0;
-
- out:
- mutex_unlock(&pasid_mutex);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(intel_svm_is_pasid_valid);
-
-/* Page request queue descriptor */
-struct page_req_dsc {
- union {
- struct {
- u64 type:8;
- u64 pasid_present:1;
- u64 priv_data_present:1;
- u64 rsvd:6;
- u64 rid:16;
- u64 pasid:20;
- u64 exe_req:1;
- u64 pm_req:1;
- u64 rsvd2:10;
- };
- u64 qw_0;
- };
- union {
- struct {
- u64 rd_req:1;
- u64 wr_req:1;
- u64 lpig:1;
- u64 prg_index:9;
- u64 addr:52;
- };
- u64 qw_1;
- };
- u64 priv_data[2];
-};
-
-#define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x20)
-
-static bool access_error(struct vm_area_struct *vma, struct page_req_dsc *req)
-{
- unsigned long requested = 0;
-
- if (req->exe_req)
- requested |= VM_EXEC;
-
- if (req->rd_req)
- requested |= VM_READ;
-
- if (req->wr_req)
- requested |= VM_WRITE;
-
- return (requested & ~vma->vm_flags) != 0;
-}
-
-static bool is_canonical_address(u64 addr)
-{
- int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
- long saddr = (long) addr;
-
- return (((saddr << shift) >> shift) == saddr);
-}
-
-static irqreturn_t prq_event_thread(int irq, void *d)
-{
- struct intel_iommu *iommu = d;
- struct intel_svm *svm = NULL;
- int head, tail, handled = 0;
-
- /* Clear PPR bit before reading head/tail registers, to
- * ensure that we get a new interrupt if needed. */
- writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);
-
- tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
- head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
- while (head != tail) {
- struct intel_svm_dev *sdev;
- struct vm_area_struct *vma;
- struct page_req_dsc *req;
- struct qi_desc resp;
- int result;
- vm_fault_t ret;
- u64 address;
-
- handled = 1;
-
- req = &iommu->prq[head / sizeof(*req)];
-
- result = QI_RESP_FAILURE;
- address = (u64)req->addr << VTD_PAGE_SHIFT;
- if (!req->pasid_present) {
- pr_err("%s: Page request without PASID: %08llx %08llx\n",
- iommu->name, ((unsigned long long *)req)[0],
- ((unsigned long long *)req)[1]);
- goto no_pasid;
- }
-
- if (!svm || svm->pasid != req->pasid) {
- rcu_read_lock();
- svm = intel_pasid_lookup_id(req->pasid);
- /* It *can't* go away, because the driver is not permitted
- * to unbind the mm while any page faults are outstanding.
- * So we only need RCU to protect the internal idr code. */
- rcu_read_unlock();
-
- if (!svm) {
- pr_err("%s: Page request for invalid PASID %d: %08llx %08llx\n",
- iommu->name, req->pasid, ((unsigned long long *)req)[0],
- ((unsigned long long *)req)[1]);
- goto no_pasid;
- }
- }
-
- result = QI_RESP_INVALID;
- /* Since we're using init_mm.pgd directly, we should never take
- * any faults on kernel addresses. */
- if (!svm->mm)
- goto bad_req;
-
- /* If address is not canonical, return invalid response */
- if (!is_canonical_address(address))
- goto bad_req;
-
- /* If the mm is already defunct, don't handle faults. */
- if (!mmget_not_zero(svm->mm))
- goto bad_req;
-
- down_read(&svm->mm->mmap_sem);
- vma = find_extend_vma(svm->mm, address);
- if (!vma || address < vma->vm_start)
- goto invalid;
-
- if (access_error(vma, req))
- goto invalid;
-
- ret = handle_mm_fault(vma, address,
- req->wr_req ? FAULT_FLAG_WRITE : 0);
- if (ret & VM_FAULT_ERROR)
- goto invalid;
-
- result = QI_RESP_SUCCESS;
- invalid:
- up_read(&svm->mm->mmap_sem);
- mmput(svm->mm);
- bad_req:
- /* Accounting for major/minor faults? */
- rcu_read_lock();
- list_for_each_entry_rcu(sdev, &svm->devs, list) {
- if (sdev->sid == req->rid)
- break;
- }
- /* Other devices can go away, but the drivers are not permitted
- * to unbind while any page faults might be in flight. So it's
- * OK to drop the 'lock' here now we have it. */
- rcu_read_unlock();
-
- if (WARN_ON(&sdev->list == &svm->devs))
- sdev = NULL;
-
- if (sdev && sdev->ops && sdev->ops->fault_cb) {
- int rwxp = (req->rd_req << 3) | (req->wr_req << 2) |
- (req->exe_req << 1) | (req->pm_req);
- sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr,
- req->priv_data, rwxp, result);
- }
- /* We get here in the error case where the PASID lookup failed,
- and these can be NULL. Do not use them below this point! */
- sdev = NULL;
- svm = NULL;
- no_pasid:
- if (req->lpig || req->priv_data_present) {
- /*
- * Per VT-d spec. v3.0 ch7.7, system software must
- * respond with page group response if private data
- * is present (PDP) or last page in group (LPIG) bit
- * is set. This is an additional VT-d feature beyond
- * PCI ATS spec.
- */
- resp.qw0 = QI_PGRP_PASID(req->pasid) |
- QI_PGRP_DID(req->rid) |
- QI_PGRP_PASID_P(req->pasid_present) |
- QI_PGRP_PDP(req->priv_data_present) |
- QI_PGRP_RESP_CODE(result) |
- QI_PGRP_RESP_TYPE;
- resp.qw1 = QI_PGRP_IDX(req->prg_index) |
- QI_PGRP_LPIG(req->lpig);
-
- if (req->priv_data_present)
- memcpy(&resp.qw2, req->priv_data,
- sizeof(req->priv_data));
- resp.qw2 = 0;
- resp.qw3 = 0;
- qi_submit_sync(&resp, iommu);
- }
- head = (head + sizeof(*req)) & PRQ_RING_MASK;
- }
-
- dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);
-
- return IRQ_RETVAL(handled);
-}
diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
new file mode 100644
index 0000000..5337ee1
--- /dev/null
+++ b/drivers/iommu/intel/Kconfig
@@ -0,0 +1,87 @@
+# SPDX-License-Identifier: GPL-2.0-only
+# Intel IOMMU support
+config DMAR_TABLE
+ bool
+
+config INTEL_IOMMU
+ bool "Support for Intel IOMMU using DMA Remapping Devices"
+ depends on PCI_MSI && ACPI && (X86 || IA64)
+ select DMA_OPS
+ select IOMMU_API
+ select IOMMU_IOVA
+ select NEED_DMA_MAP_STATE
+ select DMAR_TABLE
+ select SWIOTLB
+ select IOASID
+ help
+ DMA remapping (DMAR) devices support enables independent address
+ translations for Direct Memory Access (DMA) from devices.
+ These DMA remapping devices are reported via ACPI tables
+ and include PCI device scope covered by these DMA
+ remapping devices.
+
+config INTEL_IOMMU_DEBUGFS
+ bool "Export Intel IOMMU internals in Debugfs"
+ depends on INTEL_IOMMU && IOMMU_DEBUGFS
+ help
+ !!!WARNING!!!
+
+ DO NOT ENABLE THIS OPTION UNLESS YOU REALLY KNOW WHAT YOU ARE DOING!!!
+
+ Expose Intel IOMMU internals in Debugfs.
+
+ This option is -NOT- intended for production environments, and should
+ only be enabled for debugging Intel IOMMU.
+
+config INTEL_IOMMU_SVM
+ bool "Support for Shared Virtual Memory with Intel IOMMU"
+ depends on INTEL_IOMMU && X86_64
+ select PCI_PASID
+ select PCI_PRI
+ select MMU_NOTIFIER
+ select IOASID
+ help
+ Shared Virtual Memory (SVM) provides a facility for devices
+ to access DMA resources through process address space by
+ means of a Process Address Space ID (PASID).
+
+config INTEL_IOMMU_DEFAULT_ON
+ def_bool y
+ prompt "Enable Intel DMA Remapping Devices by default"
+ depends on INTEL_IOMMU
+ help
+ Selecting this option will enable a DMAR device at boot time if
+ one is found. If this option is not selected, DMAR support can
+ be enabled by passing intel_iommu=on to the kernel.
+
+config INTEL_IOMMU_BROKEN_GFX_WA
+ bool "Workaround broken graphics drivers (going away soon)"
+ depends on INTEL_IOMMU && BROKEN && X86
+ help
+ Current Graphics drivers tend to use physical address
+ for DMA and avoid using DMA APIs. Setting this config
+ option permits the IOMMU driver to set a unity map for
+ all the OS-visible memory. Hence the driver can continue
+ to use physical addresses for DMA, at least until this
+ option is removed in the 2.6.32 kernel.
+
+config INTEL_IOMMU_FLOPPY_WA
+ def_bool y
+ depends on INTEL_IOMMU && X86
+ help
+ Floppy disk drivers are known to bypass DMA API calls
+ thereby failing to work when IOMMU is enabled. This
+ workaround will setup a 1:1 mapping for the first
+ 16MiB to make floppy (an ISA device) work.
+
+config INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON
+ bool "Enable Intel IOMMU scalable mode by default"
+ depends on INTEL_IOMMU
+ help
+ Selecting this option will enable by default the scalable mode if
+ hardware presents the capability. The scalable mode is defined in
+ VT-d 3.0. The scalable mode capability could be checked by reading
+ /sys/devices/virtual/iommu/dmar*/intel-iommu/ecap. If this option
+ is not selected, scalable mode support could also be enabled by
+ passing intel_iommu=sm_on to the kernel. If not sure, please use
+ the default value.
diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile
new file mode 100644
index 0000000..fb8e1e8
--- /dev/null
+++ b/drivers/iommu/intel/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_DMAR_TABLE) += dmar.o
+obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o
+obj-$(CONFIG_INTEL_IOMMU) += trace.o
+obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o
+obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o
+obj-$(CONFIG_IRQ_REMAP) += irq_remapping.o
diff --git a/drivers/iommu/intel-iommu-debugfs.c b/drivers/iommu/intel/debugfs.c
similarity index 75%
rename from drivers/iommu/intel-iommu-debugfs.c
rename to drivers/iommu/intel/debugfs.c
index bdf095e..efea7f0 100644
--- a/drivers/iommu/intel-iommu-debugfs.c
+++ b/drivers/iommu/intel/debugfs.c
@@ -5,6 +5,7 @@
* Authors: Gayatri Kammela <gayatri.kammela@intel.com>
* Sohil Mehta <sohil.mehta@intel.com>
* Jacob Pan <jacob.jun.pan@linux.intel.com>
+ * Lu Baolu <baolu.lu@linux.intel.com>
*/
#include <linux/debugfs.h>
@@ -14,7 +15,7 @@
#include <asm/irq_remapping.h>
-#include "intel-pasid.h"
+#include "pasid.h"
struct tbl_walk {
u16 bus;
@@ -300,6 +301,137 @@
}
DEFINE_SHOW_ATTRIBUTE(dmar_translation_struct);
+static inline unsigned long level_to_directory_size(int level)
+{
+ return BIT_ULL(VTD_PAGE_SHIFT + VTD_STRIDE_SHIFT * (level - 1));
+}
+
+static inline void
+dump_page_info(struct seq_file *m, unsigned long iova, u64 *path)
+{
+ seq_printf(m, "0x%013lx |\t0x%016llx\t0x%016llx\t0x%016llx\t0x%016llx\t0x%016llx\n",
+ iova >> VTD_PAGE_SHIFT, path[5], path[4],
+ path[3], path[2], path[1]);
+}
+
+static void pgtable_walk_level(struct seq_file *m, struct dma_pte *pde,
+ int level, unsigned long start,
+ u64 *path)
+{
+ int i;
+
+ if (level > 5 || level < 1)
+ return;
+
+ for (i = 0; i < BIT_ULL(VTD_STRIDE_SHIFT);
+ i++, pde++, start += level_to_directory_size(level)) {
+ if (!dma_pte_present(pde))
+ continue;
+
+ path[level] = pde->val;
+ if (dma_pte_superpage(pde) || level == 1)
+ dump_page_info(m, start, path);
+ else
+ pgtable_walk_level(m, phys_to_virt(dma_pte_addr(pde)),
+ level - 1, start, path);
+ path[level] = 0;
+ }
+}
+
+static int show_device_domain_translation(struct device *dev, void *data)
+{
+ struct dmar_domain *domain = find_domain(dev);
+ struct seq_file *m = data;
+ u64 path[6] = { 0 };
+
+ if (!domain)
+ return 0;
+
+ seq_printf(m, "Device %s with pasid %d @0x%llx\n",
+ dev_name(dev), domain->default_pasid,
+ (u64)virt_to_phys(domain->pgd));
+ seq_puts(m, "IOVA_PFN\t\tPML5E\t\t\tPML4E\t\t\tPDPE\t\t\tPDE\t\t\tPTE\n");
+
+ pgtable_walk_level(m, domain->pgd, domain->agaw + 2, 0, path);
+ seq_putc(m, '\n');
+
+ return 0;
+}
+
+static int domain_translation_struct_show(struct seq_file *m, void *unused)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&device_domain_lock, flags);
+ ret = bus_for_each_dev(&pci_bus_type, NULL, m,
+ show_device_domain_translation);
+ spin_unlock_irqrestore(&device_domain_lock, flags);
+
+ return ret;
+}
+DEFINE_SHOW_ATTRIBUTE(domain_translation_struct);
+
+static void invalidation_queue_entry_show(struct seq_file *m,
+ struct intel_iommu *iommu)
+{
+ int index, shift = qi_shift(iommu);
+ struct qi_desc *desc;
+ int offset;
+
+ if (ecap_smts(iommu->ecap))
+ seq_puts(m, "Index\t\tqw0\t\t\tqw1\t\t\tqw2\t\t\tqw3\t\t\tstatus\n");
+ else
+ seq_puts(m, "Index\t\tqw0\t\t\tqw1\t\t\tstatus\n");
+
+ for (index = 0; index < QI_LENGTH; index++) {
+ offset = index << shift;
+ desc = iommu->qi->desc + offset;
+ if (ecap_smts(iommu->ecap))
+ seq_printf(m, "%5d\t%016llx\t%016llx\t%016llx\t%016llx\t%016x\n",
+ index, desc->qw0, desc->qw1,
+ desc->qw2, desc->qw3,
+ iommu->qi->desc_status[index]);
+ else
+ seq_printf(m, "%5d\t%016llx\t%016llx\t%016x\n",
+ index, desc->qw0, desc->qw1,
+ iommu->qi->desc_status[index]);
+ }
+}
+
+static int invalidation_queue_show(struct seq_file *m, void *unused)
+{
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu;
+ unsigned long flags;
+ struct q_inval *qi;
+ int shift;
+
+ rcu_read_lock();
+ for_each_active_iommu(iommu, drhd) {
+ qi = iommu->qi;
+ shift = qi_shift(iommu);
+
+ if (!qi || !ecap_qis(iommu->ecap))
+ continue;
+
+ seq_printf(m, "Invalidation queue on IOMMU: %s\n", iommu->name);
+
+ raw_spin_lock_irqsave(&qi->q_lock, flags);
+ seq_printf(m, " Base: 0x%llx\tHead: %lld\tTail: %lld\n",
+ (u64)virt_to_phys(qi->desc),
+ dmar_readq(iommu->reg + DMAR_IQH_REG) >> shift,
+ dmar_readq(iommu->reg + DMAR_IQT_REG) >> shift);
+ invalidation_queue_entry_show(m, iommu);
+ raw_spin_unlock_irqrestore(&qi->q_lock, flags);
+ seq_putc(m, '\n');
+ }
+ rcu_read_unlock();
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(invalidation_queue);
+
#ifdef CONFIG_IRQ_REMAP
static void ir_tbl_remap_entry_show(struct seq_file *m,
struct intel_iommu *iommu)
@@ -415,6 +547,11 @@
&iommu_regset_fops);
debugfs_create_file("dmar_translation_struct", 0444, intel_iommu_debug,
NULL, &dmar_translation_struct_fops);
+ debugfs_create_file("domain_translation_struct", 0444,
+ intel_iommu_debug, NULL,
+ &domain_translation_struct_fops);
+ debugfs_create_file("invalidation_queue", 0444, intel_iommu_debug,
+ NULL, &invalidation_queue_fops);
#ifdef CONFIG_IRQ_REMAP
debugfs_create_file("ir_translation_struct", 0444, intel_iommu_debug,
NULL, &ir_translation_struct_fops);
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/intel/dmar.c
similarity index 90%
rename from drivers/iommu/dmar.c
rename to drivers/iommu/intel/dmar.c
index 2461652..b8d0b56 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -32,7 +32,7 @@
#include <asm/irq_remapping.h>
#include <asm/iommu_table.h>
-#include "irq_remapping.h"
+#include "../irq_remapping.h"
typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
struct dmar_res_callback {
@@ -252,7 +252,7 @@
info->dev->hdr_type != PCI_HEADER_TYPE_NORMAL) ||
(scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE &&
(info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
- info->dev->class >> 8 != PCI_CLASS_BRIDGE_OTHER))) {
+ info->dev->class >> 16 != PCI_BASE_CLASS_BRIDGE))) {
pr_warn("Device scope type does not match for %s\n",
pci_name(info->dev));
return -EINVAL;
@@ -316,6 +316,9 @@
if (ret < 0 && dmar_dev_scope_status == 0)
dmar_dev_scope_status = ret;
+ if (ret >= 0)
+ intel_irq_remap_add_device(info);
+
return ret;
}
@@ -330,6 +333,13 @@
dmar_iommu_notify_scope_dev(info);
}
+static inline void vf_inherit_msi_domain(struct pci_dev *pdev)
+{
+ struct pci_dev *physfn = pci_physfn(pdev);
+
+ dev_set_msi_domain(&pdev->dev, dev_get_msi_domain(&physfn->dev));
+}
+
static int dmar_pci_bus_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
@@ -339,8 +349,20 @@
/* Only care about add/remove events for physical functions.
* For VFs we actually do the lookup based on the corresponding
* PF in device_to_iommu() anyway. */
- if (pdev->is_virtfn)
+ if (pdev->is_virtfn) {
+ /*
+ * Ensure that the VF device inherits the irq domain of the
+ * PF device. Ideally the device would inherit the domain
+ * from the bus, but DMAR can have multiple units per bus
+ * which makes this impossible. The VF 'bus' could inherit
+ * from the PF device, but that's yet another x86'sism to
+ * inflict on everybody else.
+ */
+ if (action == BUS_NOTIFY_ADD_DEVICE)
+ vf_inherit_msi_domain(pdev);
return NOTIFY_DONE;
+ }
+
if (action != BUS_NOTIFY_ADD_DEVICE &&
action != BUS_NOTIFY_REMOVED_DEVICE)
return NOTIFY_DONE;
@@ -380,7 +402,7 @@
return NULL;
}
-/**
+/*
* dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
* structure which uniquely represent one DMA remapping hardware unit
* present in the platform
@@ -473,7 +495,7 @@
rhsa = (struct acpi_dmar_rhsa *)header;
for_each_drhd_unit(drhd) {
if (drhd->reg_base_addr == rhsa->base_address) {
- int node = acpi_map_pxm_to_node(rhsa->proximity_domain);
+ int node = pxm_to_node(rhsa->proximity_domain);
if (!node_online(node))
node = NUMA_NO_NODE;
@@ -906,8 +928,11 @@
}
#ifdef CONFIG_X86
- if (!ret)
+ if (!ret) {
x86_init.iommu.iommu_init = intel_iommu_init;
+ x86_platform.iommu_shutdown = intel_iommu_shutdown;
+ }
+
#endif
if (dmar_tbl) {
@@ -961,6 +986,8 @@
warn_invalid_dmar(phys_addr, " returns all ones");
goto unmap;
}
+ if (ecap_vcs(iommu->ecap))
+ iommu->vccap = dmar_readq(iommu->reg + DMAR_VCCAP_REG);
/* the registers might be more than one page */
map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
@@ -1173,12 +1200,11 @@
}
}
-static int qi_check_fault(struct intel_iommu *iommu, int index)
+static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
{
u32 fault;
int head, tail;
struct q_inval *qi = iommu->qi;
- int wait_index = (index + 1) % QI_LENGTH;
int shift = qi_shift(iommu);
if (qi->desc_status[wait_index] == QI_ABORT)
@@ -1241,17 +1267,21 @@
}
/*
- * Submit the queued invalidation descriptor to the remapping
- * hardware unit and wait for its completion.
+ * Function to submit invalidation descriptors of all types to the queued
+ * invalidation interface(QI). Multiple descriptors can be submitted at a
+ * time, a wait descriptor will be appended to each submission to ensure
+ * hardware has completed the invalidation before return. Wait descriptors
+ * can be part of the submission but it will not be polled for completion.
*/
-int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
+int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
+ unsigned int count, unsigned long options)
{
- int rc;
struct q_inval *qi = iommu->qi;
- int offset, shift, length;
struct qi_desc wait_desc;
int wait_index, index;
unsigned long flags;
+ int offset, shift;
+ int rc, i;
if (!qi)
return 0;
@@ -1260,32 +1290,41 @@
rc = 0;
raw_spin_lock_irqsave(&qi->q_lock, flags);
- while (qi->free_cnt < 3) {
+ /*
+ * Check if we have enough empty slots in the queue to submit,
+ * the calculation is based on:
+ * # of desc + 1 wait desc + 1 space between head and tail
+ */
+ while (qi->free_cnt < count + 2) {
raw_spin_unlock_irqrestore(&qi->q_lock, flags);
cpu_relax();
raw_spin_lock_irqsave(&qi->q_lock, flags);
}
index = qi->free_head;
- wait_index = (index + 1) % QI_LENGTH;
+ wait_index = (index + count) % QI_LENGTH;
shift = qi_shift(iommu);
- length = 1 << shift;
- qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
+ for (i = 0; i < count; i++) {
+ offset = ((index + i) % QI_LENGTH) << shift;
+ memcpy(qi->desc + offset, &desc[i], 1 << shift);
+ qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE;
+ }
+ qi->desc_status[wait_index] = QI_IN_USE;
- offset = index << shift;
- memcpy(qi->desc + offset, desc, length);
wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
+ if (options & QI_OPT_WAIT_DRAIN)
+ wait_desc.qw0 |= QI_IWD_PRQ_DRAIN;
wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]);
wait_desc.qw2 = 0;
wait_desc.qw3 = 0;
offset = wait_index << shift;
- memcpy(qi->desc + offset, &wait_desc, length);
+ memcpy(qi->desc + offset, &wait_desc, 1 << shift);
- qi->free_head = (qi->free_head + 2) % QI_LENGTH;
- qi->free_cnt -= 2;
+ qi->free_head = (qi->free_head + count + 1) % QI_LENGTH;
+ qi->free_cnt -= count + 1;
/*
* update the HW tail register indicating the presence of
@@ -1301,7 +1340,7 @@
* a deadlock where the interrupt context can wait indefinitely
* for free slots in the queue.
*/
- rc = qi_check_fault(iommu, index);
+ rc = qi_check_fault(iommu, index, wait_index);
if (rc)
break;
@@ -1310,7 +1349,8 @@
raw_spin_lock(&qi->q_lock);
}
- qi->desc_status[index] = QI_DONE;
+ for (i = 0; i < count; i++)
+ qi->desc_status[(index + i) % QI_LENGTH] = QI_DONE;
reclaim_free_desc(qi);
raw_spin_unlock_irqrestore(&qi->q_lock, flags);
@@ -1334,7 +1374,7 @@
desc.qw3 = 0;
/* should never fail */
- qi_submit_sync(&desc, iommu);
+ qi_submit_sync(iommu, &desc, 1, 0);
}
void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
@@ -1348,7 +1388,7 @@
desc.qw2 = 0;
desc.qw3 = 0;
- qi_submit_sync(&desc, iommu);
+ qi_submit_sync(iommu, &desc, 1, 0);
}
void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
@@ -1372,7 +1412,7 @@
desc.qw2 = 0;
desc.qw3 = 0;
- qi_submit_sync(&desc, iommu);
+ qi_submit_sync(iommu, &desc, 1, 0);
}
void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
@@ -1394,7 +1434,102 @@
desc.qw2 = 0;
desc.qw3 = 0;
- qi_submit_sync(&desc, iommu);
+ qi_submit_sync(iommu, &desc, 1, 0);
+}
+
+/* PASID-based IOTLB invalidation */
+void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
+ unsigned long npages, bool ih)
+{
+ struct qi_desc desc = {.qw2 = 0, .qw3 = 0};
+
+ /*
+ * npages == -1 means a PASID-selective invalidation, otherwise,
+ * a positive value for Page-selective-within-PASID invalidation.
+ * 0 is not a valid input.
+ */
+ if (WARN_ON(!npages)) {
+ pr_err("Invalid input npages = %ld\n", npages);
+ return;
+ }
+
+ if (npages == -1) {
+ desc.qw0 = QI_EIOTLB_PASID(pasid) |
+ QI_EIOTLB_DID(did) |
+ QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
+ QI_EIOTLB_TYPE;
+ desc.qw1 = 0;
+ } else {
+ int mask = ilog2(__roundup_pow_of_two(npages));
+ unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
+
+ if (WARN_ON_ONCE(!IS_ALIGNED(addr, align)))
+ addr = ALIGN_DOWN(addr, align);
+
+ desc.qw0 = QI_EIOTLB_PASID(pasid) |
+ QI_EIOTLB_DID(did) |
+ QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
+ QI_EIOTLB_TYPE;
+ desc.qw1 = QI_EIOTLB_ADDR(addr) |
+ QI_EIOTLB_IH(ih) |
+ QI_EIOTLB_AM(mask);
+ }
+
+ qi_submit_sync(iommu, &desc, 1, 0);
+}
+
+/* PASID-based device IOTLB Invalidate */
+void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
+ u32 pasid, u16 qdep, u64 addr, unsigned int size_order)
+{
+ unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
+ struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
+
+ desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
+ QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
+ QI_DEV_IOTLB_PFSID(pfsid);
+
+ /*
+ * If S bit is 0, we only flush a single page. If S bit is set,
+ * The least significant zero bit indicates the invalidation address
+ * range. VT-d spec 6.5.2.6.
+ * e.g. address bit 12[0] indicates 8KB, 13[0] indicates 16KB.
+ * size order = 0 is PAGE_SIZE 4KB
+ * Max Invs Pending (MIP) is set to 0 for now until we have DIT in
+ * ECAP.
+ */
+ if (!IS_ALIGNED(addr, VTD_PAGE_SIZE << size_order))
+ pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n",
+ addr, size_order);
+
+ /* Take page address */
+ desc.qw1 = QI_DEV_EIOTLB_ADDR(addr);
+
+ if (size_order) {
+ /*
+ * Existing 0s in address below size_order may be the least
+ * significant bit, we must set them to 1s to avoid having
+ * smaller size than desired.
+ */
+ desc.qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1,
+ VTD_PAGE_SHIFT);
+ /* Clear size_order bit to indicate size */
+ desc.qw1 &= ~mask;
+ /* Set the S bit to indicate flushing more than 1 page */
+ desc.qw1 |= QI_DEV_EIOTLB_SIZE;
+ }
+
+ qi_submit_sync(iommu, &desc, 1, 0);
+}
+
+void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did,
+ u64 granu, u32 pasid)
+{
+ struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
+
+ desc.qw0 = QI_PC_PASID(pasid) | QI_PC_DID(did) |
+ QI_PC_GRAN(granu) | QI_PC_TYPE;
+ qi_submit_sync(iommu, &desc, 1, 0);
}
/*
@@ -1702,7 +1837,7 @@
}
static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
- u8 fault_reason, int pasid, u16 source_id,
+ u8 fault_reason, u32 pasid, u16 source_id,
unsigned long long addr)
{
const char *reason;
@@ -1752,7 +1887,8 @@
u8 fault_reason;
u16 source_id;
u64 guest_addr;
- int type, pasid;
+ u32 pasid;
+ int type;
u32 data;
bool pasid_present;
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel/iommu.c
similarity index 84%
rename from drivers/iommu/intel-iommu.c
rename to drivers/iommu/intel/iommu.c
index a2a03df..b21c822 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -23,7 +23,7 @@
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
-#include <linux/dma-mapping.h>
+#include <linux/dma-map-ops.h>
#include <linux/mempool.h>
#include <linux/memory.h>
#include <linux/cpu.h>
@@ -37,7 +37,7 @@
#include <linux/dmi.h>
#include <linux/pci-ats.h>
#include <linux/memblock.h>
-#include <linux/dma-contiguous.h>
+#include <linux/dma-map-ops.h>
#include <linux/dma-direct.h>
#include <linux/crash_dump.h>
#include <linux/numa.h>
@@ -47,8 +47,8 @@
#include <asm/iommu.h>
#include <trace/events/intel_iommu.h>
-#include "irq_remapping.h"
-#include "intel-pasid.h"
+#include "../irq_remapping.h"
+#include "pasid.h"
#define ROOT_SIZE VTD_PAGE_SIZE
#define CONTEXT_SIZE VTD_PAGE_SIZE
@@ -67,8 +67,8 @@
#define MAX_AGAW_WIDTH 64
#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
-#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
-#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
+#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << ((gaw) - VTD_PAGE_SHIFT)) - 1)
+#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << (gaw)) - 1)
/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
to match. That way, we can use 'unsigned long' for PFNs with impunity. */
@@ -296,17 +296,6 @@
static struct dmar_domain *si_domain;
static int hw_pass_through = 1;
-/* si_domain contains mulitple devices */
-#define DOMAIN_FLAG_STATIC_IDENTITY BIT(0)
-
-/*
- * This is a DMA domain allocated through the iommu domain allocation
- * interface. But one or more devices belonging to this domain have
- * been chosen to use a private domain. We should avoid to use the
- * map/unmap/iova_to_phys APIs on it.
- */
-#define DOMAIN_FLAG_LOSE_CHILDREN BIT(1)
-
#define for_each_domain_iommu(idx, domain) \
for (idx = 0; idx < g_num_of_iommus; idx++) \
if (domain->iommu_refcnt[idx])
@@ -341,11 +330,6 @@
static void domain_remove_dev_info(struct dmar_domain *domain);
static void dmar_remove_one_dev_info(struct device *dev);
static void __dmar_remove_one_dev_info(struct device_domain_info *info);
-static void domain_context_clear(struct intel_iommu *iommu,
- struct device *dev);
-static int domain_detach_iommu(struct dmar_domain *domain,
- struct intel_iommu *iommu);
-static bool device_is_rmrr_locked(struct device *dev);
static int intel_iommu_attach_device(struct iommu_domain *domain,
struct device *dev);
static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
@@ -355,9 +339,14 @@
int dmar_disabled = 0;
#else
int dmar_disabled = 1;
-#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
+#endif /* CONFIG_INTEL_IOMMU_DEFAULT_ON */
+#ifdef CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON
+int intel_iommu_sm = 1;
+#else
int intel_iommu_sm;
+#endif /* CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON */
+
int intel_iommu_enabled = 0;
EXPORT_SYMBOL_GPL(intel_iommu_enabled);
@@ -367,17 +356,30 @@
static int intel_iommu_superpage = 1;
static int iommu_identity_mapping;
static int intel_no_bounce;
+static int iommu_skip_te_disable;
-#define IDENTMAP_ALL 1
#define IDENTMAP_GFX 2
#define IDENTMAP_AZALIA 4
int intel_iommu_gfx_mapped;
EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
-#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
#define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2))
-static DEFINE_SPINLOCK(device_domain_lock);
+struct device_domain_info *get_domain_info(struct device *dev)
+{
+ struct device_domain_info *info;
+
+ if (!dev)
+ return NULL;
+
+ info = dev_iommu_priv_get(dev);
+ if (unlikely(info == DEFER_DEVICE_DOMAIN_INFO))
+ return NULL;
+
+ return info;
+}
+
+DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);
#define device_needs_bounce(d) (!intel_no_bounce && dev_is_pci(d) && \
@@ -428,12 +430,6 @@
iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
}
-/* Convert generic 'struct iommu_domain to private struct dmar_domain */
-static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
-{
- return container_of(dom, struct dmar_domain, domain);
-}
-
static int __init intel_iommu_setup(char *str)
{
if (!str)
@@ -462,8 +458,7 @@
pr_info("Intel-IOMMU: scalable mode supported\n");
intel_iommu_sm = 1;
} else if (!strncmp(str, "tboot_noforce", 13)) {
- printk(KERN_INFO
- "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
+ pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
intel_iommu_tboot_noforce = 1;
} else if (!strncmp(str, "nobounce", 8)) {
pr_info("Intel-IOMMU: No bounce buffer. This could expose security risks of DMA attacks\n");
@@ -552,6 +547,11 @@
return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
}
+static inline bool domain_use_first_level(struct dmar_domain *domain)
+{
+ return domain->flags & DOMAIN_FLAG_USE_FIRST_LEVEL;
+}
+
static inline int domain_pfn_supported(struct dmar_domain *domain,
unsigned long pfn)
{
@@ -656,7 +656,14 @@
rcu_read_lock();
for_each_active_iommu(iommu, drhd) {
if (iommu != skip) {
- if (!ecap_sc_support(iommu->ecap)) {
+ /*
+ * If the hardware is operating in the scalable mode,
+ * the snooping control is always supported since we
+ * always set PASID-table-entry.PGSNP bit if the domain
+ * is managed outside (UNMANAGED).
+ */
+ if (!sm_supported(iommu) &&
+ !ecap_sc_support(iommu->ecap)) {
ret = 0;
break;
}
@@ -667,11 +674,12 @@
return ret;
}
-static int domain_update_iommu_superpage(struct intel_iommu *skip)
+static int domain_update_iommu_superpage(struct dmar_domain *domain,
+ struct intel_iommu *skip)
{
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
- int mask = 0xf;
+ int mask = 0x3;
if (!intel_iommu_superpage) {
return 0;
@@ -681,7 +689,13 @@
rcu_read_lock();
for_each_active_iommu(iommu, drhd) {
if (iommu != skip) {
- mask &= cap_super_page_val(iommu->cap);
+ if (domain && domain_use_first_level(domain)) {
+ if (!cap_fl1gp_support(iommu->cap))
+ mask = 0x1;
+ } else {
+ mask &= cap_super_page_val(iommu->cap);
+ }
+
if (!mask)
break;
}
@@ -691,12 +705,59 @@
return fls(mask);
}
+static int domain_update_device_node(struct dmar_domain *domain)
+{
+ struct device_domain_info *info;
+ int nid = NUMA_NO_NODE;
+
+ assert_spin_locked(&device_domain_lock);
+
+ if (list_empty(&domain->devices))
+ return NUMA_NO_NODE;
+
+ list_for_each_entry(info, &domain->devices, link) {
+ if (!info->dev)
+ continue;
+
+ /*
+ * There could possibly be multiple device numa nodes as devices
+ * within the same domain may sit behind different IOMMUs. There
+ * isn't perfect answer in such situation, so we select first
+ * come first served policy.
+ */
+ nid = dev_to_node(info->dev);
+ if (nid != NUMA_NO_NODE)
+ break;
+ }
+
+ return nid;
+}
+
/* Some capabilities may be different across iommus */
static void domain_update_iommu_cap(struct dmar_domain *domain)
{
domain_update_iommu_coherency(domain);
domain->iommu_snooping = domain_update_iommu_snooping(NULL);
- domain->iommu_superpage = domain_update_iommu_superpage(NULL);
+ domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL);
+
+ /*
+ * If RHSA is missing, we should default to the device numa domain
+ * as fall back.
+ */
+ if (domain->nid == NUMA_NO_NODE)
+ domain->nid = domain_update_device_node(domain);
+
+ /*
+ * First-level translation restricts the input-address to a
+ * canonical address (i.e., address bits 63:N have the same
+ * value as address bit [N-1], where N is 48-bits with 4-level
+ * paging and 57-bits with 5-level paging). Hence, skip bit
+ * [N-1].
+ */
+ if (domain_use_first_level(domain))
+ domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw - 1);
+ else
+ domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw);
}
struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
@@ -733,9 +794,9 @@
return &context[devfn];
}
-static int iommu_dummy(struct device *dev)
+static bool attach_deferred(struct device *dev)
{
- return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
+ return dev_iommu_priv_get(dev) == DEFER_DEVICE_DOMAIN_INFO;
}
/**
@@ -765,28 +826,69 @@
return false;
}
-static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
+static bool quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
+{
+ struct dmar_drhd_unit *drhd;
+ u32 vtbar;
+ int rc;
+
+ /* We know that this device on this chipset has its own IOMMU.
+ * If we find it under a different IOMMU, then the BIOS is lying
+ * to us. Hope that the IOMMU for this device is actually
+ * disabled, and it needs no translation...
+ */
+ rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
+ if (rc) {
+ /* "can't" happen */
+ dev_info(&pdev->dev, "failed to run vt-d quirk\n");
+ return false;
+ }
+ vtbar &= 0xffff0000;
+
+ /* we know that the this iommu should be at offset 0xa000 from vtbar */
+ drhd = dmar_find_matched_drhd_unit(pdev);
+ if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
+ pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
+ add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
+ return true;
+ }
+
+ return false;
+}
+
+static bool iommu_is_dummy(struct intel_iommu *iommu, struct device *dev)
+{
+ if (!iommu || iommu->drhd->ignored)
+ return true;
+
+ if (dev_is_pci(dev)) {
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
+ pdev->device == PCI_DEVICE_ID_INTEL_IOAT_SNB &&
+ quirk_ioat_snb_local_iommu(pdev))
+ return true;
+ }
+
+ return false;
+}
+
+struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
{
struct dmar_drhd_unit *drhd = NULL;
+ struct pci_dev *pdev = NULL;
struct intel_iommu *iommu;
struct device *tmp;
- struct pci_dev *pdev = NULL;
u16 segment = 0;
int i;
- if (iommu_dummy(dev))
+ if (!dev)
return NULL;
if (dev_is_pci(dev)) {
struct pci_dev *pf_pdev;
- pdev = to_pci_dev(dev);
-
-#ifdef CONFIG_X86
- /* VMD child devices currently cannot be handled individually */
- if (is_vmd(pdev->bus))
- return NULL;
-#endif
+ pdev = pci_real_dma_dev(to_pci_dev(dev));
/* VFs aren't listed in scope tables; we need to look up
* the PF instead to find the IOMMU. */
@@ -797,7 +899,7 @@
dev = &ACPI_COMPANION(dev)->dev;
rcu_read_lock();
- for_each_active_iommu(iommu, drhd) {
+ for_each_iommu(iommu, drhd) {
if (pdev && segment != drhd->segment)
continue;
@@ -811,8 +913,10 @@
if (pdev && pdev->is_virtfn)
goto got_pdev;
- *bus = drhd->devices[i].bus;
- *devfn = drhd->devices[i].devfn;
+ if (bus && devfn) {
+ *bus = drhd->devices[i].bus;
+ *devfn = drhd->devices[i].devfn;
+ }
goto out;
}
@@ -822,13 +926,18 @@
if (pdev && drhd->include_all) {
got_pdev:
- *bus = pdev->bus->number;
- *devfn = pdev->devfn;
+ if (bus && devfn) {
+ *bus = pdev->bus->number;
+ *devfn = pdev->devfn;
+ }
goto out;
}
}
iommu = NULL;
out:
+ if (iommu_is_dummy(iommu, dev))
+ iommu = NULL;
+
rcu_read_unlock();
return iommu;
@@ -919,6 +1028,11 @@
domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
+ if (domain_use_first_level(domain)) {
+ pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US;
+ if (domain->domain.type == IOMMU_DOMAIN_DMA)
+ pteval |= DMA_FL_PTE_ACCESS;
+ }
if (cmpxchg64(&pte->val, 0ULL, pteval))
/* Someone else set it while we were thinking; use theirs. */
free_pgtable_page(tmp_page);
@@ -1234,6 +1348,11 @@
readl, (sts & DMA_GSTS_RTPS), sts);
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
+
+ iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
+ if (sm_supported(iommu))
+ qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
+ iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
}
void iommu_flush_write_buffer(struct intel_iommu *iommu)
@@ -1429,8 +1548,7 @@
!pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
info->pri_enabled = 1;
#endif
- if (!pdev->untrusted && info->ats_supported &&
- pci_ats_page_aligned(pdev) &&
+ if (info->ats_supported && pci_ats_page_aligned(pdev) &&
!pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
info->ats_enabled = 1;
domain_update_iotlb(info->domain);
@@ -1489,6 +1607,20 @@
spin_unlock_irqrestore(&device_domain_lock, flags);
}
+static void domain_flush_piotlb(struct intel_iommu *iommu,
+ struct dmar_domain *domain,
+ u64 addr, unsigned long npages, bool ih)
+{
+ u16 did = domain->iommu_did[iommu->seq_id];
+
+ if (domain->default_pasid)
+ qi_flush_piotlb(iommu, did, domain->default_pasid,
+ addr, npages, ih);
+
+ if (!list_empty(&domain->devices))
+ qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, npages, ih);
+}
+
static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
struct dmar_domain *domain,
unsigned long pfn, unsigned int pages,
@@ -1502,18 +1634,23 @@
if (ih)
ih = 1 << 6;
- /*
- * Fallback to domain selective flush if no PSI support or the size is
- * too big.
- * PSI requires page size to be 2 ^ x, and the base address is naturally
- * aligned to the size
- */
- if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
- iommu->flush.flush_iotlb(iommu, did, 0, 0,
- DMA_TLB_DSI_FLUSH);
- else
- iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
- DMA_TLB_PSI_FLUSH);
+
+ if (domain_use_first_level(domain)) {
+ domain_flush_piotlb(iommu, domain, addr, pages, ih);
+ } else {
+ /*
+ * Fallback to domain selective flush if no PSI support or
+ * the size is too big. PSI requires page size to be 2 ^ x,
+ * and the base address is naturally aligned to the size.
+ */
+ if (!cap_pgsel_inv(iommu->cap) ||
+ mask > cap_max_amask_val(iommu->cap))
+ iommu->flush.flush_iotlb(iommu, did, 0, 0,
+ DMA_TLB_DSI_FLUSH);
+ else
+ iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
+ DMA_TLB_PSI_FLUSH);
+ }
/*
* In caching mode, changes of pages from non-present to present require
@@ -1528,8 +1665,11 @@
struct dmar_domain *domain,
unsigned long pfn, unsigned int pages)
{
- /* It's a non-present to present mapping. Only flush if caching mode */
- if (cap_caching_mode(iommu->cap))
+ /*
+ * It's a non-present to present mapping. Only flush if caching mode
+ * and second level.
+ */
+ if (cap_caching_mode(iommu->cap) && !domain_use_first_level(domain))
iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
else
iommu_flush_write_buffer(iommu);
@@ -1546,7 +1686,11 @@
struct intel_iommu *iommu = g_iommus[idx];
u16 did = domain->iommu_did[iommu->seq_id];
- iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
+ if (domain_use_first_level(domain))
+ domain_flush_piotlb(iommu, domain, 0, -1, 0);
+ else
+ iommu->flush.flush_iotlb(iommu, did, 0, 0,
+ DMA_TLB_DSI_FLUSH);
if (!cap_caching_mode(iommu->cap))
iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
@@ -1595,6 +1739,10 @@
u32 sts;
unsigned long flag;
+ if (iommu_skip_te_disable && iommu->drhd->gfx_dedicated &&
+ (cap_read_drain(iommu->cap) || cap_write_drain(iommu->cap)))
+ return;
+
raw_spin_lock_irqsave(&iommu->register_lock, flag);
iommu->gcmd &= ~DMA_GCMD_TE;
writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
@@ -1712,9 +1860,39 @@
if (ecap_prs(iommu->ecap))
intel_svm_finish_prq(iommu);
}
+ if (vccap_pasid(iommu->vccap))
+ ioasid_unregister_allocator(&iommu->pasid_allocator);
+
#endif
}
+/*
+ * Check and return whether first level is used by default for
+ * DMA translation.
+ */
+static bool first_level_by_default(void)
+{
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu;
+ static int first_level_support = -1;
+
+ if (likely(first_level_support != -1))
+ return first_level_support;
+
+ first_level_support = 1;
+
+ rcu_read_lock();
+ for_each_active_iommu(iommu, drhd) {
+ if (!sm_supported(iommu) || !ecap_flts(iommu->ecap)) {
+ first_level_support = 0;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return first_level_support;
+}
+
static struct dmar_domain *alloc_domain(int flags)
{
struct dmar_domain *domain;
@@ -1726,6 +1904,8 @@
memset(domain, 0, sizeof(*domain));
domain->nid = NUMA_NO_NODE;
domain->flags = flags;
+ if (first_level_by_default())
+ domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL;
domain->has_iotlb_device = false;
INIT_LIST_HEAD(&domain->devices);
@@ -1831,11 +2011,6 @@
return 0;
}
-static void domain_reserve_special_ranges(struct dmar_domain *domain)
-{
- copy_reserved_iova(&reserved_iova_list, &domain->iovad);
-}
-
static inline int guestwidth_to_adjustwidth(int gaw)
{
int agaw;
@@ -1850,64 +2025,6 @@
return agaw;
}
-static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
- int guest_width)
-{
- int adjust_width, agaw, cap_width;
- unsigned long sagaw;
- int err;
-
- init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
-
- err = init_iova_flush_queue(&domain->iovad,
- iommu_flush_iova, iova_entry_free);
- if (err)
- return err;
-
- domain_reserve_special_ranges(domain);
-
- /* calculate AGAW */
- cap_width = min_t(int, cap_mgaw(iommu->cap), agaw_to_width(iommu->agaw));
- if (guest_width > cap_width)
- guest_width = cap_width;
- domain->gaw = guest_width;
- adjust_width = guestwidth_to_adjustwidth(guest_width);
- agaw = width_to_agaw(adjust_width);
- sagaw = cap_sagaw(iommu->cap);
- if (!test_bit(agaw, &sagaw)) {
- /* hardware doesn't support it, choose a bigger one */
- pr_debug("Hardware doesn't support agaw %d\n", agaw);
- agaw = find_next_bit(&sagaw, 5, agaw);
- if (agaw >= 5)
- return -ENODEV;
- }
- domain->agaw = agaw;
-
- if (ecap_coherent(iommu->ecap))
- domain->iommu_coherency = 1;
- else
- domain->iommu_coherency = 0;
-
- if (ecap_sc_support(iommu->ecap))
- domain->iommu_snooping = 1;
- else
- domain->iommu_snooping = 0;
-
- if (intel_iommu_superpage)
- domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
- else
- domain->iommu_superpage = 0;
-
- domain->nid = iommu->node;
-
- /* always allocate the top pgd */
- domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
- if (!domain->pgd)
- return -ENOMEM;
- __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
- return 0;
-}
-
static void domain_exit(struct dmar_domain *domain)
{
@@ -1915,7 +2032,8 @@
domain_remove_dev_info(domain);
/* destroy iovas */
- put_iova_domain(&domain->iovad);
+ if (domain->domain.type == IOMMU_DOMAIN_DMA)
+ put_iova_domain(&domain->iovad);
if (domain->pgd) {
struct page *freelist;
@@ -1953,7 +2071,6 @@
context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
{
context->hi |= pasid & ((1 << 20) - 1);
- context->hi |= (1 << 20);
}
/*
@@ -2233,21 +2350,32 @@
unsigned long nr_pages, int prot)
{
struct dma_pte *first_pte = NULL, *pte = NULL;
- phys_addr_t uninitialized_var(pteval);
+ phys_addr_t pteval;
unsigned long sg_res = 0;
unsigned int largepage_lvl = 0;
unsigned long lvl_pages = 0;
+ u64 attr;
BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
return -EINVAL;
- prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
+ attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
+ attr |= DMA_FL_PTE_PRESENT;
+ if (domain_use_first_level(domain)) {
+ attr |= DMA_FL_PTE_XD | DMA_FL_PTE_US;
+
+ if (domain->domain.type == IOMMU_DOMAIN_DMA) {
+ attr |= DMA_FL_PTE_ACCESS;
+ if (prot & DMA_PTE_WRITE)
+ attr |= DMA_FL_PTE_DIRTY;
+ }
+ }
if (!sg) {
sg_res = nr_pages;
- pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
+ pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
}
while (nr_pages > 0) {
@@ -2259,7 +2387,7 @@
sg_res = aligned_nrpages(sg->offset, sg->length);
sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
sg->dma_length = sg->length;
- pteval = (sg_phys(sg) - pgoff) | prot;
+ pteval = (sg_phys(sg) - pgoff) | attr;
phys_pfn = pteval >> VTD_PAGE_SHIFT;
}
@@ -2401,6 +2529,10 @@
(((u16)bus) << 8) | devfn,
DMA_CCMD_MASK_NOBIT,
DMA_CCMD_DEVICE_INVL);
+
+ if (sm_supported(iommu))
+ qi_flush_pasid_cache(iommu, did_old, QI_PC_ALL_PASIDS, 0);
+
iommu->flush.flush_iotlb(iommu,
did_old,
0,
@@ -2414,7 +2546,7 @@
list_del(&info->link);
list_del(&info->global);
if (info->dev)
- info->dev->archdata.iommu = NULL;
+ dev_iommu_priv_set(info->dev, NULL);
}
static void domain_remove_dev_info(struct dmar_domain *domain)
@@ -2428,44 +2560,89 @@
spin_unlock_irqrestore(&device_domain_lock, flags);
}
-/*
- * find_domain
- * Note: we use struct device->archdata.iommu stores the info
- */
-static struct dmar_domain *find_domain(struct device *dev)
+struct dmar_domain *find_domain(struct device *dev)
{
struct device_domain_info *info;
- if (unlikely(dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO)) {
- struct iommu_domain *domain;
+ if (unlikely(!dev || !dev->iommu))
+ return NULL;
- dev->archdata.iommu = NULL;
- domain = iommu_get_domain_for_dev(dev);
- if (domain)
- intel_iommu_attach_device(domain, dev);
- }
+ if (unlikely(attach_deferred(dev)))
+ return NULL;
/* No lock here, assumes no domain exit in normal case */
- info = dev->archdata.iommu;
-
+ info = get_domain_info(dev);
if (likely(info))
return info->domain;
+
return NULL;
}
+static void do_deferred_attach(struct device *dev)
+{
+ struct iommu_domain *domain;
+
+ dev_iommu_priv_set(dev, NULL);
+ domain = iommu_get_domain_for_dev(dev);
+ if (domain)
+ intel_iommu_attach_device(domain, dev);
+}
+
static inline struct device_domain_info *
dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
{
struct device_domain_info *info;
list_for_each_entry(info, &device_domain_list, global)
- if (info->iommu->segment == segment && info->bus == bus &&
+ if (info->segment == segment && info->bus == bus &&
info->devfn == devfn)
return info;
return NULL;
}
+static int domain_setup_first_level(struct intel_iommu *iommu,
+ struct dmar_domain *domain,
+ struct device *dev,
+ u32 pasid)
+{
+ struct dma_pte *pgd = domain->pgd;
+ int agaw, level;
+ int flags = 0;
+
+ /*
+ * Skip top levels of page tables for iommu which has
+ * less agaw than default. Unnecessary for PT mode.
+ */
+ for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
+ pgd = phys_to_virt(dma_pte_addr(pgd));
+ if (!dma_pte_present(pgd))
+ return -ENOMEM;
+ }
+
+ level = agaw_to_level(agaw);
+ if (level != 4 && level != 5)
+ return -EINVAL;
+
+ if (pasid != PASID_RID2PASID)
+ flags |= PASID_FLAG_SUPERVISOR_MODE;
+ if (level == 5)
+ flags |= PASID_FLAG_FL5LP;
+
+ if (domain->domain.type == IOMMU_DOMAIN_UNMANAGED)
+ flags |= PASID_FLAG_PAGE_SNOOP;
+
+ return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid,
+ domain->iommu_did[iommu->seq_id],
+ flags);
+}
+
+static bool dev_is_real_dma_subdevice(struct device *dev)
+{
+ return dev && dev_is_pci(dev) &&
+ pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev);
+}
+
static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
int bus, int devfn,
struct device *dev,
@@ -2480,8 +2657,18 @@
if (!info)
return NULL;
- info->bus = bus;
- info->devfn = devfn;
+ if (!dev_is_real_dma_subdevice(dev)) {
+ info->bus = bus;
+ info->devfn = devfn;
+ info->segment = iommu->segment;
+ } else {
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ info->bus = pdev->bus->number;
+ info->devfn = pdev->devfn;
+ info->segment = pci_domain_nr(pdev->bus);
+ }
+
info->ats_supported = info->pasid_supported = info->pri_supported = 0;
info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
info->ats_qdep = 0;
@@ -2495,10 +2682,8 @@
if (dev && dev_is_pci(dev)) {
struct pci_dev *pdev = to_pci_dev(info->dev);
- if (!pdev->untrusted &&
- !pci_ats_disabled() &&
- ecap_dev_iotlb_support(iommu->ecap) &&
- pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
+ if (ecap_dev_iotlb_support(iommu->ecap) &&
+ pci_ats_supported(pdev) &&
dmar_find_matched_atsr_unit(pdev))
info->ats_supported = 1;
@@ -2510,7 +2695,7 @@
}
if (info->ats_supported && ecap_prs(iommu->ecap) &&
- pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
+ pci_pri_supported(pdev))
info->pri_supported = 1;
}
}
@@ -2521,7 +2706,8 @@
if (!found) {
struct device_domain_info *info2;
- info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
+ info2 = dmar_search_domain_by_dev_info(info->segment, info->bus,
+ info->devfn);
if (info2) {
found = info2->domain;
info2->dev = dev;
@@ -2548,7 +2734,7 @@
list_add(&info->link, &domain->devices);
list_add(&info->global, &device_domain_list);
if (dev)
- dev->archdata.iommu = info;
+ dev_iommu_priv_set(dev, info);
spin_unlock_irqrestore(&device_domain_lock, flags);
/* PASID table is mandatory for a PCI device in scalable mode. */
@@ -2565,6 +2751,9 @@
if (hw_pass_through && domain_type_is_si(domain))
ret = intel_pasid_setup_pass_through(iommu, domain,
dev, PASID_RID2PASID);
+ else if (domain_use_first_level(domain))
+ ret = domain_setup_first_level(iommu, domain, dev,
+ PASID_RID2PASID);
else
ret = intel_pasid_setup_second_level(iommu, domain,
dev, PASID_RID2PASID);
@@ -2585,108 +2774,10 @@
return domain;
}
-static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
-{
- *(u16 *)opaque = alias;
- return 0;
-}
-
-static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
-{
- struct device_domain_info *info;
- struct dmar_domain *domain = NULL;
- struct intel_iommu *iommu;
- u16 dma_alias;
- unsigned long flags;
- u8 bus, devfn;
-
- iommu = device_to_iommu(dev, &bus, &devfn);
- if (!iommu)
- return NULL;
-
- if (dev_is_pci(dev)) {
- struct pci_dev *pdev = to_pci_dev(dev);
-
- pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
-
- spin_lock_irqsave(&device_domain_lock, flags);
- info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
- PCI_BUS_NUM(dma_alias),
- dma_alias & 0xff);
- if (info) {
- iommu = info->iommu;
- domain = info->domain;
- }
- spin_unlock_irqrestore(&device_domain_lock, flags);
-
- /* DMA alias already has a domain, use it */
- if (info)
- goto out;
- }
-
- /* Allocate and initialize new domain for the device */
- domain = alloc_domain(0);
- if (!domain)
- return NULL;
- if (domain_init(domain, iommu, gaw)) {
- domain_exit(domain);
- return NULL;
- }
-
-out:
- return domain;
-}
-
-static struct dmar_domain *set_domain_for_dev(struct device *dev,
- struct dmar_domain *domain)
-{
- struct intel_iommu *iommu;
- struct dmar_domain *tmp;
- u16 req_id, dma_alias;
- u8 bus, devfn;
-
- iommu = device_to_iommu(dev, &bus, &devfn);
- if (!iommu)
- return NULL;
-
- req_id = ((u16)bus << 8) | devfn;
-
- if (dev_is_pci(dev)) {
- struct pci_dev *pdev = to_pci_dev(dev);
-
- pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
-
- /* register PCI DMA alias device */
- if (req_id != dma_alias) {
- tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
- dma_alias & 0xff, NULL, domain);
-
- if (!tmp || tmp != domain)
- return tmp;
- }
- }
-
- tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
- if (!tmp || tmp != domain)
- return tmp;
-
- return domain;
-}
-
static int iommu_domain_identity_map(struct dmar_domain *domain,
- unsigned long long start,
- unsigned long long end)
+ unsigned long first_vpfn,
+ unsigned long last_vpfn)
{
- unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
- unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
-
- if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
- dma_to_mm_pfn(last_vpfn))) {
- pr_err("Reserving iova failed\n");
- return -ENOMEM;
- }
-
- pr_debug("Mapping reserved region %llx-%llx\n", start, end);
/*
* RMRR range might have overlap with physical memory range,
* clear it first
@@ -2698,45 +2789,6 @@
DMA_PTE_READ|DMA_PTE_WRITE);
}
-static int domain_prepare_identity_map(struct device *dev,
- struct dmar_domain *domain,
- unsigned long long start,
- unsigned long long end)
-{
- /* For _hardware_ passthrough, don't bother. But for software
- passthrough, we do it anyway -- it may indicate a memory
- range which is reserved in E820, so which didn't get set
- up to start with in si_domain */
- if (domain == si_domain && hw_pass_through) {
- dev_warn(dev, "Ignoring identity map for HW passthrough [0x%Lx - 0x%Lx]\n",
- start, end);
- return 0;
- }
-
- dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx]\n", start, end);
-
- if (end < start) {
- WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
- "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
- dmi_get_system_info(DMI_BIOS_VENDOR),
- dmi_get_system_info(DMI_BIOS_VERSION),
- dmi_get_system_info(DMI_PRODUCT_VERSION));
- return -EIO;
- }
-
- if (end >> agaw_to_width(domain->agaw)) {
- WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
- "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
- agaw_to_width(domain->agaw),
- dmi_get_system_info(DMI_BIOS_VENDOR),
- dmi_get_system_info(DMI_BIOS_VERSION),
- dmi_get_system_info(DMI_PRODUCT_VERSION));
- return -EIO;
- }
-
- return iommu_domain_identity_map(domain, start, end);
-}
-
static int md_domain_init(struct dmar_domain *domain, int guest_width);
static int __init si_domain_init(int hw)
@@ -2763,7 +2815,8 @@
for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
ret = iommu_domain_identity_map(si_domain,
- PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
+ mm_to_dma_pfn(start_pfn),
+ mm_to_dma_pfn(end_pfn));
if (ret)
return ret;
}
@@ -2783,7 +2836,9 @@
end >> agaw_to_width(si_domain->agaw)))
continue;
- ret = iommu_domain_identity_map(si_domain, start, end);
+ ret = iommu_domain_identity_map(si_domain,
+ mm_to_dma_pfn(start >> PAGE_SHIFT),
+ mm_to_dma_pfn(end >> PAGE_SHIFT));
if (ret)
return ret;
}
@@ -2792,17 +2847,6 @@
return 0;
}
-static int identity_mapping(struct device *dev)
-{
- struct device_domain_info *info;
-
- info = dev->archdata.iommu;
- if (info && info != DUMMY_DEVICE_DOMAIN_INFO && info != DEFER_DEVICE_DOMAIN_INFO)
- return (info->domain == si_domain);
-
- return 0;
-}
-
static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
{
struct dmar_domain *ndomain;
@@ -2929,35 +2973,9 @@
if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
return IOMMU_DOMAIN_IDENTITY;
-
- /*
- * We want to start off with all devices in the 1:1 domain, and
- * take them out later if we find they can't access all of memory.
- *
- * However, we can't do this for PCI devices behind bridges,
- * because all PCI devices behind the same bridge will end up
- * with the same source-id on their transactions.
- *
- * Practically speaking, we can't change things around for these
- * devices at run-time, because we can't be sure there'll be no
- * DMA transactions in flight for any of their siblings.
- *
- * So PCI devices (unless they're on the root bus) as well as
- * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
- * the 1:1 domain, just in _case_ one of their siblings turns out
- * not to be able to map all of memory.
- */
- if (!pci_is_pcie(pdev)) {
- if (!pci_is_root_bus(pdev->bus))
- return IOMMU_DOMAIN_DMA;
- if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
- return IOMMU_DOMAIN_DMA;
- } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
- return IOMMU_DOMAIN_DMA;
}
- return (iommu_identity_mapping & IDENTMAP_ALL) ?
- IOMMU_DOMAIN_IDENTITY : 0;
+ return 0;
}
static void intel_iommu_init_qi(struct intel_iommu *iommu)
@@ -3179,6 +3197,85 @@
return ret;
}
+#ifdef CONFIG_INTEL_IOMMU_SVM
+static ioasid_t intel_vcmd_ioasid_alloc(ioasid_t min, ioasid_t max, void *data)
+{
+ struct intel_iommu *iommu = data;
+ ioasid_t ioasid;
+
+ if (!iommu)
+ return INVALID_IOASID;
+ /*
+ * VT-d virtual command interface always uses the full 20 bit
+ * PASID range. Host can partition guest PASID range based on
+ * policies but it is out of guest's control.
+ */
+ if (min < PASID_MIN || max > intel_pasid_max_id)
+ return INVALID_IOASID;
+
+ if (vcmd_alloc_pasid(iommu, &ioasid))
+ return INVALID_IOASID;
+
+ return ioasid;
+}
+
+static void intel_vcmd_ioasid_free(ioasid_t ioasid, void *data)
+{
+ struct intel_iommu *iommu = data;
+
+ if (!iommu)
+ return;
+ /*
+ * Sanity check the ioasid owner is done at upper layer, e.g. VFIO
+ * We can only free the PASID when all the devices are unbound.
+ */
+ if (ioasid_find(NULL, ioasid, NULL)) {
+ pr_alert("Cannot free active IOASID %d\n", ioasid);
+ return;
+ }
+ vcmd_free_pasid(iommu, ioasid);
+}
+
+static void register_pasid_allocator(struct intel_iommu *iommu)
+{
+ /*
+ * If we are running in the host, no need for custom allocator
+ * in that PASIDs are allocated from the host system-wide.
+ */
+ if (!cap_caching_mode(iommu->cap))
+ return;
+
+ if (!sm_supported(iommu)) {
+ pr_warn("VT-d Scalable Mode not enabled, no PASID allocation\n");
+ return;
+ }
+
+ /*
+ * Register a custom PASID allocator if we are running in a guest,
+ * guest PASID must be obtained via virtual command interface.
+ * There can be multiple vIOMMUs in each guest but only one allocator
+ * is active. All vIOMMU allocators will eventually be calling the same
+ * host allocator.
+ */
+ if (!vccap_pasid(iommu->vccap))
+ return;
+
+ pr_info("Register custom PASID allocator\n");
+ iommu->pasid_allocator.alloc = intel_vcmd_ioasid_alloc;
+ iommu->pasid_allocator.free = intel_vcmd_ioasid_free;
+ iommu->pasid_allocator.pdata = (void *)iommu;
+ if (ioasid_register_allocator(&iommu->pasid_allocator)) {
+ pr_warn("Custom PASID allocator failed, scalable mode disabled\n");
+ /*
+ * Disable scalable mode on this IOMMU if there
+ * is no custom allocator. Mixing SM capable vIOMMU
+ * and non-SM vIOMMU are not supported.
+ */
+ intel_iommu_sm = 0;
+ }
+}
+#endif
+
static int __init init_dmars(void)
{
struct dmar_drhd_unit *drhd;
@@ -3288,14 +3385,10 @@
hw_pass_through = 0;
if (!intel_iommu_strict && cap_caching_mode(iommu->cap)) {
- pr_info("Disable batched IOTLB flush due to virtualization");
+ pr_warn("Disable batched IOTLB flush due to virtualization");
intel_iommu_strict = 1;
}
-
-#ifdef CONFIG_INTEL_IOMMU_SVM
- if (pasid_supported(iommu))
- intel_svm_init(iommu);
-#endif
+ intel_svm_check(iommu);
}
/*
@@ -3305,14 +3398,12 @@
*/
for_each_active_iommu(iommu, drhd) {
iommu_flush_write_buffer(iommu);
+#ifdef CONFIG_INTEL_IOMMU_SVM
+ register_pasid_allocator(iommu);
+#endif
iommu_set_root_entry(iommu);
- iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
- iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
}
- if (iommu_default_passthrough())
- iommu_identity_mapping |= IDENTMAP_ALL;
-
#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
dmar_map_gfx = 0;
#endif
@@ -3385,8 +3476,21 @@
{
unsigned long iova_pfn;
- /* Restrict dma_mask to the width that the iommu can handle */
- dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
+ /*
+ * Restrict dma_mask to the width that the iommu can handle.
+ * First-level translation restricts the input-address to a
+ * canonical address (i.e., address bits 63:N have the same
+ * value as address bit [N-1], where N is 48-bits with 4-level
+ * paging and 57-bits with 5-level paging). Hence, skip bit
+ * [N-1].
+ */
+ if (domain_use_first_level(domain))
+ dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw - 1),
+ dma_mask);
+ else
+ dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw),
+ dma_mask);
+
/* Ensure we reserve the whole size-aligned region */
nrpages = __roundup_pow_of_two(nrpages);
@@ -3412,97 +3516,6 @@
return iova_pfn;
}
-static struct dmar_domain *get_private_domain_for_dev(struct device *dev)
-{
- struct dmar_domain *domain, *tmp;
- struct dmar_rmrr_unit *rmrr;
- struct device *i_dev;
- int i, ret;
-
- /* Device shouldn't be attached by any domains. */
- domain = find_domain(dev);
- if (domain)
- return NULL;
-
- domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
- if (!domain)
- goto out;
-
- /* We have a new domain - setup possible RMRRs for the device */
- rcu_read_lock();
- for_each_rmrr_units(rmrr) {
- for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
- i, i_dev) {
- if (i_dev != dev)
- continue;
-
- ret = domain_prepare_identity_map(dev, domain,
- rmrr->base_address,
- rmrr->end_address);
- if (ret)
- dev_err(dev, "Mapping reserved region failed\n");
- }
- }
- rcu_read_unlock();
-
- tmp = set_domain_for_dev(dev, domain);
- if (!tmp || domain != tmp) {
- domain_exit(domain);
- domain = tmp;
- }
-
-out:
- if (!domain)
- dev_err(dev, "Allocating domain failed\n");
- else
- domain->domain.type = IOMMU_DOMAIN_DMA;
-
- return domain;
-}
-
-/* Check if the dev needs to go through non-identity map and unmap process.*/
-static bool iommu_need_mapping(struct device *dev)
-{
- int ret;
-
- if (iommu_dummy(dev))
- return false;
-
- ret = identity_mapping(dev);
- if (ret) {
- u64 dma_mask = *dev->dma_mask;
-
- if (dev->coherent_dma_mask && dev->coherent_dma_mask < dma_mask)
- dma_mask = dev->coherent_dma_mask;
-
- if (dma_mask >= dma_direct_get_required_mask(dev))
- return false;
-
- /*
- * 32 bit DMA is removed from si_domain and fall back to
- * non-identity mapping.
- */
- dmar_remove_one_dev_info(dev);
- ret = iommu_request_dma_domain_for_dev(dev);
- if (ret) {
- struct iommu_domain *domain;
- struct dmar_domain *dmar_domain;
-
- domain = iommu_get_domain_for_dev(dev);
- if (domain) {
- dmar_domain = to_dmar_domain(domain);
- dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
- }
- dmar_remove_one_dev_info(dev);
- get_private_domain_for_dev(dev);
- }
-
- dev_info(dev, "32bit DMA uses non-identity mapping\n");
- }
-
- return true;
-}
-
static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
size_t size, int dir, u64 dma_mask)
{
@@ -3516,6 +3529,9 @@
BUG_ON(dir == DMA_NONE);
+ if (unlikely(attach_deferred(dev)))
+ do_deferred_attach(dev);
+
domain = find_domain(dev);
if (!domain)
return DMA_MAPPING_ERROR;
@@ -3567,20 +3583,15 @@
enum dma_data_direction dir,
unsigned long attrs)
{
- if (iommu_need_mapping(dev))
- return __intel_map_single(dev, page_to_phys(page) + offset,
- size, dir, *dev->dma_mask);
- return dma_direct_map_page(dev, page, offset, size, dir, attrs);
+ return __intel_map_single(dev, page_to_phys(page) + offset,
+ size, dir, *dev->dma_mask);
}
static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr,
size_t size, enum dma_data_direction dir,
unsigned long attrs)
{
- if (iommu_need_mapping(dev))
- return __intel_map_single(dev, phys_addr, size, dir,
- *dev->dma_mask);
- return dma_direct_map_resource(dev, phys_addr, size, dir, attrs);
+ return __intel_map_single(dev, phys_addr, size, dir, *dev->dma_mask);
}
static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
@@ -3631,17 +3642,13 @@
size_t size, enum dma_data_direction dir,
unsigned long attrs)
{
- if (iommu_need_mapping(dev))
- intel_unmap(dev, dev_addr, size);
- else
- dma_direct_unmap_page(dev, dev_addr, size, dir, attrs);
+ intel_unmap(dev, dev_addr, size);
}
static void intel_unmap_resource(struct device *dev, dma_addr_t dev_addr,
size_t size, enum dma_data_direction dir, unsigned long attrs)
{
- if (iommu_need_mapping(dev))
- intel_unmap(dev, dev_addr, size);
+ intel_unmap(dev, dev_addr, size);
}
static void *intel_alloc_coherent(struct device *dev, size_t size,
@@ -3651,8 +3658,8 @@
struct page *page = NULL;
int order;
- if (!iommu_need_mapping(dev))
- return dma_direct_alloc(dev, size, dma_handle, flags, attrs);
+ if (unlikely(attach_deferred(dev)))
+ do_deferred_attach(dev);
size = PAGE_ALIGN(size);
order = get_order(size);
@@ -3687,9 +3694,6 @@
int order;
struct page *page = virt_to_page(vaddr);
- if (!iommu_need_mapping(dev))
- return dma_direct_free(dev, size, vaddr, dma_handle, attrs);
-
size = PAGE_ALIGN(size);
order = get_order(size);
@@ -3707,9 +3711,6 @@
struct scatterlist *sg;
int i;
- if (!iommu_need_mapping(dev))
- return dma_direct_unmap_sg(dev, sglist, nelems, dir, attrs);
-
for_each_sg(sglist, sg, nelems, i) {
nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
}
@@ -3733,8 +3734,9 @@
struct intel_iommu *iommu;
BUG_ON(dir == DMA_NONE);
- if (!iommu_need_mapping(dev))
- return dma_direct_map_sg(dev, sglist, nelems, dir, attrs);
+
+ if (unlikely(attach_deferred(dev)))
+ do_deferred_attach(dev);
domain = find_domain(dev);
if (!domain)
@@ -3773,16 +3775,14 @@
return 0;
}
- trace_map_sg(dev, iova_pfn << PAGE_SHIFT,
- sg_phys(sglist), size << VTD_PAGE_SHIFT);
+ for_each_sg(sglist, sg, nelems, i)
+ trace_map_sg(dev, i + 1, nelems, sg);
return nelems;
}
static u64 intel_get_required_mask(struct device *dev)
{
- if (!iommu_need_mapping(dev))
- return dma_direct_get_required_mask(dev);
return DMA_BIT_MASK(32);
}
@@ -3798,6 +3798,8 @@
.dma_supported = dma_direct_supported,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
+ .alloc_pages = dma_common_alloc_pages,
+ .free_pages = dma_common_free_pages,
.get_required_mask = intel_get_required_mask,
};
@@ -3831,7 +3833,11 @@
int prot = 0;
int ret;
+ if (unlikely(attach_deferred(dev)))
+ do_deferred_attach(dev);
+
domain = find_domain(dev);
+
if (WARN_ON(dir == DMA_NONE || !domain))
return DMA_MAPPING_ERROR;
@@ -3860,9 +3866,8 @@
* page aligned, we don't need to use a bounce page.
*/
if (!IS_ALIGNED(paddr | size, VTD_PAGE_SIZE)) {
- tlb_addr = swiotlb_tbl_map_single(dev,
- __phys_to_dma(dev, io_tlb_start),
- paddr, size, aligned_size, dir, attrs);
+ tlb_addr = swiotlb_tbl_map_single(dev, paddr, size,
+ aligned_size, dir, attrs);
if (tlb_addr == DMA_MAPPING_ERROR) {
goto swiotlb_error;
} else {
@@ -3986,6 +3991,9 @@
sg_dma_len(sg) = sg->length;
}
+ for_each_sg(sglist, sg, nelems, i)
+ trace_bounce_map_sg(dev, i + 1, nelems, sg);
+
return nelems;
out_unmap:
@@ -4044,6 +4052,8 @@
.sync_sg_for_device = bounce_sync_sg_for_device,
.map_resource = bounce_map_resource,
.unmap_resource = bounce_unmap_resource,
+ .alloc_pages = dma_common_alloc_pages,
+ .free_pages = dma_common_free_pages,
.dma_supported = dma_direct_supported,
};
@@ -4111,35 +4121,6 @@
iova_cache_put();
}
-static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
-{
- struct dmar_drhd_unit *drhd;
- u32 vtbar;
- int rc;
-
- /* We know that this device on this chipset has its own IOMMU.
- * If we find it under a different IOMMU, then the BIOS is lying
- * to us. Hope that the IOMMU for this device is actually
- * disabled, and it needs no translation...
- */
- rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
- if (rc) {
- /* "can't" happen */
- dev_info(&pdev->dev, "failed to run vt-d quirk\n");
- return;
- }
- vtbar &= 0xffff0000;
-
- /* we know that the this iommu should be at offset 0xa000 from vtbar */
- drhd = dmar_find_matched_drhd_unit(pdev);
- if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
- pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
- add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
- pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
- }
-}
-DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
-
static void __init init_no_remapping_devices(void)
{
struct dmar_drhd_unit *drhd;
@@ -4170,12 +4151,9 @@
/* This IOMMU has *only* gfx devices. Either bypass it or
set the gfx_mapped flag, as appropriate */
- if (!dmar_map_gfx) {
+ drhd->gfx_dedicated = 1;
+ if (!dmar_map_gfx)
drhd->ignored = 1;
- for_each_active_dev_scope(drhd->devices,
- drhd->devices_cnt, i, dev)
- dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
- }
}
}
@@ -4201,12 +4179,7 @@
}
iommu_flush_write_buffer(iommu);
-
iommu_set_root_entry(iommu);
-
- iommu->flush.flush_context(iommu, 0, 0, 0,
- DMA_CCMD_GLOBAL_INVL);
- iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
iommu_enable_translation(iommu);
iommu_disable_protect_mem_regions(iommu);
}
@@ -4315,17 +4288,40 @@
static inline void init_iommu_pm_ops(void) {}
#endif /* CONFIG_PM */
+static int rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
+{
+ if (!IS_ALIGNED(rmrr->base_address, PAGE_SIZE) ||
+ !IS_ALIGNED(rmrr->end_address + 1, PAGE_SIZE) ||
+ rmrr->end_address <= rmrr->base_address ||
+ arch_rmrr_sanity_check(rmrr))
+ return -EINVAL;
+
+ return 0;
+}
+
int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
{
struct acpi_dmar_reserved_memory *rmrr;
struct dmar_rmrr_unit *rmrru;
+ rmrr = (struct acpi_dmar_reserved_memory *)header;
+ if (rmrr_sanity_check(rmrr)) {
+ pr_warn(FW_BUG
+ "Your BIOS is broken; bad RMRR [%#018Lx-%#018Lx]\n"
+ "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
+ rmrr->base_address, rmrr->end_address,
+ dmi_get_system_info(DMI_BIOS_VENDOR),
+ dmi_get_system_info(DMI_BIOS_VERSION),
+ dmi_get_system_info(DMI_PRODUCT_VERSION));
+ add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
+ }
+
rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
if (!rmrru)
goto out;
rmrru->hdr = header;
- rmrr = (struct acpi_dmar_reserved_memory *)header;
+
rmrru->base_address = rmrr->base_address;
rmrru->end_address = rmrr->end_address;
@@ -4465,7 +4461,7 @@
iommu->name);
return -ENXIO;
}
- sp = domain_update_iommu_superpage(iommu) - 1;
+ sp = domain_update_iommu_superpage(NULL, iommu) - 1;
if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
pr_warn("%s: Doesn't support large page.\n",
iommu->name);
@@ -4485,10 +4481,7 @@
if (ret)
goto out;
-#ifdef CONFIG_INTEL_IOMMU_SVM
- if (pasid_supported(iommu))
- intel_svm_init(iommu);
-#endif
+ intel_svm_check(iommu);
if (dmaru->ignored) {
/*
@@ -4514,8 +4507,6 @@
goto disable_iommu;
iommu_set_root_entry(iommu);
- iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
- iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
iommu_enable_translation(iommu);
iommu_disable_protect_mem_regions(iommu);
@@ -4664,58 +4655,37 @@
unsigned long val, void *v)
{
struct memory_notify *mhp = v;
- unsigned long long start, end;
- unsigned long start_vpfn, last_vpfn;
+ unsigned long start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
+ unsigned long last_vpfn = mm_to_dma_pfn(mhp->start_pfn +
+ mhp->nr_pages - 1);
switch (val) {
case MEM_GOING_ONLINE:
- start = mhp->start_pfn << PAGE_SHIFT;
- end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
- if (iommu_domain_identity_map(si_domain, start, end)) {
- pr_warn("Failed to build identity map for [%llx-%llx]\n",
- start, end);
+ if (iommu_domain_identity_map(si_domain,
+ start_vpfn, last_vpfn)) {
+ pr_warn("Failed to build identity map for [%lx-%lx]\n",
+ start_vpfn, last_vpfn);
return NOTIFY_BAD;
}
break;
case MEM_OFFLINE:
case MEM_CANCEL_ONLINE:
- start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
- last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
- while (start_vpfn <= last_vpfn) {
- struct iova *iova;
+ {
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
struct page *freelist;
- iova = find_iova(&si_domain->iovad, start_vpfn);
- if (iova == NULL) {
- pr_debug("Failed get IOVA for PFN %lx\n",
- start_vpfn);
- break;
- }
-
- iova = split_and_remove_iova(&si_domain->iovad, iova,
- start_vpfn, last_vpfn);
- if (iova == NULL) {
- pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
- start_vpfn, last_vpfn);
- return NOTIFY_BAD;
- }
-
- freelist = domain_unmap(si_domain, iova->pfn_lo,
- iova->pfn_hi);
+ freelist = domain_unmap(si_domain,
+ start_vpfn, last_vpfn);
rcu_read_lock();
for_each_active_iommu(iommu, drhd)
iommu_flush_iotlb_psi(iommu, si_domain,
- iova->pfn_lo, iova_size(iova),
+ start_vpfn, mhp->nr_pages,
!freelist, 0);
rcu_read_unlock();
dma_free_pagelist(freelist);
-
- start_vpfn = iova->pfn_hi + 1;
- free_iova_mem(iova);
}
break;
}
@@ -4743,8 +4713,9 @@
for (did = 0; did < cap_ndoms(iommu->cap); did++) {
domain = get_iommu_domain(iommu, (u16)did);
- if (!domain)
+ if (!domain || domain->domain.type != IOMMU_DOMAIN_DMA)
continue;
+
free_cpu_cached_iovas(cpu, &domain->iovad);
}
}
@@ -4765,6 +4736,26 @@
iommu_disable_translation(iommu);
}
+void intel_iommu_shutdown(void)
+{
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu = NULL;
+
+ if (no_iommu || dmar_disabled)
+ return;
+
+ down_write(&dmar_global_lock);
+
+ /* Disable PMRs explicitly here. */
+ for_each_iommu(iommu, drhd)
+ iommu_disable_protect_mem_regions(iommu);
+
+ /* Make sure the IOMMUs are switched off */
+ intel_disable_iommus();
+
+ up_write(&dmar_global_lock);
+}
+
static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
{
struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
@@ -4849,12 +4840,12 @@
NULL,
};
-static inline bool has_untrusted_dev(void)
+static inline bool has_external_pci(void)
{
struct pci_dev *pdev = NULL;
for_each_pci_dev(pdev)
- if (pdev->untrusted)
+ if (pdev->external_facing)
return true;
return false;
@@ -4862,7 +4853,7 @@
static int __init platform_optin_force_iommu(void)
{
- if (!dmar_platform_optin() || no_platform_optin || !has_untrusted_dev())
+ if (!dmar_platform_optin() || no_platform_optin || !has_external_pci())
return 0;
if (no_iommu || dmar_disabled)
@@ -4873,7 +4864,7 @@
* map for all devices except those marked as being untrusted.
*/
if (dmar_disabled)
- iommu_identity_mapping |= IDENTMAP_ALL;
+ iommu_set_default_passthrough(false);
dmar_disabled = 0;
no_iommu = 0;
@@ -5018,18 +5009,6 @@
}
up_write(&dmar_global_lock);
-#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
- /*
- * If the system has no untrusted device or the user has decided
- * to disable the bounce page mechanisms, we don't need swiotlb.
- * Mark this and the pre-allocated bounce pages will be released
- * later.
- */
- if (!has_untrusted_dev() || intel_no_bounce)
- swiotlb = 0;
-#endif
- dma_ops = &intel_dma_ops;
-
init_iommu_pm_ops();
down_read(&dmar_global_lock);
@@ -5115,10 +5094,11 @@
if (info->dev) {
if (dev_is_pci(info->dev) && sm_supported(iommu))
intel_pasid_tear_down_entry(iommu, info->dev,
- PASID_RID2PASID);
+ PASID_RID2PASID, false);
iommu_disable_dev_iotlb(info);
- domain_context_clear(iommu, info->dev);
+ if (!dev_is_real_dma_subdevice(info->dev))
+ domain_context_clear(iommu, info->dev);
intel_pasid_free_table(info->dev);
}
@@ -5128,12 +5108,6 @@
domain_detach_iommu(domain, iommu);
spin_unlock_irqrestore(&iommu->lock, flags);
- /* free the private domain */
- if (domain->flags & DOMAIN_FLAG_LOSE_CHILDREN &&
- !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
- list_empty(&domain->devices))
- domain_exit(info->domain);
-
free_devinfo_mem(info);
}
@@ -5143,9 +5117,8 @@
unsigned long flags;
spin_lock_irqsave(&device_domain_lock, flags);
- info = dev->archdata.iommu;
- if (info && info != DEFER_DEVICE_DOMAIN_INFO
- && info != DUMMY_DEVICE_DOMAIN_INFO)
+ info = get_domain_info(dev);
+ if (info)
__dmar_remove_one_dev_info(info);
spin_unlock_irqrestore(&device_domain_lock, flags);
}
@@ -5154,9 +5127,6 @@
{
int adjust_width;
- init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
- domain_reserve_special_ranges(domain);
-
/* calculate AGAW */
domain->gaw = guest_width;
adjust_width = guestwidth_to_adjustwidth(guest_width);
@@ -5175,6 +5145,17 @@
return 0;
}
+static void intel_init_iova_domain(struct dmar_domain *dmar_domain)
+{
+ init_iova_domain(&dmar_domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
+ copy_reserved_iova(&reserved_iova_list, &dmar_domain->iovad);
+
+ if (!intel_iommu_strict &&
+ init_iova_flush_queue(&dmar_domain->iovad,
+ iommu_flush_iova, iova_entry_free))
+ pr_info("iova flush queue initialization failed\n");
+}
+
static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
{
struct dmar_domain *dmar_domain;
@@ -5182,7 +5163,6 @@
switch (type) {
case IOMMU_DOMAIN_DMA:
- /* fallthrough */
case IOMMU_DOMAIN_UNMANAGED:
dmar_domain = alloc_domain(0);
if (!dmar_domain) {
@@ -5195,14 +5175,8 @@
return NULL;
}
- if (type == IOMMU_DOMAIN_DMA &&
- init_iova_flush_queue(&dmar_domain->iovad,
- iommu_flush_iova, iova_entry_free)) {
- pr_warn("iova flush queue initialization failed\n");
- intel_iommu_strict = 1;
- }
-
- domain_update_iommu_cap(dmar_domain);
+ if (type == IOMMU_DOMAIN_DMA)
+ intel_init_iova_domain(dmar_domain);
domain = &dmar_domain->domain;
domain->geometry.aperture_start = 0;
@@ -5233,7 +5207,7 @@
static inline bool
is_aux_domain(struct device *dev, struct iommu_domain *domain)
{
- struct device_domain_info *info = dev->archdata.iommu;
+ struct device_domain_info *info = get_domain_info(dev);
return info && info->auxd_enabled &&
domain->type == IOMMU_DOMAIN_UNMANAGED;
@@ -5242,7 +5216,7 @@
static void auxiliary_link_device(struct dmar_domain *domain,
struct device *dev)
{
- struct device_domain_info *info = dev->archdata.iommu;
+ struct device_domain_info *info = get_domain_info(dev);
assert_spin_locked(&device_domain_lock);
if (WARN_ON(!info))
@@ -5255,7 +5229,7 @@
static void auxiliary_unlink_device(struct dmar_domain *domain,
struct device *dev)
{
- struct device_domain_info *info = dev->archdata.iommu;
+ struct device_domain_info *info = get_domain_info(dev);
assert_spin_locked(&device_domain_lock);
if (WARN_ON(!info))
@@ -5265,28 +5239,28 @@
domain->auxd_refcnt--;
if (!domain->auxd_refcnt && domain->default_pasid > 0)
- intel_pasid_free_id(domain->default_pasid);
+ ioasid_free(domain->default_pasid);
}
static int aux_domain_add_dev(struct dmar_domain *domain,
struct device *dev)
{
int ret;
- u8 bus, devfn;
unsigned long flags;
struct intel_iommu *iommu;
- iommu = device_to_iommu(dev, &bus, &devfn);
+ iommu = device_to_iommu(dev, NULL, NULL);
if (!iommu)
return -ENODEV;
if (domain->default_pasid <= 0) {
- int pasid;
+ u32 pasid;
- pasid = intel_pasid_alloc_id(domain, PASID_MIN,
- pci_max_pasids(to_pci_dev(dev)),
- GFP_KERNEL);
- if (pasid <= 0) {
+ /* No private data needed for the default pasid */
+ pasid = ioasid_alloc(NULL, PASID_MIN,
+ pci_max_pasids(to_pci_dev(dev)) - 1,
+ NULL);
+ if (pasid == INVALID_IOASID) {
pr_err("Can't allocate default pasid\n");
return -ENODEV;
}
@@ -5304,8 +5278,12 @@
goto attach_failed;
/* Setup the PASID entry for mediated devices: */
- ret = intel_pasid_setup_second_level(iommu, domain, dev,
- domain->default_pasid);
+ if (domain_use_first_level(domain))
+ ret = domain_setup_first_level(iommu, domain, dev,
+ domain->default_pasid);
+ else
+ ret = intel_pasid_setup_second_level(iommu, domain, dev,
+ domain->default_pasid);
if (ret)
goto table_failed;
spin_unlock(&iommu->lock);
@@ -5322,7 +5300,7 @@
spin_unlock(&iommu->lock);
spin_unlock_irqrestore(&device_domain_lock, flags);
if (!domain->auxd_refcnt && domain->default_pasid > 0)
- intel_pasid_free_id(domain->default_pasid);
+ ioasid_free(domain->default_pasid);
return ret;
}
@@ -5338,13 +5316,13 @@
return;
spin_lock_irqsave(&device_domain_lock, flags);
- info = dev->archdata.iommu;
+ info = get_domain_info(dev);
iommu = info->iommu;
auxiliary_unlink_device(domain, dev);
spin_lock(&iommu->lock);
- intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid);
+ intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid, false);
domain_detach_iommu(domain, iommu);
spin_unlock(&iommu->lock);
@@ -5357,9 +5335,8 @@
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct intel_iommu *iommu;
int addr_width;
- u8 bus, devfn;
- iommu = device_to_iommu(dev, &bus, &devfn);
+ iommu = device_to_iommu(dev, NULL, NULL);
if (!iommu)
return -ENODEV;
@@ -5451,9 +5428,188 @@
aux_domain_remove_dev(to_dmar_domain(domain), dev);
}
+#ifdef CONFIG_INTEL_IOMMU_SVM
+/*
+ * 2D array for converting and sanitizing IOMMU generic TLB granularity to
+ * VT-d granularity. Invalidation is typically included in the unmap operation
+ * as a result of DMA or VFIO unmap. However, for assigned devices guest
+ * owns the first level page tables. Invalidations of translation caches in the
+ * guest are trapped and passed down to the host.
+ *
+ * vIOMMU in the guest will only expose first level page tables, therefore
+ * we do not support IOTLB granularity for request without PASID (second level).
+ *
+ * For example, to find the VT-d granularity encoding for IOTLB
+ * type and page selective granularity within PASID:
+ * X: indexed by iommu cache type
+ * Y: indexed by enum iommu_inv_granularity
+ * [IOMMU_CACHE_INV_TYPE_IOTLB][IOMMU_INV_GRANU_ADDR]
+ */
+
+static const int
+inv_type_granu_table[IOMMU_CACHE_INV_TYPE_NR][IOMMU_INV_GRANU_NR] = {
+ /*
+ * PASID based IOTLB invalidation: PASID selective (per PASID),
+ * page selective (address granularity)
+ */
+ {-EINVAL, QI_GRAN_NONG_PASID, QI_GRAN_PSI_PASID},
+ /* PASID based dev TLBs */
+ {-EINVAL, -EINVAL, QI_DEV_IOTLB_GRAN_PASID_SEL},
+ /* PASID cache */
+ {-EINVAL, -EINVAL, -EINVAL}
+};
+
+static inline int to_vtd_granularity(int type, int granu)
+{
+ return inv_type_granu_table[type][granu];
+}
+
+static inline u64 to_vtd_size(u64 granu_size, u64 nr_granules)
+{
+ u64 nr_pages = (granu_size * nr_granules) >> VTD_PAGE_SHIFT;
+
+ /* VT-d size is encoded as 2^size of 4K pages, 0 for 4k, 9 for 2MB, etc.
+ * IOMMU cache invalidate API passes granu_size in bytes, and number of
+ * granu size in contiguous memory.
+ */
+ return order_base_2(nr_pages);
+}
+
+static int
+intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
+ struct iommu_cache_invalidate_info *inv_info)
+{
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+ struct device_domain_info *info;
+ struct intel_iommu *iommu;
+ unsigned long flags;
+ int cache_type;
+ u8 bus, devfn;
+ u16 did, sid;
+ int ret = 0;
+ u64 size = 0;
+
+ if (!inv_info || !dmar_domain)
+ return -EINVAL;
+
+ if (!dev || !dev_is_pci(dev))
+ return -ENODEV;
+
+ iommu = device_to_iommu(dev, &bus, &devfn);
+ if (!iommu)
+ return -ENODEV;
+
+ if (!(dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE))
+ return -EINVAL;
+
+ spin_lock_irqsave(&device_domain_lock, flags);
+ spin_lock(&iommu->lock);
+ info = get_domain_info(dev);
+ if (!info) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+ did = dmar_domain->iommu_did[iommu->seq_id];
+ sid = PCI_DEVID(bus, devfn);
+
+ /* Size is only valid in address selective invalidation */
+ if (inv_info->granularity == IOMMU_INV_GRANU_ADDR)
+ size = to_vtd_size(inv_info->granu.addr_info.granule_size,
+ inv_info->granu.addr_info.nb_granules);
+
+ for_each_set_bit(cache_type,
+ (unsigned long *)&inv_info->cache,
+ IOMMU_CACHE_INV_TYPE_NR) {
+ int granu = 0;
+ u64 pasid = 0;
+ u64 addr = 0;
+
+ granu = to_vtd_granularity(cache_type, inv_info->granularity);
+ if (granu == -EINVAL) {
+ pr_err_ratelimited("Invalid cache type and granu combination %d/%d\n",
+ cache_type, inv_info->granularity);
+ break;
+ }
+
+ /*
+ * PASID is stored in different locations based on the
+ * granularity.
+ */
+ if (inv_info->granularity == IOMMU_INV_GRANU_PASID &&
+ (inv_info->granu.pasid_info.flags & IOMMU_INV_PASID_FLAGS_PASID))
+ pasid = inv_info->granu.pasid_info.pasid;
+ else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
+ (inv_info->granu.addr_info.flags & IOMMU_INV_ADDR_FLAGS_PASID))
+ pasid = inv_info->granu.addr_info.pasid;
+
+ switch (BIT(cache_type)) {
+ case IOMMU_CACHE_INV_TYPE_IOTLB:
+ /* HW will ignore LSB bits based on address mask */
+ if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
+ size &&
+ (inv_info->granu.addr_info.addr & ((BIT(VTD_PAGE_SHIFT + size)) - 1))) {
+ pr_err_ratelimited("User address not aligned, 0x%llx, size order %llu\n",
+ inv_info->granu.addr_info.addr, size);
+ }
+
+ /*
+ * If granu is PASID-selective, address is ignored.
+ * We use npages = -1 to indicate that.
+ */
+ qi_flush_piotlb(iommu, did, pasid,
+ mm_to_dma_pfn(inv_info->granu.addr_info.addr),
+ (granu == QI_GRAN_NONG_PASID) ? -1 : 1 << size,
+ inv_info->granu.addr_info.flags & IOMMU_INV_ADDR_FLAGS_LEAF);
+
+ if (!info->ats_enabled)
+ break;
+ /*
+ * Always flush device IOTLB if ATS is enabled. vIOMMU
+ * in the guest may assume IOTLB flush is inclusive,
+ * which is more efficient.
+ */
+ fallthrough;
+ case IOMMU_CACHE_INV_TYPE_DEV_IOTLB:
+ /*
+ * PASID based device TLB invalidation does not support
+ * IOMMU_INV_GRANU_PASID granularity but only supports
+ * IOMMU_INV_GRANU_ADDR.
+ * The equivalent of that is we set the size to be the
+ * entire range of 64 bit. User only provides PASID info
+ * without address info. So we set addr to 0.
+ */
+ if (inv_info->granularity == IOMMU_INV_GRANU_PASID) {
+ size = 64 - VTD_PAGE_SHIFT;
+ addr = 0;
+ } else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR) {
+ addr = inv_info->granu.addr_info.addr;
+ }
+
+ if (info->ats_enabled)
+ qi_flush_dev_iotlb_pasid(iommu, sid,
+ info->pfsid, pasid,
+ info->ats_qdep, addr,
+ size);
+ else
+ pr_warn_ratelimited("Passdown device IOTLB flush w/o ATS!\n");
+ break;
+ default:
+ dev_err_ratelimited(dev, "Unsupported IOMMU invalidation type %d\n",
+ cache_type);
+ ret = -EINVAL;
+ }
+ }
+out_unlock:
+ spin_unlock(&iommu->lock);
+ spin_unlock_irqrestore(&device_domain_lock, flags);
+
+ return ret;
+}
+#endif
+
static int intel_iommu_map(struct iommu_domain *domain,
unsigned long iova, phys_addr_t hpa,
- size_t size, int iommu_prot)
+ size_t size, int iommu_prot, gfp_t gfp)
{
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
u64 max_addr;
@@ -5578,6 +5734,24 @@
return ret;
}
+static inline bool nested_mode_support(void)
+{
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu;
+ bool ret = true;
+
+ rcu_read_lock();
+ for_each_active_iommu(iommu, drhd) {
+ if (!sm_supported(iommu) || !ecap_nest(iommu->ecap)) {
+ ret = false;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return ret;
+}
+
static bool intel_iommu_capable(enum iommu_cap cap)
{
if (cap == IOMMU_CAP_CACHE_COHERENCY)
@@ -5588,93 +5762,43 @@
return false;
}
-static int intel_iommu_add_device(struct device *dev)
+static struct iommu_device *intel_iommu_probe_device(struct device *dev)
{
- struct dmar_domain *dmar_domain;
- struct iommu_domain *domain;
struct intel_iommu *iommu;
- struct iommu_group *group;
- u8 bus, devfn;
- int ret;
- iommu = device_to_iommu(dev, &bus, &devfn);
+ iommu = device_to_iommu(dev, NULL, NULL);
if (!iommu)
- return -ENODEV;
-
- iommu_device_link(&iommu->iommu, dev);
+ return ERR_PTR(-ENODEV);
if (translation_pre_enabled(iommu))
- dev->archdata.iommu = DEFER_DEVICE_DOMAIN_INFO;
+ dev_iommu_priv_set(dev, DEFER_DEVICE_DOMAIN_INFO);
- group = iommu_group_get_for_dev(dev);
-
- if (IS_ERR(group)) {
- ret = PTR_ERR(group);
- goto unlink;
- }
-
- iommu_group_put(group);
-
- domain = iommu_get_domain_for_dev(dev);
- dmar_domain = to_dmar_domain(domain);
- if (domain->type == IOMMU_DOMAIN_DMA) {
- if (device_def_domain_type(dev) == IOMMU_DOMAIN_IDENTITY) {
- ret = iommu_request_dm_for_dev(dev);
- if (ret) {
- dmar_remove_one_dev_info(dev);
- dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
- domain_add_dev_info(si_domain, dev);
- dev_info(dev,
- "Device uses a private identity domain.\n");
- }
- }
- } else {
- if (device_def_domain_type(dev) == IOMMU_DOMAIN_DMA) {
- ret = iommu_request_dma_domain_for_dev(dev);
- if (ret) {
- dmar_remove_one_dev_info(dev);
- dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
- if (!get_private_domain_for_dev(dev)) {
- dev_warn(dev,
- "Failed to get a private domain.\n");
- ret = -ENOMEM;
- goto unlink;
- }
-
- dev_info(dev,
- "Device uses a private dma domain.\n");
- }
- }
- }
-
- if (device_needs_bounce(dev)) {
- dev_info(dev, "Use Intel IOMMU bounce page dma_ops\n");
- set_dma_ops(dev, &bounce_dma_ops);
- }
-
- return 0;
-
-unlink:
- iommu_device_unlink(&iommu->iommu, dev);
- return ret;
+ return &iommu->iommu;
}
-static void intel_iommu_remove_device(struct device *dev)
+static void intel_iommu_release_device(struct device *dev)
{
struct intel_iommu *iommu;
- u8 bus, devfn;
- iommu = device_to_iommu(dev, &bus, &devfn);
+ iommu = device_to_iommu(dev, NULL, NULL);
if (!iommu)
return;
dmar_remove_one_dev_info(dev);
- iommu_group_remove_device(dev);
+ set_dma_ops(dev, NULL);
+}
- iommu_device_unlink(&iommu->iommu, dev);
+static void intel_iommu_probe_finalize(struct device *dev)
+{
+ struct iommu_domain *domain;
+ domain = iommu_get_domain_for_dev(dev);
if (device_needs_bounce(dev))
+ set_dma_ops(dev, &bounce_dma_ops);
+ else if (domain && domain->type == IOMMU_DOMAIN_DMA)
+ set_dma_ops(dev, &intel_dma_ops);
+ else
set_dma_ops(dev, NULL);
}
@@ -5735,15 +5859,6 @@
list_add_tail(®->list, head);
}
-static void intel_iommu_put_resv_regions(struct device *dev,
- struct list_head *head)
-{
- struct iommu_resv_region *entry, *next;
-
- list_for_each_entry_safe(entry, next, head, list)
- kfree(entry);
-}
-
int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
{
struct device_domain_info *info;
@@ -5761,7 +5876,7 @@
spin_lock(&iommu->lock);
ret = -EINVAL;
- info = dev->archdata.iommu;
+ info = get_domain_info(dev);
if (!info || !info->pasid_supported)
goto out;
@@ -5815,37 +5930,14 @@
return generic_device_group(dev);
}
-#ifdef CONFIG_INTEL_IOMMU_SVM
-struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
-{
- struct intel_iommu *iommu;
- u8 bus, devfn;
-
- if (iommu_dummy(dev)) {
- dev_warn(dev,
- "No IOMMU translation for device; cannot enable SVM\n");
- return NULL;
- }
-
- iommu = device_to_iommu(dev, &bus, &devfn);
- if ((!iommu)) {
- dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
- return NULL;
- }
-
- return iommu;
-}
-#endif /* CONFIG_INTEL_IOMMU_SVM */
-
static int intel_iommu_enable_auxd(struct device *dev)
{
struct device_domain_info *info;
struct intel_iommu *iommu;
unsigned long flags;
- u8 bus, devfn;
int ret;
- iommu = device_to_iommu(dev, &bus, &devfn);
+ iommu = device_to_iommu(dev, NULL, NULL);
if (!iommu || dmar_disabled)
return -EINVAL;
@@ -5857,7 +5949,7 @@
return -ENODEV;
spin_lock_irqsave(&device_domain_lock, flags);
- info = dev->archdata.iommu;
+ info = get_domain_info(dev);
info->auxd_enabled = 1;
spin_unlock_irqrestore(&device_domain_lock, flags);
@@ -5870,7 +5962,7 @@
unsigned long flags;
spin_lock_irqsave(&device_domain_lock, flags);
- info = dev->archdata.iommu;
+ info = get_domain_info(dev);
if (!WARN_ON(!info))
info->auxd_enabled = 0;
spin_unlock_irqrestore(&device_domain_lock, flags);
@@ -5923,6 +6015,14 @@
return !!siov_find_pci_dvsec(to_pci_dev(dev));
}
+ if (feat == IOMMU_DEV_FEAT_SVA) {
+ struct device_domain_info *info = get_domain_info(dev);
+
+ return info && (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) &&
+ info->pasid_supported && info->pri_supported &&
+ info->ats_supported;
+ }
+
return false;
}
@@ -5932,6 +6032,16 @@
if (feat == IOMMU_DEV_FEAT_AUX)
return intel_iommu_enable_auxd(dev);
+ if (feat == IOMMU_DEV_FEAT_SVA) {
+ struct device_domain_info *info = get_domain_info(dev);
+
+ if (!info)
+ return -EINVAL;
+
+ if (info->iommu->flags & VTD_FLAG_SVM_CAPABLE)
+ return 0;
+ }
+
return -ENODEV;
}
@@ -5947,7 +6057,7 @@
static bool
intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
{
- struct device_domain_info *info = dev->archdata.iommu;
+ struct device_domain_info *info = get_domain_info(dev);
if (feat == IOMMU_DEV_FEAT_AUX)
return scalable_mode_support() && info && info->auxd_enabled;
@@ -5967,7 +6077,38 @@
static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain,
struct device *dev)
{
- return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO;
+ return attach_deferred(dev);
+}
+
+static int
+intel_iommu_domain_set_attr(struct iommu_domain *domain,
+ enum iommu_attr attr, void *data)
+{
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+ unsigned long flags;
+ int ret = 0;
+
+ if (domain->type != IOMMU_DOMAIN_UNMANAGED)
+ return -EINVAL;
+
+ switch (attr) {
+ case DOMAIN_ATTR_NESTING:
+ spin_lock_irqsave(&device_domain_lock, flags);
+ if (nested_mode_support() &&
+ list_empty(&dmar_domain->devices)) {
+ dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE;
+ dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL;
+ } else {
+ ret = -ENODEV;
+ }
+ spin_unlock_irqrestore(&device_domain_lock, flags);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
}
/*
@@ -5991,6 +6132,7 @@
.capable = intel_iommu_capable,
.domain_alloc = intel_iommu_domain_alloc,
.domain_free = intel_iommu_domain_free,
+ .domain_set_attr = intel_iommu_domain_set_attr,
.attach_dev = intel_iommu_attach_device,
.detach_dev = intel_iommu_detach_device,
.aux_attach_dev = intel_iommu_aux_attach_device,
@@ -5999,10 +6141,11 @@
.map = intel_iommu_map,
.unmap = intel_iommu_unmap,
.iova_to_phys = intel_iommu_iova_to_phys,
- .add_device = intel_iommu_add_device,
- .remove_device = intel_iommu_remove_device,
+ .probe_device = intel_iommu_probe_device,
+ .probe_finalize = intel_iommu_probe_finalize,
+ .release_device = intel_iommu_release_device,
.get_resv_regions = intel_iommu_get_resv_regions,
- .put_resv_regions = intel_iommu_put_resv_regions,
+ .put_resv_regions = generic_iommu_put_resv_regions,
.apply_resv_region = intel_iommu_apply_resv_region,
.device_group = intel_iommu_device_group,
.dev_has_feat = intel_iommu_dev_has_feat,
@@ -6010,7 +6153,17 @@
.dev_enable_feat = intel_iommu_dev_enable_feat,
.dev_disable_feat = intel_iommu_dev_disable_feat,
.is_attach_deferred = intel_iommu_is_attach_deferred,
+ .def_domain_type = device_def_domain_type,
.pgsize_bitmap = INTEL_IOMMU_PGSIZES,
+#ifdef CONFIG_INTEL_IOMMU_SVM
+ .cache_invalidate = intel_iommu_sva_invalidate,
+ .sva_bind_gpasid = intel_svm_bind_gpasid,
+ .sva_unbind_gpasid = intel_svm_unbind_gpasid,
+ .sva_bind = intel_svm_bind,
+ .sva_unbind = intel_svm_unbind,
+ .sva_get_pasid = intel_svm_get_pasid,
+ .page_response = intel_svm_page_response,
+#endif
};
static void quirk_iommu_igfx(struct pci_dev *dev)
@@ -6112,6 +6265,27 @@
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
+static void quirk_igfx_skip_te_disable(struct pci_dev *dev)
+{
+ unsigned short ver;
+
+ if (!IS_GFX_DEVICE(dev))
+ return;
+
+ ver = (dev->device >> 8) & 0xff;
+ if (ver != 0x45 && ver != 0x46 && ver != 0x4c &&
+ ver != 0x4e && ver != 0x8a && ver != 0x98 &&
+ ver != 0x9a)
+ return;
+
+ if (risky_device(dev))
+ return;
+
+ pci_info(dev, "Skip IOMMU disabling for graphics\n");
+ iommu_skip_te_disable = 1;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_igfx_skip_te_disable);
+
/* On Tylersburg chipsets, some BIOSes have been known to enable the
ISOCH DMAR unit for the Azalia sound device, but not give it any
TLB entries, which causes it to deadlock. Check for that. We do
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
similarity index 93%
rename from drivers/iommu/intel_irq_remapping.c
rename to drivers/iommu/intel/irq_remapping.c
index 5dcc81b..b853888 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -15,13 +15,14 @@
#include <linux/irqdomain.h>
#include <linux/crash_dump.h>
#include <asm/io_apic.h>
+#include <asm/apic.h>
#include <asm/smp.h>
#include <asm/cpu.h>
#include <asm/irq_remapping.h>
#include <asm/pci-direct.h>
#include <asm/msidef.h>
-#include "irq_remapping.h"
+#include "../irq_remapping.h"
enum irq_mode {
IRQ_REMAPPING,
@@ -151,7 +152,7 @@
desc.qw2 = 0;
desc.qw3 = 0;
- return qi_submit_sync(&desc, iommu);
+ return qi_submit_sync(iommu, &desc, 1, 0);
}
static int modify_irte(struct irq_2_iommu *irq_iommu,
@@ -203,35 +204,40 @@
return rc;
}
-static struct intel_iommu *map_hpet_to_ir(u8 hpet_id)
+static struct irq_domain *map_hpet_to_ir(u8 hpet_id)
{
int i;
- for (i = 0; i < MAX_HPET_TBS; i++)
+ for (i = 0; i < MAX_HPET_TBS; i++) {
if (ir_hpet[i].id == hpet_id && ir_hpet[i].iommu)
- return ir_hpet[i].iommu;
+ return ir_hpet[i].iommu->ir_domain;
+ }
return NULL;
}
-static struct intel_iommu *map_ioapic_to_ir(int apic)
+static struct intel_iommu *map_ioapic_to_iommu(int apic)
{
int i;
- for (i = 0; i < MAX_IO_APICS; i++)
+ for (i = 0; i < MAX_IO_APICS; i++) {
if (ir_ioapic[i].id == apic && ir_ioapic[i].iommu)
return ir_ioapic[i].iommu;
+ }
return NULL;
}
-static struct intel_iommu *map_dev_to_ir(struct pci_dev *dev)
+static struct irq_domain *map_ioapic_to_ir(int apic)
{
- struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu = map_ioapic_to_iommu(apic);
- drhd = dmar_find_matched_drhd_unit(dev);
- if (!drhd)
- return NULL;
+ return iommu ? iommu->ir_domain : NULL;
+}
- return drhd->iommu;
+static struct irq_domain *map_dev_to_ir(struct pci_dev *dev)
+{
+ struct dmar_drhd_unit *drhd = dmar_find_matched_drhd_unit(dev);
+
+ return drhd ? drhd->iommu->ir_msi_domain : NULL;
}
static int clear_entries(struct irq_2_iommu *irq_iommu)
@@ -570,9 +576,8 @@
fn, &intel_ir_domain_ops,
iommu);
if (!iommu->ir_domain) {
- irq_domain_free_fwnode(fn);
pr_err("IR%d: failed to allocate irqdomain\n", iommu->seq_id);
- goto out_free_bitmap;
+ goto out_free_fwnode;
}
iommu->ir_msi_domain =
arch_create_remap_msi_irq_domain(iommu->ir_domain,
@@ -596,7 +601,7 @@
if (dmar_enable_qi(iommu)) {
pr_err("Failed to enable queued invalidation\n");
- goto out_free_bitmap;
+ goto out_free_ir_domain;
}
}
@@ -620,6 +625,14 @@
return 0;
+out_free_ir_domain:
+ if (iommu->ir_msi_domain)
+ irq_domain_remove(iommu->ir_msi_domain);
+ iommu->ir_msi_domain = NULL;
+ irq_domain_remove(iommu->ir_domain);
+ iommu->ir_domain = NULL;
+out_free_fwnode:
+ irq_domain_free_fwnode(fn);
out_free_bitmap:
bitmap_free(bitmap);
out_free_pages:
@@ -1001,7 +1014,7 @@
for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) {
int ioapic_id = mpc_ioapic_id(ioapic_idx);
- if (!map_ioapic_to_ir(ioapic_id)) {
+ if (!map_ioapic_to_iommu(ioapic_id)) {
pr_err(FW_BUG "ioapic %d has no mapping iommu, "
"interrupt remapping will be disabled\n",
ioapic_id);
@@ -1086,6 +1099,22 @@
return -1;
}
+/*
+ * Store the MSI remapping domain pointer in the device if enabled.
+ *
+ * This is called from dmar_pci_bus_add_dev() so it works even when DMA
+ * remapping is disabled. Only update the pointer if the device is not
+ * already handled by a non default PCI/MSI interrupt domain. This protects
+ * e.g. VMD devices.
+ */
+void intel_irq_remap_add_device(struct dmar_pci_notify_info *info)
+{
+ if (!irq_remapping_enabled || pci_dev_has_special_msi_domain(info->dev))
+ return;
+
+ dev_set_msi_domain(&info->dev->dev, map_dev_to_ir(info->dev));
+}
+
static void prepare_irte(struct irte *irte, int vector, unsigned int dest)
{
memset(irte, 0, sizeof(*irte));
@@ -1106,51 +1135,20 @@
irte->redir_hint = 1;
}
-static struct irq_domain *intel_get_ir_irq_domain(struct irq_alloc_info *info)
-{
- struct intel_iommu *iommu = NULL;
-
- if (!info)
- return NULL;
-
- switch (info->type) {
- case X86_IRQ_ALLOC_TYPE_IOAPIC:
- iommu = map_ioapic_to_ir(info->ioapic_id);
- break;
- case X86_IRQ_ALLOC_TYPE_HPET:
- iommu = map_hpet_to_ir(info->hpet_id);
- break;
- case X86_IRQ_ALLOC_TYPE_MSI:
- case X86_IRQ_ALLOC_TYPE_MSIX:
- iommu = map_dev_to_ir(info->msi_dev);
- break;
- default:
- BUG_ON(1);
- break;
- }
-
- return iommu ? iommu->ir_domain : NULL;
-}
-
static struct irq_domain *intel_get_irq_domain(struct irq_alloc_info *info)
{
- struct intel_iommu *iommu;
-
if (!info)
return NULL;
switch (info->type) {
- case X86_IRQ_ALLOC_TYPE_MSI:
- case X86_IRQ_ALLOC_TYPE_MSIX:
- iommu = map_dev_to_ir(info->msi_dev);
- if (iommu)
- return iommu->ir_msi_domain;
- break;
+ case X86_IRQ_ALLOC_TYPE_IOAPIC_GET_PARENT:
+ return map_ioapic_to_ir(info->devid);
+ case X86_IRQ_ALLOC_TYPE_HPET_GET_PARENT:
+ return map_hpet_to_ir(info->devid);
default:
- break;
+ WARN_ON_ONCE(1);
+ return NULL;
}
-
- return NULL;
}
struct irq_remap_ops intel_irq_remap_ops = {
@@ -1159,7 +1157,6 @@
.disable = disable_irq_remapping,
.reenable = reenable_irq_remapping,
.enable_faulting = enable_drhd_fault_handling,
- .get_ir_irq_domain = intel_get_ir_irq_domain,
.get_irq_domain = intel_get_irq_domain,
};
@@ -1283,16 +1280,16 @@
switch (info->type) {
case X86_IRQ_ALLOC_TYPE_IOAPIC:
/* Set source-id of interrupt request */
- set_ioapic_sid(irte, info->ioapic_id);
+ set_ioapic_sid(irte, info->devid);
apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: Set IRTE entry (P:%d FPD:%d Dst_Mode:%d Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X Avail:%X Vector:%02X Dest:%08X SID:%04X SQ:%X SVT:%X)\n",
- info->ioapic_id, irte->present, irte->fpd,
+ info->devid, irte->present, irte->fpd,
irte->dst_mode, irte->redir_hint,
irte->trigger_mode, irte->dlvry_mode,
irte->avail, irte->vector, irte->dest_id,
irte->sid, irte->sq, irte->svt);
- entry = (struct IR_IO_APIC_route_entry *)info->ioapic_entry;
- info->ioapic_entry = NULL;
+ entry = (struct IR_IO_APIC_route_entry *)info->ioapic.entry;
+ info->ioapic.entry = NULL;
memset(entry, 0, sizeof(*entry));
entry->index2 = (index >> 15) & 0x1;
entry->zero = 0;
@@ -1302,21 +1299,21 @@
* IO-APIC RTE will be configured with virtual vector.
* irq handler will do the explicit EOI to the io-apic.
*/
- entry->vector = info->ioapic_pin;
+ entry->vector = info->ioapic.pin;
entry->mask = 0; /* enable IRQ */
- entry->trigger = info->ioapic_trigger;
- entry->polarity = info->ioapic_polarity;
- if (info->ioapic_trigger)
+ entry->trigger = info->ioapic.trigger;
+ entry->polarity = info->ioapic.polarity;
+ if (info->ioapic.trigger)
entry->mask = 1; /* Mask level triggered irqs. */
break;
case X86_IRQ_ALLOC_TYPE_HPET:
- case X86_IRQ_ALLOC_TYPE_MSI:
- case X86_IRQ_ALLOC_TYPE_MSIX:
+ case X86_IRQ_ALLOC_TYPE_PCI_MSI:
+ case X86_IRQ_ALLOC_TYPE_PCI_MSIX:
if (info->type == X86_IRQ_ALLOC_TYPE_HPET)
- set_hpet_sid(irte, info->hpet_id);
+ set_hpet_sid(irte, info->devid);
else
- set_msi_sid(irte, info->msi_dev);
+ set_msi_sid(irte, msi_desc_to_pci_dev(info->desc));
msg->address_hi = MSI_ADDR_BASE_HI;
msg->data = sub_handle;
@@ -1367,15 +1364,15 @@
if (!info || !iommu)
return -EINVAL;
- if (nr_irqs > 1 && info->type != X86_IRQ_ALLOC_TYPE_MSI &&
- info->type != X86_IRQ_ALLOC_TYPE_MSIX)
+ if (nr_irqs > 1 && info->type != X86_IRQ_ALLOC_TYPE_PCI_MSI &&
+ info->type != X86_IRQ_ALLOC_TYPE_PCI_MSIX)
return -EINVAL;
/*
* With IRQ remapping enabled, don't need contiguous CPU vectors
* to support multiple MSI interrupts.
*/
- if (info->type == X86_IRQ_ALLOC_TYPE_MSI)
+ if (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI)
info->flags &= ~X86_IRQ_ALLOC_CONTIGUOUS_VECTORS;
ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
new file mode 100644
index 0000000..fb911b6
--- /dev/null
+++ b/drivers/iommu/intel/pasid.c
@@ -0,0 +1,870 @@
+// SPDX-License-Identifier: GPL-2.0
+/**
+ * intel-pasid.c - PASID idr, table and entry manipulation
+ *
+ * Copyright (C) 2018 Intel Corporation
+ *
+ * Author: Lu Baolu <baolu.lu@linux.intel.com>
+ */
+
+#define pr_fmt(fmt) "DMAR: " fmt
+
+#include <linux/bitops.h>
+#include <linux/cpufeature.h>
+#include <linux/dmar.h>
+#include <linux/intel-iommu.h>
+#include <linux/iommu.h>
+#include <linux/memory.h>
+#include <linux/pci.h>
+#include <linux/pci-ats.h>
+#include <linux/spinlock.h>
+
+#include "pasid.h"
+
+/*
+ * Intel IOMMU system wide PASID name space:
+ */
+static DEFINE_SPINLOCK(pasid_lock);
+u32 intel_pasid_max_id = PASID_MAX;
+
+int vcmd_alloc_pasid(struct intel_iommu *iommu, u32 *pasid)
+{
+ unsigned long flags;
+ u8 status_code;
+ int ret = 0;
+ u64 res;
+
+ raw_spin_lock_irqsave(&iommu->register_lock, flags);
+ dmar_writeq(iommu->reg + DMAR_VCMD_REG, VCMD_CMD_ALLOC);
+ IOMMU_WAIT_OP(iommu, DMAR_VCRSP_REG, dmar_readq,
+ !(res & VCMD_VRSP_IP), res);
+ raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
+
+ status_code = VCMD_VRSP_SC(res);
+ switch (status_code) {
+ case VCMD_VRSP_SC_SUCCESS:
+ *pasid = VCMD_VRSP_RESULT_PASID(res);
+ break;
+ case VCMD_VRSP_SC_NO_PASID_AVAIL:
+ pr_info("IOMMU: %s: No PASID available\n", iommu->name);
+ ret = -ENOSPC;
+ break;
+ default:
+ ret = -ENODEV;
+ pr_warn("IOMMU: %s: Unexpected error code %d\n",
+ iommu->name, status_code);
+ }
+
+ return ret;
+}
+
+void vcmd_free_pasid(struct intel_iommu *iommu, u32 pasid)
+{
+ unsigned long flags;
+ u8 status_code;
+ u64 res;
+
+ raw_spin_lock_irqsave(&iommu->register_lock, flags);
+ dmar_writeq(iommu->reg + DMAR_VCMD_REG,
+ VCMD_CMD_OPERAND(pasid) | VCMD_CMD_FREE);
+ IOMMU_WAIT_OP(iommu, DMAR_VCRSP_REG, dmar_readq,
+ !(res & VCMD_VRSP_IP), res);
+ raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
+
+ status_code = VCMD_VRSP_SC(res);
+ switch (status_code) {
+ case VCMD_VRSP_SC_SUCCESS:
+ break;
+ case VCMD_VRSP_SC_INVALID_PASID:
+ pr_info("IOMMU: %s: Invalid PASID\n", iommu->name);
+ break;
+ default:
+ pr_warn("IOMMU: %s: Unexpected error code %d\n",
+ iommu->name, status_code);
+ }
+}
+
+/*
+ * Per device pasid table management:
+ */
+static inline void
+device_attach_pasid_table(struct device_domain_info *info,
+ struct pasid_table *pasid_table)
+{
+ info->pasid_table = pasid_table;
+ list_add(&info->table, &pasid_table->dev);
+}
+
+static inline void
+device_detach_pasid_table(struct device_domain_info *info,
+ struct pasid_table *pasid_table)
+{
+ info->pasid_table = NULL;
+ list_del(&info->table);
+}
+
+struct pasid_table_opaque {
+ struct pasid_table **pasid_table;
+ int segment;
+ int bus;
+ int devfn;
+};
+
+static int search_pasid_table(struct device_domain_info *info, void *opaque)
+{
+ struct pasid_table_opaque *data = opaque;
+
+ if (info->iommu->segment == data->segment &&
+ info->bus == data->bus &&
+ info->devfn == data->devfn &&
+ info->pasid_table) {
+ *data->pasid_table = info->pasid_table;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int get_alias_pasid_table(struct pci_dev *pdev, u16 alias, void *opaque)
+{
+ struct pasid_table_opaque *data = opaque;
+
+ data->segment = pci_domain_nr(pdev->bus);
+ data->bus = PCI_BUS_NUM(alias);
+ data->devfn = alias & 0xff;
+
+ return for_each_device_domain(&search_pasid_table, data);
+}
+
+/*
+ * Allocate a pasid table for @dev. It should be called in a
+ * single-thread context.
+ */
+int intel_pasid_alloc_table(struct device *dev)
+{
+ struct device_domain_info *info;
+ struct pasid_table *pasid_table;
+ struct pasid_table_opaque data;
+ struct page *pages;
+ u32 max_pasid = 0;
+ int ret, order;
+ int size;
+
+ might_sleep();
+ info = get_domain_info(dev);
+ if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table))
+ return -EINVAL;
+
+ /* DMA alias device already has a pasid table, use it: */
+ data.pasid_table = &pasid_table;
+ ret = pci_for_each_dma_alias(to_pci_dev(dev),
+ &get_alias_pasid_table, &data);
+ if (ret)
+ goto attach_out;
+
+ pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL);
+ if (!pasid_table)
+ return -ENOMEM;
+ INIT_LIST_HEAD(&pasid_table->dev);
+
+ if (info->pasid_supported)
+ max_pasid = min_t(u32, pci_max_pasids(to_pci_dev(dev)),
+ intel_pasid_max_id);
+
+ size = max_pasid >> (PASID_PDE_SHIFT - 3);
+ order = size ? get_order(size) : 0;
+ pages = alloc_pages_node(info->iommu->node,
+ GFP_KERNEL | __GFP_ZERO, order);
+ if (!pages) {
+ kfree(pasid_table);
+ return -ENOMEM;
+ }
+
+ pasid_table->table = page_address(pages);
+ pasid_table->order = order;
+ pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);
+
+attach_out:
+ device_attach_pasid_table(info, pasid_table);
+
+ return 0;
+}
+
+void intel_pasid_free_table(struct device *dev)
+{
+ struct device_domain_info *info;
+ struct pasid_table *pasid_table;
+ struct pasid_dir_entry *dir;
+ struct pasid_entry *table;
+ int i, max_pde;
+
+ info = get_domain_info(dev);
+ if (!info || !dev_is_pci(dev) || !info->pasid_table)
+ return;
+
+ pasid_table = info->pasid_table;
+ device_detach_pasid_table(info, pasid_table);
+
+ if (!list_empty(&pasid_table->dev))
+ return;
+
+ /* Free scalable mode PASID directory tables: */
+ dir = pasid_table->table;
+ max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT;
+ for (i = 0; i < max_pde; i++) {
+ table = get_pasid_table_from_pde(&dir[i]);
+ free_pgtable_page(table);
+ }
+
+ free_pages((unsigned long)pasid_table->table, pasid_table->order);
+ kfree(pasid_table);
+}
+
+struct pasid_table *intel_pasid_get_table(struct device *dev)
+{
+ struct device_domain_info *info;
+
+ info = get_domain_info(dev);
+ if (!info)
+ return NULL;
+
+ return info->pasid_table;
+}
+
+int intel_pasid_get_dev_max_id(struct device *dev)
+{
+ struct device_domain_info *info;
+
+ info = get_domain_info(dev);
+ if (!info || !info->pasid_table)
+ return 0;
+
+ return info->pasid_table->max_pasid;
+}
+
+struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
+{
+ struct device_domain_info *info;
+ struct pasid_table *pasid_table;
+ struct pasid_dir_entry *dir;
+ struct pasid_entry *entries;
+ int dir_index, index;
+
+ pasid_table = intel_pasid_get_table(dev);
+ if (WARN_ON(!pasid_table || pasid >= intel_pasid_get_dev_max_id(dev)))
+ return NULL;
+
+ dir = pasid_table->table;
+ info = get_domain_info(dev);
+ dir_index = pasid >> PASID_PDE_SHIFT;
+ index = pasid & PASID_PTE_MASK;
+
+ spin_lock(&pasid_lock);
+ entries = get_pasid_table_from_pde(&dir[dir_index]);
+ if (!entries) {
+ entries = alloc_pgtable_page(info->iommu->node);
+ if (!entries) {
+ spin_unlock(&pasid_lock);
+ return NULL;
+ }
+
+ WRITE_ONCE(dir[dir_index].val,
+ (u64)virt_to_phys(entries) | PASID_PTE_PRESENT);
+ }
+ spin_unlock(&pasid_lock);
+
+ return &entries[index];
+}
+
+/*
+ * Interfaces for PASID table entry manipulation:
+ */
+static inline void pasid_clear_entry(struct pasid_entry *pe)
+{
+ WRITE_ONCE(pe->val[0], 0);
+ WRITE_ONCE(pe->val[1], 0);
+ WRITE_ONCE(pe->val[2], 0);
+ WRITE_ONCE(pe->val[3], 0);
+ WRITE_ONCE(pe->val[4], 0);
+ WRITE_ONCE(pe->val[5], 0);
+ WRITE_ONCE(pe->val[6], 0);
+ WRITE_ONCE(pe->val[7], 0);
+}
+
+static inline void pasid_clear_entry_with_fpd(struct pasid_entry *pe)
+{
+ WRITE_ONCE(pe->val[0], PASID_PTE_FPD);
+ WRITE_ONCE(pe->val[1], 0);
+ WRITE_ONCE(pe->val[2], 0);
+ WRITE_ONCE(pe->val[3], 0);
+ WRITE_ONCE(pe->val[4], 0);
+ WRITE_ONCE(pe->val[5], 0);
+ WRITE_ONCE(pe->val[6], 0);
+ WRITE_ONCE(pe->val[7], 0);
+}
+
+static void
+intel_pasid_clear_entry(struct device *dev, u32 pasid, bool fault_ignore)
+{
+ struct pasid_entry *pe;
+
+ pe = intel_pasid_get_entry(dev, pasid);
+ if (WARN_ON(!pe))
+ return;
+
+ if (fault_ignore && pasid_pte_is_present(pe))
+ pasid_clear_entry_with_fpd(pe);
+ else
+ pasid_clear_entry(pe);
+}
+
+static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits)
+{
+ u64 old;
+
+ old = READ_ONCE(*ptr);
+ WRITE_ONCE(*ptr, (old & ~mask) | bits);
+}
+
+/*
+ * Setup the DID(Domain Identifier) field (Bit 64~79) of scalable mode
+ * PASID entry.
+ */
+static inline void
+pasid_set_domain_id(struct pasid_entry *pe, u64 value)
+{
+ pasid_set_bits(&pe->val[1], GENMASK_ULL(15, 0), value);
+}
+
+/*
+ * Get domain ID value of a scalable mode PASID entry.
+ */
+static inline u16
+pasid_get_domain_id(struct pasid_entry *pe)
+{
+ return (u16)(READ_ONCE(pe->val[1]) & GENMASK_ULL(15, 0));
+}
+
+/*
+ * Setup the SLPTPTR(Second Level Page Table Pointer) field (Bit 12~63)
+ * of a scalable mode PASID entry.
+ */
+static inline void
+pasid_set_slptr(struct pasid_entry *pe, u64 value)
+{
+ pasid_set_bits(&pe->val[0], VTD_PAGE_MASK, value);
+}
+
+/*
+ * Setup the AW(Address Width) field (Bit 2~4) of a scalable mode PASID
+ * entry.
+ */
+static inline void
+pasid_set_address_width(struct pasid_entry *pe, u64 value)
+{
+ pasid_set_bits(&pe->val[0], GENMASK_ULL(4, 2), value << 2);
+}
+
+/*
+ * Setup the PGTT(PASID Granular Translation Type) field (Bit 6~8)
+ * of a scalable mode PASID entry.
+ */
+static inline void
+pasid_set_translation_type(struct pasid_entry *pe, u64 value)
+{
+ pasid_set_bits(&pe->val[0], GENMASK_ULL(8, 6), value << 6);
+}
+
+/*
+ * Enable fault processing by clearing the FPD(Fault Processing
+ * Disable) field (Bit 1) of a scalable mode PASID entry.
+ */
+static inline void pasid_set_fault_enable(struct pasid_entry *pe)
+{
+ pasid_set_bits(&pe->val[0], 1 << 1, 0);
+}
+
+/*
+ * Setup the SRE(Supervisor Request Enable) field (Bit 128) of a
+ * scalable mode PASID entry.
+ */
+static inline void pasid_set_sre(struct pasid_entry *pe)
+{
+ pasid_set_bits(&pe->val[2], 1 << 0, 1);
+}
+
+/*
+ * Setup the P(Present) field (Bit 0) of a scalable mode PASID
+ * entry.
+ */
+static inline void pasid_set_present(struct pasid_entry *pe)
+{
+ pasid_set_bits(&pe->val[0], 1 << 0, 1);
+}
+
+/*
+ * Setup Page Walk Snoop bit (Bit 87) of a scalable mode PASID
+ * entry.
+ */
+static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value)
+{
+ pasid_set_bits(&pe->val[1], 1 << 23, value << 23);
+}
+
+/*
+ * Setup the Page Snoop (PGSNP) field (Bit 88) of a scalable mode
+ * PASID entry.
+ */
+static inline void
+pasid_set_pgsnp(struct pasid_entry *pe)
+{
+ pasid_set_bits(&pe->val[1], 1ULL << 24, 1ULL << 24);
+}
+
+/*
+ * Setup the First Level Page table Pointer field (Bit 140~191)
+ * of a scalable mode PASID entry.
+ */
+static inline void
+pasid_set_flptr(struct pasid_entry *pe, u64 value)
+{
+ pasid_set_bits(&pe->val[2], VTD_PAGE_MASK, value);
+}
+
+/*
+ * Setup the First Level Paging Mode field (Bit 130~131) of a
+ * scalable mode PASID entry.
+ */
+static inline void
+pasid_set_flpm(struct pasid_entry *pe, u64 value)
+{
+ pasid_set_bits(&pe->val[2], GENMASK_ULL(3, 2), value << 2);
+}
+
+/*
+ * Setup the Extended Access Flag Enable (EAFE) field (Bit 135)
+ * of a scalable mode PASID entry.
+ */
+static inline void
+pasid_set_eafe(struct pasid_entry *pe)
+{
+ pasid_set_bits(&pe->val[2], 1 << 7, 1 << 7);
+}
+
+static void
+pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu,
+ u16 did, u32 pasid)
+{
+ struct qi_desc desc;
+
+ desc.qw0 = QI_PC_DID(did) | QI_PC_GRAN(QI_PC_PASID_SEL) |
+ QI_PC_PASID(pasid) | QI_PC_TYPE;
+ desc.qw1 = 0;
+ desc.qw2 = 0;
+ desc.qw3 = 0;
+
+ qi_submit_sync(iommu, &desc, 1, 0);
+}
+
+static void
+devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
+ struct device *dev, u32 pasid)
+{
+ struct device_domain_info *info;
+ u16 sid, qdep, pfsid;
+
+ info = get_domain_info(dev);
+ if (!info || !info->ats_enabled)
+ return;
+
+ sid = info->bus << 8 | info->devfn;
+ qdep = info->ats_qdep;
+ pfsid = info->pfsid;
+
+ /*
+ * When PASID 0 is used, it indicates RID2PASID(DMA request w/o PASID),
+ * devTLB flush w/o PASID should be used. For non-zero PASID under
+ * SVA usage, device could do DMA with multiple PASIDs. It is more
+ * efficient to flush devTLB specific to the PASID.
+ */
+ if (pasid == PASID_RID2PASID)
+ qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT);
+ else
+ qi_flush_dev_iotlb_pasid(iommu, sid, pfsid, pasid, qdep, 0, 64 - VTD_PAGE_SHIFT);
+}
+
+void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
+ u32 pasid, bool fault_ignore)
+{
+ struct pasid_entry *pte;
+ u16 did, pgtt;
+
+ pte = intel_pasid_get_entry(dev, pasid);
+ if (WARN_ON(!pte))
+ return;
+
+ did = pasid_get_domain_id(pte);
+ pgtt = pasid_pte_get_pgtt(pte);
+
+ intel_pasid_clear_entry(dev, pasid, fault_ignore);
+
+ if (!ecap_coherent(iommu->ecap))
+ clflush_cache_range(pte, sizeof(*pte));
+
+ pasid_cache_invalidation_with_pasid(iommu, did, pasid);
+
+ if (pgtt == PASID_ENTRY_PGTT_PT || pgtt == PASID_ENTRY_PGTT_FL_ONLY)
+ qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
+ else
+ iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
+
+ /* Device IOTLB doesn't need to be flushed in caching mode. */
+ if (!cap_caching_mode(iommu->cap))
+ devtlb_invalidation_with_pasid(iommu, dev, pasid);
+}
+
+static void pasid_flush_caches(struct intel_iommu *iommu,
+ struct pasid_entry *pte,
+ u32 pasid, u16 did)
+{
+ if (!ecap_coherent(iommu->ecap))
+ clflush_cache_range(pte, sizeof(*pte));
+
+ if (cap_caching_mode(iommu->cap)) {
+ pasid_cache_invalidation_with_pasid(iommu, did, pasid);
+ qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
+ } else {
+ iommu_flush_write_buffer(iommu);
+ }
+}
+
+/*
+ * Set up the scalable mode pasid table entry for first only
+ * translation type.
+ */
+int intel_pasid_setup_first_level(struct intel_iommu *iommu,
+ struct device *dev, pgd_t *pgd,
+ u32 pasid, u16 did, int flags)
+{
+ struct pasid_entry *pte;
+
+ if (!ecap_flts(iommu->ecap)) {
+ pr_err("No first level translation support on %s\n",
+ iommu->name);
+ return -EINVAL;
+ }
+
+ pte = intel_pasid_get_entry(dev, pasid);
+ if (WARN_ON(!pte))
+ return -EINVAL;
+
+ pasid_clear_entry(pte);
+
+ /* Setup the first level page table pointer: */
+ pasid_set_flptr(pte, (u64)__pa(pgd));
+ if (flags & PASID_FLAG_SUPERVISOR_MODE) {
+ if (!ecap_srs(iommu->ecap)) {
+ pr_err("No supervisor request support on %s\n",
+ iommu->name);
+ return -EINVAL;
+ }
+ pasid_set_sre(pte);
+ }
+
+ if (flags & PASID_FLAG_FL5LP) {
+ if (cap_5lp_support(iommu->cap)) {
+ pasid_set_flpm(pte, 1);
+ } else {
+ pr_err("No 5-level paging support for first-level\n");
+ pasid_clear_entry(pte);
+ return -EINVAL;
+ }
+ }
+
+ if (flags & PASID_FLAG_PAGE_SNOOP)
+ pasid_set_pgsnp(pte);
+
+ pasid_set_domain_id(pte, did);
+ pasid_set_address_width(pte, iommu->agaw);
+ pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
+
+ /* Setup Present and PASID Granular Transfer Type: */
+ pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY);
+ pasid_set_present(pte);
+ pasid_flush_caches(iommu, pte, pasid, did);
+
+ return 0;
+}
+
+/*
+ * Skip top levels of page tables for iommu which has less agaw
+ * than default. Unnecessary for PT mode.
+ */
+static inline int iommu_skip_agaw(struct dmar_domain *domain,
+ struct intel_iommu *iommu,
+ struct dma_pte **pgd)
+{
+ int agaw;
+
+ for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
+ *pgd = phys_to_virt(dma_pte_addr(*pgd));
+ if (!dma_pte_present(*pgd))
+ return -EINVAL;
+ }
+
+ return agaw;
+}
+
+/*
+ * Set up the scalable mode pasid entry for second only translation type.
+ */
+int intel_pasid_setup_second_level(struct intel_iommu *iommu,
+ struct dmar_domain *domain,
+ struct device *dev, u32 pasid)
+{
+ struct pasid_entry *pte;
+ struct dma_pte *pgd;
+ u64 pgd_val;
+ int agaw;
+ u16 did;
+
+ /*
+ * If hardware advertises no support for second level
+ * translation, return directly.
+ */
+ if (!ecap_slts(iommu->ecap)) {
+ pr_err("No second level translation support on %s\n",
+ iommu->name);
+ return -EINVAL;
+ }
+
+ pgd = domain->pgd;
+ agaw = iommu_skip_agaw(domain, iommu, &pgd);
+ if (agaw < 0) {
+ dev_err(dev, "Invalid domain page table\n");
+ return -EINVAL;
+ }
+
+ pgd_val = virt_to_phys(pgd);
+ did = domain->iommu_did[iommu->seq_id];
+
+ pte = intel_pasid_get_entry(dev, pasid);
+ if (!pte) {
+ dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid);
+ return -ENODEV;
+ }
+
+ pasid_clear_entry(pte);
+ pasid_set_domain_id(pte, did);
+ pasid_set_slptr(pte, pgd_val);
+ pasid_set_address_width(pte, agaw);
+ pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY);
+ pasid_set_fault_enable(pte);
+ pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
+
+ if (domain->domain.type == IOMMU_DOMAIN_UNMANAGED)
+ pasid_set_pgsnp(pte);
+
+ /*
+ * Since it is a second level only translation setup, we should
+ * set SRE bit as well (addresses are expected to be GPAs).
+ */
+ if (pasid != PASID_RID2PASID)
+ pasid_set_sre(pte);
+ pasid_set_present(pte);
+ pasid_flush_caches(iommu, pte, pasid, did);
+
+ return 0;
+}
+
+/*
+ * Set up the scalable mode pasid entry for passthrough translation type.
+ */
+int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
+ struct dmar_domain *domain,
+ struct device *dev, u32 pasid)
+{
+ u16 did = FLPT_DEFAULT_DID;
+ struct pasid_entry *pte;
+
+ pte = intel_pasid_get_entry(dev, pasid);
+ if (!pte) {
+ dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid);
+ return -ENODEV;
+ }
+
+ pasid_clear_entry(pte);
+ pasid_set_domain_id(pte, did);
+ pasid_set_address_width(pte, iommu->agaw);
+ pasid_set_translation_type(pte, PASID_ENTRY_PGTT_PT);
+ pasid_set_fault_enable(pte);
+ pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
+
+ /*
+ * We should set SRE bit as well since the addresses are expected
+ * to be GPAs.
+ */
+ pasid_set_sre(pte);
+ pasid_set_present(pte);
+ pasid_flush_caches(iommu, pte, pasid, did);
+
+ return 0;
+}
+
+static int
+intel_pasid_setup_bind_data(struct intel_iommu *iommu, struct pasid_entry *pte,
+ struct iommu_gpasid_bind_data_vtd *pasid_data)
+{
+ /*
+ * Not all guest PASID table entry fields are passed down during bind,
+ * here we only set up the ones that are dependent on guest settings.
+ * Execution related bits such as NXE, SMEP are not supported.
+ * Other fields, such as snoop related, are set based on host needs
+ * regardless of guest settings.
+ */
+ if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_SRE) {
+ if (!ecap_srs(iommu->ecap)) {
+ pr_err_ratelimited("No supervisor request support on %s\n",
+ iommu->name);
+ return -EINVAL;
+ }
+ pasid_set_sre(pte);
+ }
+
+ if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_EAFE) {
+ if (!ecap_eafs(iommu->ecap)) {
+ pr_err_ratelimited("No extended access flag support on %s\n",
+ iommu->name);
+ return -EINVAL;
+ }
+ pasid_set_eafe(pte);
+ }
+
+ /*
+ * Memory type is only applicable to devices inside processor coherent
+ * domain. Will add MTS support once coherent devices are available.
+ */
+ if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_MTS_MASK) {
+ pr_warn_ratelimited("No memory type support %s\n",
+ iommu->name);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * intel_pasid_setup_nested() - Set up PASID entry for nested translation.
+ * This could be used for guest shared virtual address. In this case, the
+ * first level page tables are used for GVA-GPA translation in the guest,
+ * second level page tables are used for GPA-HPA translation.
+ *
+ * @iommu: IOMMU which the device belong to
+ * @dev: Device to be set up for translation
+ * @gpgd: FLPTPTR: First Level Page translation pointer in GPA
+ * @pasid: PASID to be programmed in the device PASID table
+ * @pasid_data: Additional PASID info from the guest bind request
+ * @domain: Domain info for setting up second level page tables
+ * @addr_width: Address width of the first level (guest)
+ */
+int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
+ pgd_t *gpgd, u32 pasid,
+ struct iommu_gpasid_bind_data_vtd *pasid_data,
+ struct dmar_domain *domain, int addr_width)
+{
+ struct pasid_entry *pte;
+ struct dma_pte *pgd;
+ int ret = 0;
+ u64 pgd_val;
+ int agaw;
+ u16 did;
+
+ if (!ecap_nest(iommu->ecap)) {
+ pr_err_ratelimited("IOMMU: %s: No nested translation support\n",
+ iommu->name);
+ return -EINVAL;
+ }
+
+ if (!(domain->flags & DOMAIN_FLAG_NESTING_MODE)) {
+ pr_err_ratelimited("Domain is not in nesting mode, %x\n",
+ domain->flags);
+ return -EINVAL;
+ }
+
+ pte = intel_pasid_get_entry(dev, pasid);
+ if (WARN_ON(!pte))
+ return -EINVAL;
+
+ /*
+ * Caller must ensure PASID entry is not in use, i.e. not bind the
+ * same PASID to the same device twice.
+ */
+ if (pasid_pte_is_present(pte))
+ return -EBUSY;
+
+ pasid_clear_entry(pte);
+
+ /* Sanity checking performed by caller to make sure address
+ * width matching in two dimensions:
+ * 1. CPU vs. IOMMU
+ * 2. Guest vs. Host.
+ */
+ switch (addr_width) {
+#ifdef CONFIG_X86
+ case ADDR_WIDTH_5LEVEL:
+ if (!cpu_feature_enabled(X86_FEATURE_LA57) ||
+ !cap_5lp_support(iommu->cap)) {
+ dev_err_ratelimited(dev,
+ "5-level paging not supported\n");
+ return -EINVAL;
+ }
+
+ pasid_set_flpm(pte, 1);
+ break;
+#endif
+ case ADDR_WIDTH_4LEVEL:
+ pasid_set_flpm(pte, 0);
+ break;
+ default:
+ dev_err_ratelimited(dev, "Invalid guest address width %d\n",
+ addr_width);
+ return -EINVAL;
+ }
+
+ /* First level PGD is in GPA, must be supported by the second level */
+ if ((uintptr_t)gpgd > domain->max_addr) {
+ dev_err_ratelimited(dev,
+ "Guest PGD %lx not supported, max %llx\n",
+ (uintptr_t)gpgd, domain->max_addr);
+ return -EINVAL;
+ }
+ pasid_set_flptr(pte, (uintptr_t)gpgd);
+
+ ret = intel_pasid_setup_bind_data(iommu, pte, pasid_data);
+ if (ret)
+ return ret;
+
+ /* Setup the second level based on the given domain */
+ pgd = domain->pgd;
+
+ agaw = iommu_skip_agaw(domain, iommu, &pgd);
+ if (agaw < 0) {
+ dev_err_ratelimited(dev, "Invalid domain page table\n");
+ return -EINVAL;
+ }
+ pgd_val = virt_to_phys(pgd);
+ pasid_set_slptr(pte, pgd_val);
+ pasid_set_fault_enable(pte);
+
+ did = domain->iommu_did[iommu->seq_id];
+ pasid_set_domain_id(pte, did);
+
+ pasid_set_address_width(pte, agaw);
+ pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
+
+ pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED);
+ pasid_set_present(pte);
+ pasid_flush_caches(iommu, pte, pasid, did);
+
+ return ret;
+}
diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h
new file mode 100644
index 0000000..35963e6
--- /dev/null
+++ b/drivers/iommu/intel/pasid.h
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * pasid.h - PASID idr, table and entry header
+ *
+ * Copyright (C) 2018 Intel Corporation
+ *
+ * Author: Lu Baolu <baolu.lu@linux.intel.com>
+ */
+
+#ifndef __INTEL_PASID_H
+#define __INTEL_PASID_H
+
+#define PASID_RID2PASID 0x0
+#define PASID_MIN 0x1
+#define PASID_MAX 0x100000
+#define PASID_PTE_MASK 0x3F
+#define PASID_PTE_PRESENT 1
+#define PASID_PTE_FPD 2
+#define PDE_PFN_MASK PAGE_MASK
+#define PASID_PDE_SHIFT 6
+#define MAX_NR_PASID_BITS 20
+#define PASID_TBL_ENTRIES BIT(PASID_PDE_SHIFT)
+
+#define is_pasid_enabled(entry) (((entry)->lo >> 3) & 0x1)
+#define get_pasid_dir_size(entry) (1 << ((((entry)->lo >> 9) & 0x7) + 7))
+
+/* Virtual command interface for enlightened pasid management. */
+#define VCMD_CMD_ALLOC 0x1
+#define VCMD_CMD_FREE 0x2
+#define VCMD_VRSP_IP 0x1
+#define VCMD_VRSP_SC(e) (((e) & 0xff) >> 1)
+#define VCMD_VRSP_SC_SUCCESS 0
+#define VCMD_VRSP_SC_NO_PASID_AVAIL 16
+#define VCMD_VRSP_SC_INVALID_PASID 16
+#define VCMD_VRSP_RESULT_PASID(e) (((e) >> 16) & 0xfffff)
+#define VCMD_CMD_OPERAND(e) ((e) << 16)
+/*
+ * Domain ID reserved for pasid entries programmed for first-level
+ * only and pass-through transfer modes.
+ */
+#define FLPT_DEFAULT_DID 1
+
+/*
+ * The SUPERVISOR_MODE flag indicates a first level translation which
+ * can be used for access to kernel addresses. It is valid only for
+ * access to the kernel's static 1:1 mapping of physical memory — not
+ * to vmalloc or even module mappings.
+ */
+#define PASID_FLAG_SUPERVISOR_MODE BIT(0)
+#define PASID_FLAG_NESTED BIT(1)
+#define PASID_FLAG_PAGE_SNOOP BIT(2)
+
+/*
+ * The PASID_FLAG_FL5LP flag Indicates using 5-level paging for first-
+ * level translation, otherwise, 4-level paging will be used.
+ */
+#define PASID_FLAG_FL5LP BIT(1)
+
+struct pasid_dir_entry {
+ u64 val;
+};
+
+struct pasid_entry {
+ u64 val[8];
+};
+
+#define PASID_ENTRY_PGTT_FL_ONLY (1)
+#define PASID_ENTRY_PGTT_SL_ONLY (2)
+#define PASID_ENTRY_PGTT_NESTED (3)
+#define PASID_ENTRY_PGTT_PT (4)
+
+/* The representative of a PASID table */
+struct pasid_table {
+ void *table; /* pasid table pointer */
+ int order; /* page order of pasid table */
+ u32 max_pasid; /* max pasid */
+ struct list_head dev; /* device list */
+};
+
+/* Get PRESENT bit of a PASID directory entry. */
+static inline bool pasid_pde_is_present(struct pasid_dir_entry *pde)
+{
+ return READ_ONCE(pde->val) & PASID_PTE_PRESENT;
+}
+
+/* Get PASID table from a PASID directory entry. */
+static inline struct pasid_entry *
+get_pasid_table_from_pde(struct pasid_dir_entry *pde)
+{
+ if (!pasid_pde_is_present(pde))
+ return NULL;
+
+ return phys_to_virt(READ_ONCE(pde->val) & PDE_PFN_MASK);
+}
+
+/* Get PRESENT bit of a PASID table entry. */
+static inline bool pasid_pte_is_present(struct pasid_entry *pte)
+{
+ return READ_ONCE(pte->val[0]) & PASID_PTE_PRESENT;
+}
+
+/* Get PGTT field of a PASID table entry */
+static inline u16 pasid_pte_get_pgtt(struct pasid_entry *pte)
+{
+ return (u16)((READ_ONCE(pte->val[0]) >> 6) & 0x7);
+}
+
+extern unsigned int intel_pasid_max_id;
+int intel_pasid_alloc_id(void *ptr, int start, int end, gfp_t gfp);
+void intel_pasid_free_id(u32 pasid);
+void *intel_pasid_lookup_id(u32 pasid);
+int intel_pasid_alloc_table(struct device *dev);
+void intel_pasid_free_table(struct device *dev);
+struct pasid_table *intel_pasid_get_table(struct device *dev);
+int intel_pasid_get_dev_max_id(struct device *dev);
+struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid);
+int intel_pasid_setup_first_level(struct intel_iommu *iommu,
+ struct device *dev, pgd_t *pgd,
+ u32 pasid, u16 did, int flags);
+int intel_pasid_setup_second_level(struct intel_iommu *iommu,
+ struct dmar_domain *domain,
+ struct device *dev, u32 pasid);
+int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
+ struct dmar_domain *domain,
+ struct device *dev, u32 pasid);
+int intel_pasid_setup_nested(struct intel_iommu *iommu,
+ struct device *dev, pgd_t *pgd, u32 pasid,
+ struct iommu_gpasid_bind_data_vtd *pasid_data,
+ struct dmar_domain *domain, int addr_width);
+void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
+ struct device *dev, u32 pasid,
+ bool fault_ignore);
+int vcmd_alloc_pasid(struct intel_iommu *iommu, u32 *pasid);
+void vcmd_free_pasid(struct intel_iommu *iommu, u32 pasid);
+#endif /* __INTEL_PASID_H */
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
new file mode 100644
index 0000000..aabf562
--- /dev/null
+++ b/drivers/iommu/intel/svm.c
@@ -0,0 +1,1223 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright © 2015 Intel Corporation.
+ *
+ * Authors: David Woodhouse <dwmw2@infradead.org>
+ */
+
+#include <linux/intel-iommu.h>
+#include <linux/mmu_notifier.h>
+#include <linux/sched.h>
+#include <linux/sched/mm.h>
+#include <linux/slab.h>
+#include <linux/intel-svm.h>
+#include <linux/rculist.h>
+#include <linux/pci.h>
+#include <linux/pci-ats.h>
+#include <linux/dmar.h>
+#include <linux/interrupt.h>
+#include <linux/mm_types.h>
+#include <linux/ioasid.h>
+#include <asm/page.h>
+#include <asm/fpu/api.h>
+
+#include "pasid.h"
+
+static irqreturn_t prq_event_thread(int irq, void *d);
+static void intel_svm_drain_prq(struct device *dev, u32 pasid);
+
+#define PRQ_ORDER 0
+
+int intel_svm_enable_prq(struct intel_iommu *iommu)
+{
+ struct page *pages;
+ int irq, ret;
+
+ pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
+ if (!pages) {
+ pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
+ iommu->name);
+ return -ENOMEM;
+ }
+ iommu->prq = page_address(pages);
+
+ irq = dmar_alloc_hwirq(DMAR_UNITS_SUPPORTED + iommu->seq_id, iommu->node, iommu);
+ if (irq <= 0) {
+ pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
+ iommu->name);
+ ret = -EINVAL;
+ err:
+ free_pages((unsigned long)iommu->prq, PRQ_ORDER);
+ iommu->prq = NULL;
+ return ret;
+ }
+ iommu->pr_irq = irq;
+
+ snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);
+
+ ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
+ iommu->prq_name, iommu);
+ if (ret) {
+ pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
+ iommu->name);
+ dmar_free_hwirq(irq);
+ iommu->pr_irq = 0;
+ goto err;
+ }
+ dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
+ dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
+ dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);
+
+ init_completion(&iommu->prq_complete);
+
+ return 0;
+}
+
+int intel_svm_finish_prq(struct intel_iommu *iommu)
+{
+ dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
+ dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
+ dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);
+
+ if (iommu->pr_irq) {
+ free_irq(iommu->pr_irq, iommu);
+ dmar_free_hwirq(iommu->pr_irq);
+ iommu->pr_irq = 0;
+ }
+
+ free_pages((unsigned long)iommu->prq, PRQ_ORDER);
+ iommu->prq = NULL;
+
+ return 0;
+}
+
+static inline bool intel_svm_capable(struct intel_iommu *iommu)
+{
+ return iommu->flags & VTD_FLAG_SVM_CAPABLE;
+}
+
+void intel_svm_check(struct intel_iommu *iommu)
+{
+ if (!pasid_supported(iommu))
+ return;
+
+ if (cpu_feature_enabled(X86_FEATURE_GBPAGES) &&
+ !cap_fl1gp_support(iommu->cap)) {
+ pr_err("%s SVM disabled, incompatible 1GB page capability\n",
+ iommu->name);
+ return;
+ }
+
+ if (cpu_feature_enabled(X86_FEATURE_LA57) &&
+ !cap_5lp_support(iommu->cap)) {
+ pr_err("%s SVM disabled, incompatible paging mode\n",
+ iommu->name);
+ return;
+ }
+
+ iommu->flags |= VTD_FLAG_SVM_CAPABLE;
+}
+
+static void __flush_svm_range_dev(struct intel_svm *svm,
+ struct intel_svm_dev *sdev,
+ unsigned long address,
+ unsigned long pages, int ih)
+{
+ struct device_domain_info *info = get_domain_info(sdev->dev);
+
+ if (WARN_ON(!pages))
+ return;
+
+ qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih);
+ if (info->ats_enabled)
+ qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
+ svm->pasid, sdev->qdep, address,
+ order_base_2(pages));
+}
+
+static void intel_flush_svm_range_dev(struct intel_svm *svm,
+ struct intel_svm_dev *sdev,
+ unsigned long address,
+ unsigned long pages, int ih)
+{
+ unsigned long shift = ilog2(__roundup_pow_of_two(pages));
+ unsigned long align = (1ULL << (VTD_PAGE_SHIFT + shift));
+ unsigned long start = ALIGN_DOWN(address, align);
+ unsigned long end = ALIGN(address + (pages << VTD_PAGE_SHIFT), align);
+
+ while (start < end) {
+ __flush_svm_range_dev(svm, sdev, start, align >> VTD_PAGE_SHIFT, ih);
+ start += align;
+ }
+}
+
+static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
+ unsigned long pages, int ih)
+{
+ struct intel_svm_dev *sdev;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(sdev, &svm->devs, list)
+ intel_flush_svm_range_dev(svm, sdev, address, pages, ih);
+ rcu_read_unlock();
+}
+
+/* Pages have been freed at this point */
+static void intel_invalidate_range(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+ struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
+
+ intel_flush_svm_range(svm, start,
+ (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0);
+}
+
+static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
+{
+ struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
+ struct intel_svm_dev *sdev;
+
+ /* This might end up being called from exit_mmap(), *before* the page
+ * tables are cleared. And __mmu_notifier_release() will delete us from
+ * the list of notifiers so that our invalidate_range() callback doesn't
+ * get called when the page tables are cleared. So we need to protect
+ * against hardware accessing those page tables.
+ *
+ * We do it by clearing the entry in the PASID table and then flushing
+ * the IOTLB and the PASID table caches. This might upset hardware;
+ * perhaps we'll want to point the PASID to a dummy PGD (like the zero
+ * page) so that we end up taking a fault that the hardware really
+ * *has* to handle gracefully without affecting other processes.
+ */
+ rcu_read_lock();
+ list_for_each_entry_rcu(sdev, &svm->devs, list)
+ intel_pasid_tear_down_entry(sdev->iommu, sdev->dev,
+ svm->pasid, true);
+ rcu_read_unlock();
+
+}
+
+static const struct mmu_notifier_ops intel_mmuops = {
+ .release = intel_mm_release,
+ .invalidate_range = intel_invalidate_range,
+};
+
+static DEFINE_MUTEX(pasid_mutex);
+static LIST_HEAD(global_svm_list);
+
+#define for_each_svm_dev(sdev, svm, d) \
+ list_for_each_entry((sdev), &(svm)->devs, list) \
+ if ((d) != (sdev)->dev) {} else
+
+static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
+ struct intel_svm **rsvm,
+ struct intel_svm_dev **rsdev)
+{
+ struct intel_svm_dev *d, *sdev = NULL;
+ struct intel_svm *svm;
+
+ /* The caller should hold the pasid_mutex lock */
+ if (WARN_ON(!mutex_is_locked(&pasid_mutex)))
+ return -EINVAL;
+
+ if (pasid == INVALID_IOASID || pasid >= PASID_MAX)
+ return -EINVAL;
+
+ svm = ioasid_find(NULL, pasid, NULL);
+ if (IS_ERR(svm))
+ return PTR_ERR(svm);
+
+ if (!svm)
+ goto out;
+
+ /*
+ * If we found svm for the PASID, there must be at least one device
+ * bond.
+ */
+ if (WARN_ON(list_empty(&svm->devs)))
+ return -EINVAL;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(d, &svm->devs, list) {
+ if (d->dev == dev) {
+ sdev = d;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+out:
+ *rsvm = svm;
+ *rsdev = sdev;
+
+ return 0;
+}
+
+int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
+ struct iommu_gpasid_bind_data *data)
+{
+ struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
+ struct intel_svm_dev *sdev = NULL;
+ struct dmar_domain *dmar_domain;
+ struct device_domain_info *info;
+ struct intel_svm *svm = NULL;
+ unsigned long iflags;
+ int ret = 0;
+
+ if (WARN_ON(!iommu) || !data)
+ return -EINVAL;
+
+ if (data->format != IOMMU_PASID_FORMAT_INTEL_VTD)
+ return -EINVAL;
+
+ /* IOMMU core ensures argsz is more than the start of the union */
+ if (data->argsz < offsetofend(struct iommu_gpasid_bind_data, vendor.vtd))
+ return -EINVAL;
+
+ /* Make sure no undefined flags are used in vendor data */
+ if (data->vendor.vtd.flags & ~(IOMMU_SVA_VTD_GPASID_LAST - 1))
+ return -EINVAL;
+
+ if (!dev_is_pci(dev))
+ return -ENOTSUPP;
+
+ /* VT-d supports devices with full 20 bit PASIDs only */
+ if (pci_max_pasids(to_pci_dev(dev)) != PASID_MAX)
+ return -EINVAL;
+
+ /*
+ * We only check host PASID range, we have no knowledge to check
+ * guest PASID range.
+ */
+ if (data->hpasid <= 0 || data->hpasid >= PASID_MAX)
+ return -EINVAL;
+
+ info = get_domain_info(dev);
+ if (!info)
+ return -EINVAL;
+
+ dmar_domain = to_dmar_domain(domain);
+
+ mutex_lock(&pasid_mutex);
+ ret = pasid_to_svm_sdev(dev, data->hpasid, &svm, &sdev);
+ if (ret)
+ goto out;
+
+ if (sdev) {
+ /*
+ * Do not allow multiple bindings of the same device-PASID since
+ * there is only one SL page tables per PASID. We may revisit
+ * once sharing PGD across domains are supported.
+ */
+ dev_warn_ratelimited(dev, "Already bound with PASID %u\n",
+ svm->pasid);
+ ret = -EBUSY;
+ goto out;
+ }
+
+ if (!svm) {
+ /* We come here when PASID has never been bond to a device. */
+ svm = kzalloc(sizeof(*svm), GFP_KERNEL);
+ if (!svm) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ /* REVISIT: upper layer/VFIO can track host process that bind
+ * the PASID. ioasid_set = mm might be sufficient for vfio to
+ * check pasid VMM ownership. We can drop the following line
+ * once VFIO and IOASID set check is in place.
+ */
+ svm->mm = get_task_mm(current);
+ svm->pasid = data->hpasid;
+ if (data->flags & IOMMU_SVA_GPASID_VAL) {
+ svm->gpasid = data->gpasid;
+ svm->flags |= SVM_FLAG_GUEST_PASID;
+ }
+ ioasid_set_data(data->hpasid, svm);
+ INIT_LIST_HEAD_RCU(&svm->devs);
+ mmput(svm->mm);
+ }
+ sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
+ if (!sdev) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ sdev->dev = dev;
+ sdev->sid = PCI_DEVID(info->bus, info->devfn);
+ sdev->iommu = iommu;
+
+ /* Only count users if device has aux domains */
+ if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX))
+ sdev->users = 1;
+
+ /* Set up device context entry for PASID if not enabled already */
+ ret = intel_iommu_enable_pasid(iommu, sdev->dev);
+ if (ret) {
+ dev_err_ratelimited(dev, "Failed to enable PASID capability\n");
+ kfree(sdev);
+ goto out;
+ }
+
+ /*
+ * PASID table is per device for better security. Therefore, for
+ * each bind of a new device even with an existing PASID, we need to
+ * call the nested mode setup function here.
+ */
+ spin_lock_irqsave(&iommu->lock, iflags);
+ ret = intel_pasid_setup_nested(iommu, dev,
+ (pgd_t *)(uintptr_t)data->gpgd,
+ data->hpasid, &data->vendor.vtd, dmar_domain,
+ data->addr_width);
+ spin_unlock_irqrestore(&iommu->lock, iflags);
+ if (ret) {
+ dev_err_ratelimited(dev, "Failed to set up PASID %llu in nested mode, Err %d\n",
+ data->hpasid, ret);
+ /*
+ * PASID entry should be in cleared state if nested mode
+ * set up failed. So we only need to clear IOASID tracking
+ * data such that free call will succeed.
+ */
+ kfree(sdev);
+ goto out;
+ }
+
+ svm->flags |= SVM_FLAG_GUEST_MODE;
+
+ init_rcu_head(&sdev->rcu);
+ list_add_rcu(&sdev->list, &svm->devs);
+ out:
+ if (!IS_ERR_OR_NULL(svm) && list_empty(&svm->devs)) {
+ ioasid_set_data(data->hpasid, NULL);
+ kfree(svm);
+ }
+
+ mutex_unlock(&pasid_mutex);
+ return ret;
+}
+
+int intel_svm_unbind_gpasid(struct device *dev, u32 pasid)
+{
+ struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
+ struct intel_svm_dev *sdev;
+ struct intel_svm *svm;
+ int ret;
+
+ if (WARN_ON(!iommu))
+ return -EINVAL;
+
+ mutex_lock(&pasid_mutex);
+ ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev);
+ if (ret)
+ goto out;
+
+ if (sdev) {
+ if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX))
+ sdev->users--;
+ if (!sdev->users) {
+ list_del_rcu(&sdev->list);
+ intel_pasid_tear_down_entry(iommu, dev,
+ svm->pasid, false);
+ intel_svm_drain_prq(dev, svm->pasid);
+ kfree_rcu(sdev, rcu);
+
+ if (list_empty(&svm->devs)) {
+ /*
+ * We do not free the IOASID here in that
+ * IOMMU driver did not allocate it.
+ * Unlike native SVM, IOASID for guest use was
+ * allocated prior to the bind call.
+ * In any case, if the free call comes before
+ * the unbind, IOMMU driver will get notified
+ * and perform cleanup.
+ */
+ ioasid_set_data(pasid, NULL);
+ kfree(svm);
+ }
+ }
+ }
+out:
+ mutex_unlock(&pasid_mutex);
+ return ret;
+}
+
+static void _load_pasid(void *unused)
+{
+ update_pasid();
+}
+
+static void load_pasid(struct mm_struct *mm, u32 pasid)
+{
+ mutex_lock(&mm->context.lock);
+
+ /* Synchronize with READ_ONCE in update_pasid(). */
+ smp_store_release(&mm->pasid, pasid);
+
+ /* Update PASID MSR on all CPUs running the mm's tasks. */
+ on_each_cpu_mask(mm_cpumask(mm), _load_pasid, NULL, true);
+
+ mutex_unlock(&mm->context.lock);
+}
+
+/* Caller must hold pasid_mutex, mm reference */
+static int
+intel_svm_bind_mm(struct device *dev, unsigned int flags,
+ struct svm_dev_ops *ops,
+ struct mm_struct *mm, struct intel_svm_dev **sd)
+{
+ struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
+ struct device_domain_info *info;
+ struct intel_svm_dev *sdev;
+ struct intel_svm *svm = NULL;
+ unsigned long iflags;
+ int pasid_max;
+ int ret;
+
+ if (!iommu || dmar_disabled)
+ return -EINVAL;
+
+ if (!intel_svm_capable(iommu))
+ return -ENOTSUPP;
+
+ if (dev_is_pci(dev)) {
+ pasid_max = pci_max_pasids(to_pci_dev(dev));
+ if (pasid_max < 0)
+ return -EINVAL;
+ } else
+ pasid_max = 1 << 20;
+
+ /* Bind supervisor PASID shuld have mm = NULL */
+ if (flags & SVM_FLAG_SUPERVISOR_MODE) {
+ if (!ecap_srs(iommu->ecap) || mm) {
+ pr_err("Supervisor PASID with user provided mm.\n");
+ return -EINVAL;
+ }
+ }
+
+ if (!(flags & SVM_FLAG_PRIVATE_PASID)) {
+ struct intel_svm *t;
+
+ list_for_each_entry(t, &global_svm_list, list) {
+ if (t->mm != mm || (t->flags & SVM_FLAG_PRIVATE_PASID))
+ continue;
+
+ svm = t;
+ if (svm->pasid >= pasid_max) {
+ dev_warn(dev,
+ "Limited PASID width. Cannot use existing PASID %d\n",
+ svm->pasid);
+ ret = -ENOSPC;
+ goto out;
+ }
+
+ /* Find the matching device in svm list */
+ for_each_svm_dev(sdev, svm, dev) {
+ if (sdev->ops != ops) {
+ ret = -EBUSY;
+ goto out;
+ }
+ sdev->users++;
+ goto success;
+ }
+
+ break;
+ }
+ }
+
+ sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
+ if (!sdev) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ sdev->dev = dev;
+ sdev->iommu = iommu;
+
+ ret = intel_iommu_enable_pasid(iommu, dev);
+ if (ret) {
+ kfree(sdev);
+ goto out;
+ }
+
+ info = get_domain_info(dev);
+ sdev->did = FLPT_DEFAULT_DID;
+ sdev->sid = PCI_DEVID(info->bus, info->devfn);
+ if (info->ats_enabled) {
+ sdev->dev_iotlb = 1;
+ sdev->qdep = info->ats_qdep;
+ if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
+ sdev->qdep = 0;
+ }
+
+ /* Finish the setup now we know we're keeping it */
+ sdev->users = 1;
+ sdev->ops = ops;
+ init_rcu_head(&sdev->rcu);
+
+ if (!svm) {
+ svm = kzalloc(sizeof(*svm), GFP_KERNEL);
+ if (!svm) {
+ ret = -ENOMEM;
+ kfree(sdev);
+ goto out;
+ }
+
+ if (pasid_max > intel_pasid_max_id)
+ pasid_max = intel_pasid_max_id;
+
+ /* Do not use PASID 0, reserved for RID to PASID */
+ svm->pasid = ioasid_alloc(NULL, PASID_MIN,
+ pasid_max - 1, svm);
+ if (svm->pasid == INVALID_IOASID) {
+ kfree(svm);
+ kfree(sdev);
+ ret = -ENOSPC;
+ goto out;
+ }
+ svm->notifier.ops = &intel_mmuops;
+ svm->mm = mm;
+ svm->flags = flags;
+ INIT_LIST_HEAD_RCU(&svm->devs);
+ INIT_LIST_HEAD(&svm->list);
+ ret = -ENOMEM;
+ if (mm) {
+ ret = mmu_notifier_register(&svm->notifier, mm);
+ if (ret) {
+ ioasid_free(svm->pasid);
+ kfree(svm);
+ kfree(sdev);
+ goto out;
+ }
+ }
+
+ spin_lock_irqsave(&iommu->lock, iflags);
+ ret = intel_pasid_setup_first_level(iommu, dev,
+ mm ? mm->pgd : init_mm.pgd,
+ svm->pasid, FLPT_DEFAULT_DID,
+ (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) |
+ (cpu_feature_enabled(X86_FEATURE_LA57) ?
+ PASID_FLAG_FL5LP : 0));
+ spin_unlock_irqrestore(&iommu->lock, iflags);
+ if (ret) {
+ if (mm)
+ mmu_notifier_unregister(&svm->notifier, mm);
+ ioasid_free(svm->pasid);
+ kfree(svm);
+ kfree(sdev);
+ goto out;
+ }
+
+ list_add_tail(&svm->list, &global_svm_list);
+ if (mm) {
+ /* The newly allocated pasid is loaded to the mm. */
+ load_pasid(mm, svm->pasid);
+ }
+ } else {
+ /*
+ * Binding a new device with existing PASID, need to setup
+ * the PASID entry.
+ */
+ spin_lock_irqsave(&iommu->lock, iflags);
+ ret = intel_pasid_setup_first_level(iommu, dev,
+ mm ? mm->pgd : init_mm.pgd,
+ svm->pasid, FLPT_DEFAULT_DID,
+ (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) |
+ (cpu_feature_enabled(X86_FEATURE_LA57) ?
+ PASID_FLAG_FL5LP : 0));
+ spin_unlock_irqrestore(&iommu->lock, iflags);
+ if (ret) {
+ kfree(sdev);
+ goto out;
+ }
+ }
+ list_add_rcu(&sdev->list, &svm->devs);
+success:
+ sdev->pasid = svm->pasid;
+ sdev->sva.dev = dev;
+ if (sd)
+ *sd = sdev;
+ ret = 0;
+out:
+ return ret;
+}
+
+/* Caller must hold pasid_mutex */
+static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
+{
+ struct intel_svm_dev *sdev;
+ struct intel_iommu *iommu;
+ struct intel_svm *svm;
+ int ret = -EINVAL;
+
+ iommu = device_to_iommu(dev, NULL, NULL);
+ if (!iommu)
+ goto out;
+
+ ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev);
+ if (ret)
+ goto out;
+
+ if (sdev) {
+ sdev->users--;
+ if (!sdev->users) {
+ list_del_rcu(&sdev->list);
+ /* Flush the PASID cache and IOTLB for this device.
+ * Note that we do depend on the hardware *not* using
+ * the PASID any more. Just as we depend on other
+ * devices never using PASIDs that they have no right
+ * to use. We have a *shared* PASID table, because it's
+ * large and has to be physically contiguous. So it's
+ * hard to be as defensive as we might like. */
+ intel_pasid_tear_down_entry(iommu, dev,
+ svm->pasid, false);
+ intel_svm_drain_prq(dev, svm->pasid);
+ kfree_rcu(sdev, rcu);
+
+ if (list_empty(&svm->devs)) {
+ ioasid_free(svm->pasid);
+ if (svm->mm) {
+ mmu_notifier_unregister(&svm->notifier, svm->mm);
+ /* Clear mm's pasid. */
+ load_pasid(svm->mm, PASID_DISABLED);
+ }
+ list_del(&svm->list);
+ /* We mandate that no page faults may be outstanding
+ * for the PASID when intel_svm_unbind_mm() is called.
+ * If that is not obeyed, subtle errors will happen.
+ * Let's make them less subtle... */
+ memset(svm, 0x6b, sizeof(*svm));
+ kfree(svm);
+ }
+ }
+ }
+out:
+ return ret;
+}
+
+/* Page request queue descriptor */
+struct page_req_dsc {
+ union {
+ struct {
+ u64 type:8;
+ u64 pasid_present:1;
+ u64 priv_data_present:1;
+ u64 rsvd:6;
+ u64 rid:16;
+ u64 pasid:20;
+ u64 exe_req:1;
+ u64 pm_req:1;
+ u64 rsvd2:10;
+ };
+ u64 qw_0;
+ };
+ union {
+ struct {
+ u64 rd_req:1;
+ u64 wr_req:1;
+ u64 lpig:1;
+ u64 prg_index:9;
+ u64 addr:52;
+ };
+ u64 qw_1;
+ };
+ u64 priv_data[2];
+};
+
+#define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x20)
+
+static bool access_error(struct vm_area_struct *vma, struct page_req_dsc *req)
+{
+ unsigned long requested = 0;
+
+ if (req->exe_req)
+ requested |= VM_EXEC;
+
+ if (req->rd_req)
+ requested |= VM_READ;
+
+ if (req->wr_req)
+ requested |= VM_WRITE;
+
+ return (requested & ~vma->vm_flags) != 0;
+}
+
+static bool is_canonical_address(u64 addr)
+{
+ int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
+ long saddr = (long) addr;
+
+ return (((saddr << shift) >> shift) == saddr);
+}
+
+/**
+ * intel_svm_drain_prq - Drain page requests and responses for a pasid
+ * @dev: target device
+ * @pasid: pasid for draining
+ *
+ * Drain all pending page requests and responses related to @pasid in both
+ * software and hardware. This is supposed to be called after the device
+ * driver has stopped DMA, the pasid entry has been cleared, and both IOTLB
+ * and DevTLB have been invalidated.
+ *
+ * It waits until all pending page requests for @pasid in the page fault
+ * queue are completed by the prq handling thread. Then follow the steps
+ * described in VT-d spec CH7.10 to drain all page requests and page
+ * responses pending in the hardware.
+ */
+static void intel_svm_drain_prq(struct device *dev, u32 pasid)
+{
+ struct device_domain_info *info;
+ struct dmar_domain *domain;
+ struct intel_iommu *iommu;
+ struct qi_desc desc[3];
+ struct pci_dev *pdev;
+ int head, tail;
+ u16 sid, did;
+ int qdep;
+
+ info = get_domain_info(dev);
+ if (WARN_ON(!info || !dev_is_pci(dev)))
+ return;
+
+ if (!info->pri_enabled)
+ return;
+
+ iommu = info->iommu;
+ domain = info->domain;
+ pdev = to_pci_dev(dev);
+ sid = PCI_DEVID(info->bus, info->devfn);
+ did = domain->iommu_did[iommu->seq_id];
+ qdep = pci_ats_queue_depth(pdev);
+
+ /*
+ * Check and wait until all pending page requests in the queue are
+ * handled by the prq handling thread.
+ */
+prq_retry:
+ reinit_completion(&iommu->prq_complete);
+ tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
+ head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
+ while (head != tail) {
+ struct page_req_dsc *req;
+
+ req = &iommu->prq[head / sizeof(*req)];
+ if (!req->pasid_present || req->pasid != pasid) {
+ head = (head + sizeof(*req)) & PRQ_RING_MASK;
+ continue;
+ }
+
+ wait_for_completion(&iommu->prq_complete);
+ goto prq_retry;
+ }
+
+ /*
+ * Perform steps described in VT-d spec CH7.10 to drain page
+ * requests and responses in hardware.
+ */
+ memset(desc, 0, sizeof(desc));
+ desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
+ QI_IWD_FENCE |
+ QI_IWD_TYPE;
+ desc[1].qw0 = QI_EIOTLB_PASID(pasid) |
+ QI_EIOTLB_DID(did) |
+ QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
+ QI_EIOTLB_TYPE;
+ desc[2].qw0 = QI_DEV_EIOTLB_PASID(pasid) |
+ QI_DEV_EIOTLB_SID(sid) |
+ QI_DEV_EIOTLB_QDEP(qdep) |
+ QI_DEIOTLB_TYPE |
+ QI_DEV_IOTLB_PFSID(info->pfsid);
+qi_retry:
+ reinit_completion(&iommu->prq_complete);
+ qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN);
+ if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
+ wait_for_completion(&iommu->prq_complete);
+ goto qi_retry;
+ }
+}
+
+static int prq_to_iommu_prot(struct page_req_dsc *req)
+{
+ int prot = 0;
+
+ if (req->rd_req)
+ prot |= IOMMU_FAULT_PERM_READ;
+ if (req->wr_req)
+ prot |= IOMMU_FAULT_PERM_WRITE;
+ if (req->exe_req)
+ prot |= IOMMU_FAULT_PERM_EXEC;
+ if (req->pm_req)
+ prot |= IOMMU_FAULT_PERM_PRIV;
+
+ return prot;
+}
+
+static int
+intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc)
+{
+ struct iommu_fault_event event;
+
+ if (!dev || !dev_is_pci(dev))
+ return -ENODEV;
+
+ /* Fill in event data for device specific processing */
+ memset(&event, 0, sizeof(struct iommu_fault_event));
+ event.fault.type = IOMMU_FAULT_PAGE_REQ;
+ event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT;
+ event.fault.prm.pasid = desc->pasid;
+ event.fault.prm.grpid = desc->prg_index;
+ event.fault.prm.perm = prq_to_iommu_prot(desc);
+
+ if (desc->lpig)
+ event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
+ if (desc->pasid_present) {
+ event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
+ event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
+ }
+ if (desc->priv_data_present) {
+ /*
+ * Set last page in group bit if private data is present,
+ * page response is required as it does for LPIG.
+ * iommu_report_device_fault() doesn't understand this vendor
+ * specific requirement thus we set last_page as a workaround.
+ */
+ event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
+ event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
+ memcpy(event.fault.prm.private_data, desc->priv_data,
+ sizeof(desc->priv_data));
+ }
+
+ return iommu_report_device_fault(dev, &event);
+}
+
+static irqreturn_t prq_event_thread(int irq, void *d)
+{
+ struct intel_svm_dev *sdev = NULL;
+ struct intel_iommu *iommu = d;
+ struct intel_svm *svm = NULL;
+ int head, tail, handled = 0;
+
+ /* Clear PPR bit before reading head/tail registers, to
+ * ensure that we get a new interrupt if needed. */
+ writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);
+
+ tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
+ head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
+ while (head != tail) {
+ struct vm_area_struct *vma;
+ struct page_req_dsc *req;
+ struct qi_desc resp;
+ int result;
+ vm_fault_t ret;
+ u64 address;
+
+ handled = 1;
+
+ req = &iommu->prq[head / sizeof(*req)];
+
+ result = QI_RESP_FAILURE;
+ address = (u64)req->addr << VTD_PAGE_SHIFT;
+ if (!req->pasid_present) {
+ pr_err("%s: Page request without PASID: %08llx %08llx\n",
+ iommu->name, ((unsigned long long *)req)[0],
+ ((unsigned long long *)req)[1]);
+ goto no_pasid;
+ }
+ /* We shall not receive page request for supervisor SVM */
+ if (req->pm_req && (req->rd_req | req->wr_req)) {
+ pr_err("Unexpected page request in Privilege Mode");
+ /* No need to find the matching sdev as for bad_req */
+ goto no_pasid;
+ }
+ /* DMA read with exec requeset is not supported. */
+ if (req->exe_req && req->rd_req) {
+ pr_err("Execution request not supported\n");
+ goto no_pasid;
+ }
+ if (!svm || svm->pasid != req->pasid) {
+ rcu_read_lock();
+ svm = ioasid_find(NULL, req->pasid, NULL);
+ /* It *can't* go away, because the driver is not permitted
+ * to unbind the mm while any page faults are outstanding.
+ * So we only need RCU to protect the internal idr code. */
+ rcu_read_unlock();
+ if (IS_ERR_OR_NULL(svm)) {
+ pr_err("%s: Page request for invalid PASID %d: %08llx %08llx\n",
+ iommu->name, req->pasid, ((unsigned long long *)req)[0],
+ ((unsigned long long *)req)[1]);
+ goto no_pasid;
+ }
+ }
+
+ if (!sdev || sdev->sid != req->rid) {
+ struct intel_svm_dev *t;
+
+ sdev = NULL;
+ rcu_read_lock();
+ list_for_each_entry_rcu(t, &svm->devs, list) {
+ if (t->sid == req->rid) {
+ sdev = t;
+ break;
+ }
+ }
+ rcu_read_unlock();
+ }
+
+ result = QI_RESP_INVALID;
+ /* Since we're using init_mm.pgd directly, we should never take
+ * any faults on kernel addresses. */
+ if (!svm->mm)
+ goto bad_req;
+
+ /* If address is not canonical, return invalid response */
+ if (!is_canonical_address(address))
+ goto bad_req;
+
+ /*
+ * If prq is to be handled outside iommu driver via receiver of
+ * the fault notifiers, we skip the page response here.
+ */
+ if (svm->flags & SVM_FLAG_GUEST_MODE) {
+ if (sdev && !intel_svm_prq_report(sdev->dev, req))
+ goto prq_advance;
+ else
+ goto bad_req;
+ }
+
+ /* If the mm is already defunct, don't handle faults. */
+ if (!mmget_not_zero(svm->mm))
+ goto bad_req;
+
+ mmap_read_lock(svm->mm);
+ vma = find_extend_vma(svm->mm, address);
+ if (!vma || address < vma->vm_start)
+ goto invalid;
+
+ if (access_error(vma, req))
+ goto invalid;
+
+ ret = handle_mm_fault(vma, address,
+ req->wr_req ? FAULT_FLAG_WRITE : 0,
+ NULL);
+ if (ret & VM_FAULT_ERROR)
+ goto invalid;
+
+ result = QI_RESP_SUCCESS;
+invalid:
+ mmap_read_unlock(svm->mm);
+ mmput(svm->mm);
+bad_req:
+ WARN_ON(!sdev);
+ if (sdev && sdev->ops && sdev->ops->fault_cb) {
+ int rwxp = (req->rd_req << 3) | (req->wr_req << 2) |
+ (req->exe_req << 1) | (req->pm_req);
+ sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr,
+ req->priv_data, rwxp, result);
+ }
+ /* We get here in the error case where the PASID lookup failed,
+ and these can be NULL. Do not use them below this point! */
+ sdev = NULL;
+ svm = NULL;
+no_pasid:
+ if (req->lpig || req->priv_data_present) {
+ /*
+ * Per VT-d spec. v3.0 ch7.7, system software must
+ * respond with page group response if private data
+ * is present (PDP) or last page in group (LPIG) bit
+ * is set. This is an additional VT-d feature beyond
+ * PCI ATS spec.
+ */
+ resp.qw0 = QI_PGRP_PASID(req->pasid) |
+ QI_PGRP_DID(req->rid) |
+ QI_PGRP_PASID_P(req->pasid_present) |
+ QI_PGRP_PDP(req->priv_data_present) |
+ QI_PGRP_RESP_CODE(result) |
+ QI_PGRP_RESP_TYPE;
+ resp.qw1 = QI_PGRP_IDX(req->prg_index) |
+ QI_PGRP_LPIG(req->lpig);
+ resp.qw2 = 0;
+ resp.qw3 = 0;
+
+ if (req->priv_data_present)
+ memcpy(&resp.qw2, req->priv_data,
+ sizeof(req->priv_data));
+ qi_submit_sync(iommu, &resp, 1, 0);
+ }
+prq_advance:
+ head = (head + sizeof(*req)) & PRQ_RING_MASK;
+ }
+
+ dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);
+
+ /*
+ * Clear the page request overflow bit and wake up all threads that
+ * are waiting for the completion of this handling.
+ */
+ if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
+ pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n",
+ iommu->name);
+ head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
+ tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
+ if (head == tail) {
+ writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
+ pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared",
+ iommu->name);
+ }
+ }
+
+ if (!completion_done(&iommu->prq_complete))
+ complete(&iommu->prq_complete);
+
+ return IRQ_RETVAL(handled);
+}
+
+#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva)
+struct iommu_sva *
+intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
+{
+ struct iommu_sva *sva = ERR_PTR(-EINVAL);
+ struct intel_svm_dev *sdev = NULL;
+ unsigned int flags = 0;
+ int ret;
+
+ /*
+ * TODO: Consolidate with generic iommu-sva bind after it is merged.
+ * It will require shared SVM data structures, i.e. combine io_mm
+ * and intel_svm etc.
+ */
+ if (drvdata)
+ flags = *(unsigned int *)drvdata;
+ mutex_lock(&pasid_mutex);
+ ret = intel_svm_bind_mm(dev, flags, NULL, mm, &sdev);
+ if (ret)
+ sva = ERR_PTR(ret);
+ else if (sdev)
+ sva = &sdev->sva;
+ else
+ WARN(!sdev, "SVM bind succeeded with no sdev!\n");
+
+ mutex_unlock(&pasid_mutex);
+
+ return sva;
+}
+
+void intel_svm_unbind(struct iommu_sva *sva)
+{
+ struct intel_svm_dev *sdev;
+
+ mutex_lock(&pasid_mutex);
+ sdev = to_intel_svm_dev(sva);
+ intel_svm_unbind_mm(sdev->dev, sdev->pasid);
+ mutex_unlock(&pasid_mutex);
+}
+
+u32 intel_svm_get_pasid(struct iommu_sva *sva)
+{
+ struct intel_svm_dev *sdev;
+ u32 pasid;
+
+ mutex_lock(&pasid_mutex);
+ sdev = to_intel_svm_dev(sva);
+ pasid = sdev->pasid;
+ mutex_unlock(&pasid_mutex);
+
+ return pasid;
+}
+
+int intel_svm_page_response(struct device *dev,
+ struct iommu_fault_event *evt,
+ struct iommu_page_response *msg)
+{
+ struct iommu_fault_page_request *prm;
+ struct intel_svm_dev *sdev = NULL;
+ struct intel_svm *svm = NULL;
+ struct intel_iommu *iommu;
+ bool private_present;
+ bool pasid_present;
+ bool last_page;
+ u8 bus, devfn;
+ int ret = 0;
+ u16 sid;
+
+ if (!dev || !dev_is_pci(dev))
+ return -ENODEV;
+
+ iommu = device_to_iommu(dev, &bus, &devfn);
+ if (!iommu)
+ return -ENODEV;
+
+ if (!msg || !evt)
+ return -EINVAL;
+
+ mutex_lock(&pasid_mutex);
+
+ prm = &evt->fault.prm;
+ sid = PCI_DEVID(bus, devfn);
+ pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
+ private_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
+ last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
+
+ if (!pasid_present) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (prm->pasid == 0 || prm->pasid >= PASID_MAX) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = pasid_to_svm_sdev(dev, prm->pasid, &svm, &sdev);
+ if (ret || !sdev) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ /*
+ * For responses from userspace, need to make sure that the
+ * pasid has been bound to its mm.
+ */
+ if (svm->flags & SVM_FLAG_GUEST_MODE) {
+ struct mm_struct *mm;
+
+ mm = get_task_mm(current);
+ if (!mm) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (mm != svm->mm) {
+ ret = -ENODEV;
+ mmput(mm);
+ goto out;
+ }
+
+ mmput(mm);
+ }
+
+ /*
+ * Per VT-d spec. v3.0 ch7.7, system software must respond
+ * with page group response if private data is present (PDP)
+ * or last page in group (LPIG) bit is set. This is an
+ * additional VT-d requirement beyond PCI ATS spec.
+ */
+ if (last_page || private_present) {
+ struct qi_desc desc;
+
+ desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
+ QI_PGRP_PASID_P(pasid_present) |
+ QI_PGRP_PDP(private_present) |
+ QI_PGRP_RESP_CODE(msg->code) |
+ QI_PGRP_RESP_TYPE;
+ desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
+ desc.qw2 = 0;
+ desc.qw3 = 0;
+ if (private_present)
+ memcpy(&desc.qw2, prm->private_data,
+ sizeof(prm->private_data));
+
+ qi_submit_sync(iommu, &desc, 1, 0);
+ }
+out:
+ mutex_unlock(&pasid_mutex);
+ return ret;
+}
diff --git a/drivers/iommu/intel-trace.c b/drivers/iommu/intel/trace.c
similarity index 100%
rename from drivers/iommu/intel-trace.c
rename to drivers/iommu/intel/trace.c
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index 4cb3949..3bcd3af 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -149,8 +149,6 @@
#define ARM_V7S_TTBR_IRGN_ATTR(attr) \
((((attr) & 0x1) << 6) | (((attr) & 0x2) >> 1))
-#define ARM_V7S_TCR_PD1 BIT(5)
-
#ifdef CONFIG_ZONE_DMA32
#define ARM_V7S_TABLE_GFP_DMA GFP_DMA32
#define ARM_V7S_TABLE_SLAB_FLAGS SLAB_CACHE_DMA32
@@ -244,13 +242,17 @@
__GFP_ZERO | ARM_V7S_TABLE_GFP_DMA, get_order(size));
else if (lvl == 2)
table = kmem_cache_zalloc(data->l2_tables, gfp);
+
+ if (!table)
+ return NULL;
+
phys = virt_to_phys(table);
if (phys != (arm_v7s_iopte)phys) {
/* Doesn't fit in PTE */
dev_err(dev, "Page table does not fit in PTE: %pa", &phys);
goto out_free;
}
- if (table && !cfg->coherent_walk) {
+ if (!cfg->coherent_walk) {
dma = dma_map_single(dev, table, size, DMA_TO_DEVICE);
if (dma_mapping_error(dev, dma))
goto out_free;
@@ -472,7 +474,7 @@
static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova,
phys_addr_t paddr, size_t size, int prot,
- int lvl, arm_v7s_iopte *ptep)
+ int lvl, arm_v7s_iopte *ptep, gfp_t gfp)
{
struct io_pgtable_cfg *cfg = &data->iop.cfg;
arm_v7s_iopte pte, *cptep;
@@ -493,7 +495,7 @@
/* Grab a pointer to the next level */
pte = READ_ONCE(*ptep);
if (!pte) {
- cptep = __arm_v7s_alloc_table(lvl + 1, GFP_ATOMIC, data);
+ cptep = __arm_v7s_alloc_table(lvl + 1, gfp, data);
if (!cptep)
return -ENOMEM;
@@ -514,11 +516,11 @@
}
/* Rinse, repeat */
- return __arm_v7s_map(data, iova, paddr, size, prot, lvl + 1, cptep);
+ return __arm_v7s_map(data, iova, paddr, size, prot, lvl + 1, cptep, gfp);
}
static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot)
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
struct io_pgtable *iop = &data->iop;
@@ -532,7 +534,7 @@
paddr >= (1ULL << data->iop.cfg.oas)))
return -ERANGE;
- ret = __arm_v7s_map(data, iova, paddr, size, prot, 1, data->pgd);
+ ret = __arm_v7s_map(data, iova, paddr, size, prot, 1, data->pgd, gfp);
/*
* Synchronise all PTE updates for the new mapping before there's
* a chance for anything to kick off a table walk for the new iova.
@@ -798,8 +800,8 @@
*/
cfg->pgsize_bitmap &= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
- /* TCR: T0SZ=0, disable TTBR1 */
- cfg->arm_v7s_cfg.tcr = ARM_V7S_TCR_PD1;
+ /* TCR: T0SZ=0, EAE=0 (if applicable) */
+ cfg->arm_v7s_cfg.tcr = 0;
/*
* TEX remap: the indices used map to the closest equivalent types
@@ -822,15 +824,13 @@
/* Ensure the empty pgd is visible before any actual TTBR write */
wmb();
- /* TTBRs */
- cfg->arm_v7s_cfg.ttbr[0] = virt_to_phys(data->pgd) |
- ARM_V7S_TTBR_S | ARM_V7S_TTBR_NOS |
- (cfg->coherent_walk ?
- (ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_WBWA) |
- ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_WBWA)) :
- (ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_NC) |
- ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_NC)));
- cfg->arm_v7s_cfg.ttbr[1] = 0;
+ /* TTBR */
+ cfg->arm_v7s_cfg.ttbr = virt_to_phys(data->pgd) | ARM_V7S_TTBR_S |
+ (cfg->coherent_walk ? (ARM_V7S_TTBR_NOS |
+ ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_WBWA) |
+ ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_WBWA)) :
+ (ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_NC) |
+ ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_NC)));
return &data->iop;
out_free_data:
@@ -846,27 +846,28 @@
#ifdef CONFIG_IOMMU_IO_PGTABLE_ARMV7S_SELFTEST
-static struct io_pgtable_cfg *cfg_cookie;
+static struct io_pgtable_cfg *cfg_cookie __initdata;
-static void dummy_tlb_flush_all(void *cookie)
+static void __init dummy_tlb_flush_all(void *cookie)
{
WARN_ON(cookie != cfg_cookie);
}
-static void dummy_tlb_flush(unsigned long iova, size_t size, size_t granule,
- void *cookie)
+static void __init dummy_tlb_flush(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
{
WARN_ON(cookie != cfg_cookie);
WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
}
-static void dummy_tlb_add_page(struct iommu_iotlb_gather *gather,
- unsigned long iova, size_t granule, void *cookie)
+static void __init dummy_tlb_add_page(struct iommu_iotlb_gather *gather,
+ unsigned long iova, size_t granule,
+ void *cookie)
{
dummy_tlb_flush(iova, granule, granule, cookie);
}
-static const struct iommu_flush_ops dummy_tlb_ops = {
+static const struct iommu_flush_ops dummy_tlb_ops __initconst = {
.tlb_flush_all = dummy_tlb_flush_all,
.tlb_flush_walk = dummy_tlb_flush,
.tlb_flush_leaf = dummy_tlb_flush,
@@ -925,12 +926,12 @@
if (ops->map(ops, iova, iova, size, IOMMU_READ |
IOMMU_WRITE |
IOMMU_NOEXEC |
- IOMMU_CACHE))
+ IOMMU_CACHE, GFP_KERNEL))
return __FAIL(ops);
/* Overlapping mappings */
if (!ops->map(ops, iova, iova + size, size,
- IOMMU_READ | IOMMU_NOEXEC))
+ IOMMU_READ | IOMMU_NOEXEC, GFP_KERNEL))
return __FAIL(ops);
if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
@@ -949,7 +950,7 @@
return __FAIL(ops);
/* Remap of partial unmap */
- if (ops->map(ops, iova_start + size, size, size, IOMMU_READ))
+ if (ops->map(ops, iova_start + size, size, size, IOMMU_READ, GFP_KERNEL))
return __FAIL(ops);
if (ops->iova_to_phys(ops, iova_start + size + 42)
@@ -970,7 +971,7 @@
return __FAIL(ops);
/* Remap full block */
- if (ops->map(ops, iova, iova, size, IOMMU_WRITE))
+ if (ops->map(ops, iova, iova, size, IOMMU_WRITE, GFP_KERNEL))
return __FAIL(ops);
if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index ca51036..e1cd31c 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -20,6 +20,8 @@
#include <asm/barrier.h>
+#include "io-pgtable-arm.h"
+
#define ARM_LPAE_MAX_ADDR_BITS 52
#define ARM_LPAE_S2_MAX_CONCAT_PAGES 16
#define ARM_LPAE_MAX_LEVELS 4
@@ -32,39 +34,31 @@
io_pgtable_to_data(io_pgtable_ops_to_pgtable(x))
/*
- * For consistency with the architecture, we always consider
- * ARM_LPAE_MAX_LEVELS levels, with the walk starting at level n >=0
- */
-#define ARM_LPAE_START_LVL(d) (ARM_LPAE_MAX_LEVELS - (d)->levels)
-
-/*
* Calculate the right shift amount to get to the portion describing level l
* in a virtual address mapped by the pagetable in d.
*/
#define ARM_LPAE_LVL_SHIFT(l,d) \
- ((((d)->levels - ((l) - ARM_LPAE_START_LVL(d) + 1)) \
- * (d)->bits_per_level) + (d)->pg_shift)
+ (((ARM_LPAE_MAX_LEVELS - (l)) * (d)->bits_per_level) + \
+ ilog2(sizeof(arm_lpae_iopte)))
-#define ARM_LPAE_GRANULE(d) (1UL << (d)->pg_shift)
-
-#define ARM_LPAE_PAGES_PER_PGD(d) \
- DIV_ROUND_UP((d)->pgd_size, ARM_LPAE_GRANULE(d))
+#define ARM_LPAE_GRANULE(d) \
+ (sizeof(arm_lpae_iopte) << (d)->bits_per_level)
+#define ARM_LPAE_PGD_SIZE(d) \
+ (sizeof(arm_lpae_iopte) << (d)->pgd_bits)
/*
* Calculate the index at level l used to map virtual address a using the
* pagetable in d.
*/
#define ARM_LPAE_PGD_IDX(l,d) \
- ((l) == ARM_LPAE_START_LVL(d) ? ilog2(ARM_LPAE_PAGES_PER_PGD(d)) : 0)
+ ((l) == (d)->start_level ? (d)->pgd_bits - (d)->bits_per_level : 0)
#define ARM_LPAE_LVL_IDX(a,l,d) \
(((u64)(a) >> ARM_LPAE_LVL_SHIFT(l,d)) & \
((1 << ((d)->bits_per_level + ARM_LPAE_PGD_IDX(l,d))) - 1))
/* Calculate the block/page mapping size at level l for pagetable in d. */
-#define ARM_LPAE_BLOCK_SIZE(l,d) \
- (1ULL << (ilog2(sizeof(arm_lpae_iopte)) + \
- ((ARM_LPAE_MAX_LEVELS - (l)) * (d)->bits_per_level)))
+#define ARM_LPAE_BLOCK_SIZE(l,d) (1ULL << ARM_LPAE_LVL_SHIFT(l,d))
/* Page table bits */
#define ARM_LPAE_PTE_TYPE_SHIFT 0
@@ -108,48 +102,12 @@
#define ARM_LPAE_PTE_MEMATTR_DEV (((arm_lpae_iopte)0x1) << 2)
/* Register bits */
-#define ARM_32_LPAE_TCR_EAE (1 << 31)
-#define ARM_64_LPAE_S2_TCR_RES1 (1 << 31)
-
-#define ARM_LPAE_TCR_EPD1 (1 << 23)
-
-#define ARM_LPAE_TCR_TG0_4K (0 << 14)
-#define ARM_LPAE_TCR_TG0_64K (1 << 14)
-#define ARM_LPAE_TCR_TG0_16K (2 << 14)
-
-#define ARM_LPAE_TCR_SH0_SHIFT 12
-#define ARM_LPAE_TCR_SH0_MASK 0x3
-#define ARM_LPAE_TCR_SH_NS 0
-#define ARM_LPAE_TCR_SH_OS 2
-#define ARM_LPAE_TCR_SH_IS 3
-
-#define ARM_LPAE_TCR_ORGN0_SHIFT 10
-#define ARM_LPAE_TCR_IRGN0_SHIFT 8
-#define ARM_LPAE_TCR_RGN_MASK 0x3
-#define ARM_LPAE_TCR_RGN_NC 0
-#define ARM_LPAE_TCR_RGN_WBWA 1
-#define ARM_LPAE_TCR_RGN_WT 2
-#define ARM_LPAE_TCR_RGN_WB 3
-
-#define ARM_LPAE_TCR_SL0_SHIFT 6
-#define ARM_LPAE_TCR_SL0_MASK 0x3
+#define ARM_LPAE_VTCR_SL0_MASK 0x3
#define ARM_LPAE_TCR_T0SZ_SHIFT 0
-#define ARM_LPAE_TCR_SZ_MASK 0xf
-#define ARM_LPAE_TCR_PS_SHIFT 16
-#define ARM_LPAE_TCR_PS_MASK 0x7
-
-#define ARM_LPAE_TCR_IPS_SHIFT 32
-#define ARM_LPAE_TCR_IPS_MASK 0x7
-
-#define ARM_LPAE_TCR_PS_32_BIT 0x0ULL
-#define ARM_LPAE_TCR_PS_36_BIT 0x1ULL
-#define ARM_LPAE_TCR_PS_40_BIT 0x2ULL
-#define ARM_LPAE_TCR_PS_42_BIT 0x3ULL
-#define ARM_LPAE_TCR_PS_44_BIT 0x4ULL
-#define ARM_LPAE_TCR_PS_48_BIT 0x5ULL
-#define ARM_LPAE_TCR_PS_52_BIT 0x6ULL
+#define ARM_LPAE_VTCR_PS_SHIFT 16
+#define ARM_LPAE_VTCR_PS_MASK 0x7
#define ARM_LPAE_MAIR_ATTR_SHIFT(n) ((n) << 3)
#define ARM_LPAE_MAIR_ATTR_MASK 0xff
@@ -180,10 +138,9 @@
struct arm_lpae_io_pgtable {
struct io_pgtable iop;
- int levels;
- size_t pgd_size;
- unsigned long pg_shift;
- unsigned long bits_per_level;
+ int pgd_bits;
+ int start_level;
+ int bits_per_level;
void *pgd;
};
@@ -213,7 +170,7 @@
{
u64 paddr = pte & ARM_LPAE_PTE_ADDR_MASK;
- if (data->pg_shift < 16)
+ if (ARM_LPAE_GRANULE(data) < SZ_64K)
return paddr;
/* Rotate the packed high-order bits back to the top */
@@ -302,17 +259,11 @@
{
arm_lpae_iopte pte = prot;
- if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS)
- pte |= ARM_LPAE_PTE_NS;
-
if (data->iop.fmt != ARM_MALI_LPAE && lvl == ARM_LPAE_MAX_LEVELS - 1)
pte |= ARM_LPAE_PTE_TYPE_PAGE;
else
pte |= ARM_LPAE_PTE_TYPE_BLOCK;
- if (data->iop.fmt != ARM_MALI_LPAE)
- pte |= ARM_LPAE_PTE_AF;
- pte |= ARM_LPAE_PTE_SH_IS;
pte |= paddr_to_iopte(paddr, data);
__arm_lpae_set_pte(ptep, pte, &data->iop.cfg);
@@ -351,11 +302,12 @@
static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table,
arm_lpae_iopte *ptep,
arm_lpae_iopte curr,
- struct io_pgtable_cfg *cfg)
+ struct arm_lpae_io_pgtable *data)
{
arm_lpae_iopte old, new;
+ struct io_pgtable_cfg *cfg = &data->iop.cfg;
- new = __pa(table) | ARM_LPAE_PTE_TYPE_TABLE;
+ new = paddr_to_iopte(__pa(table), data) | ARM_LPAE_PTE_TYPE_TABLE;
if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
new |= ARM_LPAE_PTE_NSTABLE;
@@ -381,7 +333,7 @@
static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
phys_addr_t paddr, size_t size, arm_lpae_iopte prot,
- int lvl, arm_lpae_iopte *ptep)
+ int lvl, arm_lpae_iopte *ptep, gfp_t gfp)
{
arm_lpae_iopte *cptep, pte;
size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
@@ -392,7 +344,7 @@
ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
/* If we can install a leaf entry at this level, then do so */
- if (size == block_size && (size & cfg->pgsize_bitmap))
+ if (size == block_size)
return arm_lpae_init_pte(data, iova, paddr, prot, lvl, ptep);
/* We can't allocate tables at the final level */
@@ -402,11 +354,11 @@
/* Grab a pointer to the next level */
pte = READ_ONCE(*ptep);
if (!pte) {
- cptep = __arm_lpae_alloc_pages(tblsz, GFP_ATOMIC, cfg);
+ cptep = __arm_lpae_alloc_pages(tblsz, gfp, cfg);
if (!cptep)
return -ENOMEM;
- pte = arm_lpae_install_table(cptep, ptep, 0, cfg);
+ pte = arm_lpae_install_table(cptep, ptep, 0, data);
if (pte)
__arm_lpae_free_pages(cptep, tblsz, cfg);
} else if (!cfg->coherent_walk && !(pte & ARM_LPAE_PTE_SW_SYNC)) {
@@ -422,7 +374,7 @@
}
/* Rinse, repeat */
- return __arm_lpae_map(data, iova, paddr, size, prot, lvl + 1, cptep);
+ return __arm_lpae_map(data, iova, paddr, size, prot, lvl + 1, cptep, gfp);
}
static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
@@ -464,35 +416,55 @@
else if (prot & IOMMU_CACHE)
pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE
<< ARM_LPAE_PTE_ATTRINDX_SHIFT);
- else if (prot & IOMMU_QCOM_SYS_CACHE)
- pte |= (ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE
- << ARM_LPAE_PTE_ATTRINDX_SHIFT);
}
+ /*
+ * Also Mali has its own notions of shareability wherein its Inner
+ * domain covers the cores within the GPU, and its Outer domain is
+ * "outside the GPU" (i.e. either the Inner or System domain in CPU
+ * terms, depending on coherency).
+ */
+ if (prot & IOMMU_CACHE && data->iop.fmt != ARM_MALI_LPAE)
+ pte |= ARM_LPAE_PTE_SH_IS;
+ else
+ pte |= ARM_LPAE_PTE_SH_OS;
+
if (prot & IOMMU_NOEXEC)
pte |= ARM_LPAE_PTE_XN;
+ if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS)
+ pte |= ARM_LPAE_PTE_NS;
+
+ if (data->iop.fmt != ARM_MALI_LPAE)
+ pte |= ARM_LPAE_PTE_AF;
+
return pte;
}
static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
- phys_addr_t paddr, size_t size, int iommu_prot)
+ phys_addr_t paddr, size_t size, int iommu_prot, gfp_t gfp)
{
struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
+ struct io_pgtable_cfg *cfg = &data->iop.cfg;
arm_lpae_iopte *ptep = data->pgd;
- int ret, lvl = ARM_LPAE_START_LVL(data);
+ int ret, lvl = data->start_level;
arm_lpae_iopte prot;
+ long iaext = (s64)iova >> cfg->ias;
/* If no access, then nothing to do */
if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
return 0;
- if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias) ||
- paddr >= (1ULL << data->iop.cfg.oas)))
+ if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size))
+ return -EINVAL;
+
+ if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
+ iaext = ~iaext;
+ if (WARN_ON(iaext || paddr >> cfg->oas))
return -ERANGE;
prot = arm_lpae_prot_to_pte(data, iommu_prot);
- ret = __arm_lpae_map(data, iova, paddr, size, prot, lvl, ptep);
+ ret = __arm_lpae_map(data, iova, paddr, size, prot, lvl, ptep, gfp);
/*
* Synchronise all PTE updates for the new mapping before there's
* a chance for anything to kick off a table walk for the new iova.
@@ -508,8 +480,8 @@
arm_lpae_iopte *start, *end;
unsigned long table_size;
- if (lvl == ARM_LPAE_START_LVL(data))
- table_size = data->pgd_size;
+ if (lvl == data->start_level)
+ table_size = ARM_LPAE_PGD_SIZE(data);
else
table_size = ARM_LPAE_GRANULE(data);
@@ -537,7 +509,7 @@
{
struct arm_lpae_io_pgtable *data = io_pgtable_to_data(iop);
- __arm_lpae_free_pgtable(data, ARM_LPAE_START_LVL(data), data->pgd);
+ __arm_lpae_free_pgtable(data, data->start_level, data->pgd);
kfree(data);
}
@@ -575,7 +547,7 @@
__arm_lpae_init_pte(data, blk_paddr, pte, lvl, &tablep[i]);
}
- pte = arm_lpae_install_table(tablep, ptep, blk_pte, cfg);
+ pte = arm_lpae_install_table(tablep, ptep, blk_pte, data);
if (pte != blk_pte) {
__arm_lpae_free_pages(tablep, tablesz, cfg);
/*
@@ -652,13 +624,19 @@
size_t size, struct iommu_iotlb_gather *gather)
{
struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
+ struct io_pgtable_cfg *cfg = &data->iop.cfg;
arm_lpae_iopte *ptep = data->pgd;
- int lvl = ARM_LPAE_START_LVL(data);
+ long iaext = (s64)iova >> cfg->ias;
- if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias)))
+ if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size))
return 0;
- return __arm_lpae_unmap(data, gather, iova, size, lvl, ptep);
+ if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
+ iaext = ~iaext;
+ if (WARN_ON(iaext))
+ return 0;
+
+ return __arm_lpae_unmap(data, gather, iova, size, data->start_level, ptep);
}
static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
@@ -666,7 +644,7 @@
{
struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
arm_lpae_iopte pte, *ptep = data->pgd;
- int lvl = ARM_LPAE_START_LVL(data);
+ int lvl = data->start_level;
do {
/* Valid IOPTE pointer? */
@@ -743,8 +721,8 @@
static struct arm_lpae_io_pgtable *
arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
{
- unsigned long va_bits, pgd_bits;
struct arm_lpae_io_pgtable *data;
+ int levels, va_bits, pg_shift;
arm_lpae_restrict_pgsizes(cfg);
@@ -757,24 +735,19 @@
if (cfg->oas > ARM_LPAE_MAX_ADDR_BITS)
return NULL;
- if (!selftest_running && cfg->iommu_dev->dma_pfn_offset) {
- dev_err(cfg->iommu_dev, "Cannot accommodate DMA offset for IOMMU page tables\n");
- return NULL;
- }
-
data = kmalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return NULL;
- data->pg_shift = __ffs(cfg->pgsize_bitmap);
- data->bits_per_level = data->pg_shift - ilog2(sizeof(arm_lpae_iopte));
+ pg_shift = __ffs(cfg->pgsize_bitmap);
+ data->bits_per_level = pg_shift - ilog2(sizeof(arm_lpae_iopte));
- va_bits = cfg->ias - data->pg_shift;
- data->levels = DIV_ROUND_UP(va_bits, data->bits_per_level);
+ va_bits = cfg->ias - pg_shift;
+ levels = DIV_ROUND_UP(va_bits, data->bits_per_level);
+ data->start_level = ARM_LPAE_MAX_LEVELS - levels;
/* Calculate the actual size of our pgd (without concatenation) */
- pgd_bits = va_bits - (data->bits_per_level * (data->levels - 1));
- data->pgd_size = 1UL << (pgd_bits + ilog2(sizeof(arm_lpae_iopte)));
+ data->pgd_bits = va_bits - (data->bits_per_level * (levels - 1));
data->iop.ops = (struct io_pgtable_ops) {
.map = arm_lpae_map,
@@ -790,9 +763,12 @@
{
u64 reg;
struct arm_lpae_io_pgtable *data;
+ typeof(&cfg->arm_lpae_s1_cfg.tcr) tcr = &cfg->arm_lpae_s1_cfg.tcr;
+ bool tg1;
if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
- IO_PGTABLE_QUIRK_NON_STRICT))
+ IO_PGTABLE_QUIRK_NON_STRICT |
+ IO_PGTABLE_QUIRK_ARM_TTBR1))
return NULL;
data = arm_lpae_alloc_pgtable(cfg);
@@ -801,58 +777,55 @@
/* TCR */
if (cfg->coherent_walk) {
- reg = (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH0_SHIFT) |
- (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) |
- (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT);
+ tcr->sh = ARM_LPAE_TCR_SH_IS;
+ tcr->irgn = ARM_LPAE_TCR_RGN_WBWA;
+ tcr->orgn = ARM_LPAE_TCR_RGN_WBWA;
} else {
- reg = (ARM_LPAE_TCR_SH_OS << ARM_LPAE_TCR_SH0_SHIFT) |
- (ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_IRGN0_SHIFT) |
- (ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_ORGN0_SHIFT);
+ tcr->sh = ARM_LPAE_TCR_SH_OS;
+ tcr->irgn = ARM_LPAE_TCR_RGN_NC;
+ tcr->orgn = ARM_LPAE_TCR_RGN_NC;
}
+ tg1 = cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1;
switch (ARM_LPAE_GRANULE(data)) {
case SZ_4K:
- reg |= ARM_LPAE_TCR_TG0_4K;
+ tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_4K : ARM_LPAE_TCR_TG0_4K;
break;
case SZ_16K:
- reg |= ARM_LPAE_TCR_TG0_16K;
+ tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_16K : ARM_LPAE_TCR_TG0_16K;
break;
case SZ_64K:
- reg |= ARM_LPAE_TCR_TG0_64K;
+ tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_64K : ARM_LPAE_TCR_TG0_64K;
break;
}
switch (cfg->oas) {
case 32:
- reg |= (ARM_LPAE_TCR_PS_32_BIT << ARM_LPAE_TCR_IPS_SHIFT);
+ tcr->ips = ARM_LPAE_TCR_PS_32_BIT;
break;
case 36:
- reg |= (ARM_LPAE_TCR_PS_36_BIT << ARM_LPAE_TCR_IPS_SHIFT);
+ tcr->ips = ARM_LPAE_TCR_PS_36_BIT;
break;
case 40:
- reg |= (ARM_LPAE_TCR_PS_40_BIT << ARM_LPAE_TCR_IPS_SHIFT);
+ tcr->ips = ARM_LPAE_TCR_PS_40_BIT;
break;
case 42:
- reg |= (ARM_LPAE_TCR_PS_42_BIT << ARM_LPAE_TCR_IPS_SHIFT);
+ tcr->ips = ARM_LPAE_TCR_PS_42_BIT;
break;
case 44:
- reg |= (ARM_LPAE_TCR_PS_44_BIT << ARM_LPAE_TCR_IPS_SHIFT);
+ tcr->ips = ARM_LPAE_TCR_PS_44_BIT;
break;
case 48:
- reg |= (ARM_LPAE_TCR_PS_48_BIT << ARM_LPAE_TCR_IPS_SHIFT);
+ tcr->ips = ARM_LPAE_TCR_PS_48_BIT;
break;
case 52:
- reg |= (ARM_LPAE_TCR_PS_52_BIT << ARM_LPAE_TCR_IPS_SHIFT);
+ tcr->ips = ARM_LPAE_TCR_PS_52_BIT;
break;
default:
goto out_free_data;
}
- reg |= (64ULL - cfg->ias) << ARM_LPAE_TCR_T0SZ_SHIFT;
-
- /* Disable speculative walks through TTBR1 */
- reg |= ARM_LPAE_TCR_EPD1;
- cfg->arm_lpae_s1_cfg.tcr = reg;
+ tcr->tsz = 64ULL - cfg->ias;
/* MAIRs */
reg = (ARM_LPAE_MAIR_ATTR_NC
@@ -864,20 +837,19 @@
(ARM_LPAE_MAIR_ATTR_INC_OWBRWA
<< ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE));
- cfg->arm_lpae_s1_cfg.mair[0] = reg;
- cfg->arm_lpae_s1_cfg.mair[1] = 0;
+ cfg->arm_lpae_s1_cfg.mair = reg;
/* Looking good; allocate a pgd */
- data->pgd = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL, cfg);
+ data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data),
+ GFP_KERNEL, cfg);
if (!data->pgd)
goto out_free_data;
/* Ensure the empty pgd is visible before any actual TTBR write */
wmb();
- /* TTBRs */
- cfg->arm_lpae_s1_cfg.ttbr[0] = virt_to_phys(data->pgd);
- cfg->arm_lpae_s1_cfg.ttbr[1] = 0;
+ /* TTBR */
+ cfg->arm_lpae_s1_cfg.ttbr = virt_to_phys(data->pgd);
return &data->iop;
out_free_data:
@@ -888,8 +860,9 @@
static struct io_pgtable *
arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
{
- u64 reg, sl;
+ u64 sl;
struct arm_lpae_io_pgtable *data;
+ typeof(&cfg->arm_lpae_s2_cfg.vtcr) vtcr = &cfg->arm_lpae_s2_cfg.vtcr;
/* The NS quirk doesn't apply at stage 2 */
if (cfg->quirks & ~(IO_PGTABLE_QUIRK_NON_STRICT))
@@ -903,69 +876,74 @@
* Concatenate PGDs at level 1 if possible in order to reduce
* the depth of the stage-2 walk.
*/
- if (data->levels == ARM_LPAE_MAX_LEVELS) {
+ if (data->start_level == 0) {
unsigned long pgd_pages;
- pgd_pages = data->pgd_size >> ilog2(sizeof(arm_lpae_iopte));
+ pgd_pages = ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte);
if (pgd_pages <= ARM_LPAE_S2_MAX_CONCAT_PAGES) {
- data->pgd_size = pgd_pages << data->pg_shift;
- data->levels--;
+ data->pgd_bits += data->bits_per_level;
+ data->start_level++;
}
}
/* VTCR */
- reg = ARM_64_LPAE_S2_TCR_RES1 |
- (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH0_SHIFT) |
- (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) |
- (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT);
+ if (cfg->coherent_walk) {
+ vtcr->sh = ARM_LPAE_TCR_SH_IS;
+ vtcr->irgn = ARM_LPAE_TCR_RGN_WBWA;
+ vtcr->orgn = ARM_LPAE_TCR_RGN_WBWA;
+ } else {
+ vtcr->sh = ARM_LPAE_TCR_SH_OS;
+ vtcr->irgn = ARM_LPAE_TCR_RGN_NC;
+ vtcr->orgn = ARM_LPAE_TCR_RGN_NC;
+ }
- sl = ARM_LPAE_START_LVL(data);
+ sl = data->start_level;
switch (ARM_LPAE_GRANULE(data)) {
case SZ_4K:
- reg |= ARM_LPAE_TCR_TG0_4K;
+ vtcr->tg = ARM_LPAE_TCR_TG0_4K;
sl++; /* SL0 format is different for 4K granule size */
break;
case SZ_16K:
- reg |= ARM_LPAE_TCR_TG0_16K;
+ vtcr->tg = ARM_LPAE_TCR_TG0_16K;
break;
case SZ_64K:
- reg |= ARM_LPAE_TCR_TG0_64K;
+ vtcr->tg = ARM_LPAE_TCR_TG0_64K;
break;
}
switch (cfg->oas) {
case 32:
- reg |= (ARM_LPAE_TCR_PS_32_BIT << ARM_LPAE_TCR_PS_SHIFT);
+ vtcr->ps = ARM_LPAE_TCR_PS_32_BIT;
break;
case 36:
- reg |= (ARM_LPAE_TCR_PS_36_BIT << ARM_LPAE_TCR_PS_SHIFT);
+ vtcr->ps = ARM_LPAE_TCR_PS_36_BIT;
break;
case 40:
- reg |= (ARM_LPAE_TCR_PS_40_BIT << ARM_LPAE_TCR_PS_SHIFT);
+ vtcr->ps = ARM_LPAE_TCR_PS_40_BIT;
break;
case 42:
- reg |= (ARM_LPAE_TCR_PS_42_BIT << ARM_LPAE_TCR_PS_SHIFT);
+ vtcr->ps = ARM_LPAE_TCR_PS_42_BIT;
break;
case 44:
- reg |= (ARM_LPAE_TCR_PS_44_BIT << ARM_LPAE_TCR_PS_SHIFT);
+ vtcr->ps = ARM_LPAE_TCR_PS_44_BIT;
break;
case 48:
- reg |= (ARM_LPAE_TCR_PS_48_BIT << ARM_LPAE_TCR_PS_SHIFT);
+ vtcr->ps = ARM_LPAE_TCR_PS_48_BIT;
break;
case 52:
- reg |= (ARM_LPAE_TCR_PS_52_BIT << ARM_LPAE_TCR_PS_SHIFT);
+ vtcr->ps = ARM_LPAE_TCR_PS_52_BIT;
break;
default:
goto out_free_data;
}
- reg |= (64ULL - cfg->ias) << ARM_LPAE_TCR_T0SZ_SHIFT;
- reg |= (~sl & ARM_LPAE_TCR_SL0_MASK) << ARM_LPAE_TCR_SL0_SHIFT;
- cfg->arm_lpae_s2_cfg.vtcr = reg;
+ vtcr->tsz = 64ULL - cfg->ias;
+ vtcr->sl = ~sl & ARM_LPAE_VTCR_SL0_MASK;
/* Allocate pgd pages */
- data->pgd = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL, cfg);
+ data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data),
+ GFP_KERNEL, cfg);
if (!data->pgd)
goto out_free_data;
@@ -984,35 +962,21 @@
static struct io_pgtable *
arm_32_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
{
- struct io_pgtable *iop;
-
if (cfg->ias > 32 || cfg->oas > 40)
return NULL;
cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);
- iop = arm_64_lpae_alloc_pgtable_s1(cfg, cookie);
- if (iop) {
- cfg->arm_lpae_s1_cfg.tcr |= ARM_32_LPAE_TCR_EAE;
- cfg->arm_lpae_s1_cfg.tcr &= 0xffffffff;
- }
-
- return iop;
+ return arm_64_lpae_alloc_pgtable_s1(cfg, cookie);
}
static struct io_pgtable *
arm_32_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
{
- struct io_pgtable *iop;
-
if (cfg->ias > 40 || cfg->oas > 40)
return NULL;
cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);
- iop = arm_64_lpae_alloc_pgtable_s2(cfg, cookie);
- if (iop)
- cfg->arm_lpae_s2_cfg.vtcr &= 0xffffffff;
-
- return iop;
+ return arm_64_lpae_alloc_pgtable_s2(cfg, cookie);
}
static struct io_pgtable *
@@ -1034,9 +998,9 @@
return NULL;
/* Mali seems to need a full 4-level table regardless of IAS */
- if (data->levels < ARM_LPAE_MAX_LEVELS) {
- data->levels = ARM_LPAE_MAX_LEVELS;
- data->pgd_size = sizeof(arm_lpae_iopte);
+ if (data->start_level > 0) {
+ data->start_level = 0;
+ data->pgd_bits = 0;
}
/*
* MEMATTR: Mali has no actual notion of a non-cacheable type, so the
@@ -1053,7 +1017,8 @@
(ARM_MALI_LPAE_MEMATTR_IMP_DEF
<< ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV));
- data->pgd = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL, cfg);
+ data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data), GFP_KERNEL,
+ cfg);
if (!data->pgd)
goto out_free_data;
@@ -1063,6 +1028,9 @@
cfg->arm_mali_lpae_cfg.transtab = virt_to_phys(data->pgd) |
ARM_MALI_LPAE_TTBR_READ_INNER |
ARM_MALI_LPAE_TTBR_ADRMODE_TABLE;
+ if (cfg->coherent_walk)
+ cfg->arm_mali_lpae_cfg.transtab |= ARM_MALI_LPAE_TTBR_SHARE_OUTER;
+
return &data->iop;
out_free_data:
@@ -1097,22 +1065,23 @@
#ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST
-static struct io_pgtable_cfg *cfg_cookie;
+static struct io_pgtable_cfg *cfg_cookie __initdata;
-static void dummy_tlb_flush_all(void *cookie)
+static void __init dummy_tlb_flush_all(void *cookie)
{
WARN_ON(cookie != cfg_cookie);
}
-static void dummy_tlb_flush(unsigned long iova, size_t size, size_t granule,
- void *cookie)
+static void __init dummy_tlb_flush(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
{
WARN_ON(cookie != cfg_cookie);
WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
}
-static void dummy_tlb_add_page(struct iommu_iotlb_gather *gather,
- unsigned long iova, size_t granule, void *cookie)
+static void __init dummy_tlb_add_page(struct iommu_iotlb_gather *gather,
+ unsigned long iova, size_t granule,
+ void *cookie)
{
dummy_tlb_flush(iova, granule, granule, cookie);
}
@@ -1131,9 +1100,9 @@
pr_err("cfg: pgsize_bitmap 0x%lx, ias %u-bit\n",
cfg->pgsize_bitmap, cfg->ias);
- pr_err("data: %d levels, 0x%zx pgd_size, %lu pg_shift, %lu bits_per_level, pgd @ %p\n",
- data->levels, data->pgd_size, data->pg_shift,
- data->bits_per_level, data->pgd);
+ pr_err("data: %d levels, 0x%zx pgd_size, %u pg_shift, %u bits_per_level, pgd @ %p\n",
+ ARM_LPAE_MAX_LEVELS - data->start_level, ARM_LPAE_PGD_SIZE(data),
+ ilog2(ARM_LPAE_GRANULE(data)), data->bits_per_level, data->pgd);
}
#define __FAIL(ops, i) ({ \
@@ -1145,7 +1114,7 @@
static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg)
{
- static const enum io_pgtable_fmt fmts[] = {
+ static const enum io_pgtable_fmt fmts[] __initconst = {
ARM_64_LPAE_S1,
ARM_64_LPAE_S2,
};
@@ -1188,12 +1157,12 @@
if (ops->map(ops, iova, iova, size, IOMMU_READ |
IOMMU_WRITE |
IOMMU_NOEXEC |
- IOMMU_CACHE))
+ IOMMU_CACHE, GFP_KERNEL))
return __FAIL(ops, i);
/* Overlapping mappings */
if (!ops->map(ops, iova, iova + size, size,
- IOMMU_READ | IOMMU_NOEXEC))
+ IOMMU_READ | IOMMU_NOEXEC, GFP_KERNEL))
return __FAIL(ops, i);
if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
@@ -1208,7 +1177,7 @@
return __FAIL(ops, i);
/* Remap of partial unmap */
- if (ops->map(ops, SZ_1G + size, size, size, IOMMU_READ))
+ if (ops->map(ops, SZ_1G + size, size, size, IOMMU_READ, GFP_KERNEL))
return __FAIL(ops, i);
if (ops->iova_to_phys(ops, SZ_1G + size + 42) != (size + 42))
@@ -1226,7 +1195,7 @@
return __FAIL(ops, i);
/* Remap full block */
- if (ops->map(ops, iova, iova, size, IOMMU_WRITE))
+ if (ops->map(ops, iova, iova, size, IOMMU_WRITE, GFP_KERNEL))
return __FAIL(ops, i);
if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
@@ -1244,13 +1213,13 @@
static int __init arm_lpae_do_selftests(void)
{
- static const unsigned long pgsize[] = {
+ static const unsigned long pgsize[] __initconst = {
SZ_4K | SZ_2M | SZ_1G,
SZ_16K | SZ_32M,
SZ_64K | SZ_512M,
};
- static const unsigned int ias[] = {
+ static const unsigned int ias[] __initconst = {
32, 36, 40, 42, 44, 48,
};
diff --git a/drivers/iommu/io-pgtable-arm.h b/drivers/iommu/io-pgtable-arm.h
new file mode 100644
index 0000000..ba7cfdf
--- /dev/null
+++ b/drivers/iommu/io-pgtable-arm.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef IO_PGTABLE_ARM_H_
+#define IO_PGTABLE_ARM_H_
+
+#define ARM_LPAE_TCR_TG0_4K 0
+#define ARM_LPAE_TCR_TG0_64K 1
+#define ARM_LPAE_TCR_TG0_16K 2
+
+#define ARM_LPAE_TCR_TG1_16K 1
+#define ARM_LPAE_TCR_TG1_4K 2
+#define ARM_LPAE_TCR_TG1_64K 3
+
+#define ARM_LPAE_TCR_SH_NS 0
+#define ARM_LPAE_TCR_SH_OS 2
+#define ARM_LPAE_TCR_SH_IS 3
+
+#define ARM_LPAE_TCR_RGN_NC 0
+#define ARM_LPAE_TCR_RGN_WBWA 1
+#define ARM_LPAE_TCR_RGN_WT 2
+#define ARM_LPAE_TCR_RGN_WB 3
+
+#define ARM_LPAE_TCR_PS_32_BIT 0x0ULL
+#define ARM_LPAE_TCR_PS_36_BIT 0x1ULL
+#define ARM_LPAE_TCR_PS_40_BIT 0x2ULL
+#define ARM_LPAE_TCR_PS_42_BIT 0x3ULL
+#define ARM_LPAE_TCR_PS_44_BIT 0x4ULL
+#define ARM_LPAE_TCR_PS_48_BIT 0x5ULL
+#define ARM_LPAE_TCR_PS_52_BIT 0x6ULL
+
+#endif /* IO_PGTABLE_ARM_H_ */
diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c
index ced53e5..94394c8 100644
--- a/drivers/iommu/io-pgtable.c
+++ b/drivers/iommu/io-pgtable.c
@@ -63,7 +63,7 @@
if (!ops)
return;
- iop = container_of(ops, struct io_pgtable, ops);
+ iop = io_pgtable_ops_to_pgtable(ops);
io_pgtable_tlb_flush_all(iop);
io_pgtable_init_table[iop->fmt]->free(iop);
}
diff --git a/drivers/iommu/ioasid.c b/drivers/iommu/ioasid.c
new file mode 100644
index 0000000..0f8dd37
--- /dev/null
+++ b/drivers/iommu/ioasid.c
@@ -0,0 +1,422 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * I/O Address Space ID allocator. There is one global IOASID space, split into
+ * subsets. Users create a subset with DECLARE_IOASID_SET, then allocate and
+ * free IOASIDs with ioasid_alloc and ioasid_free.
+ */
+#include <linux/ioasid.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/xarray.h>
+
+struct ioasid_data {
+ ioasid_t id;
+ struct ioasid_set *set;
+ void *private;
+ struct rcu_head rcu;
+};
+
+/*
+ * struct ioasid_allocator_data - Internal data structure to hold information
+ * about an allocator. There are two types of allocators:
+ *
+ * - Default allocator always has its own XArray to track the IOASIDs allocated.
+ * - Custom allocators may share allocation helpers with different private data.
+ * Custom allocators that share the same helper functions also share the same
+ * XArray.
+ * Rules:
+ * 1. Default allocator is always available, not dynamically registered. This is
+ * to prevent race conditions with early boot code that want to register
+ * custom allocators or allocate IOASIDs.
+ * 2. Custom allocators take precedence over the default allocator.
+ * 3. When all custom allocators sharing the same helper functions are
+ * unregistered (e.g. due to hotplug), all outstanding IOASIDs must be
+ * freed. Otherwise, outstanding IOASIDs will be lost and orphaned.
+ * 4. When switching between custom allocators sharing the same helper
+ * functions, outstanding IOASIDs are preserved.
+ * 5. When switching between custom allocator and default allocator, all IOASIDs
+ * must be freed to ensure unadulterated space for the new allocator.
+ *
+ * @ops: allocator helper functions and its data
+ * @list: registered custom allocators
+ * @slist: allocators share the same ops but different data
+ * @flags: attributes of the allocator
+ * @xa: xarray holds the IOASID space
+ * @rcu: used for kfree_rcu when unregistering allocator
+ */
+struct ioasid_allocator_data {
+ struct ioasid_allocator_ops *ops;
+ struct list_head list;
+ struct list_head slist;
+#define IOASID_ALLOCATOR_CUSTOM BIT(0) /* Needs framework to track results */
+ unsigned long flags;
+ struct xarray xa;
+ struct rcu_head rcu;
+};
+
+static DEFINE_SPINLOCK(ioasid_allocator_lock);
+static LIST_HEAD(allocators_list);
+
+static ioasid_t default_alloc(ioasid_t min, ioasid_t max, void *opaque);
+static void default_free(ioasid_t ioasid, void *opaque);
+
+static struct ioasid_allocator_ops default_ops = {
+ .alloc = default_alloc,
+ .free = default_free,
+};
+
+static struct ioasid_allocator_data default_allocator = {
+ .ops = &default_ops,
+ .flags = 0,
+ .xa = XARRAY_INIT(ioasid_xa, XA_FLAGS_ALLOC),
+};
+
+static struct ioasid_allocator_data *active_allocator = &default_allocator;
+
+static ioasid_t default_alloc(ioasid_t min, ioasid_t max, void *opaque)
+{
+ ioasid_t id;
+
+ if (xa_alloc(&default_allocator.xa, &id, opaque, XA_LIMIT(min, max), GFP_ATOMIC)) {
+ pr_err("Failed to alloc ioasid from %d to %d\n", min, max);
+ return INVALID_IOASID;
+ }
+
+ return id;
+}
+
+static void default_free(ioasid_t ioasid, void *opaque)
+{
+ struct ioasid_data *ioasid_data;
+
+ ioasid_data = xa_erase(&default_allocator.xa, ioasid);
+ kfree_rcu(ioasid_data, rcu);
+}
+
+/* Allocate and initialize a new custom allocator with its helper functions */
+static struct ioasid_allocator_data *ioasid_alloc_allocator(struct ioasid_allocator_ops *ops)
+{
+ struct ioasid_allocator_data *ia_data;
+
+ ia_data = kzalloc(sizeof(*ia_data), GFP_ATOMIC);
+ if (!ia_data)
+ return NULL;
+
+ xa_init_flags(&ia_data->xa, XA_FLAGS_ALLOC);
+ INIT_LIST_HEAD(&ia_data->slist);
+ ia_data->flags |= IOASID_ALLOCATOR_CUSTOM;
+ ia_data->ops = ops;
+
+ /* For tracking custom allocators that share the same ops */
+ list_add_tail(&ops->list, &ia_data->slist);
+
+ return ia_data;
+}
+
+static bool use_same_ops(struct ioasid_allocator_ops *a, struct ioasid_allocator_ops *b)
+{
+ return (a->free == b->free) && (a->alloc == b->alloc);
+}
+
+/**
+ * ioasid_register_allocator - register a custom allocator
+ * @ops: the custom allocator ops to be registered
+ *
+ * Custom allocators take precedence over the default xarray based allocator.
+ * Private data associated with the IOASID allocated by the custom allocators
+ * are managed by IOASID framework similar to data stored in xa by default
+ * allocator.
+ *
+ * There can be multiple allocators registered but only one is active. In case
+ * of runtime removal of a custom allocator, the next one is activated based
+ * on the registration ordering.
+ *
+ * Multiple allocators can share the same alloc() function, in this case the
+ * IOASID space is shared.
+ */
+int ioasid_register_allocator(struct ioasid_allocator_ops *ops)
+{
+ struct ioasid_allocator_data *ia_data;
+ struct ioasid_allocator_data *pallocator;
+ int ret = 0;
+
+ spin_lock(&ioasid_allocator_lock);
+
+ ia_data = ioasid_alloc_allocator(ops);
+ if (!ia_data) {
+ ret = -ENOMEM;
+ goto out_unlock;
+ }
+
+ /*
+ * No particular preference, we activate the first one and keep
+ * the later registered allocators in a list in case the first one gets
+ * removed due to hotplug.
+ */
+ if (list_empty(&allocators_list)) {
+ WARN_ON(active_allocator != &default_allocator);
+ /* Use this new allocator if default is not active */
+ if (xa_empty(&active_allocator->xa)) {
+ rcu_assign_pointer(active_allocator, ia_data);
+ list_add_tail(&ia_data->list, &allocators_list);
+ goto out_unlock;
+ }
+ pr_warn("Default allocator active with outstanding IOASID\n");
+ ret = -EAGAIN;
+ goto out_free;
+ }
+
+ /* Check if the allocator is already registered */
+ list_for_each_entry(pallocator, &allocators_list, list) {
+ if (pallocator->ops == ops) {
+ pr_err("IOASID allocator already registered\n");
+ ret = -EEXIST;
+ goto out_free;
+ } else if (use_same_ops(pallocator->ops, ops)) {
+ /*
+ * If the new allocator shares the same ops,
+ * then they will share the same IOASID space.
+ * We should put them under the same xarray.
+ */
+ list_add_tail(&ops->list, &pallocator->slist);
+ goto out_free;
+ }
+ }
+ list_add_tail(&ia_data->list, &allocators_list);
+
+ spin_unlock(&ioasid_allocator_lock);
+ return 0;
+out_free:
+ kfree(ia_data);
+out_unlock:
+ spin_unlock(&ioasid_allocator_lock);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ioasid_register_allocator);
+
+/**
+ * ioasid_unregister_allocator - Remove a custom IOASID allocator ops
+ * @ops: the custom allocator to be removed
+ *
+ * Remove an allocator from the list, activate the next allocator in
+ * the order it was registered. Or revert to default allocator if all
+ * custom allocators are unregistered without outstanding IOASIDs.
+ */
+void ioasid_unregister_allocator(struct ioasid_allocator_ops *ops)
+{
+ struct ioasid_allocator_data *pallocator;
+ struct ioasid_allocator_ops *sops;
+
+ spin_lock(&ioasid_allocator_lock);
+ if (list_empty(&allocators_list)) {
+ pr_warn("No custom IOASID allocators active!\n");
+ goto exit_unlock;
+ }
+
+ list_for_each_entry(pallocator, &allocators_list, list) {
+ if (!use_same_ops(pallocator->ops, ops))
+ continue;
+
+ if (list_is_singular(&pallocator->slist)) {
+ /* No shared helper functions */
+ list_del(&pallocator->list);
+ /*
+ * All IOASIDs should have been freed before
+ * the last allocator that shares the same ops
+ * is unregistered.
+ */
+ WARN_ON(!xa_empty(&pallocator->xa));
+ if (list_empty(&allocators_list)) {
+ pr_info("No custom IOASID allocators, switch to default.\n");
+ rcu_assign_pointer(active_allocator, &default_allocator);
+ } else if (pallocator == active_allocator) {
+ rcu_assign_pointer(active_allocator,
+ list_first_entry(&allocators_list,
+ struct ioasid_allocator_data, list));
+ pr_info("IOASID allocator changed");
+ }
+ kfree_rcu(pallocator, rcu);
+ break;
+ }
+ /*
+ * Find the matching shared ops to delete,
+ * but keep outstanding IOASIDs
+ */
+ list_for_each_entry(sops, &pallocator->slist, list) {
+ if (sops == ops) {
+ list_del(&ops->list);
+ break;
+ }
+ }
+ break;
+ }
+
+exit_unlock:
+ spin_unlock(&ioasid_allocator_lock);
+}
+EXPORT_SYMBOL_GPL(ioasid_unregister_allocator);
+
+/**
+ * ioasid_set_data - Set private data for an allocated ioasid
+ * @ioasid: the ID to set data
+ * @data: the private data
+ *
+ * For IOASID that is already allocated, private data can be set
+ * via this API. Future lookup can be done via ioasid_find.
+ */
+int ioasid_set_data(ioasid_t ioasid, void *data)
+{
+ struct ioasid_data *ioasid_data;
+ int ret = 0;
+
+ spin_lock(&ioasid_allocator_lock);
+ ioasid_data = xa_load(&active_allocator->xa, ioasid);
+ if (ioasid_data)
+ rcu_assign_pointer(ioasid_data->private, data);
+ else
+ ret = -ENOENT;
+ spin_unlock(&ioasid_allocator_lock);
+
+ /*
+ * Wait for readers to stop accessing the old private data, so the
+ * caller can free it.
+ */
+ if (!ret)
+ synchronize_rcu();
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ioasid_set_data);
+
+/**
+ * ioasid_alloc - Allocate an IOASID
+ * @set: the IOASID set
+ * @min: the minimum ID (inclusive)
+ * @max: the maximum ID (inclusive)
+ * @private: data private to the caller
+ *
+ * Allocate an ID between @min and @max. The @private pointer is stored
+ * internally and can be retrieved with ioasid_find().
+ *
+ * Return: the allocated ID on success, or %INVALID_IOASID on failure.
+ */
+ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max,
+ void *private)
+{
+ struct ioasid_data *data;
+ void *adata;
+ ioasid_t id;
+
+ data = kzalloc(sizeof(*data), GFP_ATOMIC);
+ if (!data)
+ return INVALID_IOASID;
+
+ data->set = set;
+ data->private = private;
+
+ /*
+ * Custom allocator needs allocator data to perform platform specific
+ * operations.
+ */
+ spin_lock(&ioasid_allocator_lock);
+ adata = active_allocator->flags & IOASID_ALLOCATOR_CUSTOM ? active_allocator->ops->pdata : data;
+ id = active_allocator->ops->alloc(min, max, adata);
+ if (id == INVALID_IOASID) {
+ pr_err("Failed ASID allocation %lu\n", active_allocator->flags);
+ goto exit_free;
+ }
+
+ if ((active_allocator->flags & IOASID_ALLOCATOR_CUSTOM) &&
+ xa_alloc(&active_allocator->xa, &id, data, XA_LIMIT(id, id), GFP_ATOMIC)) {
+ /* Custom allocator needs framework to store and track allocation results */
+ pr_err("Failed to alloc ioasid from %d\n", id);
+ active_allocator->ops->free(id, active_allocator->ops->pdata);
+ goto exit_free;
+ }
+ data->id = id;
+
+ spin_unlock(&ioasid_allocator_lock);
+ return id;
+exit_free:
+ spin_unlock(&ioasid_allocator_lock);
+ kfree(data);
+ return INVALID_IOASID;
+}
+EXPORT_SYMBOL_GPL(ioasid_alloc);
+
+/**
+ * ioasid_free - Free an IOASID
+ * @ioasid: the ID to remove
+ */
+void ioasid_free(ioasid_t ioasid)
+{
+ struct ioasid_data *ioasid_data;
+
+ spin_lock(&ioasid_allocator_lock);
+ ioasid_data = xa_load(&active_allocator->xa, ioasid);
+ if (!ioasid_data) {
+ pr_err("Trying to free unknown IOASID %u\n", ioasid);
+ goto exit_unlock;
+ }
+
+ active_allocator->ops->free(ioasid, active_allocator->ops->pdata);
+ /* Custom allocator needs additional steps to free the xa element */
+ if (active_allocator->flags & IOASID_ALLOCATOR_CUSTOM) {
+ ioasid_data = xa_erase(&active_allocator->xa, ioasid);
+ kfree_rcu(ioasid_data, rcu);
+ }
+
+exit_unlock:
+ spin_unlock(&ioasid_allocator_lock);
+}
+EXPORT_SYMBOL_GPL(ioasid_free);
+
+/**
+ * ioasid_find - Find IOASID data
+ * @set: the IOASID set
+ * @ioasid: the IOASID to find
+ * @getter: function to call on the found object
+ *
+ * The optional getter function allows to take a reference to the found object
+ * under the rcu lock. The function can also check if the object is still valid:
+ * if @getter returns false, then the object is invalid and NULL is returned.
+ *
+ * If the IOASID exists, return the private pointer passed to ioasid_alloc.
+ * Private data can be NULL if not set. Return an error if the IOASID is not
+ * found, or if @set is not NULL and the IOASID does not belong to the set.
+ */
+void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid,
+ bool (*getter)(void *))
+{
+ void *priv;
+ struct ioasid_data *ioasid_data;
+ struct ioasid_allocator_data *idata;
+
+ rcu_read_lock();
+ idata = rcu_dereference(active_allocator);
+ ioasid_data = xa_load(&idata->xa, ioasid);
+ if (!ioasid_data) {
+ priv = ERR_PTR(-ENOENT);
+ goto unlock;
+ }
+ if (set && ioasid_data->set != set) {
+ /* data found but does not belong to the set */
+ priv = ERR_PTR(-EACCES);
+ goto unlock;
+ }
+ /* Now IOASID and its set is verified, we can return the private data */
+ priv = rcu_dereference(ioasid_data->private);
+ if (getter && !getter(priv))
+ priv = NULL;
+unlock:
+ rcu_read_unlock();
+
+ return priv;
+}
+EXPORT_SYMBOL_GPL(ioasid_find);
+
+MODULE_AUTHOR("Jean-Philippe Brucker <jean-philippe.brucker@arm.com>");
+MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@linux.intel.com>");
+MODULE_DESCRIPTION("IO Address Space ID (IOASID) allocator");
+MODULE_LICENSE("GPL");
diff --git a/drivers/iommu/iommu-sysfs.c b/drivers/iommu/iommu-sysfs.c
index e436ff8..9986921 100644
--- a/drivers/iommu/iommu-sysfs.c
+++ b/drivers/iommu/iommu-sysfs.c
@@ -87,6 +87,7 @@
put_device(iommu->dev);
return ret;
}
+EXPORT_SYMBOL_GPL(iommu_device_sysfs_add);
void iommu_device_sysfs_remove(struct iommu_device *iommu)
{
@@ -94,6 +95,8 @@
device_unregister(iommu->dev);
iommu->dev = NULL;
}
+EXPORT_SYMBOL_GPL(iommu_device_sysfs_remove);
+
/*
* IOMMU drivers can indicate a device is managed by a given IOMMU using
* this interface. A link to the device will be created in the "devices"
@@ -119,6 +122,7 @@
return ret;
}
+EXPORT_SYMBOL_GPL(iommu_device_link);
void iommu_device_unlink(struct iommu_device *iommu, struct device *link)
{
@@ -128,3 +132,4 @@
sysfs_remove_link(&link->kobj, "iommu");
sysfs_remove_link_from_group(&iommu->dev->kobj, "devices", dev_name(link));
}
+EXPORT_SYMBOL_GPL(iommu_device_unlink);
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index c5758fb..9d65557 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -22,6 +22,7 @@
#include <linux/bitops.h>
#include <linux/property.h>
#include <linux/fsl/mc.h>
+#include <linux/module.h>
#include <trace/events/iommu.h>
static struct kset *iommu_group_kset;
@@ -43,6 +44,7 @@
int id;
struct iommu_domain *default_domain;
struct iommu_domain *domain;
+ struct list_head entry;
};
struct group_device {
@@ -78,6 +80,20 @@
return !!(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API);
}
+static int iommu_alloc_default_domain(struct iommu_group *group,
+ struct device *dev);
+static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus,
+ unsigned type);
+static int __iommu_attach_device(struct iommu_domain *domain,
+ struct device *dev);
+static int __iommu_attach_group(struct iommu_domain *domain,
+ struct iommu_group *group);
+static void __iommu_detach_group(struct iommu_domain *domain,
+ struct iommu_group *group);
+static int iommu_create_device_direct_mappings(struct iommu_group *group,
+ struct device *dev);
+static struct iommu_group *iommu_group_get_for_dev(struct device *dev);
+
#define IOMMU_GROUP_ATTR(_name, _mode, _show, _store) \
struct iommu_group_attribute iommu_group_attr_##_name = \
__ATTR(_name, _mode, _show, _store)
@@ -141,6 +157,7 @@
spin_unlock(&iommu_device_lock);
return 0;
}
+EXPORT_SYMBOL_GPL(iommu_device_register);
void iommu_device_unregister(struct iommu_device *iommu)
{
@@ -148,10 +165,11 @@
list_del(&iommu->list);
spin_unlock(&iommu_device_lock);
}
+EXPORT_SYMBOL_GPL(iommu_device_unregister);
-static struct iommu_param *iommu_get_dev_param(struct device *dev)
+static struct dev_iommu *dev_iommu_get(struct device *dev)
{
- struct iommu_param *param = dev->iommu_param;
+ struct dev_iommu *param = dev->iommu;
if (param)
return param;
@@ -161,54 +179,137 @@
return NULL;
mutex_init(¶m->lock);
- dev->iommu_param = param;
+ dev->iommu = param;
return param;
}
-static void iommu_free_dev_param(struct device *dev)
+static void dev_iommu_free(struct device *dev)
{
- kfree(dev->iommu_param);
- dev->iommu_param = NULL;
+ struct dev_iommu *param = dev->iommu;
+
+ dev->iommu = NULL;
+ if (param->fwspec) {
+ fwnode_handle_put(param->fwspec->iommu_fwnode);
+ kfree(param->fwspec);
+ }
+ kfree(param);
+}
+
+static int __iommu_probe_device(struct device *dev, struct list_head *group_list)
+{
+ const struct iommu_ops *ops = dev->bus->iommu_ops;
+ struct iommu_device *iommu_dev;
+ struct iommu_group *group;
+ int ret;
+
+ if (!ops)
+ return -ENODEV;
+
+ if (!dev_iommu_get(dev))
+ return -ENOMEM;
+
+ if (!try_module_get(ops->owner)) {
+ ret = -EINVAL;
+ goto err_free;
+ }
+
+ iommu_dev = ops->probe_device(dev);
+ if (IS_ERR(iommu_dev)) {
+ ret = PTR_ERR(iommu_dev);
+ goto out_module_put;
+ }
+
+ dev->iommu->iommu_dev = iommu_dev;
+
+ group = iommu_group_get_for_dev(dev);
+ if (IS_ERR(group)) {
+ ret = PTR_ERR(group);
+ goto out_release;
+ }
+ iommu_group_put(group);
+
+ if (group_list && !group->default_domain && list_empty(&group->entry))
+ list_add_tail(&group->entry, group_list);
+
+ iommu_device_link(iommu_dev, dev);
+
+ return 0;
+
+out_release:
+ ops->release_device(dev);
+
+out_module_put:
+ module_put(ops->owner);
+
+err_free:
+ dev_iommu_free(dev);
+
+ return ret;
}
int iommu_probe_device(struct device *dev)
{
const struct iommu_ops *ops = dev->bus->iommu_ops;
+ struct iommu_group *group;
int ret;
- WARN_ON(dev->iommu_group);
- if (!ops)
- return -EINVAL;
-
- if (!iommu_get_dev_param(dev))
- return -ENOMEM;
-
- ret = ops->add_device(dev);
+ ret = __iommu_probe_device(dev, NULL);
if (ret)
- iommu_free_dev_param(dev);
+ goto err_out;
+ group = iommu_group_get(dev);
+ if (!group)
+ goto err_release;
+
+ /*
+ * Try to allocate a default domain - needs support from the
+ * IOMMU driver. There are still some drivers which don't
+ * support default domains, so the return value is not yet
+ * checked.
+ */
+ iommu_alloc_default_domain(group, dev);
+
+ if (group->default_domain) {
+ ret = __iommu_attach_device(group->default_domain, dev);
+ if (ret) {
+ iommu_group_put(group);
+ goto err_release;
+ }
+ }
+
+ iommu_create_device_direct_mappings(group, dev);
+
+ iommu_group_put(group);
+
+ if (ops->probe_finalize)
+ ops->probe_finalize(dev);
+
+ return 0;
+
+err_release:
+ iommu_release_device(dev);
+
+err_out:
return ret;
+
}
void iommu_release_device(struct device *dev)
{
const struct iommu_ops *ops = dev->bus->iommu_ops;
- if (dev->iommu_group)
- ops->remove_device(dev);
+ if (!dev->iommu)
+ return;
- iommu_free_dev_param(dev);
+ iommu_device_unlink(dev->iommu->iommu_dev, dev);
+
+ ops->release_device(dev);
+
+ iommu_group_remove_device(dev);
+ module_put(ops->owner);
+ dev_iommu_free(dev);
}
-static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus,
- unsigned type);
-static int __iommu_attach_device(struct iommu_domain *domain,
- struct device *dev);
-static int __iommu_attach_group(struct iommu_domain *domain,
- struct iommu_group *group);
-static void __iommu_detach_group(struct iommu_domain *domain,
- struct iommu_group *group);
-
static int __init iommu_set_def_domain_type(char *str)
{
bool pt;
@@ -289,8 +390,8 @@
* Elements are sorted by start address and overlapping segments
* of the same type are merged.
*/
-int iommu_insert_resv_region(struct iommu_resv_region *new,
- struct list_head *regions)
+static int iommu_insert_resv_region(struct iommu_resv_region *new,
+ struct list_head *regions)
{
struct iommu_resv_region *iter, *tmp, *nr, *top;
LIST_HEAD(stack);
@@ -479,6 +580,7 @@
group->kobj.kset = iommu_group_kset;
mutex_init(&group->mutex);
INIT_LIST_HEAD(&group->devices);
+ INIT_LIST_HEAD(&group->entry);
BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
ret = ida_simple_get(&iommu_group_ida, 0, 0, GFP_KERNEL);
@@ -620,8 +722,8 @@
}
EXPORT_SYMBOL_GPL(iommu_group_set_name);
-static int iommu_group_create_direct_mappings(struct iommu_group *group,
- struct device *dev)
+static int iommu_create_device_direct_mappings(struct iommu_group *group,
+ struct device *dev)
{
struct iommu_domain *domain = group->default_domain;
struct iommu_resv_region *entry;
@@ -667,7 +769,7 @@
}
- iommu_flush_tlb_all(domain);
+ iommu_flush_iotlb_all(domain);
out:
iommu_put_resv_regions(dev, &mappings);
@@ -675,6 +777,15 @@
return ret;
}
+static bool iommu_is_attach_deferred(struct iommu_domain *domain,
+ struct device *dev)
+{
+ if (domain->ops->is_attach_deferred)
+ return domain->ops->is_attach_deferred(domain, dev);
+
+ return false;
+}
+
/**
* iommu_group_add_device - add a device to an iommu group
* @group: the group into which to add the device (reference should be held)
@@ -725,11 +836,9 @@
dev->iommu_group = group;
- iommu_group_create_direct_mappings(group, dev);
-
mutex_lock(&group->mutex);
list_add_tail(&device->list, &group->devices);
- if (group->domain)
+ if (group->domain && !iommu_is_attach_deferred(group->domain, dev))
ret = __iommu_attach_device(group->domain, dev);
mutex_unlock(&group->mutex);
if (ret)
@@ -890,6 +999,7 @@
kobject_get(group->devices_kobj);
return group;
}
+EXPORT_SYMBOL_GPL(iommu_group_ref_get);
/**
* iommu_group_put - Decrement group reference
@@ -957,7 +1067,7 @@
iommu_dev_fault_handler_t handler,
void *data)
{
- struct iommu_param *param = dev->iommu_param;
+ struct dev_iommu *param = dev->iommu;
int ret = 0;
if (!param)
@@ -1000,7 +1110,7 @@
*/
int iommu_unregister_device_fault_handler(struct device *dev)
{
- struct iommu_param *param = dev->iommu_param;
+ struct dev_iommu *param = dev->iommu;
int ret = 0;
if (!param)
@@ -1040,7 +1150,7 @@
*/
int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt)
{
- struct iommu_param *param = dev->iommu_param;
+ struct dev_iommu *param = dev->iommu;
struct iommu_fault_event *evt_pending = NULL;
struct iommu_fault_param *fparam;
int ret = 0;
@@ -1085,11 +1195,12 @@
int iommu_page_response(struct device *dev,
struct iommu_page_response *msg)
{
- bool pasid_valid;
+ bool needs_pasid;
int ret = -EINVAL;
struct iommu_fault_event *evt;
struct iommu_fault_page_request *prm;
- struct iommu_param *param = dev->iommu_param;
+ struct dev_iommu *param = dev->iommu;
+ bool has_pasid = msg->flags & IOMMU_PAGE_RESP_PASID_VALID;
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
if (!domain || !domain->ops->page_response)
@@ -1114,14 +1225,24 @@
*/
list_for_each_entry(evt, ¶m->fault_param->faults, list) {
prm = &evt->fault.prm;
- pasid_valid = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
-
- if ((pasid_valid && prm->pasid != msg->pasid) ||
- prm->grpid != msg->grpid)
+ if (prm->grpid != msg->grpid)
continue;
- /* Sanitize the reply */
- msg->flags = pasid_valid ? IOMMU_PAGE_RESP_PASID_VALID : 0;
+ /*
+ * If the PASID is required, the corresponding request is
+ * matched using the group ID, the PASID valid bit and the PASID
+ * value. Otherwise only the group ID matches request and
+ * response.
+ */
+ needs_pasid = prm->flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
+ if (needs_pasid && (!has_pasid || msg->pasid != prm->pasid))
+ continue;
+
+ if (!needs_pasid && has_pasid) {
+ /* No big deal, just clear it. */
+ msg->flags &= ~IOMMU_PAGE_RESP_PASID_VALID;
+ msg->pasid = 0;
+ }
ret = domain->ops->page_response(dev, evt, msg);
list_del(&evt->list);
@@ -1263,6 +1384,7 @@
{
return iommu_group_alloc();
}
+EXPORT_SYMBOL_GPL(generic_device_group);
/*
* Use standard PCI bus topology, isolation features, and DMA alias quirks
@@ -1330,6 +1452,7 @@
/* No shared group found, allocate new */
return iommu_group_alloc();
}
+EXPORT_SYMBOL_GPL(pci_device_group);
/* Get the IOMMU group for device on fsl-mc bus */
struct iommu_group *fsl_mc_device_group(struct device *dev)
@@ -1342,6 +1465,62 @@
group = iommu_group_alloc();
return group;
}
+EXPORT_SYMBOL_GPL(fsl_mc_device_group);
+
+static int iommu_get_def_domain_type(struct device *dev)
+{
+ const struct iommu_ops *ops = dev->bus->iommu_ops;
+ unsigned int type = 0;
+
+ if (ops->def_domain_type)
+ type = ops->def_domain_type(dev);
+
+ return (type == 0) ? iommu_def_domain_type : type;
+}
+
+static int iommu_group_alloc_default_domain(struct bus_type *bus,
+ struct iommu_group *group,
+ unsigned int type)
+{
+ struct iommu_domain *dom;
+
+ dom = __iommu_domain_alloc(bus, type);
+ if (!dom && type != IOMMU_DOMAIN_DMA) {
+ dom = __iommu_domain_alloc(bus, IOMMU_DOMAIN_DMA);
+ if (dom)
+ pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA",
+ type, group->name);
+ }
+
+ if (!dom)
+ return -ENOMEM;
+
+ group->default_domain = dom;
+ if (!group->domain)
+ group->domain = dom;
+
+ if (!iommu_dma_strict) {
+ int attr = 1;
+ iommu_domain_set_attr(dom,
+ DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE,
+ &attr);
+ }
+
+ return 0;
+}
+
+static int iommu_alloc_default_domain(struct iommu_group *group,
+ struct device *dev)
+{
+ unsigned int type;
+
+ if (group->default_domain)
+ return 0;
+
+ type = iommu_get_def_domain_type(dev);
+
+ return iommu_group_alloc_default_domain(dev->bus, group, type);
+}
/**
* iommu_group_get_for_dev - Find or create the IOMMU group for a device
@@ -1353,7 +1532,7 @@
* to the returned IOMMU group, which will already include the provided
* device. The reference should be released with iommu_group_put().
*/
-struct iommu_group *iommu_group_get_for_dev(struct device *dev)
+static struct iommu_group *iommu_group_get_for_dev(struct device *dev)
{
const struct iommu_ops *ops = dev->bus->iommu_ops;
struct iommu_group *group;
@@ -1373,42 +1552,16 @@
if (IS_ERR(group))
return group;
- /*
- * Try to allocate a default domain - needs support from the
- * IOMMU driver.
- */
- if (!group->default_domain) {
- struct iommu_domain *dom;
-
- dom = __iommu_domain_alloc(dev->bus, iommu_def_domain_type);
- if (!dom && iommu_def_domain_type != IOMMU_DOMAIN_DMA) {
- dom = __iommu_domain_alloc(dev->bus, IOMMU_DOMAIN_DMA);
- if (dom) {
- dev_warn(dev,
- "failed to allocate default IOMMU domain of type %u; falling back to IOMMU_DOMAIN_DMA",
- iommu_def_domain_type);
- }
- }
-
- group->default_domain = dom;
- if (!group->domain)
- group->domain = dom;
-
- if (dom && !iommu_dma_strict) {
- int attr = 1;
- iommu_domain_set_attr(dom,
- DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE,
- &attr);
- }
- }
-
ret = iommu_group_add_device(group, dev);
- if (ret) {
- iommu_group_put(group);
- return ERR_PTR(ret);
- }
+ if (ret)
+ goto out_put_group;
return group;
+
+out_put_group:
+ iommu_group_put(group);
+
+ return ERR_PTR(ret);
}
struct iommu_domain *iommu_group_default_domain(struct iommu_group *group)
@@ -1416,15 +1569,20 @@
return group->default_domain;
}
-static int add_iommu_group(struct device *dev, void *data)
+static int probe_iommu_group(struct device *dev, void *data)
{
- int ret = iommu_probe_device(dev);
+ struct list_head *group_list = data;
+ struct iommu_group *group;
+ int ret;
- /*
- * We ignore -ENODEV errors for now, as they just mean that the
- * device is not translated by an IOMMU. We still care about
- * other errors and fail to initialize when they happen.
- */
+ /* Device is probed already if in a group */
+ group = iommu_group_get(dev);
+ if (group) {
+ iommu_group_put(group);
+ return 0;
+ }
+
+ ret = __iommu_probe_device(dev, group_list);
if (ret == -ENODEV)
ret = 0;
@@ -1490,10 +1648,152 @@
return 0;
}
+struct __group_domain_type {
+ struct device *dev;
+ unsigned int type;
+};
+
+static int probe_get_default_domain_type(struct device *dev, void *data)
+{
+ const struct iommu_ops *ops = dev->bus->iommu_ops;
+ struct __group_domain_type *gtype = data;
+ unsigned int type = 0;
+
+ if (ops->def_domain_type)
+ type = ops->def_domain_type(dev);
+
+ if (type) {
+ if (gtype->type && gtype->type != type) {
+ dev_warn(dev, "Device needs domain type %s, but device %s in the same iommu group requires type %s - using default\n",
+ iommu_domain_type_str(type),
+ dev_name(gtype->dev),
+ iommu_domain_type_str(gtype->type));
+ gtype->type = 0;
+ }
+
+ if (!gtype->dev) {
+ gtype->dev = dev;
+ gtype->type = type;
+ }
+ }
+
+ return 0;
+}
+
+static void probe_alloc_default_domain(struct bus_type *bus,
+ struct iommu_group *group)
+{
+ struct __group_domain_type gtype;
+
+ memset(>ype, 0, sizeof(gtype));
+
+ /* Ask for default domain requirements of all devices in the group */
+ __iommu_group_for_each_dev(group, >ype,
+ probe_get_default_domain_type);
+
+ if (!gtype.type)
+ gtype.type = iommu_def_domain_type;
+
+ iommu_group_alloc_default_domain(bus, group, gtype.type);
+
+}
+
+static int iommu_group_do_dma_attach(struct device *dev, void *data)
+{
+ struct iommu_domain *domain = data;
+ int ret = 0;
+
+ if (!iommu_is_attach_deferred(domain, dev))
+ ret = __iommu_attach_device(domain, dev);
+
+ return ret;
+}
+
+static int __iommu_group_dma_attach(struct iommu_group *group)
+{
+ return __iommu_group_for_each_dev(group, group->default_domain,
+ iommu_group_do_dma_attach);
+}
+
+static int iommu_group_do_probe_finalize(struct device *dev, void *data)
+{
+ struct iommu_domain *domain = data;
+
+ if (domain->ops->probe_finalize)
+ domain->ops->probe_finalize(dev);
+
+ return 0;
+}
+
+static void __iommu_group_dma_finalize(struct iommu_group *group)
+{
+ __iommu_group_for_each_dev(group, group->default_domain,
+ iommu_group_do_probe_finalize);
+}
+
+static int iommu_do_create_direct_mappings(struct device *dev, void *data)
+{
+ struct iommu_group *group = data;
+
+ iommu_create_device_direct_mappings(group, dev);
+
+ return 0;
+}
+
+static int iommu_group_create_direct_mappings(struct iommu_group *group)
+{
+ return __iommu_group_for_each_dev(group, group,
+ iommu_do_create_direct_mappings);
+}
+
+int bus_iommu_probe(struct bus_type *bus)
+{
+ struct iommu_group *group, *next;
+ LIST_HEAD(group_list);
+ int ret;
+
+ /*
+ * This code-path does not allocate the default domain when
+ * creating the iommu group, so do it after the groups are
+ * created.
+ */
+ ret = bus_for_each_dev(bus, NULL, &group_list, probe_iommu_group);
+ if (ret)
+ return ret;
+
+ list_for_each_entry_safe(group, next, &group_list, entry) {
+ /* Remove item from the list */
+ list_del_init(&group->entry);
+
+ mutex_lock(&group->mutex);
+
+ /* Try to allocate default domain */
+ probe_alloc_default_domain(bus, group);
+
+ if (!group->default_domain) {
+ mutex_unlock(&group->mutex);
+ continue;
+ }
+
+ iommu_group_create_direct_mappings(group);
+
+ ret = __iommu_group_dma_attach(group);
+
+ mutex_unlock(&group->mutex);
+
+ if (ret)
+ break;
+
+ __iommu_group_dma_finalize(group);
+ }
+
+ return ret;
+}
+
static int iommu_bus_init(struct bus_type *bus, const struct iommu_ops *ops)
{
- int err;
struct notifier_block *nb;
+ int err;
nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL);
if (!nb)
@@ -1505,7 +1805,7 @@
if (err)
goto out_free;
- err = bus_for_each_dev(bus, NULL, NULL, add_iommu_group);
+ err = bus_iommu_probe(bus);
if (err)
goto out_err;
@@ -1540,6 +1840,11 @@
{
int err;
+ if (ops == NULL) {
+ bus->iommu_ops = NULL;
+ return 0;
+ }
+
if (bus->iommu_ops != NULL)
return -EBUSY;
@@ -1628,9 +1933,6 @@
struct device *dev)
{
int ret;
- if ((domain->ops->is_attach_deferred != NULL) &&
- domain->ops->is_attach_deferred(domain, dev))
- return 0;
if (unlikely(domain->ops->attach_dev == NULL))
return -ENODEV;
@@ -1669,11 +1971,220 @@
}
EXPORT_SYMBOL_GPL(iommu_attach_device);
+/*
+ * Check flags and other user provided data for valid combinations. We also
+ * make sure no reserved fields or unused flags are set. This is to ensure
+ * not breaking userspace in the future when these fields or flags are used.
+ */
+static int iommu_check_cache_invl_data(struct iommu_cache_invalidate_info *info)
+{
+ u32 mask;
+ int i;
+
+ if (info->version != IOMMU_CACHE_INVALIDATE_INFO_VERSION_1)
+ return -EINVAL;
+
+ mask = (1 << IOMMU_CACHE_INV_TYPE_NR) - 1;
+ if (info->cache & ~mask)
+ return -EINVAL;
+
+ if (info->granularity >= IOMMU_INV_GRANU_NR)
+ return -EINVAL;
+
+ switch (info->granularity) {
+ case IOMMU_INV_GRANU_ADDR:
+ if (info->cache & IOMMU_CACHE_INV_TYPE_PASID)
+ return -EINVAL;
+
+ mask = IOMMU_INV_ADDR_FLAGS_PASID |
+ IOMMU_INV_ADDR_FLAGS_ARCHID |
+ IOMMU_INV_ADDR_FLAGS_LEAF;
+
+ if (info->granu.addr_info.flags & ~mask)
+ return -EINVAL;
+ break;
+ case IOMMU_INV_GRANU_PASID:
+ mask = IOMMU_INV_PASID_FLAGS_PASID |
+ IOMMU_INV_PASID_FLAGS_ARCHID;
+ if (info->granu.pasid_info.flags & ~mask)
+ return -EINVAL;
+
+ break;
+ case IOMMU_INV_GRANU_DOMAIN:
+ if (info->cache & IOMMU_CACHE_INV_TYPE_DEV_IOTLB)
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* Check reserved padding fields */
+ for (i = 0; i < sizeof(info->padding); i++) {
+ if (info->padding[i])
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int iommu_uapi_cache_invalidate(struct iommu_domain *domain, struct device *dev,
+ void __user *uinfo)
+{
+ struct iommu_cache_invalidate_info inv_info = { 0 };
+ u32 minsz;
+ int ret;
+
+ if (unlikely(!domain->ops->cache_invalidate))
+ return -ENODEV;
+
+ /*
+ * No new spaces can be added before the variable sized union, the
+ * minimum size is the offset to the union.
+ */
+ minsz = offsetof(struct iommu_cache_invalidate_info, granu);
+
+ /* Copy minsz from user to get flags and argsz */
+ if (copy_from_user(&inv_info, uinfo, minsz))
+ return -EFAULT;
+
+ /* Fields before the variable size union are mandatory */
+ if (inv_info.argsz < minsz)
+ return -EINVAL;
+
+ /* PASID and address granu require additional info beyond minsz */
+ if (inv_info.granularity == IOMMU_INV_GRANU_PASID &&
+ inv_info.argsz < offsetofend(struct iommu_cache_invalidate_info, granu.pasid_info))
+ return -EINVAL;
+
+ if (inv_info.granularity == IOMMU_INV_GRANU_ADDR &&
+ inv_info.argsz < offsetofend(struct iommu_cache_invalidate_info, granu.addr_info))
+ return -EINVAL;
+
+ /*
+ * User might be using a newer UAPI header which has a larger data
+ * size, we shall support the existing flags within the current
+ * size. Copy the remaining user data _after_ minsz but not more
+ * than the current kernel supported size.
+ */
+ if (copy_from_user((void *)&inv_info + minsz, uinfo + minsz,
+ min_t(u32, inv_info.argsz, sizeof(inv_info)) - minsz))
+ return -EFAULT;
+
+ /* Now the argsz is validated, check the content */
+ ret = iommu_check_cache_invl_data(&inv_info);
+ if (ret)
+ return ret;
+
+ return domain->ops->cache_invalidate(domain, dev, &inv_info);
+}
+EXPORT_SYMBOL_GPL(iommu_uapi_cache_invalidate);
+
+static int iommu_check_bind_data(struct iommu_gpasid_bind_data *data)
+{
+ u64 mask;
+ int i;
+
+ if (data->version != IOMMU_GPASID_BIND_VERSION_1)
+ return -EINVAL;
+
+ /* Check the range of supported formats */
+ if (data->format >= IOMMU_PASID_FORMAT_LAST)
+ return -EINVAL;
+
+ /* Check all flags */
+ mask = IOMMU_SVA_GPASID_VAL;
+ if (data->flags & ~mask)
+ return -EINVAL;
+
+ /* Check reserved padding fields */
+ for (i = 0; i < sizeof(data->padding); i++) {
+ if (data->padding[i])
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int iommu_sva_prepare_bind_data(void __user *udata,
+ struct iommu_gpasid_bind_data *data)
+{
+ u32 minsz;
+
+ /*
+ * No new spaces can be added before the variable sized union, the
+ * minimum size is the offset to the union.
+ */
+ minsz = offsetof(struct iommu_gpasid_bind_data, vendor);
+
+ /* Copy minsz from user to get flags and argsz */
+ if (copy_from_user(data, udata, minsz))
+ return -EFAULT;
+
+ /* Fields before the variable size union are mandatory */
+ if (data->argsz < minsz)
+ return -EINVAL;
+ /*
+ * User might be using a newer UAPI header, we shall let IOMMU vendor
+ * driver decide on what size it needs. Since the guest PASID bind data
+ * can be vendor specific, larger argsz could be the result of extension
+ * for one vendor but it should not affect another vendor.
+ * Copy the remaining user data _after_ minsz
+ */
+ if (copy_from_user((void *)data + minsz, udata + minsz,
+ min_t(u32, data->argsz, sizeof(*data)) - minsz))
+ return -EFAULT;
+
+ return iommu_check_bind_data(data);
+}
+
+int iommu_uapi_sva_bind_gpasid(struct iommu_domain *domain, struct device *dev,
+ void __user *udata)
+{
+ struct iommu_gpasid_bind_data data = { 0 };
+ int ret;
+
+ if (unlikely(!domain->ops->sva_bind_gpasid))
+ return -ENODEV;
+
+ ret = iommu_sva_prepare_bind_data(udata, &data);
+ if (ret)
+ return ret;
+
+ return domain->ops->sva_bind_gpasid(domain, dev, &data);
+}
+EXPORT_SYMBOL_GPL(iommu_uapi_sva_bind_gpasid);
+
+int iommu_sva_unbind_gpasid(struct iommu_domain *domain, struct device *dev,
+ ioasid_t pasid)
+{
+ if (unlikely(!domain->ops->sva_unbind_gpasid))
+ return -ENODEV;
+
+ return domain->ops->sva_unbind_gpasid(dev, pasid);
+}
+EXPORT_SYMBOL_GPL(iommu_sva_unbind_gpasid);
+
+int iommu_uapi_sva_unbind_gpasid(struct iommu_domain *domain, struct device *dev,
+ void __user *udata)
+{
+ struct iommu_gpasid_bind_data data = { 0 };
+ int ret;
+
+ if (unlikely(!domain->ops->sva_bind_gpasid))
+ return -ENODEV;
+
+ ret = iommu_sva_prepare_bind_data(udata, &data);
+ if (ret)
+ return ret;
+
+ return iommu_sva_unbind_gpasid(domain, dev, data.hpasid);
+}
+EXPORT_SYMBOL_GPL(iommu_uapi_sva_unbind_gpasid);
+
static void __iommu_detach_device(struct iommu_domain *domain,
struct device *dev)
{
- if ((domain->ops->is_attach_deferred != NULL) &&
- domain->ops->is_attach_deferred(domain, dev))
+ if (iommu_is_attach_deferred(domain, dev))
return;
if (unlikely(domain->ops->detach_dev == NULL))
@@ -1858,8 +2369,8 @@
return pgsize;
}
-int iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot)
+static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
const struct iommu_ops *ops = domain->ops;
unsigned long orig_iova = iova;
@@ -1896,8 +2407,8 @@
pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n",
iova, &paddr, pgsize);
+ ret = ops->map(domain, iova, paddr, pgsize, prot, gfp);
- ret = ops->map(domain, iova, paddr, pgsize, prot);
if (ret)
break;
@@ -1906,9 +2417,6 @@
size -= pgsize;
}
- if (ops->iotlb_sync_map)
- ops->iotlb_sync_map(domain);
-
/* unroll mapping in case something went wrong */
if (ret)
iommu_unmap(domain, orig_iova, orig_size - size);
@@ -1917,8 +2425,35 @@
return ret;
}
+
+static int _iommu_map(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+{
+ const struct iommu_ops *ops = domain->ops;
+ int ret;
+
+ ret = __iommu_map(domain, iova, paddr, size, prot, gfp);
+ if (ret == 0 && ops->iotlb_sync_map)
+ ops->iotlb_sync_map(domain);
+
+ return ret;
+}
+
+int iommu_map(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot)
+{
+ might_sleep();
+ return _iommu_map(domain, iova, paddr, size, prot, GFP_KERNEL);
+}
EXPORT_SYMBOL_GPL(iommu_map);
+int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot)
+{
+ return _iommu_map(domain, iova, paddr, size, prot, GFP_ATOMIC);
+}
+EXPORT_SYMBOL_GPL(iommu_map_atomic);
+
static size_t __iommu_unmap(struct iommu_domain *domain,
unsigned long iova, size_t size,
struct iommu_iotlb_gather *iotlb_gather)
@@ -1981,7 +2516,7 @@
iommu_iotlb_gather_init(&iotlb_gather);
ret = __iommu_unmap(domain, iova, size, &iotlb_gather);
- iommu_tlb_sync(domain, &iotlb_gather);
+ iommu_iotlb_sync(domain, &iotlb_gather);
return ret;
}
@@ -1995,9 +2530,11 @@
}
EXPORT_SYMBOL_GPL(iommu_unmap_fast);
-size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
- struct scatterlist *sg, unsigned int nents, int prot)
+static size_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
+ struct scatterlist *sg, unsigned int nents, int prot,
+ gfp_t gfp)
{
+ const struct iommu_ops *ops = domain->ops;
size_t len = 0, mapped = 0;
phys_addr_t start;
unsigned int i = 0;
@@ -2007,7 +2544,9 @@
phys_addr_t s_phys = sg_phys(sg);
if (len && s_phys != start + len) {
- ret = iommu_map(domain, iova + mapped, start, len, prot);
+ ret = __iommu_map(domain, iova + mapped, start,
+ len, prot, gfp);
+
if (ret)
goto out_err;
@@ -2026,6 +2565,8 @@
sg = sg_next(sg);
}
+ if (ops->iotlb_sync_map)
+ ops->iotlb_sync_map(domain);
return mapped;
out_err:
@@ -2035,8 +2576,22 @@
return 0;
}
+
+size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
+ struct scatterlist *sg, unsigned int nents, int prot)
+{
+ might_sleep();
+ return __iommu_map_sg(domain, iova, sg, nents, prot, GFP_KERNEL);
+}
EXPORT_SYMBOL_GPL(iommu_map_sg);
+size_t iommu_map_sg_atomic(struct iommu_domain *domain, unsigned long iova,
+ struct scatterlist *sg, unsigned int nents, int prot)
+{
+ return __iommu_map_sg(domain, iova, sg, nents, prot, GFP_ATOMIC);
+}
+EXPORT_SYMBOL_GPL(iommu_map_sg_atomic);
+
int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr,
phys_addr_t paddr, u64 size, int prot)
{
@@ -2172,6 +2727,25 @@
ops->put_resv_regions(dev, list);
}
+/**
+ * generic_iommu_put_resv_regions - Reserved region driver helper
+ * @dev: device for which to free reserved regions
+ * @list: reserved region list for device
+ *
+ * IOMMU drivers can use this to implement their .put_resv_regions() callback
+ * for simple reservations. Memory allocated for each reserved region will be
+ * freed. If an IOMMU driver allocates additional resources per region, it is
+ * going to have to implement a custom callback.
+ */
+void generic_iommu_put_resv_regions(struct device *dev, struct list_head *list)
+{
+ struct iommu_resv_region *entry, *next;
+
+ list_for_each_entry_safe(entry, next, list, list)
+ kfree(entry);
+}
+EXPORT_SYMBOL(generic_iommu_put_resv_regions);
+
struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start,
size_t length, int prot,
enum iommu_resv_type type)
@@ -2189,71 +2763,7 @@
region->type = type;
return region;
}
-
-static int
-request_default_domain_for_dev(struct device *dev, unsigned long type)
-{
- struct iommu_domain *domain;
- struct iommu_group *group;
- int ret;
-
- /* Device must already be in a group before calling this function */
- group = iommu_group_get(dev);
- if (!group)
- return -EINVAL;
-
- mutex_lock(&group->mutex);
-
- ret = 0;
- if (group->default_domain && group->default_domain->type == type)
- goto out;
-
- /* Don't change mappings of existing devices */
- ret = -EBUSY;
- if (iommu_group_device_count(group) != 1)
- goto out;
-
- ret = -ENOMEM;
- domain = __iommu_domain_alloc(dev->bus, type);
- if (!domain)
- goto out;
-
- /* Attach the device to the domain */
- ret = __iommu_attach_group(domain, group);
- if (ret) {
- iommu_domain_free(domain);
- goto out;
- }
-
- /* Make the domain the default for this group */
- if (group->default_domain)
- iommu_domain_free(group->default_domain);
- group->default_domain = domain;
-
- iommu_group_create_direct_mappings(group, dev);
-
- dev_info(dev, "Using iommu %s mapping\n",
- type == IOMMU_DOMAIN_DMA ? "dma" : "direct");
-
- ret = 0;
-out:
- mutex_unlock(&group->mutex);
- iommu_group_put(group);
-
- return ret;
-}
-
-/* Request that a device is direct mapped by the IOMMU */
-int iommu_request_dm_for_dev(struct device *dev)
-{
- return request_default_domain_for_dev(dev, IOMMU_DOMAIN_IDENTITY);
-}
-
-/* Request that a device can't be direct mapped by the IOMMU */
-int iommu_request_dma_domain_for_dev(struct device *dev)
-{
- return request_default_domain_for_dev(dev, IOMMU_DOMAIN_DMA);
-}
+EXPORT_SYMBOL_GPL(iommu_alloc_resv_region);
void iommu_set_default_passthrough(bool cmd_line)
{
@@ -2300,7 +2810,11 @@
if (fwspec)
return ops == fwspec->ops ? 0 : -EINVAL;
- fwspec = kzalloc(sizeof(*fwspec), GFP_KERNEL);
+ if (!dev_iommu_get(dev))
+ return -ENOMEM;
+
+ /* Preallocate for the overwhelmingly common case of 1 ID */
+ fwspec = kzalloc(struct_size(fwspec, ids, 1), GFP_KERNEL);
if (!fwspec)
return -ENOMEM;
@@ -2327,15 +2841,15 @@
int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids)
{
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
- size_t size;
- int i;
+ int i, new_num;
if (!fwspec)
return -EINVAL;
- size = offsetof(struct iommu_fwspec, ids[fwspec->num_ids + num_ids]);
- if (size > sizeof(*fwspec)) {
- fwspec = krealloc(fwspec, size, GFP_KERNEL);
+ new_num = fwspec->num_ids + num_ids;
+ if (new_num > 1) {
+ fwspec = krealloc(fwspec, struct_size(fwspec, ids, new_num),
+ GFP_KERNEL);
if (!fwspec)
return -ENOMEM;
@@ -2345,7 +2859,7 @@
for (i = 0; i < num_ids; i++)
fwspec->ids[fwspec->num_ids + i] = ids[i];
- fwspec->num_ids += num_ids;
+ fwspec->num_ids = new_num;
return 0;
}
EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids);
@@ -2366,10 +2880,12 @@
int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat)
{
- const struct iommu_ops *ops = dev->bus->iommu_ops;
+ if (dev->iommu && dev->iommu->iommu_dev) {
+ const struct iommu_ops *ops = dev->iommu->iommu_dev->ops;
- if (ops && ops->dev_enable_feat)
- return ops->dev_enable_feat(dev, feat);
+ if (ops->dev_enable_feat)
+ return ops->dev_enable_feat(dev, feat);
+ }
return -ENODEV;
}
@@ -2382,10 +2898,12 @@
*/
int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat)
{
- const struct iommu_ops *ops = dev->bus->iommu_ops;
+ if (dev->iommu && dev->iommu->iommu_dev) {
+ const struct iommu_ops *ops = dev->iommu->iommu_dev->ops;
- if (ops && ops->dev_disable_feat)
- return ops->dev_disable_feat(dev, feat);
+ if (ops->dev_disable_feat)
+ return ops->dev_disable_feat(dev, feat);
+ }
return -EBUSY;
}
@@ -2393,10 +2911,12 @@
bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features feat)
{
- const struct iommu_ops *ops = dev->bus->iommu_ops;
+ if (dev->iommu && dev->iommu->iommu_dev) {
+ const struct iommu_ops *ops = dev->iommu->iommu_dev->ops;
- if (ops && ops->dev_feat_enabled)
- return ops->dev_feat_enabled(dev, feat);
+ if (ops->dev_feat_enabled)
+ return ops->dev_feat_enabled(dev, feat);
+ }
return false;
}
@@ -2528,18 +3048,7 @@
}
EXPORT_SYMBOL_GPL(iommu_sva_unbind_device);
-int iommu_sva_set_ops(struct iommu_sva *handle,
- const struct iommu_sva_ops *sva_ops)
-{
- if (handle->ops && handle->ops != sva_ops)
- return -EEXIST;
-
- handle->ops = sva_ops;
- return 0;
-}
-EXPORT_SYMBOL_GPL(iommu_sva_set_ops);
-
-int iommu_sva_get_pasid(struct iommu_sva *handle)
+u32 iommu_sva_get_pasid(struct iommu_sva *handle)
{
const struct iommu_ops *ops = handle->dev->bus->iommu_ops;
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 612cbf6..1164d1a 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -64,8 +64,7 @@
if (!has_iova_flush_queue(iovad))
return;
- if (timer_pending(&iovad->fq_timer))
- del_timer(&iovad->fq_timer);
+ del_timer_sync(&iovad->fq_timer);
fq_destroy_all_entries(iovad);
@@ -253,7 +252,7 @@
SLAB_HWCACHE_ALIGN, NULL);
if (!iova_cache) {
mutex_unlock(&iova_cache_mutex);
- printk(KERN_ERR "Couldn't create iova cache\n");
+ pr_err("Couldn't create iova cache\n");
return -ENOMEM;
}
}
@@ -579,7 +578,7 @@
/* Avoid false sharing as much as possible. */
if (!atomic_read(&iovad->fq_timer_on) &&
- !atomic_cmpxchg(&iovad->fq_timer_on, 0, 1))
+ !atomic_xchg(&iovad->fq_timer_on, 1))
mod_timer(&iovad->fq_timer,
jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
}
@@ -718,8 +717,8 @@
new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi);
if (!new_iova)
- printk(KERN_ERR "Reserve iova range %lx@%lx failed\n",
- iova->pfn_lo, iova->pfn_lo);
+ pr_err("Reserve iova range %lx@%lx failed\n",
+ iova->pfn_lo, iova->pfn_lo);
}
spin_unlock_irqrestore(&from->iova_rbtree_lock, flags);
}
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 2639fc7..0f18abd 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -3,7 +3,7 @@
* IOMMU API for Renesas VMSA-compatible IPMMU
* Author: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
*
- * Copyright (C) 2014 Renesas Electronics Corporation
+ * Copyright (C) 2014-2020 Renesas Electronics Corporation
*/
#include <linux/bitmap.h>
@@ -28,7 +28,6 @@
#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA)
#include <asm/dma-iommu.h>
-#include <asm/pgalloc.h>
#else
#define arm_iommu_create_mapping(...) NULL
#define arm_iommu_attach_device(...) -ENODEV
@@ -50,6 +49,9 @@
bool twobit_imttbcr_sl0;
bool reserved_context;
bool cache_snoop;
+ unsigned int ctx_offset_base;
+ unsigned int ctx_offset_stride;
+ unsigned int utlb_offset_base;
};
struct ipmmu_vmsa_device {
@@ -86,9 +88,7 @@
static struct ipmmu_vmsa_device *to_ipmmu(struct device *dev)
{
- struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
-
- return fwspec ? fwspec->iommu_priv : NULL;
+ return dev_iommu_priv_get(dev);
}
#define TLB_LOOP_TIMEOUT 100 /* 100us */
@@ -99,125 +99,49 @@
#define IM_NS_ALIAS_OFFSET 0x800
-#define IM_CTX_SIZE 0x40
+/* MMU "context" registers */
+#define IMCTR 0x0000 /* R-Car Gen2/3 */
+#define IMCTR_INTEN (1 << 2) /* R-Car Gen2/3 */
+#define IMCTR_FLUSH (1 << 1) /* R-Car Gen2/3 */
+#define IMCTR_MMUEN (1 << 0) /* R-Car Gen2/3 */
-#define IMCTR 0x0000
-#define IMCTR_TRE (1 << 17)
-#define IMCTR_AFE (1 << 16)
-#define IMCTR_RTSEL_MASK (3 << 4)
-#define IMCTR_RTSEL_SHIFT 4
-#define IMCTR_TREN (1 << 3)
-#define IMCTR_INTEN (1 << 2)
-#define IMCTR_FLUSH (1 << 1)
-#define IMCTR_MMUEN (1 << 0)
-
-#define IMCAAR 0x0004
-
-#define IMTTBCR 0x0008
-#define IMTTBCR_EAE (1 << 31)
-#define IMTTBCR_PMB (1 << 30)
-#define IMTTBCR_SH1_NON_SHAREABLE (0 << 28) /* R-Car Gen2 only */
-#define IMTTBCR_SH1_OUTER_SHAREABLE (2 << 28) /* R-Car Gen2 only */
-#define IMTTBCR_SH1_INNER_SHAREABLE (3 << 28) /* R-Car Gen2 only */
-#define IMTTBCR_SH1_MASK (3 << 28) /* R-Car Gen2 only */
-#define IMTTBCR_ORGN1_NC (0 << 26) /* R-Car Gen2 only */
-#define IMTTBCR_ORGN1_WB_WA (1 << 26) /* R-Car Gen2 only */
-#define IMTTBCR_ORGN1_WT (2 << 26) /* R-Car Gen2 only */
-#define IMTTBCR_ORGN1_WB (3 << 26) /* R-Car Gen2 only */
-#define IMTTBCR_ORGN1_MASK (3 << 26) /* R-Car Gen2 only */
-#define IMTTBCR_IRGN1_NC (0 << 24) /* R-Car Gen2 only */
-#define IMTTBCR_IRGN1_WB_WA (1 << 24) /* R-Car Gen2 only */
-#define IMTTBCR_IRGN1_WT (2 << 24) /* R-Car Gen2 only */
-#define IMTTBCR_IRGN1_WB (3 << 24) /* R-Car Gen2 only */
-#define IMTTBCR_IRGN1_MASK (3 << 24) /* R-Car Gen2 only */
-#define IMTTBCR_TSZ1_MASK (7 << 16)
-#define IMTTBCR_TSZ1_SHIFT 16
-#define IMTTBCR_SH0_NON_SHAREABLE (0 << 12) /* R-Car Gen2 only */
-#define IMTTBCR_SH0_OUTER_SHAREABLE (2 << 12) /* R-Car Gen2 only */
+#define IMTTBCR 0x0008 /* R-Car Gen2/3 */
+#define IMTTBCR_EAE (1 << 31) /* R-Car Gen2/3 */
#define IMTTBCR_SH0_INNER_SHAREABLE (3 << 12) /* R-Car Gen2 only */
-#define IMTTBCR_SH0_MASK (3 << 12) /* R-Car Gen2 only */
-#define IMTTBCR_ORGN0_NC (0 << 10) /* R-Car Gen2 only */
#define IMTTBCR_ORGN0_WB_WA (1 << 10) /* R-Car Gen2 only */
-#define IMTTBCR_ORGN0_WT (2 << 10) /* R-Car Gen2 only */
-#define IMTTBCR_ORGN0_WB (3 << 10) /* R-Car Gen2 only */
-#define IMTTBCR_ORGN0_MASK (3 << 10) /* R-Car Gen2 only */
-#define IMTTBCR_IRGN0_NC (0 << 8) /* R-Car Gen2 only */
#define IMTTBCR_IRGN0_WB_WA (1 << 8) /* R-Car Gen2 only */
-#define IMTTBCR_IRGN0_WT (2 << 8) /* R-Car Gen2 only */
-#define IMTTBCR_IRGN0_WB (3 << 8) /* R-Car Gen2 only */
-#define IMTTBCR_IRGN0_MASK (3 << 8) /* R-Car Gen2 only */
-#define IMTTBCR_SL0_TWOBIT_LVL_3 (0 << 6) /* R-Car Gen3 only */
-#define IMTTBCR_SL0_TWOBIT_LVL_2 (1 << 6) /* R-Car Gen3 only */
#define IMTTBCR_SL0_TWOBIT_LVL_1 (2 << 6) /* R-Car Gen3 only */
-#define IMTTBCR_SL0_LVL_2 (0 << 4)
-#define IMTTBCR_SL0_LVL_1 (1 << 4)
-#define IMTTBCR_TSZ0_MASK (7 << 0)
-#define IMTTBCR_TSZ0_SHIFT O
+#define IMTTBCR_SL0_LVL_1 (1 << 4) /* R-Car Gen2 only */
-#define IMBUSCR 0x000c
-#define IMBUSCR_DVM (1 << 2)
-#define IMBUSCR_BUSSEL_SYS (0 << 0)
-#define IMBUSCR_BUSSEL_CCI (1 << 0)
-#define IMBUSCR_BUSSEL_IMCAAR (2 << 0)
-#define IMBUSCR_BUSSEL_CCI_IMCAAR (3 << 0)
-#define IMBUSCR_BUSSEL_MASK (3 << 0)
+#define IMBUSCR 0x000c /* R-Car Gen2 only */
+#define IMBUSCR_DVM (1 << 2) /* R-Car Gen2 only */
+#define IMBUSCR_BUSSEL_MASK (3 << 0) /* R-Car Gen2 only */
-#define IMTTLBR0 0x0010
-#define IMTTUBR0 0x0014
-#define IMTTLBR1 0x0018
-#define IMTTUBR1 0x001c
+#define IMTTLBR0 0x0010 /* R-Car Gen2/3 */
+#define IMTTUBR0 0x0014 /* R-Car Gen2/3 */
-#define IMSTR 0x0020
-#define IMSTR_ERRLVL_MASK (3 << 12)
-#define IMSTR_ERRLVL_SHIFT 12
-#define IMSTR_ERRCODE_TLB_FORMAT (1 << 8)
-#define IMSTR_ERRCODE_ACCESS_PERM (4 << 8)
-#define IMSTR_ERRCODE_SECURE_ACCESS (5 << 8)
-#define IMSTR_ERRCODE_MASK (7 << 8)
-#define IMSTR_MHIT (1 << 4)
-#define IMSTR_ABORT (1 << 2)
-#define IMSTR_PF (1 << 1)
-#define IMSTR_TF (1 << 0)
+#define IMSTR 0x0020 /* R-Car Gen2/3 */
+#define IMSTR_MHIT (1 << 4) /* R-Car Gen2/3 */
+#define IMSTR_ABORT (1 << 2) /* R-Car Gen2/3 */
+#define IMSTR_PF (1 << 1) /* R-Car Gen2/3 */
+#define IMSTR_TF (1 << 0) /* R-Car Gen2/3 */
-#define IMMAIR0 0x0028
-#define IMMAIR1 0x002c
-#define IMMAIR_ATTR_MASK 0xff
-#define IMMAIR_ATTR_DEVICE 0x04
-#define IMMAIR_ATTR_NC 0x44
-#define IMMAIR_ATTR_WBRWA 0xff
-#define IMMAIR_ATTR_SHIFT(n) ((n) << 3)
-#define IMMAIR_ATTR_IDX_NC 0
-#define IMMAIR_ATTR_IDX_WBRWA 1
-#define IMMAIR_ATTR_IDX_DEV 2
+#define IMMAIR0 0x0028 /* R-Car Gen2/3 */
-#define IMELAR 0x0030 /* IMEAR on R-Car Gen2 */
-#define IMEUAR 0x0034 /* R-Car Gen3 only */
+#define IMELAR 0x0030 /* R-Car Gen2/3, IMEAR on R-Car Gen2 */
+#define IMEUAR 0x0034 /* R-Car Gen3 only */
-#define IMPCTR 0x0200
-#define IMPSTR 0x0208
-#define IMPEAR 0x020c
-#define IMPMBA(n) (0x0280 + ((n) * 4))
-#define IMPMBD(n) (0x02c0 + ((n) * 4))
-
+/* uTLB registers */
#define IMUCTR(n) ((n) < 32 ? IMUCTR0(n) : IMUCTR32(n))
-#define IMUCTR0(n) (0x0300 + ((n) * 16))
-#define IMUCTR32(n) (0x0600 + (((n) - 32) * 16))
-#define IMUCTR_FIXADDEN (1 << 31)
-#define IMUCTR_FIXADD_MASK (0xff << 16)
-#define IMUCTR_FIXADD_SHIFT 16
-#define IMUCTR_TTSEL_MMU(n) ((n) << 4)
-#define IMUCTR_TTSEL_PMB (8 << 4)
-#define IMUCTR_TTSEL_MASK (15 << 4)
-#define IMUCTR_FLUSH (1 << 1)
-#define IMUCTR_MMUEN (1 << 0)
+#define IMUCTR0(n) (0x0300 + ((n) * 16)) /* R-Car Gen2/3 */
+#define IMUCTR32(n) (0x0600 + (((n) - 32) * 16)) /* R-Car Gen3 only */
+#define IMUCTR_TTSEL_MMU(n) ((n) << 4) /* R-Car Gen2/3 */
+#define IMUCTR_FLUSH (1 << 1) /* R-Car Gen2/3 */
+#define IMUCTR_MMUEN (1 << 0) /* R-Car Gen2/3 */
#define IMUASID(n) ((n) < 32 ? IMUASID0(n) : IMUASID32(n))
-#define IMUASID0(n) (0x0308 + ((n) * 16))
-#define IMUASID32(n) (0x0608 + (((n) - 32) * 16))
-#define IMUASID_ASID8_MASK (0xff << 8)
-#define IMUASID_ASID8_SHIFT 8
-#define IMUASID_ASID0_MASK (0xff << 0)
-#define IMUASID_ASID0_SHIFT 0
+#define IMUASID0(n) (0x0308 + ((n) * 16)) /* R-Car Gen2/3 */
+#define IMUASID32(n) (0x0608 + (((n) - 32) * 16)) /* R-Car Gen3 only */
/* -----------------------------------------------------------------------------
* Root device handling
@@ -264,29 +188,61 @@
iowrite32(data, mmu->base + offset);
}
+static unsigned int ipmmu_ctx_reg(struct ipmmu_vmsa_device *mmu,
+ unsigned int context_id, unsigned int reg)
+{
+ return mmu->features->ctx_offset_base +
+ context_id * mmu->features->ctx_offset_stride + reg;
+}
+
+static u32 ipmmu_ctx_read(struct ipmmu_vmsa_device *mmu,
+ unsigned int context_id, unsigned int reg)
+{
+ return ipmmu_read(mmu, ipmmu_ctx_reg(mmu, context_id, reg));
+}
+
+static void ipmmu_ctx_write(struct ipmmu_vmsa_device *mmu,
+ unsigned int context_id, unsigned int reg, u32 data)
+{
+ ipmmu_write(mmu, ipmmu_ctx_reg(mmu, context_id, reg), data);
+}
+
static u32 ipmmu_ctx_read_root(struct ipmmu_vmsa_domain *domain,
unsigned int reg)
{
- return ipmmu_read(domain->mmu->root,
- domain->context_id * IM_CTX_SIZE + reg);
+ return ipmmu_ctx_read(domain->mmu->root, domain->context_id, reg);
}
static void ipmmu_ctx_write_root(struct ipmmu_vmsa_domain *domain,
unsigned int reg, u32 data)
{
- ipmmu_write(domain->mmu->root,
- domain->context_id * IM_CTX_SIZE + reg, data);
+ ipmmu_ctx_write(domain->mmu->root, domain->context_id, reg, data);
}
static void ipmmu_ctx_write_all(struct ipmmu_vmsa_domain *domain,
unsigned int reg, u32 data)
{
if (domain->mmu != domain->mmu->root)
- ipmmu_write(domain->mmu,
- domain->context_id * IM_CTX_SIZE + reg, data);
+ ipmmu_ctx_write(domain->mmu, domain->context_id, reg, data);
- ipmmu_write(domain->mmu->root,
- domain->context_id * IM_CTX_SIZE + reg, data);
+ ipmmu_ctx_write(domain->mmu->root, domain->context_id, reg, data);
+}
+
+static u32 ipmmu_utlb_reg(struct ipmmu_vmsa_device *mmu, unsigned int reg)
+{
+ return mmu->features->utlb_offset_base + reg;
+}
+
+static void ipmmu_imuasid_write(struct ipmmu_vmsa_device *mmu,
+ unsigned int utlb, u32 data)
+{
+ ipmmu_write(mmu, ipmmu_utlb_reg(mmu, IMUASID(utlb)), data);
+}
+
+static void ipmmu_imuctr_write(struct ipmmu_vmsa_device *mmu,
+ unsigned int utlb, u32 data)
+{
+ ipmmu_write(mmu, ipmmu_utlb_reg(mmu, IMUCTR(utlb)), data);
}
/* -----------------------------------------------------------------------------
@@ -334,11 +290,10 @@
*/
/* TODO: What should we set the ASID to ? */
- ipmmu_write(mmu, IMUASID(utlb), 0);
+ ipmmu_imuasid_write(mmu, utlb, 0);
/* TODO: Do we need to flush the microTLB ? */
- ipmmu_write(mmu, IMUCTR(utlb),
- IMUCTR_TTSEL_MMU(domain->context_id) | IMUCTR_FLUSH |
- IMUCTR_MMUEN);
+ ipmmu_imuctr_write(mmu, utlb, IMUCTR_TTSEL_MMU(domain->context_id) |
+ IMUCTR_FLUSH | IMUCTR_MMUEN);
mmu->utlb_ctx[utlb] = domain->context_id;
}
@@ -350,7 +305,7 @@
{
struct ipmmu_vmsa_device *mmu = domain->mmu;
- ipmmu_write(mmu, IMUCTR(utlb), 0);
+ ipmmu_imuctr_write(mmu, utlb, 0);
mmu->utlb_ctx[utlb] = IPMMU_CTX_INVALID;
}
@@ -416,7 +371,7 @@
u32 tmp;
/* TTBR0 */
- ttbr = domain->cfg.arm_lpae_s1_cfg.ttbr[0];
+ ttbr = domain->cfg.arm_lpae_s1_cfg.ttbr;
ipmmu_ctx_write_root(domain, IMTTLBR0, ttbr);
ipmmu_ctx_write_root(domain, IMTTUBR0, ttbr >> 32);
@@ -438,7 +393,7 @@
/* MAIR0 */
ipmmu_ctx_write_root(domain, IMMAIR0,
- domain->cfg.arm_lpae_s1_cfg.mair[0]);
+ domain->cfg.arm_lpae_s1_cfg.mair);
/* IMBUSCR */
if (domain->mmu->features->setup_imbuscr)
@@ -724,14 +679,14 @@
}
static int ipmmu_map(struct iommu_domain *io_domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot)
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
if (!domain)
return -ENODEV;
- return domain->iop->map(domain->iop, iova, paddr, size, prot);
+ return domain->iop->map(domain->iop, iova, paddr, size, prot, gfp);
}
static size_t ipmmu_unmap(struct iommu_domain *io_domain, unsigned long iova,
@@ -769,22 +724,24 @@
static int ipmmu_init_platform_device(struct device *dev,
struct of_phandle_args *args)
{
- struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct platform_device *ipmmu_pdev;
ipmmu_pdev = of_find_device_by_node(args->np);
if (!ipmmu_pdev)
return -ENODEV;
- fwspec->iommu_priv = platform_get_drvdata(ipmmu_pdev);
+ dev_iommu_priv_set(dev, platform_get_drvdata(ipmmu_pdev));
return 0;
}
static const struct soc_device_attribute soc_rcar_gen3[] = {
{ .soc_id = "r8a774a1", },
+ { .soc_id = "r8a774b1", },
{ .soc_id = "r8a774c0", },
+ { .soc_id = "r8a774e1", },
{ .soc_id = "r8a7795", },
+ { .soc_id = "r8a77961", },
{ .soc_id = "r8a7796", },
{ .soc_id = "r8a77965", },
{ .soc_id = "r8a77970", },
@@ -794,8 +751,11 @@
};
static const struct soc_device_attribute soc_rcar_gen3_whitelist[] = {
+ { .soc_id = "r8a774b1", },
{ .soc_id = "r8a774c0", },
+ { .soc_id = "r8a774e1", },
{ .soc_id = "r8a7795", .revision = "ES3.*" },
+ { .soc_id = "r8a77961", },
{ .soc_id = "r8a77965", },
{ .soc_id = "r8a77990", },
{ .soc_id = "r8a77995", },
@@ -848,24 +808,8 @@
static int ipmmu_init_arm_mapping(struct device *dev)
{
struct ipmmu_vmsa_device *mmu = to_ipmmu(dev);
- struct iommu_group *group;
int ret;
- /* Create a device group and add the device to it. */
- group = iommu_group_alloc();
- if (IS_ERR(group)) {
- dev_err(dev, "Failed to allocate IOMMU group\n");
- return PTR_ERR(group);
- }
-
- ret = iommu_group_add_device(group, dev);
- iommu_group_put(group);
-
- if (ret < 0) {
- dev_err(dev, "Failed to add device to IPMMU group\n");
- return ret;
- }
-
/*
* Create the ARM mapping, used by the ARM DMA mapping core to allocate
* VAs. This will allocate a corresponding IOMMU domain.
@@ -899,48 +843,39 @@
return 0;
error:
- iommu_group_remove_device(dev);
if (mmu->mapping)
arm_iommu_release_mapping(mmu->mapping);
return ret;
}
-static int ipmmu_add_device(struct device *dev)
+static struct iommu_device *ipmmu_probe_device(struct device *dev)
{
struct ipmmu_vmsa_device *mmu = to_ipmmu(dev);
- struct iommu_group *group;
- int ret;
/*
* Only let through devices that have been verified in xlate()
*/
if (!mmu)
- return -ENODEV;
+ return ERR_PTR(-ENODEV);
- if (IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_IOMMU_DMA)) {
- ret = ipmmu_init_arm_mapping(dev);
- if (ret)
- return ret;
- } else {
- group = iommu_group_get_for_dev(dev);
- if (IS_ERR(group))
- return PTR_ERR(group);
-
- iommu_group_put(group);
- }
-
- iommu_device_link(&mmu->iommu, dev);
- return 0;
+ return &mmu->iommu;
}
-static void ipmmu_remove_device(struct device *dev)
+static void ipmmu_probe_finalize(struct device *dev)
{
- struct ipmmu_vmsa_device *mmu = to_ipmmu(dev);
+ int ret = 0;
- iommu_device_unlink(&mmu->iommu, dev);
+ if (IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_IOMMU_DMA))
+ ret = ipmmu_init_arm_mapping(dev);
+
+ if (ret)
+ dev_err(dev, "Can't create IOMMU mapping - DMA-OPS will not work\n");
+}
+
+static void ipmmu_release_device(struct device *dev)
+{
arm_iommu_detach_device(dev);
- iommu_group_remove_device(dev);
}
static struct iommu_group *ipmmu_find_group(struct device *dev)
@@ -968,9 +903,11 @@
.flush_iotlb_all = ipmmu_flush_iotlb_all,
.iotlb_sync = ipmmu_iotlb_sync,
.iova_to_phys = ipmmu_iova_to_phys,
- .add_device = ipmmu_add_device,
- .remove_device = ipmmu_remove_device,
- .device_group = ipmmu_find_group,
+ .probe_device = ipmmu_probe_device,
+ .release_device = ipmmu_release_device,
+ .probe_finalize = ipmmu_probe_finalize,
+ .device_group = IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_IOMMU_DMA)
+ ? generic_device_group : ipmmu_find_group,
.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K,
.of_xlate = ipmmu_of_xlate,
};
@@ -985,7 +922,7 @@
/* Disable all contexts. */
for (i = 0; i < mmu->num_ctx; ++i)
- ipmmu_write(mmu, i * IM_CTX_SIZE + IMCTR, 0);
+ ipmmu_ctx_write(mmu, i, IMCTR, 0);
}
static const struct ipmmu_features ipmmu_features_default = {
@@ -997,6 +934,9 @@
.twobit_imttbcr_sl0 = false,
.reserved_context = false,
.cache_snoop = true,
+ .ctx_offset_base = 0,
+ .ctx_offset_stride = 0x40,
+ .utlb_offset_base = 0,
};
static const struct ipmmu_features ipmmu_features_rcar_gen3 = {
@@ -1008,6 +948,9 @@
.twobit_imttbcr_sl0 = true,
.reserved_context = true,
.cache_snoop = false,
+ .ctx_offset_base = 0,
+ .ctx_offset_stride = 0x40,
+ .utlb_offset_base = 0,
};
static const struct of_device_id ipmmu_of_ids[] = {
@@ -1018,15 +961,24 @@
.compatible = "renesas,ipmmu-r8a774a1",
.data = &ipmmu_features_rcar_gen3,
}, {
+ .compatible = "renesas,ipmmu-r8a774b1",
+ .data = &ipmmu_features_rcar_gen3,
+ }, {
.compatible = "renesas,ipmmu-r8a774c0",
.data = &ipmmu_features_rcar_gen3,
}, {
+ .compatible = "renesas,ipmmu-r8a774e1",
+ .data = &ipmmu_features_rcar_gen3,
+ }, {
.compatible = "renesas,ipmmu-r8a7795",
.data = &ipmmu_features_rcar_gen3,
}, {
.compatible = "renesas,ipmmu-r8a7796",
.data = &ipmmu_features_rcar_gen3,
}, {
+ .compatible = "renesas,ipmmu-r8a77961",
+ .data = &ipmmu_features_rcar_gen3,
+ }, {
.compatible = "renesas,ipmmu-r8a77965",
.data = &ipmmu_features_rcar_gen3,
}, {
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 83f36f6..2d84b1e 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -160,33 +160,12 @@
}
/**
- * irq_remapping_get_ir_irq_domain - Get the irqdomain associated with the IOMMU
- * device serving request @info
- * @info: interrupt allocation information, used to identify the IOMMU device
- *
- * It's used to get parent irqdomain for HPET and IOAPIC irqdomains.
- * Returns pointer to IRQ domain, or NULL on failure.
- */
-struct irq_domain *
-irq_remapping_get_ir_irq_domain(struct irq_alloc_info *info)
-{
- if (!remap_ops || !remap_ops->get_ir_irq_domain)
- return NULL;
-
- return remap_ops->get_ir_irq_domain(info);
-}
-
-/**
* irq_remapping_get_irq_domain - Get the irqdomain serving the request @info
* @info: interrupt allocation information, used to identify the IOMMU device
*
- * There will be one PCI MSI/MSIX irqdomain associated with each interrupt
- * remapping device, so this interface is used to retrieve the PCI MSI/MSIX
- * irqdomain serving request @info.
* Returns pointer to IRQ domain, or NULL on failure.
*/
-struct irq_domain *
-irq_remapping_get_irq_domain(struct irq_alloc_info *info)
+struct irq_domain *irq_remapping_get_irq_domain(struct irq_alloc_info *info)
{
if (!remap_ops || !remap_ops->get_irq_domain)
return NULL;
diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h
index 6a190d5..1661b3d 100644
--- a/drivers/iommu/irq_remapping.h
+++ b/drivers/iommu/irq_remapping.h
@@ -43,10 +43,7 @@
/* Enable fault handling */
int (*enable_faulting)(void);
- /* Get the irqdomain associated the IOMMU device */
- struct irq_domain *(*get_ir_irq_domain)(struct irq_alloc_info *);
-
- /* Get the MSI irqdomain associated with the IOMMU device */
+ /* Get the irqdomain associated to IOMMU device */
struct irq_domain *(*get_irq_domain)(struct irq_alloc_info *);
};
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index be99d40..3615cd6 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -34,7 +34,7 @@
/* bitmap of the page sizes currently supported */
#define MSM_IOMMU_PGSIZES (SZ_4K | SZ_64K | SZ_1M | SZ_16M)
-DEFINE_SPINLOCK(msm_iommu_lock);
+static DEFINE_SPINLOCK(msm_iommu_lock);
static LIST_HEAD(qcom_iommu_devices);
static struct iommu_ops msm_iommu_ops;
@@ -279,8 +279,8 @@
SET_V2PCFG(base, ctx, 0x3);
SET_TTBCR(base, ctx, priv->cfg.arm_v7s_cfg.tcr);
- SET_TTBR0(base, ctx, priv->cfg.arm_v7s_cfg.ttbr[0]);
- SET_TTBR1(base, ctx, priv->cfg.arm_v7s_cfg.ttbr[1]);
+ SET_TTBR0(base, ctx, priv->cfg.arm_v7s_cfg.ttbr);
+ SET_TTBR1(base, ctx, 0);
/* Set prrr and nmrr */
SET_PRRR(base, ctx, priv->cfg.arm_v7s_cfg.prrr);
@@ -388,43 +388,23 @@
return ret;
}
-static int msm_iommu_add_device(struct device *dev)
+static struct iommu_device *msm_iommu_probe_device(struct device *dev)
{
struct msm_iommu_dev *iommu;
- struct iommu_group *group;
unsigned long flags;
spin_lock_irqsave(&msm_iommu_lock, flags);
iommu = find_iommu_for_dev(dev);
spin_unlock_irqrestore(&msm_iommu_lock, flags);
- if (iommu)
- iommu_device_link(&iommu->iommu, dev);
- else
- return -ENODEV;
+ if (!iommu)
+ return ERR_PTR(-ENODEV);
- group = iommu_group_get_for_dev(dev);
- if (IS_ERR(group))
- return PTR_ERR(group);
-
- iommu_group_put(group);
-
- return 0;
+ return &iommu->iommu;
}
-static void msm_iommu_remove_device(struct device *dev)
+static void msm_iommu_release_device(struct device *dev)
{
- struct msm_iommu_dev *iommu;
- unsigned long flags;
-
- spin_lock_irqsave(&msm_iommu_lock, flags);
- iommu = find_iommu_for_dev(dev);
- spin_unlock_irqrestore(&msm_iommu_lock, flags);
-
- if (iommu)
- iommu_device_unlink(&iommu->iommu, dev);
-
- iommu_group_remove_device(dev);
}
static int msm_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
@@ -504,14 +484,14 @@
}
static int msm_iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t pa, size_t len, int prot)
+ phys_addr_t pa, size_t len, int prot, gfp_t gfp)
{
struct msm_priv *priv = to_msm_priv(domain);
unsigned long flags;
int ret;
spin_lock_irqsave(&priv->pgtlock, flags);
- ret = priv->iop->map(priv->iop, iova, pa, len, prot);
+ ret = priv->iop->map(priv->iop, iova, pa, len, prot, GFP_ATOMIC);
spin_unlock_irqrestore(&priv->pgtlock, flags);
return ret;
@@ -613,14 +593,14 @@
struct msm_iommu_dev **iommu,
struct of_phandle_args *spec)
{
- struct msm_iommu_ctx_dev *master = dev->archdata.iommu;
+ struct msm_iommu_ctx_dev *master = dev_iommu_priv_get(dev);
int sid;
if (list_empty(&(*iommu)->ctx_list)) {
master = kzalloc(sizeof(*master), GFP_ATOMIC);
master->of_node = dev->of_node;
list_add(&master->list, &(*iommu)->ctx_list);
- dev->archdata.iommu = master;
+ dev_iommu_priv_set(dev, master);
}
for (sid = 0; sid < master->num_mids; sid++)
@@ -708,8 +688,8 @@
*/
.iotlb_sync = NULL,
.iova_to_phys = msm_iommu_iova_to_phys,
- .add_device = msm_iommu_add_device,
- .remove_device = msm_iommu_remove_device,
+ .probe_device = msm_iommu_probe_device,
+ .release_device = msm_iommu_release_device,
.device_group = generic_device_group,
.pgsize_bitmap = MSM_IOMMU_PGSIZES,
.of_xlate = qcom_iommu_of_xlate,
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index c2f6c78..19387d2 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -3,7 +3,6 @@
* Copyright (c) 2015-2016 MediaTek Inc.
* Author: Yong Wu <yong.wu@mediatek.com>
*/
-#include <linux/memblock.h>
#include <linux/bug.h>
#include <linux/clk.h>
#include <linux/component.h>
@@ -15,13 +14,16 @@
#include <linux/iommu.h>
#include <linux/iopoll.h>
#include <linux/list.h>
+#include <linux/mfd/syscon.h>
#include <linux/of_address.h>
#include <linux/of_iommu.h>
#include <linux/of_irq.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
+#include <linux/regmap.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/soc/mediatek/infracfg.h>
#include <asm/barrier.h>
#include <soc/mediatek/smi.h>
@@ -37,12 +39,18 @@
#define REG_MMU_INVLD_START_A 0x024
#define REG_MMU_INVLD_END_A 0x028
-#define REG_MMU_INV_SEL 0x038
+#define REG_MMU_INV_SEL_GEN2 0x02c
+#define REG_MMU_INV_SEL_GEN1 0x038
#define F_INVLD_EN0 BIT(0)
#define F_INVLD_EN1 BIT(1)
-#define REG_MMU_STANDARD_AXI_MODE 0x048
+#define REG_MMU_MISC_CTRL 0x048
+#define F_MMU_IN_ORDER_WR_EN_MASK (BIT(1) | BIT(17))
+#define F_MMU_STANDARD_AXI_MODE_MASK (BIT(3) | BIT(19))
+
#define REG_MMU_DCM_DIS 0x050
+#define REG_MMU_WR_LEN_CTRL 0x054
+#define F_MMU_WR_THROT_DIS_MASK (BIT(5) | BIT(21))
#define REG_MMU_CTRL_REG 0x110
#define F_MMU_TF_PROT_TO_PROGRAM_ADDR (2 << 4)
@@ -88,10 +96,12 @@
#define REG_MMU1_INVLD_PA 0x148
#define REG_MMU0_INT_ID 0x150
#define REG_MMU1_INT_ID 0x154
+#define F_MMU_INT_ID_COMM_ID(a) (((a) >> 9) & 0x7)
+#define F_MMU_INT_ID_SUB_COMM_ID(a) (((a) >> 7) & 0x3)
#define F_MMU_INT_ID_LARB_ID(a) (((a) >> 7) & 0x7)
#define F_MMU_INT_ID_PORT_ID(a) (((a) >> 2) & 0x1f)
-#define MTK_PROTECT_PA_ALIGN 128
+#define MTK_PROTECT_PA_ALIGN 256
/*
* Get the local arbiter ID and the portid within the larb arbiter
@@ -100,9 +110,20 @@
#define MTK_M4U_TO_LARB(id) (((id) >> 5) & 0xf)
#define MTK_M4U_TO_PORT(id) ((id) & 0x1f)
-struct mtk_iommu_domain {
- spinlock_t pgtlock; /* lock for page table */
+#define HAS_4GB_MODE BIT(0)
+/* HW will use the EMI clock if there isn't the "bclk". */
+#define HAS_BCLK BIT(1)
+#define HAS_VLD_PA_RNG BIT(2)
+#define RESET_AXI BIT(3)
+#define OUT_ORDER_WR_EN BIT(4)
+#define HAS_SUB_COMM BIT(5)
+#define WR_THROT_EN BIT(6)
+#define HAS_LEGACY_IVRP_PADDR BIT(7)
+#define MTK_IOMMU_HAS_FLAG(pdata, _x) \
+ ((((pdata)->flags) & (_x)) == (_x))
+
+struct mtk_iommu_domain {
struct io_pgtable_cfg cfg;
struct io_pgtable_ops *iop;
@@ -167,44 +188,34 @@
for_each_m4u(data) {
writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0,
- data->base + REG_MMU_INV_SEL);
+ data->base + data->plat_data->inv_sel_reg);
writel_relaxed(F_ALL_INVLD, data->base + REG_MMU_INVALIDATE);
wmb(); /* Make sure the tlb flush all done */
}
}
-static void mtk_iommu_tlb_add_flush_nosync(unsigned long iova, size_t size,
- size_t granule, bool leaf,
- void *cookie)
+static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
{
struct mtk_iommu_data *data = cookie;
+ unsigned long flags;
+ int ret;
+ u32 tmp;
for_each_m4u(data) {
+ spin_lock_irqsave(&data->tlb_lock, flags);
writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0,
- data->base + REG_MMU_INV_SEL);
+ data->base + data->plat_data->inv_sel_reg);
writel_relaxed(iova, data->base + REG_MMU_INVLD_START_A);
writel_relaxed(iova + size - 1,
data->base + REG_MMU_INVLD_END_A);
writel_relaxed(F_MMU_INV_RANGE,
data->base + REG_MMU_INVALIDATE);
- data->tlb_flush_active = true;
- }
-}
-static void mtk_iommu_tlb_sync(void *cookie)
-{
- struct mtk_iommu_data *data = cookie;
- int ret;
- u32 tmp;
-
- for_each_m4u(data) {
- /* Avoid timing out if there's nothing to wait for */
- if (!data->tlb_flush_active)
- return;
-
+ /* tlb sync */
ret = readl_poll_timeout_atomic(data->base + REG_MMU_CPE_DONE,
- tmp, tmp != 0, 10, 100000);
+ tmp, tmp != 0, 10, 1000);
if (ret) {
dev_warn(data->dev,
"Partial TLB flush timed out, falling back to full flush\n");
@@ -212,50 +223,24 @@
}
/* Clear the CPE status */
writel_relaxed(0, data->base + REG_MMU_CPE_DONE);
- data->tlb_flush_active = false;
+ spin_unlock_irqrestore(&data->tlb_lock, flags);
}
}
-static void mtk_iommu_tlb_flush_walk(unsigned long iova, size_t size,
- size_t granule, void *cookie)
-{
- struct mtk_iommu_data *data = cookie;
- unsigned long flags;
-
- spin_lock_irqsave(&data->tlb_lock, flags);
- mtk_iommu_tlb_add_flush_nosync(iova, size, granule, false, cookie);
- mtk_iommu_tlb_sync(cookie);
- spin_unlock_irqrestore(&data->tlb_lock, flags);
-}
-
-static void mtk_iommu_tlb_flush_leaf(unsigned long iova, size_t size,
- size_t granule, void *cookie)
-{
- struct mtk_iommu_data *data = cookie;
- unsigned long flags;
-
- spin_lock_irqsave(&data->tlb_lock, flags);
- mtk_iommu_tlb_add_flush_nosync(iova, size, granule, true, cookie);
- mtk_iommu_tlb_sync(cookie);
- spin_unlock_irqrestore(&data->tlb_lock, flags);
-}
-
static void mtk_iommu_tlb_flush_page_nosync(struct iommu_iotlb_gather *gather,
unsigned long iova, size_t granule,
void *cookie)
{
struct mtk_iommu_data *data = cookie;
- unsigned long flags;
+ struct iommu_domain *domain = &data->m4u_dom->domain;
- spin_lock_irqsave(&data->tlb_lock, flags);
- mtk_iommu_tlb_add_flush_nosync(iova, granule, granule, true, cookie);
- spin_unlock_irqrestore(&data->tlb_lock, flags);
+ iommu_iotlb_gather_add_page(domain, gather, iova, granule);
}
static const struct iommu_flush_ops mtk_iommu_flush_ops = {
.tlb_flush_all = mtk_iommu_tlb_flush_all,
- .tlb_flush_walk = mtk_iommu_tlb_flush_walk,
- .tlb_flush_leaf = mtk_iommu_tlb_flush_leaf,
+ .tlb_flush_walk = mtk_iommu_tlb_flush_range_sync,
+ .tlb_flush_leaf = mtk_iommu_tlb_flush_range_sync,
.tlb_add_page = mtk_iommu_tlb_flush_page_nosync,
};
@@ -264,7 +249,7 @@
struct mtk_iommu_data *data = dev_id;
struct mtk_iommu_domain *dom = data->m4u_dom;
u32 int_state, regval, fault_iova, fault_pa;
- unsigned int fault_larb, fault_port;
+ unsigned int fault_larb, fault_port, sub_comm = 0;
bool layer, write;
/* Read error info from registers */
@@ -280,10 +265,14 @@
}
layer = fault_iova & F_MMU_FAULT_VA_LAYER_BIT;
write = fault_iova & F_MMU_FAULT_VA_WRITE_BIT;
- fault_larb = F_MMU_INT_ID_LARB_ID(regval);
fault_port = F_MMU_INT_ID_PORT_ID(regval);
-
- fault_larb = data->plat_data->larbid_remap[fault_larb];
+ if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_SUB_COMM)) {
+ fault_larb = F_MMU_INT_ID_COMM_ID(regval);
+ sub_comm = F_MMU_INT_ID_SUB_COMM_ID(regval);
+ } else {
+ fault_larb = F_MMU_INT_ID_LARB_ID(regval);
+ }
+ fault_larb = data->plat_data->larbid_remap[fault_larb][sub_comm];
if (report_iommu_fault(&dom->domain, data->dev, fault_iova,
write ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ)) {
@@ -331,8 +320,6 @@
{
struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
- spin_lock_init(&dom->pgtlock);
-
dom->cfg = (struct io_pgtable_cfg) {
.quirks = IO_PGTABLE_QUIRK_ARM_NS |
IO_PGTABLE_QUIRK_NO_PERMS |
@@ -398,8 +385,8 @@
static int mtk_iommu_attach_device(struct iommu_domain *domain,
struct device *dev)
{
+ struct mtk_iommu_data *data = dev_iommu_priv_get(dev);
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
- struct mtk_iommu_data *data = dev_iommu_fwspec_get(dev)->iommu_priv;
if (!data)
return -ENODEV;
@@ -407,7 +394,7 @@
/* Update the pgtable base address register of the M4U HW */
if (!data->m4u_dom) {
data->m4u_dom = dom;
- writel(dom->cfg.arm_v7s_cfg.ttbr[0] & MMU_PT_ADDR_MASK,
+ writel(dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK,
data->base + REG_MMU_PT_BASE_ADDR);
}
@@ -418,7 +405,7 @@
static void mtk_iommu_detach_device(struct iommu_domain *domain,
struct device *dev)
{
- struct mtk_iommu_data *data = dev_iommu_fwspec_get(dev)->iommu_priv;
+ struct mtk_iommu_data *data = dev_iommu_priv_get(dev);
if (!data)
return;
@@ -427,22 +414,17 @@
}
static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot)
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
- unsigned long flags;
- int ret;
/* The "4GB mode" M4U physically can not use the lower remap of Dram. */
if (data->enable_4GB)
paddr |= BIT_ULL(32);
- spin_lock_irqsave(&dom->pgtlock, flags);
- ret = dom->iop->map(dom->iop, iova, paddr, size, prot);
- spin_unlock_irqrestore(&dom->pgtlock, flags);
-
- return ret;
+ /* Synchronize with the tlb_lock */
+ return dom->iop->map(dom->iop, iova, paddr, size, prot, gfp);
}
static size_t mtk_iommu_unmap(struct iommu_domain *domain,
@@ -450,14 +432,8 @@
struct iommu_iotlb_gather *gather)
{
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
- unsigned long flags;
- size_t unmapsz;
- spin_lock_irqsave(&dom->pgtlock, flags);
- unmapsz = dom->iop->unmap(dom->iop, iova, size, gather);
- spin_unlock_irqrestore(&dom->pgtlock, flags);
-
- return unmapsz;
+ return dom->iop->unmap(dom->iop, iova, size, gather);
}
static void mtk_iommu_flush_iotlb_all(struct iommu_domain *domain)
@@ -469,11 +445,13 @@
struct iommu_iotlb_gather *gather)
{
struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
- unsigned long flags;
+ size_t length = gather->end - gather->start + 1;
- spin_lock_irqsave(&data->tlb_lock, flags);
- mtk_iommu_tlb_sync(data);
- spin_unlock_irqrestore(&data->tlb_lock, flags);
+ if (gather->start == ULONG_MAX)
+ return;
+
+ mtk_iommu_tlb_flush_range_sync(gather->start, length, gather->pgsize,
+ data);
}
static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,
@@ -481,51 +459,35 @@
{
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
- unsigned long flags;
phys_addr_t pa;
- spin_lock_irqsave(&dom->pgtlock, flags);
pa = dom->iop->iova_to_phys(dom->iop, iova);
- spin_unlock_irqrestore(&dom->pgtlock, flags);
-
if (data->enable_4GB && pa >= MTK_IOMMU_4GB_MODE_REMAP_BASE)
pa &= ~BIT_ULL(32);
return pa;
}
-static int mtk_iommu_add_device(struct device *dev)
+static struct iommu_device *mtk_iommu_probe_device(struct device *dev)
{
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct mtk_iommu_data *data;
- struct iommu_group *group;
if (!fwspec || fwspec->ops != &mtk_iommu_ops)
- return -ENODEV; /* Not a iommu client device */
+ return ERR_PTR(-ENODEV); /* Not a iommu client device */
- data = fwspec->iommu_priv;
- iommu_device_link(&data->iommu, dev);
+ data = dev_iommu_priv_get(dev);
- group = iommu_group_get_for_dev(dev);
- if (IS_ERR(group))
- return PTR_ERR(group);
-
- iommu_group_put(group);
- return 0;
+ return &data->iommu;
}
-static void mtk_iommu_remove_device(struct device *dev)
+static void mtk_iommu_release_device(struct device *dev)
{
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
- struct mtk_iommu_data *data;
if (!fwspec || fwspec->ops != &mtk_iommu_ops)
return;
- data = fwspec->iommu_priv;
- iommu_device_unlink(&data->iommu, dev);
-
- iommu_group_remove_device(dev);
iommu_fwspec_free(dev);
}
@@ -549,7 +511,6 @@
static int mtk_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
{
- struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct platform_device *m4updev;
if (args->args_count != 1) {
@@ -558,13 +519,13 @@
return -EINVAL;
}
- if (!fwspec->iommu_priv) {
+ if (!dev_iommu_priv_get(dev)) {
/* Get the m4u device */
m4updev = of_find_device_by_node(args->np);
if (WARN_ON(!m4updev))
return -EINVAL;
- fwspec->iommu_priv = platform_get_drvdata(m4updev);
+ dev_iommu_priv_set(dev, platform_get_drvdata(m4updev));
}
return iommu_fwspec_add_ids(dev, args->args, 1);
@@ -580,8 +541,8 @@
.flush_iotlb_all = mtk_iommu_flush_iotlb_all,
.iotlb_sync = mtk_iommu_iotlb_sync,
.iova_to_phys = mtk_iommu_iova_to_phys,
- .add_device = mtk_iommu_add_device,
- .remove_device = mtk_iommu_remove_device,
+ .probe_device = mtk_iommu_probe_device,
+ .release_device = mtk_iommu_release_device,
.device_group = mtk_iommu_device_group,
.of_xlate = mtk_iommu_of_xlate,
.pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M,
@@ -598,11 +559,13 @@
return ret;
}
- if (data->plat_data->m4u_plat == M4U_MT8173)
+ if (data->plat_data->m4u_plat == M4U_MT8173) {
regval = F_MMU_PREFETCH_RT_REPLACE_MOD |
F_MMU_TF_PROT_TO_PROGRAM_ADDR_MT8173;
- else
- regval = F_MMU_TF_PROT_TO_PROGRAM_ADDR;
+ } else {
+ regval = readl_relaxed(data->base + REG_MMU_CTRL_REG);
+ regval |= F_MMU_TF_PROT_TO_PROGRAM_ADDR;
+ }
writel_relaxed(regval, data->base + REG_MMU_CTRL_REG);
regval = F_L2_MULIT_HIT_EN |
@@ -622,14 +585,15 @@
F_INT_PRETETCH_TRANSATION_FIFO_FAULT;
writel_relaxed(regval, data->base + REG_MMU_INT_MAIN_CONTROL);
- if (data->plat_data->m4u_plat == M4U_MT8173)
+ if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_LEGACY_IVRP_PADDR))
regval = (data->protect_base >> 1) | (data->enable_4GB << 31);
else
regval = lower_32_bits(data->protect_base) |
upper_32_bits(data->protect_base);
writel_relaxed(regval, data->base + REG_MMU_IVRP_PADDR);
- if (data->enable_4GB && data->plat_data->has_vld_pa_rng) {
+ if (data->enable_4GB &&
+ MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_VLD_PA_RNG)) {
/*
* If 4GB mode is enabled, the validate PA range is from
* 0x1_0000_0000 to 0x1_ffff_ffff. here record bit[32:30].
@@ -638,9 +602,23 @@
writel_relaxed(regval, data->base + REG_MMU_VLD_PA_RNG);
}
writel_relaxed(0, data->base + REG_MMU_DCM_DIS);
+ if (MTK_IOMMU_HAS_FLAG(data->plat_data, WR_THROT_EN)) {
+ /* write command throttling mode */
+ regval = readl_relaxed(data->base + REG_MMU_WR_LEN_CTRL);
+ regval &= ~F_MMU_WR_THROT_DIS_MASK;
+ writel_relaxed(regval, data->base + REG_MMU_WR_LEN_CTRL);
+ }
- if (data->plat_data->reset_axi)
- writel_relaxed(0, data->base + REG_MMU_STANDARD_AXI_MODE);
+ if (MTK_IOMMU_HAS_FLAG(data->plat_data, RESET_AXI)) {
+ /* The register is called STANDARD_AXI_MODE in this case */
+ regval = 0;
+ } else {
+ regval = readl_relaxed(data->base + REG_MMU_MISC_CTRL);
+ regval &= ~F_MMU_STANDARD_AXI_MODE_MASK;
+ if (MTK_IOMMU_HAS_FLAG(data->plat_data, OUT_ORDER_WR_EN))
+ regval &= ~F_MMU_IN_ORDER_WR_EN_MASK;
+ }
+ writel_relaxed(regval, data->base + REG_MMU_MISC_CTRL);
if (devm_request_irq(data->dev, data->irq, mtk_iommu_isr, 0,
dev_name(data->dev), (void *)data)) {
@@ -665,8 +643,11 @@
struct resource *res;
resource_size_t ioaddr;
struct component_match *match = NULL;
+ struct regmap *infracfg;
void *protect;
int i, larb_nr, ret;
+ u32 val;
+ char *p;
data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
if (!data)
@@ -680,10 +661,28 @@
return -ENOMEM;
data->protect_base = ALIGN(virt_to_phys(protect), MTK_PROTECT_PA_ALIGN);
- /* Whether the current dram is over 4GB */
- data->enable_4GB = !!(max_pfn > (BIT_ULL(32) >> PAGE_SHIFT));
- if (!data->plat_data->has_4gb_mode)
- data->enable_4GB = false;
+ if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_4GB_MODE)) {
+ switch (data->plat_data->m4u_plat) {
+ case M4U_MT2712:
+ p = "mediatek,mt2712-infracfg";
+ break;
+ case M4U_MT8173:
+ p = "mediatek,mt8173-infracfg";
+ break;
+ default:
+ p = NULL;
+ }
+
+ infracfg = syscon_regmap_lookup_by_compatible(p);
+
+ if (IS_ERR(infracfg))
+ return PTR_ERR(infracfg);
+
+ ret = regmap_read(infracfg, REG_INFRA_MISC, &val);
+ if (ret)
+ return ret;
+ data->enable_4GB = !!(val & F_DDR_4GB_SUPPORT_EN);
+ }
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
data->base = devm_ioremap_resource(dev, res);
@@ -695,7 +694,7 @@
if (data->irq < 0)
return data->irq;
- if (data->plat_data->has_bclk) {
+ if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_BCLK)) {
data->bclk = devm_clk_get(dev, "bclk");
if (IS_ERR(data->bclk))
return PTR_ERR(data->bclk);
@@ -784,8 +783,8 @@
struct mtk_iommu_suspend_reg *reg = &data->reg;
void __iomem *base = data->base;
- reg->standard_axi_mode = readl_relaxed(base +
- REG_MMU_STANDARD_AXI_MODE);
+ reg->wr_len_ctrl = readl_relaxed(base + REG_MMU_WR_LEN_CTRL);
+ reg->misc_ctrl = readl_relaxed(base + REG_MMU_MISC_CTRL);
reg->dcm_dis = readl_relaxed(base + REG_MMU_DCM_DIS);
reg->ctrl_reg = readl_relaxed(base + REG_MMU_CTRL_REG);
reg->int_control0 = readl_relaxed(base + REG_MMU_INT_CONTROL0);
@@ -809,8 +808,8 @@
dev_err(data->dev, "Failed to enable clk(%d) in resume\n", ret);
return ret;
}
- writel_relaxed(reg->standard_axi_mode,
- base + REG_MMU_STANDARD_AXI_MODE);
+ writel_relaxed(reg->wr_len_ctrl, base + REG_MMU_WR_LEN_CTRL);
+ writel_relaxed(reg->misc_ctrl, base + REG_MMU_MISC_CTRL);
writel_relaxed(reg->dcm_dis, base + REG_MMU_DCM_DIS);
writel_relaxed(reg->ctrl_reg, base + REG_MMU_CTRL_REG);
writel_relaxed(reg->int_control0, base + REG_MMU_INT_CONTROL0);
@@ -818,7 +817,7 @@
writel_relaxed(reg->ivrp_paddr, base + REG_MMU_IVRP_PADDR);
writel_relaxed(reg->vld_pa_rng, base + REG_MMU_VLD_PA_RNG);
if (m4u_dom)
- writel(m4u_dom->cfg.arm_v7s_cfg.ttbr[0] & MMU_PT_ADDR_MASK,
+ writel(m4u_dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK,
base + REG_MMU_PT_BASE_ADDR);
return 0;
}
@@ -829,28 +828,44 @@
static const struct mtk_iommu_plat_data mt2712_data = {
.m4u_plat = M4U_MT2712,
- .has_4gb_mode = true,
- .has_bclk = true,
- .has_vld_pa_rng = true,
- .larbid_remap = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
+ .flags = HAS_4GB_MODE | HAS_BCLK | HAS_VLD_PA_RNG,
+ .inv_sel_reg = REG_MMU_INV_SEL_GEN1,
+ .larbid_remap = {{0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}},
+};
+
+static const struct mtk_iommu_plat_data mt6779_data = {
+ .m4u_plat = M4U_MT6779,
+ .flags = HAS_SUB_COMM | OUT_ORDER_WR_EN | WR_THROT_EN,
+ .inv_sel_reg = REG_MMU_INV_SEL_GEN2,
+ .larbid_remap = {{0}, {1}, {2}, {3}, {5}, {7, 8}, {10}, {9}},
+};
+
+static const struct mtk_iommu_plat_data mt8167_data = {
+ .m4u_plat = M4U_MT8167,
+ .flags = RESET_AXI | HAS_LEGACY_IVRP_PADDR,
+ .inv_sel_reg = REG_MMU_INV_SEL_GEN1,
+ .larbid_remap = {{0}, {1}, {2}}, /* Linear mapping. */
};
static const struct mtk_iommu_plat_data mt8173_data = {
.m4u_plat = M4U_MT8173,
- .has_4gb_mode = true,
- .has_bclk = true,
- .reset_axi = true,
- .larbid_remap = {0, 1, 2, 3, 4, 5}, /* Linear mapping. */
+ .flags = HAS_4GB_MODE | HAS_BCLK | RESET_AXI |
+ HAS_LEGACY_IVRP_PADDR,
+ .inv_sel_reg = REG_MMU_INV_SEL_GEN1,
+ .larbid_remap = {{0}, {1}, {2}, {3}, {4}, {5}}, /* Linear mapping. */
};
static const struct mtk_iommu_plat_data mt8183_data = {
.m4u_plat = M4U_MT8183,
- .reset_axi = true,
- .larbid_remap = {0, 4, 5, 6, 7, 2, 3, 1},
+ .flags = RESET_AXI,
+ .inv_sel_reg = REG_MMU_INV_SEL_GEN1,
+ .larbid_remap = {{0}, {4}, {5}, {6}, {7}, {2}, {3}, {1}},
};
static const struct of_device_id mtk_iommu_of_ids[] = {
{ .compatible = "mediatek,mt2712-m4u", .data = &mt2712_data},
+ { .compatible = "mediatek,mt6779-m4u", .data = &mt6779_data},
+ { .compatible = "mediatek,mt8167-m4u", .data = &mt8167_data},
{ .compatible = "mediatek,mt8173-m4u", .data = &mt8173_data},
{ .compatible = "mediatek,mt8183-m4u", .data = &mt8183_data},
{}
@@ -861,7 +876,7 @@
.remove = mtk_iommu_remove,
.driver = {
.name = "mtk-iommu",
- .of_match_table = of_match_ptr(mtk_iommu_of_ids),
+ .of_match_table = mtk_iommu_of_ids,
.pm = &mtk_iommu_pm_ops,
}
};
diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
index 8cae22d..df32b3e 100644
--- a/drivers/iommu/mtk_iommu.h
+++ b/drivers/iommu/mtk_iommu.h
@@ -15,34 +15,40 @@
#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/spinlock.h>
+#include <linux/dma-mapping.h>
#include <soc/mediatek/smi.h>
+#define MTK_LARB_COM_MAX 8
+#define MTK_LARB_SUBCOM_MAX 4
+
struct mtk_iommu_suspend_reg {
- u32 standard_axi_mode;
+ union {
+ u32 standard_axi_mode;/* v1 */
+ u32 misc_ctrl;/* v2 */
+ };
u32 dcm_dis;
u32 ctrl_reg;
u32 int_control0;
u32 int_main_control;
u32 ivrp_paddr;
u32 vld_pa_rng;
+ u32 wr_len_ctrl;
};
enum mtk_iommu_plat {
M4U_MT2701,
M4U_MT2712,
+ M4U_MT6779,
+ M4U_MT8167,
M4U_MT8173,
M4U_MT8183,
};
struct mtk_iommu_plat_data {
enum mtk_iommu_plat m4u_plat;
- bool has_4gb_mode;
-
- /* HW will use the EMI clock if there isn't the "bclk". */
- bool has_bclk;
- bool has_vld_pa_rng;
- bool reset_axi;
- unsigned char larbid_remap[MTK_LARB_NR_MAX];
+ u32 flags;
+ u32 inv_sel_reg;
+ unsigned char larbid_remap[MTK_LARB_COM_MAX][MTK_LARB_SUBCOM_MAX];
};
struct mtk_iommu_domain;
@@ -57,12 +63,13 @@
struct mtk_iommu_domain *m4u_dom;
struct iommu_group *m4u_group;
bool enable_4GB;
- bool tlb_flush_active;
spinlock_t tlb_lock; /* lock for tlb range flush */
struct iommu_device iommu;
const struct mtk_iommu_plat_data *plat_data;
+ struct dma_iommu_mapping *mapping; /* For mtk_iommu_v1.c */
+
struct list_head list;
struct mtk_smi_larb_iommu larb_imu[MTK_LARB_NR_MAX];
};
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index b5efd6d..82ddfe9 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -263,12 +263,15 @@
static int mtk_iommu_attach_device(struct iommu_domain *domain,
struct device *dev)
{
+ struct mtk_iommu_data *data = dev_iommu_priv_get(dev);
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
- struct mtk_iommu_data *data = dev_iommu_fwspec_get(dev)->iommu_priv;
+ struct dma_iommu_mapping *mtk_mapping;
int ret;
- if (!data)
- return -ENODEV;
+ /* Only allow the domain created internally. */
+ mtk_mapping = data->mapping;
+ if (mtk_mapping->domain != domain)
+ return 0;
if (!data->m4u_dom) {
data->m4u_dom = dom;
@@ -286,16 +289,13 @@
static void mtk_iommu_detach_device(struct iommu_domain *domain,
struct device *dev)
{
- struct mtk_iommu_data *data = dev_iommu_fwspec_get(dev)->iommu_priv;
-
- if (!data)
- return;
+ struct mtk_iommu_data *data = dev_iommu_priv_get(dev);
mtk_iommu_config(data, dev, false);
}
static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot)
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
unsigned int page_num = size >> MT2701_IOMMU_PAGE_SHIFT;
@@ -369,7 +369,6 @@
struct mtk_iommu_data *data;
struct platform_device *m4updev;
struct dma_iommu_mapping *mtk_mapping;
- struct device *m4udev;
int ret;
if (args->args_count != 1) {
@@ -387,22 +386,21 @@
return -EINVAL;
}
- if (!fwspec->iommu_priv) {
+ if (!dev_iommu_priv_get(dev)) {
/* Get the m4u device */
m4updev = of_find_device_by_node(args->np);
if (WARN_ON(!m4updev))
return -EINVAL;
- fwspec->iommu_priv = platform_get_drvdata(m4updev);
+ dev_iommu_priv_set(dev, platform_get_drvdata(m4updev));
}
ret = iommu_fwspec_add_ids(dev, args->args, 1);
if (ret)
return ret;
- data = fwspec->iommu_priv;
- m4udev = data->dev;
- mtk_mapping = m4udev->archdata.iommu;
+ data = dev_iommu_priv_get(dev);
+ mtk_mapping = data->mapping;
if (!mtk_mapping) {
/* MTK iommu support 4GB iova address space. */
mtk_mapping = arm_iommu_create_mapping(&platform_bus_type,
@@ -410,20 +408,23 @@
if (IS_ERR(mtk_mapping))
return PTR_ERR(mtk_mapping);
- m4udev->archdata.iommu = mtk_mapping;
+ data->mapping = mtk_mapping;
}
return 0;
}
-static int mtk_iommu_add_device(struct device *dev)
+static int mtk_iommu_def_domain_type(struct device *dev)
+{
+ return IOMMU_DOMAIN_UNMANAGED;
+}
+
+static struct iommu_device *mtk_iommu_probe_device(struct device *dev)
{
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
- struct dma_iommu_mapping *mtk_mapping;
struct of_phandle_args iommu_spec;
struct of_phandle_iterator it;
struct mtk_iommu_data *data;
- struct iommu_group *group;
int err;
of_for_each_phandle(&it, err, dev->of_node, "iommus",
@@ -442,46 +443,34 @@
}
if (!fwspec || fwspec->ops != &mtk_iommu_ops)
- return -ENODEV; /* Not a iommu client device */
+ return ERR_PTR(-ENODEV); /* Not a iommu client device */
- /*
- * This is a short-term bodge because the ARM DMA code doesn't
- * understand multi-device groups, but we have to call into it
- * successfully (and not just rely on a normal IOMMU API attach
- * here) in order to set the correct DMA API ops on @dev.
- */
- group = iommu_group_alloc();
- if (IS_ERR(group))
- return PTR_ERR(group);
+ data = dev_iommu_priv_get(dev);
- err = iommu_group_add_device(group, dev);
- iommu_group_put(group);
- if (err)
- return err;
-
- data = fwspec->iommu_priv;
- mtk_mapping = data->dev->archdata.iommu;
- err = arm_iommu_attach_device(dev, mtk_mapping);
- if (err) {
- iommu_group_remove_device(dev);
- return err;
- }
-
- return iommu_device_link(&data->iommu, dev);
+ return &data->iommu;
}
-static void mtk_iommu_remove_device(struct device *dev)
+static void mtk_iommu_probe_finalize(struct device *dev)
+{
+ struct dma_iommu_mapping *mtk_mapping;
+ struct mtk_iommu_data *data;
+ int err;
+
+ data = dev_iommu_priv_get(dev);
+ mtk_mapping = data->mapping;
+
+ err = arm_iommu_attach_device(dev, mtk_mapping);
+ if (err)
+ dev_err(dev, "Can't create IOMMU mapping - DMA-OPS will not work\n");
+}
+
+static void mtk_iommu_release_device(struct device *dev)
{
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
- struct mtk_iommu_data *data;
if (!fwspec || fwspec->ops != &mtk_iommu_ops)
return;
- data = fwspec->iommu_priv;
- iommu_device_unlink(&data->iommu, dev);
-
- iommu_group_remove_device(dev);
iommu_fwspec_free(dev);
}
@@ -534,8 +523,11 @@
.map = mtk_iommu_map,
.unmap = mtk_iommu_unmap,
.iova_to_phys = mtk_iommu_iova_to_phys,
- .add_device = mtk_iommu_add_device,
- .remove_device = mtk_iommu_remove_device,
+ .probe_device = mtk_iommu_probe_device,
+ .probe_finalize = mtk_iommu_probe_finalize,
+ .release_device = mtk_iommu_release_device,
+ .def_domain_type = mtk_iommu_def_domain_type,
+ .device_group = generic_device_group,
.pgsize_bitmap = ~0UL << MT2701_IOMMU_PAGE_SHIFT,
};
diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index 614a93a..e505b91 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -8,9 +8,12 @@
#include <linux/export.h>
#include <linux/iommu.h>
#include <linux/limits.h>
+#include <linux/module.h>
+#include <linux/msi.h>
#include <linux/of.h>
#include <linux/of_iommu.h>
#include <linux/of_pci.h>
+#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/fsl/mc.h>
@@ -89,16 +92,16 @@
{
const struct iommu_ops *ops;
struct fwnode_handle *fwnode = &iommu_spec->np->fwnode;
- int err;
+ int ret;
ops = iommu_ops_from_fwnode(fwnode);
if ((ops && !ops->of_xlate) ||
!of_device_is_available(iommu_spec->np))
return NO_IOMMU;
- err = iommu_fwspec_init(dev, &iommu_spec->np->fwnode, ops);
- if (err)
- return err;
+ ret = iommu_fwspec_init(dev, &iommu_spec->np->fwnode, ops);
+ if (ret)
+ return ret;
/*
* The otherwise-empty fwspec handily serves to indicate the specific
* IOMMU device we're waiting for, which will be useful if we ever get
@@ -107,7 +110,49 @@
if (!ops)
return driver_deferred_probe_check_state(dev);
- return ops->of_xlate(dev, iommu_spec);
+ if (!try_module_get(ops->owner))
+ return -ENODEV;
+
+ ret = ops->of_xlate(dev, iommu_spec);
+ module_put(ops->owner);
+ return ret;
+}
+
+static int of_iommu_configure_dev_id(struct device_node *master_np,
+ struct device *dev,
+ const u32 *id)
+{
+ struct of_phandle_args iommu_spec = { .args_count = 1 };
+ int err;
+
+ err = of_map_id(master_np, *id, "iommu-map",
+ "iommu-map-mask", &iommu_spec.np,
+ iommu_spec.args);
+ if (err)
+ return err == -ENODEV ? NO_IOMMU : err;
+
+ err = of_iommu_xlate(dev, &iommu_spec);
+ of_node_put(iommu_spec.np);
+ return err;
+}
+
+static int of_iommu_configure_dev(struct device_node *master_np,
+ struct device *dev)
+{
+ struct of_phandle_args iommu_spec;
+ int err = NO_IOMMU, idx = 0;
+
+ while (!of_parse_phandle_with_args(master_np, "iommus",
+ "#iommu-cells",
+ idx, &iommu_spec)) {
+ err = of_iommu_xlate(dev, &iommu_spec);
+ of_node_put(iommu_spec.np);
+ idx++;
+ if (err)
+ break;
+ }
+
+ return err;
}
struct of_pci_iommu_alias_info {
@@ -118,38 +163,21 @@
static int of_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data)
{
struct of_pci_iommu_alias_info *info = data;
- struct of_phandle_args iommu_spec = { .args_count = 1 };
- int err;
+ u32 input_id = alias;
- err = of_map_rid(info->np, alias, "iommu-map", "iommu-map-mask",
- &iommu_spec.np, iommu_spec.args);
- if (err)
- return err == -ENODEV ? NO_IOMMU : err;
-
- err = of_iommu_xlate(info->dev, &iommu_spec);
- of_node_put(iommu_spec.np);
- return err;
+ return of_iommu_configure_dev_id(info->np, info->dev, &input_id);
}
-static int of_fsl_mc_iommu_init(struct fsl_mc_device *mc_dev,
- struct device_node *master_np)
+static int of_iommu_configure_device(struct device_node *master_np,
+ struct device *dev, const u32 *id)
{
- struct of_phandle_args iommu_spec = { .args_count = 1 };
- int err;
-
- err = of_map_rid(master_np, mc_dev->icid, "iommu-map",
- "iommu-map-mask", &iommu_spec.np,
- iommu_spec.args);
- if (err)
- return err == -ENODEV ? NO_IOMMU : err;
-
- err = of_iommu_xlate(&mc_dev->dev, &iommu_spec);
- of_node_put(iommu_spec.np);
- return err;
+ return (id) ? of_iommu_configure_dev_id(master_np, dev, id) :
+ of_iommu_configure_dev(master_np, dev);
}
const struct iommu_ops *of_iommu_configure(struct device *dev,
- struct device_node *master_np)
+ struct device_node *master_np,
+ const u32 *id)
{
const struct iommu_ops *ops = NULL;
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
@@ -177,26 +205,18 @@
.np = master_np,
};
+ pci_request_acs();
err = pci_for_each_dma_alias(to_pci_dev(dev),
of_pci_iommu_init, &info);
- } else if (dev_is_fsl_mc(dev)) {
- err = of_fsl_mc_iommu_init(to_fsl_mc_device(dev), master_np);
} else {
- struct of_phandle_args iommu_spec;
- int idx = 0;
+ err = of_iommu_configure_device(master_np, dev, id);
- while (!of_parse_phandle_with_args(master_np, "iommus",
- "#iommu-cells",
- idx, &iommu_spec)) {
- err = of_iommu_xlate(dev, &iommu_spec);
- of_node_put(iommu_spec.np);
- idx++;
- if (err)
- break;
- }
+ fwspec = dev_iommu_fwspec_get(dev);
+ if (!err && fwspec)
+ of_property_read_u32(master_np, "pasid-num-bits",
+ &fwspec->num_pasid_bits);
}
-
/*
* Two success conditions can be represented by non-negative err here:
* >0 : there is no IOMMU, or one was unavailable for non-fatal reasons
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index 09c6e1c..71f29c0 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -3,7 +3,7 @@
* omap iommu: tlb and pagetable primitives
*
* Copyright (C) 2008-2010 Nokia Corporation
- * Copyright (C) 2013-2017 Texas Instruments Incorporated - http://www.ti.com/
+ * Copyright (C) 2013-2017 Texas Instruments Incorporated - https://www.ti.com/
*
* Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>,
* Paul Mundt and Toshihiro Kobayashi
@@ -35,15 +35,6 @@
static const struct iommu_ops omap_iommu_ops;
-struct orphan_dev {
- struct device *dev;
- struct list_head node;
-};
-
-static LIST_HEAD(orphan_dev_list);
-
-static DEFINE_SPINLOCK(orphan_lock);
-
#define to_iommu(dev) ((struct omap_iommu *)dev_get_drvdata(dev))
/* bitmap of the page sizes currently supported */
@@ -62,8 +53,6 @@
static struct platform_driver omap_iommu_driver;
static struct kmem_cache *iopte_cachep;
-static int _omap_iommu_add_device(struct device *dev);
-
/**
* to_omap_domain - Get struct omap_iommu_domain from generic iommu_domain
* @dom: generic iommu domain handle
@@ -82,7 +71,7 @@
**/
void omap_iommu_save_ctx(struct device *dev)
{
- struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
+ struct omap_iommu_arch_data *arch_data = dev_iommu_priv_get(dev);
struct omap_iommu *obj;
u32 *p;
int i;
@@ -112,7 +101,7 @@
**/
void omap_iommu_restore_ctx(struct device *dev)
{
- struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
+ struct omap_iommu_arch_data *arch_data = dev_iommu_priv_get(dev);
struct omap_iommu *obj;
u32 *p;
int i;
@@ -167,7 +156,7 @@
{
u32 l, pa;
- if (!obj->iopgd || !IS_ALIGNED((u32)obj->iopgd, SZ_16K))
+ if (!obj->iopgd || !IS_ALIGNED((unsigned long)obj->iopgd, SZ_16K))
return -EINVAL;
pa = virt_to_phys(obj->iopgd);
@@ -434,7 +423,7 @@
bytes = iopgsz_to_bytes(cr.cam & 3);
if ((start <= da) && (da < start + bytes)) {
- dev_dbg(obj->dev, "%s: %08x<=%08x(%x)\n",
+ dev_dbg(obj->dev, "%s: %08x<=%08x(%zx)\n",
__func__, start, da, bytes);
iotlb_load_cr(obj, &cr);
iommu_write_reg(obj, 1, MMU_FLUSH_ENTRY);
@@ -1177,7 +1166,6 @@
struct omap_iommu *obj;
struct resource *res;
struct device_node *of = pdev->dev.of_node;
- struct orphan_dev *orphan_dev, *tmp;
if (!of) {
pr_err("%s: only DT-based devices are supported\n", __func__);
@@ -1248,6 +1236,7 @@
goto out_group;
iommu_device_set_ops(&obj->iommu, &omap_iommu_ops);
+ iommu_device_set_fwnode(&obj->iommu, &of->fwnode);
err = iommu_device_register(&obj->iommu);
if (err)
@@ -1260,13 +1249,8 @@
dev_info(&pdev->dev, "%s registered\n", obj->name);
- list_for_each_entry_safe(orphan_dev, tmp, &orphan_dev_list, node) {
- err = _omap_iommu_add_device(orphan_dev->dev);
- if (!err) {
- list_del(&orphan_dev->node);
- kfree(orphan_dev);
- }
- }
+ /* Re-probe bus to probe device attached to this IOMMU */
+ bus_iommu_probe(&platform_bus_type);
return 0;
@@ -1339,7 +1323,7 @@
}
static int omap_iommu_map(struct iommu_domain *domain, unsigned long da,
- phys_addr_t pa, size_t bytes, int prot)
+ phys_addr_t pa, size_t bytes, int prot, gfp_t gfp)
{
struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
struct device *dev = omap_domain->dev;
@@ -1352,11 +1336,11 @@
omap_pgsz = bytes_to_iopgsz(bytes);
if (omap_pgsz < 0) {
- dev_err(dev, "invalid size to map: %d\n", bytes);
+ dev_err(dev, "invalid size to map: %zu\n", bytes);
return -EINVAL;
}
- dev_dbg(dev, "mapping da 0x%lx to pa %pa size 0x%x\n", da, &pa, bytes);
+ dev_dbg(dev, "mapping da 0x%lx to pa %pa size 0x%zx\n", da, &pa, bytes);
iotlb_init_entry(&e, da, pa, omap_pgsz);
@@ -1393,7 +1377,7 @@
size_t bytes = 0;
int i;
- dev_dbg(dev, "unmapping da 0x%lx size %u\n", da, size);
+ dev_dbg(dev, "unmapping da 0x%lx size %zu\n", da, size);
iommu = omap_domain->iommus;
for (i = 0; i < omap_domain->num_iommus; i++, iommu++) {
@@ -1414,7 +1398,7 @@
static int omap_iommu_count(struct device *dev)
{
- struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
+ struct omap_iommu_arch_data *arch_data = dev_iommu_priv_get(dev);
int count = 0;
while (arch_data->iommu_dev) {
@@ -1475,8 +1459,8 @@
static int
omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
+ struct omap_iommu_arch_data *arch_data = dev_iommu_priv_get(dev);
struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
- struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
struct omap_iommu_device *iommu;
struct omap_iommu *oiommu;
int ret = 0;
@@ -1540,7 +1524,7 @@
static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain,
struct device *dev)
{
- struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
+ struct omap_iommu_arch_data *arch_data = dev_iommu_priv_get(dev);
struct omap_iommu_device *iommu = omap_domain->iommus;
struct omap_iommu *oiommu;
int i;
@@ -1657,26 +1641,22 @@
return ret;
}
-static int _omap_iommu_add_device(struct device *dev)
+static struct iommu_device *omap_iommu_probe_device(struct device *dev)
{
struct omap_iommu_arch_data *arch_data, *tmp;
- struct omap_iommu *oiommu;
- struct iommu_group *group;
- struct device_node *np;
struct platform_device *pdev;
+ struct omap_iommu *oiommu;
+ struct device_node *np;
int num_iommus, i;
- int ret;
- struct orphan_dev *orphan_dev;
- unsigned long flags;
/*
- * Allocate the archdata iommu structure for DT-based devices.
+ * Allocate the per-device iommu structure for DT-based devices.
*
* TODO: Simplify this when removing non-DT support completely from the
* IOMMU users.
*/
if (!dev->of_node)
- return 0;
+ return ERR_PTR(-ENODEV);
/*
* retrieve the count of IOMMU nodes using phandle size as element size
@@ -1689,43 +1669,27 @@
arch_data = kcalloc(num_iommus + 1, sizeof(*arch_data), GFP_KERNEL);
if (!arch_data)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
for (i = 0, tmp = arch_data; i < num_iommus; i++, tmp++) {
np = of_parse_phandle(dev->of_node, "iommus", i);
if (!np) {
kfree(arch_data);
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
}
pdev = of_find_device_by_node(np);
if (!pdev) {
of_node_put(np);
kfree(arch_data);
- spin_lock_irqsave(&orphan_lock, flags);
- list_for_each_entry(orphan_dev, &orphan_dev_list,
- node) {
- if (orphan_dev->dev == dev)
- break;
- }
- spin_unlock_irqrestore(&orphan_lock, flags);
-
- if (orphan_dev && orphan_dev->dev == dev)
- return -EPROBE_DEFER;
-
- orphan_dev = kzalloc(sizeof(*orphan_dev), GFP_KERNEL);
- orphan_dev->dev = dev;
- spin_lock_irqsave(&orphan_lock, flags);
- list_add(&orphan_dev->node, &orphan_dev_list);
- spin_unlock_irqrestore(&orphan_lock, flags);
- return -EPROBE_DEFER;
+ return ERR_PTR(-ENODEV);
}
oiommu = platform_get_drvdata(pdev);
if (!oiommu) {
of_node_put(np);
kfree(arch_data);
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
}
tmp->iommu_dev = oiommu;
@@ -1734,67 +1698,38 @@
of_node_put(np);
}
+ dev_iommu_priv_set(dev, arch_data);
+
/*
* use the first IOMMU alone for the sysfs device linking.
* TODO: Evaluate if a single iommu_group needs to be
* maintained for both IOMMUs
*/
oiommu = arch_data->iommu_dev;
- ret = iommu_device_link(&oiommu->iommu, dev);
- if (ret) {
- kfree(arch_data);
- return ret;
- }
- dev->archdata.iommu = arch_data;
-
- /*
- * IOMMU group initialization calls into omap_iommu_device_group, which
- * needs a valid dev->archdata.iommu pointer
- */
- group = iommu_group_get_for_dev(dev);
- if (IS_ERR(group)) {
- iommu_device_unlink(&oiommu->iommu, dev);
- dev->archdata.iommu = NULL;
- kfree(arch_data);
- return PTR_ERR(group);
- }
- iommu_group_put(group);
-
- return 0;
+ return &oiommu->iommu;
}
-static int omap_iommu_add_device(struct device *dev)
+static void omap_iommu_release_device(struct device *dev)
{
- int ret;
-
- ret = _omap_iommu_add_device(dev);
- if (ret == -EPROBE_DEFER)
- return 0;
-
- return ret;
-}
-
-static void omap_iommu_remove_device(struct device *dev)
-{
- struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
+ struct omap_iommu_arch_data *arch_data = dev_iommu_priv_get(dev);
if (!dev->of_node || !arch_data)
return;
- iommu_device_unlink(&arch_data->iommu_dev->iommu, dev);
- iommu_group_remove_device(dev);
-
- dev->archdata.iommu = NULL;
+ dev_iommu_priv_set(dev, NULL);
kfree(arch_data);
}
static struct iommu_group *omap_iommu_device_group(struct device *dev)
{
- struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
+ struct omap_iommu_arch_data *arch_data = dev_iommu_priv_get(dev);
struct iommu_group *group = ERR_PTR(-EINVAL);
+ if (!arch_data)
+ return ERR_PTR(-ENODEV);
+
if (arch_data->iommu_dev)
group = iommu_group_ref_get(arch_data->iommu_dev->group);
@@ -1809,8 +1744,8 @@
.map = omap_iommu_map,
.unmap = omap_iommu_unmap,
.iova_to_phys = omap_iommu_iova_to_phys,
- .add_device = omap_iommu_add_device,
- .remove_device = omap_iommu_remove_device,
+ .probe_device = omap_iommu_probe_device,
+ .release_device = omap_iommu_release_device,
.device_group = omap_iommu_device_group,
.pgsize_bitmap = OMAP_IOMMU_PGSIZES,
};
diff --git a/drivers/iommu/omap-iopgtable.h b/drivers/iommu/omap-iopgtable.h
index 1a4adb5..51d7400 100644
--- a/drivers/iommu/omap-iopgtable.h
+++ b/drivers/iommu/omap-iopgtable.h
@@ -63,7 +63,8 @@
*
* va to pa translation
*/
-static inline phys_addr_t omap_iommu_translate(u32 d, u32 va, u32 mask)
+static inline phys_addr_t omap_iommu_translate(unsigned long d, dma_addr_t va,
+ dma_addr_t mask)
{
return (d & mask) | (va & (~mask));
}
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 0df0919..e5d86b7 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -527,7 +527,7 @@
int i, err;
err = pm_runtime_get_if_in_use(iommu->dev);
- if (WARN_ON_ONCE(err <= 0))
+ if (!err || WARN_ON_ONCE(err < 0))
return ret;
if (WARN_ON(clk_bulk_enable(iommu->num_clocks, iommu->clocks)))
@@ -758,7 +758,7 @@
}
static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova,
- phys_addr_t paddr, size_t size, int prot)
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
unsigned long flags;
@@ -836,7 +836,7 @@
static struct rk_iommu *rk_iommu_from_dev(struct device *dev)
{
- struct rk_iommudata *data = dev->archdata.iommu;
+ struct rk_iommudata *data = dev_iommu_priv_get(dev);
return data ? data->iommu : NULL;
}
@@ -1054,40 +1054,28 @@
kfree(rk_domain);
}
-static int rk_iommu_add_device(struct device *dev)
+static struct iommu_device *rk_iommu_probe_device(struct device *dev)
{
- struct iommu_group *group;
- struct rk_iommu *iommu;
struct rk_iommudata *data;
+ struct rk_iommu *iommu;
- data = dev->archdata.iommu;
+ data = dev_iommu_priv_get(dev);
if (!data)
- return -ENODEV;
+ return ERR_PTR(-ENODEV);
iommu = rk_iommu_from_dev(dev);
- group = iommu_group_get_for_dev(dev);
- if (IS_ERR(group))
- return PTR_ERR(group);
- iommu_group_put(group);
-
- iommu_device_link(&iommu->iommu, dev);
data->link = device_link_add(dev, iommu->dev,
DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME);
- return 0;
+ return &iommu->iommu;
}
-static void rk_iommu_remove_device(struct device *dev)
+static void rk_iommu_release_device(struct device *dev)
{
- struct rk_iommu *iommu;
- struct rk_iommudata *data = dev->archdata.iommu;
-
- iommu = rk_iommu_from_dev(dev);
+ struct rk_iommudata *data = dev_iommu_priv_get(dev);
device_link_del(data->link);
- iommu_device_unlink(&iommu->iommu, dev);
- iommu_group_remove_device(dev);
}
static struct iommu_group *rk_iommu_device_group(struct device *dev)
@@ -1112,7 +1100,7 @@
iommu_dev = of_find_device_by_node(args->np);
data->iommu = platform_get_drvdata(iommu_dev);
- dev->archdata.iommu = data;
+ dev_iommu_priv_set(dev, data);
platform_device_put(iommu_dev);
@@ -1126,8 +1114,8 @@
.detach_dev = rk_iommu_detach_device,
.map = rk_iommu_map,
.unmap = rk_iommu_unmap,
- .add_device = rk_iommu_add_device,
- .remove_device = rk_iommu_remove_device,
+ .probe_device = rk_iommu_probe_device,
+ .release_device = rk_iommu_release_device,
.iova_to_phys = rk_iommu_iova_to_phys,
.device_group = rk_iommu_device_group,
.pgsize_bitmap = RK_IOMMU_PGSIZE_BITMAP,
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 3b0b18e..8895dbb 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -87,7 +87,7 @@
struct device *dev)
{
struct s390_domain *s390_domain = to_s390_domain(domain);
- struct zpci_dev *zdev = to_pci_dev(dev)->sysdata;
+ struct zpci_dev *zdev = to_zpci_dev(dev);
struct s390_domain_device *domain_device;
unsigned long flags;
int rc;
@@ -139,7 +139,7 @@
struct device *dev)
{
struct s390_domain *s390_domain = to_s390_domain(domain);
- struct zpci_dev *zdev = to_pci_dev(dev)->sysdata;
+ struct zpci_dev *zdev = to_zpci_dev(dev);
struct s390_domain_device *domain_device, *tmp;
unsigned long flags;
int found = 0;
@@ -166,23 +166,16 @@
}
}
-static int s390_iommu_add_device(struct device *dev)
+static struct iommu_device *s390_iommu_probe_device(struct device *dev)
{
- struct iommu_group *group = iommu_group_get_for_dev(dev);
- struct zpci_dev *zdev = to_pci_dev(dev)->sysdata;
+ struct zpci_dev *zdev = to_zpci_dev(dev);
- if (IS_ERR(group))
- return PTR_ERR(group);
-
- iommu_group_put(group);
- iommu_device_link(&zdev->iommu_dev, dev);
-
- return 0;
+ return &zdev->iommu_dev;
}
-static void s390_iommu_remove_device(struct device *dev)
+static void s390_iommu_release_device(struct device *dev)
{
- struct zpci_dev *zdev = to_pci_dev(dev)->sysdata;
+ struct zpci_dev *zdev = to_zpci_dev(dev);
struct iommu_domain *domain;
/*
@@ -191,7 +184,7 @@
* to vfio-pci and completing the VFIO_SET_IOMMU ioctl (which triggers
* the attach_dev), removing the device via
* "echo 1 > /sys/bus/pci/devices/.../remove" won't trigger detach_dev,
- * only remove_device will be called via the BUS_NOTIFY_REMOVED_DEVICE
+ * only release_device will be called via the BUS_NOTIFY_REMOVED_DEVICE
* notifier.
*
* So let's call detach_dev from here if it hasn't been called before.
@@ -201,9 +194,6 @@
if (domain)
s390_iommu_detach_device(domain, dev);
}
-
- iommu_device_unlink(&zdev->iommu_dev, dev);
- iommu_group_remove_device(dev);
}
static int s390_iommu_update_trans(struct s390_domain *s390_domain,
@@ -265,7 +255,7 @@
}
static int s390_iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot)
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
struct s390_domain *s390_domain = to_s390_domain(domain);
int flags = ZPCI_PTE_VALID, rc = 0;
@@ -373,8 +363,8 @@
.map = s390_iommu_map,
.unmap = s390_iommu_unmap,
.iova_to_phys = s390_iommu_iova_to_phys,
- .add_device = s390_iommu_add_device,
- .remove_device = s390_iommu_remove_device,
+ .probe_device = s390_iommu_probe_device,
+ .release_device = s390_iommu_release_device,
.device_group = generic_device_group,
.pgsize_bitmap = S390_IOMMU_PGSIZES,
};
diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c
new file mode 100644
index 0000000..ea6db13
--- /dev/null
+++ b/drivers/iommu/sun50i-iommu.c
@@ -0,0 +1,1020 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (C) 2016-2018, Allwinner Technology CO., LTD.
+// Copyright (C) 2019-2020, Cerno
+
+#include <linux/bitfield.h>
+#include <linux/bug.h>
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/dma-direction.h>
+#include <linux/dma-iommu.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/iommu.h>
+#include <linux/iopoll.h>
+#include <linux/ioport.h>
+#include <linux/log2.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/pm_runtime.h>
+#include <linux/reset.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#define IOMMU_RESET_REG 0x010
+#define IOMMU_ENABLE_REG 0x020
+#define IOMMU_ENABLE_ENABLE BIT(0)
+
+#define IOMMU_BYPASS_REG 0x030
+#define IOMMU_AUTO_GATING_REG 0x040
+#define IOMMU_AUTO_GATING_ENABLE BIT(0)
+
+#define IOMMU_WBUF_CTRL_REG 0x044
+#define IOMMU_OOO_CTRL_REG 0x048
+#define IOMMU_4KB_BDY_PRT_CTRL_REG 0x04c
+#define IOMMU_TTB_REG 0x050
+#define IOMMU_TLB_ENABLE_REG 0x060
+#define IOMMU_TLB_PREFETCH_REG 0x070
+#define IOMMU_TLB_PREFETCH_MASTER_ENABLE(m) BIT(m)
+
+#define IOMMU_TLB_FLUSH_REG 0x080
+#define IOMMU_TLB_FLUSH_PTW_CACHE BIT(17)
+#define IOMMU_TLB_FLUSH_MACRO_TLB BIT(16)
+#define IOMMU_TLB_FLUSH_MICRO_TLB(i) (BIT(i) & GENMASK(5, 0))
+
+#define IOMMU_TLB_IVLD_ADDR_REG 0x090
+#define IOMMU_TLB_IVLD_ADDR_MASK_REG 0x094
+#define IOMMU_TLB_IVLD_ENABLE_REG 0x098
+#define IOMMU_TLB_IVLD_ENABLE_ENABLE BIT(0)
+
+#define IOMMU_PC_IVLD_ADDR_REG 0x0a0
+#define IOMMU_PC_IVLD_ENABLE_REG 0x0a8
+#define IOMMU_PC_IVLD_ENABLE_ENABLE BIT(0)
+
+#define IOMMU_DM_AUT_CTRL_REG(d) (0x0b0 + ((d) / 2) * 4)
+#define IOMMU_DM_AUT_CTRL_RD_UNAVAIL(d, m) (1 << (((d & 1) * 16) + ((m) * 2)))
+#define IOMMU_DM_AUT_CTRL_WR_UNAVAIL(d, m) (1 << (((d & 1) * 16) + ((m) * 2) + 1))
+
+#define IOMMU_DM_AUT_OVWT_REG 0x0d0
+#define IOMMU_INT_ENABLE_REG 0x100
+#define IOMMU_INT_CLR_REG 0x104
+#define IOMMU_INT_STA_REG 0x108
+#define IOMMU_INT_ERR_ADDR_REG(i) (0x110 + (i) * 4)
+#define IOMMU_INT_ERR_ADDR_L1_REG 0x130
+#define IOMMU_INT_ERR_ADDR_L2_REG 0x134
+#define IOMMU_INT_ERR_DATA_REG(i) (0x150 + (i) * 4)
+#define IOMMU_L1PG_INT_REG 0x0180
+#define IOMMU_L2PG_INT_REG 0x0184
+
+#define IOMMU_INT_INVALID_L2PG BIT(17)
+#define IOMMU_INT_INVALID_L1PG BIT(16)
+#define IOMMU_INT_MASTER_PERMISSION(m) BIT(m)
+#define IOMMU_INT_MASTER_MASK (IOMMU_INT_MASTER_PERMISSION(0) | \
+ IOMMU_INT_MASTER_PERMISSION(1) | \
+ IOMMU_INT_MASTER_PERMISSION(2) | \
+ IOMMU_INT_MASTER_PERMISSION(3) | \
+ IOMMU_INT_MASTER_PERMISSION(4) | \
+ IOMMU_INT_MASTER_PERMISSION(5))
+#define IOMMU_INT_MASK (IOMMU_INT_INVALID_L1PG | \
+ IOMMU_INT_INVALID_L2PG | \
+ IOMMU_INT_MASTER_MASK)
+
+#define PT_ENTRY_SIZE sizeof(u32)
+
+#define NUM_DT_ENTRIES 4096
+#define DT_SIZE (NUM_DT_ENTRIES * PT_ENTRY_SIZE)
+
+#define NUM_PT_ENTRIES 256
+#define PT_SIZE (NUM_PT_ENTRIES * PT_ENTRY_SIZE)
+
+struct sun50i_iommu {
+ struct iommu_device iommu;
+
+ /* Lock to modify the IOMMU registers */
+ spinlock_t iommu_lock;
+
+ struct device *dev;
+ void __iomem *base;
+ struct reset_control *reset;
+ struct clk *clk;
+
+ struct iommu_domain *domain;
+ struct iommu_group *group;
+ struct kmem_cache *pt_pool;
+};
+
+struct sun50i_iommu_domain {
+ struct iommu_domain domain;
+
+ /* Number of devices attached to the domain */
+ refcount_t refcnt;
+
+ /* L1 Page Table */
+ u32 *dt;
+ dma_addr_t dt_dma;
+
+ struct sun50i_iommu *iommu;
+};
+
+static struct sun50i_iommu_domain *to_sun50i_domain(struct iommu_domain *domain)
+{
+ return container_of(domain, struct sun50i_iommu_domain, domain);
+}
+
+static struct sun50i_iommu *sun50i_iommu_from_dev(struct device *dev)
+{
+ return dev_iommu_priv_get(dev);
+}
+
+static u32 iommu_read(struct sun50i_iommu *iommu, u32 offset)
+{
+ return readl(iommu->base + offset);
+}
+
+static void iommu_write(struct sun50i_iommu *iommu, u32 offset, u32 value)
+{
+ writel(value, iommu->base + offset);
+}
+
+/*
+ * The Allwinner H6 IOMMU uses a 2-level page table.
+ *
+ * The first level is the usual Directory Table (DT), that consists of
+ * 4096 4-bytes Directory Table Entries (DTE), each pointing to a Page
+ * Table (PT).
+ *
+ * Each PT consits of 256 4-bytes Page Table Entries (PTE), each
+ * pointing to a 4kB page of physical memory.
+ *
+ * The IOMMU supports a single DT, pointed by the IOMMU_TTB_REG
+ * register that contains its physical address.
+ */
+
+#define SUN50I_IOVA_DTE_MASK GENMASK(31, 20)
+#define SUN50I_IOVA_PTE_MASK GENMASK(19, 12)
+#define SUN50I_IOVA_PAGE_MASK GENMASK(11, 0)
+
+static u32 sun50i_iova_get_dte_index(dma_addr_t iova)
+{
+ return FIELD_GET(SUN50I_IOVA_DTE_MASK, iova);
+}
+
+static u32 sun50i_iova_get_pte_index(dma_addr_t iova)
+{
+ return FIELD_GET(SUN50I_IOVA_PTE_MASK, iova);
+}
+
+static u32 sun50i_iova_get_page_offset(dma_addr_t iova)
+{
+ return FIELD_GET(SUN50I_IOVA_PAGE_MASK, iova);
+}
+
+/*
+ * Each Directory Table Entry has a Page Table address and a valid
+ * bit:
+
+ * +---------------------+-----------+-+
+ * | PT address | Reserved |V|
+ * +---------------------+-----------+-+
+ * 31:10 - Page Table address
+ * 9:2 - Reserved
+ * 1:0 - 1 if the entry is valid
+ */
+
+#define SUN50I_DTE_PT_ADDRESS_MASK GENMASK(31, 10)
+#define SUN50I_DTE_PT_ATTRS GENMASK(1, 0)
+#define SUN50I_DTE_PT_VALID 1
+
+static phys_addr_t sun50i_dte_get_pt_address(u32 dte)
+{
+ return (phys_addr_t)dte & SUN50I_DTE_PT_ADDRESS_MASK;
+}
+
+static bool sun50i_dte_is_pt_valid(u32 dte)
+{
+ return (dte & SUN50I_DTE_PT_ATTRS) == SUN50I_DTE_PT_VALID;
+}
+
+static u32 sun50i_mk_dte(dma_addr_t pt_dma)
+{
+ return (pt_dma & SUN50I_DTE_PT_ADDRESS_MASK) | SUN50I_DTE_PT_VALID;
+}
+
+/*
+ * Each PTE has a Page address, an authority index and a valid bit:
+ *
+ * +----------------+-----+-----+-----+---+-----+
+ * | Page address | Rsv | ACI | Rsv | V | Rsv |
+ * +----------------+-----+-----+-----+---+-----+
+ * 31:12 - Page address
+ * 11:8 - Reserved
+ * 7:4 - Authority Control Index
+ * 3:2 - Reserved
+ * 1 - 1 if the entry is valid
+ * 0 - Reserved
+ *
+ * The way permissions work is that the IOMMU has 16 "domains" that
+ * can be configured to give each masters either read or write
+ * permissions through the IOMMU_DM_AUT_CTRL_REG registers. The domain
+ * 0 seems like the default domain, and its permissions in the
+ * IOMMU_DM_AUT_CTRL_REG are only read-only, so it's not really
+ * useful to enforce any particular permission.
+ *
+ * Each page entry will then have a reference to the domain they are
+ * affected to, so that we can actually enforce them on a per-page
+ * basis.
+ *
+ * In order to make it work with the IOMMU framework, we will be using
+ * 4 different domains, starting at 1: RD_WR, RD, WR and NONE
+ * depending on the permission we want to enforce. Each domain will
+ * have each master setup in the same way, since the IOMMU framework
+ * doesn't seem to restrict page access on a per-device basis. And
+ * then we will use the relevant domain index when generating the page
+ * table entry depending on the permissions we want to be enforced.
+ */
+
+enum sun50i_iommu_aci {
+ SUN50I_IOMMU_ACI_DO_NOT_USE = 0,
+ SUN50I_IOMMU_ACI_NONE,
+ SUN50I_IOMMU_ACI_RD,
+ SUN50I_IOMMU_ACI_WR,
+ SUN50I_IOMMU_ACI_RD_WR,
+};
+
+#define SUN50I_PTE_PAGE_ADDRESS_MASK GENMASK(31, 12)
+#define SUN50I_PTE_ACI_MASK GENMASK(7, 4)
+#define SUN50I_PTE_PAGE_VALID BIT(1)
+
+static phys_addr_t sun50i_pte_get_page_address(u32 pte)
+{
+ return (phys_addr_t)pte & SUN50I_PTE_PAGE_ADDRESS_MASK;
+}
+
+static enum sun50i_iommu_aci sun50i_get_pte_aci(u32 pte)
+{
+ return FIELD_GET(SUN50I_PTE_ACI_MASK, pte);
+}
+
+static bool sun50i_pte_is_page_valid(u32 pte)
+{
+ return pte & SUN50I_PTE_PAGE_VALID;
+}
+
+static u32 sun50i_mk_pte(phys_addr_t page, int prot)
+{
+ enum sun50i_iommu_aci aci;
+ u32 flags = 0;
+
+ if (prot & (IOMMU_READ | IOMMU_WRITE))
+ aci = SUN50I_IOMMU_ACI_RD_WR;
+ else if (prot & IOMMU_READ)
+ aci = SUN50I_IOMMU_ACI_RD;
+ else if (prot & IOMMU_WRITE)
+ aci = SUN50I_IOMMU_ACI_WR;
+ else
+ aci = SUN50I_IOMMU_ACI_NONE;
+
+ flags |= FIELD_PREP(SUN50I_PTE_ACI_MASK, aci);
+ page &= SUN50I_PTE_PAGE_ADDRESS_MASK;
+ return page | flags | SUN50I_PTE_PAGE_VALID;
+}
+
+static void sun50i_table_flush(struct sun50i_iommu_domain *sun50i_domain,
+ void *vaddr, unsigned int count)
+{
+ struct sun50i_iommu *iommu = sun50i_domain->iommu;
+ dma_addr_t dma = virt_to_phys(vaddr);
+ size_t size = count * PT_ENTRY_SIZE;
+
+ dma_sync_single_for_device(iommu->dev, dma, size, DMA_TO_DEVICE);
+}
+
+static int sun50i_iommu_flush_all_tlb(struct sun50i_iommu *iommu)
+{
+ u32 reg;
+ int ret;
+
+ assert_spin_locked(&iommu->iommu_lock);
+
+ iommu_write(iommu,
+ IOMMU_TLB_FLUSH_REG,
+ IOMMU_TLB_FLUSH_PTW_CACHE |
+ IOMMU_TLB_FLUSH_MACRO_TLB |
+ IOMMU_TLB_FLUSH_MICRO_TLB(5) |
+ IOMMU_TLB_FLUSH_MICRO_TLB(4) |
+ IOMMU_TLB_FLUSH_MICRO_TLB(3) |
+ IOMMU_TLB_FLUSH_MICRO_TLB(2) |
+ IOMMU_TLB_FLUSH_MICRO_TLB(1) |
+ IOMMU_TLB_FLUSH_MICRO_TLB(0));
+
+ ret = readl_poll_timeout_atomic(iommu->base + IOMMU_TLB_FLUSH_REG,
+ reg, !reg,
+ 1, 2000);
+ if (ret)
+ dev_warn(iommu->dev, "TLB Flush timed out!\n");
+
+ return ret;
+}
+
+static void sun50i_iommu_flush_iotlb_all(struct iommu_domain *domain)
+{
+ struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain);
+ struct sun50i_iommu *iommu = sun50i_domain->iommu;
+ unsigned long flags;
+
+ /*
+ * At boot, we'll have a first call into .flush_iotlb_all right after
+ * .probe_device, and since we link our (single) domain to our iommu in
+ * the .attach_device callback, we don't have that pointer set.
+ *
+ * It shouldn't really be any trouble to ignore it though since we flush
+ * all caches as part of the device powerup.
+ */
+ if (!iommu)
+ return;
+
+ spin_lock_irqsave(&iommu->iommu_lock, flags);
+ sun50i_iommu_flush_all_tlb(iommu);
+ spin_unlock_irqrestore(&iommu->iommu_lock, flags);
+}
+
+static void sun50i_iommu_iotlb_sync(struct iommu_domain *domain,
+ struct iommu_iotlb_gather *gather)
+{
+ sun50i_iommu_flush_iotlb_all(domain);
+}
+
+static int sun50i_iommu_enable(struct sun50i_iommu *iommu)
+{
+ struct sun50i_iommu_domain *sun50i_domain;
+ unsigned long flags;
+ int ret;
+
+ if (!iommu->domain)
+ return 0;
+
+ sun50i_domain = to_sun50i_domain(iommu->domain);
+
+ ret = reset_control_deassert(iommu->reset);
+ if (ret)
+ return ret;
+
+ ret = clk_prepare_enable(iommu->clk);
+ if (ret)
+ goto err_reset_assert;
+
+ spin_lock_irqsave(&iommu->iommu_lock, flags);
+
+ iommu_write(iommu, IOMMU_TTB_REG, sun50i_domain->dt_dma);
+ iommu_write(iommu, IOMMU_TLB_PREFETCH_REG,
+ IOMMU_TLB_PREFETCH_MASTER_ENABLE(0) |
+ IOMMU_TLB_PREFETCH_MASTER_ENABLE(1) |
+ IOMMU_TLB_PREFETCH_MASTER_ENABLE(2) |
+ IOMMU_TLB_PREFETCH_MASTER_ENABLE(3) |
+ IOMMU_TLB_PREFETCH_MASTER_ENABLE(4) |
+ IOMMU_TLB_PREFETCH_MASTER_ENABLE(5));
+ iommu_write(iommu, IOMMU_INT_ENABLE_REG, IOMMU_INT_MASK);
+ iommu_write(iommu, IOMMU_DM_AUT_CTRL_REG(SUN50I_IOMMU_ACI_NONE),
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 0) |
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 0) |
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 1) |
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 1) |
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 2) |
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 2) |
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 3) |
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 3) |
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 4) |
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 4) |
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 5) |
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 5));
+
+ iommu_write(iommu, IOMMU_DM_AUT_CTRL_REG(SUN50I_IOMMU_ACI_RD),
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_RD, 0) |
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_RD, 1) |
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_RD, 2) |
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_RD, 3) |
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_RD, 4) |
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_RD, 5));
+
+ iommu_write(iommu, IOMMU_DM_AUT_CTRL_REG(SUN50I_IOMMU_ACI_WR),
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_WR, 0) |
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_WR, 1) |
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_WR, 2) |
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_WR, 3) |
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_WR, 4) |
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_WR, 5));
+
+ ret = sun50i_iommu_flush_all_tlb(iommu);
+ if (ret) {
+ spin_unlock_irqrestore(&iommu->iommu_lock, flags);
+ goto err_clk_disable;
+ }
+
+ iommu_write(iommu, IOMMU_AUTO_GATING_REG, IOMMU_AUTO_GATING_ENABLE);
+ iommu_write(iommu, IOMMU_ENABLE_REG, IOMMU_ENABLE_ENABLE);
+
+ spin_unlock_irqrestore(&iommu->iommu_lock, flags);
+
+ return 0;
+
+err_clk_disable:
+ clk_disable_unprepare(iommu->clk);
+
+err_reset_assert:
+ reset_control_assert(iommu->reset);
+
+ return ret;
+}
+
+static void sun50i_iommu_disable(struct sun50i_iommu *iommu)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&iommu->iommu_lock, flags);
+
+ iommu_write(iommu, IOMMU_ENABLE_REG, 0);
+ iommu_write(iommu, IOMMU_TTB_REG, 0);
+
+ spin_unlock_irqrestore(&iommu->iommu_lock, flags);
+
+ clk_disable_unprepare(iommu->clk);
+ reset_control_assert(iommu->reset);
+}
+
+static void *sun50i_iommu_alloc_page_table(struct sun50i_iommu *iommu,
+ gfp_t gfp)
+{
+ dma_addr_t pt_dma;
+ u32 *page_table;
+
+ page_table = kmem_cache_zalloc(iommu->pt_pool, gfp);
+ if (!page_table)
+ return ERR_PTR(-ENOMEM);
+
+ pt_dma = dma_map_single(iommu->dev, page_table, PT_SIZE, DMA_TO_DEVICE);
+ if (dma_mapping_error(iommu->dev, pt_dma)) {
+ dev_err(iommu->dev, "Couldn't map L2 Page Table\n");
+ kmem_cache_free(iommu->pt_pool, page_table);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ /* We rely on the physical address and DMA address being the same */
+ WARN_ON(pt_dma != virt_to_phys(page_table));
+
+ return page_table;
+}
+
+static void sun50i_iommu_free_page_table(struct sun50i_iommu *iommu,
+ u32 *page_table)
+{
+ phys_addr_t pt_phys = virt_to_phys(page_table);
+
+ dma_unmap_single(iommu->dev, pt_phys, PT_SIZE, DMA_TO_DEVICE);
+ kmem_cache_free(iommu->pt_pool, page_table);
+}
+
+static u32 *sun50i_dte_get_page_table(struct sun50i_iommu_domain *sun50i_domain,
+ dma_addr_t iova, gfp_t gfp)
+{
+ struct sun50i_iommu *iommu = sun50i_domain->iommu;
+ u32 *page_table;
+ u32 *dte_addr;
+ u32 old_dte;
+ u32 dte;
+
+ dte_addr = &sun50i_domain->dt[sun50i_iova_get_dte_index(iova)];
+ dte = *dte_addr;
+ if (sun50i_dte_is_pt_valid(dte)) {
+ phys_addr_t pt_phys = sun50i_dte_get_pt_address(dte);
+ return (u32 *)phys_to_virt(pt_phys);
+ }
+
+ page_table = sun50i_iommu_alloc_page_table(iommu, gfp);
+ if (IS_ERR(page_table))
+ return page_table;
+
+ dte = sun50i_mk_dte(virt_to_phys(page_table));
+ old_dte = cmpxchg(dte_addr, 0, dte);
+ if (old_dte) {
+ phys_addr_t installed_pt_phys =
+ sun50i_dte_get_pt_address(old_dte);
+ u32 *installed_pt = phys_to_virt(installed_pt_phys);
+ u32 *drop_pt = page_table;
+
+ page_table = installed_pt;
+ dte = old_dte;
+ sun50i_iommu_free_page_table(iommu, drop_pt);
+ }
+
+ sun50i_table_flush(sun50i_domain, page_table, PT_SIZE);
+ sun50i_table_flush(sun50i_domain, dte_addr, 1);
+
+ return page_table;
+}
+
+static int sun50i_iommu_map(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+{
+ struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain);
+ struct sun50i_iommu *iommu = sun50i_domain->iommu;
+ u32 pte_index;
+ u32 *page_table, *pte_addr;
+ int ret = 0;
+
+ page_table = sun50i_dte_get_page_table(sun50i_domain, iova, gfp);
+ if (IS_ERR(page_table)) {
+ ret = PTR_ERR(page_table);
+ goto out;
+ }
+
+ pte_index = sun50i_iova_get_pte_index(iova);
+ pte_addr = &page_table[pte_index];
+ if (unlikely(sun50i_pte_is_page_valid(*pte_addr))) {
+ phys_addr_t page_phys = sun50i_pte_get_page_address(*pte_addr);
+ dev_err(iommu->dev,
+ "iova %pad already mapped to %pa cannot remap to %pa prot: %#x\n",
+ &iova, &page_phys, &paddr, prot);
+ ret = -EBUSY;
+ goto out;
+ }
+
+ *pte_addr = sun50i_mk_pte(paddr, prot);
+ sun50i_table_flush(sun50i_domain, pte_addr, 1);
+
+out:
+ return ret;
+}
+
+static size_t sun50i_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
+ size_t size, struct iommu_iotlb_gather *gather)
+{
+ struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain);
+ phys_addr_t pt_phys;
+ u32 *pte_addr;
+ u32 dte;
+
+ dte = sun50i_domain->dt[sun50i_iova_get_dte_index(iova)];
+ if (!sun50i_dte_is_pt_valid(dte))
+ return 0;
+
+ pt_phys = sun50i_dte_get_pt_address(dte);
+ pte_addr = (u32 *)phys_to_virt(pt_phys) + sun50i_iova_get_pte_index(iova);
+
+ if (!sun50i_pte_is_page_valid(*pte_addr))
+ return 0;
+
+ memset(pte_addr, 0, sizeof(*pte_addr));
+ sun50i_table_flush(sun50i_domain, pte_addr, 1);
+
+ return SZ_4K;
+}
+
+static phys_addr_t sun50i_iommu_iova_to_phys(struct iommu_domain *domain,
+ dma_addr_t iova)
+{
+ struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain);
+ phys_addr_t pt_phys;
+ u32 *page_table;
+ u32 dte, pte;
+
+ dte = sun50i_domain->dt[sun50i_iova_get_dte_index(iova)];
+ if (!sun50i_dte_is_pt_valid(dte))
+ return 0;
+
+ pt_phys = sun50i_dte_get_pt_address(dte);
+ page_table = (u32 *)phys_to_virt(pt_phys);
+ pte = page_table[sun50i_iova_get_pte_index(iova)];
+ if (!sun50i_pte_is_page_valid(pte))
+ return 0;
+
+ return sun50i_pte_get_page_address(pte) +
+ sun50i_iova_get_page_offset(iova);
+}
+
+static struct iommu_domain *sun50i_iommu_domain_alloc(unsigned type)
+{
+ struct sun50i_iommu_domain *sun50i_domain;
+
+ if (type != IOMMU_DOMAIN_DMA &&
+ type != IOMMU_DOMAIN_IDENTITY &&
+ type != IOMMU_DOMAIN_UNMANAGED)
+ return NULL;
+
+ sun50i_domain = kzalloc(sizeof(*sun50i_domain), GFP_KERNEL);
+ if (!sun50i_domain)
+ return NULL;
+
+ if (type == IOMMU_DOMAIN_DMA &&
+ iommu_get_dma_cookie(&sun50i_domain->domain))
+ goto err_free_domain;
+
+ sun50i_domain->dt = (u32 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(DT_SIZE));
+ if (!sun50i_domain->dt)
+ goto err_put_cookie;
+
+ refcount_set(&sun50i_domain->refcnt, 1);
+
+ sun50i_domain->domain.geometry.aperture_start = 0;
+ sun50i_domain->domain.geometry.aperture_end = DMA_BIT_MASK(32);
+ sun50i_domain->domain.geometry.force_aperture = true;
+
+ return &sun50i_domain->domain;
+
+err_put_cookie:
+ if (type == IOMMU_DOMAIN_DMA)
+ iommu_put_dma_cookie(&sun50i_domain->domain);
+
+err_free_domain:
+ kfree(sun50i_domain);
+
+ return NULL;
+}
+
+static void sun50i_iommu_domain_free(struct iommu_domain *domain)
+{
+ struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain);
+
+ free_pages((unsigned long)sun50i_domain->dt, get_order(DT_SIZE));
+ sun50i_domain->dt = NULL;
+
+ iommu_put_dma_cookie(domain);
+
+ kfree(sun50i_domain);
+}
+
+static int sun50i_iommu_attach_domain(struct sun50i_iommu *iommu,
+ struct sun50i_iommu_domain *sun50i_domain)
+{
+ iommu->domain = &sun50i_domain->domain;
+ sun50i_domain->iommu = iommu;
+
+ sun50i_domain->dt_dma = dma_map_single(iommu->dev, sun50i_domain->dt,
+ DT_SIZE, DMA_TO_DEVICE);
+ if (dma_mapping_error(iommu->dev, sun50i_domain->dt_dma)) {
+ dev_err(iommu->dev, "Couldn't map L1 Page Table\n");
+ return -ENOMEM;
+ }
+
+ return sun50i_iommu_enable(iommu);
+}
+
+static void sun50i_iommu_detach_domain(struct sun50i_iommu *iommu,
+ struct sun50i_iommu_domain *sun50i_domain)
+{
+ unsigned int i;
+
+ for (i = 0; i < NUM_DT_ENTRIES; i++) {
+ phys_addr_t pt_phys;
+ u32 *page_table;
+ u32 *dte_addr;
+ u32 dte;
+
+ dte_addr = &sun50i_domain->dt[i];
+ dte = *dte_addr;
+ if (!sun50i_dte_is_pt_valid(dte))
+ continue;
+
+ memset(dte_addr, 0, sizeof(*dte_addr));
+ sun50i_table_flush(sun50i_domain, dte_addr, 1);
+
+ pt_phys = sun50i_dte_get_pt_address(dte);
+ page_table = phys_to_virt(pt_phys);
+ sun50i_iommu_free_page_table(iommu, page_table);
+ }
+
+
+ sun50i_iommu_disable(iommu);
+
+ dma_unmap_single(iommu->dev, virt_to_phys(sun50i_domain->dt),
+ DT_SIZE, DMA_TO_DEVICE);
+
+ iommu->domain = NULL;
+}
+
+static void sun50i_iommu_detach_device(struct iommu_domain *domain,
+ struct device *dev)
+{
+ struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain);
+ struct sun50i_iommu *iommu = dev_iommu_priv_get(dev);
+
+ dev_dbg(dev, "Detaching from IOMMU domain\n");
+
+ if (iommu->domain != domain)
+ return;
+
+ if (refcount_dec_and_test(&sun50i_domain->refcnt))
+ sun50i_iommu_detach_domain(iommu, sun50i_domain);
+}
+
+static int sun50i_iommu_attach_device(struct iommu_domain *domain,
+ struct device *dev)
+{
+ struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain);
+ struct sun50i_iommu *iommu;
+
+ iommu = sun50i_iommu_from_dev(dev);
+ if (!iommu)
+ return -ENODEV;
+
+ dev_dbg(dev, "Attaching to IOMMU domain\n");
+
+ refcount_inc(&sun50i_domain->refcnt);
+
+ if (iommu->domain == domain)
+ return 0;
+
+ if (iommu->domain)
+ sun50i_iommu_detach_device(iommu->domain, dev);
+
+ sun50i_iommu_attach_domain(iommu, sun50i_domain);
+
+ return 0;
+}
+
+static struct iommu_device *sun50i_iommu_probe_device(struct device *dev)
+{
+ struct sun50i_iommu *iommu;
+
+ iommu = sun50i_iommu_from_dev(dev);
+ if (!iommu)
+ return ERR_PTR(-ENODEV);
+
+ return &iommu->iommu;
+}
+
+static void sun50i_iommu_release_device(struct device *dev) {}
+
+static struct iommu_group *sun50i_iommu_device_group(struct device *dev)
+{
+ struct sun50i_iommu *iommu = sun50i_iommu_from_dev(dev);
+
+ return iommu_group_ref_get(iommu->group);
+}
+
+static int sun50i_iommu_of_xlate(struct device *dev,
+ struct of_phandle_args *args)
+{
+ struct platform_device *iommu_pdev = of_find_device_by_node(args->np);
+ unsigned id = args->args[0];
+
+ dev_iommu_priv_set(dev, platform_get_drvdata(iommu_pdev));
+
+ return iommu_fwspec_add_ids(dev, &id, 1);
+}
+
+static const struct iommu_ops sun50i_iommu_ops = {
+ .pgsize_bitmap = SZ_4K,
+ .attach_dev = sun50i_iommu_attach_device,
+ .detach_dev = sun50i_iommu_detach_device,
+ .device_group = sun50i_iommu_device_group,
+ .domain_alloc = sun50i_iommu_domain_alloc,
+ .domain_free = sun50i_iommu_domain_free,
+ .flush_iotlb_all = sun50i_iommu_flush_iotlb_all,
+ .iotlb_sync = sun50i_iommu_iotlb_sync,
+ .iova_to_phys = sun50i_iommu_iova_to_phys,
+ .map = sun50i_iommu_map,
+ .of_xlate = sun50i_iommu_of_xlate,
+ .probe_device = sun50i_iommu_probe_device,
+ .release_device = sun50i_iommu_release_device,
+ .unmap = sun50i_iommu_unmap,
+};
+
+static void sun50i_iommu_report_fault(struct sun50i_iommu *iommu,
+ unsigned master, phys_addr_t iova,
+ unsigned prot)
+{
+ dev_err(iommu->dev, "Page fault for %pad (master %d, dir %s)\n",
+ &iova, master, (prot == IOMMU_FAULT_WRITE) ? "wr" : "rd");
+
+ if (iommu->domain)
+ report_iommu_fault(iommu->domain, iommu->dev, iova, prot);
+ else
+ dev_err(iommu->dev, "Page fault while iommu not attached to any domain?\n");
+}
+
+static phys_addr_t sun50i_iommu_handle_pt_irq(struct sun50i_iommu *iommu,
+ unsigned addr_reg,
+ unsigned blame_reg)
+{
+ phys_addr_t iova;
+ unsigned master;
+ u32 blame;
+
+ assert_spin_locked(&iommu->iommu_lock);
+
+ iova = iommu_read(iommu, addr_reg);
+ blame = iommu_read(iommu, blame_reg);
+ master = ilog2(blame & IOMMU_INT_MASTER_MASK);
+
+ /*
+ * If the address is not in the page table, we can't get what
+ * operation triggered the fault. Assume it's a read
+ * operation.
+ */
+ sun50i_iommu_report_fault(iommu, master, iova, IOMMU_FAULT_READ);
+
+ return iova;
+}
+
+static phys_addr_t sun50i_iommu_handle_perm_irq(struct sun50i_iommu *iommu)
+{
+ enum sun50i_iommu_aci aci;
+ phys_addr_t iova;
+ unsigned master;
+ unsigned dir;
+ u32 blame;
+
+ assert_spin_locked(&iommu->iommu_lock);
+
+ blame = iommu_read(iommu, IOMMU_INT_STA_REG);
+ master = ilog2(blame & IOMMU_INT_MASTER_MASK);
+ iova = iommu_read(iommu, IOMMU_INT_ERR_ADDR_REG(master));
+ aci = sun50i_get_pte_aci(iommu_read(iommu,
+ IOMMU_INT_ERR_DATA_REG(master)));
+
+ switch (aci) {
+ /*
+ * If we are in the read-only domain, then it means we
+ * tried to write.
+ */
+ case SUN50I_IOMMU_ACI_RD:
+ dir = IOMMU_FAULT_WRITE;
+ break;
+
+ /*
+ * If we are in the write-only domain, then it means
+ * we tried to read.
+ */
+ case SUN50I_IOMMU_ACI_WR:
+
+ /*
+ * If we are in the domain without any permission, we
+ * can't really tell. Let's default to a read
+ * operation.
+ */
+ case SUN50I_IOMMU_ACI_NONE:
+
+ /* WTF? */
+ case SUN50I_IOMMU_ACI_RD_WR:
+ default:
+ dir = IOMMU_FAULT_READ;
+ break;
+ }
+
+ /*
+ * If the address is not in the page table, we can't get what
+ * operation triggered the fault. Assume it's a read
+ * operation.
+ */
+ sun50i_iommu_report_fault(iommu, master, iova, dir);
+
+ return iova;
+}
+
+static irqreturn_t sun50i_iommu_irq(int irq, void *dev_id)
+{
+ struct sun50i_iommu *iommu = dev_id;
+ u32 status;
+
+ spin_lock(&iommu->iommu_lock);
+
+ status = iommu_read(iommu, IOMMU_INT_STA_REG);
+ if (!(status & IOMMU_INT_MASK)) {
+ spin_unlock(&iommu->iommu_lock);
+ return IRQ_NONE;
+ }
+
+ if (status & IOMMU_INT_INVALID_L2PG)
+ sun50i_iommu_handle_pt_irq(iommu,
+ IOMMU_INT_ERR_ADDR_L2_REG,
+ IOMMU_L2PG_INT_REG);
+ else if (status & IOMMU_INT_INVALID_L1PG)
+ sun50i_iommu_handle_pt_irq(iommu,
+ IOMMU_INT_ERR_ADDR_L1_REG,
+ IOMMU_L1PG_INT_REG);
+ else
+ sun50i_iommu_handle_perm_irq(iommu);
+
+ iommu_write(iommu, IOMMU_INT_CLR_REG, status);
+
+ iommu_write(iommu, IOMMU_RESET_REG, ~status);
+ iommu_write(iommu, IOMMU_RESET_REG, status);
+
+ spin_unlock(&iommu->iommu_lock);
+
+ return IRQ_HANDLED;
+}
+
+static int sun50i_iommu_probe(struct platform_device *pdev)
+{
+ struct sun50i_iommu *iommu;
+ int ret, irq;
+
+ iommu = devm_kzalloc(&pdev->dev, sizeof(*iommu), GFP_KERNEL);
+ if (!iommu)
+ return -ENOMEM;
+ spin_lock_init(&iommu->iommu_lock);
+ platform_set_drvdata(pdev, iommu);
+ iommu->dev = &pdev->dev;
+
+ iommu->pt_pool = kmem_cache_create(dev_name(&pdev->dev),
+ PT_SIZE, PT_SIZE,
+ SLAB_HWCACHE_ALIGN,
+ NULL);
+ if (!iommu->pt_pool)
+ return -ENOMEM;
+
+ iommu->group = iommu_group_alloc();
+ if (IS_ERR(iommu->group)) {
+ ret = PTR_ERR(iommu->group);
+ goto err_free_cache;
+ }
+
+ iommu->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(iommu->base)) {
+ ret = PTR_ERR(iommu->base);
+ goto err_free_group;
+ }
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
+ ret = irq;
+ goto err_free_group;
+ }
+
+ iommu->clk = devm_clk_get(&pdev->dev, NULL);
+ if (IS_ERR(iommu->clk)) {
+ dev_err(&pdev->dev, "Couldn't get our clock.\n");
+ ret = PTR_ERR(iommu->clk);
+ goto err_free_group;
+ }
+
+ iommu->reset = devm_reset_control_get(&pdev->dev, NULL);
+ if (IS_ERR(iommu->reset)) {
+ dev_err(&pdev->dev, "Couldn't get our reset line.\n");
+ ret = PTR_ERR(iommu->reset);
+ goto err_free_group;
+ }
+
+ ret = iommu_device_sysfs_add(&iommu->iommu, &pdev->dev,
+ NULL, dev_name(&pdev->dev));
+ if (ret)
+ goto err_free_group;
+
+ iommu_device_set_ops(&iommu->iommu, &sun50i_iommu_ops);
+ iommu_device_set_fwnode(&iommu->iommu, &pdev->dev.of_node->fwnode);
+
+ ret = iommu_device_register(&iommu->iommu);
+ if (ret)
+ goto err_remove_sysfs;
+
+ ret = devm_request_irq(&pdev->dev, irq, sun50i_iommu_irq, 0,
+ dev_name(&pdev->dev), iommu);
+ if (ret < 0)
+ goto err_unregister;
+
+ bus_set_iommu(&platform_bus_type, &sun50i_iommu_ops);
+
+ return 0;
+
+err_unregister:
+ iommu_device_unregister(&iommu->iommu);
+
+err_remove_sysfs:
+ iommu_device_sysfs_remove(&iommu->iommu);
+
+err_free_group:
+ iommu_group_put(iommu->group);
+
+err_free_cache:
+ kmem_cache_destroy(iommu->pt_pool);
+
+ return ret;
+}
+
+static const struct of_device_id sun50i_iommu_dt[] = {
+ { .compatible = "allwinner,sun50i-h6-iommu", },
+ { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, sun50i_iommu_dt);
+
+static struct platform_driver sun50i_iommu_driver = {
+ .driver = {
+ .name = "sun50i-iommu",
+ .of_match_table = sun50i_iommu_dt,
+ .suppress_bind_attrs = true,
+ }
+};
+builtin_platform_driver_probe(sun50i_iommu_driver, sun50i_iommu_probe);
+
+MODULE_DESCRIPTION("Allwinner H6 IOMMU driver");
+MODULE_AUTHOR("Maxime Ripard <maxime@cerno.tech>");
+MODULE_AUTHOR("zhuxianbin <zhuxianbin@allwinnertech.com>");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c
index 3924f7c..fac7202 100644
--- a/drivers/iommu/tegra-gart.c
+++ b/drivers/iommu/tegra-gart.c
@@ -113,8 +113,8 @@
if (gart->active_domain && gart->active_domain != domain) {
ret = -EBUSY;
- } else if (dev->archdata.iommu != domain) {
- dev->archdata.iommu = domain;
+ } else if (dev_iommu_priv_get(dev) != domain) {
+ dev_iommu_priv_set(dev, domain);
gart->active_domain = domain;
gart->active_devices++;
}
@@ -131,8 +131,8 @@
spin_lock(&gart->dom_lock);
- if (dev->archdata.iommu == domain) {
- dev->archdata.iommu = NULL;
+ if (dev_iommu_priv_get(dev) == domain) {
+ dev_iommu_priv_set(dev, NULL);
if (--gart->active_devices == 0)
gart->active_domain = NULL;
@@ -178,7 +178,7 @@
}
static int gart_iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t pa, size_t bytes, int prot)
+ phys_addr_t pa, size_t bytes, int prot, gfp_t gfp)
{
struct gart_device *gart = gart_handle;
int ret;
@@ -243,28 +243,16 @@
return false;
}
-static int gart_iommu_add_device(struct device *dev)
+static struct iommu_device *gart_iommu_probe_device(struct device *dev)
{
- struct iommu_group *group;
+ if (!dev_iommu_fwspec_get(dev))
+ return ERR_PTR(-ENODEV);
- if (!dev->iommu_fwspec)
- return -ENODEV;
-
- group = iommu_group_get_for_dev(dev);
- if (IS_ERR(group))
- return PTR_ERR(group);
-
- iommu_group_put(group);
-
- iommu_device_link(&gart_handle->iommu, dev);
-
- return 0;
+ return &gart_handle->iommu;
}
-static void gart_iommu_remove_device(struct device *dev)
+static void gart_iommu_release_device(struct device *dev)
{
- iommu_group_remove_device(dev);
- iommu_device_unlink(&gart_handle->iommu, dev);
}
static int gart_iommu_of_xlate(struct device *dev,
@@ -290,8 +278,8 @@
.domain_free = gart_iommu_domain_free,
.attach_dev = gart_iommu_attach_dev,
.detach_dev = gart_iommu_detach_dev,
- .add_device = gart_iommu_add_device,
- .remove_device = gart_iommu_remove_device,
+ .probe_device = gart_iommu_probe_device,
+ .release_device = gart_iommu_release_device,
.device_group = generic_device_group,
.map = gart_iommu_map,
.unmap = gart_iommu_unmap,
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index dd48623..0becdbf 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -12,6 +12,7 @@
#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
+#include <linux/spinlock.h>
#include <linux/dma-mapping.h>
#include <soc/tegra/ahb.h>
@@ -19,8 +20,10 @@
struct tegra_smmu_group {
struct list_head list;
+ struct tegra_smmu *smmu;
const struct tegra_smmu_group_soc *soc;
struct iommu_group *group;
+ unsigned int swgroup;
};
struct tegra_smmu {
@@ -49,6 +52,7 @@
struct iommu_domain domain;
struct tegra_smmu *smmu;
unsigned int use_count;
+ spinlock_t lock;
u32 *count;
struct page **pts;
struct page *pd;
@@ -127,6 +131,11 @@
#define SMMU_PDE_SHIFT 22
#define SMMU_PTE_SHIFT 12
+#define SMMU_PAGE_MASK (~(SMMU_SIZE_PT-1))
+#define SMMU_OFFSET_IN_PAGE(x) ((unsigned long)(x) & ~SMMU_PAGE_MASK)
+#define SMMU_PFN_PHYS(x) ((phys_addr_t)(x) << SMMU_PTE_SHIFT)
+#define SMMU_PHYS_PFN(x) ((unsigned long)((x) >> SMMU_PTE_SHIFT))
+
#define SMMU_PD_READABLE (1 << 31)
#define SMMU_PD_WRITABLE (1 << 30)
#define SMMU_PD_NONSECURE (1 << 29)
@@ -240,7 +249,7 @@
static inline void smmu_flush(struct tegra_smmu *smmu)
{
- smmu_readl(smmu, SMMU_CONFIG);
+ smmu_readl(smmu, SMMU_PTB_ASID);
}
static int tegra_smmu_alloc_asid(struct tegra_smmu *smmu, unsigned int *idp)
@@ -308,6 +317,8 @@
return NULL;
}
+ spin_lock_init(&as->lock);
+
/* setup aperture */
as->domain.geometry.aperture_start = 0;
as->domain.geometry.aperture_end = 0xffffffff;
@@ -351,6 +362,20 @@
unsigned int i;
u32 value;
+ group = tegra_smmu_find_swgroup(smmu, swgroup);
+ if (group) {
+ value = smmu_readl(smmu, group->reg);
+ value &= ~SMMU_ASID_MASK;
+ value |= SMMU_ASID_VALUE(asid);
+ value |= SMMU_ASID_ENABLE;
+ smmu_writel(smmu, value, group->reg);
+ } else {
+ pr_warn("%s group from swgroup %u not found\n", __func__,
+ swgroup);
+ /* No point moving ahead if group was not found */
+ return;
+ }
+
for (i = 0; i < smmu->soc->num_clients; i++) {
const struct tegra_mc_client *client = &smmu->soc->clients[i];
@@ -361,15 +386,6 @@
value |= BIT(client->smmu.bit);
smmu_writel(smmu, value, client->smmu.reg);
}
-
- group = tegra_smmu_find_swgroup(smmu, swgroup);
- if (group) {
- value = smmu_readl(smmu, group->reg);
- value &= ~SMMU_ASID_MASK;
- value |= SMMU_ASID_VALUE(asid);
- value |= SMMU_ASID_ENABLE;
- smmu_writel(smmu, value, group->reg);
- }
}
static void tegra_smmu_disable(struct tegra_smmu *smmu, unsigned int swgroup,
@@ -460,7 +476,7 @@
static int tegra_smmu_attach_dev(struct iommu_domain *domain,
struct device *dev)
{
- struct tegra_smmu *smmu = dev->archdata.iommu;
+ struct tegra_smmu *smmu = dev_iommu_priv_get(dev);
struct tegra_smmu_as *as = to_smmu_as(domain);
struct device_node *np = dev->of_node;
struct of_phandle_args args;
@@ -564,19 +580,14 @@
}
static u32 *as_get_pte(struct tegra_smmu_as *as, dma_addr_t iova,
- dma_addr_t *dmap)
+ dma_addr_t *dmap, struct page *page)
{
unsigned int pde = iova_pd_index(iova);
struct tegra_smmu *smmu = as->smmu;
if (!as->pts[pde]) {
- struct page *page;
dma_addr_t dma;
- page = alloc_page(GFP_KERNEL | __GFP_DMA | __GFP_ZERO);
- if (!page)
- return NULL;
-
dma = dma_map_page(smmu->dev, page, 0, SMMU_SIZE_PT,
DMA_TO_DEVICE);
if (dma_mapping_error(smmu->dev, dma)) {
@@ -639,7 +650,7 @@
u32 *pte, dma_addr_t pte_dma, u32 val)
{
struct tegra_smmu *smmu = as->smmu;
- unsigned long offset = offset_in_page(pte);
+ unsigned long offset = SMMU_OFFSET_IN_PAGE(pte);
*pte = val;
@@ -650,15 +661,61 @@
smmu_flush(smmu);
}
-static int tegra_smmu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot)
+static struct page *as_get_pde_page(struct tegra_smmu_as *as,
+ unsigned long iova, gfp_t gfp,
+ unsigned long *flags)
+{
+ unsigned int pde = iova_pd_index(iova);
+ struct page *page = as->pts[pde];
+
+ /* at first check whether allocation needs to be done at all */
+ if (page)
+ return page;
+
+ /*
+ * In order to prevent exhaustion of the atomic memory pool, we
+ * allocate page in a sleeping context if GFP flags permit. Hence
+ * spinlock needs to be unlocked and re-locked after allocation.
+ */
+ if (!(gfp & __GFP_ATOMIC))
+ spin_unlock_irqrestore(&as->lock, *flags);
+
+ page = alloc_page(gfp | __GFP_DMA | __GFP_ZERO);
+
+ if (!(gfp & __GFP_ATOMIC))
+ spin_lock_irqsave(&as->lock, *flags);
+
+ /*
+ * In a case of blocking allocation, a concurrent mapping may win
+ * the PDE allocation. In this case the allocated page isn't needed
+ * if allocation succeeded and the allocation failure isn't fatal.
+ */
+ if (as->pts[pde]) {
+ if (page)
+ __free_page(page);
+
+ page = as->pts[pde];
+ }
+
+ return page;
+}
+
+static int
+__tegra_smmu_map(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp,
+ unsigned long *flags)
{
struct tegra_smmu_as *as = to_smmu_as(domain);
dma_addr_t pte_dma;
+ struct page *page;
u32 pte_attrs;
u32 *pte;
- pte = as_get_pte(as, iova, &pte_dma);
+ page = as_get_pde_page(as, iova, gfp, flags);
+ if (!page)
+ return -ENOMEM;
+
+ pte = as_get_pte(as, iova, &pte_dma, page);
if (!pte)
return -ENOMEM;
@@ -675,13 +732,14 @@
pte_attrs |= SMMU_PTE_WRITABLE;
tegra_smmu_set_pte(as, iova, pte, pte_dma,
- __phys_to_pfn(paddr) | pte_attrs);
+ SMMU_PHYS_PFN(paddr) | pte_attrs);
return 0;
}
-static size_t tegra_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
- size_t size, struct iommu_iotlb_gather *gather)
+static size_t
+__tegra_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
+ size_t size, struct iommu_iotlb_gather *gather)
{
struct tegra_smmu_as *as = to_smmu_as(domain);
dma_addr_t pte_dma;
@@ -697,6 +755,33 @@
return size;
}
+static int tegra_smmu_map(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+{
+ struct tegra_smmu_as *as = to_smmu_as(domain);
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&as->lock, flags);
+ ret = __tegra_smmu_map(domain, iova, paddr, size, prot, gfp, &flags);
+ spin_unlock_irqrestore(&as->lock, flags);
+
+ return ret;
+}
+
+static size_t tegra_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
+ size_t size, struct iommu_iotlb_gather *gather)
+{
+ struct tegra_smmu_as *as = to_smmu_as(domain);
+ unsigned long flags;
+
+ spin_lock_irqsave(&as->lock, flags);
+ size = __tegra_smmu_unmap(domain, iova, size, gather);
+ spin_unlock_irqrestore(&as->lock, flags);
+
+ return size;
+}
+
static phys_addr_t tegra_smmu_iova_to_phys(struct iommu_domain *domain,
dma_addr_t iova)
{
@@ -711,7 +796,7 @@
pfn = *pte & as->smmu->pfn_mask;
- return PFN_PHYS(pfn);
+ return SMMU_PFN_PHYS(pfn) + SMMU_OFFSET_IN_PAGE(iova);
}
static struct tegra_smmu *tegra_smmu_find(struct device_node *np)
@@ -752,11 +837,10 @@
return 0;
}
-static int tegra_smmu_add_device(struct device *dev)
+static struct iommu_device *tegra_smmu_probe_device(struct device *dev)
{
struct device_node *np = dev->of_node;
struct tegra_smmu *smmu = NULL;
- struct iommu_group *group;
struct of_phandle_args args;
unsigned int index = 0;
int err;
@@ -769,16 +853,14 @@
of_node_put(args.np);
if (err < 0)
- return err;
+ return ERR_PTR(err);
/*
* Only a single IOMMU master interface is currently
* supported by the Linux kernel, so abort after the
* first match.
*/
- dev->archdata.iommu = smmu;
-
- iommu_device_link(&smmu->iommu, dev);
+ dev_iommu_priv_set(dev, smmu);
break;
}
@@ -788,26 +870,14 @@
}
if (!smmu)
- return -ENODEV;
+ return ERR_PTR(-ENODEV);
- group = iommu_group_get_for_dev(dev);
- if (IS_ERR(group))
- return PTR_ERR(group);
-
- iommu_group_put(group);
-
- return 0;
+ return &smmu->iommu;
}
-static void tegra_smmu_remove_device(struct device *dev)
+static void tegra_smmu_release_device(struct device *dev)
{
- struct tegra_smmu *smmu = dev->archdata.iommu;
-
- if (smmu)
- iommu_device_unlink(&smmu->iommu, dev);
-
- dev->archdata.iommu = NULL;
- iommu_group_remove_device(dev);
+ dev_iommu_priv_set(dev, NULL);
}
static const struct tegra_smmu_group_soc *
@@ -823,22 +893,34 @@
return NULL;
}
+static void tegra_smmu_group_release(void *iommu_data)
+{
+ struct tegra_smmu_group *group = iommu_data;
+ struct tegra_smmu *smmu = group->smmu;
+
+ mutex_lock(&smmu->lock);
+ list_del(&group->list);
+ mutex_unlock(&smmu->lock);
+}
+
static struct iommu_group *tegra_smmu_group_get(struct tegra_smmu *smmu,
unsigned int swgroup)
{
const struct tegra_smmu_group_soc *soc;
struct tegra_smmu_group *group;
+ struct iommu_group *grp;
+ /* Find group_soc associating with swgroup */
soc = tegra_smmu_find_group(smmu, swgroup);
- if (!soc)
- return NULL;
mutex_lock(&smmu->lock);
+ /* Find existing iommu_group associating with swgroup or group_soc */
list_for_each_entry(group, &smmu->groups, list)
- if (group->soc == soc) {
+ if ((group->swgroup == swgroup) || (soc && group->soc == soc)) {
+ grp = iommu_group_ref_get(group->group);
mutex_unlock(&smmu->lock);
- return group->group;
+ return grp;
}
group = devm_kzalloc(smmu->dev, sizeof(*group), GFP_KERNEL);
@@ -848,6 +930,8 @@
}
INIT_LIST_HEAD(&group->list);
+ group->swgroup = swgroup;
+ group->smmu = smmu;
group->soc = soc;
group->group = iommu_group_alloc();
@@ -857,6 +941,9 @@
return NULL;
}
+ iommu_group_set_iommudata(group->group, group, tegra_smmu_group_release);
+ if (soc)
+ iommu_group_set_name(group->group, soc->name);
list_add_tail(&group->list, &smmu->groups);
mutex_unlock(&smmu->lock);
@@ -866,7 +953,7 @@
static struct iommu_group *tegra_smmu_device_group(struct device *dev)
{
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
- struct tegra_smmu *smmu = dev->archdata.iommu;
+ struct tegra_smmu *smmu = dev_iommu_priv_get(dev);
struct iommu_group *group;
group = tegra_smmu_group_get(smmu, fwspec->ids[0]);
@@ -890,8 +977,8 @@
.domain_free = tegra_smmu_domain_free,
.attach_dev = tegra_smmu_attach_dev,
.detach_dev = tegra_smmu_detach_dev,
- .add_device = tegra_smmu_add_device,
- .remove_device = tegra_smmu_remove_device,
+ .probe_device = tegra_smmu_probe_device,
+ .release_device = tegra_smmu_release_device,
.device_group = tegra_smmu_device_group,
.map = tegra_smmu_map,
.unmap = tegra_smmu_unmap,
@@ -1010,7 +1097,7 @@
* value. However the IOMMU registration process will attempt to add
* all devices to the IOMMU when bus_set_iommu() is called. In order
* not to rely on global variables to track the IOMMU instance, we
- * set it here so that it can be looked up from the .add_device()
+ * set it here so that it can be looked up from the .probe_device()
* callback via the IOMMU device's .drvdata field.
*/
mc->smmu = smmu;
@@ -1029,10 +1116,11 @@
smmu->dev = dev;
smmu->mc = mc;
- smmu->pfn_mask = BIT_MASK(mc->soc->num_address_bits - PAGE_SHIFT) - 1;
+ smmu->pfn_mask =
+ BIT_MASK(mc->soc->num_address_bits - SMMU_PTE_SHIFT) - 1;
dev_dbg(dev, "address bits: %u, PFN mask: %#lx\n",
mc->soc->num_address_bits, smmu->pfn_mask);
- smmu->tlb_mask = (smmu->soc->num_tlb_lines << 1) - 1;
+ smmu->tlb_mask = (1 << fls(smmu->soc->num_tlb_lines)) - 1;
dev_dbg(dev, "TLB lines: %u, mask: %#lx\n", smmu->soc->num_tlb_lines,
smmu->tlb_mask);
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 60e659a..81dea4c 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -153,7 +153,6 @@
*/
static int __viommu_sync_req(struct viommu_dev *viommu)
{
- int ret = 0;
unsigned int len;
size_t write_len;
struct viommu_request *req;
@@ -182,7 +181,7 @@
kfree(req);
}
- return ret;
+ return 0;
}
static int viommu_sync_req(struct viommu_dev *viommu)
@@ -441,7 +440,7 @@
default:
dev_warn(vdev->dev, "unknown resv mem subtype 0x%x\n",
mem->subtype);
- /* Fall-through */
+ fallthrough;
case VIRTIO_IOMMU_RESV_MEM_T_RESERVED:
region = iommu_alloc_resv_region(start, size, 0,
IOMMU_RESV_RESERVED);
@@ -467,7 +466,7 @@
struct virtio_iommu_req_probe *probe;
struct virtio_iommu_probe_property *prop;
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
- struct viommu_endpoint *vdev = fwspec->iommu_priv;
+ struct viommu_endpoint *vdev = dev_iommu_priv_get(dev);
if (!fwspec->num_ids)
return -EINVAL;
@@ -608,12 +607,22 @@
return &vdomain->domain;
}
-static int viommu_domain_finalise(struct viommu_dev *viommu,
+static int viommu_domain_finalise(struct viommu_endpoint *vdev,
struct iommu_domain *domain)
{
int ret;
+ unsigned long viommu_page_size;
+ struct viommu_dev *viommu = vdev->viommu;
struct viommu_domain *vdomain = to_viommu_domain(domain);
+ viommu_page_size = 1UL << __ffs(viommu->pgsize_bitmap);
+ if (viommu_page_size > PAGE_SIZE) {
+ dev_err(vdev->dev,
+ "granule 0x%lx larger than system page size 0x%lx\n",
+ viommu_page_size, PAGE_SIZE);
+ return -EINVAL;
+ }
+
ret = ida_alloc_range(&viommu->domain_ids, viommu->first_domain,
viommu->last_domain, GFP_KERNEL);
if (ret < 0)
@@ -651,7 +660,7 @@
int ret = 0;
struct virtio_iommu_req_attach req;
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
- struct viommu_endpoint *vdev = fwspec->iommu_priv;
+ struct viommu_endpoint *vdev = dev_iommu_priv_get(dev);
struct viommu_domain *vdomain = to_viommu_domain(domain);
mutex_lock(&vdomain->mutex);
@@ -660,7 +669,7 @@
* Properly initialize the domain now that we know which viommu
* owns it.
*/
- ret = viommu_domain_finalise(vdev->viommu, domain);
+ ret = viommu_domain_finalise(vdev, domain);
} else if (vdomain->viommu != vdev->viommu) {
dev_err(dev, "cannot attach to foreign vIOMMU\n");
ret = -EXDEV;
@@ -715,7 +724,7 @@
}
static int viommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot)
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
int ret;
u32 flags;
@@ -810,8 +819,7 @@
static void viommu_get_resv_regions(struct device *dev, struct list_head *head)
{
struct iommu_resv_region *entry, *new_entry, *msi = NULL;
- struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
- struct viommu_endpoint *vdev = fwspec->iommu_priv;
+ struct viommu_endpoint *vdev = dev_iommu_priv_get(dev);
int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
list_for_each_entry(entry, &vdev->resv_regions, list) {
@@ -840,14 +848,6 @@
iommu_dma_get_resv_regions(dev, head);
}
-static void viommu_put_resv_regions(struct device *dev, struct list_head *head)
-{
- struct iommu_resv_region *entry, *next;
-
- list_for_each_entry_safe(entry, next, head, list)
- kfree(entry);
-}
-
static struct iommu_ops viommu_ops;
static struct virtio_driver virtio_iommu_drv;
@@ -865,29 +865,28 @@
return dev ? dev_to_virtio(dev)->priv : NULL;
}
-static int viommu_add_device(struct device *dev)
+static struct iommu_device *viommu_probe_device(struct device *dev)
{
int ret;
- struct iommu_group *group;
struct viommu_endpoint *vdev;
struct viommu_dev *viommu = NULL;
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
if (!fwspec || fwspec->ops != &viommu_ops)
- return -ENODEV;
+ return ERR_PTR(-ENODEV);
viommu = viommu_get_by_fwnode(fwspec->iommu_fwnode);
if (!viommu)
- return -ENODEV;
+ return ERR_PTR(-ENODEV);
vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
if (!vdev)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
vdev->dev = dev;
vdev->viommu = viommu;
INIT_LIST_HEAD(&vdev->resv_regions);
- fwspec->iommu_priv = vdev;
+ dev_iommu_priv_set(dev, vdev);
if (viommu->probe_size) {
/* Get additional information for this endpoint */
@@ -896,46 +895,26 @@
goto err_free_dev;
}
- ret = iommu_device_link(&viommu->iommu, dev);
- if (ret)
- goto err_free_dev;
+ return &viommu->iommu;
- /*
- * Last step creates a default domain and attaches to it. Everything
- * must be ready.
- */
- group = iommu_group_get_for_dev(dev);
- if (IS_ERR(group)) {
- ret = PTR_ERR(group);
- goto err_unlink_dev;
- }
-
- iommu_group_put(group);
-
- return PTR_ERR_OR_ZERO(group);
-
-err_unlink_dev:
- iommu_device_unlink(&viommu->iommu, dev);
err_free_dev:
- viommu_put_resv_regions(dev, &vdev->resv_regions);
+ generic_iommu_put_resv_regions(dev, &vdev->resv_regions);
kfree(vdev);
- return ret;
+ return ERR_PTR(ret);
}
-static void viommu_remove_device(struct device *dev)
+static void viommu_release_device(struct device *dev)
{
- struct viommu_endpoint *vdev;
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+ struct viommu_endpoint *vdev;
if (!fwspec || fwspec->ops != &viommu_ops)
return;
- vdev = fwspec->iommu_priv;
+ vdev = dev_iommu_priv_get(dev);
- iommu_group_remove_device(dev);
- iommu_device_unlink(&vdev->viommu->iommu, dev);
- viommu_put_resv_regions(dev, &vdev->resv_regions);
+ generic_iommu_put_resv_regions(dev, &vdev->resv_regions);
kfree(vdev);
}
@@ -960,11 +939,11 @@
.unmap = viommu_unmap,
.iova_to_phys = viommu_iova_to_phys,
.iotlb_sync = viommu_iotlb_sync,
- .add_device = viommu_add_device,
- .remove_device = viommu_remove_device,
+ .probe_device = viommu_probe_device,
+ .release_device = viommu_release_device,
.device_group = viommu_device_group,
.get_resv_regions = viommu_get_resv_regions,
- .put_resv_regions = viommu_put_resv_regions,
+ .put_resv_regions = generic_iommu_put_resv_regions,
.of_xlate = viommu_of_xlate,
};
@@ -1031,8 +1010,8 @@
if (ret)
return ret;
- virtio_cread(vdev, struct virtio_iommu_config, page_size_mask,
- &viommu->pgsize_bitmap);
+ virtio_cread_le(vdev, struct virtio_iommu_config, page_size_mask,
+ &viommu->pgsize_bitmap);
if (!viommu->pgsize_bitmap) {
ret = -EINVAL;
@@ -1043,25 +1022,25 @@
viommu->last_domain = ~0U;
/* Optional features */
- virtio_cread_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE,
- struct virtio_iommu_config, input_range.start,
- &input_start);
+ virtio_cread_le_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE,
+ struct virtio_iommu_config, input_range.start,
+ &input_start);
- virtio_cread_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE,
- struct virtio_iommu_config, input_range.end,
- &input_end);
+ virtio_cread_le_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE,
+ struct virtio_iommu_config, input_range.end,
+ &input_end);
- virtio_cread_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_RANGE,
- struct virtio_iommu_config, domain_range.start,
- &viommu->first_domain);
+ virtio_cread_le_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_RANGE,
+ struct virtio_iommu_config, domain_range.start,
+ &viommu->first_domain);
- virtio_cread_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_RANGE,
- struct virtio_iommu_config, domain_range.end,
- &viommu->last_domain);
+ virtio_cread_le_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_RANGE,
+ struct virtio_iommu_config, domain_range.end,
+ &viommu->last_domain);
- virtio_cread_feature(vdev, VIRTIO_IOMMU_F_PROBE,
- struct virtio_iommu_config, probe_size,
- &viommu->probe_size);
+ virtio_cread_le_feature(vdev, VIRTIO_IOMMU_F_PROBE,
+ struct virtio_iommu_config, probe_size,
+ &viommu->probe_size);
viommu->geometry = (struct iommu_domain_geometry) {
.aperture_start = input_start,
@@ -1093,7 +1072,6 @@
#ifdef CONFIG_PCI
if (pci_bus_type.iommu_ops != &viommu_ops) {
- pci_request_acs();
ret = bus_set_iommu(&pci_bus_type, &viommu_ops);
if (ret)
goto err_unregister;
@@ -1160,6 +1138,7 @@
{ VIRTIO_ID_IOMMU, VIRTIO_DEV_ANY_ID },
{ 0 },
};
+MODULE_DEVICE_TABLE(virtio, id_table);
static struct virtio_driver virtio_iommu_drv = {
.driver.name = KBUILD_MODNAME,