Update Linux to v5.4.148
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.4.148.tar.gz
Change-Id: Ib3d26c5ba9b022e2e03533005c4fed4d7c30b61b
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 6db6d96..a2a03df 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -123,29 +123,29 @@
return (level - 1) * LEVEL_STRIDE;
}
-static inline int pfn_level_offset(unsigned long pfn, int level)
+static inline int pfn_level_offset(u64 pfn, int level)
{
return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
}
-static inline unsigned long level_mask(int level)
+static inline u64 level_mask(int level)
{
- return -1UL << level_to_offset_bits(level);
+ return -1ULL << level_to_offset_bits(level);
}
-static inline unsigned long level_size(int level)
+static inline u64 level_size(int level)
{
- return 1UL << level_to_offset_bits(level);
+ return 1ULL << level_to_offset_bits(level);
}
-static inline unsigned long align_to_level(unsigned long pfn, int level)
+static inline u64 align_to_level(u64 pfn, int level)
{
return (pfn + level_size(level) - 1) & level_mask(level);
}
static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
{
- return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
+ return 1UL << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
}
/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
@@ -179,7 +179,7 @@
* (used when kernel is launched w/ TXT)
*/
static int force_on = 0;
-int intel_iommu_tboot_noforce;
+static int intel_iommu_tboot_noforce;
static int no_platform_optin;
#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
@@ -611,6 +611,12 @@
return g_iommus[iommu_id];
}
+static inline bool iommu_paging_structure_coherency(struct intel_iommu *iommu)
+{
+ return sm_supported(iommu) ?
+ ecap_smpwc(iommu->ecap) : ecap_coherent(iommu->ecap);
+}
+
static void domain_update_iommu_coherency(struct dmar_domain *domain)
{
struct dmar_drhd_unit *drhd;
@@ -622,7 +628,7 @@
for_each_domain_iommu(i, domain) {
found = true;
- if (!ecap_coherent(g_iommus[i]->ecap)) {
+ if (!iommu_paging_structure_coherency(g_iommus[i])) {
domain->iommu_coherency = 0;
break;
}
@@ -633,7 +639,7 @@
/* No hardware attached; use lowest common denominator */
rcu_read_lock();
for_each_active_iommu(iommu, drhd) {
- if (!ecap_coherent(iommu->ecap)) {
+ if (!iommu_paging_structure_coherency(iommu)) {
domain->iommu_coherency = 0;
break;
}
@@ -1847,7 +1853,7 @@
static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
int guest_width)
{
- int adjust_width, agaw;
+ int adjust_width, agaw, cap_width;
unsigned long sagaw;
int err;
@@ -1861,8 +1867,9 @@
domain_reserve_special_ranges(domain);
/* calculate AGAW */
- if (guest_width > cap_mgaw(iommu->cap))
- guest_width = cap_mgaw(iommu->cap);
+ cap_width = min_t(int, cap_mgaw(iommu->cap), agaw_to_width(iommu->agaw));
+ if (guest_width > cap_width)
+ guest_width = cap_width;
domain->gaw = guest_width;
adjust_width = guestwidth_to_adjustwidth(guest_width);
agaw = width_to_agaw(adjust_width);
@@ -2090,7 +2097,8 @@
context_set_fault_enable(context);
context_set_present(context);
- domain_flush_cache(domain, context, sizeof(*context));
+ if (!ecap_coherent(iommu->ecap))
+ clflush_cache_range(context, sizeof(*context));
/*
* It's a non-present to present mapping. If hardware doesn't cache
@@ -2553,14 +2561,14 @@
}
/* Setup the PASID entry for requests without PASID: */
- spin_lock(&iommu->lock);
+ spin_lock_irqsave(&iommu->lock, flags);
if (hw_pass_through && domain_type_is_si(domain))
ret = intel_pasid_setup_pass_through(iommu, domain,
dev, PASID_RID2PASID);
else
ret = intel_pasid_setup_second_level(iommu, domain,
dev, PASID_RID2PASID);
- spin_unlock(&iommu->lock);
+ spin_unlock_irqrestore(&iommu->lock, flags);
if (ret) {
dev_err(dev, "Setup RID2PASID failed\n");
dmar_remove_one_dev_info(dev);
@@ -2762,10 +2770,8 @@
}
/*
- * Normally we use DMA domains for devices which have RMRRs. But we
- * loose this requirement for graphic and usb devices. Identity map
- * the RMRRs for graphic and USB devices so that they could use the
- * si_domain.
+ * Identity map the RMRRs so that devices with RMRRs could also use
+ * the si_domain.
*/
for_each_rmrr_units(rmrr) {
for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
@@ -2773,9 +2779,6 @@
unsigned long long start = rmrr->base_address;
unsigned long long end = rmrr->end_address;
- if (device_is_rmrr_locked(dev))
- continue;
-
if (WARN_ON(end < start ||
end >> agaw_to_width(si_domain->agaw)))
continue;
@@ -2914,9 +2917,6 @@
if (dev_is_pci(dev)) {
struct pci_dev *pdev = to_pci_dev(dev);
- if (device_is_rmrr_locked(dev))
- return IOMMU_DOMAIN_DMA;
-
/*
* Prevent any device marked as untrusted from getting
* placed into the statically identity mapping domain.
@@ -2954,9 +2954,6 @@
return IOMMU_DOMAIN_DMA;
} else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
return IOMMU_DOMAIN_DMA;
- } else {
- if (device_has_rmrr(dev))
- return IOMMU_DOMAIN_DMA;
}
return (iommu_identity_mapping & IDENTMAP_ALL) ?
@@ -3289,6 +3286,12 @@
if (!ecap_pass_through(iommu->ecap))
hw_pass_through = 0;
+
+ if (!intel_iommu_strict && cap_caching_mode(iommu->cap)) {
+ pr_info("Disable batched IOTLB flush due to virtualization");
+ intel_iommu_strict = 1;
+ }
+
#ifdef CONFIG_INTEL_IOMMU_SVM
if (pasid_supported(iommu))
intel_svm_init(iommu);
@@ -3401,7 +3404,8 @@
iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
IOVA_PFN(dma_mask), true);
if (unlikely(!iova_pfn)) {
- dev_err(dev, "Allocating %ld-page iova failed", nrpages);
+ dev_err_once(dev, "Allocating %ld-page iova failed\n",
+ nrpages);
return 0;
}
@@ -4128,10 +4132,11 @@
/* we know that the this iommu should be at offset 0xa000 from vtbar */
drhd = dmar_find_matched_drhd_unit(pdev);
- if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
- TAINT_FIRMWARE_WORKAROUND,
- "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
+ if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
+ pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
+ add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
+ }
}
DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
@@ -4344,7 +4349,8 @@
struct dmar_atsr_unit *atsru;
struct acpi_dmar_atsr *tmp;
- list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
+ list_for_each_entry_rcu(atsru, &dmar_atsr_units, list,
+ dmar_rcu_check()) {
tmp = (struct acpi_dmar_atsr *)atsru->hdr;
if (atsr->segment != tmp->segment)
continue;
@@ -4928,7 +4934,8 @@
* Intel IOMMU is required for a TXT/tboot launch or platform
* opt in, so enforce that.
*/
- force_on = tboot_force_iommu() || platform_optin_force_iommu();
+ force_on = (!intel_iommu_tboot_noforce && tboot_force_iommu()) ||
+ platform_optin_force_iommu();
if (iommu_init_mempool()) {
if (force_on)
@@ -4959,6 +4966,9 @@
down_write(&dmar_global_lock);
+ if (!no_iommu)
+ intel_iommu_debugfs_init();
+
if (no_iommu || dmar_disabled) {
/*
* We exit the function here to ensure IOMMU's remapping and
@@ -5022,6 +5032,7 @@
init_iommu_pm_ops();
+ down_read(&dmar_global_lock);
for_each_active_iommu(iommu, drhd) {
iommu_device_sysfs_add(&iommu->iommu, NULL,
intel_iommu_groups,
@@ -5029,6 +5040,7 @@
iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
iommu_device_register(&iommu->iommu);
}
+ up_read(&dmar_global_lock);
bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
if (si_domain && !hw_pass_through)
@@ -5039,7 +5051,6 @@
down_read(&dmar_global_lock);
if (probe_acpi_namespace_devices())
pr_warn("ACPI name space devices didn't probe correctly\n");
- up_read(&dmar_global_lock);
/* Finally, we enable the DMA remapping hardware. */
for_each_iommu(iommu, drhd) {
@@ -5048,10 +5059,11 @@
iommu_disable_protect_mem_regions(iommu);
}
+ up_read(&dmar_global_lock);
+
pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
intel_iommu_enabled = 1;
- intel_iommu_debugfs_init();
return 0;
@@ -5132,7 +5144,8 @@
spin_lock_irqsave(&device_domain_lock, flags);
info = dev->archdata.iommu;
- if (info)
+ if (info && info != DEFER_DEVICE_DOMAIN_INFO
+ && info != DUMMY_DEVICE_DOMAIN_INFO)
__dmar_remove_one_dev_info(info);
spin_unlock_irqrestore(&device_domain_lock, flags);
}
@@ -5447,9 +5460,6 @@
int prot = 0;
int ret;
- if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN)
- return -EINVAL;
-
if (iommu_prot & IOMMU_READ)
prot |= DMA_PTE_READ;
if (iommu_prot & IOMMU_WRITE)
@@ -5492,8 +5502,6 @@
/* Cope with horrid API which requires us to unmap more than the
size argument if it happens to be a large-page mapping. */
BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
- if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN)
- return 0;
if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
size = VTD_PAGE_SIZE << level_to_offset_bits(level);
@@ -5525,12 +5533,11 @@
int level = 0;
u64 phys = 0;
- if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN)
- return 0;
-
pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
- if (pte)
- phys = dma_pte_addr(pte);
+ if (pte && dma_pte_present(pte))
+ phys = dma_pte_addr(pte) +
+ (iova & (BIT_MASK(level_to_offset_bits(level) +
+ VTD_PAGE_SHIFT) - 1));
return phys;
}
@@ -5601,8 +5608,10 @@
group = iommu_group_get_for_dev(dev);
- if (IS_ERR(group))
- return PTR_ERR(group);
+ if (IS_ERR(group)) {
+ ret = PTR_ERR(group);
+ goto unlink;
+ }
iommu_group_put(group);
@@ -5628,7 +5637,8 @@
if (!get_private_domain_for_dev(dev)) {
dev_warn(dev,
"Failed to get a private domain.\n");
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto unlink;
}
dev_info(dev,
@@ -5643,6 +5653,10 @@
}
return 0;
+
+unlink:
+ iommu_device_unlink(&iommu->iommu, dev);
+ return ret;
}
static void intel_iommu_remove_device(struct device *dev)
@@ -5705,8 +5719,8 @@
struct pci_dev *pdev = to_pci_dev(device);
if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
- reg = iommu_alloc_resv_region(0, 1UL << 24, 0,
- IOMMU_RESV_DIRECT);
+ reg = iommu_alloc_resv_region(0, 1UL << 24, prot,
+ IOMMU_RESV_DIRECT_RELAXABLE);
if (reg)
list_add_tail(®->list, head);
}
@@ -5794,6 +5808,13 @@
WARN_ON_ONCE(!reserve_iova(&dmar_domain->iovad, start, end));
}
+static struct iommu_group *intel_iommu_device_group(struct device *dev)
+{
+ if (dev_is_pci(dev))
+ return pci_device_group(dev);
+ return generic_device_group(dev);
+}
+
#ifdef CONFIG_INTEL_IOMMU_SVM
struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
{
@@ -5949,6 +5970,23 @@
return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO;
}
+/*
+ * Check that the device does not live on an external facing PCI port that is
+ * marked as untrusted. Such devices should not be able to apply quirks and
+ * thus not be able to bypass the IOMMU restrictions.
+ */
+static bool risky_device(struct pci_dev *pdev)
+{
+ if (pdev->untrusted) {
+ pci_info(pdev,
+ "Skipping IOMMU quirk for dev [%04X:%04X] on untrusted PCI link\n",
+ pdev->vendor, pdev->device);
+ pci_info(pdev, "Please check with your BIOS/Platform vendor about this\n");
+ return true;
+ }
+ return false;
+}
+
const struct iommu_ops intel_iommu_ops = {
.capable = intel_iommu_capable,
.domain_alloc = intel_iommu_domain_alloc,
@@ -5966,7 +6004,7 @@
.get_resv_regions = intel_iommu_get_resv_regions,
.put_resv_regions = intel_iommu_put_resv_regions,
.apply_resv_region = intel_iommu_apply_resv_region,
- .device_group = pci_device_group,
+ .device_group = intel_iommu_device_group,
.dev_has_feat = intel_iommu_dev_has_feat,
.dev_feat_enabled = intel_iommu_dev_feat_enabled,
.dev_enable_feat = intel_iommu_dev_enable_feat,
@@ -5977,6 +6015,9 @@
static void quirk_iommu_igfx(struct pci_dev *dev)
{
+ if (risky_device(dev))
+ return;
+
pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
dmar_map_gfx = 0;
}
@@ -6018,6 +6059,9 @@
static void quirk_iommu_rwbf(struct pci_dev *dev)
{
+ if (risky_device(dev))
+ return;
+
/*
* Mobile 4 Series Chipset neglects to set RWBF capability,
* but needs it. Same seems to hold for the desktop versions.
@@ -6048,6 +6092,9 @@
{
unsigned short ggc;
+ if (risky_device(dev))
+ return;
+
if (pci_read_config_word(dev, GGC, &ggc))
return;
@@ -6081,6 +6128,12 @@
pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
if (!pdev)
return;
+
+ if (risky_device(pdev)) {
+ pci_dev_put(pdev);
+ return;
+ }
+
pci_dev_put(pdev);
/* System Management Registers. Might be hidden, in which case
@@ -6090,6 +6143,11 @@
if (!pdev)
return;
+ if (risky_device(pdev)) {
+ pci_dev_put(pdev);
+ return;
+ }
+
if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
pci_dev_put(pdev);
return;