 1// SPDX-License-Identifier: GPL-2.0-only
 2/*
3 * Copyright © 2006-2014 Intel Corporation.
4 *
 5 * Authors: David Woodhouse <dwmw2@infradead.org>,
6 * Ashok Raj <ashok.raj@intel.com>,
7 * Shaohua Li <shaohua.li@intel.com>,
8 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
9 * Fenghua Yu <fenghua.yu@intel.com>
10 * Joerg Roedel <jroedel@suse.de>
11 */
12
13#define pr_fmt(fmt) "DMAR: " fmt
 14#define dev_fmt(fmt) pr_fmt(fmt)
 15
16#include <linux/init.h>
17#include <linux/bitmap.h>
18#include <linux/debugfs.h>
19#include <linux/export.h>
20#include <linux/slab.h>
21#include <linux/irq.h>
22#include <linux/interrupt.h>
23#include <linux/spinlock.h>
24#include <linux/pci.h>
25#include <linux/dmar.h>
26#include <linux/dma-mapping.h>
27#include <linux/mempool.h>
28#include <linux/memory.h>
29#include <linux/cpu.h>
30#include <linux/timer.h>
31#include <linux/io.h>
32#include <linux/iova.h>
33#include <linux/iommu.h>
34#include <linux/intel-iommu.h>
35#include <linux/syscore_ops.h>
36#include <linux/tboot.h>
37#include <linux/dmi.h>
38#include <linux/pci-ats.h>
39#include <linux/memblock.h>
40#include <linux/dma-contiguous.h>
41#include <linux/dma-direct.h>
42#include <linux/crash_dump.h>
 43#include <linux/numa.h>
44#include <linux/swiotlb.h>
 45#include <asm/irq_remapping.h>
46#include <asm/cacheflush.h>
47#include <asm/iommu.h>
 48#include <trace/events/intel_iommu.h>
 49
50#include "irq_remapping.h"
51#include "intel-pasid.h"
52
53#define ROOT_SIZE VTD_PAGE_SIZE
54#define CONTEXT_SIZE VTD_PAGE_SIZE
55
56#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
57#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
58#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
59#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
60
61#define IOAPIC_RANGE_START (0xfee00000)
62#define IOAPIC_RANGE_END (0xfeefffff)
63#define IOVA_START_ADDR (0x1000)
64
65#define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
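/* 57 bits corresponds to agaw 3, i.e. a five-level page table (width = 30 + 9 * agaw, see the helpers below). */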
66
67#define MAX_AGAW_WIDTH 64
68#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
69
70#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
71#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
72
73/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
74 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
75#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
76 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
77#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
78
79/* IO virtual address start page frame number */
80#define IOVA_START_PFN (1)
81
82#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
83
84/* page table handling */
85#define LEVEL_STRIDE (9)
86#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
87
88/*
 89 * This bitmap is used to advertise the page sizes our hardware supports
90 * to the IOMMU core, which will then use this information to split
91 * physically contiguous memory regions it is mapping into page sizes
92 * that we support.
93 *
94 * Traditionally the IOMMU core just handed us the mappings directly,
95 * after making sure the size is an order of a 4KiB page and that the
96 * mapping has natural alignment.
97 *
98 * To retain this behavior, we currently advertise that we support
99 * all page sizes that are an order of 4KiB.
100 *
101 * If at some point we'd like to utilize the IOMMU core's new behavior,
102 * we could change this to advertise the real page sizes we support.
103 */
104#define INTEL_IOMMU_PGSIZES (~0xFFFUL)
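/* ~0xFFFUL sets every bit from bit 12 upward, i.e. every power-of-two size that is a multiple of 4KiB. */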
105
106static inline int agaw_to_level(int agaw)
107{
108 return agaw + 2;
109}
110
111static inline int agaw_to_width(int agaw)
112{
113 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
114}
115
116static inline int width_to_agaw(int width)
117{
118 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
119}
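/* Example: a 48-bit address width gives width_to_agaw(48) == 2, agaw_to_level(2) == 4 (a 4-level table) and agaw_to_width(2) == 48. */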
120
121static inline unsigned int level_to_offset_bits(int level)
122{
123 return (level - 1) * LEVEL_STRIDE;
124}
125
 126static inline int pfn_level_offset(u64 pfn, int level)
 127{
128 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
129}
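/* Each level selects 9 bits of the DMA pfn: level 1 uses bits 0-8, level 2 uses bits 9-17, and so on. */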
130
 131static inline u64 level_mask(int level)
 132{
 133 return -1ULL << level_to_offset_bits(level);
 134}
135
 136static inline u64 level_size(int level)
 137{
 138 return 1ULL << level_to_offset_bits(level);
 139}
140
 141static inline u64 align_to_level(u64 pfn, int level)
 142{
143 return (pfn + level_size(level) - 1) & level_mask(level);
144}
145
146static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
147{
 148 return 1UL << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
 149}
150
151/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
152 are never going to work. */
153static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
154{
155 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
156}
157
158static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
159{
160 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
161}
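/* With 4KiB kernel pages (PAGE_SHIFT == VTD_PAGE_SHIFT == 12) these conversions are identity; they only shift when the kernel page size is larger than 4KiB. */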
162static inline unsigned long page_to_dma_pfn(struct page *pg)
163{
164 return mm_to_dma_pfn(page_to_pfn(pg));
165}
166static inline unsigned long virt_to_dma_pfn(void *p)
167{
168 return page_to_dma_pfn(virt_to_page(p));
169}
170
171/* global iommu list, set NULL for ignored DMAR units */
172static struct intel_iommu **g_iommus;
173
174static void __init check_tylersburg_isoch(void);
175static int rwbf_quirk;
176
177/*
178 * set to 1 to panic kernel if can't successfully enable VT-d
179 * (used when kernel is launched w/ TXT)
180 */
181static int force_on = 0;
 182static int intel_iommu_tboot_noforce;
 183static int no_platform_optin;
 184
 185#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
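/* A root entry is 16 bytes, so the 4KiB root table holds 256 entries: one per PCI bus number. */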
186
187/*
188 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
189 * if marked present.
190 */
191static phys_addr_t root_entry_lctp(struct root_entry *re)
192{
193 if (!(re->lo & 1))
194 return 0;
195
196 return re->lo & VTD_PAGE_MASK;
197}
198
199/*
200 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
201 * if marked present.
202 */
203static phys_addr_t root_entry_uctp(struct root_entry *re)
204{
205 if (!(re->hi & 1))
206 return 0;
207
208 return re->hi & VTD_PAGE_MASK;
209}
 210
211static inline void context_clear_pasid_enable(struct context_entry *context)
212{
213 context->lo &= ~(1ULL << 11);
214}
215
216static inline bool context_pasid_enabled(struct context_entry *context)
217{
218 return !!(context->lo & (1ULL << 11));
219}
220
221static inline void context_set_copied(struct context_entry *context)
222{
223 context->hi |= (1ull << 3);
224}
225
226static inline bool context_copied(struct context_entry *context)
227{
228 return !!(context->hi & (1ULL << 3));
229}
230
231static inline bool __context_present(struct context_entry *context)
232{
233 return (context->lo & 1);
234}
235
 236bool context_present(struct context_entry *context)
 237{
238 return context_pasid_enabled(context) ?
239 __context_present(context) :
240 __context_present(context) && !context_copied(context);
241}
242
243static inline void context_set_present(struct context_entry *context)
244{
245 context->lo |= 1;
246}
247
248static inline void context_set_fault_enable(struct context_entry *context)
249{
250 context->lo &= (((u64)-1) << 2) | 1;
251}
252
253static inline void context_set_translation_type(struct context_entry *context,
254 unsigned long value)
255{
256 context->lo &= (((u64)-1) << 4) | 3;
257 context->lo |= (value & 3) << 2;
258}
259
260static inline void context_set_address_root(struct context_entry *context,
261 unsigned long value)
262{
263 context->lo &= ~VTD_PAGE_MASK;
264 context->lo |= value & VTD_PAGE_MASK;
265}
266
267static inline void context_set_address_width(struct context_entry *context,
268 unsigned long value)
269{
270 context->hi |= value & 7;
271}
272
273static inline void context_set_domain_id(struct context_entry *context,
274 unsigned long value)
275{
276 context->hi |= (value & ((1 << 16) - 1)) << 8;
277}
278
279static inline int context_domain_id(struct context_entry *c)
280{
281 return((c->hi >> 8) & 0xffff);
282}
283
284static inline void context_clear_entry(struct context_entry *context)
285{
286 context->lo = 0;
287 context->hi = 0;
288}
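/*
 * Legacy-mode context entry layout used by the helpers above: lo bit 0 is
 * the present bit, bits 2-3 the translation type, and the upper bits the
 * page-table root; hi bits 0-2 hold the address width and bits 8-23 the
 * domain id.
 */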
289
290/*
 291 * This domain is a static identity mapping domain.
 292 * 1. This domain creates a static 1:1 mapping to all usable memory.
 293 * 2. It maps to each iommu if successful.
 294 * 3. Each iommu maps to this domain if successful.
295 */
296static struct dmar_domain *si_domain;
297static int hw_pass_through = 1;
298
 299/* si_domain contains multiple devices */
 300#define DOMAIN_FLAG_STATIC_IDENTITY BIT(0)
301
302/*
303 * This is a DMA domain allocated through the iommu domain allocation
304 * interface. But one or more devices belonging to this domain have
 305 * been chosen to use a private domain. We should avoid using the
306 * map/unmap/iova_to_phys APIs on it.
307 */
308#define DOMAIN_FLAG_LOSE_CHILDREN BIT(1)
 309
310#define for_each_domain_iommu(idx, domain) \
311 for (idx = 0; idx < g_num_of_iommus; idx++) \
312 if (domain->iommu_refcnt[idx])
313
314struct dmar_rmrr_unit {
315 struct list_head list; /* list of rmrr units */
316 struct acpi_dmar_header *hdr; /* ACPI header */
317 u64 base_address; /* reserved base address*/
318 u64 end_address; /* reserved end address */
319 struct dmar_dev_scope *devices; /* target devices */
320 int devices_cnt; /* target device count */
 321};
322
323struct dmar_atsr_unit {
324 struct list_head list; /* list of ATSR units */
325 struct acpi_dmar_header *hdr; /* ACPI header */
326 struct dmar_dev_scope *devices; /* target devices */
327 int devices_cnt; /* target device count */
328 u8 include_all:1; /* include all ports */
329};
330
331static LIST_HEAD(dmar_atsr_units);
332static LIST_HEAD(dmar_rmrr_units);
333
334#define for_each_rmrr_units(rmrr) \
335 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
336
337/* bitmap for indexing intel_iommus */
338static int g_num_of_iommus;
339
340static void domain_exit(struct dmar_domain *domain);
341static void domain_remove_dev_info(struct dmar_domain *domain);
 342static void dmar_remove_one_dev_info(struct device *dev);
 343static void __dmar_remove_one_dev_info(struct device_domain_info *info);
344static void domain_context_clear(struct intel_iommu *iommu,
345 struct device *dev);
346static int domain_detach_iommu(struct dmar_domain *domain,
347 struct intel_iommu *iommu);
 348static bool device_is_rmrr_locked(struct device *dev);
349static int intel_iommu_attach_device(struct iommu_domain *domain,
350 struct device *dev);
351static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
352 dma_addr_t iova);
 353
354#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
355int dmar_disabled = 0;
356#else
357int dmar_disabled = 1;
358#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
359
 360int intel_iommu_sm;
 361int intel_iommu_enabled = 0;
362EXPORT_SYMBOL_GPL(intel_iommu_enabled);
363
364static int dmar_map_gfx = 1;
365static int dmar_forcedac;
366static int intel_iommu_strict;
367static int intel_iommu_superpage = 1;
 368static int iommu_identity_mapping;
 369static int intel_no_bounce;
 370
371#define IDENTMAP_ALL 1
372#define IDENTMAP_GFX 2
373#define IDENTMAP_AZALIA 4
374
 375int intel_iommu_gfx_mapped;
376EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
377
378#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
 379#define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2))
 380static DEFINE_SPINLOCK(device_domain_lock);
381static LIST_HEAD(device_domain_list);
382
 383#define device_needs_bounce(d) (!intel_no_bounce && dev_is_pci(d) && \
384 to_pci_dev(d)->untrusted)
385
 386/*
387 * Iterate over elements in device_domain_list and call the specified
 388 * callback @fn against each element.
 389 */
390int for_each_device_domain(int (*fn)(struct device_domain_info *info,
391 void *data), void *data)
392{
393 int ret = 0;
 394 unsigned long flags;
 395 struct device_domain_info *info;
396
 397 spin_lock_irqsave(&device_domain_lock, flags);
 398 list_for_each_entry(info, &device_domain_list, global) {
399 ret = fn(info, data);
 400 if (ret) {
401 spin_unlock_irqrestore(&device_domain_lock, flags);
 402 return ret;
 403 }
 404 }
 405 spin_unlock_irqrestore(&device_domain_lock, flags);
 406
407 return 0;
408}
409
410const struct iommu_ops intel_iommu_ops;
411
412static bool translation_pre_enabled(struct intel_iommu *iommu)
413{
414 return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
415}
416
417static void clear_translation_pre_enabled(struct intel_iommu *iommu)
418{
419 iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
420}
421
422static void init_translation_status(struct intel_iommu *iommu)
423{
424 u32 gsts;
425
426 gsts = readl(iommu->reg + DMAR_GSTS_REG);
427 if (gsts & DMA_GSTS_TES)
428 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
429}
430
 431/* Convert generic 'struct iommu_domain' to private 'struct dmar_domain' */
432static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
433{
434 return container_of(dom, struct dmar_domain, domain);
435}
436
437static int __init intel_iommu_setup(char *str)
438{
439 if (!str)
440 return -EINVAL;
441 while (*str) {
442 if (!strncmp(str, "on", 2)) {
443 dmar_disabled = 0;
444 pr_info("IOMMU enabled\n");
445 } else if (!strncmp(str, "off", 3)) {
446 dmar_disabled = 1;
 447 no_platform_optin = 1;
 448 pr_info("IOMMU disabled\n");
449 } else if (!strncmp(str, "igfx_off", 8)) {
450 dmar_map_gfx = 0;
451 pr_info("Disable GFX device mapping\n");
452 } else if (!strncmp(str, "forcedac", 8)) {
453 pr_info("Forcing DAC for PCI devices\n");
454 dmar_forcedac = 1;
455 } else if (!strncmp(str, "strict", 6)) {
456 pr_info("Disable batched IOTLB flush\n");
457 intel_iommu_strict = 1;
458 } else if (!strncmp(str, "sp_off", 6)) {
459 pr_info("Disable supported super page\n");
460 intel_iommu_superpage = 0;
 461 } else if (!strncmp(str, "sm_on", 5)) {
462 pr_info("Intel-IOMMU: scalable mode supported\n");
463 intel_iommu_sm = 1;
 464 } else if (!strncmp(str, "tboot_noforce", 13)) {
465 printk(KERN_INFO
466 "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
467 intel_iommu_tboot_noforce = 1;
 468 } else if (!strncmp(str, "nobounce", 8)) {
469 pr_info("Intel-IOMMU: No bounce buffer. This could expose security risks of DMA attacks\n");
470 intel_no_bounce = 1;
 471 }
472
473 str += strcspn(str, ",");
474 while (*str == ',')
475 str++;
476 }
477 return 0;
478}
479__setup("intel_iommu=", intel_iommu_setup);
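/* Options are comma separated; e.g. booting with intel_iommu=on,sm_on,strict enables the IOMMU, scalable mode and strict IOTLB flushing. */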
480
481static struct kmem_cache *iommu_domain_cache;
482static struct kmem_cache *iommu_devinfo_cache;
483
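/*
 * Domain pointers are kept in a two-level table indexed by domain id,
 * iommu->domains[did >> 8][did & 0xff], so each 256-entry second-level
 * chunk is only allocated when a domain id in that range is used.
 */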
484static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
485{
486 struct dmar_domain **domains;
487 int idx = did >> 8;
488
489 domains = iommu->domains[idx];
490 if (!domains)
491 return NULL;
492
493 return domains[did & 0xff];
494}
495
496static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
497 struct dmar_domain *domain)
498{
499 struct dmar_domain **domains;
500 int idx = did >> 8;
501
502 if (!iommu->domains[idx]) {
503 size_t size = 256 * sizeof(struct dmar_domain *);
504 iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
505 }
506
507 domains = iommu->domains[idx];
508 if (WARN_ON(!domains))
509 return;
510 else
511 domains[did & 0xff] = domain;
512}
513
514void *alloc_pgtable_page(int node)
515{
516 struct page *page;
517 void *vaddr = NULL;
518
519 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
520 if (page)
521 vaddr = page_address(page);
522 return vaddr;
523}
524
525void free_pgtable_page(void *vaddr)
526{
527 free_page((unsigned long)vaddr);
528}
529
530static inline void *alloc_domain_mem(void)
531{
532 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
533}
534
535static void free_domain_mem(void *vaddr)
536{
537 kmem_cache_free(iommu_domain_cache, vaddr);
538}
539
540static inline void * alloc_devinfo_mem(void)
541{
542 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
543}
544
545static inline void free_devinfo_mem(void *vaddr)
546{
547 kmem_cache_free(iommu_devinfo_cache, vaddr);
548}
549
 550static inline int domain_type_is_si(struct dmar_domain *domain)
551{
552 return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
553}
554
 555static inline int domain_pfn_supported(struct dmar_domain *domain,
556 unsigned long pfn)
557{
558 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
559
560 return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
561}
562
563static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
564{
565 unsigned long sagaw;
566 int agaw = -1;
567
568 sagaw = cap_sagaw(iommu->cap);
569 for (agaw = width_to_agaw(max_gaw);
570 agaw >= 0; agaw--) {
571 if (test_bit(agaw, &sagaw))
572 break;
573 }
574
575 return agaw;
576}
577
578/*
579 * Calculate max SAGAW for each iommu.
580 */
581int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
582{
583 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
584}
585
586/*
587 * calculate agaw for each iommu.
588 * "SAGAW" may be different across iommus, use a default agaw, and
589 * get a supported less agaw for iommus that don't support the default agaw.
590 */
591int iommu_calculate_agaw(struct intel_iommu *iommu)
592{
593 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
594}
595
 596/* This function only returns a single iommu in a domain */
597struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
598{
599 int iommu_id;
600
601 /* si_domain and vm domain should not get here. */
 602 if (WARN_ON(domain->domain.type != IOMMU_DOMAIN_DMA))
603 return NULL;
604
 605 for_each_domain_iommu(iommu_id, domain)
606 break;
607
608 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
609 return NULL;
610
611 return g_iommus[iommu_id];
612}
613
 614static inline bool iommu_paging_structure_coherency(struct intel_iommu *iommu)
615{
616 return sm_supported(iommu) ?
617 ecap_smpwc(iommu->ecap) : ecap_coherent(iommu->ecap);
618}
619
 620static void domain_update_iommu_coherency(struct dmar_domain *domain)
621{
622 struct dmar_drhd_unit *drhd;
623 struct intel_iommu *iommu;
624 bool found = false;
625 int i;
626
627 domain->iommu_coherency = 1;
628
629 for_each_domain_iommu(i, domain) {
630 found = true;
 631 if (!iommu_paging_structure_coherency(g_iommus[i])) {
 632 domain->iommu_coherency = 0;
633 break;
634 }
635 }
636 if (found)
637 return;
638
639 /* No hardware attached; use lowest common denominator */
640 rcu_read_lock();
641 for_each_active_iommu(iommu, drhd) {
 642 if (!iommu_paging_structure_coherency(iommu)) {
 643 domain->iommu_coherency = 0;
644 break;
645 }
646 }
647 rcu_read_unlock();
648}
649
650static int domain_update_iommu_snooping(struct intel_iommu *skip)
651{
652 struct dmar_drhd_unit *drhd;
653 struct intel_iommu *iommu;
654 int ret = 1;
655
656 rcu_read_lock();
657 for_each_active_iommu(iommu, drhd) {
658 if (iommu != skip) {
659 if (!ecap_sc_support(iommu->ecap)) {
660 ret = 0;
661 break;
662 }
663 }
664 }
665 rcu_read_unlock();
666
667 return ret;
668}
669
670static int domain_update_iommu_superpage(struct intel_iommu *skip)
671{
672 struct dmar_drhd_unit *drhd;
673 struct intel_iommu *iommu;
674 int mask = 0xf;
675
676 if (!intel_iommu_superpage) {
677 return 0;
678 }
679
680 /* set iommu_superpage to the smallest common denominator */
681 rcu_read_lock();
682 for_each_active_iommu(iommu, drhd) {
683 if (iommu != skip) {
684 mask &= cap_super_page_val(iommu->cap);
685 if (!mask)
686 break;
687 }
688 }
689 rcu_read_unlock();
690
691 return fls(mask);
692}
693
694/* Some capabilities may be different across iommus */
695static void domain_update_iommu_cap(struct dmar_domain *domain)
696{
697 domain_update_iommu_coherency(domain);
698 domain->iommu_snooping = domain_update_iommu_snooping(NULL);
699 domain->iommu_superpage = domain_update_iommu_superpage(NULL);
700}
701
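/*
 * In scalable mode a root entry spans both halves: the lo half covers
 * devfns 0-127 and the hi half devfns 128-255, with each devfn taking two
 * 16-byte slots (scalable-mode context entries are 256 bits wide). Legacy
 * mode uses only the lo half.
 */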
 702struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
 703 u8 devfn, int alloc)
 704{
705 struct root_entry *root = &iommu->root_entry[bus];
706 struct context_entry *context;
707 u64 *entry;
708
709 entry = &root->lo;
 710 if (sm_supported(iommu)) {
 711 if (devfn >= 0x80) {
712 devfn -= 0x80;
713 entry = &root->hi;
714 }
715 devfn *= 2;
716 }
717 if (*entry & 1)
718 context = phys_to_virt(*entry & VTD_PAGE_MASK);
719 else {
720 unsigned long phy_addr;
721 if (!alloc)
722 return NULL;
723
724 context = alloc_pgtable_page(iommu->node);
725 if (!context)
726 return NULL;
727
728 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
729 phy_addr = virt_to_phys((void *)context);
730 *entry = phy_addr | 1;
731 __iommu_flush_cache(iommu, entry, sizeof(*entry));
732 }
733 return &context[devfn];
734}
735
736static int iommu_dummy(struct device *dev)
737{
738 return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
739}
740
 741/**
742 * is_downstream_to_pci_bridge - test if a device belongs to the PCI
743 * sub-hierarchy of a candidate PCI-PCI bridge
744 * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy
745 * @bridge: the candidate PCI-PCI bridge
746 *
747 * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false.
748 */
749static bool
750is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
751{
752 struct pci_dev *pdev, *pbridge;
753
754 if (!dev_is_pci(dev) || !dev_is_pci(bridge))
755 return false;
756
757 pdev = to_pci_dev(dev);
758 pbridge = to_pci_dev(bridge);
759
760 if (pbridge->subordinate &&
761 pbridge->subordinate->number <= pdev->bus->number &&
762 pbridge->subordinate->busn_res.end >= pdev->bus->number)
763 return true;
764
765 return false;
766}
767
 768static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
769{
770 struct dmar_drhd_unit *drhd = NULL;
771 struct intel_iommu *iommu;
772 struct device *tmp;
 773 struct pci_dev *pdev = NULL;
 774 u16 segment = 0;
775 int i;
776
777 if (iommu_dummy(dev))
778 return NULL;
779
780 if (dev_is_pci(dev)) {
781 struct pci_dev *pf_pdev;
782
783 pdev = to_pci_dev(dev);
784
785#ifdef CONFIG_X86
786 /* VMD child devices currently cannot be handled individually */
787 if (is_vmd(pdev->bus))
788 return NULL;
789#endif
790
791 /* VFs aren't listed in scope tables; we need to look up
792 * the PF instead to find the IOMMU. */
793 pf_pdev = pci_physfn(pdev);
794 dev = &pf_pdev->dev;
795 segment = pci_domain_nr(pdev->bus);
796 } else if (has_acpi_companion(dev))
797 dev = &ACPI_COMPANION(dev)->dev;
798
799 rcu_read_lock();
800 for_each_active_iommu(iommu, drhd) {
801 if (pdev && segment != drhd->segment)
802 continue;
803
804 for_each_active_dev_scope(drhd->devices,
805 drhd->devices_cnt, i, tmp) {
806 if (tmp == dev) {
807 /* For a VF use its original BDF# not that of the PF
808 * which we used for the IOMMU lookup. Strictly speaking
809 * we could do this for all PCI devices; we only need to
810 * get the BDF# from the scope table for ACPI matches. */
811 if (pdev && pdev->is_virtfn)
812 goto got_pdev;
813
814 *bus = drhd->devices[i].bus;
815 *devfn = drhd->devices[i].devfn;
816 goto out;
817 }
818
 819 if (is_downstream_to_pci_bridge(dev, tmp))
 820 goto got_pdev;
821 }
822
823 if (pdev && drhd->include_all) {
824 got_pdev:
825 *bus = pdev->bus->number;
826 *devfn = pdev->devfn;
827 goto out;
828 }
829 }
830 iommu = NULL;
831 out:
832 rcu_read_unlock();
833
834 return iommu;
835}
836
837static void domain_flush_cache(struct dmar_domain *domain,
838 void *addr, int size)
839{
840 if (!domain->iommu_coherency)
841 clflush_cache_range(addr, size);
842}
843
844static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
845{
846 struct context_entry *context;
847 int ret = 0;
848 unsigned long flags;
849
850 spin_lock_irqsave(&iommu->lock, flags);
851 context = iommu_context_addr(iommu, bus, devfn, 0);
852 if (context)
853 ret = context_present(context);
854 spin_unlock_irqrestore(&iommu->lock, flags);
855 return ret;
856}
857
858static void free_context_table(struct intel_iommu *iommu)
859{
860 int i;
861 unsigned long flags;
862 struct context_entry *context;
863
864 spin_lock_irqsave(&iommu->lock, flags);
865 if (!iommu->root_entry) {
866 goto out;
867 }
868 for (i = 0; i < ROOT_ENTRY_NR; i++) {
869 context = iommu_context_addr(iommu, i, 0, 0);
870 if (context)
871 free_pgtable_page(context);
872
 873 if (!sm_supported(iommu))
 874 continue;
875
876 context = iommu_context_addr(iommu, i, 0x80, 0);
877 if (context)
878 free_pgtable_page(context);
879
880 }
881 free_pgtable_page(iommu->root_entry);
882 iommu->root_entry = NULL;
883out:
884 spin_unlock_irqrestore(&iommu->lock, flags);
885}
886
887static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
888 unsigned long pfn, int *target_level)
889{
 890 struct dma_pte *parent, *pte;
 891 int level = agaw_to_level(domain->agaw);
892 int offset;
893
894 BUG_ON(!domain->pgd);
895
896 if (!domain_pfn_supported(domain, pfn))
897 /* Address beyond IOMMU's addressing capabilities. */
898 return NULL;
899
900 parent = domain->pgd;
901
902 while (1) {
903 void *tmp_page;
904
905 offset = pfn_level_offset(pfn, level);
906 pte = &parent[offset];
907 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
908 break;
909 if (level == *target_level)
910 break;
911
912 if (!dma_pte_present(pte)) {
913 uint64_t pteval;
914
915 tmp_page = alloc_pgtable_page(domain->nid);
916
917 if (!tmp_page)
918 return NULL;
919
920 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
921 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
922 if (cmpxchg64(&pte->val, 0ULL, pteval))
923 /* Someone else set it while we were thinking; use theirs. */
924 free_pgtable_page(tmp_page);
925 else
926 domain_flush_cache(domain, pte, sizeof(*pte));
927 }
928 if (level == 1)
929 break;
930
931 parent = phys_to_virt(dma_pte_addr(pte));
932 level--;
933 }
934
935 if (!*target_level)
936 *target_level = level;
937
938 return pte;
939}
940
 941/* return address's pte at specific level */
942static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
943 unsigned long pfn,
944 int level, int *large_page)
945{
 946 struct dma_pte *parent, *pte;
 947 int total = agaw_to_level(domain->agaw);
948 int offset;
949
950 parent = domain->pgd;
951 while (level <= total) {
952 offset = pfn_level_offset(pfn, total);
953 pte = &parent[offset];
954 if (level == total)
955 return pte;
956
957 if (!dma_pte_present(pte)) {
958 *large_page = total;
959 break;
960 }
961
962 if (dma_pte_superpage(pte)) {
963 *large_page = total;
964 return pte;
965 }
966
967 parent = phys_to_virt(dma_pte_addr(pte));
968 total--;
969 }
970 return NULL;
971}
972
973/* clear last level pte, a tlb flush should be followed */
974static void dma_pte_clear_range(struct dmar_domain *domain,
975 unsigned long start_pfn,
976 unsigned long last_pfn)
977{
 978 unsigned int large_page;
 979 struct dma_pte *first_pte, *pte;
980
981 BUG_ON(!domain_pfn_supported(domain, start_pfn));
982 BUG_ON(!domain_pfn_supported(domain, last_pfn));
983 BUG_ON(start_pfn > last_pfn);
984
985 /* we don't need lock here; nobody else touches the iova range */
986 do {
987 large_page = 1;
988 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
989 if (!pte) {
990 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
991 continue;
992 }
993 do {
994 dma_clear_pte(pte);
995 start_pfn += lvl_to_nr_pages(large_page);
996 pte++;
997 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
998
999 domain_flush_cache(domain, first_pte,
1000 (void *)pte - (void *)first_pte);
1001
1002 } while (start_pfn && start_pfn <= last_pfn);
1003}
1004
1005static void dma_pte_free_level(struct dmar_domain *domain, int level,
1006 int retain_level, struct dma_pte *pte,
1007 unsigned long pfn, unsigned long start_pfn,
1008 unsigned long last_pfn)
1009{
1010 pfn = max(start_pfn, pfn);
1011 pte = &pte[pfn_level_offset(pfn, level)];
1012
1013 do {
1014 unsigned long level_pfn;
1015 struct dma_pte *level_pte;
1016
1017 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1018 goto next;
1019
1020 level_pfn = pfn & level_mask(level);
1021 level_pte = phys_to_virt(dma_pte_addr(pte));
1022
1023 if (level > 2) {
1024 dma_pte_free_level(domain, level - 1, retain_level,
1025 level_pte, level_pfn, start_pfn,
1026 last_pfn);
1027 }
1028
1029 /*
1030 * Free the page table if we're below the level we want to
1031 * retain and the range covers the entire table.
1032 */
1033 if (level < retain_level && !(start_pfn > level_pfn ||
1034 last_pfn < level_pfn + level_size(level) - 1)) {
1035 dma_clear_pte(pte);
1036 domain_flush_cache(domain, pte, sizeof(*pte));
1037 free_pgtable_page(level_pte);
1038 }
1039next:
1040 pfn += level_size(level);
1041 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1042}
1043
1044/*
1045 * clear last level (leaf) ptes and free page table pages below the
1046 * level we wish to keep intact.
1047 */
1048static void dma_pte_free_pagetable(struct dmar_domain *domain,
1049 unsigned long start_pfn,
1050 unsigned long last_pfn,
1051 int retain_level)
1052{
1053 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1054 BUG_ON(!domain_pfn_supported(domain, last_pfn));
1055 BUG_ON(start_pfn > last_pfn);
1056
1057 dma_pte_clear_range(domain, start_pfn, last_pfn);
1058
1059 /* We don't need lock here; nobody else touches the iova range */
1060 dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
1061 domain->pgd, 0, start_pfn, last_pfn);
1062
1063 /* free pgd */
1064 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1065 free_pgtable_page(domain->pgd);
1066 domain->pgd = NULL;
1067 }
1068}
1069
1070/* When a page at a given level is being unlinked from its parent, we don't
1071 need to *modify* it at all. All we need to do is make a list of all the
1072 pages which can be freed just as soon as we've flushed the IOTLB and we
1073 know the hardware page-walk will no longer touch them.
1074 The 'pte' argument is the *parent* PTE, pointing to the page that is to
1075 be freed. */
1076static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1077 int level, struct dma_pte *pte,
1078 struct page *freelist)
1079{
1080 struct page *pg;
1081
1082 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1083 pg->freelist = freelist;
1084 freelist = pg;
1085
1086 if (level == 1)
1087 return freelist;
1088
1089 pte = page_address(pg);
1090 do {
1091 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1092 freelist = dma_pte_list_pagetables(domain, level - 1,
1093 pte, freelist);
1094 pte++;
1095 } while (!first_pte_in_page(pte));
1096
1097 return freelist;
1098}
1099
1100static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1101 struct dma_pte *pte, unsigned long pfn,
1102 unsigned long start_pfn,
1103 unsigned long last_pfn,
1104 struct page *freelist)
1105{
1106 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1107
1108 pfn = max(start_pfn, pfn);
1109 pte = &pte[pfn_level_offset(pfn, level)];
1110
1111 do {
1112 unsigned long level_pfn;
1113
1114 if (!dma_pte_present(pte))
1115 goto next;
1116
1117 level_pfn = pfn & level_mask(level);
1118
1119 /* If range covers entire pagetable, free it */
1120 if (start_pfn <= level_pfn &&
1121 last_pfn >= level_pfn + level_size(level) - 1) {
 1122 /* These subordinate page tables are going away entirely. Don't
1123 bother to clear them; we're just going to *free* them. */
1124 if (level > 1 && !dma_pte_superpage(pte))
1125 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1126
1127 dma_clear_pte(pte);
1128 if (!first_pte)
1129 first_pte = pte;
1130 last_pte = pte;
1131 } else if (level > 1) {
1132 /* Recurse down into a level that isn't *entirely* obsolete */
1133 freelist = dma_pte_clear_level(domain, level - 1,
1134 phys_to_virt(dma_pte_addr(pte)),
1135 level_pfn, start_pfn, last_pfn,
1136 freelist);
1137 }
1138next:
1139 pfn += level_size(level);
1140 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1141
1142 if (first_pte)
1143 domain_flush_cache(domain, first_pte,
1144 (void *)++last_pte - (void *)first_pte);
1145
1146 return freelist;
1147}
1148
1149/* We can't just free the pages because the IOMMU may still be walking
1150 the page tables, and may have cached the intermediate levels. The
1151 pages can only be freed after the IOTLB flush has been done. */
1152static struct page *domain_unmap(struct dmar_domain *domain,
1153 unsigned long start_pfn,
1154 unsigned long last_pfn)
1155{
 1156 struct page *freelist;
 1157
1158 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1159 BUG_ON(!domain_pfn_supported(domain, last_pfn));
1160 BUG_ON(start_pfn > last_pfn);
1161
1162 /* we don't need lock here; nobody else touches the iova range */
1163 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1164 domain->pgd, 0, start_pfn, last_pfn, NULL);
1165
1166 /* free pgd */
1167 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1168 struct page *pgd_page = virt_to_page(domain->pgd);
1169 pgd_page->freelist = freelist;
1170 freelist = pgd_page;
1171
1172 domain->pgd = NULL;
1173 }
1174
1175 return freelist;
1176}
1177
1178static void dma_free_pagelist(struct page *freelist)
1179{
1180 struct page *pg;
1181
1182 while ((pg = freelist)) {
1183 freelist = pg->freelist;
1184 free_pgtable_page(page_address(pg));
1185 }
1186}
1187
1188static void iova_entry_free(unsigned long data)
1189{
1190 struct page *freelist = (struct page *)data;
1191
1192 dma_free_pagelist(freelist);
1193}
1194
1195/* iommu handling */
1196static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1197{
1198 struct root_entry *root;
1199 unsigned long flags;
1200
1201 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1202 if (!root) {
1203 pr_err("Allocating root entry for %s failed\n",
1204 iommu->name);
1205 return -ENOMEM;
1206 }
1207
1208 __iommu_flush_cache(iommu, root, ROOT_SIZE);
1209
1210 spin_lock_irqsave(&iommu->lock, flags);
1211 iommu->root_entry = root;
1212 spin_unlock_irqrestore(&iommu->lock, flags);
1213
1214 return 0;
1215}
1216
1217static void iommu_set_root_entry(struct intel_iommu *iommu)
1218{
1219 u64 addr;
1220 u32 sts;
1221 unsigned long flag;
1222
1223 addr = virt_to_phys(iommu->root_entry);
 1224 if (sm_supported(iommu))
1225 addr |= DMA_RTADDR_SMT;
 1226
1227 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1228 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
1229
1230 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1231
1232 /* Make sure hardware complete it */
1233 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1234 readl, (sts & DMA_GSTS_RTPS), sts);
1235
1236 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1237}
1238
 1239void iommu_flush_write_buffer(struct intel_iommu *iommu)
 1240{
1241 u32 val;
1242 unsigned long flag;
1243
1244 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1245 return;
1246
1247 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1248 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1249
1250 /* Make sure hardware complete it */
1251 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1252 readl, (!(val & DMA_GSTS_WBFS)), val);
1253
1254 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1255}
1256
 1257/* return value determines if we need a write buffer flush */
1258static void __iommu_flush_context(struct intel_iommu *iommu,
1259 u16 did, u16 source_id, u8 function_mask,
1260 u64 type)
1261{
1262 u64 val = 0;
1263 unsigned long flag;
1264
1265 switch (type) {
1266 case DMA_CCMD_GLOBAL_INVL:
1267 val = DMA_CCMD_GLOBAL_INVL;
1268 break;
1269 case DMA_CCMD_DOMAIN_INVL:
1270 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1271 break;
1272 case DMA_CCMD_DEVICE_INVL:
1273 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1274 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1275 break;
1276 default:
1277 BUG();
1278 }
1279 val |= DMA_CCMD_ICC;
1280
1281 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1282 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1283
1284 /* Make sure hardware complete it */
1285 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1286 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1287
1288 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1289}
1290
 1291/* return value determines if we need a write buffer flush */
1292static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1293 u64 addr, unsigned int size_order, u64 type)
1294{
1295 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1296 u64 val = 0, val_iva = 0;
1297 unsigned long flag;
1298
1299 switch (type) {
1300 case DMA_TLB_GLOBAL_FLUSH:
1301 /* global flush doesn't need set IVA_REG */
1302 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1303 break;
1304 case DMA_TLB_DSI_FLUSH:
1305 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1306 break;
1307 case DMA_TLB_PSI_FLUSH:
1308 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1309 /* IH bit is passed in as part of address */
1310 val_iva = size_order | addr;
1311 break;
1312 default:
1313 BUG();
1314 }
1315 /* Note: set drain read/write */
1316#if 0
1317 /*
1318 * This is probably to be super secure.. Looks like we can
1319 * ignore it without any impact.
1320 */
1321 if (cap_read_drain(iommu->cap))
1322 val |= DMA_TLB_READ_DRAIN;
1323#endif
1324 if (cap_write_drain(iommu->cap))
1325 val |= DMA_TLB_WRITE_DRAIN;
1326
1327 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1328 /* Note: Only uses first TLB reg currently */
1329 if (val_iva)
1330 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1331 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1332
1333 /* Make sure hardware complete it */
1334 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1335 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1336
1337 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1338
1339 /* check IOTLB invalidation granularity */
1340 if (DMA_TLB_IAIG(val) == 0)
1341 pr_err("Flush IOTLB failed\n");
1342 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1343 pr_debug("TLB flush request %Lx, actual %Lx\n",
1344 (unsigned long long)DMA_TLB_IIRG(type),
1345 (unsigned long long)DMA_TLB_IAIG(val));
1346}
1347
1348static struct device_domain_info *
1349iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1350 u8 bus, u8 devfn)
1351{
1352 struct device_domain_info *info;
1353
1354 assert_spin_locked(&device_domain_lock);
1355
1356 if (!iommu->qi)
1357 return NULL;
1358
1359 list_for_each_entry(info, &domain->devices, link)
1360 if (info->iommu == iommu && info->bus == bus &&
1361 info->devfn == devfn) {
1362 if (info->ats_supported && info->dev)
1363 return info;
1364 break;
1365 }
1366
1367 return NULL;
1368}
1369
1370static void domain_update_iotlb(struct dmar_domain *domain)
1371{
1372 struct device_domain_info *info;
1373 bool has_iotlb_device = false;
1374
1375 assert_spin_locked(&device_domain_lock);
1376
1377 list_for_each_entry(info, &domain->devices, link) {
1378 struct pci_dev *pdev;
1379
1380 if (!info->dev || !dev_is_pci(info->dev))
1381 continue;
1382
1383 pdev = to_pci_dev(info->dev);
1384 if (pdev->ats_enabled) {
1385 has_iotlb_device = true;
1386 break;
1387 }
1388 }
1389
1390 domain->has_iotlb_device = has_iotlb_device;
1391}
1392
1393static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1394{
1395 struct pci_dev *pdev;
1396
1397 assert_spin_locked(&device_domain_lock);
1398
1399 if (!info || !dev_is_pci(info->dev))
1400 return;
1401
1402 pdev = to_pci_dev(info->dev);
1403 /* For IOMMU that supports device IOTLB throttling (DIT), we assign
1404 * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
1405 * queue depth at PF level. If DIT is not set, PFSID will be treated as
1406 * reserved, which should be set to 0.
1407 */
1408 if (!ecap_dit(info->iommu->ecap))
1409 info->pfsid = 0;
1410 else {
1411 struct pci_dev *pf_pdev;
1412
1413 /* pdev will be returned if device is not a vf */
1414 pf_pdev = pci_physfn(pdev);
 1415 info->pfsid = pci_dev_id(pf_pdev);
 1416 }
1417
1418#ifdef CONFIG_INTEL_IOMMU_SVM
1419 /* The PCIe spec, in its wisdom, declares that the behaviour of
1420 the device if you enable PASID support after ATS support is
1421 undefined. So always enable PASID support on devices which
1422 have it, even if we can't yet know if we're ever going to
1423 use it. */
1424 if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1425 info->pasid_enabled = 1;
1426
 1427 if (info->pri_supported &&
1428 (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1) &&
1429 !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
 1430 info->pri_enabled = 1;
1431#endif
 1432 if (!pdev->untrusted && info->ats_supported &&
1433 pci_ats_page_aligned(pdev) &&
1434 !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
 1435 info->ats_enabled = 1;
1436 domain_update_iotlb(info->domain);
1437 info->ats_qdep = pci_ats_queue_depth(pdev);
1438 }
1439}
1440
1441static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1442{
1443 struct pci_dev *pdev;
1444
1445 assert_spin_locked(&device_domain_lock);
1446
1447 if (!dev_is_pci(info->dev))
1448 return;
1449
1450 pdev = to_pci_dev(info->dev);
1451
1452 if (info->ats_enabled) {
1453 pci_disable_ats(pdev);
1454 info->ats_enabled = 0;
1455 domain_update_iotlb(info->domain);
1456 }
1457#ifdef CONFIG_INTEL_IOMMU_SVM
1458 if (info->pri_enabled) {
1459 pci_disable_pri(pdev);
1460 info->pri_enabled = 0;
1461 }
1462 if (info->pasid_enabled) {
1463 pci_disable_pasid(pdev);
1464 info->pasid_enabled = 0;
1465 }
1466#endif
1467}
1468
1469static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1470 u64 addr, unsigned mask)
1471{
1472 u16 sid, qdep;
1473 unsigned long flags;
1474 struct device_domain_info *info;
1475
1476 if (!domain->has_iotlb_device)
1477 return;
1478
1479 spin_lock_irqsave(&device_domain_lock, flags);
1480 list_for_each_entry(info, &domain->devices, link) {
1481 if (!info->ats_enabled)
1482 continue;
1483
1484 sid = info->bus << 8 | info->devfn;
1485 qdep = info->ats_qdep;
1486 qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
1487 qdep, addr, mask);
1488 }
1489 spin_unlock_irqrestore(&device_domain_lock, flags);
1490}
1491
1492static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1493 struct dmar_domain *domain,
1494 unsigned long pfn, unsigned int pages,
1495 int ih, int map)
1496{
1497 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1498 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1499 u16 did = domain->iommu_did[iommu->seq_id];
1500
1501 BUG_ON(pages == 0);
1502
1503 if (ih)
1504 ih = 1 << 6;
1505 /*
 1506 * Fall back to domain selective flush if no PSI support or the size is
1507 * too big.
1508 * PSI requires page size to be 2 ^ x, and the base address is naturally
1509 * aligned to the size
1510 */
1511 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1512 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1513 DMA_TLB_DSI_FLUSH);
1514 else
1515 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1516 DMA_TLB_PSI_FLUSH);
1517
1518 /*
1519 * In caching mode, changes of pages from non-present to present require
1520 * flush. However, device IOTLB doesn't need to be flushed in this case.
1521 */
1522 if (!cap_caching_mode(iommu->cap) || !map)
1523 iommu_flush_dev_iotlb(domain, addr, mask);
1524}
1525
1526/* Notification for newly created mappings */
1527static inline void __mapping_notify_one(struct intel_iommu *iommu,
1528 struct dmar_domain *domain,
1529 unsigned long pfn, unsigned int pages)
1530{
1531 /* It's a non-present to present mapping. Only flush if caching mode */
1532 if (cap_caching_mode(iommu->cap))
1533 iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
1534 else
1535 iommu_flush_write_buffer(iommu);
1536}
1537
1538static void iommu_flush_iova(struct iova_domain *iovad)
1539{
1540 struct dmar_domain *domain;
1541 int idx;
1542
1543 domain = container_of(iovad, struct dmar_domain, iovad);
1544
1545 for_each_domain_iommu(idx, domain) {
1546 struct intel_iommu *iommu = g_iommus[idx];
1547 u16 did = domain->iommu_did[iommu->seq_id];
1548
1549 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
1550
1551 if (!cap_caching_mode(iommu->cap))
1552 iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1553 0, MAX_AGAW_PFN_WIDTH);
1554 }
1555}
1556
1557static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1558{
1559 u32 pmen;
1560 unsigned long flags;
1561
 1562 if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
1563 return;
1564
 1565 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1566 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1567 pmen &= ~DMA_PMEN_EPM;
1568 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1569
1570 /* wait for the protected region status bit to clear */
1571 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1572 readl, !(pmen & DMA_PMEN_PRS), pmen);
1573
1574 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1575}
1576
1577static void iommu_enable_translation(struct intel_iommu *iommu)
1578{
1579 u32 sts;
1580 unsigned long flags;
1581
1582 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1583 iommu->gcmd |= DMA_GCMD_TE;
1584 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1585
1586 /* Make sure hardware complete it */
1587 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1588 readl, (sts & DMA_GSTS_TES), sts);
1589
1590 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1591}
1592
1593static void iommu_disable_translation(struct intel_iommu *iommu)
1594{
1595 u32 sts;
1596 unsigned long flag;
1597
1598 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1599 iommu->gcmd &= ~DMA_GCMD_TE;
1600 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1601
1602 /* Make sure hardware complete it */
1603 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1604 readl, (!(sts & DMA_GSTS_TES)), sts);
1605
1606 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1607}
1608
 1609static int iommu_init_domains(struct intel_iommu *iommu)
1610{
1611 u32 ndomains, nlongs;
1612 size_t size;
1613
1614 ndomains = cap_ndoms(iommu->cap);
1615 pr_debug("%s: Number of Domains supported <%d>\n",
1616 iommu->name, ndomains);
1617 nlongs = BITS_TO_LONGS(ndomains);
1618
1619 spin_lock_init(&iommu->lock);
1620
1621 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1622 if (!iommu->domain_ids) {
1623 pr_err("%s: Allocating domain id array failed\n",
1624 iommu->name);
1625 return -ENOMEM;
1626 }
1627
1628 size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
1629 iommu->domains = kzalloc(size, GFP_KERNEL);
1630
1631 if (iommu->domains) {
1632 size = 256 * sizeof(struct dmar_domain *);
1633 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1634 }
1635
1636 if (!iommu->domains || !iommu->domains[0]) {
1637 pr_err("%s: Allocating domain array failed\n",
1638 iommu->name);
1639 kfree(iommu->domain_ids);
1640 kfree(iommu->domains);
1641 iommu->domain_ids = NULL;
1642 iommu->domains = NULL;
1643 return -ENOMEM;
1644 }
1645
 1646 /*
1647 * If Caching mode is set, then invalid translations are tagged
1648 * with domain-id 0, hence we need to pre-allocate it. We also
1649 * use domain-id 0 as a marker for non-allocated domain-id, so
1650 * make sure it is not used for a real domain.
1651 */
1652 set_bit(0, iommu->domain_ids);
1653
 1654 /*
1655 * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid
1656 * entry for first-level or pass-through translation modes should
1657 * be programmed with a domain id different from those used for
1658 * second-level or nested translation. We reserve a domain id for
1659 * this purpose.
1660 */
1661 if (sm_supported(iommu))
1662 set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
1663
 1664 return 0;
1665}
1666
1667static void disable_dmar_iommu(struct intel_iommu *iommu)
1668{
1669 struct device_domain_info *info, *tmp;
1670 unsigned long flags;
1671
1672 if (!iommu->domains || !iommu->domain_ids)
1673 return;
1674
 1675 spin_lock_irqsave(&device_domain_lock, flags);
1676 list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
 1677 if (info->iommu != iommu)
1678 continue;
1679
1680 if (!info->dev || !info->domain)
1681 continue;
1682
 1683 __dmar_remove_one_dev_info(info);
 1684 }
1685 spin_unlock_irqrestore(&device_domain_lock, flags);
1686
1687 if (iommu->gcmd & DMA_GCMD_TE)
1688 iommu_disable_translation(iommu);
1689}
1690
1691static void free_dmar_iommu(struct intel_iommu *iommu)
1692{
1693 if ((iommu->domains) && (iommu->domain_ids)) {
1694 int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
1695 int i;
1696
1697 for (i = 0; i < elems; i++)
1698 kfree(iommu->domains[i]);
1699 kfree(iommu->domains);
1700 kfree(iommu->domain_ids);
1701 iommu->domains = NULL;
1702 iommu->domain_ids = NULL;
1703 }
1704
1705 g_iommus[iommu->seq_id] = NULL;
1706
1707 /* free context mapping */
1708 free_context_table(iommu);
1709
1710#ifdef CONFIG_INTEL_IOMMU_SVM
 1711 if (pasid_supported(iommu)) {
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001712 if (ecap_prs(iommu->ecap))
1713 intel_svm_finish_prq(iommu);
 1714 }
1715#endif
1716}
1717
1718static struct dmar_domain *alloc_domain(int flags)
1719{
1720 struct dmar_domain *domain;
1721
1722 domain = alloc_domain_mem();
1723 if (!domain)
1724 return NULL;
1725
1726 memset(domain, 0, sizeof(*domain));
 1727 domain->nid = NUMA_NO_NODE;
 1728 domain->flags = flags;
1729 domain->has_iotlb_device = false;
1730 INIT_LIST_HEAD(&domain->devices);
1731
1732 return domain;
1733}
1734
1735/* Must be called with iommu->lock */
1736static int domain_attach_iommu(struct dmar_domain *domain,
1737 struct intel_iommu *iommu)
1738{
1739 unsigned long ndomains;
1740 int num;
1741
1742 assert_spin_locked(&device_domain_lock);
1743 assert_spin_locked(&iommu->lock);
1744
1745 domain->iommu_refcnt[iommu->seq_id] += 1;
1746 domain->iommu_count += 1;
1747 if (domain->iommu_refcnt[iommu->seq_id] == 1) {
1748 ndomains = cap_ndoms(iommu->cap);
1749 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1750
1751 if (num >= ndomains) {
1752 pr_err("%s: No free domain ids\n", iommu->name);
1753 domain->iommu_refcnt[iommu->seq_id] -= 1;
1754 domain->iommu_count -= 1;
1755 return -ENOSPC;
1756 }
1757
1758 set_bit(num, iommu->domain_ids);
1759 set_iommu_domain(iommu, num, domain);
1760
1761 domain->iommu_did[iommu->seq_id] = num;
1762 domain->nid = iommu->node;
1763
1764 domain_update_iommu_cap(domain);
1765 }
1766
1767 return 0;
1768}
1769
1770static int domain_detach_iommu(struct dmar_domain *domain,
1771 struct intel_iommu *iommu)
1772{
 1773 int num, count;
 1774
1775 assert_spin_locked(&device_domain_lock);
1776 assert_spin_locked(&iommu->lock);
1777
1778 domain->iommu_refcnt[iommu->seq_id] -= 1;
1779 count = --domain->iommu_count;
1780 if (domain->iommu_refcnt[iommu->seq_id] == 0) {
1781 num = domain->iommu_did[iommu->seq_id];
1782 clear_bit(num, iommu->domain_ids);
1783 set_iommu_domain(iommu, num, NULL);
1784
1785 domain_update_iommu_cap(domain);
1786 domain->iommu_did[iommu->seq_id] = 0;
1787 }
1788
1789 return count;
1790}
1791
1792static struct iova_domain reserved_iova_list;
1793static struct lock_class_key reserved_rbtree_key;
1794
1795static int dmar_init_reserved_ranges(void)
1796{
1797 struct pci_dev *pdev = NULL;
1798 struct iova *iova;
1799 int i;
1800
1801 init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);
1802
1803 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1804 &reserved_rbtree_key);
1805
1806 /* IOAPIC ranges shouldn't be accessed by DMA */
1807 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1808 IOVA_PFN(IOAPIC_RANGE_END));
1809 if (!iova) {
1810 pr_err("Reserve IOAPIC range failed\n");
1811 return -ENODEV;
1812 }
1813
1814 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1815 for_each_pci_dev(pdev) {
1816 struct resource *r;
1817
1818 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1819 r = &pdev->resource[i];
1820 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1821 continue;
1822 iova = reserve_iova(&reserved_iova_list,
1823 IOVA_PFN(r->start),
1824 IOVA_PFN(r->end));
1825 if (!iova) {
David Brazdil0f672f62019-12-10 10:32:29 +00001826 pci_err(pdev, "Reserve iova for %pR failed\n", r);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001827 return -ENODEV;
1828 }
1829 }
1830 }
1831 return 0;
1832}
1833
1834static void domain_reserve_special_ranges(struct dmar_domain *domain)
1835{
1836 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1837}
1838
1839static inline int guestwidth_to_adjustwidth(int gaw)
1840{
1841 int agaw;
1842 int r = (gaw - 12) % 9;
1843
1844 if (r == 0)
1845 agaw = gaw;
1846 else
1847 agaw = gaw + 9 - r;
1848 if (agaw > 64)
1849 agaw = 64;
1850 return agaw;
1851}
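/*
 * Illustrative sketch, not part of the driver: the helper above rounds the
 * guest width up to 12 + a whole number of 9-bit page-table levels, capped
 * at 64 bits. A hypothetical self-check of a few common widths (the helper
 * name below is made up for illustration only):
 */
static void __maybe_unused guestwidth_adjust_example(void)
{
	/* 48 = 12 + 4 * 9, already level-aligned. */
	WARN_ON(guestwidth_to_adjustwidth(48) != 48);
	/* 40 is not level-aligned; it rounds up to the next level boundary. */
	WARN_ON(guestwidth_to_adjustwidth(40) != 48);
	/* Anything that would exceed 64 bits is clamped. */
	WARN_ON(guestwidth_to_adjustwidth(70) != 64);
}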
1852
1853static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1854 int guest_width)
1855{
Olivier Deprez0e641232021-09-23 10:07:05 +02001856 int adjust_width, agaw, cap_width;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001857 unsigned long sagaw;
1858 int err;
1859
1860 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
1861
1862 err = init_iova_flush_queue(&domain->iovad,
1863 iommu_flush_iova, iova_entry_free);
1864 if (err)
1865 return err;
1866
1867 domain_reserve_special_ranges(domain);
1868
1869 /* calculate AGAW */
Olivier Deprez0e641232021-09-23 10:07:05 +02001870 cap_width = min_t(int, cap_mgaw(iommu->cap), agaw_to_width(iommu->agaw));
1871 if (guest_width > cap_width)
1872 guest_width = cap_width;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001873 domain->gaw = guest_width;
1874 adjust_width = guestwidth_to_adjustwidth(guest_width);
1875 agaw = width_to_agaw(adjust_width);
1876 sagaw = cap_sagaw(iommu->cap);
1877 if (!test_bit(agaw, &sagaw)) {
1878 /* hardware doesn't support it, choose a bigger one */
1879 pr_debug("Hardware doesn't support agaw %d\n", agaw);
1880 agaw = find_next_bit(&sagaw, 5, agaw);
1881 if (agaw >= 5)
1882 return -ENODEV;
1883 }
1884 domain->agaw = agaw;
1885
1886 if (ecap_coherent(iommu->ecap))
1887 domain->iommu_coherency = 1;
1888 else
1889 domain->iommu_coherency = 0;
1890
1891 if (ecap_sc_support(iommu->ecap))
1892 domain->iommu_snooping = 1;
1893 else
1894 domain->iommu_snooping = 0;
1895
1896 if (intel_iommu_superpage)
1897 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1898 else
1899 domain->iommu_superpage = 0;
1900
1901 domain->nid = iommu->node;
1902
1903 /* always allocate the top pgd */
1904 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1905 if (!domain->pgd)
1906 return -ENOMEM;
1907 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1908 return 0;
1909}
1910
1911static void domain_exit(struct dmar_domain *domain)
1912{
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001913
1914 /* Remove associated devices and clear attached or cached domains */
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001915 domain_remove_dev_info(domain);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001916
1917 /* destroy iovas */
1918 put_iova_domain(&domain->iovad);
1919
David Brazdil0f672f62019-12-10 10:32:29 +00001920 if (domain->pgd) {
1921 struct page *freelist;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001922
David Brazdil0f672f62019-12-10 10:32:29 +00001923 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1924 dma_free_pagelist(freelist);
1925 }
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001926
1927 free_domain_mem(domain);
1928}
1929
David Brazdil0f672f62019-12-10 10:32:29 +00001930/*
1931 * Get the PASID directory size for scalable mode context entry.
1932 * Value of X in the PDTS field of a scalable mode context entry
1933 * indicates PASID directory with 2^(X + 7) entries.
1934 */
1935static inline unsigned long context_get_sm_pds(struct pasid_table *table)
1936{
1937 int pds, max_pde;
1938
1939 max_pde = table->max_pasid >> PASID_PDE_SHIFT;
1940 pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS);
1941 if (pds < 7)
1942 return 0;
1943
1944 return pds - 7;
1945}
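/*
 * Illustrative sketch, not part of the driver: with the 2^(X + 7) coding
 * described above, a hypothetical helper can recover how many PASID
 * directory entries the hardware will assume. For a table sized for the
 * full 20-bit PASID space (max_pasid of 1 << 20, and assuming
 * PASID_PDE_SHIFT is 6), the directory has 1 << 14 entries,
 * context_get_sm_pds() returns 14 - 7 = 7, and the hardware decodes
 * 2^(7 + 7) = 16384 entries again.
 */
static inline unsigned long __maybe_unused
context_sm_pds_nr_entries(struct pasid_table *table)
{
	/* Invert the PDTS coding: value X means 2^(X + 7) directory slots. */
	return 1UL << (context_get_sm_pds(table) + 7);
}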
1946
1947/*
1948 * Set the RID_PASID field of a scalable mode context entry. The
1949 * IOMMU hardware will use the PASID value set in this field for
1950 * DMA translations of DMA requests without PASID.
1951 */
1952static inline void
1953context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
1954{
1955 context->hi |= pasid & ((1 << 20) - 1);
1956 context->hi |= (1 << 20);
1957}
1958
1959/*
1960 * Set the DTE(Device-TLB Enable) field of a scalable mode context
1961 * entry.
1962 */
1963static inline void context_set_sm_dte(struct context_entry *context)
1964{
1965 context->lo |= (1 << 2);
1966}
1967
1968/*
1969 * Set the PRE(Page Request Enable) field of a scalable mode context
1970 * entry.
1971 */
1972static inline void context_set_sm_pre(struct context_entry *context)
1973{
1974 context->lo |= (1 << 4);
1975}
1976
1977/* Convert value to context PASID directory size field coding. */
1978#define context_pdts(pds) (((pds) & 0x7) << 9)
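/*
 * Illustrative note, not part of the driver: the macro above places the
 * 3-bit PDS value at bits 11:9 of the value it is OR'd into, so for
 * example context_pdts(3) == 0x600. It is combined with the PASID
 * directory pointer when the scalable-mode context entry is written below.
 */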
1979
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001980static int domain_context_mapping_one(struct dmar_domain *domain,
1981 struct intel_iommu *iommu,
David Brazdil0f672f62019-12-10 10:32:29 +00001982 struct pasid_table *table,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001983 u8 bus, u8 devfn)
1984{
1985 u16 did = domain->iommu_did[iommu->seq_id];
1986 int translation = CONTEXT_TT_MULTI_LEVEL;
1987 struct device_domain_info *info = NULL;
1988 struct context_entry *context;
1989 unsigned long flags;
David Brazdil0f672f62019-12-10 10:32:29 +00001990 int ret;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001991
1992 WARN_ON(did == 0);
1993
1994 if (hw_pass_through && domain_type_is_si(domain))
1995 translation = CONTEXT_TT_PASS_THROUGH;
1996
1997 pr_debug("Set context mapping for %02x:%02x.%d\n",
1998 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1999
2000 BUG_ON(!domain->pgd);
2001
2002 spin_lock_irqsave(&device_domain_lock, flags);
2003 spin_lock(&iommu->lock);
2004
2005 ret = -ENOMEM;
2006 context = iommu_context_addr(iommu, bus, devfn, 1);
2007 if (!context)
2008 goto out_unlock;
2009
2010 ret = 0;
2011 if (context_present(context))
2012 goto out_unlock;
2013
2014 /*
2015 * For kdump cases, old valid entries may be cached due to the
2016 * in-flight DMA and copied pgtable, but there is no unmapping
2017 * behaviour for them, thus we need an explicit cache flush for
2018 * the newly-mapped device. For kdump, at this point, the device
2019 * is supposed to finish reset at its driver probe stage, so no
 2020	 * in-flight DMA will exist, and we don't need to worry about
 2021	 * it hereafter.
2022 */
2023 if (context_copied(context)) {
2024 u16 did_old = context_domain_id(context);
2025
2026 if (did_old < cap_ndoms(iommu->cap)) {
2027 iommu->flush.flush_context(iommu, did_old,
2028 (((u16)bus) << 8) | devfn,
2029 DMA_CCMD_MASK_NOBIT,
2030 DMA_CCMD_DEVICE_INVL);
2031 iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
2032 DMA_TLB_DSI_FLUSH);
2033 }
2034 }
2035
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002036 context_clear_entry(context);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002037
David Brazdil0f672f62019-12-10 10:32:29 +00002038 if (sm_supported(iommu)) {
2039 unsigned long pds;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002040
David Brazdil0f672f62019-12-10 10:32:29 +00002041 WARN_ON(!table);
2042
2043 /* Setup the PASID DIR pointer: */
2044 pds = context_get_sm_pds(table);
2045 context->lo = (u64)virt_to_phys(table->table) |
2046 context_pdts(pds);
2047
2048 /* Setup the RID_PASID field: */
2049 context_set_sm_rid2pasid(context, PASID_RID2PASID);
2050
2051 /*
2052 * Setup the Device-TLB enable bit and Page request
2053 * Enable bit:
2054 */
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002055 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2056 if (info && info->ats_supported)
David Brazdil0f672f62019-12-10 10:32:29 +00002057 context_set_sm_dte(context);
2058 if (info && info->pri_supported)
2059 context_set_sm_pre(context);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002060 } else {
David Brazdil0f672f62019-12-10 10:32:29 +00002061 struct dma_pte *pgd = domain->pgd;
2062 int agaw;
2063
2064 context_set_domain_id(context, did);
2065
2066 if (translation != CONTEXT_TT_PASS_THROUGH) {
2067 /*
2068 * Skip top levels of page tables for iommu which has
2069 * less agaw than default. Unnecessary for PT mode.
2070 */
2071 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2072 ret = -ENOMEM;
2073 pgd = phys_to_virt(dma_pte_addr(pgd));
2074 if (!dma_pte_present(pgd))
2075 goto out_unlock;
2076 }
2077
2078 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2079 if (info && info->ats_supported)
2080 translation = CONTEXT_TT_DEV_IOTLB;
2081 else
2082 translation = CONTEXT_TT_MULTI_LEVEL;
2083
2084 context_set_address_root(context, virt_to_phys(pgd));
2085 context_set_address_width(context, agaw);
2086 } else {
2087 /*
2088 * In pass through mode, AW must be programmed to
2089 * indicate the largest AGAW value supported by
2090 * hardware. And ASR is ignored by hardware.
2091 */
2092 context_set_address_width(context, iommu->msagaw);
2093 }
2094
2095 context_set_translation_type(context, translation);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002096 }
2097
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002098 context_set_fault_enable(context);
2099 context_set_present(context);
Olivier Deprez0e641232021-09-23 10:07:05 +02002100 if (!ecap_coherent(iommu->ecap))
2101 clflush_cache_range(context, sizeof(*context));
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002102
2103 /*
2104 * It's a non-present to present mapping. If hardware doesn't cache
 2105	 * non-present entries we only need to flush the write-buffer. If it
 2106	 * _does_ cache non-present entries, then it does so in the special
2107 * domain #0, which we have to flush:
2108 */
2109 if (cap_caching_mode(iommu->cap)) {
2110 iommu->flush.flush_context(iommu, 0,
2111 (((u16)bus) << 8) | devfn,
2112 DMA_CCMD_MASK_NOBIT,
2113 DMA_CCMD_DEVICE_INVL);
2114 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
2115 } else {
2116 iommu_flush_write_buffer(iommu);
2117 }
2118 iommu_enable_dev_iotlb(info);
2119
2120 ret = 0;
2121
2122out_unlock:
2123 spin_unlock(&iommu->lock);
2124 spin_unlock_irqrestore(&device_domain_lock, flags);
2125
2126 return ret;
2127}
2128
2129struct domain_context_mapping_data {
2130 struct dmar_domain *domain;
2131 struct intel_iommu *iommu;
David Brazdil0f672f62019-12-10 10:32:29 +00002132 struct pasid_table *table;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002133};
2134
2135static int domain_context_mapping_cb(struct pci_dev *pdev,
2136 u16 alias, void *opaque)
2137{
2138 struct domain_context_mapping_data *data = opaque;
2139
2140 return domain_context_mapping_one(data->domain, data->iommu,
David Brazdil0f672f62019-12-10 10:32:29 +00002141 data->table, PCI_BUS_NUM(alias),
2142 alias & 0xff);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002143}
2144
2145static int
2146domain_context_mapping(struct dmar_domain *domain, struct device *dev)
2147{
David Brazdil0f672f62019-12-10 10:32:29 +00002148 struct domain_context_mapping_data data;
2149 struct pasid_table *table;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002150 struct intel_iommu *iommu;
2151 u8 bus, devfn;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002152
2153 iommu = device_to_iommu(dev, &bus, &devfn);
2154 if (!iommu)
2155 return -ENODEV;
2156
David Brazdil0f672f62019-12-10 10:32:29 +00002157 table = intel_pasid_get_table(dev);
2158
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002159 if (!dev_is_pci(dev))
David Brazdil0f672f62019-12-10 10:32:29 +00002160 return domain_context_mapping_one(domain, iommu, table,
2161 bus, devfn);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002162
2163 data.domain = domain;
2164 data.iommu = iommu;
David Brazdil0f672f62019-12-10 10:32:29 +00002165 data.table = table;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002166
2167 return pci_for_each_dma_alias(to_pci_dev(dev),
2168 &domain_context_mapping_cb, &data);
2169}
2170
2171static int domain_context_mapped_cb(struct pci_dev *pdev,
2172 u16 alias, void *opaque)
2173{
2174 struct intel_iommu *iommu = opaque;
2175
2176 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
2177}
2178
2179static int domain_context_mapped(struct device *dev)
2180{
2181 struct intel_iommu *iommu;
2182 u8 bus, devfn;
2183
2184 iommu = device_to_iommu(dev, &bus, &devfn);
2185 if (!iommu)
2186 return -ENODEV;
2187
2188 if (!dev_is_pci(dev))
2189 return device_context_mapped(iommu, bus, devfn);
2190
2191 return !pci_for_each_dma_alias(to_pci_dev(dev),
2192 domain_context_mapped_cb, iommu);
2193}
2194
2195/* Returns a number of VTD pages, but aligned to MM page size */
2196static inline unsigned long aligned_nrpages(unsigned long host_addr,
2197 size_t size)
2198{
2199 host_addr &= ~PAGE_MASK;
2200 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2201}
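/*
 * Illustrative sketch, not part of the driver: a worked example of the
 * rounding above with 4KiB MM pages. A 0x2000-byte buffer that starts
 * 0x234 bytes into a page occupies 0x234 + 0x2000 = 0x2234 bytes of
 * page-aligned space, so:
 *
 *	aligned_nrpages(0x1234, 0x2000) == 3	(three VT-d pages)
 *	aligned_nrpages(0x1000, 0x1000) == 1	(already page aligned)
 */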
2202
2203/* Return largest possible superpage level for a given mapping */
2204static inline int hardware_largepage_caps(struct dmar_domain *domain,
2205 unsigned long iov_pfn,
2206 unsigned long phy_pfn,
2207 unsigned long pages)
2208{
2209 int support, level = 1;
2210 unsigned long pfnmerge;
2211
2212 support = domain->iommu_superpage;
2213
2214 /* To use a large page, the virtual *and* physical addresses
2215 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2216 of them will mean we have to use smaller pages. So just
2217 merge them and check both at once. */
2218 pfnmerge = iov_pfn | phy_pfn;
2219
2220 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2221 pages >>= VTD_STRIDE_SHIFT;
2222 if (!pages)
2223 break;
2224 pfnmerge >>= VTD_STRIDE_SHIFT;
2225 level++;
2226 support--;
2227 }
2228 return level;
2229}
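/*
 * Illustrative sketch, not part of the driver: if both the IOVA PFN and
 * the physical PFN are 2MiB aligned (low 9 bits clear), at least 512
 * pages remain to be mapped and domain->iommu_superpage >= 1, the loop
 * above runs once and returns level 2, i.e. a 2MiB superpage. Any set
 * low-order bit in either PFN, or a shorter remaining length, keeps the
 * mapping at level 1 (4KiB pages).
 */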
2230
2231static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2232 struct scatterlist *sg, unsigned long phys_pfn,
2233 unsigned long nr_pages, int prot)
2234{
2235 struct dma_pte *first_pte = NULL, *pte = NULL;
2236 phys_addr_t uninitialized_var(pteval);
2237 unsigned long sg_res = 0;
2238 unsigned int largepage_lvl = 0;
2239 unsigned long lvl_pages = 0;
2240
2241 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
2242
2243 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2244 return -EINVAL;
2245
2246 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2247
2248 if (!sg) {
2249 sg_res = nr_pages;
2250 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2251 }
2252
2253 while (nr_pages > 0) {
2254 uint64_t tmp;
2255
2256 if (!sg_res) {
2257 unsigned int pgoff = sg->offset & ~PAGE_MASK;
2258
2259 sg_res = aligned_nrpages(sg->offset, sg->length);
2260 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
2261 sg->dma_length = sg->length;
2262 pteval = (sg_phys(sg) - pgoff) | prot;
2263 phys_pfn = pteval >> VTD_PAGE_SHIFT;
2264 }
2265
2266 if (!pte) {
2267 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2268
2269 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2270 if (!pte)
2271 return -ENOMEM;
 2272			/* It is a large page */
2273 if (largepage_lvl > 1) {
2274 unsigned long nr_superpages, end_pfn;
2275
2276 pteval |= DMA_PTE_LARGE_PAGE;
2277 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2278
2279 nr_superpages = sg_res / lvl_pages;
2280 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2281
2282 /*
2283 * Ensure that old small page tables are
2284 * removed to make room for superpage(s).
2285 * We're adding new large pages, so make sure
2286 * we don't remove their parent tables.
2287 */
2288 dma_pte_free_pagetable(domain, iov_pfn, end_pfn,
2289 largepage_lvl + 1);
2290 } else {
2291 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2292 }
2293
2294 }
 2295		/* We don't need a lock here; nobody else
 2296		 * touches the iova range.
2297 */
2298 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2299 if (tmp) {
2300 static int dumps = 5;
2301 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2302 iov_pfn, tmp, (unsigned long long)pteval);
2303 if (dumps) {
2304 dumps--;
2305 debug_dma_dump_mappings(NULL);
2306 }
2307 WARN_ON(1);
2308 }
2309
2310 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2311
2312 BUG_ON(nr_pages < lvl_pages);
2313 BUG_ON(sg_res < lvl_pages);
2314
2315 nr_pages -= lvl_pages;
2316 iov_pfn += lvl_pages;
2317 phys_pfn += lvl_pages;
2318 pteval += lvl_pages * VTD_PAGE_SIZE;
2319 sg_res -= lvl_pages;
2320
2321 /* If the next PTE would be the first in a new page, then we
2322 need to flush the cache on the entries we've just written.
2323 And then we'll need to recalculate 'pte', so clear it and
2324 let it get set again in the if (!pte) block above.
2325
2326 If we're done (!nr_pages) we need to flush the cache too.
2327
2328 Also if we've been setting superpages, we may need to
2329 recalculate 'pte' and switch back to smaller pages for the
2330 end of the mapping, if the trailing size is not enough to
2331 use another superpage (i.e. sg_res < lvl_pages). */
2332 pte++;
2333 if (!nr_pages || first_pte_in_page(pte) ||
2334 (largepage_lvl > 1 && sg_res < lvl_pages)) {
2335 domain_flush_cache(domain, first_pte,
2336 (void *)pte - (void *)first_pte);
2337 pte = NULL;
2338 }
2339
2340 if (!sg_res && nr_pages)
2341 sg = sg_next(sg);
2342 }
2343 return 0;
2344}
2345
2346static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
David Brazdil0f672f62019-12-10 10:32:29 +00002347 struct scatterlist *sg, unsigned long phys_pfn,
2348 unsigned long nr_pages, int prot)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002349{
David Brazdil0f672f62019-12-10 10:32:29 +00002350 int iommu_id, ret;
2351 struct intel_iommu *iommu;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002352
David Brazdil0f672f62019-12-10 10:32:29 +00002353 /* Do the real mapping first */
2354 ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot);
2355 if (ret)
2356 return ret;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002357
David Brazdil0f672f62019-12-10 10:32:29 +00002358 for_each_domain_iommu(iommu_id, domain) {
2359 iommu = g_iommus[iommu_id];
2360 __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2361 }
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002362
David Brazdil0f672f62019-12-10 10:32:29 +00002363 return 0;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002364}
2365
2366static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2367 struct scatterlist *sg, unsigned long nr_pages,
2368 int prot)
2369{
2370 return domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2371}
2372
2373static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2374 unsigned long phys_pfn, unsigned long nr_pages,
2375 int prot)
2376{
2377 return domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
2378}
2379
2380static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
2381{
2382 unsigned long flags;
2383 struct context_entry *context;
2384 u16 did_old;
2385
2386 if (!iommu)
2387 return;
2388
2389 spin_lock_irqsave(&iommu->lock, flags);
2390 context = iommu_context_addr(iommu, bus, devfn, 0);
2391 if (!context) {
2392 spin_unlock_irqrestore(&iommu->lock, flags);
2393 return;
2394 }
2395 did_old = context_domain_id(context);
2396 context_clear_entry(context);
2397 __iommu_flush_cache(iommu, context, sizeof(*context));
2398 spin_unlock_irqrestore(&iommu->lock, flags);
2399 iommu->flush.flush_context(iommu,
2400 did_old,
2401 (((u16)bus) << 8) | devfn,
2402 DMA_CCMD_MASK_NOBIT,
2403 DMA_CCMD_DEVICE_INVL);
2404 iommu->flush.flush_iotlb(iommu,
2405 did_old,
2406 0,
2407 0,
2408 DMA_TLB_DSI_FLUSH);
2409}
2410
2411static inline void unlink_domain_info(struct device_domain_info *info)
2412{
2413 assert_spin_locked(&device_domain_lock);
2414 list_del(&info->link);
2415 list_del(&info->global);
2416 if (info->dev)
2417 info->dev->archdata.iommu = NULL;
2418}
2419
2420static void domain_remove_dev_info(struct dmar_domain *domain)
2421{
2422 struct device_domain_info *info, *tmp;
2423 unsigned long flags;
2424
2425 spin_lock_irqsave(&device_domain_lock, flags);
2426 list_for_each_entry_safe(info, tmp, &domain->devices, link)
2427 __dmar_remove_one_dev_info(info);
2428 spin_unlock_irqrestore(&device_domain_lock, flags);
2429}
2430
2431/*
2432 * find_domain
 2433 * Note: we use struct device->archdata.iommu to store the domain info
2434 */
2435static struct dmar_domain *find_domain(struct device *dev)
2436{
2437 struct device_domain_info *info;
2438
David Brazdil0f672f62019-12-10 10:32:29 +00002439 if (unlikely(dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO)) {
2440 struct iommu_domain *domain;
2441
2442 dev->archdata.iommu = NULL;
2443 domain = iommu_get_domain_for_dev(dev);
2444 if (domain)
2445 intel_iommu_attach_device(domain, dev);
2446 }
2447
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002448 /* No lock here, assumes no domain exit in normal case */
2449 info = dev->archdata.iommu;
David Brazdil0f672f62019-12-10 10:32:29 +00002450
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002451 if (likely(info))
2452 return info->domain;
2453 return NULL;
2454}
2455
2456static inline struct device_domain_info *
2457dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2458{
2459 struct device_domain_info *info;
2460
2461 list_for_each_entry(info, &device_domain_list, global)
2462 if (info->iommu->segment == segment && info->bus == bus &&
2463 info->devfn == devfn)
2464 return info;
2465
2466 return NULL;
2467}
2468
2469static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2470 int bus, int devfn,
2471 struct device *dev,
2472 struct dmar_domain *domain)
2473{
2474 struct dmar_domain *found = NULL;
2475 struct device_domain_info *info;
2476 unsigned long flags;
2477 int ret;
2478
2479 info = alloc_devinfo_mem();
2480 if (!info)
2481 return NULL;
2482
2483 info->bus = bus;
2484 info->devfn = devfn;
2485 info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2486 info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2487 info->ats_qdep = 0;
2488 info->dev = dev;
2489 info->domain = domain;
2490 info->iommu = iommu;
2491 info->pasid_table = NULL;
David Brazdil0f672f62019-12-10 10:32:29 +00002492 info->auxd_enabled = 0;
2493 INIT_LIST_HEAD(&info->auxiliary_domains);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002494
2495 if (dev && dev_is_pci(dev)) {
2496 struct pci_dev *pdev = to_pci_dev(info->dev);
2497
David Brazdil0f672f62019-12-10 10:32:29 +00002498 if (!pdev->untrusted &&
2499 !pci_ats_disabled() &&
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002500 ecap_dev_iotlb_support(iommu->ecap) &&
2501 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2502 dmar_find_matched_atsr_unit(pdev))
2503 info->ats_supported = 1;
2504
David Brazdil0f672f62019-12-10 10:32:29 +00002505 if (sm_supported(iommu)) {
2506 if (pasid_supported(iommu)) {
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002507 int features = pci_pasid_features(pdev);
2508 if (features >= 0)
2509 info->pasid_supported = features | 1;
2510 }
2511
2512 if (info->ats_supported && ecap_prs(iommu->ecap) &&
2513 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2514 info->pri_supported = 1;
2515 }
2516 }
2517
2518 spin_lock_irqsave(&device_domain_lock, flags);
2519 if (dev)
2520 found = find_domain(dev);
2521
2522 if (!found) {
2523 struct device_domain_info *info2;
2524 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
2525 if (info2) {
2526 found = info2->domain;
2527 info2->dev = dev;
2528 }
2529 }
2530
2531 if (found) {
2532 spin_unlock_irqrestore(&device_domain_lock, flags);
2533 free_devinfo_mem(info);
2534 /* Caller must free the original domain */
2535 return found;
2536 }
2537
2538 spin_lock(&iommu->lock);
2539 ret = domain_attach_iommu(domain, iommu);
2540 spin_unlock(&iommu->lock);
2541
2542 if (ret) {
2543 spin_unlock_irqrestore(&device_domain_lock, flags);
2544 free_devinfo_mem(info);
2545 return NULL;
2546 }
2547
2548 list_add(&info->link, &domain->devices);
2549 list_add(&info->global, &device_domain_list);
2550 if (dev)
2551 dev->archdata.iommu = info;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002552 spin_unlock_irqrestore(&device_domain_lock, flags);
2553
David Brazdil0f672f62019-12-10 10:32:29 +00002554 /* PASID table is mandatory for a PCI device in scalable mode. */
2555 if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
2556 ret = intel_pasid_alloc_table(dev);
2557 if (ret) {
2558 dev_err(dev, "PASID table allocation failed\n");
2559 dmar_remove_one_dev_info(dev);
2560 return NULL;
2561 }
2562
2563 /* Setup the PASID entry for requests without PASID: */
Olivier Deprez0e641232021-09-23 10:07:05 +02002564 spin_lock_irqsave(&iommu->lock, flags);
David Brazdil0f672f62019-12-10 10:32:29 +00002565 if (hw_pass_through && domain_type_is_si(domain))
2566 ret = intel_pasid_setup_pass_through(iommu, domain,
2567 dev, PASID_RID2PASID);
2568 else
2569 ret = intel_pasid_setup_second_level(iommu, domain,
2570 dev, PASID_RID2PASID);
Olivier Deprez0e641232021-09-23 10:07:05 +02002571 spin_unlock_irqrestore(&iommu->lock, flags);
David Brazdil0f672f62019-12-10 10:32:29 +00002572 if (ret) {
2573 dev_err(dev, "Setup RID2PASID failed\n");
2574 dmar_remove_one_dev_info(dev);
2575 return NULL;
2576 }
2577 }
2578
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002579 if (dev && domain_context_mapping(domain, dev)) {
David Brazdil0f672f62019-12-10 10:32:29 +00002580 dev_err(dev, "Domain context map failed\n");
2581 dmar_remove_one_dev_info(dev);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002582 return NULL;
2583 }
2584
2585 return domain;
2586}
2587
2588static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2589{
2590 *(u16 *)opaque = alias;
2591 return 0;
2592}
2593
2594static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
2595{
David Brazdil0f672f62019-12-10 10:32:29 +00002596 struct device_domain_info *info;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002597 struct dmar_domain *domain = NULL;
2598 struct intel_iommu *iommu;
2599 u16 dma_alias;
2600 unsigned long flags;
2601 u8 bus, devfn;
2602
2603 iommu = device_to_iommu(dev, &bus, &devfn);
2604 if (!iommu)
2605 return NULL;
2606
2607 if (dev_is_pci(dev)) {
2608 struct pci_dev *pdev = to_pci_dev(dev);
2609
2610 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2611
2612 spin_lock_irqsave(&device_domain_lock, flags);
2613 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2614 PCI_BUS_NUM(dma_alias),
2615 dma_alias & 0xff);
2616 if (info) {
2617 iommu = info->iommu;
2618 domain = info->domain;
2619 }
2620 spin_unlock_irqrestore(&device_domain_lock, flags);
2621
2622 /* DMA alias already has a domain, use it */
2623 if (info)
2624 goto out;
2625 }
2626
2627 /* Allocate and initialize new domain for the device */
2628 domain = alloc_domain(0);
2629 if (!domain)
2630 return NULL;
2631 if (domain_init(domain, iommu, gaw)) {
2632 domain_exit(domain);
2633 return NULL;
2634 }
2635
2636out:
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002637 return domain;
2638}
2639
2640static struct dmar_domain *set_domain_for_dev(struct device *dev,
2641 struct dmar_domain *domain)
2642{
2643 struct intel_iommu *iommu;
2644 struct dmar_domain *tmp;
2645 u16 req_id, dma_alias;
2646 u8 bus, devfn;
2647
2648 iommu = device_to_iommu(dev, &bus, &devfn);
2649 if (!iommu)
2650 return NULL;
2651
2652 req_id = ((u16)bus << 8) | devfn;
2653
2654 if (dev_is_pci(dev)) {
2655 struct pci_dev *pdev = to_pci_dev(dev);
2656
2657 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2658
2659 /* register PCI DMA alias device */
2660 if (req_id != dma_alias) {
2661 tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2662 dma_alias & 0xff, NULL, domain);
2663
2664 if (!tmp || tmp != domain)
2665 return tmp;
2666 }
2667 }
2668
2669 tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2670 if (!tmp || tmp != domain)
2671 return tmp;
2672
2673 return domain;
2674}
2675
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002676static int iommu_domain_identity_map(struct dmar_domain *domain,
2677 unsigned long long start,
2678 unsigned long long end)
2679{
2680 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2681 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2682
2683 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2684 dma_to_mm_pfn(last_vpfn))) {
2685 pr_err("Reserving iova failed\n");
2686 return -ENOMEM;
2687 }
2688
2689 pr_debug("Mapping reserved region %llx-%llx\n", start, end);
2690 /*
2691 * RMRR range might have overlap with physical memory range,
2692 * clear it first
2693 */
2694 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2695
2696 return __domain_mapping(domain, first_vpfn, NULL,
2697 first_vpfn, last_vpfn - first_vpfn + 1,
2698 DMA_PTE_READ|DMA_PTE_WRITE);
2699}
2700
2701static int domain_prepare_identity_map(struct device *dev,
2702 struct dmar_domain *domain,
2703 unsigned long long start,
2704 unsigned long long end)
2705{
2706 /* For _hardware_ passthrough, don't bother. But for software
2707 passthrough, we do it anyway -- it may indicate a memory
 2708	   range which is reserved in E820, and so didn't get set
2709 up to start with in si_domain */
2710 if (domain == si_domain && hw_pass_through) {
David Brazdil0f672f62019-12-10 10:32:29 +00002711 dev_warn(dev, "Ignoring identity map for HW passthrough [0x%Lx - 0x%Lx]\n",
2712 start, end);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002713 return 0;
2714 }
2715
David Brazdil0f672f62019-12-10 10:32:29 +00002716 dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx]\n", start, end);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002717
2718 if (end < start) {
2719 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2720 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2721 dmi_get_system_info(DMI_BIOS_VENDOR),
2722 dmi_get_system_info(DMI_BIOS_VERSION),
2723 dmi_get_system_info(DMI_PRODUCT_VERSION));
2724 return -EIO;
2725 }
2726
2727 if (end >> agaw_to_width(domain->agaw)) {
2728 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2729 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2730 agaw_to_width(domain->agaw),
2731 dmi_get_system_info(DMI_BIOS_VENDOR),
2732 dmi_get_system_info(DMI_BIOS_VERSION),
2733 dmi_get_system_info(DMI_PRODUCT_VERSION));
2734 return -EIO;
2735 }
2736
2737 return iommu_domain_identity_map(domain, start, end);
2738}
2739
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002740static int md_domain_init(struct dmar_domain *domain, int guest_width);
2741
2742static int __init si_domain_init(int hw)
2743{
David Brazdil0f672f62019-12-10 10:32:29 +00002744 struct dmar_rmrr_unit *rmrr;
2745 struct device *dev;
2746 int i, nid, ret;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002747
2748 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2749 if (!si_domain)
2750 return -EFAULT;
2751
2752 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2753 domain_exit(si_domain);
2754 return -EFAULT;
2755 }
2756
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002757 if (hw)
2758 return 0;
2759
2760 for_each_online_node(nid) {
2761 unsigned long start_pfn, end_pfn;
2762 int i;
2763
2764 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2765 ret = iommu_domain_identity_map(si_domain,
2766 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2767 if (ret)
2768 return ret;
2769 }
2770 }
2771
David Brazdil0f672f62019-12-10 10:32:29 +00002772 /*
Olivier Deprez0e641232021-09-23 10:07:05 +02002773	 * Identity map the RMRRs so that devices with RMRRs can also use
2774 * the si_domain.
David Brazdil0f672f62019-12-10 10:32:29 +00002775 */
2776 for_each_rmrr_units(rmrr) {
2777 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2778 i, dev) {
2779 unsigned long long start = rmrr->base_address;
2780 unsigned long long end = rmrr->end_address;
2781
David Brazdil0f672f62019-12-10 10:32:29 +00002782 if (WARN_ON(end < start ||
2783 end >> agaw_to_width(si_domain->agaw)))
2784 continue;
2785
2786 ret = iommu_domain_identity_map(si_domain, start, end);
2787 if (ret)
2788 return ret;
2789 }
2790 }
2791
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002792 return 0;
2793}
2794
2795static int identity_mapping(struct device *dev)
2796{
2797 struct device_domain_info *info;
2798
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002799 info = dev->archdata.iommu;
David Brazdil0f672f62019-12-10 10:32:29 +00002800 if (info && info != DUMMY_DEVICE_DOMAIN_INFO && info != DEFER_DEVICE_DOMAIN_INFO)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002801 return (info->domain == si_domain);
2802
2803 return 0;
2804}
2805
2806static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2807{
2808 struct dmar_domain *ndomain;
2809 struct intel_iommu *iommu;
2810 u8 bus, devfn;
2811
2812 iommu = device_to_iommu(dev, &bus, &devfn);
2813 if (!iommu)
2814 return -ENODEV;
2815
2816 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2817 if (ndomain != domain)
2818 return -EBUSY;
2819
2820 return 0;
2821}
2822
2823static bool device_has_rmrr(struct device *dev)
2824{
2825 struct dmar_rmrr_unit *rmrr;
2826 struct device *tmp;
2827 int i;
2828
2829 rcu_read_lock();
2830 for_each_rmrr_units(rmrr) {
2831 /*
2832 * Return TRUE if this RMRR contains the device that
2833 * is passed in.
2834 */
2835 for_each_active_dev_scope(rmrr->devices,
2836 rmrr->devices_cnt, i, tmp)
David Brazdil0f672f62019-12-10 10:32:29 +00002837 if (tmp == dev ||
2838 is_downstream_to_pci_bridge(dev, tmp)) {
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002839 rcu_read_unlock();
2840 return true;
2841 }
2842 }
2843 rcu_read_unlock();
2844 return false;
2845}
2846
David Brazdil0f672f62019-12-10 10:32:29 +00002847/**
2848 * device_rmrr_is_relaxable - Test whether the RMRR of this device
 2849 * is relaxable (i.e. is allowed to go unenforced under some conditions)
2850 * @dev: device handle
2851 *
2852 * We assume that PCI USB devices with RMRRs have them largely
2853 * for historical reasons and that the RMRR space is not actively used post
2854 * boot. This exclusion may change if vendors begin to abuse it.
2855 *
2856 * The same exception is made for graphics devices, with the requirement that
2857 * any use of the RMRR regions will be torn down before assigning the device
2858 * to a guest.
2859 *
2860 * Return: true if the RMRR is relaxable, false otherwise
2861 */
2862static bool device_rmrr_is_relaxable(struct device *dev)
2863{
2864 struct pci_dev *pdev;
2865
2866 if (!dev_is_pci(dev))
2867 return false;
2868
2869 pdev = to_pci_dev(dev);
2870 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2871 return true;
2872 else
2873 return false;
2874}
2875
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002876/*
2877 * There are a couple cases where we need to restrict the functionality of
2878 * devices associated with RMRRs. The first is when evaluating a device for
2879 * identity mapping because problems exist when devices are moved in and out
2880 * of domains and their respective RMRR information is lost. This means that
2881 * a device with associated RMRRs will never be in a "passthrough" domain.
2882 * The second is use of the device through the IOMMU API. This interface
2883 * expects to have full control of the IOVA space for the device. We cannot
2884 * satisfy both the requirement that RMRR access is maintained and have an
2885 * unencumbered IOVA space. We also have no ability to quiesce the device's
2886 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2887 * We therefore prevent devices associated with an RMRR from participating in
2888 * the IOMMU API, which eliminates them from device assignment.
2889 *
David Brazdil0f672f62019-12-10 10:32:29 +00002890 * In both cases, devices which have relaxable RMRRs are not concerned by this
2891 * restriction. See device_rmrr_is_relaxable comment.
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002892 */
2893static bool device_is_rmrr_locked(struct device *dev)
2894{
2895 if (!device_has_rmrr(dev))
2896 return false;
2897
David Brazdil0f672f62019-12-10 10:32:29 +00002898 if (device_rmrr_is_relaxable(dev))
2899 return false;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002900
2901 return true;
2902}
2903
David Brazdil0f672f62019-12-10 10:32:29 +00002904/*
2905 * Return the required default domain type for a specific device.
2906 *
 2907 * @dev: the device in question
 2908 *
2909 *
2910 * Returns:
2911 * - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain
2912 * - IOMMU_DOMAIN_IDENTITY: device requires an identical mapping domain
2913 * - 0: both identity and dynamic domains work for this device
2914 */
2915static int device_def_domain_type(struct device *dev)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002916{
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002917 if (dev_is_pci(dev)) {
2918 struct pci_dev *pdev = to_pci_dev(dev);
2919
David Brazdil0f672f62019-12-10 10:32:29 +00002920 /*
2921 * Prevent any device marked as untrusted from getting
 2922		 * placed into the static identity mapping domain.
2923 */
2924 if (pdev->untrusted)
2925 return IOMMU_DOMAIN_DMA;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002926
2927 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
David Brazdil0f672f62019-12-10 10:32:29 +00002928 return IOMMU_DOMAIN_IDENTITY;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002929
2930 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
David Brazdil0f672f62019-12-10 10:32:29 +00002931 return IOMMU_DOMAIN_IDENTITY;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002932
2933 /*
2934 * We want to start off with all devices in the 1:1 domain, and
2935 * take them out later if we find they can't access all of memory.
2936 *
2937 * However, we can't do this for PCI devices behind bridges,
2938 * because all PCI devices behind the same bridge will end up
2939 * with the same source-id on their transactions.
2940 *
2941 * Practically speaking, we can't change things around for these
2942 * devices at run-time, because we can't be sure there'll be no
2943 * DMA transactions in flight for any of their siblings.
2944 *
2945 * So PCI devices (unless they're on the root bus) as well as
2946 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2947 * the 1:1 domain, just in _case_ one of their siblings turns out
2948 * not to be able to map all of memory.
2949 */
2950 if (!pci_is_pcie(pdev)) {
2951 if (!pci_is_root_bus(pdev->bus))
David Brazdil0f672f62019-12-10 10:32:29 +00002952 return IOMMU_DOMAIN_DMA;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002953 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
David Brazdil0f672f62019-12-10 10:32:29 +00002954 return IOMMU_DOMAIN_DMA;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002955 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
David Brazdil0f672f62019-12-10 10:32:29 +00002956 return IOMMU_DOMAIN_DMA;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002957 }
2958
David Brazdil0f672f62019-12-10 10:32:29 +00002959 return (iommu_identity_mapping & IDENTMAP_ALL) ?
2960 IOMMU_DOMAIN_IDENTITY : 0;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002961}
2962
2963static void intel_iommu_init_qi(struct intel_iommu *iommu)
2964{
2965 /*
2966 * Start from the sane iommu hardware state.
2967 * If the queued invalidation is already initialized by us
2968 * (for example, while enabling interrupt-remapping) then
2969 * we got the things already rolling from a sane state.
2970 */
2971 if (!iommu->qi) {
2972 /*
2973 * Clear any previous faults.
2974 */
2975 dmar_fault(-1, iommu);
2976 /*
2977 * Disable queued invalidation if supported and already enabled
2978 * before OS handover.
2979 */
2980 dmar_disable_qi(iommu);
2981 }
2982
2983 if (dmar_enable_qi(iommu)) {
2984 /*
2985 * Queued Invalidate not enabled, use Register Based Invalidate
2986 */
2987 iommu->flush.flush_context = __iommu_flush_context;
2988 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2989 pr_info("%s: Using Register based invalidation\n",
2990 iommu->name);
2991 } else {
2992 iommu->flush.flush_context = qi_flush_context;
2993 iommu->flush.flush_iotlb = qi_flush_iotlb;
2994 pr_info("%s: Using Queued invalidation\n", iommu->name);
2995 }
2996}
2997
2998static int copy_context_table(struct intel_iommu *iommu,
2999 struct root_entry *old_re,
3000 struct context_entry **tbl,
3001 int bus, bool ext)
3002{
3003 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
3004 struct context_entry *new_ce = NULL, ce;
3005 struct context_entry *old_ce = NULL;
3006 struct root_entry re;
3007 phys_addr_t old_ce_phys;
3008
3009 tbl_idx = ext ? bus * 2 : bus;
3010 memcpy(&re, old_re, sizeof(re));
3011
3012 for (devfn = 0; devfn < 256; devfn++) {
3013 /* First calculate the correct index */
3014 idx = (ext ? devfn * 2 : devfn) % 256;
3015
3016 if (idx == 0) {
3017 /* First save what we may have and clean up */
3018 if (new_ce) {
3019 tbl[tbl_idx] = new_ce;
3020 __iommu_flush_cache(iommu, new_ce,
3021 VTD_PAGE_SIZE);
3022 pos = 1;
3023 }
3024
3025 if (old_ce)
3026 memunmap(old_ce);
3027
3028 ret = 0;
3029 if (devfn < 0x80)
3030 old_ce_phys = root_entry_lctp(&re);
3031 else
3032 old_ce_phys = root_entry_uctp(&re);
3033
3034 if (!old_ce_phys) {
3035 if (ext && devfn == 0) {
3036 /* No LCTP, try UCTP */
3037 devfn = 0x7f;
3038 continue;
3039 } else {
3040 goto out;
3041 }
3042 }
3043
3044 ret = -ENOMEM;
3045 old_ce = memremap(old_ce_phys, PAGE_SIZE,
3046 MEMREMAP_WB);
3047 if (!old_ce)
3048 goto out;
3049
3050 new_ce = alloc_pgtable_page(iommu->node);
3051 if (!new_ce)
3052 goto out_unmap;
3053
3054 ret = 0;
3055 }
3056
3057 /* Now copy the context entry */
3058 memcpy(&ce, old_ce + idx, sizeof(ce));
3059
3060 if (!__context_present(&ce))
3061 continue;
3062
3063 did = context_domain_id(&ce);
3064 if (did >= 0 && did < cap_ndoms(iommu->cap))
3065 set_bit(did, iommu->domain_ids);
3066
3067 /*
3068 * We need a marker for copied context entries. This
3069 * marker needs to work for the old format as well as
3070 * for extended context entries.
3071 *
3072 * Bit 67 of the context entry is used. In the old
3073 * format this bit is available to software, in the
3074 * extended format it is the PGE bit, but PGE is ignored
3075 * by HW if PASIDs are disabled (and thus still
3076 * available).
3077 *
3078 * So disable PASIDs first and then mark the entry
3079 * copied. This means that we don't copy PASID
3080 * translations from the old kernel, but this is fine as
3081 * faults there are not fatal.
3082 */
3083 context_clear_pasid_enable(&ce);
3084 context_set_copied(&ce);
3085
3086 new_ce[idx] = ce;
3087 }
3088
3089 tbl[tbl_idx + pos] = new_ce;
3090
3091 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3092
3093out_unmap:
3094 memunmap(old_ce);
3095
3096out:
3097 return ret;
3098}
3099
3100static int copy_translation_tables(struct intel_iommu *iommu)
3101{
3102 struct context_entry **ctxt_tbls;
3103 struct root_entry *old_rt;
3104 phys_addr_t old_rt_phys;
3105 int ctxt_table_entries;
3106 unsigned long flags;
3107 u64 rtaddr_reg;
3108 int bus, ret;
3109 bool new_ext, ext;
3110
3111 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3112 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
3113 new_ext = !!ecap_ecs(iommu->ecap);
3114
3115 /*
3116 * The RTT bit can only be changed when translation is disabled,
 3117	 * but disabling translation means opening a window for data
3118 * corruption. So bail out and don't copy anything if we would
3119 * have to change the bit.
3120 */
3121 if (new_ext != ext)
3122 return -EINVAL;
3123
3124 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3125 if (!old_rt_phys)
3126 return -EINVAL;
3127
3128 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
3129 if (!old_rt)
3130 return -ENOMEM;
3131
3132 /* This is too big for the stack - allocate it from slab */
3133 ctxt_table_entries = ext ? 512 : 256;
3134 ret = -ENOMEM;
3135 ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
3136 if (!ctxt_tbls)
3137 goto out_unmap;
3138
3139 for (bus = 0; bus < 256; bus++) {
3140 ret = copy_context_table(iommu, &old_rt[bus],
3141 ctxt_tbls, bus, ext);
3142 if (ret) {
3143 pr_err("%s: Failed to copy context table for bus %d\n",
3144 iommu->name, bus);
3145 continue;
3146 }
3147 }
3148
3149 spin_lock_irqsave(&iommu->lock, flags);
3150
3151 /* Context tables are copied, now write them to the root_entry table */
3152 for (bus = 0; bus < 256; bus++) {
3153 int idx = ext ? bus * 2 : bus;
3154 u64 val;
3155
3156 if (ctxt_tbls[idx]) {
3157 val = virt_to_phys(ctxt_tbls[idx]) | 1;
3158 iommu->root_entry[bus].lo = val;
3159 }
3160
3161 if (!ext || !ctxt_tbls[idx + 1])
3162 continue;
3163
3164 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3165 iommu->root_entry[bus].hi = val;
3166 }
3167
3168 spin_unlock_irqrestore(&iommu->lock, flags);
3169
3170 kfree(ctxt_tbls);
3171
3172 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3173
3174 ret = 0;
3175
3176out_unmap:
3177 memunmap(old_rt);
3178
3179 return ret;
3180}
3181
3182static int __init init_dmars(void)
3183{
3184 struct dmar_drhd_unit *drhd;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003185 struct intel_iommu *iommu;
David Brazdil0f672f62019-12-10 10:32:29 +00003186 int ret;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003187
3188 /*
3189 * for each drhd
3190 * allocate root
3191 * initialize and program root entry to not present
3192 * endfor
3193 */
3194 for_each_drhd_unit(drhd) {
3195 /*
 3196		 * lock not needed as this is only incremented in the single-
 3197		 * threaded kernel __init code path; all other accesses are
 3198		 * read only
3199 */
3200 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
3201 g_num_of_iommus++;
3202 continue;
3203 }
3204 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
3205 }
3206
3207 /* Preallocate enough resources for IOMMU hot-addition */
3208 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3209 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3210
3211 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3212 GFP_KERNEL);
3213 if (!g_iommus) {
3214 pr_err("Allocating global iommu array failed\n");
3215 ret = -ENOMEM;
3216 goto error;
3217 }
3218
David Brazdil0f672f62019-12-10 10:32:29 +00003219 for_each_iommu(iommu, drhd) {
3220 if (drhd->ignored) {
3221 iommu_disable_translation(iommu);
3222 continue;
3223 }
3224
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003225 /*
 3226		 * Find the max pasid size of all IOMMUs in the system.
3227 * We need to ensure the system pasid table is no bigger
3228 * than the smallest supported.
3229 */
David Brazdil0f672f62019-12-10 10:32:29 +00003230 if (pasid_supported(iommu)) {
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003231 u32 temp = 2 << ecap_pss(iommu->ecap);
3232
3233 intel_pasid_max_id = min_t(u32, temp,
3234 intel_pasid_max_id);
3235 }
3236
3237 g_iommus[iommu->seq_id] = iommu;
3238
3239 intel_iommu_init_qi(iommu);
3240
3241 ret = iommu_init_domains(iommu);
3242 if (ret)
3243 goto free_iommu;
3244
3245 init_translation_status(iommu);
3246
3247 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3248 iommu_disable_translation(iommu);
3249 clear_translation_pre_enabled(iommu);
3250 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3251 iommu->name);
3252 }
3253
3254 /*
3255 * TBD:
3256 * we could share the same root & context tables
 3257		 * among all IOMMUs. Need to split it later.
3258 */
3259 ret = iommu_alloc_root_entry(iommu);
3260 if (ret)
3261 goto free_iommu;
3262
3263 if (translation_pre_enabled(iommu)) {
3264 pr_info("Translation already enabled - trying to copy translation structures\n");
3265
3266 ret = copy_translation_tables(iommu);
3267 if (ret) {
3268 /*
3269 * We found the IOMMU with translation
3270 * enabled - but failed to copy over the
3271 * old root-entry table. Try to proceed
3272 * by disabling translation now and
3273 * allocating a clean root-entry table.
3274 * This might cause DMAR faults, but
3275 * probably the dump will still succeed.
3276 */
3277 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3278 iommu->name);
3279 iommu_disable_translation(iommu);
3280 clear_translation_pre_enabled(iommu);
3281 } else {
3282 pr_info("Copied translation tables from previous kernel for %s\n",
3283 iommu->name);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003284 }
3285 }
3286
3287 if (!ecap_pass_through(iommu->ecap))
3288 hw_pass_through = 0;
Olivier Deprez0e641232021-09-23 10:07:05 +02003289
3290 if (!intel_iommu_strict && cap_caching_mode(iommu->cap)) {
3291 pr_info("Disable batched IOTLB flush due to virtualization");
3292 intel_iommu_strict = 1;
3293 }
3294
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003295#ifdef CONFIG_INTEL_IOMMU_SVM
David Brazdil0f672f62019-12-10 10:32:29 +00003296 if (pasid_supported(iommu))
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003297 intel_svm_init(iommu);
3298#endif
3299 }
3300
3301 /*
3302 * Now that qi is enabled on all iommus, set the root entry and flush
3303 * caches. This is required on some Intel X58 chipsets, otherwise the
3304 * flush_context function will loop forever and the boot hangs.
3305 */
3306 for_each_active_iommu(iommu, drhd) {
3307 iommu_flush_write_buffer(iommu);
3308 iommu_set_root_entry(iommu);
3309 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3310 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3311 }
3312
David Brazdil0f672f62019-12-10 10:32:29 +00003313 if (iommu_default_passthrough())
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003314 iommu_identity_mapping |= IDENTMAP_ALL;
3315
3316#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
David Brazdil0f672f62019-12-10 10:32:29 +00003317 dmar_map_gfx = 0;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003318#endif
3319
David Brazdil0f672f62019-12-10 10:32:29 +00003320 if (!dmar_map_gfx)
3321 iommu_identity_mapping |= IDENTMAP_GFX;
3322
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003323 check_tylersburg_isoch();
3324
David Brazdil0f672f62019-12-10 10:32:29 +00003325 ret = si_domain_init(hw_pass_through);
3326 if (ret)
3327 goto free_iommu;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003328
3329 /*
3330 * for each drhd
3331 * enable fault log
3332 * global invalidate context cache
3333 * global invalidate iotlb
3334 * enable translation
3335 */
3336 for_each_iommu(iommu, drhd) {
3337 if (drhd->ignored) {
3338 /*
3339 * we always have to disable PMRs or DMA may fail on
3340 * this device
3341 */
3342 if (force_on)
3343 iommu_disable_protect_mem_regions(iommu);
3344 continue;
3345 }
3346
3347 iommu_flush_write_buffer(iommu);
3348
3349#ifdef CONFIG_INTEL_IOMMU_SVM
David Brazdil0f672f62019-12-10 10:32:29 +00003350 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
3351 /*
 3352			 * Calling dmar_alloc_hwirq() with dmar_global_lock held
 3353			 * could cause a lock race condition.
3354 */
3355 up_write(&dmar_global_lock);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003356 ret = intel_svm_enable_prq(iommu);
David Brazdil0f672f62019-12-10 10:32:29 +00003357 down_write(&dmar_global_lock);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003358 if (ret)
3359 goto free_iommu;
3360 }
3361#endif
3362 ret = dmar_set_interrupt(iommu);
3363 if (ret)
3364 goto free_iommu;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003365 }
3366
3367 return 0;
3368
3369free_iommu:
3370 for_each_active_iommu(iommu, drhd) {
3371 disable_dmar_iommu(iommu);
3372 free_dmar_iommu(iommu);
3373 }
3374
3375 kfree(g_iommus);
3376
3377error:
3378 return ret;
3379}
3380
3381/* This takes a number of _MM_ pages, not VTD pages */
3382static unsigned long intel_alloc_iova(struct device *dev,
3383 struct dmar_domain *domain,
3384 unsigned long nrpages, uint64_t dma_mask)
3385{
David Brazdil0f672f62019-12-10 10:32:29 +00003386 unsigned long iova_pfn;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003387
3388 /* Restrict dma_mask to the width that the iommu can handle */
3389 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
3390 /* Ensure we reserve the whole size-aligned region */
3391 nrpages = __roundup_pow_of_two(nrpages);
3392
3393 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
3394 /*
3395 * First try to allocate an io virtual address in
3396 * DMA_BIT_MASK(32) and if that fails then try allocating
 3397		 * from the higher range
3398 */
3399 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3400 IOVA_PFN(DMA_BIT_MASK(32)), false);
3401 if (iova_pfn)
3402 return iova_pfn;
3403 }
3404 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3405 IOVA_PFN(dma_mask), true);
3406 if (unlikely(!iova_pfn)) {
Olivier Deprez0e641232021-09-23 10:07:05 +02003407 dev_err_once(dev, "Allocating %ld-page iova failed\n",
3408 nrpages);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003409 return 0;
3410 }
3411
3412 return iova_pfn;
3413}
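/*
 * Illustrative note, not part of the driver: because nrpages is rounded up
 * to a power of two above, the returned region is naturally size aligned.
 * For a device with a 64-bit DMA mask the allocator first tries to place
 * the IOVA below 4GiB (unless forcedac is set) and only then falls back to
 * the full range, keeping low IOVA space available for 32-bit-limited
 * devices that share the domain.
 */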
3414
David Brazdil0f672f62019-12-10 10:32:29 +00003415static struct dmar_domain *get_private_domain_for_dev(struct device *dev)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003416{
3417 struct dmar_domain *domain, *tmp;
3418 struct dmar_rmrr_unit *rmrr;
3419 struct device *i_dev;
3420 int i, ret;
3421
David Brazdil0f672f62019-12-10 10:32:29 +00003422	/* Device shouldn't be attached to any domain yet. */
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003423 domain = find_domain(dev);
3424 if (domain)
David Brazdil0f672f62019-12-10 10:32:29 +00003425 return NULL;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003426
3427 domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
3428 if (!domain)
3429 goto out;
3430
3431 /* We have a new domain - setup possible RMRRs for the device */
3432 rcu_read_lock();
3433 for_each_rmrr_units(rmrr) {
3434 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3435 i, i_dev) {
3436 if (i_dev != dev)
3437 continue;
3438
3439 ret = domain_prepare_identity_map(dev, domain,
3440 rmrr->base_address,
3441 rmrr->end_address);
3442 if (ret)
3443 dev_err(dev, "Mapping reserved region failed\n");
3444 }
3445 }
3446 rcu_read_unlock();
3447
3448 tmp = set_domain_for_dev(dev, domain);
3449 if (!tmp || domain != tmp) {
3450 domain_exit(domain);
3451 domain = tmp;
3452 }
3453
3454out:
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003455 if (!domain)
David Brazdil0f672f62019-12-10 10:32:29 +00003456 dev_err(dev, "Allocating domain failed\n");
3457 else
3458 domain->domain.type = IOMMU_DOMAIN_DMA;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003459
3460 return domain;
3461}
3462
 3463/* Check if the dev needs to go through the non-identity map and unmap process. */
David Brazdil0f672f62019-12-10 10:32:29 +00003464static bool iommu_need_mapping(struct device *dev)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003465{
David Brazdil0f672f62019-12-10 10:32:29 +00003466 int ret;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003467
3468 if (iommu_dummy(dev))
David Brazdil0f672f62019-12-10 10:32:29 +00003469 return false;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003470
David Brazdil0f672f62019-12-10 10:32:29 +00003471 ret = identity_mapping(dev);
3472 if (ret) {
3473 u64 dma_mask = *dev->dma_mask;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003474
David Brazdil0f672f62019-12-10 10:32:29 +00003475 if (dev->coherent_dma_mask && dev->coherent_dma_mask < dma_mask)
3476 dma_mask = dev->coherent_dma_mask;
3477
3478 if (dma_mask >= dma_direct_get_required_mask(dev))
3479 return false;
3480
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003481 /*
David Brazdil0f672f62019-12-10 10:32:29 +00003482		 * 32 bit DMA is removed from si_domain and falls back to
3483 * non-identity mapping.
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003484 */
David Brazdil0f672f62019-12-10 10:32:29 +00003485 dmar_remove_one_dev_info(dev);
3486 ret = iommu_request_dma_domain_for_dev(dev);
3487 if (ret) {
3488 struct iommu_domain *domain;
3489 struct dmar_domain *dmar_domain;
3490
3491 domain = iommu_get_domain_for_dev(dev);
3492 if (domain) {
3493 dmar_domain = to_dmar_domain(domain);
3494 dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003495 }
David Brazdil0f672f62019-12-10 10:32:29 +00003496 dmar_remove_one_dev_info(dev);
3497 get_private_domain_for_dev(dev);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003498 }
David Brazdil0f672f62019-12-10 10:32:29 +00003499
3500 dev_info(dev, "32bit DMA uses non-identity mapping\n");
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003501 }
3502
David Brazdil0f672f62019-12-10 10:32:29 +00003503 return true;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003504}
3505
3506static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3507 size_t size, int dir, u64 dma_mask)
3508{
3509 struct dmar_domain *domain;
3510 phys_addr_t start_paddr;
3511 unsigned long iova_pfn;
3512 int prot = 0;
3513 int ret;
3514 struct intel_iommu *iommu;
3515 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
3516
3517 BUG_ON(dir == DMA_NONE);
3518
David Brazdil0f672f62019-12-10 10:32:29 +00003519 domain = find_domain(dev);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003520 if (!domain)
David Brazdil0f672f62019-12-10 10:32:29 +00003521 return DMA_MAPPING_ERROR;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003522
3523 iommu = domain_get_iommu(domain);
3524 size = aligned_nrpages(paddr, size);
3525
3526 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3527 if (!iova_pfn)
3528 goto error;
3529
3530 /*
 3531	 * Check if DMAR supports zero-length reads on write-only
 3532	 * mappings.
3533 */
3534 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3535 !cap_zlr(iommu->cap))
3536 prot |= DMA_PTE_READ;
3537 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3538 prot |= DMA_PTE_WRITE;
3539 /*
 3540	 * paddr to (paddr + size) may cover only part of a page, but we must
 3541	 * map the whole page. Note: if two parts of one page are mapped
 3542	 * separately, we may end up with two guest addresses mapping to the
 3543	 * same host paddr, but this is not a big problem.
3544 */
3545 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
3546 mm_to_dma_pfn(paddr_pfn), size, prot);
3547 if (ret)
3548 goto error;
3549
3550 start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT;
3551 start_paddr += paddr & ~PAGE_MASK;
David Brazdil0f672f62019-12-10 10:32:29 +00003552
3553 trace_map_single(dev, start_paddr, paddr, size << VTD_PAGE_SHIFT);
3554
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003555 return start_paddr;
3556
3557error:
3558 if (iova_pfn)
3559 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
David Brazdil0f672f62019-12-10 10:32:29 +00003560 dev_err(dev, "Device request: %zx@%llx dir %d --- failed\n",
3561 size, (unsigned long long)paddr, dir);
3562 return DMA_MAPPING_ERROR;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003563}
3564
3565static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3566 unsigned long offset, size_t size,
3567 enum dma_data_direction dir,
3568 unsigned long attrs)
3569{
David Brazdil0f672f62019-12-10 10:32:29 +00003570 if (iommu_need_mapping(dev))
3571 return __intel_map_single(dev, page_to_phys(page) + offset,
3572 size, dir, *dev->dma_mask);
3573 return dma_direct_map_page(dev, page, offset, size, dir, attrs);
3574}
3575
3576static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr,
3577 size_t size, enum dma_data_direction dir,
3578 unsigned long attrs)
3579{
3580 if (iommu_need_mapping(dev))
3581 return __intel_map_single(dev, phys_addr, size, dir,
3582 *dev->dma_mask);
3583 return dma_direct_map_resource(dev, phys_addr, size, dir, attrs);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003584}
3585
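/*
 * Tear down the IOVA range backing a previous mapping. In strict mode,
 * for untrusted PCI devices, or when no IOVA flush queue is available,
 * the IOTLB is flushed and the IOVA freed synchronously; otherwise the
 * release is deferred to the flush queue.
 */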
3586static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
3587{
3588 struct dmar_domain *domain;
3589 unsigned long start_pfn, last_pfn;
3590 unsigned long nrpages;
3591 unsigned long iova_pfn;
3592 struct intel_iommu *iommu;
3593 struct page *freelist;
David Brazdil0f672f62019-12-10 10:32:29 +00003594 struct pci_dev *pdev = NULL;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003595
3596 domain = find_domain(dev);
3597 BUG_ON(!domain);
3598
3599 iommu = domain_get_iommu(domain);
3600
3601 iova_pfn = IOVA_PFN(dev_addr);
3602
3603 nrpages = aligned_nrpages(dev_addr, size);
3604 start_pfn = mm_to_dma_pfn(iova_pfn);
3605 last_pfn = start_pfn + nrpages - 1;
3606
David Brazdil0f672f62019-12-10 10:32:29 +00003607 if (dev_is_pci(dev))
3608 pdev = to_pci_dev(dev);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003609
3610 freelist = domain_unmap(domain, start_pfn, last_pfn);
David Brazdil0f672f62019-12-10 10:32:29 +00003611 if (intel_iommu_strict || (pdev && pdev->untrusted) ||
3612 !has_iova_flush_queue(&domain->iovad)) {
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003613 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
3614 nrpages, !freelist, 0);
3615 /* free iova */
3616 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
3617 dma_free_pagelist(freelist);
3618 } else {
3619 queue_iova(&domain->iovad, iova_pfn, nrpages,
3620 (unsigned long)freelist);
3621 /*
 3622		 * queue up the release of the unmap to save the roughly 1/6th of
 3623		 * the CPU time otherwise spent on the IOTLB flush operation.
3624 */
3625 }
David Brazdil0f672f62019-12-10 10:32:29 +00003626
3627 trace_unmap_single(dev, dev_addr, size);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003628}
3629
3630static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3631 size_t size, enum dma_data_direction dir,
3632 unsigned long attrs)
3633{
David Brazdil0f672f62019-12-10 10:32:29 +00003634 if (iommu_need_mapping(dev))
3635 intel_unmap(dev, dev_addr, size);
3636 else
3637 dma_direct_unmap_page(dev, dev_addr, size, dir, attrs);
3638}
3639
3640static void intel_unmap_resource(struct device *dev, dma_addr_t dev_addr,
3641 size_t size, enum dma_data_direction dir, unsigned long attrs)
3642{
3643 if (iommu_need_mapping(dev))
3644 intel_unmap(dev, dev_addr, size);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003645}
3646
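/*
 * Allocate and map a coherent DMA buffer. Devices that don't need an
 * IOMMU mapping are handled by dma_direct_alloc(); otherwise pages are
 * allocated (from the contiguous allocator when blocking is allowed),
 * zeroed and mapped bidirectionally under the coherent DMA mask.
 */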
3647static void *intel_alloc_coherent(struct device *dev, size_t size,
3648 dma_addr_t *dma_handle, gfp_t flags,
3649 unsigned long attrs)
3650{
3651 struct page *page = NULL;
3652 int order;
3653
David Brazdil0f672f62019-12-10 10:32:29 +00003654 if (!iommu_need_mapping(dev))
3655 return dma_direct_alloc(dev, size, dma_handle, flags, attrs);
3656
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003657 size = PAGE_ALIGN(size);
3658 order = get_order(size);
3659
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003660 if (gfpflags_allow_blocking(flags)) {
3661 unsigned int count = size >> PAGE_SHIFT;
3662
3663 page = dma_alloc_from_contiguous(dev, count, order,
3664 flags & __GFP_NOWARN);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003665 }
3666
3667 if (!page)
3668 page = alloc_pages(flags, order);
3669 if (!page)
3670 return NULL;
3671 memset(page_address(page), 0, size);
3672
3673 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3674 DMA_BIDIRECTIONAL,
3675 dev->coherent_dma_mask);
David Brazdil0f672f62019-12-10 10:32:29 +00003676 if (*dma_handle != DMA_MAPPING_ERROR)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003677 return page_address(page);
3678 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3679 __free_pages(page, order);
3680
3681 return NULL;
3682}
3683
3684static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
3685 dma_addr_t dma_handle, unsigned long attrs)
3686{
3687 int order;
3688 struct page *page = virt_to_page(vaddr);
3689
David Brazdil0f672f62019-12-10 10:32:29 +00003690 if (!iommu_need_mapping(dev))
3691 return dma_direct_free(dev, size, vaddr, dma_handle, attrs);
3692
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003693 size = PAGE_ALIGN(size);
3694 order = get_order(size);
3695
3696 intel_unmap(dev, dma_handle, size);
3697 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3698 __free_pages(page, order);
3699}
3700
3701static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
3702 int nelems, enum dma_data_direction dir,
3703 unsigned long attrs)
3704{
3705 dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK;
3706 unsigned long nrpages = 0;
3707 struct scatterlist *sg;
3708 int i;
3709
David Brazdil0f672f62019-12-10 10:32:29 +00003710 if (!iommu_need_mapping(dev))
3711 return dma_direct_unmap_sg(dev, sglist, nelems, dir, attrs);
3712
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003713 for_each_sg(sglist, sg, nelems, i) {
3714 nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
3715 }
3716
3717 intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003718
David Brazdil0f672f62019-12-10 10:32:29 +00003719 trace_unmap_sg(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003720}
3721
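/*
 * Map a scatterlist into one contiguous IOVA range. Returns the number
 * of entries mapped, or 0 on failure as the DMA API expects for
 * scatterlist mapping errors.
 */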
3722static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3723 enum dma_data_direction dir, unsigned long attrs)
3724{
3725 int i;
3726 struct dmar_domain *domain;
3727 size_t size = 0;
3728 int prot = 0;
3729 unsigned long iova_pfn;
3730 int ret;
3731 struct scatterlist *sg;
3732 unsigned long start_vpfn;
3733 struct intel_iommu *iommu;
3734
3735 BUG_ON(dir == DMA_NONE);
David Brazdil0f672f62019-12-10 10:32:29 +00003736 if (!iommu_need_mapping(dev))
3737 return dma_direct_map_sg(dev, sglist, nelems, dir, attrs);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003738
David Brazdil0f672f62019-12-10 10:32:29 +00003739 domain = find_domain(dev);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003740 if (!domain)
3741 return 0;
3742
3743 iommu = domain_get_iommu(domain);
3744
3745 for_each_sg(sglist, sg, nelems, i)
3746 size += aligned_nrpages(sg->offset, sg->length);
3747
3748 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3749 *dev->dma_mask);
3750 if (!iova_pfn) {
3751 sglist->dma_length = 0;
3752 return 0;
3753 }
3754
3755 /*
 3756	 * Check if DMAR supports zero-length reads on write-only
 3757	 * mappings.
3758 */
3759 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3760 !cap_zlr(iommu->cap))
3761 prot |= DMA_PTE_READ;
3762 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3763 prot |= DMA_PTE_WRITE;
3764
3765 start_vpfn = mm_to_dma_pfn(iova_pfn);
3766
3767 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3768 if (unlikely(ret)) {
3769 dma_pte_free_pagetable(domain, start_vpfn,
3770 start_vpfn + size - 1,
3771 agaw_to_level(domain->agaw) + 1);
3772 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
3773 return 0;
3774 }
3775
David Brazdil0f672f62019-12-10 10:32:29 +00003776 trace_map_sg(dev, iova_pfn << PAGE_SHIFT,
3777 sg_phys(sglist), size << VTD_PAGE_SHIFT);
3778
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003779 return nelems;
3780}
3781
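/*
 * A device translated by the IOMMU can always work with a 32-bit DMA
 * mask; only devices that bypass the IOMMU report the mask required
 * for direct mapping.
 */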
David Brazdil0f672f62019-12-10 10:32:29 +00003782static u64 intel_get_required_mask(struct device *dev)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003783{
David Brazdil0f672f62019-12-10 10:32:29 +00003784 if (!iommu_need_mapping(dev))
3785 return dma_direct_get_required_mask(dev);
3786 return DMA_BIT_MASK(32);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003787}
3788
David Brazdil0f672f62019-12-10 10:32:29 +00003789static const struct dma_map_ops intel_dma_ops = {
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003790 .alloc = intel_alloc_coherent,
3791 .free = intel_free_coherent,
3792 .map_sg = intel_map_sg,
3793 .unmap_sg = intel_unmap_sg,
3794 .map_page = intel_map_page,
3795 .unmap_page = intel_unmap_page,
David Brazdil0f672f62019-12-10 10:32:29 +00003796 .map_resource = intel_map_resource,
3797 .unmap_resource = intel_unmap_resource,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003798 .dma_supported = dma_direct_supported,
David Brazdil0f672f62019-12-10 10:32:29 +00003799 .mmap = dma_common_mmap,
3800 .get_sgtable = dma_common_get_sgtable,
3801 .get_required_mask = intel_get_required_mask,
3802};
3803
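/*
 * The bounce_* helpers below implement the DMA ops used when swiotlb
 * bounce pages are kept around for untrusted devices (see
 * intel_iommu_init()). Buffers that are not page aligned are bounced
 * through swiotlb so a device never gets DMA access to memory outside
 * the pages backing its own buffer.
 */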
3804static void
3805bounce_sync_single(struct device *dev, dma_addr_t addr, size_t size,
3806 enum dma_data_direction dir, enum dma_sync_target target)
3807{
3808 struct dmar_domain *domain;
3809 phys_addr_t tlb_addr;
3810
3811 domain = find_domain(dev);
3812 if (WARN_ON(!domain))
3813 return;
3814
3815 tlb_addr = intel_iommu_iova_to_phys(&domain->domain, addr);
3816 if (is_swiotlb_buffer(tlb_addr))
3817 swiotlb_tbl_sync_single(dev, tlb_addr, size, dir, target);
3818}
3819
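/*
 * Map @paddr for DMA, bouncing it through a swiotlb slot when either
 * the start address or the size is not VTD_PAGE_SIZE aligned. Any
 * padding in the bounce slot that will be visible to the device is
 * zeroed so stale data never leaks past the ends of the buffer.
 */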
3820static dma_addr_t
3821bounce_map_single(struct device *dev, phys_addr_t paddr, size_t size,
3822 enum dma_data_direction dir, unsigned long attrs,
3823 u64 dma_mask)
3824{
3825 size_t aligned_size = ALIGN(size, VTD_PAGE_SIZE);
3826 struct dmar_domain *domain;
3827 struct intel_iommu *iommu;
3828 unsigned long iova_pfn;
3829 unsigned long nrpages;
3830 phys_addr_t tlb_addr;
3831 int prot = 0;
3832 int ret;
3833
3834 domain = find_domain(dev);
3835 if (WARN_ON(dir == DMA_NONE || !domain))
3836 return DMA_MAPPING_ERROR;
3837
3838 iommu = domain_get_iommu(domain);
3839 if (WARN_ON(!iommu))
3840 return DMA_MAPPING_ERROR;
3841
3842 nrpages = aligned_nrpages(0, size);
3843 iova_pfn = intel_alloc_iova(dev, domain,
3844 dma_to_mm_pfn(nrpages), dma_mask);
3845 if (!iova_pfn)
3846 return DMA_MAPPING_ERROR;
3847
3848 /*
 3849	 * Check if DMAR supports zero-length reads on write-only
 3850	 * mappings.
3851 */
3852 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
3853 !cap_zlr(iommu->cap))
3854 prot |= DMA_PTE_READ;
3855 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3856 prot |= DMA_PTE_WRITE;
3857
3858 /*
3859 * If both the physical buffer start address and size are
3860 * page aligned, we don't need to use a bounce page.
3861 */
3862 if (!IS_ALIGNED(paddr | size, VTD_PAGE_SIZE)) {
3863 tlb_addr = swiotlb_tbl_map_single(dev,
3864 __phys_to_dma(dev, io_tlb_start),
3865 paddr, size, aligned_size, dir, attrs);
3866 if (tlb_addr == DMA_MAPPING_ERROR) {
3867 goto swiotlb_error;
3868 } else {
3869 /* Cleanup the padding area. */
3870 void *padding_start = phys_to_virt(tlb_addr);
3871 size_t padding_size = aligned_size;
3872
3873 if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
3874 (dir == DMA_TO_DEVICE ||
3875 dir == DMA_BIDIRECTIONAL)) {
3876 padding_start += size;
3877 padding_size -= size;
3878 }
3879
3880 memset(padding_start, 0, padding_size);
3881 }
3882 } else {
3883 tlb_addr = paddr;
3884 }
3885
3886 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
3887 tlb_addr >> VTD_PAGE_SHIFT, nrpages, prot);
3888 if (ret)
3889 goto mapping_error;
3890
3891 trace_bounce_map_single(dev, iova_pfn << PAGE_SHIFT, paddr, size);
3892
3893 return (phys_addr_t)iova_pfn << PAGE_SHIFT;
3894
3895mapping_error:
3896 if (is_swiotlb_buffer(tlb_addr))
3897 swiotlb_tbl_unmap_single(dev, tlb_addr, size,
3898 aligned_size, dir, attrs);
3899swiotlb_error:
3900 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
3901 dev_err(dev, "Device bounce map: %zx@%llx dir %d --- failed\n",
3902 size, (unsigned long long)paddr, dir);
3903
3904 return DMA_MAPPING_ERROR;
3905}
3906
3907static void
3908bounce_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
3909 enum dma_data_direction dir, unsigned long attrs)
3910{
3911 size_t aligned_size = ALIGN(size, VTD_PAGE_SIZE);
3912 struct dmar_domain *domain;
3913 phys_addr_t tlb_addr;
3914
3915 domain = find_domain(dev);
3916 if (WARN_ON(!domain))
3917 return;
3918
3919 tlb_addr = intel_iommu_iova_to_phys(&domain->domain, dev_addr);
3920 if (WARN_ON(!tlb_addr))
3921 return;
3922
3923 intel_unmap(dev, dev_addr, size);
3924 if (is_swiotlb_buffer(tlb_addr))
3925 swiotlb_tbl_unmap_single(dev, tlb_addr, size,
3926 aligned_size, dir, attrs);
3927
3928 trace_bounce_unmap_single(dev, dev_addr, size);
3929}
3930
3931static dma_addr_t
3932bounce_map_page(struct device *dev, struct page *page, unsigned long offset,
3933 size_t size, enum dma_data_direction dir, unsigned long attrs)
3934{
3935 return bounce_map_single(dev, page_to_phys(page) + offset,
3936 size, dir, attrs, *dev->dma_mask);
3937}
3938
3939static dma_addr_t
3940bounce_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size,
3941 enum dma_data_direction dir, unsigned long attrs)
3942{
3943 return bounce_map_single(dev, phys_addr, size,
3944 dir, attrs, *dev->dma_mask);
3945}
3946
3947static void
3948bounce_unmap_page(struct device *dev, dma_addr_t dev_addr, size_t size,
3949 enum dma_data_direction dir, unsigned long attrs)
3950{
3951 bounce_unmap_single(dev, dev_addr, size, dir, attrs);
3952}
3953
3954static void
3955bounce_unmap_resource(struct device *dev, dma_addr_t dev_addr, size_t size,
3956 enum dma_data_direction dir, unsigned long attrs)
3957{
3958 bounce_unmap_single(dev, dev_addr, size, dir, attrs);
3959}
3960
3961static void
3962bounce_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3963 enum dma_data_direction dir, unsigned long attrs)
3964{
3965 struct scatterlist *sg;
3966 int i;
3967
3968 for_each_sg(sglist, sg, nelems, i)
3969 bounce_unmap_page(dev, sg->dma_address,
3970 sg_dma_len(sg), dir, attrs);
3971}
3972
3973static int
3974bounce_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3975 enum dma_data_direction dir, unsigned long attrs)
3976{
3977 int i;
3978 struct scatterlist *sg;
3979
3980 for_each_sg(sglist, sg, nelems, i) {
3981 sg->dma_address = bounce_map_page(dev, sg_page(sg),
3982 sg->offset, sg->length,
3983 dir, attrs);
3984 if (sg->dma_address == DMA_MAPPING_ERROR)
3985 goto out_unmap;
3986 sg_dma_len(sg) = sg->length;
3987 }
3988
3989 return nelems;
3990
3991out_unmap:
3992 bounce_unmap_sg(dev, sglist, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
3993 return 0;
3994}
3995
3996static void
3997bounce_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
3998 size_t size, enum dma_data_direction dir)
3999{
4000 bounce_sync_single(dev, addr, size, dir, SYNC_FOR_CPU);
4001}
4002
4003static void
4004bounce_sync_single_for_device(struct device *dev, dma_addr_t addr,
4005 size_t size, enum dma_data_direction dir)
4006{
4007 bounce_sync_single(dev, addr, size, dir, SYNC_FOR_DEVICE);
4008}
4009
4010static void
4011bounce_sync_sg_for_cpu(struct device *dev, struct scatterlist *sglist,
4012 int nelems, enum dma_data_direction dir)
4013{
4014 struct scatterlist *sg;
4015 int i;
4016
4017 for_each_sg(sglist, sg, nelems, i)
4018 bounce_sync_single(dev, sg_dma_address(sg),
4019 sg_dma_len(sg), dir, SYNC_FOR_CPU);
4020}
4021
4022static void
4023bounce_sync_sg_for_device(struct device *dev, struct scatterlist *sglist,
4024 int nelems, enum dma_data_direction dir)
4025{
4026 struct scatterlist *sg;
4027 int i;
4028
4029 for_each_sg(sglist, sg, nelems, i)
4030 bounce_sync_single(dev, sg_dma_address(sg),
4031 sg_dma_len(sg), dir, SYNC_FOR_DEVICE);
4032}
4033
4034static const struct dma_map_ops bounce_dma_ops = {
4035 .alloc = intel_alloc_coherent,
4036 .free = intel_free_coherent,
4037 .map_sg = bounce_map_sg,
4038 .unmap_sg = bounce_unmap_sg,
4039 .map_page = bounce_map_page,
4040 .unmap_page = bounce_unmap_page,
4041 .sync_single_for_cpu = bounce_sync_single_for_cpu,
4042 .sync_single_for_device = bounce_sync_single_for_device,
4043 .sync_sg_for_cpu = bounce_sync_sg_for_cpu,
4044 .sync_sg_for_device = bounce_sync_sg_for_device,
4045 .map_resource = bounce_map_resource,
4046 .unmap_resource = bounce_unmap_resource,
4047 .dma_supported = dma_direct_supported,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004048};
4049
4050static inline int iommu_domain_cache_init(void)
4051{
4052 int ret = 0;
4053
4054 iommu_domain_cache = kmem_cache_create("iommu_domain",
4055 sizeof(struct dmar_domain),
4056 0,
4057 SLAB_HWCACHE_ALIGN,
4059 NULL);
4060 if (!iommu_domain_cache) {
4061 pr_err("Couldn't create iommu_domain cache\n");
4062 ret = -ENOMEM;
4063 }
4064
4065 return ret;
4066}
4067
4068static inline int iommu_devinfo_cache_init(void)
4069{
4070 int ret = 0;
4071
4072 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
4073 sizeof(struct device_domain_info),
4074 0,
4075 SLAB_HWCACHE_ALIGN,
4076 NULL);
4077 if (!iommu_devinfo_cache) {
4078 pr_err("Couldn't create devinfo cache\n");
4079 ret = -ENOMEM;
4080 }
4081
4082 return ret;
4083}
4084
4085static int __init iommu_init_mempool(void)
4086{
4087 int ret;
4088 ret = iova_cache_get();
4089 if (ret)
4090 return ret;
4091
4092 ret = iommu_domain_cache_init();
4093 if (ret)
4094 goto domain_error;
4095
4096 ret = iommu_devinfo_cache_init();
4097 if (!ret)
4098 return ret;
4099
4100 kmem_cache_destroy(iommu_domain_cache);
4101domain_error:
4102 iova_cache_put();
4103
4104 return -ENOMEM;
4105}
4106
4107static void __init iommu_exit_mempool(void)
4108{
4109 kmem_cache_destroy(iommu_devinfo_cache);
4110 kmem_cache_destroy(iommu_domain_cache);
4111 iova_cache_put();
4112}
4113
4114static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
4115{
4116 struct dmar_drhd_unit *drhd;
4117 u32 vtbar;
4118 int rc;
4119
4120 /* We know that this device on this chipset has its own IOMMU.
4121 * If we find it under a different IOMMU, then the BIOS is lying
4122 * to us. Hope that the IOMMU for this device is actually
4123 * disabled, and it needs no translation...
4124 */
4125 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
4126 if (rc) {
4127 /* "can't" happen */
4128 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
4129 return;
4130 }
4131 vtbar &= 0xffff0000;
4132
 4133	/* we know that this IOMMU should be at offset 0xa000 from vtbar */
4134 drhd = dmar_find_matched_drhd_unit(pdev);
Olivier Deprez0e641232021-09-23 10:07:05 +02004135 if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
4136 pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
4137 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004138 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
Olivier Deprez0e641232021-09-23 10:07:05 +02004139 }
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004140}
4141DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
4142
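/*
 * Mark DMAR units that can be ignored: units with an empty device
 * scope and, when dmar_map_gfx is clear, units that cover nothing but
 * graphics devices, whose devices are then given the dummy domain
 * info so they bypass translation.
 */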
4143static void __init init_no_remapping_devices(void)
4144{
4145 struct dmar_drhd_unit *drhd;
4146 struct device *dev;
4147 int i;
4148
4149 for_each_drhd_unit(drhd) {
4150 if (!drhd->include_all) {
4151 for_each_active_dev_scope(drhd->devices,
4152 drhd->devices_cnt, i, dev)
4153 break;
4154 /* ignore DMAR unit if no devices exist */
4155 if (i == drhd->devices_cnt)
4156 drhd->ignored = 1;
4157 }
4158 }
4159
4160 for_each_active_drhd_unit(drhd) {
4161 if (drhd->include_all)
4162 continue;
4163
4164 for_each_active_dev_scope(drhd->devices,
4165 drhd->devices_cnt, i, dev)
4166 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
4167 break;
4168 if (i < drhd->devices_cnt)
4169 continue;
4170
4171 /* This IOMMU has *only* gfx devices. Either bypass it or
4172 set the gfx_mapped flag, as appropriate */
David Brazdil0f672f62019-12-10 10:32:29 +00004173 if (!dmar_map_gfx) {
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004174 drhd->ignored = 1;
4175 for_each_active_dev_scope(drhd->devices,
4176 drhd->devices_cnt, i, dev)
4177 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
4178 }
4179 }
4180}
4181
4182#ifdef CONFIG_SUSPEND
4183static int init_iommu_hw(void)
4184{
4185 struct dmar_drhd_unit *drhd;
4186 struct intel_iommu *iommu = NULL;
4187
4188 for_each_active_iommu(iommu, drhd)
4189 if (iommu->qi)
4190 dmar_reenable_qi(iommu);
4191
4192 for_each_iommu(iommu, drhd) {
4193 if (drhd->ignored) {
4194 /*
4195 * we always have to disable PMRs or DMA may fail on
4196 * this device
4197 */
4198 if (force_on)
4199 iommu_disable_protect_mem_regions(iommu);
4200 continue;
4201 }
David Brazdil0f672f62019-12-10 10:32:29 +00004202
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004203 iommu_flush_write_buffer(iommu);
4204
4205 iommu_set_root_entry(iommu);
4206
4207 iommu->flush.flush_context(iommu, 0, 0, 0,
4208 DMA_CCMD_GLOBAL_INVL);
4209 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4210 iommu_enable_translation(iommu);
4211 iommu_disable_protect_mem_regions(iommu);
4212 }
4213
4214 return 0;
4215}
4216
4217static void iommu_flush_all(void)
4218{
4219 struct dmar_drhd_unit *drhd;
4220 struct intel_iommu *iommu;
4221
4222 for_each_active_iommu(iommu, drhd) {
4223 iommu->flush.flush_context(iommu, 0, 0, 0,
4224 DMA_CCMD_GLOBAL_INVL);
4225 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
4226 DMA_TLB_GLOBAL_FLUSH);
4227 }
4228}
4229
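/*
 * Suspend/resume support: on suspend the fault event registers
 * (FECTL/FEDATA/FEADDR/FEUADDR) are saved and translation is disabled;
 * on resume the hardware is re-initialized and the registers restored.
 */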
4230static int iommu_suspend(void)
4231{
4232 struct dmar_drhd_unit *drhd;
4233 struct intel_iommu *iommu = NULL;
4234 unsigned long flag;
4235
4236 for_each_active_iommu(iommu, drhd) {
4237 iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
4238 GFP_ATOMIC);
4239 if (!iommu->iommu_state)
4240 goto nomem;
4241 }
4242
4243 iommu_flush_all();
4244
4245 for_each_active_iommu(iommu, drhd) {
4246 iommu_disable_translation(iommu);
4247
4248 raw_spin_lock_irqsave(&iommu->register_lock, flag);
4249
4250 iommu->iommu_state[SR_DMAR_FECTL_REG] =
4251 readl(iommu->reg + DMAR_FECTL_REG);
4252 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
4253 readl(iommu->reg + DMAR_FEDATA_REG);
4254 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
4255 readl(iommu->reg + DMAR_FEADDR_REG);
4256 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
4257 readl(iommu->reg + DMAR_FEUADDR_REG);
4258
4259 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4260 }
4261 return 0;
4262
4263nomem:
4264 for_each_active_iommu(iommu, drhd)
4265 kfree(iommu->iommu_state);
4266
4267 return -ENOMEM;
4268}
4269
4270static void iommu_resume(void)
4271{
4272 struct dmar_drhd_unit *drhd;
4273 struct intel_iommu *iommu = NULL;
4274 unsigned long flag;
4275
4276 if (init_iommu_hw()) {
4277 if (force_on)
4278 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4279 else
4280 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
4281 return;
4282 }
4283
4284 for_each_active_iommu(iommu, drhd) {
4285
4286 raw_spin_lock_irqsave(&iommu->register_lock, flag);
4287
4288 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4289 iommu->reg + DMAR_FECTL_REG);
4290 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4291 iommu->reg + DMAR_FEDATA_REG);
4292 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4293 iommu->reg + DMAR_FEADDR_REG);
4294 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4295 iommu->reg + DMAR_FEUADDR_REG);
4296
4297 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4298 }
4299
4300 for_each_active_iommu(iommu, drhd)
4301 kfree(iommu->iommu_state);
4302}
4303
4304static struct syscore_ops iommu_syscore_ops = {
4305 .resume = iommu_resume,
4306 .suspend = iommu_suspend,
4307};
4308
4309static void __init init_iommu_pm_ops(void)
4310{
4311 register_syscore_ops(&iommu_syscore_ops);
4312}
4313
4314#else
4315static inline void init_iommu_pm_ops(void) {}
 4316#endif /* CONFIG_SUSPEND */
4317
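/*
 * Parse one Reserved Memory Region Reporting (RMRR) structure from the
 * DMAR table, record its address range and device scope, and add it to
 * the global dmar_rmrr_units list.
 */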
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004318int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
4319{
4320 struct acpi_dmar_reserved_memory *rmrr;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004321 struct dmar_rmrr_unit *rmrru;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004322
4323 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4324 if (!rmrru)
4325 goto out;
4326
4327 rmrru->hdr = header;
4328 rmrr = (struct acpi_dmar_reserved_memory *)header;
4329 rmrru->base_address = rmrr->base_address;
4330 rmrru->end_address = rmrr->end_address;
4331
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004332 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4333 ((void *)rmrr) + rmrr->header.length,
4334 &rmrru->devices_cnt);
4335 if (rmrru->devices_cnt && rmrru->devices == NULL)
David Brazdil0f672f62019-12-10 10:32:29 +00004336 goto free_rmrru;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004337
4338 list_add(&rmrru->list, &dmar_rmrr_units);
4339
4340 return 0;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004341free_rmrru:
4342 kfree(rmrru);
4343out:
4344 return -ENOMEM;
4345}
4346
4347static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4348{
4349 struct dmar_atsr_unit *atsru;
4350 struct acpi_dmar_atsr *tmp;
4351
Olivier Deprez0e641232021-09-23 10:07:05 +02004352 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list,
4353 dmar_rcu_check()) {
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004354 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4355 if (atsr->segment != tmp->segment)
4356 continue;
4357 if (atsr->header.length != tmp->header.length)
4358 continue;
4359 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4360 return atsru;
4361 }
4362
4363 return NULL;
4364}
4365
4366int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4367{
4368 struct acpi_dmar_atsr *atsr;
4369 struct dmar_atsr_unit *atsru;
4370
4371 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
4372 return 0;
4373
4374 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4375 atsru = dmar_find_atsr(atsr);
4376 if (atsru)
4377 return 0;
4378
4379 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
4380 if (!atsru)
4381 return -ENOMEM;
4382
4383 /*
4384 * If memory is allocated from slab by ACPI _DSM method, we need to
4385 * copy the memory content because the memory buffer will be freed
4386 * on return.
4387 */
4388 atsru->hdr = (void *)(atsru + 1);
4389 memcpy(atsru->hdr, hdr, hdr->length);
4390 atsru->include_all = atsr->flags & 0x1;
4391 if (!atsru->include_all) {
4392 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4393 (void *)atsr + atsr->header.length,
4394 &atsru->devices_cnt);
4395 if (atsru->devices_cnt && atsru->devices == NULL) {
4396 kfree(atsru);
4397 return -ENOMEM;
4398 }
4399 }
4400
4401 list_add_rcu(&atsru->list, &dmar_atsr_units);
4402
4403 return 0;
4404}
4405
4406static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4407{
4408 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4409 kfree(atsru);
4410}
4411
4412int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4413{
4414 struct acpi_dmar_atsr *atsr;
4415 struct dmar_atsr_unit *atsru;
4416
4417 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4418 atsru = dmar_find_atsr(atsr);
4419 if (atsru) {
4420 list_del_rcu(&atsru->list);
4421 synchronize_rcu();
4422 intel_iommu_free_atsr(atsru);
4423 }
4424
4425 return 0;
4426}
4427
4428int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4429{
4430 int i;
4431 struct device *dev;
4432 struct acpi_dmar_atsr *atsr;
4433 struct dmar_atsr_unit *atsru;
4434
4435 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4436 atsru = dmar_find_atsr(atsr);
4437 if (!atsru)
4438 return 0;
4439
4440 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
4441 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4442 i, dev)
4443 return -EBUSY;
4444 }
4445
4446 return 0;
4447}
4448
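/*
 * Bring up a hot-added DMAR unit: check that it supports the features
 * the running configuration relies on (pass-through, snooping, super
 * pages), allocate its domain IDs and root entry, then enable queued
 * invalidation, the fault interrupt and translation.
 */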
4449static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4450{
David Brazdil0f672f62019-12-10 10:32:29 +00004451 int sp, ret;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004452 struct intel_iommu *iommu = dmaru->iommu;
4453
4454 if (g_iommus[iommu->seq_id])
4455 return 0;
4456
4457 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
4458 pr_warn("%s: Doesn't support hardware pass through.\n",
4459 iommu->name);
4460 return -ENXIO;
4461 }
4462 if (!ecap_sc_support(iommu->ecap) &&
4463 domain_update_iommu_snooping(iommu)) {
4464 pr_warn("%s: Doesn't support snooping.\n",
4465 iommu->name);
4466 return -ENXIO;
4467 }
4468 sp = domain_update_iommu_superpage(iommu) - 1;
4469 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
4470 pr_warn("%s: Doesn't support large page.\n",
4471 iommu->name);
4472 return -ENXIO;
4473 }
4474
4475 /*
4476 * Disable translation if already enabled prior to OS handover.
4477 */
4478 if (iommu->gcmd & DMA_GCMD_TE)
4479 iommu_disable_translation(iommu);
4480
4481 g_iommus[iommu->seq_id] = iommu;
4482 ret = iommu_init_domains(iommu);
4483 if (ret == 0)
4484 ret = iommu_alloc_root_entry(iommu);
4485 if (ret)
4486 goto out;
4487
4488#ifdef CONFIG_INTEL_IOMMU_SVM
David Brazdil0f672f62019-12-10 10:32:29 +00004489 if (pasid_supported(iommu))
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004490 intel_svm_init(iommu);
4491#endif
4492
4493 if (dmaru->ignored) {
4494 /*
4495 * we always have to disable PMRs or DMA may fail on this device
4496 */
4497 if (force_on)
4498 iommu_disable_protect_mem_regions(iommu);
4499 return 0;
4500 }
4501
4502 intel_iommu_init_qi(iommu);
4503 iommu_flush_write_buffer(iommu);
4504
4505#ifdef CONFIG_INTEL_IOMMU_SVM
David Brazdil0f672f62019-12-10 10:32:29 +00004506 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004507 ret = intel_svm_enable_prq(iommu);
4508 if (ret)
4509 goto disable_iommu;
4510 }
4511#endif
4512 ret = dmar_set_interrupt(iommu);
4513 if (ret)
4514 goto disable_iommu;
4515
4516 iommu_set_root_entry(iommu);
4517 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4518 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4519 iommu_enable_translation(iommu);
4520
4521 iommu_disable_protect_mem_regions(iommu);
4522 return 0;
4523
4524disable_iommu:
4525 disable_dmar_iommu(iommu);
4526out:
4527 free_dmar_iommu(iommu);
4528 return ret;
4529}
4530
4531int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4532{
4533 int ret = 0;
4534 struct intel_iommu *iommu = dmaru->iommu;
4535
4536 if (!intel_iommu_enabled)
4537 return 0;
4538 if (iommu == NULL)
4539 return -EINVAL;
4540
4541 if (insert) {
4542 ret = intel_iommu_add(dmaru);
4543 } else {
4544 disable_dmar_iommu(iommu);
4545 free_dmar_iommu(iommu);
4546 }
4547
4548 return ret;
4549}
4550
4551static void intel_iommu_free_dmars(void)
4552{
4553 struct dmar_rmrr_unit *rmrru, *rmrr_n;
4554 struct dmar_atsr_unit *atsru, *atsr_n;
4555
4556 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4557 list_del(&rmrru->list);
4558 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004559 kfree(rmrru);
4560 }
4561
4562 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4563 list_del(&atsru->list);
4564 intel_iommu_free_atsr(atsru);
4565 }
4566}
4567
4568int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4569{
4570 int i, ret = 1;
4571 struct pci_bus *bus;
4572 struct pci_dev *bridge = NULL;
4573 struct device *tmp;
4574 struct acpi_dmar_atsr *atsr;
4575 struct dmar_atsr_unit *atsru;
4576
4577 dev = pci_physfn(dev);
4578 for (bus = dev->bus; bus; bus = bus->parent) {
4579 bridge = bus->self;
4580 /* If it's an integrated device, allow ATS */
4581 if (!bridge)
4582 return 1;
4583 /* Connected via non-PCIe: no ATS */
4584 if (!pci_is_pcie(bridge) ||
4585 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
4586 return 0;
4587 /* If we found the root port, look it up in the ATSR */
4588 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
4589 break;
4590 }
4591
4592 rcu_read_lock();
4593 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4594 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4595 if (atsr->segment != pci_domain_nr(dev->bus))
4596 continue;
4597
4598 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
4599 if (tmp == &bridge->dev)
4600 goto out;
4601
4602 if (atsru->include_all)
4603 goto out;
4604 }
4605 ret = 0;
4606out:
4607 rcu_read_unlock();
4608
4609 return ret;
4610}
4611
4612int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4613{
David Brazdil0f672f62019-12-10 10:32:29 +00004614 int ret;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004615 struct dmar_rmrr_unit *rmrru;
4616 struct dmar_atsr_unit *atsru;
4617 struct acpi_dmar_atsr *atsr;
4618 struct acpi_dmar_reserved_memory *rmrr;
4619
4620 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
4621 return 0;
4622
4623 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4624 rmrr = container_of(rmrru->hdr,
4625 struct acpi_dmar_reserved_memory, header);
4626 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4627 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4628 ((void *)rmrr) + rmrr->header.length,
4629 rmrr->segment, rmrru->devices,
4630 rmrru->devices_cnt);
David Brazdil0f672f62019-12-10 10:32:29 +00004631 if (ret < 0)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004632 return ret;
4633 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4634 dmar_remove_dev_scope(info, rmrr->segment,
4635 rmrru->devices, rmrru->devices_cnt);
4636 }
4637 }
4638
4639 list_for_each_entry(atsru, &dmar_atsr_units, list) {
4640 if (atsru->include_all)
4641 continue;
4642
4643 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4644 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4645 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4646 (void *)atsr + atsr->header.length,
4647 atsr->segment, atsru->devices,
4648 atsru->devices_cnt);
4649 if (ret > 0)
4650 break;
David Brazdil0f672f62019-12-10 10:32:29 +00004651 else if (ret < 0)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004652 return ret;
4653 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4654 if (dmar_remove_dev_scope(info, atsr->segment,
4655 atsru->devices, atsru->devices_cnt))
4656 break;
4657 }
4658 }
4659
4660 return 0;
4661}
4662
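/*
 * Memory hotplug notifier: extend the si_domain identity map when
 * memory goes online, and unmap the corresponding IOVA range (with an
 * IOTLB flush on every active IOMMU) when it goes offline again.
 */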
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004663static int intel_iommu_memory_notifier(struct notifier_block *nb,
4664 unsigned long val, void *v)
4665{
4666 struct memory_notify *mhp = v;
4667 unsigned long long start, end;
4668 unsigned long start_vpfn, last_vpfn;
4669
4670 switch (val) {
4671 case MEM_GOING_ONLINE:
4672 start = mhp->start_pfn << PAGE_SHIFT;
4673 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4674 if (iommu_domain_identity_map(si_domain, start, end)) {
4675 pr_warn("Failed to build identity map for [%llx-%llx]\n",
4676 start, end);
4677 return NOTIFY_BAD;
4678 }
4679 break;
4680
4681 case MEM_OFFLINE:
4682 case MEM_CANCEL_ONLINE:
4683 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4684 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4685 while (start_vpfn <= last_vpfn) {
4686 struct iova *iova;
4687 struct dmar_drhd_unit *drhd;
4688 struct intel_iommu *iommu;
4689 struct page *freelist;
4690
4691 iova = find_iova(&si_domain->iovad, start_vpfn);
4692 if (iova == NULL) {
 4693				pr_debug("Failed to get IOVA for PFN %lx\n",
4694 start_vpfn);
4695 break;
4696 }
4697
4698 iova = split_and_remove_iova(&si_domain->iovad, iova,
4699 start_vpfn, last_vpfn);
4700 if (iova == NULL) {
4701 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
4702 start_vpfn, last_vpfn);
4703 return NOTIFY_BAD;
4704 }
4705
4706 freelist = domain_unmap(si_domain, iova->pfn_lo,
4707 iova->pfn_hi);
4708
4709 rcu_read_lock();
4710 for_each_active_iommu(iommu, drhd)
4711 iommu_flush_iotlb_psi(iommu, si_domain,
4712 iova->pfn_lo, iova_size(iova),
4713 !freelist, 0);
4714 rcu_read_unlock();
4715 dma_free_pagelist(freelist);
4716
4717 start_vpfn = iova->pfn_hi + 1;
4718 free_iova_mem(iova);
4719 }
4720 break;
4721 }
4722
4723 return NOTIFY_OK;
4724}
4725
4726static struct notifier_block intel_iommu_memory_nb = {
4727 .notifier_call = intel_iommu_memory_notifier,
4728 .priority = 0
4729};
4730
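/* Release the IOVAs cached on @cpu for every domain of every IOMMU. */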
4731static void free_all_cpu_cached_iovas(unsigned int cpu)
4732{
4733 int i;
4734
4735 for (i = 0; i < g_num_of_iommus; i++) {
4736 struct intel_iommu *iommu = g_iommus[i];
4737 struct dmar_domain *domain;
4738 int did;
4739
4740 if (!iommu)
4741 continue;
4742
4743 for (did = 0; did < cap_ndoms(iommu->cap); did++) {
4744 domain = get_iommu_domain(iommu, (u16)did);
4745
4746 if (!domain)
4747 continue;
4748 free_cpu_cached_iovas(cpu, &domain->iovad);
4749 }
4750 }
4751}
4752
4753static int intel_iommu_cpu_dead(unsigned int cpu)
4754{
4755 free_all_cpu_cached_iovas(cpu);
4756 return 0;
4757}
4758
4759static void intel_disable_iommus(void)
4760{
4761 struct intel_iommu *iommu = NULL;
4762 struct dmar_drhd_unit *drhd;
4763
4764 for_each_iommu(iommu, drhd)
4765 iommu_disable_translation(iommu);
4766}
4767
4768static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
4769{
4770 struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
4771
4772 return container_of(iommu_dev, struct intel_iommu, iommu);
4773}
4774
4775static ssize_t intel_iommu_show_version(struct device *dev,
4776 struct device_attribute *attr,
4777 char *buf)
4778{
4779 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4780 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4781 return sprintf(buf, "%d:%d\n",
4782 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4783}
4784static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4785
4786static ssize_t intel_iommu_show_address(struct device *dev,
4787 struct device_attribute *attr,
4788 char *buf)
4789{
4790 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4791 return sprintf(buf, "%llx\n", iommu->reg_phys);
4792}
4793static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4794
4795static ssize_t intel_iommu_show_cap(struct device *dev,
4796 struct device_attribute *attr,
4797 char *buf)
4798{
4799 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4800 return sprintf(buf, "%llx\n", iommu->cap);
4801}
4802static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4803
4804static ssize_t intel_iommu_show_ecap(struct device *dev,
4805 struct device_attribute *attr,
4806 char *buf)
4807{
4808 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4809 return sprintf(buf, "%llx\n", iommu->ecap);
4810}
4811static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4812
4813static ssize_t intel_iommu_show_ndoms(struct device *dev,
4814 struct device_attribute *attr,
4815 char *buf)
4816{
4817 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4818 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4819}
4820static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4821
4822static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4823 struct device_attribute *attr,
4824 char *buf)
4825{
4826 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4827 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4828 cap_ndoms(iommu->cap)));
4829}
4830static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4831
4832static struct attribute *intel_iommu_attrs[] = {
4833 &dev_attr_version.attr,
4834 &dev_attr_address.attr,
4835 &dev_attr_cap.attr,
4836 &dev_attr_ecap.attr,
4837 &dev_attr_domains_supported.attr,
4838 &dev_attr_domains_used.attr,
4839 NULL,
4840};
4841
4842static struct attribute_group intel_iommu_group = {
4843 .name = "intel-iommu",
4844 .attrs = intel_iommu_attrs,
4845};
4846
4847const struct attribute_group *intel_iommu_groups[] = {
4848 &intel_iommu_group,
4849 NULL,
4850};
4851
David Brazdil0f672f62019-12-10 10:32:29 +00004852static inline bool has_untrusted_dev(void)
4853{
4854 struct pci_dev *pdev = NULL;
4855
4856 for_each_pci_dev(pdev)
4857 if (pdev->untrusted)
4858 return true;
4859
4860 return false;
4861}
4862
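/*
 * Force the IOMMU on when the platform opted in via the DMAR table and
 * an untrusted device is present, unless the user explicitly disabled
 * the opt-in handling. Returns 1 when the IOMMU was forced on.
 */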
4863static int __init platform_optin_force_iommu(void)
4864{
4865 if (!dmar_platform_optin() || no_platform_optin || !has_untrusted_dev())
4866 return 0;
4867
4868 if (no_iommu || dmar_disabled)
4869 pr_info("Intel-IOMMU force enabled due to platform opt in\n");
4870
4871 /*
4872 * If Intel-IOMMU is disabled by default, we will apply identity
4873 * map for all devices except those marked as being untrusted.
4874 */
4875 if (dmar_disabled)
4876 iommu_identity_mapping |= IDENTMAP_ALL;
4877
4878 dmar_disabled = 0;
4879 no_iommu = 0;
4880
4881 return 1;
4882}
4883
4884static int __init probe_acpi_namespace_devices(void)
4885{
4886 struct dmar_drhd_unit *drhd;
4887 /* To avoid a -Wunused-but-set-variable warning. */
4888 struct intel_iommu *iommu __maybe_unused;
4889 struct device *dev;
4890 int i, ret = 0;
4891
4892 for_each_active_iommu(iommu, drhd) {
4893 for_each_active_dev_scope(drhd->devices,
4894 drhd->devices_cnt, i, dev) {
4895 struct acpi_device_physical_node *pn;
4896 struct iommu_group *group;
4897 struct acpi_device *adev;
4898
4899 if (dev->bus != &acpi_bus_type)
4900 continue;
4901
4902 adev = to_acpi_device(dev);
4903 mutex_lock(&adev->physical_node_lock);
4904 list_for_each_entry(pn,
4905 &adev->physical_node_list, node) {
4906 group = iommu_group_get(pn->dev);
4907 if (group) {
4908 iommu_group_put(group);
4909 continue;
4910 }
4911
4912 pn->dev->bus->iommu_ops = &intel_iommu_ops;
4913 ret = iommu_probe_device(pn->dev);
4914 if (ret)
4915 break;
4916 }
4917 mutex_unlock(&adev->physical_node_lock);
4918
4919 if (ret)
4920 return ret;
4921 }
4922 }
4923
4924 return 0;
4925}
4926
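/*
 * Main initialization entry point: parse the DMAR table, set up every
 * enabled IOMMU, install the Intel DMA ops, register sysfs attributes,
 * notifiers and ACPI namespace devices, and finally enable translation
 * on all units that are not ignored.
 */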
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004927int __init intel_iommu_init(void)
4928{
4929 int ret = -ENODEV;
4930 struct dmar_drhd_unit *drhd;
4931 struct intel_iommu *iommu;
4932
David Brazdil0f672f62019-12-10 10:32:29 +00004933 /*
4934 * Intel IOMMU is required for a TXT/tboot launch or platform
4935 * opt in, so enforce that.
4936 */
Olivier Deprez0e641232021-09-23 10:07:05 +02004937 force_on = (!intel_iommu_tboot_noforce && tboot_force_iommu()) ||
4938 platform_optin_force_iommu();
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004939
4940 if (iommu_init_mempool()) {
4941 if (force_on)
4942 panic("tboot: Failed to initialize iommu memory\n");
4943 return -ENOMEM;
4944 }
4945
4946 down_write(&dmar_global_lock);
4947 if (dmar_table_init()) {
4948 if (force_on)
4949 panic("tboot: Failed to initialize DMAR table\n");
4950 goto out_free_dmar;
4951 }
4952
4953 if (dmar_dev_scope_init() < 0) {
4954 if (force_on)
4955 panic("tboot: Failed to initialize DMAR device scope\n");
4956 goto out_free_dmar;
4957 }
4958
4959 up_write(&dmar_global_lock);
4960
4961 /*
4962 * The bus notifier takes the dmar_global_lock, so lockdep will
4963 * complain later when we register it under the lock.
4964 */
4965 dmar_register_bus_notifier();
4966
4967 down_write(&dmar_global_lock);
4968
Olivier Deprez0e641232021-09-23 10:07:05 +02004969 if (!no_iommu)
4970 intel_iommu_debugfs_init();
4971
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004972 if (no_iommu || dmar_disabled) {
4973 /*
 4974		 * We exit the function here to ensure the IOMMU's remapping and
 4975		 * mempool aren't set up, which means that the IOMMU's PMRs
 4976		 * won't be disabled via the call to init_dmars(). So disable
 4977		 * them explicitly here. The PMRs were set up by tboot prior to
4978 * calling SENTER, but the kernel is expected to reset/tear
4979 * down the PMRs.
4980 */
4981 if (intel_iommu_tboot_noforce) {
4982 for_each_iommu(iommu, drhd)
4983 iommu_disable_protect_mem_regions(iommu);
4984 }
4985
4986 /*
4987 * Make sure the IOMMUs are switched off, even when we
4988 * boot into a kexec kernel and the previous kernel left
4989 * them enabled
4990 */
4991 intel_disable_iommus();
4992 goto out_free_dmar;
4993 }
4994
4995 if (list_empty(&dmar_rmrr_units))
4996 pr_info("No RMRR found\n");
4997
4998 if (list_empty(&dmar_atsr_units))
4999 pr_info("No ATSR found\n");
5000
5001 if (dmar_init_reserved_ranges()) {
5002 if (force_on)
5003 panic("tboot: Failed to reserve iommu ranges\n");
5004 goto out_free_reserved_range;
5005 }
5006
David Brazdil0f672f62019-12-10 10:32:29 +00005007 if (dmar_map_gfx)
5008 intel_iommu_gfx_mapped = 1;
5009
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005010 init_no_remapping_devices();
5011
5012 ret = init_dmars();
5013 if (ret) {
5014 if (force_on)
5015 panic("tboot: Failed to initialize DMARs\n");
5016 pr_err("Initialization failed\n");
5017 goto out_free_reserved_range;
5018 }
5019 up_write(&dmar_global_lock);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005020
5021#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
David Brazdil0f672f62019-12-10 10:32:29 +00005022 /*
5023 * If the system has no untrusted device or the user has decided
 5024	 * to disable the bounce page mechanism, we don't need swiotlb.
 5025	 * Mark this here so that the pre-allocated bounce pages can be
 5026	 * released later.
5027 */
5028 if (!has_untrusted_dev() || intel_no_bounce)
5029 swiotlb = 0;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005030#endif
5031 dma_ops = &intel_dma_ops;
5032
5033 init_iommu_pm_ops();
5034
Olivier Deprez0e641232021-09-23 10:07:05 +02005035 down_read(&dmar_global_lock);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005036 for_each_active_iommu(iommu, drhd) {
5037 iommu_device_sysfs_add(&iommu->iommu, NULL,
5038 intel_iommu_groups,
5039 "%s", iommu->name);
5040 iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
5041 iommu_device_register(&iommu->iommu);
5042 }
Olivier Deprez0e641232021-09-23 10:07:05 +02005043 up_read(&dmar_global_lock);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005044
5045 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005046 if (si_domain && !hw_pass_through)
5047 register_memory_notifier(&intel_iommu_memory_nb);
5048 cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
5049 intel_iommu_cpu_dead);
David Brazdil0f672f62019-12-10 10:32:29 +00005050
5051 down_read(&dmar_global_lock);
5052 if (probe_acpi_namespace_devices())
 5053		pr_warn("ACPI namespace devices didn't probe correctly\n");
David Brazdil0f672f62019-12-10 10:32:29 +00005054
5055 /* Finally, we enable the DMA remapping hardware. */
5056 for_each_iommu(iommu, drhd) {
5057 if (!drhd->ignored && !translation_pre_enabled(iommu))
5058 iommu_enable_translation(iommu);
5059
5060 iommu_disable_protect_mem_regions(iommu);
5061 }
Olivier Deprez0e641232021-09-23 10:07:05 +02005062 up_read(&dmar_global_lock);
5063
David Brazdil0f672f62019-12-10 10:32:29 +00005064 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
5065
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005066 intel_iommu_enabled = 1;
5067
5068 return 0;
5069
5070out_free_reserved_range:
5071 put_iova_domain(&reserved_iova_list);
5072out_free_dmar:
5073 intel_iommu_free_dmars();
5074 up_write(&dmar_global_lock);
5075 iommu_exit_mempool();
5076 return ret;
5077}
5078
5079static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
5080{
5081 struct intel_iommu *iommu = opaque;
5082
5083 domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
5084 return 0;
5085}
5086
5087/*
5088 * NB - intel-iommu lacks any sort of reference counting for the users of
5089 * dependent devices. If multiple endpoints have intersecting dependent
5090 * devices, unbinding the driver from any one of them will possibly leave
5091 * the others unable to operate.
5092 */
5093static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
5094{
5095 if (!iommu || !dev || !dev_is_pci(dev))
5096 return;
5097
5098 pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
5099}
5100
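/*
 * Detach a device from its domain: tear down its PASID and context
 * entries, drop the domain's reference on the IOMMU and free a private
 * domain that is left without devices. Called with device_domain_lock
 * held.
 */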
5101static void __dmar_remove_one_dev_info(struct device_domain_info *info)
5102{
David Brazdil0f672f62019-12-10 10:32:29 +00005103 struct dmar_domain *domain;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005104 struct intel_iommu *iommu;
5105 unsigned long flags;
5106
5107 assert_spin_locked(&device_domain_lock);
5108
5109 if (WARN_ON(!info))
5110 return;
5111
5112 iommu = info->iommu;
David Brazdil0f672f62019-12-10 10:32:29 +00005113 domain = info->domain;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005114
5115 if (info->dev) {
David Brazdil0f672f62019-12-10 10:32:29 +00005116 if (dev_is_pci(info->dev) && sm_supported(iommu))
5117 intel_pasid_tear_down_entry(iommu, info->dev,
5118 PASID_RID2PASID);
5119
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005120 iommu_disable_dev_iotlb(info);
5121 domain_context_clear(iommu, info->dev);
5122 intel_pasid_free_table(info->dev);
5123 }
5124
5125 unlink_domain_info(info);
5126
5127 spin_lock_irqsave(&iommu->lock, flags);
David Brazdil0f672f62019-12-10 10:32:29 +00005128 domain_detach_iommu(domain, iommu);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005129 spin_unlock_irqrestore(&iommu->lock, flags);
5130
David Brazdil0f672f62019-12-10 10:32:29 +00005131 /* free the private domain */
5132 if (domain->flags & DOMAIN_FLAG_LOSE_CHILDREN &&
5133 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
5134 list_empty(&domain->devices))
5135 domain_exit(info->domain);
5136
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005137 free_devinfo_mem(info);
5138}
5139
David Brazdil0f672f62019-12-10 10:32:29 +00005140static void dmar_remove_one_dev_info(struct device *dev)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005141{
5142 struct device_domain_info *info;
5143 unsigned long flags;
5144
5145 spin_lock_irqsave(&device_domain_lock, flags);
5146 info = dev->archdata.iommu;
Olivier Deprez0e641232021-09-23 10:07:05 +02005147 if (info && info != DEFER_DEVICE_DOMAIN_INFO
5148 && info != DUMMY_DEVICE_DOMAIN_INFO)
David Brazdil0f672f62019-12-10 10:32:29 +00005149 __dmar_remove_one_dev_info(info);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005150 spin_unlock_irqrestore(&device_domain_lock, flags);
5151}
5152
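/*
 * Initialize a domain allocated through the IOMMU API: set up its IOVA
 * space, derive the adjusted guest address width and allocate the top
 * level page directory.
 */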
5153static int md_domain_init(struct dmar_domain *domain, int guest_width)
5154{
5155 int adjust_width;
5156
5157 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
5158 domain_reserve_special_ranges(domain);
5159
5160 /* calculate AGAW */
5161 domain->gaw = guest_width;
5162 adjust_width = guestwidth_to_adjustwidth(guest_width);
5163 domain->agaw = width_to_agaw(adjust_width);
5164
5165 domain->iommu_coherency = 0;
5166 domain->iommu_snooping = 0;
5167 domain->iommu_superpage = 0;
5168 domain->max_addr = 0;
5169
5170 /* always allocate the top pgd */
5171 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5172 if (!domain->pgd)
5173 return -ENOMEM;
5174 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
5175 return 0;
5176}
5177
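/*
 * Allocate an iommu_domain for the IOMMU core: DMA and unmanaged
 * requests get a fresh dmar_domain (DMA domains additionally get an
 * IOVA flush queue), while identity requests share the static
 * si_domain.
 */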
5178static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
5179{
5180 struct dmar_domain *dmar_domain;
5181 struct iommu_domain *domain;
5182
David Brazdil0f672f62019-12-10 10:32:29 +00005183 switch (type) {
5184 case IOMMU_DOMAIN_DMA:
5185 /* fallthrough */
5186 case IOMMU_DOMAIN_UNMANAGED:
5187 dmar_domain = alloc_domain(0);
5188 if (!dmar_domain) {
5189 pr_err("Can't allocate dmar_domain\n");
5190 return NULL;
5191 }
5192 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
5193 pr_err("Domain initialization failed\n");
5194 domain_exit(dmar_domain);
5195 return NULL;
5196 }
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005197
David Brazdil0f672f62019-12-10 10:32:29 +00005198 if (type == IOMMU_DOMAIN_DMA &&
5199 init_iova_flush_queue(&dmar_domain->iovad,
5200 iommu_flush_iova, iova_entry_free)) {
5201 pr_warn("iova flush queue initialization failed\n");
5202 intel_iommu_strict = 1;
5203 }
5204
5205 domain_update_iommu_cap(dmar_domain);
5206
5207 domain = &dmar_domain->domain;
5208 domain->geometry.aperture_start = 0;
5209 domain->geometry.aperture_end =
5210 __DOMAIN_MAX_ADDR(dmar_domain->gaw);
5211 domain->geometry.force_aperture = true;
5212
5213 return domain;
5214 case IOMMU_DOMAIN_IDENTITY:
5215 return &si_domain->domain;
5216 default:
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005217 return NULL;
5218 }
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005219
David Brazdil0f672f62019-12-10 10:32:29 +00005220 return NULL;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005221}
5222
5223static void intel_iommu_domain_free(struct iommu_domain *domain)
5224{
David Brazdil0f672f62019-12-10 10:32:29 +00005225 if (domain != &si_domain->domain)
5226 domain_exit(to_dmar_domain(domain));
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005227}
5228
David Brazdil0f672f62019-12-10 10:32:29 +00005229/*
5230 * Check whether a @domain could be attached to the @dev through the
5231 * aux-domain attach/detach APIs.
5232 */
5233static inline bool
5234is_aux_domain(struct device *dev, struct iommu_domain *domain)
5235{
5236 struct device_domain_info *info = dev->archdata.iommu;
5237
5238 return info && info->auxd_enabled &&
5239 domain->type == IOMMU_DOMAIN_UNMANAGED;
5240}
5241
5242static void auxiliary_link_device(struct dmar_domain *domain,
5243 struct device *dev)
5244{
5245 struct device_domain_info *info = dev->archdata.iommu;
5246
5247 assert_spin_locked(&device_domain_lock);
5248 if (WARN_ON(!info))
5249 return;
5250
5251 domain->auxd_refcnt++;
5252 list_add(&domain->auxd, &info->auxiliary_domains);
5253}
5254
5255static void auxiliary_unlink_device(struct dmar_domain *domain,
5256 struct device *dev)
5257{
5258 struct device_domain_info *info = dev->archdata.iommu;
5259
5260 assert_spin_locked(&device_domain_lock);
5261 if (WARN_ON(!info))
5262 return;
5263
5264 list_del(&domain->auxd);
5265 domain->auxd_refcnt--;
5266
5267 if (!domain->auxd_refcnt && domain->default_pasid > 0)
5268 intel_pasid_free_id(domain->default_pasid);
5269}
5270
5271static int aux_domain_add_dev(struct dmar_domain *domain,
5272 struct device *dev)
5273{
5274 int ret;
5275 u8 bus, devfn;
5276 unsigned long flags;
5277 struct intel_iommu *iommu;
5278
5279 iommu = device_to_iommu(dev, &bus, &devfn);
5280 if (!iommu)
5281 return -ENODEV;
5282
5283 if (domain->default_pasid <= 0) {
5284 int pasid;
5285
5286 pasid = intel_pasid_alloc_id(domain, PASID_MIN,
5287 pci_max_pasids(to_pci_dev(dev)),
5288 GFP_KERNEL);
5289 if (pasid <= 0) {
5290 pr_err("Can't allocate default pasid\n");
5291 return -ENODEV;
5292 }
5293 domain->default_pasid = pasid;
5294 }
5295
5296 spin_lock_irqsave(&device_domain_lock, flags);
5297 /*
5298 * iommu->lock must be held to attach domain to iommu and setup the
5299 * pasid entry for second level translation.
5300 */
5301 spin_lock(&iommu->lock);
5302 ret = domain_attach_iommu(domain, iommu);
5303 if (ret)
5304 goto attach_failed;
5305
5306 /* Setup the PASID entry for mediated devices: */
5307 ret = intel_pasid_setup_second_level(iommu, domain, dev,
5308 domain->default_pasid);
5309 if (ret)
5310 goto table_failed;
5311 spin_unlock(&iommu->lock);
5312
5313 auxiliary_link_device(domain, dev);
5314
5315 spin_unlock_irqrestore(&device_domain_lock, flags);
5316
5317 return 0;
5318
5319table_failed:
5320 domain_detach_iommu(domain, iommu);
5321attach_failed:
5322 spin_unlock(&iommu->lock);
5323 spin_unlock_irqrestore(&device_domain_lock, flags);
5324 if (!domain->auxd_refcnt && domain->default_pasid > 0)
5325 intel_pasid_free_id(domain->default_pasid);
5326
5327 return ret;
5328}
5329
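/*
 * Undo aux_domain_add_dev(): unlink @dev from @domain, tear down the PASID
 * entry and detach the domain from the IOMMU.
 */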
5330static void aux_domain_remove_dev(struct dmar_domain *domain,
5331 struct device *dev)
5332{
5333 struct device_domain_info *info;
5334 struct intel_iommu *iommu;
5335 unsigned long flags;
5336
5337 if (!is_aux_domain(dev, &domain->domain))
5338 return;
5339
5340 spin_lock_irqsave(&device_domain_lock, flags);
5341 info = dev->archdata.iommu;
5342 iommu = info->iommu;
5343
5344 auxiliary_unlink_device(domain, dev);
5345
5346 spin_lock(&iommu->lock);
5347 intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid);
5348 domain_detach_iommu(domain, iommu);
5349 spin_unlock(&iommu->lock);
5350
5351 spin_unlock_irqrestore(&device_domain_lock, flags);
5352}
5353
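/*
 * Validate that @dev's IOMMU can address everything already mapped in
 * @domain, and shrink the domain's page-table depth to what that IOMMU
 * supports before the actual attach.
 */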
5354static int prepare_domain_attach_device(struct iommu_domain *domain,
5355 struct device *dev)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005356{
5357 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5358 struct intel_iommu *iommu;
5359 int addr_width;
5360 u8 bus, devfn;
5361
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005362 iommu = device_to_iommu(dev, &bus, &devfn);
5363 if (!iommu)
5364 return -ENODEV;
5365
5366 /* check if this iommu agaw is sufficient for max mapped address */
5367 addr_width = agaw_to_width(iommu->agaw);
5368 if (addr_width > cap_mgaw(iommu->cap))
5369 addr_width = cap_mgaw(iommu->cap);
5370
5371 if (dmar_domain->max_addr > (1LL << addr_width)) {
David Brazdil0f672f62019-12-10 10:32:29 +00005372 dev_err(dev, "%s: iommu width (%d) is not sufficient for the mapped address (%llx)\n",
5373 __func__, addr_width, dmar_domain->max_addr);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005375 return -EFAULT;
5376 }
5377 dmar_domain->gaw = addr_width;
5378
5379 /*
5380 * Knock out extra levels of page tables if necessary
5381 */
5382 while (iommu->agaw < dmar_domain->agaw) {
5383 struct dma_pte *pte;
5384
5385 pte = dmar_domain->pgd;
5386 if (dma_pte_present(pte)) {
5387 dmar_domain->pgd = (struct dma_pte *)
5388 phys_to_virt(dma_pte_addr(pte));
5389 free_pgtable_page(pte);
5390 }
5391 dmar_domain->agaw--;
5392 }
5393
David Brazdil0f672f62019-12-10 10:32:29 +00005394 return 0;
5395}
5396
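/*
 * attach_dev callback for a full (non-auxiliary) attach: RMRR-locked devices
 * may not be moved into an unmanaged domain, and any existing context
 * mapping is torn down before the new domain is attached.
 */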
5397static int intel_iommu_attach_device(struct iommu_domain *domain,
5398 struct device *dev)
5399{
5400 int ret;
5401
5402 if (domain->type == IOMMU_DOMAIN_UNMANAGED &&
5403 device_is_rmrr_locked(dev)) {
5404 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
5405 return -EPERM;
5406 }
5407
5408 if (is_aux_domain(dev, domain))
5409 return -EPERM;
5410
5411 /* normally dev is not mapped */
5412 if (unlikely(domain_context_mapped(dev))) {
5413 struct dmar_domain *old_domain;
5414
5415 old_domain = find_domain(dev);
5416 if (old_domain)
5417 dmar_remove_one_dev_info(dev);
5418 }
5419
5420 ret = prepare_domain_attach_device(domain, dev);
5421 if (ret)
5422 return ret;
5423
5424 return domain_add_dev_info(to_dmar_domain(domain), dev);
5425}
5426
5427static int intel_iommu_aux_attach_device(struct iommu_domain *domain,
5428 struct device *dev)
5429{
5430 int ret;
5431
5432 if (!is_aux_domain(dev, domain))
5433 return -EPERM;
5434
5435 ret = prepare_domain_attach_device(domain, dev);
5436 if (ret)
5437 return ret;
5438
5439 return aux_domain_add_dev(to_dmar_domain(domain), dev);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005440}
5441
5442static void intel_iommu_detach_device(struct iommu_domain *domain,
5443 struct device *dev)
5444{
David Brazdil0f672f62019-12-10 10:32:29 +00005445 dmar_remove_one_dev_info(dev);
5446}
5447
5448static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
5449 struct device *dev)
5450{
5451 aux_domain_remove_dev(to_dmar_domain(domain), dev);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005452}
5453
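/*
 * Map [hpa, hpa + size) at @iova in @domain. The IOMMU prot flags are
 * translated to DMA PTE bits, and the domain's max_addr high-water mark is
 * grown if the new mapping extends beyond it.
 */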
5454static int intel_iommu_map(struct iommu_domain *domain,
5455 unsigned long iova, phys_addr_t hpa,
5456 size_t size, int iommu_prot)
5457{
5458 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5459 u64 max_addr;
5460 int prot = 0;
5461 int ret;
5462
5463 if (iommu_prot & IOMMU_READ)
5464 prot |= DMA_PTE_READ;
5465 if (iommu_prot & IOMMU_WRITE)
5466 prot |= DMA_PTE_WRITE;
5467 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
5468 prot |= DMA_PTE_SNP;
5469
5470 max_addr = iova + size;
5471 if (dmar_domain->max_addr < max_addr) {
5472 u64 end;
5473
5474 /* check if minimum agaw is sufficient for mapped address */
5475 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
5476 if (end < max_addr) {
5477 pr_err("%s: iommu width (%d) is not sufficient for the mapped address (%llx)\n",
5478 __func__, dmar_domain->gaw, max_addr);
5480 return -EFAULT;
5481 }
5482 dmar_domain->max_addr = max_addr;
5483 }
5484 /* Round up size to next multiple of PAGE_SIZE, if it and
5485 the low bits of hpa would take us onto the next page */
5486 size = aligned_nrpages(hpa, size);
5487 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
5488 hpa >> VTD_PAGE_SHIFT, size, prot);
5489 return ret;
5490}
5491
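/*
 * Unmap at least @size bytes at @iova (a large-page mapping forces the whole
 * large page to be unmapped), flush the IOTLB on every IOMMU the domain is
 * attached to, and only then free the removed page-table pages.
 */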
5492static size_t intel_iommu_unmap(struct iommu_domain *domain,
David Brazdil0f672f62019-12-10 10:32:29 +00005493 unsigned long iova, size_t size,
5494 struct iommu_iotlb_gather *gather)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005495{
5496 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5497 struct page *freelist = NULL;
5498 unsigned long start_pfn, last_pfn;
5499 unsigned int npages;
5500 int iommu_id, level = 0;
5501
5502 /* Cope with horrid API which requires us to unmap more than the
5503 size argument if it happens to be a large-page mapping. */
5504 BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
5505
5506 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
5507 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
5508
5509 start_pfn = iova >> VTD_PAGE_SHIFT;
5510 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
5511
5512 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
5513
5514 npages = last_pfn - start_pfn + 1;
5515
5516 for_each_domain_iommu(iommu_id, dmar_domain)
5517 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
5518 start_pfn, npages, !freelist, 0);
5519
5520 dma_free_pagelist(freelist);
5521
5522 if (dmar_domain->max_addr == iova + size)
5523 dmar_domain->max_addr = iova;
5524
5525 return size;
5526}
5527
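/*
 * Walk the domain's page table and translate @iova to a physical address;
 * returns 0 if nothing is mapped there.
 */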
5528static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
5529 dma_addr_t iova)
5530{
5531 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5532 struct dma_pte *pte;
5533 int level = 0;
5534 u64 phys = 0;
5535
5536 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
Olivier Deprez0e641232021-09-23 10:07:05 +02005537 if (pte && dma_pte_present(pte))
5538 phys = dma_pte_addr(pte) +
5539 (iova & (BIT_MASK(level_to_offset_bits(level) +
5540 VTD_PAGE_SHIFT) - 1));
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005541
5542 return phys;
5543}
5544
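/* True only if every active IOMMU in the system supports scalable mode. */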
David Brazdil0f672f62019-12-10 10:32:29 +00005545static inline bool scalable_mode_support(void)
5546{
5547 struct dmar_drhd_unit *drhd;
5548 struct intel_iommu *iommu;
5549 bool ret = true;
5550
5551 rcu_read_lock();
5552 for_each_active_iommu(iommu, drhd) {
5553 if (!sm_supported(iommu)) {
5554 ret = false;
5555 break;
5556 }
5557 }
5558 rcu_read_unlock();
5559
5560 return ret;
5561}
5562
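/* True only if every active IOMMU in the system supports PASID. */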
5563static inline bool iommu_pasid_support(void)
5564{
5565 struct dmar_drhd_unit *drhd;
5566 struct intel_iommu *iommu;
5567 bool ret = true;
5568
5569 rcu_read_lock();
5570 for_each_active_iommu(iommu, drhd) {
5571 if (!pasid_supported(iommu)) {
5572 ret = false;
5573 break;
5574 }
5575 }
5576 rcu_read_unlock();
5577
5578 return ret;
5579}
5580
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005581static bool intel_iommu_capable(enum iommu_cap cap)
5582{
5583 if (cap == IOMMU_CAP_CACHE_COHERENCY)
5584 return domain_update_iommu_snooping(NULL) == 1;
5585 if (cap == IOMMU_CAP_INTR_REMAP)
5586 return irq_remapping_enabled == 1;
5587
5588 return false;
5589}
5590
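/*
 * add_device callback: link @dev to its IOMMU, add it to an IOMMU group, and
 * make sure it ends up on the domain type it requires, falling back to a
 * private identity or private DMA domain when the group's default domain
 * cannot be changed. Devices that need bounce buffering get the bounce-page
 * dma_ops.
 */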
5591static int intel_iommu_add_device(struct device *dev)
5592{
David Brazdil0f672f62019-12-10 10:32:29 +00005593 struct dmar_domain *dmar_domain;
5594 struct iommu_domain *domain;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005595 struct intel_iommu *iommu;
5596 struct iommu_group *group;
5597 u8 bus, devfn;
David Brazdil0f672f62019-12-10 10:32:29 +00005598 int ret;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005599
5600 iommu = device_to_iommu(dev, &bus, &devfn);
5601 if (!iommu)
5602 return -ENODEV;
5603
5604 iommu_device_link(&iommu->iommu, dev);
5605
David Brazdil0f672f62019-12-10 10:32:29 +00005606 if (translation_pre_enabled(iommu))
5607 dev->archdata.iommu = DEFER_DEVICE_DOMAIN_INFO;
5608
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005609 group = iommu_group_get_for_dev(dev);
5610
Olivier Deprez0e641232021-09-23 10:07:05 +02005611 if (IS_ERR(group)) {
5612 ret = PTR_ERR(group);
5613 goto unlink;
5614 }
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005615
5616 iommu_group_put(group);
David Brazdil0f672f62019-12-10 10:32:29 +00005617
5618 domain = iommu_get_domain_for_dev(dev);
5619 dmar_domain = to_dmar_domain(domain);
5620 if (domain->type == IOMMU_DOMAIN_DMA) {
5621 if (device_def_domain_type(dev) == IOMMU_DOMAIN_IDENTITY) {
5622 ret = iommu_request_dm_for_dev(dev);
5623 if (ret) {
5624 dmar_remove_one_dev_info(dev);
5625 dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
5626 domain_add_dev_info(si_domain, dev);
5627 dev_info(dev,
5628 "Device uses a private identity domain.\n");
5629 }
5630 }
5631 } else {
5632 if (device_def_domain_type(dev) == IOMMU_DOMAIN_DMA) {
5633 ret = iommu_request_dma_domain_for_dev(dev);
5634 if (ret) {
5635 dmar_remove_one_dev_info(dev);
5636 dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
5637 if (!get_private_domain_for_dev(dev)) {
5638 dev_warn(dev,
5639 "Failed to get a private domain.\n");
Olivier Deprez0e641232021-09-23 10:07:05 +02005640 ret = -ENOMEM;
5641 goto unlink;
David Brazdil0f672f62019-12-10 10:32:29 +00005642 }
5643
5644 dev_info(dev,
5645 "Device uses a private dma domain.\n");
5646 }
5647 }
5648 }
5649
5650 if (device_needs_bounce(dev)) {
5651 dev_info(dev, "Use Intel IOMMU bounce page dma_ops\n");
5652 set_dma_ops(dev, &bounce_dma_ops);
5653 }
5654
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005655 return 0;
Olivier Deprez0e641232021-09-23 10:07:05 +02005656
5657unlink:
5658 iommu_device_unlink(&iommu->iommu, dev);
5659 return ret;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005660}
5661
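/*
 * remove_device callback: detach the device, drop it from its IOMMU group,
 * unlink it from its IOMMU and restore the default dma_ops if bounce
 * buffering was in use.
 */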
5662static void intel_iommu_remove_device(struct device *dev)
5663{
5664 struct intel_iommu *iommu;
5665 u8 bus, devfn;
5666
5667 iommu = device_to_iommu(dev, &bus, &devfn);
5668 if (!iommu)
5669 return;
5670
David Brazdil0f672f62019-12-10 10:32:29 +00005671 dmar_remove_one_dev_info(dev);
5672
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005673 iommu_group_remove_device(dev);
5674
5675 iommu_device_unlink(&iommu->iommu, dev);
David Brazdil0f672f62019-12-10 10:32:29 +00005676
5677 if (device_needs_bounce(dev))
5678 set_dma_ops(dev, NULL);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005679}
5680
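/*
 * Report reserved regions for @device: the RMRRs that cover it, the legacy
 * 0-16MB window for ISA bridges when CONFIG_INTEL_IOMMU_FLOPPY_WA is set,
 * and the IOAPIC/MSI range as an MSI region.
 */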
5681static void intel_iommu_get_resv_regions(struct device *device,
5682 struct list_head *head)
5683{
David Brazdil0f672f62019-12-10 10:32:29 +00005684 int prot = DMA_PTE_READ | DMA_PTE_WRITE;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005685 struct iommu_resv_region *reg;
5686 struct dmar_rmrr_unit *rmrr;
5687 struct device *i_dev;
5688 int i;
5689
David Brazdil0f672f62019-12-10 10:32:29 +00005690 down_read(&dmar_global_lock);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005691 for_each_rmrr_units(rmrr) {
5692 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
5693 i, i_dev) {
David Brazdil0f672f62019-12-10 10:32:29 +00005694 struct iommu_resv_region *resv;
5695 enum iommu_resv_type type;
5696 size_t length;
5697
5698 if (i_dev != device &&
5699 !is_downstream_to_pci_bridge(device, i_dev))
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005700 continue;
5701
David Brazdil0f672f62019-12-10 10:32:29 +00005702 length = rmrr->end_address - rmrr->base_address + 1;
5703
5704 type = device_rmrr_is_relaxable(device) ?
5705 IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT;
5706
5707 resv = iommu_alloc_resv_region(rmrr->base_address,
5708 length, prot, type);
5709 if (!resv)
5710 break;
5711
5712 list_add_tail(&resv->list, head);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005713 }
5714 }
David Brazdil0f672f62019-12-10 10:32:29 +00005715 up_read(&dmar_global_lock);
5716
5717#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
5718 if (dev_is_pci(device)) {
5719 struct pci_dev *pdev = to_pci_dev(device);
5720
5721 if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
Olivier Deprez0e641232021-09-23 10:07:05 +02005722 reg = iommu_alloc_resv_region(0, 1UL << 24, prot,
5723 IOMMU_RESV_DIRECT_RELAXABLE);
David Brazdil0f672f62019-12-10 10:32:29 +00005724 if (reg)
5725 list_add_tail(&reg->list, head);
5726 }
5727 }
5728#endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005729
5730 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
5731 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
5732 0, IOMMU_RESV_MSI);
5733 if (!reg)
5734 return;
5735 list_add_tail(&reg->list, head);
5736}
5737
5738static void intel_iommu_put_resv_regions(struct device *dev,
5739 struct list_head *head)
5740{
5741 struct iommu_resv_region *entry, *next;
5742
David Brazdil0f672f62019-12-10 10:32:29 +00005743 list_for_each_entry_safe(entry, next, head, list)
5744 kfree(entry);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005745}
5746
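/*
 * Enable PASID support for @dev: set the PASID-enable bit in its context
 * entry, flush the context cache, and enable PASID in the device itself if
 * it was not already enabled.
 */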
David Brazdil0f672f62019-12-10 10:32:29 +00005747int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005748{
5749 struct device_domain_info *info;
5750 struct context_entry *context;
5751 struct dmar_domain *domain;
5752 unsigned long flags;
5753 u64 ctx_lo;
5754 int ret;
5755
David Brazdil0f672f62019-12-10 10:32:29 +00005756 domain = find_domain(dev);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005757 if (!domain)
5758 return -EINVAL;
5759
5760 spin_lock_irqsave(&device_domain_lock, flags);
5761 spin_lock(&iommu->lock);
5762
5763 ret = -EINVAL;
David Brazdil0f672f62019-12-10 10:32:29 +00005764 info = dev->archdata.iommu;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005765 if (!info || !info->pasid_supported)
5766 goto out;
5767
5768 context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5769 if (WARN_ON(!context))
5770 goto out;
5771
5772 ctx_lo = context[0].lo;
5773
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005774 if (!(ctx_lo & CONTEXT_PASIDE)) {
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005775 ctx_lo |= CONTEXT_PASIDE;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005776 context[0].lo = ctx_lo;
5777 wmb();
David Brazdil0f672f62019-12-10 10:32:29 +00005778 iommu->flush.flush_context(iommu,
5779 domain->iommu_did[iommu->seq_id],
5780 PCI_DEVID(info->bus, info->devfn),
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005781 DMA_CCMD_MASK_NOBIT,
5782 DMA_CCMD_DEVICE_INVL);
5783 }
5784
5785 /* Enable PASID support in the device, if it wasn't already */
5786 if (!info->pasid_enabled)
5787 iommu_enable_dev_iotlb(info);
5788
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005789 ret = 0;
5790
5791 out:
5792 spin_unlock(&iommu->lock);
5793 spin_unlock_irqrestore(&device_domain_lock, flags);
5794
5795 return ret;
5796}
5797
David Brazdil0f672f62019-12-10 10:32:29 +00005798static void intel_iommu_apply_resv_region(struct device *dev,
5799 struct iommu_domain *domain,
5800 struct iommu_resv_region *region)
5801{
5802 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5803 unsigned long start, end;
5804
5805 start = IOVA_PFN(region->start);
5806 end = IOVA_PFN(region->start + region->length - 1);
5807
5808 WARN_ON_ONCE(!reserve_iova(&dmar_domain->iovad, start, end));
5809}
5810
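/*
 * device_group callback: PCI devices use PCI-aware grouping, everything else
 * gets a generic per-device group.
 */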
Olivier Deprez0e641232021-09-23 10:07:05 +02005811static struct iommu_group *intel_iommu_device_group(struct device *dev)
5812{
5813 if (dev_is_pci(dev))
5814 return pci_device_group(dev);
5815 return generic_device_group(dev);
5816}
5817
David Brazdil0f672f62019-12-10 10:32:29 +00005818#ifdef CONFIG_INTEL_IOMMU_SVM
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005819struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5820{
5821 struct intel_iommu *iommu;
5822 u8 bus, devfn;
5823
5824 if (iommu_dummy(dev)) {
5825 dev_warn(dev,
5826 "No IOMMU translation for device; cannot enable SVM\n");
5827 return NULL;
5828 }
5829
5830 iommu = device_to_iommu(dev, &bus, &devfn);
5831 if (!iommu) {
5832 dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
5833 return NULL;
5834 }
5835
5836 return iommu;
5837}
5838#endif /* CONFIG_INTEL_IOMMU_SVM */
5839
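/*
 * Enable auxiliary-domain support for @dev: the IOMMU must support scalable
 * mode and PASID, and PASID is enabled for the device before it is marked
 * auxd-capable.
 */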
David Brazdil0f672f62019-12-10 10:32:29 +00005840static int intel_iommu_enable_auxd(struct device *dev)
5841{
5842 struct device_domain_info *info;
5843 struct intel_iommu *iommu;
5844 unsigned long flags;
5845 u8 bus, devfn;
5846 int ret;
5847
5848 iommu = device_to_iommu(dev, &bus, &devfn);
5849 if (!iommu || dmar_disabled)
5850 return -EINVAL;
5851
5852 if (!sm_supported(iommu) || !pasid_supported(iommu))
5853 return -EINVAL;
5854
5855 ret = intel_iommu_enable_pasid(iommu, dev);
5856 if (ret)
5857 return -ENODEV;
5858
5859 spin_lock_irqsave(&device_domain_lock, flags);
5860 info = dev->archdata.iommu;
5861 info->auxd_enabled = 1;
5862 spin_unlock_irqrestore(&device_domain_lock, flags);
5863
5864 return 0;
5865}
5866
5867static int intel_iommu_disable_auxd(struct device *dev)
5868{
5869 struct device_domain_info *info;
5870 unsigned long flags;
5871
5872 spin_lock_irqsave(&device_domain_lock, flags);
5873 info = dev->archdata.iommu;
5874 if (!WARN_ON(!info))
5875 info->auxd_enabled = 0;
5876 spin_unlock_irqrestore(&device_domain_lock, flags);
5877
5878 return 0;
5879}
5880
5881/*
5882 * A PCI Express Designated Vendor-Specific Extended Capability (DVSEC) is
5883 * defined in section 3.7 of the Intel Scalable I/O Virtualization technical
5884 * specification so that system software and tools can detect endpoint
5885 * devices that support Intel Scalable I/O Virtualization without any host
5886 * driver dependency.
5887 *
5888 * Returns the config-space offset of the matching extended capability
5889 * structure, or 0 if the device does not support it.
5890 */
5891static int siov_find_pci_dvsec(struct pci_dev *pdev)
5892{
5893 int pos;
5894 u16 vendor, id;
5895
5896 pos = pci_find_next_ext_capability(pdev, 0, 0x23);
5897 while (pos) {
5898 pci_read_config_word(pdev, pos + 4, &vendor);
5899 pci_read_config_word(pdev, pos + 8, &id);
5900 if (vendor == PCI_VENDOR_ID_INTEL && id == 5)
5901 return pos;
5902
5903 pos = pci_find_next_ext_capability(pdev, pos, 0x23);
5904 }
5905
5906 return 0;
5907}
5908
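/*
 * IOMMU_DEV_FEAT_AUX is only reported for PCI devices that expose PASID and
 * the Scalable IOV DVSEC, and only when all IOMMUs support scalable mode and
 * PASID.
 */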
5909static bool
5910intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
5911{
5912 if (feat == IOMMU_DEV_FEAT_AUX) {
5913 int ret;
5914
5915 if (!dev_is_pci(dev) || dmar_disabled ||
5916 !scalable_mode_support() || !iommu_pasid_support())
5917 return false;
5918
5919 ret = pci_pasid_features(to_pci_dev(dev));
5920 if (ret < 0)
5921 return false;
5922
5923 return !!siov_find_pci_dvsec(to_pci_dev(dev));
5924 }
5925
5926 return false;
5927}
5928
5929static int
5930intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
5931{
5932 if (feat == IOMMU_DEV_FEAT_AUX)
5933 return intel_iommu_enable_auxd(dev);
5934
5935 return -ENODEV;
5936}
5937
5938static int
5939intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
5940{
5941 if (feat == IOMMU_DEV_FEAT_AUX)
5942 return intel_iommu_disable_auxd(dev);
5943
5944 return -ENODEV;
5945}
5946
5947static bool
5948intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
5949{
5950 struct device_domain_info *info = dev->archdata.iommu;
5951
5952 if (feat == IOMMU_DEV_FEAT_AUX)
5953 return scalable_mode_support() && info && info->auxd_enabled;
5954
5955 return false;
5956}
5957
5958static int
5959intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
5960{
5961 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5962
5963 return dmar_domain->default_pasid > 0 ?
5964 dmar_domain->default_pasid : -EINVAL;
5965}
5966
5967static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain,
5968 struct device *dev)
5969{
5970 return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO;
5971}
5972
Olivier Deprez0e641232021-09-23 10:07:05 +02005973/*
5974 * Check whether the device lives on an external-facing PCI port that is
5975 * marked as untrusted. Such devices must not be allowed to apply quirks,
5976 * so that they cannot bypass the IOMMU restrictions.
5977 */
5978static bool risky_device(struct pci_dev *pdev)
5979{
5980 if (pdev->untrusted) {
5981 pci_info(pdev,
5982 "Skipping IOMMU quirk for dev [%04X:%04X] on untrusted PCI link\n",
5983 pdev->vendor, pdev->device);
5984 pci_info(pdev, "Please check with your BIOS/Platform vendor about this\n");
5985 return true;
5986 }
5987 return false;
5988}
5989
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005990const struct iommu_ops intel_iommu_ops = {
5991 .capable = intel_iommu_capable,
5992 .domain_alloc = intel_iommu_domain_alloc,
5993 .domain_free = intel_iommu_domain_free,
5994 .attach_dev = intel_iommu_attach_device,
5995 .detach_dev = intel_iommu_detach_device,
David Brazdil0f672f62019-12-10 10:32:29 +00005996 .aux_attach_dev = intel_iommu_aux_attach_device,
5997 .aux_detach_dev = intel_iommu_aux_detach_device,
5998 .aux_get_pasid = intel_iommu_aux_get_pasid,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005999 .map = intel_iommu_map,
6000 .unmap = intel_iommu_unmap,
6001 .iova_to_phys = intel_iommu_iova_to_phys,
6002 .add_device = intel_iommu_add_device,
6003 .remove_device = intel_iommu_remove_device,
6004 .get_resv_regions = intel_iommu_get_resv_regions,
6005 .put_resv_regions = intel_iommu_put_resv_regions,
David Brazdil0f672f62019-12-10 10:32:29 +00006006 .apply_resv_region = intel_iommu_apply_resv_region,
Olivier Deprez0e641232021-09-23 10:07:05 +02006007 .device_group = intel_iommu_device_group,
David Brazdil0f672f62019-12-10 10:32:29 +00006008 .dev_has_feat = intel_iommu_dev_has_feat,
6009 .dev_feat_enabled = intel_iommu_dev_feat_enabled,
6010 .dev_enable_feat = intel_iommu_dev_enable_feat,
6011 .dev_disable_feat = intel_iommu_dev_disable_feat,
6012 .is_attach_deferred = intel_iommu_is_attach_deferred,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006013 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
6014};
6015
David Brazdil0f672f62019-12-10 10:32:29 +00006016static void quirk_iommu_igfx(struct pci_dev *dev)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006017{
Olivier Deprez0e641232021-09-23 10:07:05 +02006018 if (risky_device(dev))
6019 return;
6020
David Brazdil0f672f62019-12-10 10:32:29 +00006021 pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006022 dmar_map_gfx = 0;
6023}
6024
David Brazdil0f672f62019-12-10 10:32:29 +00006025/* G4x/GM45 integrated gfx dmar support is totally busted. */
6026DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
6027DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
6028DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx);
6029DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx);
6030DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx);
6031DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx);
6032DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx);
6033
6034/* Broadwell igfx malfunctions with dmar */
6035DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx);
6036DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx);
6037DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx);
6038DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx);
6039DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx);
6040DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx);
6041DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx);
6042DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx);
6043DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx);
6044DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx);
6045DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx);
6046DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx);
6047DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx);
6048DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx);
6049DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx);
6050DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx);
6051DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx);
6052DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx);
6053DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx);
6054DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx);
6055DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx);
6056DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
6057DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
6058DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006059
6060static void quirk_iommu_rwbf(struct pci_dev *dev)
6061{
Olivier Deprez0e641232021-09-23 10:07:05 +02006062 if (risky_device(dev))
6063 return;
6064
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006065 /*
6066 * Mobile 4 Series Chipset neglects to set RWBF capability,
6067 * but needs it. Same seems to hold for the desktop versions.
6068 */
David Brazdil0f672f62019-12-10 10:32:29 +00006069 pci_info(dev, "Forcing write-buffer flush capability\n");
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006070 rwbf_quirk = 1;
6071}
6072
6073DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
6074DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
6075DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
6076DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
6077DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
6078DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
6079DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
6080
6081#define GGC 0x52
6082#define GGC_MEMORY_SIZE_MASK (0xf << 8)
6083#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
6084#define GGC_MEMORY_SIZE_1M (0x1 << 8)
6085#define GGC_MEMORY_SIZE_2M (0x3 << 8)
6086#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
6087#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
6088#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
6089#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
6090
6091static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
6092{
6093 unsigned short ggc;
6094
Olivier Deprez0e641232021-09-23 10:07:05 +02006095 if (risky_device(dev))
6096 return;
6097
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006098 if (pci_read_config_word(dev, GGC, &ggc))
6099 return;
6100
6101 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
David Brazdil0f672f62019-12-10 10:32:29 +00006102 pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006103 dmar_map_gfx = 0;
6104 } else if (dmar_map_gfx) {
6105 /* we have to ensure the gfx device is idle before we flush */
David Brazdil0f672f62019-12-10 10:32:29 +00006106 pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006107 intel_iommu_strict = 1;
6108 }
6109}
6110DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
6111DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
6112DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
6113DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
6114
6115/* On Tylersburg chipsets, some BIOSes have been known to enable the
6116 ISOCH DMAR unit for the Azalia sound device, but not give it any
6117 TLB entries, which causes it to deadlock. Check for that. We do
6118 this in a function called from init_dmars(), instead of in a PCI
6119 quirk, because we don't want to print the obnoxious "BIOS broken"
6120 message if VT-d is actually disabled.
6121*/
6122static void __init check_tylersburg_isoch(void)
6123{
6124 struct pci_dev *pdev;
6125 uint32_t vtisochctrl;
6126
6127 /* If there's no Azalia in the system anyway, forget it. */
6128 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
6129 if (!pdev)
6130 return;
Olivier Deprez0e641232021-09-23 10:07:05 +02006131
6132 if (risky_device(pdev)) {
6133 pci_dev_put(pdev);
6134 return;
6135 }
6136
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006137 pci_dev_put(pdev);
6138
6139 /* System Management Registers. Might be hidden, in which case
6140 we can't do the sanity check. But that's OK, because the
6141 known-broken BIOSes _don't_ actually hide it, so far. */
6142 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
6143 if (!pdev)
6144 return;
6145
Olivier Deprez0e641232021-09-23 10:07:05 +02006146 if (risky_device(pdev)) {
6147 pci_dev_put(pdev);
6148 return;
6149 }
6150
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006151 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
6152 pci_dev_put(pdev);
6153 return;
6154 }
6155
6156 pci_dev_put(pdev);
6157
6158 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
6159 if (vtisochctrl & 1)
6160 return;
6161
6162 /* Drop all bits other than the number of TLB entries */
6163 vtisochctrl &= 0x1c;
6164
6165 /* If we have the recommended number of TLB entries (16), fine. */
6166 if (vtisochctrl == 0x10)
6167 return;
6168
6169 /* Zero TLB entries? You get to ride the short bus to school. */
6170 if (!vtisochctrl) {
6171 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
6172 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
6173 dmi_get_system_info(DMI_BIOS_VENDOR),
6174 dmi_get_system_info(DMI_BIOS_VERSION),
6175 dmi_get_system_info(DMI_PRODUCT_VERSION));
6176 iommu_identity_mapping |= IDENTMAP_AZALIA;
6177 return;
6178 }
6179
6180 pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
6181 vtisochctrl);
6182}