Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index f99b79d..0ed0e1f 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -22,6 +22,7 @@
#include <linux/cache.h>
#include <linux/dma-direct.h>
+#include <linux/dma-map-ops.h>
#include <linux/mm.h>
#include <linux/export.h>
#include <linux/spinlock.h>
@@ -49,9 +50,6 @@
#define CREATE_TRACE_POINTS
#include <trace/events/swiotlb.h>
-#define OFFSET(val,align) ((unsigned long) \
- ( (val) & ( (align) - 1)))
-
#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
/*
@@ -92,7 +90,7 @@ static unsigned int io_tlb_index;
* Max segment that we can provide which (if pages are contingous) will
* not be bounced (unless SWIOTLB_FORCE is set).
*/
-unsigned int max_segment;
+static unsigned int max_segment;
/*
* We need to save away the original address corresponding to a mapped entry
@@ -171,12 +169,20 @@ void swiotlb_print_info(void)
return;
}
- pr_info("mapped [mem %#010llx-%#010llx] (%luMB)\n",
- (unsigned long long)io_tlb_start,
- (unsigned long long)io_tlb_end,
+ pr_info("mapped [mem %pa-%pa] (%luMB)\n", &io_tlb_start, &io_tlb_end,
bytes >> 20);
}
+static inline unsigned long io_tlb_offset(unsigned long val)
+{
+ return val & (IO_TLB_SEGSIZE - 1);
+}
+
+static inline unsigned long nr_slots(u64 val)
+{
+ return DIV_ROUND_UP(val, IO_TLB_SIZE);
+}
+
/*
* Early SWIOTLB allocation may be too early to allow an architecture to
* perform the desired operations. This function allows the architecture to
@@ -226,7 +232,7 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
__func__, alloc_size, PAGE_SIZE);
for (i = 0; i < io_tlb_nslabs; i++) {
- io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+ io_tlb_list[i] = IO_TLB_SEGSIZE - io_tlb_offset(i);
io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
}
io_tlb_index = 0;
@@ -360,7 +366,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
goto cleanup4;
for (i = 0; i < io_tlb_nslabs; i++) {
- io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+ io_tlb_list[i] = IO_TLB_SEGSIZE - io_tlb_offset(i);
io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
}
io_tlb_index = 0;
@@ -446,22 +452,120 @@ static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
}
}
-phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
- dma_addr_t tbl_dma_addr,
- phys_addr_t orig_addr,
- size_t mapping_size,
- size_t alloc_size,
- enum dma_data_direction dir,
- unsigned long attrs)
+#define slot_addr(start, idx) ((start) + ((idx) << IO_TLB_SHIFT))
+
+/*
+ * Return the offset into a iotlb slot required to keep the device happy.
+ */
+static unsigned int swiotlb_align_offset(struct device *dev, u64 addr)
{
+ return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1);
+}
+
+/*
+ * Carefully handle integer overflow which can occur when boundary_mask == ~0UL.
+ */
+static inline unsigned long get_max_slots(unsigned long boundary_mask)
+{
+ if (boundary_mask == ~0UL)
+ return 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
+ return nr_slots(boundary_mask + 1);
+}
+
+static unsigned int wrap_index(unsigned int index)
+{
+ if (index >= io_tlb_nslabs)
+ return 0;
+ return index;
+}
+
+/*
+ * Find a suitable number of IO TLB entries size that will fit this request and
+ * allocate a buffer from that IO TLB pool.
+ */
+static int find_slots(struct device *dev, phys_addr_t orig_addr,
+ size_t alloc_size)
+{
+ unsigned long boundary_mask = dma_get_seg_boundary(dev);
+ dma_addr_t tbl_dma_addr =
+ phys_to_dma_unencrypted(dev, io_tlb_start) & boundary_mask;
+ unsigned long max_slots = get_max_slots(boundary_mask);
+ unsigned int iotlb_align_mask =
+ dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1);
+ unsigned int nslots = nr_slots(alloc_size), stride;
+ unsigned int index, wrap, count = 0, i;
unsigned long flags;
+
+ BUG_ON(!nslots);
+
+ /*
+ * For mappings with an alignment requirement don't bother looping to
+ * unaligned slots once we found an aligned one. For allocations of
+ * PAGE_SIZE or larger only look for page aligned allocations.
+ */
+ stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1;
+ if (alloc_size >= PAGE_SIZE)
+ stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT));
+
+ spin_lock_irqsave(&io_tlb_lock, flags);
+ if (unlikely(nslots > io_tlb_nslabs - io_tlb_used))
+ goto not_found;
+
+ index = wrap = wrap_index(ALIGN(io_tlb_index, stride));
+ do {
+ if ((slot_addr(tbl_dma_addr, index) & iotlb_align_mask) !=
+ (orig_addr & iotlb_align_mask)) {
+ index = wrap_index(index + 1);
+ continue;
+ }
+
+ /*
+ * If we find a slot that indicates we have 'nslots' number of
+ * contiguous buffers, we allocate the buffers from that slot
+ * and mark the entries as '0' indicating unavailable.
+ */
+ if (!iommu_is_span_boundary(index, nslots,
+ nr_slots(tbl_dma_addr),
+ max_slots)) {
+ if (io_tlb_list[index] >= nslots)
+ goto found;
+ }
+ index = wrap_index(index + stride);
+ } while (index != wrap);
+
+not_found:
+ spin_unlock_irqrestore(&io_tlb_lock, flags);
+ return -1;
+
+found:
+ for (i = index; i < index + nslots; i++)
+ io_tlb_list[i] = 0;
+ for (i = index - 1;
+ io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
+ io_tlb_list[i]; i--)
+ io_tlb_list[i] = ++count;
+
+ /*
+ * Update the indices to avoid searching in the next round.
+ */
+ if (index + nslots < io_tlb_nslabs)
+ io_tlb_index = index + nslots;
+ else
+ io_tlb_index = 0;
+ io_tlb_used += nslots;
+
+ spin_unlock_irqrestore(&io_tlb_lock, flags);
+ return index;
+}
+
+phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
+ size_t mapping_size, size_t alloc_size,
+ enum dma_data_direction dir, unsigned long attrs)
+{
+ unsigned int offset = swiotlb_align_offset(dev, orig_addr);
+ unsigned int i;
+ int index;
phys_addr_t tlb_addr;
- unsigned int nslots, stride, index, wrap;
- int i;
- unsigned long mask;
- unsigned long offset_slots;
- unsigned long max_slots;
- unsigned long tmp_io_tlb_used;
if (no_iotlb_memory)
panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
@@ -470,111 +574,32 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
if (mapping_size > alloc_size) {
- dev_warn_once(hwdev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
+ dev_warn_once(dev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
mapping_size, alloc_size);
return (phys_addr_t)DMA_MAPPING_ERROR;
}
- mask = dma_get_seg_boundary(hwdev);
-
- tbl_dma_addr &= mask;
-
- offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
-
- /*
- * Carefully handle integer overflow which can occur when mask == ~0UL.
- */
- max_slots = mask + 1
- ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT
- : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
-
- /*
- * For mappings greater than or equal to a page, we limit the stride
- * (and hence alignment) to a page size.
- */
- nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
- if (alloc_size >= PAGE_SIZE)
- stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
- else
- stride = 1;
-
- BUG_ON(!nslots);
-
- /*
- * Find suitable number of IO TLB entries size that will fit this
- * request and allocate a buffer from that IO TLB pool.
- */
- spin_lock_irqsave(&io_tlb_lock, flags);
-
- if (unlikely(nslots > io_tlb_nslabs - io_tlb_used))
- goto not_found;
-
- index = ALIGN(io_tlb_index, stride);
- if (index >= io_tlb_nslabs)
- index = 0;
- wrap = index;
-
- do {
- while (iommu_is_span_boundary(index, nslots, offset_slots,
- max_slots)) {
- index += stride;
- if (index >= io_tlb_nslabs)
- index = 0;
- if (index == wrap)
- goto not_found;
- }
-
- /*
- * If we find a slot that indicates we have 'nslots' number of
- * contiguous buffers, we allocate the buffers from that slot
- * and mark the entries as '0' indicating unavailable.
- */
- if (io_tlb_list[index] >= nslots) {
- int count = 0;
-
- for (i = index; i < (int) (index + nslots); i++)
- io_tlb_list[i] = 0;
- for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
- io_tlb_list[i] = ++count;
- tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT);
-
- /*
- * Update the indices to avoid searching in the next
- * round.
- */
- io_tlb_index = ((index + nslots) < io_tlb_nslabs
- ? (index + nslots) : 0);
-
- goto found;
- }
- index += stride;
- if (index >= io_tlb_nslabs)
- index = 0;
- } while (index != wrap);
-
-not_found:
- tmp_io_tlb_used = io_tlb_used;
-
- spin_unlock_irqrestore(&io_tlb_lock, flags);
- if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
- dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
- alloc_size, io_tlb_nslabs, tmp_io_tlb_used);
- return (phys_addr_t)DMA_MAPPING_ERROR;
-found:
- io_tlb_used += nslots;
- spin_unlock_irqrestore(&io_tlb_lock, flags);
+ index = find_slots(dev, orig_addr, alloc_size + offset);
+ if (index == -1) {
+ if (!(attrs & DMA_ATTR_NO_WARN))
+ dev_warn_ratelimited(dev,
+ "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
+ alloc_size, io_tlb_nslabs, io_tlb_used);
+ return (phys_addr_t)DMA_MAPPING_ERROR;
+ }
/*
* Save away the mapping from the original address to the DMA address.
* This is needed when we sync the memory. Then we sync the buffer if
* needed.
*/
- for (i = 0; i < nslots; i++)
- io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
+ for (i = 0; i < nr_slots(alloc_size + offset); i++)
+ io_tlb_orig_addr[index + i] = slot_addr(orig_addr, i);
+
+ tlb_addr = slot_addr(io_tlb_start, index) + offset;
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE);
-
return tlb_addr;
}
@@ -586,8 +611,9 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
enum dma_data_direction dir, unsigned long attrs)
{
unsigned long flags;
- int i, count, nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
- int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
+ unsigned int offset = swiotlb_align_offset(hwdev, tlb_addr);
+ int i, count, nslots = nr_slots(alloc_size + offset);
+ int index = (tlb_addr - offset - io_tlb_start) >> IO_TLB_SHIFT;
phys_addr_t orig_addr = io_tlb_orig_addr[index];
/*
@@ -605,26 +631,29 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
* with slots below and above the pool being returned.
*/
spin_lock_irqsave(&io_tlb_lock, flags);
- {
- count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
- io_tlb_list[index + nslots] : 0);
- /*
- * Step 1: return the slots to the free list, merging the
- * slots with superceeding slots
- */
- for (i = index + nslots - 1; i >= index; i--) {
- io_tlb_list[i] = ++count;
- io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
- }
- /*
- * Step 2: merge the returned slots with the preceding slots,
- * if available (non zero)
- */
- for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
- io_tlb_list[i] = ++count;
+ if (index + nslots < ALIGN(index + 1, IO_TLB_SEGSIZE))
+ count = io_tlb_list[index + nslots];
+ else
+ count = 0;
- io_tlb_used -= nslots;
+ /*
+ * Step 1: return the slots to the free list, merging the slots with
+ * superceeding slots
+ */
+ for (i = index + nslots - 1; i >= index; i--) {
+ io_tlb_list[i] = ++count;
+ io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
}
+
+ /*
+ * Step 2: merge the returned slots with the preceding slots, if
+ * available (non zero)
+ */
+ for (i = index - 1;
+ io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && io_tlb_list[i];
+ i--)
+ io_tlb_list[i] = ++count;
+ io_tlb_used -= nslots;
spin_unlock_irqrestore(&io_tlb_lock, flags);
}
@@ -637,7 +666,9 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
if (orig_addr == INVALID_PHYS_ADDR)
return;
- orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1);
+
+ orig_addr += (tlb_addr & (IO_TLB_SIZE - 1)) -
+ swiotlb_align_offset(hwdev, orig_addr);
switch (target) {
case SYNC_FOR_CPU:
@@ -660,40 +691,42 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
}
/*
- * Create a swiotlb mapping for the buffer at @phys, and in case of DMAing
+ * Create a swiotlb mapping for the buffer at @paddr, and in case of DMAing
* to the device copy the data into it as well.
*/
-bool swiotlb_map(struct device *dev, phys_addr_t *phys, dma_addr_t *dma_addr,
- size_t size, enum dma_data_direction dir, unsigned long attrs)
+dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
+ enum dma_data_direction dir, unsigned long attrs)
{
- trace_swiotlb_bounced(dev, *dma_addr, size, swiotlb_force);
+ phys_addr_t swiotlb_addr;
+ dma_addr_t dma_addr;
- if (unlikely(swiotlb_force == SWIOTLB_NO_FORCE)) {
- dev_warn_ratelimited(dev,
- "Cannot do DMA to address %pa\n", phys);
- return false;
- }
+ trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size,
+ swiotlb_force);
- /* Oh well, have to allocate and map a bounce buffer. */
- *phys = swiotlb_tbl_map_single(dev, __phys_to_dma(dev, io_tlb_start),
- *phys, size, size, dir, attrs);
- if (*phys == (phys_addr_t)DMA_MAPPING_ERROR)
- return false;
+ swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, dir,
+ attrs);
+ if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR)
+ return DMA_MAPPING_ERROR;
/* Ensure that the address returned is DMA'ble */
- *dma_addr = __phys_to_dma(dev, *phys);
- if (unlikely(!dma_capable(dev, *dma_addr, size))) {
- swiotlb_tbl_unmap_single(dev, *phys, size, size, dir,
+ dma_addr = phys_to_dma_unencrypted(dev, swiotlb_addr);
+ if (unlikely(!dma_capable(dev, dma_addr, size, true))) {
+ swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, size, dir,
attrs | DMA_ATTR_SKIP_CPU_SYNC);
- return false;
+ dev_WARN_ONCE(dev, 1,
+ "swiotlb addr %pad+%zu overflow (mask %llx, bus limit %llx).\n",
+ &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit);
+ return DMA_MAPPING_ERROR;
}
- return true;
+ if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ arch_sync_dma_for_device(swiotlb_addr, size, dir);
+ return dma_addr;
}
size_t swiotlb_max_mapping_size(struct device *dev)
{
- return ((size_t)1 << IO_TLB_SHIFT) * IO_TLB_SEGSIZE;
+ return ((size_t)IO_TLB_SIZE) * IO_TLB_SEGSIZE;
}
bool is_swiotlb_active(void)