Update Linux to v5.4.148
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.4.148.tar.gz
Change-Id: Ib3d26c5ba9b022e2e03533005c4fed4d7c30b61b
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 13efc29..fc48298 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -157,10 +157,9 @@
static unsigned long long kvm_createvm_count;
static unsigned long long kvm_active_vms;
-__weak int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
- unsigned long start, unsigned long end, bool blockable)
+__weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+ unsigned long start, unsigned long end)
{
- return 0;
}
bool kvm_is_zone_device_pfn(kvm_pfn_t pfn)
@@ -186,6 +185,7 @@
*/
if (pfn_valid(pfn))
return PageReserved(pfn_to_page(pfn)) &&
+ !is_zero_pfn(pfn) &&
!kvm_is_zone_device_pfn(pfn);
return true;
@@ -381,6 +381,18 @@
return container_of(mn, struct kvm, mmu_notifier);
}
+static void kvm_mmu_notifier_invalidate_range(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+ struct kvm *kvm = mmu_notifier_to_kvm(mn);
+ int idx;
+
+ idx = srcu_read_lock(&kvm->srcu);
+ kvm_arch_mmu_notifier_invalidate_range(kvm, start, end);
+ srcu_read_unlock(&kvm->srcu, idx);
+}
+
static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long address,
@@ -405,7 +417,6 @@
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
int need_tlb_flush = 0, idx;
- int ret;
idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
@@ -415,21 +426,16 @@
* count is also read inside the mmu_lock critical section.
*/
kvm->mmu_notifier_count++;
- need_tlb_flush = kvm_unmap_hva_range(kvm, range->start, range->end);
- need_tlb_flush |= kvm->tlbs_dirty;
+ need_tlb_flush = kvm_unmap_hva_range(kvm, range->start, range->end,
+ range->flags);
/* we've to flush the tlb before the pages can be freed */
- if (need_tlb_flush)
+ if (need_tlb_flush || kvm->tlbs_dirty)
kvm_flush_remote_tlbs(kvm);
spin_unlock(&kvm->mmu_lock);
-
- ret = kvm_arch_mmu_notifier_invalidate_range(kvm, range->start,
- range->end,
- mmu_notifier_range_blockable(range));
-
srcu_read_unlock(&kvm->srcu, idx);
- return ret;
+ return 0;
}
static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
@@ -535,6 +541,7 @@
}
static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
+ .invalidate_range = kvm_mmu_notifier_invalidate_range,
.invalidate_range_start = kvm_mmu_notifier_invalidate_range_start,
.invalidate_range_end = kvm_mmu_notifier_invalidate_range_end,
.clear_flush_young = kvm_mmu_notifier_clear_flush_young,
@@ -628,6 +635,8 @@
static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
{
+ static DEFINE_MUTEX(kvm_debugfs_lock);
+ struct dentry *dent;
char dir_name[ITOA_MAX_LEN * 2];
struct kvm_stat_data *stat_data;
struct kvm_stats_debugfs_item *p;
@@ -636,8 +645,20 @@
return 0;
snprintf(dir_name, sizeof(dir_name), "%d-%d", task_pid_nr(current), fd);
- kvm->debugfs_dentry = debugfs_create_dir(dir_name, kvm_debugfs_dir);
+ mutex_lock(&kvm_debugfs_lock);
+ dent = debugfs_lookup(dir_name, kvm_debugfs_dir);
+ if (dent) {
+ pr_warn_ratelimited("KVM: debugfs: duplicate directory %s\n", dir_name);
+ dput(dent);
+ mutex_unlock(&kvm_debugfs_lock);
+ return 0;
+ }
+ dent = debugfs_create_dir(dir_name, kvm_debugfs_dir);
+ mutex_unlock(&kvm_debugfs_lock);
+ if (IS_ERR(dent))
+ return 0;
+ kvm->debugfs_dentry = dent;
kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries,
sizeof(*kvm->debugfs_stat_data),
GFP_KERNEL_ACCOUNT);
@@ -1010,6 +1031,7 @@
/* We can read the guest memory with __xxx_user() later on. */
if ((id < KVM_USER_MEM_SLOTS) &&
((mem->userspace_addr & (PAGE_SIZE - 1)) ||
+ (mem->userspace_addr != untagged_addr(mem->userspace_addr)) ||
!access_ok((void __user *)(unsigned long)mem->userspace_addr,
mem->memory_size)))
goto out;
@@ -1394,14 +1416,14 @@
}
EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
-unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn)
+unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn)
{
struct vm_area_struct *vma;
unsigned long addr, size;
size = PAGE_SIZE;
- addr = gfn_to_hva(kvm, gfn);
+ addr = kvm_vcpu_gfn_to_hva_prot(vcpu, gfn, NULL);
if (kvm_is_error_hva(addr))
return PAGE_SIZE;
@@ -1585,15 +1607,24 @@
return true;
}
+static int kvm_try_get_pfn(kvm_pfn_t pfn)
+{
+ if (kvm_is_reserved_pfn(pfn))
+ return 1;
+ return get_page_unless_zero(pfn_to_page(pfn));
+}
+
static int hva_to_pfn_remapped(struct vm_area_struct *vma,
unsigned long addr, bool *async,
bool write_fault, bool *writable,
kvm_pfn_t *p_pfn)
{
- unsigned long pfn;
+ kvm_pfn_t pfn;
+ pte_t *ptep;
+ spinlock_t *ptl;
int r;
- r = follow_pfn(vma, addr, &pfn);
+ r = follow_pte(vma->vm_mm, addr, &ptep, &ptl);
if (r) {
/*
* get_user_pages fails for VM_IO and VM_PFNMAP vmas and does
@@ -1608,14 +1639,19 @@
if (r)
return r;
- r = follow_pfn(vma, addr, &pfn);
+ r = follow_pte(vma->vm_mm, addr, &ptep, &ptl);
if (r)
return r;
+ }
+ if (write_fault && !pte_write(*ptep)) {
+ pfn = KVM_PFN_ERR_RO_FAULT;
+ goto out;
}
if (writable)
- *writable = true;
+ *writable = pte_write(*ptep);
+ pfn = pte_pfn(*ptep);
/*
* Get a reference here because callers of *hva_to_pfn* and
@@ -1627,11 +1663,21 @@
* Whoever called remap_pfn_range is also going to call e.g.
* unmap_mapping_range before the underlying pages are freed,
* causing a call to our MMU notifier.
+ *
+ * Certain IO or PFNMAP mappings can be backed with valid
+ * struct pages, but be allocated without refcounting e.g.,
+ * tail pages of non-compound higher order allocations, which
+ * would then underflow the refcount when the caller does the
+ * required put_page. Don't allow those pages here.
*/
- kvm_get_pfn(pfn);
+ if (!kvm_try_get_pfn(pfn))
+ r = -EFAULT;
+out:
+ pte_unmap_unlock(ptep, ptl);
*p_pfn = pfn;
- return 0;
+
+ return r;
}
/*
@@ -1809,26 +1855,72 @@
}
EXPORT_SYMBOL_GPL(gfn_to_page);
-static int __kvm_map_gfn(struct kvm_memory_slot *slot, gfn_t gfn,
- struct kvm_host_map *map)
+void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache)
+{
+ if (pfn == 0)
+ return;
+
+ if (cache)
+ cache->pfn = cache->gfn = 0;
+
+ if (dirty)
+ kvm_release_pfn_dirty(pfn);
+ else
+ kvm_release_pfn_clean(pfn);
+}
+
+static void kvm_cache_gfn_to_pfn(struct kvm_memory_slot *slot, gfn_t gfn,
+ struct gfn_to_pfn_cache *cache, u64 gen)
+{
+ kvm_release_pfn(cache->pfn, cache->dirty, cache);
+
+ cache->pfn = gfn_to_pfn_memslot(slot, gfn);
+ cache->gfn = gfn;
+ cache->dirty = false;
+ cache->generation = gen;
+}
+
+static int __kvm_map_gfn(struct kvm_memslots *slots, gfn_t gfn,
+ struct kvm_host_map *map,
+ struct gfn_to_pfn_cache *cache,
+ bool atomic)
{
kvm_pfn_t pfn;
void *hva = NULL;
struct page *page = KVM_UNMAPPED_PAGE;
+ struct kvm_memory_slot *slot = __gfn_to_memslot(slots, gfn);
+ u64 gen = slots->generation;
if (!map)
return -EINVAL;
- pfn = gfn_to_pfn_memslot(slot, gfn);
+ if (cache) {
+ if (!cache->pfn || cache->gfn != gfn ||
+ cache->generation != gen) {
+ if (atomic)
+ return -EAGAIN;
+ kvm_cache_gfn_to_pfn(slot, gfn, cache, gen);
+ }
+ pfn = cache->pfn;
+ } else {
+ if (atomic)
+ return -EAGAIN;
+ pfn = gfn_to_pfn_memslot(slot, gfn);
+ }
if (is_error_noslot_pfn(pfn))
return -EINVAL;
if (pfn_valid(pfn)) {
page = pfn_to_page(pfn);
- hva = kmap(page);
+ if (atomic)
+ hva = kmap_atomic(page);
+ else
+ hva = kmap(page);
#ifdef CONFIG_HAS_IOMEM
- } else {
+ } else if (!atomic) {
hva = memremap(pfn_to_hpa(pfn), PAGE_SIZE, MEMREMAP_WB);
+ } else {
+ return -EINVAL;
#endif
}
@@ -1843,14 +1935,25 @@
return 0;
}
+int kvm_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map,
+ struct gfn_to_pfn_cache *cache, bool atomic)
+{
+ return __kvm_map_gfn(kvm_memslots(vcpu->kvm), gfn, map,
+ cache, atomic);
+}
+EXPORT_SYMBOL_GPL(kvm_map_gfn);
+
int kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map)
{
- return __kvm_map_gfn(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn, map);
+ return __kvm_map_gfn(kvm_vcpu_memslots(vcpu), gfn, map,
+ NULL, false);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_map);
-void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
- bool dirty)
+static void __kvm_unmap_gfn(struct kvm_memory_slot *memslot,
+ struct kvm_host_map *map,
+ struct gfn_to_pfn_cache *cache,
+ bool dirty, bool atomic)
{
if (!map)
return;
@@ -1858,23 +1961,45 @@
if (!map->hva)
return;
- if (map->page != KVM_UNMAPPED_PAGE)
- kunmap(map->page);
+ if (map->page != KVM_UNMAPPED_PAGE) {
+ if (atomic)
+ kunmap_atomic(map->hva);
+ else
+ kunmap(map->page);
+ }
#ifdef CONFIG_HAS_IOMEM
- else
+ else if (!atomic)
memunmap(map->hva);
+ else
+ WARN_ONCE(1, "Unexpected unmapping in atomic context");
#endif
- if (dirty) {
- kvm_vcpu_mark_page_dirty(vcpu, map->gfn);
- kvm_release_pfn_dirty(map->pfn);
- } else {
- kvm_release_pfn_clean(map->pfn);
- }
+ if (dirty)
+ mark_page_dirty_in_slot(memslot, map->gfn);
+
+ if (cache)
+ cache->dirty |= dirty;
+ else
+ kvm_release_pfn(map->pfn, dirty, NULL);
map->hva = NULL;
map->page = NULL;
}
+
+int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
+ struct gfn_to_pfn_cache *cache, bool dirty, bool atomic)
+{
+ __kvm_unmap_gfn(gfn_to_memslot(vcpu->kvm, map->gfn), map,
+ cache, dirty, atomic);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_unmap_gfn);
+
+void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty)
+{
+ __kvm_unmap_gfn(kvm_vcpu_gfn_to_memslot(vcpu, map->gfn), map, NULL,
+ dirty, false);
+}
EXPORT_SYMBOL_GPL(kvm_vcpu_unmap);
struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn)
@@ -2196,12 +2321,12 @@
if (slots->generation != ghc->generation)
__kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len);
- if (unlikely(!ghc->memslot))
- return kvm_write_guest(kvm, gpa, data, len);
-
if (kvm_is_error_hva(ghc->hva))
return -EFAULT;
+ if (unlikely(!ghc->memslot))
+ return kvm_write_guest(kvm, gpa, data, len);
+
r = __copy_to_user((void __user *)ghc->hva + offset, data, len);
if (r)
return -EFAULT;
@@ -2229,12 +2354,12 @@
if (slots->generation != ghc->generation)
__kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len);
- if (unlikely(!ghc->memslot))
- return kvm_read_guest(kvm, ghc->gpa, data, len);
-
if (kvm_is_error_hva(ghc->hva))
return -EFAULT;
+ if (unlikely(!ghc->memslot))
+ return kvm_read_guest(kvm, ghc->gpa, data, len);
+
r = __copy_from_user(data, (void __user *)ghc->hva, len);
if (r)
return -EFAULT;
@@ -3459,6 +3584,16 @@
};
};
+struct compat_kvm_clear_dirty_log {
+ __u32 slot;
+ __u32 num_pages;
+ __u64 first_page;
+ union {
+ compat_uptr_t dirty_bitmap; /* one bit per page */
+ __u64 padding2;
+ };
+};
+
static long kvm_vm_compat_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -3468,6 +3603,24 @@
if (kvm->mm != current->mm)
return -EIO;
switch (ioctl) {
+#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
+ case KVM_CLEAR_DIRTY_LOG: {
+ struct compat_kvm_clear_dirty_log compat_log;
+ struct kvm_clear_dirty_log log;
+
+ if (copy_from_user(&compat_log, (void __user *)arg,
+ sizeof(compat_log)))
+ return -EFAULT;
+ log.slot = compat_log.slot;
+ log.num_pages = compat_log.num_pages;
+ log.first_page = compat_log.first_page;
+ log.padding2 = compat_log.padding2;
+ log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap);
+
+ r = kvm_vm_ioctl_clear_dirty_log(kvm, &log);
+ break;
+ }
+#endif
case KVM_GET_DIRTY_LOG: {
struct compat_kvm_dirty_log compat_log;
struct kvm_dirty_log log;
@@ -3921,15 +4074,15 @@
}
/* Caller must hold slots_lock. */
-void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
- struct kvm_io_device *dev)
+int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+ struct kvm_io_device *dev)
{
- int i;
+ int i, j;
struct kvm_io_bus *new_bus, *bus;
bus = kvm_get_bus(kvm, bus_idx);
if (!bus)
- return;
+ return 0;
for (i = 0; i < bus->dev_count; i++)
if (bus->range[i].dev == dev) {
@@ -3937,25 +4090,28 @@
}
if (i == bus->dev_count)
- return;
+ return 0;
new_bus = kmalloc(struct_size(bus, range, bus->dev_count - 1),
GFP_KERNEL_ACCOUNT);
- if (!new_bus) {
+ if (new_bus) {
+ memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range));
+ new_bus->dev_count--;
+ memcpy(new_bus->range + i, bus->range + i + 1,
+ (new_bus->dev_count - i) * sizeof(struct kvm_io_range));
+ } else {
pr_err("kvm: failed to shrink bus, removing it completely\n");
- goto broken;
+ for (j = 0; j < bus->dev_count; j++) {
+ if (j == i)
+ continue;
+ kvm_iodevice_destructor(bus->range[j].dev);
+ }
}
- memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range));
- new_bus->dev_count--;
- memcpy(new_bus->range + i, bus->range + i + 1,
- (new_bus->dev_count - i) * sizeof(struct kvm_io_range));
-
-broken:
rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
synchronize_srcu_expedited(&kvm->srcu);
kfree(bus);
- return;
+ return new_bus ? 0 : -ENOMEM;
}
struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
@@ -4225,7 +4381,7 @@
}
add_uevent_var(env, "PID=%d", kvm->userspace_pid);
- if (!IS_ERR_OR_NULL(kvm->debugfs_dentry)) {
+ if (kvm->debugfs_dentry) {
char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL_ACCOUNT);
if (p) {