Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index a50dadd..ea0efd2 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -13,12 +13,16 @@
config XEN_BALLOON_MEMORY_HOTPLUG
bool "Memory hotplug support for Xen balloon driver"
depends on XEN_BALLOON && MEMORY_HOTPLUG
+ default y
help
Memory hotplug support for Xen balloon driver allows expanding memory
available for the system above limit declared at system startup.
It is very useful on critical systems which require long
run without rebooting.
+ It's also very useful for non PV domains to obtain unpopulated physical
+ memory ranges to use in order to map foreign memory or grants.
+
Memory could be hotplugged in following steps:
1) target domain: ensure that memory auto online policy is in
@@ -46,13 +50,11 @@
SUBSYSTEM=="memory", ACTION=="add", RUN+="/bin/sh -c '[ -f /sys$devpath/state ] && echo online > /sys$devpath/state'"
-config XEN_BALLOON_MEMORY_HOTPLUG_LIMIT
+config XEN_MEMORY_HOTPLUG_LIMIT
int "Hotplugged memory limit (in GiB) for a PV guest"
- default 512 if X86_64
- default 4 if X86_32
- range 0 64 if X86_32
+ default 512
depends on XEN_HAVE_PVMMU
- depends on XEN_BALLOON_MEMORY_HOTPLUG
+ depends on MEMORY_HOTPLUG
help
Maxmium amount of memory (in GiB) that a PV guest can be
expanded to when using memory hotplug.
@@ -106,27 +108,27 @@
If in doubt, say yes.
config XEN_COMPAT_XENFS
- bool "Create compatibility mount point /proc/xen"
- depends on XENFS
- default y
- help
- The old xenstore userspace tools expect to find "xenbus"
- under /proc/xen, but "xenbus" is now found at the root of the
- xenfs filesystem. Selecting this causes the kernel to create
- the compatibility mount point /proc/xen if it is running on
- a xen platform.
- If in doubt, say yes.
+ bool "Create compatibility mount point /proc/xen"
+ depends on XENFS
+ default y
+ help
+ The old xenstore userspace tools expect to find "xenbus"
+ under /proc/xen, but "xenbus" is now found at the root of the
+ xenfs filesystem. Selecting this causes the kernel to create
+ the compatibility mount point /proc/xen if it is running on
+ a xen platform.
+ If in doubt, say yes.
config XEN_SYS_HYPERVISOR
- bool "Create xen entries under /sys/hypervisor"
- depends on SYSFS
- select SYS_HYPERVISOR
- default y
- help
- Create entries under /sys/hypervisor describing the Xen
- hypervisor environment. When running native or in another
- virtual environment, /sys/hypervisor will still be present,
- but will have no xen contents.
+ bool "Create xen entries under /sys/hypervisor"
+ depends on SYSFS
+ select SYS_HYPERVISOR
+ default y
+ help
+ Create entries under /sys/hypervisor describing the Xen
+ hypervisor environment. When running native or in another
+ virtual environment, /sys/hypervisor will still be present,
+ but will have no xen contents.
config XEN_XENBUS_FRONTEND
tristate
@@ -175,6 +177,7 @@
config SWIOTLB_XEN
def_bool y
+ select DMA_OPS
select SWIOTLB
config XEN_PCIDEV_BACKEND
@@ -271,7 +274,7 @@
depends on XEN && XEN_DOM0 && X86 && ACPI_PROCESSOR && CPU_FREQ
default m
help
- This ACPI processor uploads Power Management information to the Xen
+ This ACPI processor uploads Power Management information to the Xen
hypervisor.
To do that the driver parses the Power Management data and uploads
@@ -280,19 +283,19 @@
SMM so that other drivers (such as ACPI cpufreq scaling driver) will
not load.
- To compile this driver as a module, choose M here: the module will be
+ To compile this driver as a module, choose M here: the module will be
called xen_acpi_processor If you do not know what to choose, select
M here. If the CPUFREQ drivers are built in, select Y here.
config XEN_MCE_LOG
bool "Xen platform mcelog"
- depends on XEN_DOM0 && X86_64 && X86_MCE
+ depends on XEN_DOM0 && X86_MCE
help
Allow kernel fetching MCE error from Xen platform and
converting it into Linux mcelog format for mcelog tools
config XEN_HAVE_PVMMU
- bool
+ bool
config XEN_EFI
def_bool y
@@ -309,17 +312,27 @@
depends on X86 && ACPI
config XEN_SYMS
- bool "Xen symbols"
- depends on X86 && XEN_DOM0 && XENFS
- default y if KALLSYMS
- help
- Exports hypervisor symbols (along with their types and addresses) via
- /proc/xen/xensyms file, similar to /proc/kallsyms
+ bool "Xen symbols"
+ depends on X86 && XEN_DOM0 && XENFS
+ default y if KALLSYMS
+ help
+ Exports hypervisor symbols (along with their types and addresses) via
+ /proc/xen/xensyms file, similar to /proc/kallsyms
config XEN_HAVE_VPMU
- bool
+ bool
config XEN_FRONT_PGDIR_SHBUF
tristate
+config XEN_UNPOPULATED_ALLOC
+ bool "Use unpopulated memory ranges for guest mappings"
+ depends on X86 && ZONE_DEVICE
+ default XEN_BACKEND || XEN_GNTDEV || XEN_DOM0
+ help
+ Use unpopulated memory ranges in order to create mappings for guest
+ memory regions, including grant maps and foreign pages. This avoids
+ having to balloon out RAM regions in order to obtain physical memory
+ space to create such mappings.
+
endmenu
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 0c4efa6..babdca8 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -1,12 +1,11 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
-obj-y += grant-table.o features.o balloon.o manage.o preempt.o time.o
+obj-y += grant-table.o features.o balloon.o manage.o time.o
obj-y += mem-reservation.o
obj-y += events/
obj-y += xenbus/
-nostackp := $(call cc-option, -fno-stack-protector)
-CFLAGS_features.o := $(nostackp)
+CFLAGS_features.o := -fno-stack-protector
dom0-$(CONFIG_ARM64) += arm-device.o
dom0-$(CONFIG_PCI) += pci.o
@@ -42,3 +41,4 @@
xen-gntalloc-y := gntalloc.o
xen-privcmd-y := privcmd.o privcmd-buf.o
obj-$(CONFIG_XEN_FRONT_PGDIR_SHBUF) += xen-front-pgdir-shbuf.o
+obj-$(CONFIG_XEN_UNPOPULATED_ALLOC) += unpopulated-alloc.o
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index ebb0551..c5b0236 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -43,6 +43,8 @@
#include <linux/sched.h>
#include <linux/cred.h>
#include <linux/errno.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/memblock.h>
#include <linux/pagemap.h>
@@ -56,10 +58,9 @@
#include <linux/percpu-defs.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
+#include <linux/moduleparam.h>
#include <asm/page.h>
-#include <asm/pgalloc.h>
-#include <asm/pgtable.h>
#include <asm/tlb.h>
#include <asm/xen/hypervisor.h>
@@ -73,6 +74,12 @@
#include <xen/page.h>
#include <xen/mem-reservation.h>
+#undef MODULE_PARAM_PREFIX
+#define MODULE_PARAM_PREFIX "xen."
+
+static uint __read_mostly balloon_boot_timeout = 180;
+module_param(balloon_boot_timeout, uint, 0444);
+
static int xen_hotplug_unpopulated;
#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
@@ -117,7 +124,7 @@
#define EXTENT_ORDER (fls(XEN_PFN_PER_PAGE) - 1)
/*
- * balloon_process() state:
+ * balloon_thread() state:
*
* BP_DONE: done or nothing to do,
* BP_WAIT: wait to be rescheduled,
@@ -125,13 +132,15 @@
* BP_ECANCELED: error, balloon operation canceled.
*/
-enum bp_state {
+static enum bp_state {
BP_DONE,
BP_WAIT,
BP_EAGAIN,
BP_ECANCELED
-};
+} balloon_state = BP_DONE;
+/* Main waiting point for xen-balloon thread. */
+static DECLARE_WAIT_QUEUE_HEAD(balloon_thread_wq);
static DEFINE_MUTEX(balloon_mutex);
@@ -146,10 +155,6 @@
static LIST_HEAD(ballooned_pages);
static DECLARE_WAIT_QUEUE_HEAD(balloon_wq);
-/* Main work function, always executed in process context. */
-static void balloon_process(struct work_struct *work);
-static DECLARE_DELAYED_WORK(balloon_worker, balloon_process);
-
/* When ballooning out (allocating memory to return to Xen) we don't really
want the kernel to try too hard since that can trigger the oom killer. */
#define GFP_BALLOON \
@@ -201,18 +206,15 @@
return list_entry(next, struct page, lru);
}
-static enum bp_state update_schedule(enum bp_state state)
+static void update_schedule(void)
{
- if (state == BP_WAIT)
- return BP_WAIT;
+ if (balloon_state == BP_WAIT || balloon_state == BP_ECANCELED)
+ return;
- if (state == BP_ECANCELED)
- return BP_ECANCELED;
-
- if (state == BP_DONE) {
+ if (balloon_state == BP_DONE) {
balloon_stats.schedule_delay = 1;
balloon_stats.retry_count = 1;
- return BP_DONE;
+ return;
}
++balloon_stats.retry_count;
@@ -221,7 +223,8 @@
balloon_stats.retry_count > balloon_stats.max_retry_count) {
balloon_stats.schedule_delay = 1;
balloon_stats.retry_count = 1;
- return BP_ECANCELED;
+ balloon_state = BP_ECANCELED;
+ return;
}
balloon_stats.schedule_delay <<= 1;
@@ -229,7 +232,7 @@
if (balloon_stats.schedule_delay > balloon_stats.max_schedule_delay)
balloon_stats.schedule_delay = balloon_stats.max_schedule_delay;
- return BP_EAGAIN;
+ balloon_state = BP_EAGAIN;
}
#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
@@ -267,20 +270,6 @@
return NULL;
}
-#ifdef CONFIG_SPARSEMEM
- {
- unsigned long limit = 1UL << (MAX_PHYSMEM_BITS - PAGE_SHIFT);
- unsigned long pfn = res->start >> PAGE_SHIFT;
-
- if (pfn > limit) {
- pr_err("New System RAM resource outside addressable RAM (%lu > %lu)\n",
- pfn, limit);
- release_memory_resource(res);
- return NULL;
- }
- }
-#endif
-
return res;
}
@@ -347,7 +336,7 @@
mutex_unlock(&balloon_mutex);
/* add_memory_resource() requires the device_hotplug lock */
lock_device_hotplug();
- rc = add_memory_resource(nid, resource);
+ rc = add_memory_resource(nid, resource, MEMHP_MERGE_RESOURCE);
unlock_device_hotplug();
mutex_lock(&balloon_mutex);
@@ -374,7 +363,6 @@
mutex_lock(&balloon_mutex);
for (i = 0; i < size; i++) {
p = pfn_to_page(start_pfn + i);
- __online_page_set_limits(p);
balloon_append(p);
}
mutex_unlock(&balloon_mutex);
@@ -383,7 +371,7 @@
static int xen_memory_notifier(struct notifier_block *nb, unsigned long val, void *v)
{
if (val == MEM_ONLINE)
- schedule_delayed_work(&balloon_worker, 0);
+ wake_up(&balloon_thread_wq);
return NOTIFY_OK;
}
@@ -508,50 +496,79 @@
}
/*
- * As this is a work item it is guaranteed to run as a single instance only.
+ * Stop waiting if either state is BP_DONE and ballooning action is
+ * needed, or if the credit has changed while state is not BP_DONE.
+ */
+static bool balloon_thread_cond(long credit)
+{
+ if (balloon_state == BP_DONE)
+ credit = 0;
+
+ return current_credit() != credit || kthread_should_stop();
+}
+
+/*
+ * As this is a kthread it is guaranteed to run as a single instance only.
* We may of course race updates of the target counts (which are protected
* by the balloon lock), or with changes to the Xen hard limit, but we will
* recover from these in time.
*/
-static void balloon_process(struct work_struct *work)
+static int balloon_thread(void *unused)
{
- enum bp_state state = BP_DONE;
long credit;
+ unsigned long timeout;
+ set_freezable();
+ for (;;) {
+ switch (balloon_state) {
+ case BP_DONE:
+ case BP_ECANCELED:
+ timeout = 3600 * HZ;
+ break;
+ case BP_EAGAIN:
+ timeout = balloon_stats.schedule_delay * HZ;
+ break;
+ case BP_WAIT:
+ timeout = HZ;
+ break;
+ }
- do {
+ credit = current_credit();
+
+ wait_event_freezable_timeout(balloon_thread_wq,
+ balloon_thread_cond(credit), timeout);
+
+ if (kthread_should_stop())
+ return 0;
+
mutex_lock(&balloon_mutex);
credit = current_credit();
if (credit > 0) {
if (balloon_is_inflated())
- state = increase_reservation(credit);
+ balloon_state = increase_reservation(credit);
else
- state = reserve_additional_memory();
+ balloon_state = reserve_additional_memory();
}
if (credit < 0) {
long n_pages;
n_pages = min(-credit, si_mem_available());
- state = decrease_reservation(n_pages, GFP_BALLOON);
- if (state == BP_DONE && n_pages != -credit &&
+ balloon_state = decrease_reservation(n_pages,
+ GFP_BALLOON);
+ if (balloon_state == BP_DONE && n_pages != -credit &&
n_pages < totalreserve_pages)
- state = BP_EAGAIN;
+ balloon_state = BP_EAGAIN;
}
- state = update_schedule(state);
+ update_schedule();
mutex_unlock(&balloon_mutex);
cond_resched();
-
- } while (credit && state == BP_DONE);
-
- /* Schedule more work if there is some still to be done. */
- if (state == BP_EAGAIN)
- schedule_delayed_work(&balloon_worker, balloon_stats.schedule_delay * HZ);
+ }
}
/* Resets the Xen limit, sets new target, and kicks off processing. */
@@ -559,7 +576,7 @@
{
/* No need for lock. Not read-modify-write updates. */
balloon_stats.target_pages = target;
- schedule_delayed_work(&balloon_worker, 0);
+ wake_up(&balloon_thread_wq);
}
EXPORT_SYMBOL_GPL(balloon_set_new_target);
@@ -664,13 +681,13 @@
/* The balloon may be too large now. Shrink it if needed. */
if (current_credit())
- schedule_delayed_work(&balloon_worker, 0);
+ wake_up(&balloon_thread_wq);
mutex_unlock(&balloon_mutex);
}
EXPORT_SYMBOL(free_xenballooned_pages);
-#ifdef CONFIG_XEN_PV
+#if defined(CONFIG_XEN_PV) && !defined(CONFIG_XEN_UNPOPULATED_ALLOC)
static void __init balloon_add_region(unsigned long start_pfn,
unsigned long pages)
{
@@ -696,6 +713,8 @@
static int __init balloon_init(void)
{
+ struct task_struct *task;
+
if (!xen_domain())
return -ENODEV;
@@ -724,7 +743,7 @@
register_sysctl_table(xen_root);
#endif
-#ifdef CONFIG_XEN_PV
+#if defined(CONFIG_XEN_PV) && !defined(CONFIG_XEN_UNPOPULATED_ALLOC)
{
int i;
@@ -739,9 +758,50 @@
}
#endif
+ task = kthread_run(balloon_thread, NULL, "xen-balloon");
+ if (IS_ERR(task)) {
+ pr_err("xen-balloon thread could not be started, ballooning will not work!\n");
+ return PTR_ERR(task);
+ }
+
/* Init the xen-balloon driver. */
xen_balloon_init();
return 0;
}
subsys_initcall(balloon_init);
+
+static int __init balloon_wait_finish(void)
+{
+ long credit, last_credit = 0;
+ unsigned long last_changed = 0;
+
+ if (!xen_domain())
+ return -ENODEV;
+
+ /* PV guests don't need to wait. */
+ if (xen_pv_domain() || !current_credit())
+ return 0;
+
+ pr_notice("Waiting for initial ballooning down having finished.\n");
+
+ while ((credit = current_credit()) < 0) {
+ if (credit != last_credit) {
+ last_changed = jiffies;
+ last_credit = credit;
+ }
+ if (balloon_state == BP_ECANCELED) {
+ pr_warn_once("Initial ballooning failed, %ld pages need to be freed.\n",
+ -credit);
+ if (jiffies - last_changed >= HZ * balloon_boot_timeout)
+ panic("Initial ballooning failed!\n");
+ }
+
+ schedule_timeout_interruptible(HZ / 10);
+ }
+
+ pr_notice("Initial ballooning down finished.\n");
+
+ return 0;
+}
+late_initcall_sync(balloon_wait_finish);
diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c
index f192b6f..b96b11e 100644
--- a/drivers/xen/cpu_hotplug.c
+++ b/drivers/xen/cpu_hotplug.c
@@ -93,10 +93,8 @@
(void)register_xenbus_watch(&cpu_watch);
for_each_possible_cpu(cpu) {
- if (vcpu_online(cpu) == 0) {
- (void)cpu_down(cpu);
- set_cpu_present(cpu, false);
- }
+ if (vcpu_online(cpu) == 0)
+ disable_hotplug_cpu(cpu);
}
return NOTIFY_DONE;
@@ -119,5 +117,5 @@
return 0;
}
-arch_initcall(setup_vcpu_hotplug_event);
+late_initcall(setup_vcpu_hotplug_event);
diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c
index 77cc80b..b8f2f97 100644
--- a/drivers/xen/events/events_2l.c
+++ b/drivers/xen/events/events_2l.c
@@ -52,37 +52,38 @@
clear_bit(evtchn, BM(per_cpu(cpu_evtchn_mask, cpu)));
}
-static void evtchn_2l_bind_to_cpu(struct irq_info *info, unsigned cpu)
+static void evtchn_2l_bind_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
+ unsigned int old_cpu)
{
- clear_bit(info->evtchn, BM(per_cpu(cpu_evtchn_mask, info->cpu)));
- set_bit(info->evtchn, BM(per_cpu(cpu_evtchn_mask, cpu)));
+ clear_bit(evtchn, BM(per_cpu(cpu_evtchn_mask, old_cpu)));
+ set_bit(evtchn, BM(per_cpu(cpu_evtchn_mask, cpu)));
}
-static void evtchn_2l_clear_pending(unsigned port)
+static void evtchn_2l_clear_pending(evtchn_port_t port)
{
struct shared_info *s = HYPERVISOR_shared_info;
sync_clear_bit(port, BM(&s->evtchn_pending[0]));
}
-static void evtchn_2l_set_pending(unsigned port)
+static void evtchn_2l_set_pending(evtchn_port_t port)
{
struct shared_info *s = HYPERVISOR_shared_info;
sync_set_bit(port, BM(&s->evtchn_pending[0]));
}
-static bool evtchn_2l_is_pending(unsigned port)
+static bool evtchn_2l_is_pending(evtchn_port_t port)
{
struct shared_info *s = HYPERVISOR_shared_info;
return sync_test_bit(port, BM(&s->evtchn_pending[0]));
}
-static void evtchn_2l_mask(unsigned port)
+static void evtchn_2l_mask(evtchn_port_t port)
{
struct shared_info *s = HYPERVISOR_shared_info;
sync_set_bit(port, BM(&s->evtchn_mask[0]));
}
-static void evtchn_2l_unmask(unsigned port)
+static void evtchn_2l_unmask(evtchn_port_t port)
{
struct shared_info *s = HYPERVISOR_shared_info;
unsigned int cpu = get_cpu();
@@ -174,7 +175,7 @@
/* Timer interrupt has highest priority. */
irq = irq_from_virq(cpu, VIRQ_TIMER);
if (irq != -1) {
- unsigned int evtchn = evtchn_from_irq(irq);
+ evtchn_port_t evtchn = evtchn_from_irq(irq);
word_idx = evtchn / BITS_PER_LONG;
bit_idx = evtchn % BITS_PER_LONG;
if (active_evtchns(cpu, s, word_idx) & (1ULL << bit_idx))
@@ -229,7 +230,7 @@
do {
xen_ulong_t bits;
- int port;
+ evtchn_port_t port;
bits = MASK_LSBS(pending_bits, bit_idx);
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 87cfadd..fba78da 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -41,6 +41,7 @@
#ifdef CONFIG_X86
#include <asm/desc.h>
#include <asm/ptrace.h>
+#include <asm/idtentry.h>
#include <asm/irq.h>
#include <asm/io_apic.h>
#include <asm/i8259.h>
@@ -69,6 +70,63 @@
#undef MODULE_PARAM_PREFIX
#define MODULE_PARAM_PREFIX "xen."
+/* Interrupt types. */
+enum xen_irq_type {
+ IRQT_UNBOUND = 0,
+ IRQT_PIRQ,
+ IRQT_VIRQ,
+ IRQT_IPI,
+ IRQT_EVTCHN
+};
+
+/*
+ * Packed IRQ information:
+ * type - enum xen_irq_type
+ * event channel - irq->event channel mapping
+ * cpu - cpu this event channel is bound to
+ * index - type-specific information:
+ * PIRQ - vector, with MSB being "needs EIO", or physical IRQ of the HVM
+ * guest, or GSI (real passthrough IRQ) of the device.
+ * VIRQ - virq number
+ * IPI - IPI vector
+ * EVTCHN -
+ */
+struct irq_info {
+ struct list_head list;
+ struct list_head eoi_list;
+ short refcnt;
+ short spurious_cnt;
+ short type; /* type */
+ u8 mask_reason; /* Why is event channel masked */
+#define EVT_MASK_REASON_EXPLICIT 0x01
+#define EVT_MASK_REASON_TEMPORARY 0x02
+#define EVT_MASK_REASON_EOI_PENDING 0x04
+ u8 is_active; /* Is event just being handled? */
+ unsigned irq;
+ evtchn_port_t evtchn; /* event channel */
+ unsigned short cpu; /* cpu bound */
+ unsigned short eoi_cpu; /* EOI must happen on this cpu-1 */
+ unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */
+ u64 eoi_time; /* Time in jiffies when to EOI. */
+ raw_spinlock_t lock;
+
+ union {
+ unsigned short virq;
+ enum ipi_vector ipi;
+ struct {
+ unsigned short pirq;
+ unsigned short gsi;
+ unsigned char vector;
+ unsigned char flags;
+ uint16_t domid;
+ } pirq;
+ } u;
+};
+
+#define PIRQ_NEEDS_EOI (1 << 0)
+#define PIRQ_SHAREABLE (1 << 1)
+#define PIRQ_MSI_GROUP (1 << 2)
+
static uint __read_mostly event_loop_timeout = 2;
module_param(event_loop_timeout, uint, 0644);
@@ -110,7 +168,7 @@
/* IRQ <-> IPI mapping */
static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};
-int **evtchn_to_irq;
+static int **evtchn_to_irq;
#ifdef CONFIG_X86
static unsigned long *pirq_eoi_map;
#endif
@@ -153,7 +211,7 @@
}
}
-static int set_evtchn_to_irq(unsigned evtchn, unsigned irq)
+static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq)
{
unsigned row;
unsigned col;
@@ -188,7 +246,7 @@
return 0;
}
-int get_evtchn_to_irq(unsigned evtchn)
+int get_evtchn_to_irq(evtchn_port_t evtchn)
{
if (evtchn >= xen_evtchn_max_channels())
return -1;
@@ -198,7 +256,7 @@
}
/* Get info for IRQ */
-struct irq_info *info_for_irq(unsigned irq)
+static struct irq_info *info_for_irq(unsigned irq)
{
if (irq < nr_legacy_irqs())
return legacy_info_ptrs[irq];
@@ -218,7 +276,7 @@
static int xen_irq_info_common_setup(struct irq_info *info,
unsigned irq,
enum xen_irq_type type,
- unsigned evtchn,
+ evtchn_port_t evtchn,
unsigned short cpu)
{
int ret;
@@ -238,11 +296,11 @@
irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN);
- return xen_evtchn_port_setup(info);
+ return xen_evtchn_port_setup(evtchn);
}
static int xen_irq_info_evtchn_setup(unsigned irq,
- unsigned evtchn)
+ evtchn_port_t evtchn)
{
struct irq_info *info = info_for_irq(irq);
@@ -251,7 +309,7 @@
static int xen_irq_info_ipi_setup(unsigned cpu,
unsigned irq,
- unsigned evtchn,
+ evtchn_port_t evtchn,
enum ipi_vector ipi)
{
struct irq_info *info = info_for_irq(irq);
@@ -265,7 +323,7 @@
static int xen_irq_info_virq_setup(unsigned cpu,
unsigned irq,
- unsigned evtchn,
+ evtchn_port_t evtchn,
unsigned virq)
{
struct irq_info *info = info_for_irq(irq);
@@ -278,7 +336,7 @@
}
static int xen_irq_info_pirq_setup(unsigned irq,
- unsigned evtchn,
+ evtchn_port_t evtchn,
unsigned pirq,
unsigned gsi,
uint16_t domid,
@@ -304,7 +362,7 @@
/*
* Accessors for packed IRQ information.
*/
-unsigned int evtchn_from_irq(unsigned irq)
+evtchn_port_t evtchn_from_irq(unsigned irq)
{
const struct irq_info *info = NULL;
@@ -316,7 +374,7 @@
return info->evtchn;
}
-unsigned irq_from_evtchn(unsigned int evtchn)
+unsigned int irq_from_evtchn(evtchn_port_t evtchn)
{
return get_evtchn_to_irq(evtchn);
}
@@ -362,12 +420,12 @@
return info_for_irq(irq)->type;
}
-unsigned cpu_from_irq(unsigned irq)
+static unsigned cpu_from_irq(unsigned irq)
{
return info_for_irq(irq)->cpu;
}
-unsigned int cpu_from_evtchn(unsigned int evtchn)
+unsigned int cpu_from_evtchn(evtchn_port_t evtchn)
{
int irq = get_evtchn_to_irq(evtchn);
unsigned ret = 0;
@@ -421,16 +479,16 @@
return info->u.pirq.flags & PIRQ_NEEDS_EOI;
}
-static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
+static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu)
{
- int irq = get_evtchn_to_irq(chn);
+ int irq = get_evtchn_to_irq(evtchn);
struct irq_info *info = info_for_irq(irq);
BUG_ON(irq == -1);
#ifdef CONFIG_SMP
cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(cpu));
#endif
- xen_evtchn_port_bind_to_cpu(info, cpu);
+ xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu);
info->cpu = cpu;
}
@@ -445,7 +503,7 @@
*/
void notify_remote_via_irq(int irq)
{
- int evtchn = evtchn_from_irq(irq);
+ evtchn_port_t evtchn = evtchn_from_irq(irq);
if (VALID_EVTCHN(evtchn))
notify_remote_via_evtchn(evtchn);
@@ -696,7 +754,7 @@
irq_free_desc(irq);
}
-static void xen_evtchn_close(unsigned int port)
+static void xen_evtchn_close(evtchn_port_t port)
{
struct evtchn_close close;
@@ -730,7 +788,7 @@
static void eoi_pirq(struct irq_data *data)
{
struct irq_info *info = info_for_irq(data->irq);
- int evtchn = info ? info->evtchn : 0;
+ evtchn_port_t evtchn = info ? info->evtchn : 0;
struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) };
int rc = 0;
@@ -765,7 +823,7 @@
{
struct evtchn_bind_pirq bind_pirq;
struct irq_info *info = info_for_irq(irq);
- int evtchn = evtchn_from_irq(irq);
+ evtchn_port_t evtchn = evtchn_from_irq(irq);
int rc;
BUG_ON(info->type != IRQT_PIRQ);
@@ -793,7 +851,7 @@
info->evtchn = evtchn;
bind_evtchn_to_cpu(evtchn, 0);
- rc = xen_evtchn_port_setup(info);
+ rc = xen_evtchn_port_setup(evtchn);
if (rc)
goto err;
@@ -819,7 +877,7 @@
{
unsigned int irq = data->irq;
struct irq_info *info = info_for_irq(irq);
- unsigned evtchn = evtchn_from_irq(irq);
+ evtchn_port_t evtchn = evtchn_from_irq(irq);
BUG_ON(info->type != IRQT_PIRQ);
@@ -859,7 +917,7 @@
static void __unbind_from_irq(unsigned int irq)
{
- int evtchn = evtchn_from_irq(irq);
+ evtchn_port_t evtchn = evtchn_from_irq(irq);
struct irq_info *info = info_for_irq(irq);
if (info->refcnt > 0) {
@@ -1139,8 +1197,8 @@
static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
{
struct evtchn_bind_ipi bind_ipi;
- int evtchn, irq;
- int ret;
+ evtchn_port_t evtchn;
+ int ret, irq;
mutex_lock(&irq_mapping_update_lock);
@@ -1194,14 +1252,6 @@
chip);
}
-int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
- evtchn_port_t remote_port)
-{
- return bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port,
- &xen_dynamic_chip);
-}
-EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq);
-
int bind_interdomain_evtchn_to_irq_lateeoi(unsigned int remote_domain,
evtchn_port_t remote_port)
{
@@ -1210,10 +1260,11 @@
}
EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi);
-static int find_virq(unsigned int virq, unsigned int cpu)
+static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn)
{
struct evtchn_status status;
- int port, rc = -ENOENT;
+ evtchn_port_t port;
+ int rc = -ENOENT;
memset(&status, 0, sizeof(status));
for (port = 0; port < xen_evtchn_max_channels(); port++) {
@@ -1225,7 +1276,7 @@
if (status.status != EVTCHNSTAT_virq)
continue;
if (status.u.virq == virq && status.vcpu == xen_vcpu_nr(cpu)) {
- rc = port;
+ *evtchn = port;
break;
}
}
@@ -1248,7 +1299,8 @@
int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
{
struct evtchn_bind_virq bind_virq;
- int evtchn, irq, ret;
+ evtchn_port_t evtchn = 0;
+ int irq, ret;
mutex_lock(&irq_mapping_update_lock);
@@ -1274,9 +1326,8 @@
evtchn = bind_virq.port;
else {
if (ret == -EEXIST)
- ret = find_virq(virq, cpu);
+ ret = find_virq(virq, cpu, &evtchn);
BUG_ON(ret < 0);
- evtchn = ret;
}
ret = xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
@@ -1368,19 +1419,6 @@
return irq;
}
-int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
- evtchn_port_t remote_port,
- irq_handler_t handler,
- unsigned long irqflags,
- const char *devname,
- void *dev_id)
-{
- return bind_interdomain_evtchn_to_irqhandler_chip(remote_domain,
- remote_port, handler, irqflags, devname,
- dev_id, &xen_dynamic_chip);
-}
-EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler);
-
int bind_interdomain_evtchn_to_irqhandler_lateeoi(unsigned int remote_domain,
evtchn_port_t remote_port,
irq_handler_t handler,
@@ -1464,7 +1502,7 @@
}
EXPORT_SYMBOL_GPL(xen_set_irq_priority);
-int evtchn_make_refcounted(unsigned int evtchn)
+int evtchn_make_refcounted(evtchn_port_t evtchn)
{
int irq = get_evtchn_to_irq(evtchn);
struct irq_info *info;
@@ -1485,7 +1523,7 @@
}
EXPORT_SYMBOL_GPL(evtchn_make_refcounted);
-int evtchn_get(unsigned int evtchn)
+int evtchn_get(evtchn_port_t evtchn)
{
int irq;
struct irq_info *info;
@@ -1518,7 +1556,7 @@
}
EXPORT_SYMBOL_GPL(evtchn_get);
-void evtchn_put(unsigned int evtchn)
+void evtchn_put(evtchn_port_t evtchn)
{
int irq = get_evtchn_to_irq(evtchn);
if (WARN_ON(irq == -1))
@@ -1595,13 +1633,10 @@
generic_handle_irq(irq);
}
-static DEFINE_PER_CPU(unsigned, xed_nesting_count);
-
static void __xen_evtchn_do_upcall(void)
{
struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
- int cpu = get_cpu();
- unsigned count;
+ int cpu = smp_processor_id();
struct evtchn_loop_ctrl ctrl = { 0 };
read_lock(&evtchn_rwlock);
@@ -1609,18 +1644,14 @@
do {
vcpu_info->evtchn_upcall_pending = 0;
- if (__this_cpu_inc_return(xed_nesting_count) - 1)
- goto out;
-
xen_evtchn_handle_events(cpu, &ctrl);
BUG_ON(!irqs_disabled());
- count = __this_cpu_read(xed_nesting_count);
- __this_cpu_write(xed_nesting_count, 0);
- } while (count != 1 || vcpu_info->evtchn_upcall_pending);
+ virt_rmb(); /* Hypervisor can set upcall pending. */
-out:
+ } while (vcpu_info->evtchn_upcall_pending);
+
read_unlock(&evtchn_rwlock);
/*
@@ -1629,8 +1660,6 @@
* above.
*/
__this_cpu_inc(irq_epoch);
-
- put_cpu();
}
void xen_evtchn_do_upcall(struct pt_regs *regs)
@@ -1638,9 +1667,6 @@
struct pt_regs *old_regs = set_irq_regs(regs);
irq_enter();
-#ifdef CONFIG_X86
- inc_irq_stat(irq_hv_callback_count);
-#endif
__xen_evtchn_do_upcall();
@@ -1655,7 +1681,7 @@
EXPORT_SYMBOL_GPL(xen_hvm_evtchn_do_upcall);
/* Rebind a new event channel to an existing irq. */
-void rebind_evtchn_irq(int evtchn, int irq)
+void rebind_evtchn_irq(evtchn_port_t evtchn, int irq)
{
struct irq_info *info = info_for_irq(irq);
@@ -1877,7 +1903,8 @@
static void restore_cpu_virqs(unsigned int cpu)
{
struct evtchn_bind_virq bind_virq;
- int virq, irq, evtchn;
+ evtchn_port_t evtchn;
+ int virq, irq;
for (virq = 0; virq < NR_VIRQS; virq++) {
if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1)
@@ -1902,7 +1929,8 @@
static void restore_cpu_ipis(unsigned int cpu)
{
struct evtchn_bind_ipi bind_ipi;
- int ipi, irq, evtchn;
+ evtchn_port_t evtchn;
+ int ipi, irq;
for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) {
if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
@@ -1935,7 +1963,7 @@
EXPORT_SYMBOL(xen_clear_irq_pending);
void xen_set_irq_pending(int irq)
{
- int evtchn = evtchn_from_irq(irq);
+ evtchn_port_t evtchn = evtchn_from_irq(irq);
if (VALID_EVTCHN(evtchn))
set_evtchn(evtchn);
@@ -1943,7 +1971,7 @@
bool xen_test_irq_pending(int irq)
{
- int evtchn = evtchn_from_irq(irq);
+ evtchn_port_t evtchn = evtchn_from_irq(irq);
bool ret = false;
if (VALID_EVTCHN(evtchn))
@@ -2079,30 +2107,34 @@
/* Vector callbacks are better than PCI interrupts to receive event
* channel notifications because we can receive vector callbacks on any
* vcpu and we don't need PCI support or APIC interactions. */
-void xen_callback_vector(void)
+void xen_setup_callback_vector(void)
{
- int rc;
uint64_t callback_via;
if (xen_have_vector_callback) {
callback_via = HVM_CALLBACK_VECTOR(HYPERVISOR_CALLBACK_VECTOR);
- rc = xen_set_callback_via(callback_via);
- if (rc) {
+ if (xen_set_callback_via(callback_via)) {
pr_err("Request for Xen HVM callback vector failed\n");
xen_have_vector_callback = 0;
- return;
}
- pr_info_once("Xen HVM callback vector for event delivery is enabled\n");
- alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR,
- xen_hvm_callback_vector);
}
}
+
+static __init void xen_alloc_callback_vector(void)
+{
+ if (!xen_have_vector_callback)
+ return;
+
+ pr_info("Xen HVM callback vector for event delivery is enabled\n");
+ alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_xen_hvm_callback);
+}
#else
-void xen_callback_vector(void) {}
+void xen_setup_callback_vector(void) {}
+static inline void xen_alloc_callback_vector(void) {}
#endif
-static bool fifo_events = true;
-module_param(fifo_events, bool, 0);
+bool xen_fifo_events = true;
+module_param_named(fifo_events, xen_fifo_events, bool, 0);
static int xen_evtchn_cpu_prepare(unsigned int cpu)
{
@@ -2129,12 +2161,14 @@
void __init xen_init_IRQ(void)
{
int ret = -EINVAL;
- unsigned int evtchn;
+ evtchn_port_t evtchn;
- if (fifo_events)
+ if (xen_fifo_events)
ret = xen_evtchn_fifo_init();
- if (ret < 0)
+ if (ret < 0) {
xen_evtchn_2l_init();
+ xen_fifo_events = false;
+ }
xen_cpu_init_eoi(smp_processor_id());
@@ -2157,8 +2191,10 @@
if (xen_initial_domain())
pci_xen_initial_domain();
}
- if (xen_feature(XENFEAT_hvm_callback_vector))
- xen_callback_vector();
+ if (xen_feature(XENFEAT_hvm_callback_vector)) {
+ xen_setup_callback_vector();
+ xen_alloc_callback_vector();
+ }
if (xen_hvm_domain()) {
native_init_IRQ();
diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c
index 360a7f8..ad9fe51 100644
--- a/drivers/xen/events/events_fifo.c
+++ b/drivers/xen/events/events_fifo.c
@@ -82,7 +82,7 @@
#endif
-static inline event_word_t *event_word_from_port(unsigned port)
+static inline event_word_t *event_word_from_port(evtchn_port_t port)
{
unsigned i = port / EVENT_WORDS_PER_PAGE;
@@ -138,9 +138,8 @@
array_page[i] = 1 << EVTCHN_FIFO_MASKED;
}
-static int evtchn_fifo_setup(struct irq_info *info)
+static int evtchn_fifo_setup(evtchn_port_t port)
{
- unsigned port = info->evtchn;
unsigned new_array_pages;
int ret;
@@ -186,36 +185,37 @@
return ret;
}
-static void evtchn_fifo_bind_to_cpu(struct irq_info *info, unsigned cpu)
+static void evtchn_fifo_bind_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
+ unsigned int old_cpu)
{
/* no-op */
}
-static void evtchn_fifo_clear_pending(unsigned port)
+static void evtchn_fifo_clear_pending(evtchn_port_t port)
{
event_word_t *word = event_word_from_port(port);
sync_clear_bit(EVTCHN_FIFO_BIT(PENDING, word), BM(word));
}
-static void evtchn_fifo_set_pending(unsigned port)
+static void evtchn_fifo_set_pending(evtchn_port_t port)
{
event_word_t *word = event_word_from_port(port);
sync_set_bit(EVTCHN_FIFO_BIT(PENDING, word), BM(word));
}
-static bool evtchn_fifo_is_pending(unsigned port)
+static bool evtchn_fifo_is_pending(evtchn_port_t port)
{
event_word_t *word = event_word_from_port(port);
return sync_test_bit(EVTCHN_FIFO_BIT(PENDING, word), BM(word));
}
-static void evtchn_fifo_mask(unsigned port)
+static void evtchn_fifo_mask(evtchn_port_t port)
{
event_word_t *word = event_word_from_port(port);
sync_set_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word));
}
-static bool evtchn_fifo_is_masked(unsigned port)
+static bool evtchn_fifo_is_masked(evtchn_port_t port)
{
event_word_t *word = event_word_from_port(port);
return sync_test_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word));
@@ -231,6 +231,9 @@
w = *word;
do {
+ if (!(w & (1 << EVTCHN_FIFO_MASKED)))
+ return true;
+
if (w & (1 << EVTCHN_FIFO_PENDING))
return false;
@@ -242,7 +245,7 @@
return true;
}
-static void evtchn_fifo_unmask(unsigned port)
+static void evtchn_fifo_unmask(evtchn_port_t port)
{
event_word_t *word = event_word_from_port(port);
@@ -275,7 +278,7 @@
{
struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu);
uint32_t head;
- unsigned port;
+ evtchn_port_t port;
event_word_t *word;
head = q->head[priority];
diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h
index eb012fb..4d3398e 100644
--- a/drivers/xen/events/events_internal.h
+++ b/drivers/xen/events/events_internal.h
@@ -7,78 +7,22 @@
#ifndef __EVENTS_INTERNAL_H__
#define __EVENTS_INTERNAL_H__
-/* Interrupt types. */
-enum xen_irq_type {
- IRQT_UNBOUND = 0,
- IRQT_PIRQ,
- IRQT_VIRQ,
- IRQT_IPI,
- IRQT_EVTCHN
-};
-
-/*
- * Packed IRQ information:
- * type - enum xen_irq_type
- * event channel - irq->event channel mapping
- * cpu - cpu this event channel is bound to
- * index - type-specific information:
- * PIRQ - vector, with MSB being "needs EIO", or physical IRQ of the HVM
- * guest, or GSI (real passthrough IRQ) of the device.
- * VIRQ - virq number
- * IPI - IPI vector
- * EVTCHN -
- */
-struct irq_info {
- struct list_head list;
- struct list_head eoi_list;
- short refcnt;
- short spurious_cnt;
- short type; /* type */
- u8 mask_reason; /* Why is event channel masked */
-#define EVT_MASK_REASON_EXPLICIT 0x01
-#define EVT_MASK_REASON_TEMPORARY 0x02
-#define EVT_MASK_REASON_EOI_PENDING 0x04
- u8 is_active; /* Is event just being handled? */
- unsigned irq;
- unsigned int evtchn; /* event channel */
- unsigned short cpu; /* cpu bound */
- unsigned short eoi_cpu; /* EOI must happen on this cpu */
- unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */
- u64 eoi_time; /* Time in jiffies when to EOI. */
- raw_spinlock_t lock;
-
- union {
- unsigned short virq;
- enum ipi_vector ipi;
- struct {
- unsigned short pirq;
- unsigned short gsi;
- unsigned char vector;
- unsigned char flags;
- uint16_t domid;
- } pirq;
- } u;
-};
-
-#define PIRQ_NEEDS_EOI (1 << 0)
-#define PIRQ_SHAREABLE (1 << 1)
-#define PIRQ_MSI_GROUP (1 << 2)
-
struct evtchn_loop_ctrl;
struct evtchn_ops {
unsigned (*max_channels)(void);
unsigned (*nr_channels)(void);
- int (*setup)(struct irq_info *info);
+ int (*setup)(evtchn_port_t port);
void (*remove)(evtchn_port_t port, unsigned int cpu);
- void (*bind_to_cpu)(struct irq_info *info, unsigned cpu);
+ void (*bind_to_cpu)(evtchn_port_t evtchn, unsigned int cpu,
+ unsigned int old_cpu);
- void (*clear_pending)(unsigned port);
- void (*set_pending)(unsigned port);
- bool (*is_pending)(unsigned port);
- void (*mask)(unsigned port);
- void (*unmask)(unsigned port);
+ void (*clear_pending)(evtchn_port_t port);
+ void (*set_pending)(evtchn_port_t port);
+ bool (*is_pending)(evtchn_port_t port);
+ void (*mask)(evtchn_port_t port);
+ void (*unmask)(evtchn_port_t port);
void (*handle_events)(unsigned cpu, struct evtchn_loop_ctrl *ctrl);
void (*resume)(void);
@@ -89,13 +33,10 @@
extern const struct evtchn_ops *evtchn_ops;
-extern int **evtchn_to_irq;
-int get_evtchn_to_irq(unsigned int evtchn);
+int get_evtchn_to_irq(evtchn_port_t evtchn);
void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl);
-struct irq_info *info_for_irq(unsigned irq);
-unsigned cpu_from_irq(unsigned irq);
-unsigned cpu_from_evtchn(unsigned int evtchn);
+unsigned int cpu_from_evtchn(evtchn_port_t evtchn);
static inline unsigned xen_evtchn_max_channels(void)
{
@@ -106,10 +47,10 @@
* Do any ABI specific setup for a bound event channel before it can
* be unmasked and used.
*/
-static inline int xen_evtchn_port_setup(struct irq_info *info)
+static inline int xen_evtchn_port_setup(evtchn_port_t evtchn)
{
if (evtchn_ops->setup)
- return evtchn_ops->setup(info);
+ return evtchn_ops->setup(evtchn);
return 0;
}
@@ -120,33 +61,34 @@
evtchn_ops->remove(evtchn, cpu);
}
-static inline void xen_evtchn_port_bind_to_cpu(struct irq_info *info,
- unsigned cpu)
+static inline void xen_evtchn_port_bind_to_cpu(evtchn_port_t evtchn,
+ unsigned int cpu,
+ unsigned int old_cpu)
{
- evtchn_ops->bind_to_cpu(info, cpu);
+ evtchn_ops->bind_to_cpu(evtchn, cpu, old_cpu);
}
-static inline void clear_evtchn(unsigned port)
+static inline void clear_evtchn(evtchn_port_t port)
{
evtchn_ops->clear_pending(port);
}
-static inline void set_evtchn(unsigned port)
+static inline void set_evtchn(evtchn_port_t port)
{
evtchn_ops->set_pending(port);
}
-static inline bool test_evtchn(unsigned port)
+static inline bool test_evtchn(evtchn_port_t port)
{
return evtchn_ops->is_pending(port);
}
-static inline void mask_evtchn(unsigned port)
+static inline void mask_evtchn(evtchn_port_t port)
{
return evtchn_ops->mask(port);
}
-static inline void unmask_evtchn(unsigned port)
+static inline void unmask_evtchn(evtchn_port_t port)
{
return evtchn_ops->unmask(port);
}
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index a439301..5dc016d 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -83,7 +83,7 @@
struct user_evtchn {
struct rb_node node;
struct per_user_data *user;
- unsigned port;
+ evtchn_port_t port;
bool enabled;
};
@@ -138,7 +138,8 @@
kfree(evtchn);
}
-static struct user_evtchn *find_evtchn(struct per_user_data *u, unsigned port)
+static struct user_evtchn *find_evtchn(struct per_user_data *u,
+ evtchn_port_t port)
{
struct rb_node *node = u->evtchns.rb_node;
@@ -163,7 +164,7 @@
struct per_user_data *u = evtchn->user;
WARN(!evtchn->enabled,
- "Interrupt for port %d, but apparently not enabled; per-user %p\n",
+ "Interrupt for port %u, but apparently not enabled; per-user %p\n",
evtchn->port, u);
evtchn->enabled = false;
@@ -285,7 +286,7 @@
mutex_lock(&u->bind_mutex);
for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) {
- unsigned port = kbuf[i];
+ evtchn_port_t port = kbuf[i];
struct user_evtchn *evtchn;
evtchn = find_evtchn(u, port);
@@ -360,7 +361,7 @@
return 0;
}
-static int evtchn_bind_to_user(struct per_user_data *u, int port)
+static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port)
{
struct user_evtchn *evtchn;
struct evtchn_close close;
@@ -422,7 +423,7 @@
static DEFINE_PER_CPU(int, bind_last_selected_cpu);
-static void evtchn_bind_interdom_next_vcpu(int evtchn)
+static void evtchn_bind_interdom_next_vcpu(evtchn_port_t evtchn)
{
unsigned int selected_cpu, irq;
struct irq_desc *desc;
diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
index 3fa40c7..edb0acd 100644
--- a/drivers/xen/gntalloc.c
+++ b/drivers/xen/gntalloc.c
@@ -169,20 +169,14 @@
__del_gref(gref);
}
- /* It's possible for the target domain to map the just-allocated grant
- * references by blindly guessing their IDs; if this is done, then
- * __del_gref will leave them in the queue_gref list. They need to be
- * added to the global list so that we can free them when they are no
- * longer referenced.
- */
- if (unlikely(!list_empty(&queue_gref)))
- list_splice_tail(&queue_gref, &gref_list);
mutex_unlock(&gref_mutex);
return rc;
}
static void __del_gref(struct gntalloc_gref *gref)
{
+ unsigned long addr;
+
if (gref->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
uint8_t *tmp = kmap(gref->page);
tmp[gref->notify.pgoff] = 0;
@@ -196,21 +190,16 @@
gref->notify.flags = 0;
if (gref->gref_id) {
- if (gnttab_query_foreign_access(gref->gref_id))
- return;
-
- if (!gnttab_end_foreign_access_ref(gref->gref_id, 0))
- return;
-
- gnttab_free_grant_reference(gref->gref_id);
+ if (gref->page) {
+ addr = (unsigned long)page_to_virt(gref->page);
+ gnttab_end_foreign_access(gref->gref_id, 0, addr);
+ } else
+ gnttab_free_grant_reference(gref->gref_id);
}
gref_size--;
list_del(&gref->next_gref);
- if (gref->page)
- __free_page(gref->page);
-
kfree(gref);
}
diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h
index 2f8b949..20d7d05 100644
--- a/drivers/xen/gntdev-common.h
+++ b/drivers/xen/gntdev-common.h
@@ -15,21 +15,15 @@
#include <linux/mman.h>
#include <linux/mmu_notifier.h>
#include <linux/types.h>
+#include <xen/interface/event_channel.h>
struct gntdev_dmabuf_priv;
struct gntdev_priv {
/* Maps with visible offsets in the file descriptor. */
struct list_head maps;
- /*
- * Maps that are not visible; will be freed on munmap.
- * Only populated if populate_freeable_maps == 1
- */
- struct list_head freeable_maps;
/* lock protects maps and freeable_maps. */
struct mutex lock;
- struct mm_struct *mm;
- struct mmu_notifier mn;
#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
/* Device for which DMA memory is allocated. */
@@ -45,10 +39,11 @@
int flags;
/* Address relative to the start of the gntdev_grant_map. */
int addr;
- int event;
+ evtchn_port_t event;
};
struct gntdev_grant_map {
+ struct mmu_interval_notifier notifier;
struct list_head next;
struct vm_area_struct *vma;
int index;
@@ -87,7 +82,7 @@
void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map);
-bool gntdev_account_mapped_pages(int count);
+bool gntdev_test_page_count(unsigned int count);
int gntdev_map_grant_pages(struct gntdev_grant_map *map);
diff --git a/drivers/xen/gntdev-dmabuf.c b/drivers/xen/gntdev-dmabuf.c
index da79992..4c13cbc 100644
--- a/drivers/xen/gntdev-dmabuf.c
+++ b/drivers/xen/gntdev-dmabuf.c
@@ -247,10 +247,9 @@
if (sgt) {
if (gntdev_dmabuf_attach->dir != DMA_NONE)
- dma_unmap_sg_attrs(attach->dev, sgt->sgl,
- sgt->nents,
- gntdev_dmabuf_attach->dir,
- DMA_ATTR_SKIP_CPU_SYNC);
+ dma_unmap_sgtable(attach->dev, sgt,
+ gntdev_dmabuf_attach->dir,
+ DMA_ATTR_SKIP_CPU_SYNC);
sg_free_table(sgt);
}
@@ -288,8 +287,8 @@
sgt = dmabuf_pages_to_sgt(gntdev_dmabuf->pages,
gntdev_dmabuf->nr_pages);
if (!IS_ERR(sgt)) {
- if (!dma_map_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
- DMA_ATTR_SKIP_CPU_SYNC)) {
+ if (dma_map_sgtable(attach->dev, sgt, dir,
+ DMA_ATTR_SKIP_CPU_SYNC)) {
sg_free_table(sgt);
kfree(sgt);
sgt = ERR_PTR(-ENOMEM);
@@ -342,35 +341,12 @@
mutex_unlock(&priv->lock);
}
-static void *dmabuf_exp_ops_kmap(struct dma_buf *dma_buf,
- unsigned long page_num)
-{
- /* Not implemented. */
- return NULL;
-}
-
-static void dmabuf_exp_ops_kunmap(struct dma_buf *dma_buf,
- unsigned long page_num, void *addr)
-{
- /* Not implemented. */
-}
-
-static int dmabuf_exp_ops_mmap(struct dma_buf *dma_buf,
- struct vm_area_struct *vma)
-{
- /* Not implemented. */
- return 0;
-}
-
static const struct dma_buf_ops dmabuf_exp_ops = {
.attach = dmabuf_exp_ops_attach,
.detach = dmabuf_exp_ops_detach,
.map_dma_buf = dmabuf_exp_ops_map_dma_buf,
.unmap_dma_buf = dmabuf_exp_ops_unmap_dma_buf,
.release = dmabuf_exp_ops_release,
- .map = dmabuf_exp_ops_kmap,
- .unmap = dmabuf_exp_ops_kunmap,
- .mmap = dmabuf_exp_ops_mmap,
};
struct gntdev_dmabuf_export_args {
@@ -446,7 +422,7 @@
{
struct gntdev_grant_map *map;
- if (unlikely(count <= 0))
+ if (unlikely(gntdev_test_page_count(count)))
return ERR_PTR(-EINVAL);
if ((dmabuf_flags & GNTDEV_DMA_FLAG_WC) &&
@@ -459,11 +435,6 @@
if (!map)
return ERR_PTR(-ENOMEM);
- if (unlikely(gntdev_account_mapped_pages(count))) {
- pr_debug("can't map %d pages: over limit\n", count);
- gntdev_put_map(NULL, map);
- return ERR_PTR(-ENOMEM);
- }
return map;
}
@@ -661,7 +632,7 @@
/* Now convert sgt to array of pages and check for page validity. */
i = 0;
- for_each_sg_page(sgt->sgl, &sg_iter, sgt->nents, 0) {
+ for_each_sgtable_page(sgt, &sg_iter, 0) {
struct page *page = sg_page_iter_page(&sg_iter);
/*
* Check if page is valid: this can happen if we are given
@@ -779,7 +750,7 @@
if (copy_from_user(&op, u, sizeof(op)) != 0)
return -EFAULT;
- if (unlikely(op.count <= 0))
+ if (unlikely(gntdev_test_page_count(op.count)))
return -EINVAL;
refs = kcalloc(op.count, sizeof(*refs), GFP_KERNEL);
@@ -826,7 +797,7 @@
if (copy_from_user(&op, u, sizeof(op)) != 0)
return -EFAULT;
- if (unlikely(op.count <= 0))
+ if (unlikely(gntdev_test_page_count(op.count)))
return -EINVAL;
gntdev_dmabuf = dmabuf_imp_to_refs(priv->dmabuf_priv,
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index e953ea3..54778aa 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -55,15 +55,12 @@
"Gerd Hoffmann <kraxel@redhat.com>");
MODULE_DESCRIPTION("User-space granted page access driver");
-static int limit = 1024*1024;
-module_param(limit, int, 0644);
-MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by "
- "the gntdev device");
-
-static atomic_t pages_mapped = ATOMIC_INIT(0);
+static unsigned int limit = 64*1024;
+module_param(limit, uint, 0644);
+MODULE_PARM_DESC(limit,
+ "Maximum number of grants that may be mapped by one mapping request");
static int use_ptemod;
-#define populate_freeable_maps use_ptemod
static int unmap_grant_pages(struct gntdev_grant_map *map,
int offset, int pages);
@@ -72,9 +69,9 @@
/* ------------------------------------------------------------------ */
-bool gntdev_account_mapped_pages(int count)
+bool gntdev_test_page_count(unsigned int count)
{
- return atomic_add_return(count, &pages_mapped) > limit;
+ return !count || count > limit;
}
static void gntdev_print_maps(struct gntdev_priv *priv,
@@ -115,14 +112,14 @@
gnttab_free_pages(map->count, map->pages);
#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
- kfree(map->frames);
+ kvfree(map->frames);
#endif
- kfree(map->pages);
- kfree(map->grants);
- kfree(map->map_ops);
- kfree(map->unmap_ops);
- kfree(map->kmap_ops);
- kfree(map->kunmap_ops);
+ kvfree(map->pages);
+ kvfree(map->grants);
+ kvfree(map->map_ops);
+ kvfree(map->unmap_ops);
+ kvfree(map->kmap_ops);
+ kvfree(map->kunmap_ops);
kfree(map);
}
@@ -136,12 +133,13 @@
if (NULL == add)
return NULL;
- add->grants = kcalloc(count, sizeof(add->grants[0]), GFP_KERNEL);
- add->map_ops = kcalloc(count, sizeof(add->map_ops[0]), GFP_KERNEL);
- add->unmap_ops = kcalloc(count, sizeof(add->unmap_ops[0]), GFP_KERNEL);
- add->kmap_ops = kcalloc(count, sizeof(add->kmap_ops[0]), GFP_KERNEL);
- add->kunmap_ops = kcalloc(count, sizeof(add->kunmap_ops[0]), GFP_KERNEL);
- add->pages = kcalloc(count, sizeof(add->pages[0]), GFP_KERNEL);
+ add->grants = kvcalloc(count, sizeof(add->grants[0]), GFP_KERNEL);
+ add->map_ops = kvcalloc(count, sizeof(add->map_ops[0]), GFP_KERNEL);
+ add->unmap_ops = kvcalloc(count, sizeof(add->unmap_ops[0]), GFP_KERNEL);
+ add->kmap_ops = kvcalloc(count, sizeof(add->kmap_ops[0]), GFP_KERNEL);
+ add->kunmap_ops = kvcalloc(count,
+ sizeof(add->kunmap_ops[0]), GFP_KERNEL);
+ add->pages = kvcalloc(count, sizeof(add->pages[0]), GFP_KERNEL);
if (NULL == add->grants ||
NULL == add->map_ops ||
NULL == add->unmap_ops ||
@@ -160,8 +158,8 @@
if (dma_flags & (GNTDEV_DMA_FLAG_WC | GNTDEV_DMA_FLAG_COHERENT)) {
struct gnttab_dma_alloc_args args;
- add->frames = kcalloc(count, sizeof(add->frames[0]),
- GFP_KERNEL);
+ add->frames = kvcalloc(count, sizeof(add->frames[0]),
+ GFP_KERNEL);
if (!add->frames)
goto err;
@@ -242,21 +240,13 @@
if (!refcount_dec_and_test(&map->users))
return;
- atomic_sub(map->count, &pages_mapped);
+ if (map->pages && !use_ptemod)
+ unmap_grant_pages(map, 0, map->count);
if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
notify_remote_via_evtchn(map->notify.event);
evtchn_put(map->notify.event);
}
-
- if (populate_freeable_maps && priv) {
- mutex_lock(&priv->lock);
- list_del(&map->next);
- mutex_unlock(&priv->lock);
- }
-
- if (map->pages && !use_ptemod)
- unmap_grant_pages(map, 0, map->count);
gntdev_free_map(map);
}
@@ -447,16 +437,9 @@
pr_debug("gntdev_vma_close %p\n", vma);
if (use_ptemod) {
- /* It is possible that an mmu notifier could be running
- * concurrently, so take priv->lock to ensure that the vma won't
- * vanishing during the unmap_grant_pages call, since we will
- * spin here until that completes. Such a concurrent call will
- * not do any unmapping, since that has been done prior to
- * closing the vma, but it may still iterate the unmap_ops list.
- */
- mutex_lock(&priv->lock);
+ WARN_ON(map->vma != vma);
+ mmu_interval_notifier_remove(&map->notifier);
map->vma = NULL;
- mutex_unlock(&priv->lock);
}
vma->vm_private_data = NULL;
gntdev_put_map(priv, map);
@@ -478,109 +461,44 @@
/* ------------------------------------------------------------------ */
-static bool in_range(struct gntdev_grant_map *map,
- unsigned long start, unsigned long end)
+static bool gntdev_invalidate(struct mmu_interval_notifier *mn,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
{
- if (!map->vma)
- return false;
- if (map->vma->vm_start >= end)
- return false;
- if (map->vma->vm_end <= start)
- return false;
-
- return true;
-}
-
-static int unmap_if_in_range(struct gntdev_grant_map *map,
- unsigned long start, unsigned long end,
- bool blockable)
-{
+ struct gntdev_grant_map *map =
+ container_of(mn, struct gntdev_grant_map, notifier);
unsigned long mstart, mend;
int err;
- if (!in_range(map, start, end))
- return 0;
+ if (!mmu_notifier_range_blockable(range))
+ return false;
- if (!blockable)
- return -EAGAIN;
+ /*
+ * If the VMA is split or otherwise changed the notifier is not
+ * updated, but we don't want to process VA's outside the modified
+ * VMA. FIXME: It would be much more understandable to just prevent
+ * modifying the VMA in the first place.
+ */
+ if (map->vma->vm_start >= range->end ||
+ map->vma->vm_end <= range->start)
+ return true;
- mstart = max(start, map->vma->vm_start);
- mend = min(end, map->vma->vm_end);
+ mstart = max(range->start, map->vma->vm_start);
+ mend = min(range->end, map->vma->vm_end);
pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n",
map->index, map->count,
map->vma->vm_start, map->vma->vm_end,
- start, end, mstart, mend);
+ range->start, range->end, mstart, mend);
err = unmap_grant_pages(map,
(mstart - map->vma->vm_start) >> PAGE_SHIFT,
(mend - mstart) >> PAGE_SHIFT);
WARN_ON(err);
- return 0;
+ return true;
}
-static int mn_invl_range_start(struct mmu_notifier *mn,
- const struct mmu_notifier_range *range)
-{
- struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn);
- struct gntdev_grant_map *map;
- int ret = 0;
-
- if (mmu_notifier_range_blockable(range))
- mutex_lock(&priv->lock);
- else if (!mutex_trylock(&priv->lock))
- return -EAGAIN;
-
- list_for_each_entry(map, &priv->maps, next) {
- ret = unmap_if_in_range(map, range->start, range->end,
- mmu_notifier_range_blockable(range));
- if (ret)
- goto out_unlock;
- }
- list_for_each_entry(map, &priv->freeable_maps, next) {
- ret = unmap_if_in_range(map, range->start, range->end,
- mmu_notifier_range_blockable(range));
- if (ret)
- goto out_unlock;
- }
-
-out_unlock:
- mutex_unlock(&priv->lock);
-
- return ret;
-}
-
-static void mn_release(struct mmu_notifier *mn,
- struct mm_struct *mm)
-{
- struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn);
- struct gntdev_grant_map *map;
- int err;
-
- mutex_lock(&priv->lock);
- list_for_each_entry(map, &priv->maps, next) {
- if (!map->vma)
- continue;
- pr_debug("map %d+%d (%lx %lx)\n",
- map->index, map->count,
- map->vma->vm_start, map->vma->vm_end);
- err = unmap_grant_pages(map, /* offset */ 0, map->count);
- WARN_ON(err);
- }
- list_for_each_entry(map, &priv->freeable_maps, next) {
- if (!map->vma)
- continue;
- pr_debug("map %d+%d (%lx %lx)\n",
- map->index, map->count,
- map->vma->vm_start, map->vma->vm_end);
- err = unmap_grant_pages(map, /* offset */ 0, map->count);
- WARN_ON(err);
- }
- mutex_unlock(&priv->lock);
-}
-
-static const struct mmu_notifier_ops gntdev_mmu_ops = {
- .release = mn_release,
- .invalidate_range_start = mn_invl_range_start,
+static const struct mmu_interval_notifier_ops gntdev_mmu_ops = {
+ .invalidate = gntdev_invalidate,
};
/* ------------------------------------------------------------------ */
@@ -588,41 +506,24 @@
static int gntdev_open(struct inode *inode, struct file *flip)
{
struct gntdev_priv *priv;
- int ret = 0;
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv)
return -ENOMEM;
INIT_LIST_HEAD(&priv->maps);
- INIT_LIST_HEAD(&priv->freeable_maps);
mutex_init(&priv->lock);
#ifdef CONFIG_XEN_GNTDEV_DMABUF
priv->dmabuf_priv = gntdev_dmabuf_init(flip);
if (IS_ERR(priv->dmabuf_priv)) {
- ret = PTR_ERR(priv->dmabuf_priv);
+ int ret = PTR_ERR(priv->dmabuf_priv);
+
kfree(priv);
return ret;
}
#endif
- if (use_ptemod) {
- priv->mm = get_task_mm(current);
- if (!priv->mm) {
- kfree(priv);
- return -ENOMEM;
- }
- priv->mn.ops = &gntdev_mmu_ops;
- ret = mmu_notifier_register(&priv->mn, priv->mm);
- mmput(priv->mm);
- }
-
- if (ret) {
- kfree(priv);
- return ret;
- }
-
flip->private_data = priv;
#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
priv->dma_dev = gntdev_miscdev.this_device;
@@ -647,16 +548,12 @@
list_del(&map->next);
gntdev_put_map(NULL /* already removed */, map);
}
- WARN_ON(!list_empty(&priv->freeable_maps));
mutex_unlock(&priv->lock);
#ifdef CONFIG_XEN_GNTDEV_DMABUF
gntdev_dmabuf_fini(priv->dmabuf_priv);
#endif
- if (use_ptemod)
- mmu_notifier_unregister(&priv->mn, priv->mm);
-
kfree(priv);
return 0;
}
@@ -671,7 +568,7 @@
if (copy_from_user(&op, u, sizeof(op)) != 0)
return -EFAULT;
pr_debug("priv %p, add %d\n", priv, op.count);
- if (unlikely(op.count <= 0))
+ if (unlikely(gntdev_test_page_count(op.count)))
return -EINVAL;
err = -ENOMEM;
@@ -679,12 +576,6 @@
if (!map)
return err;
- if (unlikely(gntdev_account_mapped_pages(op.count))) {
- pr_debug("can't map: over limit\n");
- gntdev_put_map(NULL, map);
- return err;
- }
-
if (copy_from_user(map->grants, &u->refs,
sizeof(map->grants[0]) * op.count) != 0) {
gntdev_put_map(NULL, map);
@@ -717,8 +608,6 @@
map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count);
if (map) {
list_del(&map->next);
- if (populate_freeable_maps)
- list_add_tail(&map->next, &priv->freeable_maps);
err = 0;
}
mutex_unlock(&priv->lock);
@@ -739,7 +628,7 @@
return -EFAULT;
pr_debug("priv %p, offset for vaddr %lx\n", priv, (unsigned long)op.vaddr);
- down_read(&current->mm->mmap_sem);
+ mmap_read_lock(current->mm);
vma = find_vma(current->mm, op.vaddr);
if (!vma || vma->vm_ops != &gntdev_vmops)
goto out_unlock;
@@ -753,7 +642,7 @@
rv = 0;
out_unlock:
- up_read(&current->mm->mmap_sem);
+ mmap_read_unlock(current->mm);
if (rv == 0 && copy_to_user(u, &op, sizeof(op)) != 0)
return -EFAULT;
@@ -766,7 +655,7 @@
struct gntdev_grant_map *map;
int rc;
int out_flags;
- unsigned int out_event;
+ evtchn_port_t out_event;
if (copy_from_user(&op, u, sizeof(op)))
return -EFAULT;
@@ -845,7 +734,7 @@
unsigned long xen_pfn;
int ret;
- ret = get_user_pages_fast(addr, 1, batch->writeable ? FOLL_WRITE : 0, &page);
+ ret = pin_user_pages_fast(addr, 1, batch->writeable ? FOLL_WRITE : 0, &page);
if (ret < 0)
return ret;
@@ -859,13 +748,7 @@
static void gntdev_put_pages(struct gntdev_copy_batch *batch)
{
- unsigned int i;
-
- for (i = 0; i < batch->nr_pages; i++) {
- if (batch->writeable && !PageDirty(batch->pages[i]))
- set_page_dirty_lock(batch->pages[i]);
- put_page(batch->pages[i]);
- }
+ unpin_user_pages_dirty_lock(batch->pages, batch->nr_pages, batch->writeable);
batch->nr_pages = 0;
batch->writeable = false;
}
@@ -1097,11 +980,6 @@
goto unlock_out;
if (use_ptemod && map->vma)
goto unlock_out;
- if (use_ptemod && priv->mm != vma->vm_mm) {
- pr_warn("Huh? Other mm?\n");
- goto unlock_out;
- }
-
refcount_inc(&map->users);
vma->vm_ops = &gntdev_vmops;
@@ -1112,10 +990,6 @@
vma->vm_flags |= VM_DONTCOPY;
vma->vm_private_data = map;
-
- if (use_ptemod)
- map->vma = vma;
-
if (map->flags) {
if ((vma->vm_flags & VM_WRITE) &&
(map->flags & GNTMAP_readonly))
@@ -1126,9 +1000,31 @@
map->flags |= GNTMAP_readonly;
}
+ if (use_ptemod) {
+ map->vma = vma;
+ err = mmu_interval_notifier_insert_locked(
+ &map->notifier, vma->vm_mm, vma->vm_start,
+ vma->vm_end - vma->vm_start, &gntdev_mmu_ops);
+ if (err) {
+ map->vma = NULL;
+ goto out_unlock_put;
+ }
+ }
mutex_unlock(&priv->lock);
if (use_ptemod) {
+ /*
+ * gntdev takes the address of the PTE in find_grant_ptes() and
+ * passes it to the hypervisor in gntdev_map_grant_pages(). The
+ * purpose of the notifier is to prevent the hypervisor pointer
+ * to the PTE from going stale.
+ *
+ * Since this vma's mappings can't be touched without the
+ * mmap_lock, and we are holding it now, there is no need for
+ * the notifier_range locking pattern.
+ */
+ mmu_interval_read_begin(&map->notifier);
+
map->pages_vm_start = vma->vm_start;
err = apply_to_page_range(vma->vm_mm, vma->vm_start,
vma->vm_end - vma->vm_start,
@@ -1176,8 +1072,11 @@
mutex_unlock(&priv->lock);
out_put_map:
if (use_ptemod) {
- map->vma = NULL;
unmap_grant_pages(map, 0, map->count);
+ if (map->vma) {
+ mmu_interval_notifier_remove(&map->notifier);
+ map->vma = NULL;
+ }
}
gntdev_put_map(priv, map);
return err;
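
Editor's note: the gntdev hunks above replace the old per-mm mmu_notifier with a per-mapping mmu_interval_notifier. A minimal sketch of that lifecycle follows; the names (demo_map, demo_mmu_ops, demo_mmap) are illustrative, not gntdev symbols, and error handling is reduced to the essentials.

#include <linux/mm.h>
#include <linux/mmu_notifier.h>

struct demo_map {
	struct mmu_interval_notifier notifier;
};

static bool demo_invalidate(struct mmu_interval_notifier *mn,
			    const struct mmu_notifier_range *range,
			    unsigned long cur_seq)
{
	/* record the new sequence, then tear down the affected mappings */
	mmu_interval_set_seq(mn, cur_seq);
	return true;
}

static const struct mmu_interval_notifier_ops demo_mmu_ops = {
	.invalidate = demo_invalidate,
};

static int demo_mmap(struct demo_map *map, struct vm_area_struct *vma)
{
	int err;

	/* register for invalidations covering exactly this VMA */
	err = mmu_interval_notifier_insert_locked(&map->notifier, vma->vm_mm,
						  vma->vm_start,
						  vma->vm_end - vma->vm_start,
						  &demo_mmu_ops);
	if (err)
		return err;

	/* mmap_lock is held here, so no invalidation can race with setup */
	mmu_interval_read_begin(&map->notifier);
	return 0;
}

static void demo_teardown(struct demo_map *map)
{
	mmu_interval_notifier_remove(&map->notifier);
}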
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 49b381e..5c83d41 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -64,7 +64,6 @@
#include <asm/xen/hypercall.h>
#include <asm/xen/interface.h>
-#include <asm/pgtable.h>
#include <asm/sync_bitops.h>
/* External tools reserve first few grant table entries. */
@@ -135,12 +134,9 @@
*/
unsigned long (*end_foreign_transfer_ref)(grant_ref_t ref);
/*
- * Query the status of a grant entry. Ref parameter is reference of
- * queried grant entry, return value is the status of queried entry.
- * Detailed status(writing/reading) can be gotten from the return value
- * by bit operations.
+ * Read the frame number related to a given grant reference.
*/
- int (*query_foreign_access)(grant_ref_t ref);
+ unsigned long (*read_frame)(grant_ref_t ref);
};
struct unmap_refs_callback_data {
@@ -285,22 +281,6 @@
}
EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
-static int gnttab_query_foreign_access_v1(grant_ref_t ref)
-{
- return gnttab_shared.v1[ref].flags & (GTF_reading|GTF_writing);
-}
-
-static int gnttab_query_foreign_access_v2(grant_ref_t ref)
-{
- return grstatus[ref] & (GTF_reading|GTF_writing);
-}
-
-int gnttab_query_foreign_access(grant_ref_t ref)
-{
- return gnttab_interface->query_foreign_access(ref);
-}
-EXPORT_SYMBOL_GPL(gnttab_query_foreign_access);
-
static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref, int readonly)
{
u16 flags, nflags;
@@ -354,6 +334,16 @@
}
EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref);
+static unsigned long gnttab_read_frame_v1(grant_ref_t ref)
+{
+ return gnttab_shared.v1[ref].frame;
+}
+
+static unsigned long gnttab_read_frame_v2(grant_ref_t ref)
+{
+ return gnttab_shared.v2[ref].full_page.frame;
+}
+
struct deferred_entry {
struct list_head list;
grant_ref_t ref;
@@ -383,12 +373,9 @@
spin_unlock_irqrestore(&gnttab_list_lock, flags);
if (_gnttab_end_foreign_access_ref(entry->ref, entry->ro)) {
put_free_entry(entry->ref);
- if (entry->page) {
- pr_debug("freeing g.e. %#x (pfn %#lx)\n",
- entry->ref, page_to_pfn(entry->page));
- put_page(entry->page);
- } else
- pr_info("freeing g.e. %#x\n", entry->ref);
+ pr_debug("freeing g.e. %#x (pfn %#lx)\n",
+ entry->ref, page_to_pfn(entry->page));
+ put_page(entry->page);
kfree(entry);
entry = NULL;
} else {
@@ -413,9 +400,18 @@
static void gnttab_add_deferred(grant_ref_t ref, bool readonly,
struct page *page)
{
- struct deferred_entry *entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
+ struct deferred_entry *entry;
+ gfp_t gfp = (in_atomic() || irqs_disabled()) ? GFP_ATOMIC : GFP_KERNEL;
const char *what = KERN_WARNING "leaking";
+ entry = kmalloc(sizeof(*entry), gfp);
+ if (!page) {
+ unsigned long gfn = gnttab_interface->read_frame(ref);
+
+ page = pfn_to_page(gfn_to_pfn(gfn));
+ get_page(page);
+ }
+
if (entry) {
unsigned long flags;
@@ -436,11 +432,21 @@
what, ref, page ? page_to_pfn(page) : -1);
}
+int gnttab_try_end_foreign_access(grant_ref_t ref)
+{
+ int ret = _gnttab_end_foreign_access_ref(ref, 0);
+
+ if (ret)
+ put_free_entry(ref);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(gnttab_try_end_foreign_access);
+
void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
unsigned long page)
{
- if (gnttab_end_foreign_access_ref(ref, readonly)) {
- put_free_entry(ref);
+ if (gnttab_try_end_foreign_access(ref)) {
if (page != 0)
put_page(virt_to_page(page));
} else
@@ -664,7 +670,6 @@
unsigned int nr_glist_frames, new_nr_glist_frames;
unsigned int grefs_per_frame;
- BUG_ON(gnttab_interface == NULL);
grefs_per_frame = gnttab_interface->grefs_per_grant_frame;
new_nr_grant_frames = nr_grant_frames + more_frames;
@@ -803,7 +808,7 @@
{
int ret;
- ret = alloc_xenballooned_pages(nr_pages, pages);
+ ret = xen_alloc_unpopulated_pages(nr_pages, pages);
if (ret < 0)
return ret;
@@ -815,6 +820,129 @@
}
EXPORT_SYMBOL_GPL(gnttab_alloc_pages);
+#ifdef CONFIG_XEN_UNPOPULATED_ALLOC
+static inline void cache_init(struct gnttab_page_cache *cache)
+{
+ cache->pages = NULL;
+}
+
+static inline bool cache_empty(struct gnttab_page_cache *cache)
+{
+ return !cache->pages;
+}
+
+static inline struct page *cache_deq(struct gnttab_page_cache *cache)
+{
+ struct page *page;
+
+ page = cache->pages;
+ cache->pages = page->zone_device_data;
+
+ return page;
+}
+
+static inline void cache_enq(struct gnttab_page_cache *cache, struct page *page)
+{
+ page->zone_device_data = cache->pages;
+ cache->pages = page;
+}
+#else
+static inline void cache_init(struct gnttab_page_cache *cache)
+{
+ INIT_LIST_HEAD(&cache->pages);
+}
+
+static inline bool cache_empty(struct gnttab_page_cache *cache)
+{
+ return list_empty(&cache->pages);
+}
+
+static inline struct page *cache_deq(struct gnttab_page_cache *cache)
+{
+ struct page *page;
+
+ page = list_first_entry(&cache->pages, struct page, lru);
+ list_del(&page->lru);
+
+ return page;
+}
+
+static inline void cache_enq(struct gnttab_page_cache *cache, struct page *page)
+{
+ list_add(&page->lru, &cache->pages);
+}
+#endif
+
+void gnttab_page_cache_init(struct gnttab_page_cache *cache)
+{
+ spin_lock_init(&cache->lock);
+ cache_init(cache);
+ cache->num_pages = 0;
+}
+EXPORT_SYMBOL_GPL(gnttab_page_cache_init);
+
+int gnttab_page_cache_get(struct gnttab_page_cache *cache, struct page **page)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cache->lock, flags);
+
+ if (cache_empty(cache)) {
+ spin_unlock_irqrestore(&cache->lock, flags);
+ return gnttab_alloc_pages(1, page);
+ }
+
+ page[0] = cache_deq(cache);
+ cache->num_pages--;
+
+ spin_unlock_irqrestore(&cache->lock, flags);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(gnttab_page_cache_get);
+
+void gnttab_page_cache_put(struct gnttab_page_cache *cache, struct page **page,
+ unsigned int num)
+{
+ unsigned long flags;
+ unsigned int i;
+
+ spin_lock_irqsave(&cache->lock, flags);
+
+ for (i = 0; i < num; i++)
+ cache_enq(cache, page[i]);
+ cache->num_pages += num;
+
+ spin_unlock_irqrestore(&cache->lock, flags);
+}
+EXPORT_SYMBOL_GPL(gnttab_page_cache_put);
+
+void gnttab_page_cache_shrink(struct gnttab_page_cache *cache, unsigned int num)
+{
+ struct page *page[10];
+ unsigned int i = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cache->lock, flags);
+
+ while (cache->num_pages > num) {
+ page[i] = cache_deq(cache);
+ cache->num_pages--;
+ if (++i == ARRAY_SIZE(page)) {
+ spin_unlock_irqrestore(&cache->lock, flags);
+ gnttab_free_pages(i, page);
+ i = 0;
+ spin_lock_irqsave(&cache->lock, flags);
+ }
+ }
+
+ spin_unlock_irqrestore(&cache->lock, flags);
+
+ if (i != 0)
+ gnttab_free_pages(i, page);
+}
+EXPORT_SYMBOL_GPL(gnttab_page_cache_shrink);
+
void gnttab_pages_clear_private(int nr_pages, struct page **pages)
{
int i;
@@ -838,7 +966,7 @@
void gnttab_free_pages(int nr_pages, struct page **pages)
{
gnttab_pages_clear_private(nr_pages, pages);
- free_xenballooned_pages(nr_pages, pages);
+ xen_free_unpopulated_pages(nr_pages, pages);
}
EXPORT_SYMBOL_GPL(gnttab_free_pages);
@@ -1160,7 +1288,6 @@
static unsigned int nr_status_frames(unsigned int nr_grant_frames)
{
- BUG_ON(gnttab_interface == NULL);
return gnttab_frames(nr_grant_frames, SPP);
}
@@ -1297,7 +1424,7 @@
.update_entry = gnttab_update_entry_v1,
.end_foreign_access_ref = gnttab_end_foreign_access_ref_v1,
.end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v1,
- .query_foreign_access = gnttab_query_foreign_access_v1,
+ .read_frame = gnttab_read_frame_v1,
};
static const struct gnttab_ops gnttab_v2_ops = {
@@ -1309,7 +1436,7 @@
.update_entry = gnttab_update_entry_v2,
.end_foreign_access_ref = gnttab_end_foreign_access_ref_v2,
.end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v2,
- .query_foreign_access = gnttab_query_foreign_access_v2,
+ .read_frame = gnttab_read_frame_v2,
};
static bool gnttab_need_v2(void)
@@ -1388,7 +1515,6 @@
int rc;
unsigned int cur, extra;
- BUG_ON(gnttab_interface == NULL);
cur = nr_grant_frames;
extra = ((req_entries + gnttab_interface->grefs_per_grant_frame - 1) /
gnttab_interface->grefs_per_grant_frame);
@@ -1423,7 +1549,6 @@
/* Determine the maximum number of frames required for the
* grant reference free list on the current hypervisor.
*/
- BUG_ON(gnttab_interface == NULL);
max_nr_glist_frames = (max_nr_grant_frames *
gnttab_interface->grefs_per_grant_frame / RPP);
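
Editor's note: the gnttab_page_cache_* helpers introduced above give backends a shared free-page cache in place of per-driver lists (xen-scsiback is converted to it later in this patch). A rough usage sketch, assuming the struct gnttab_page_cache declaration lands in <xen/grant_table.h> as in mainline; the demo_backend type and the 1024-page limit are illustrative.

#include <linux/mm.h>
#include <xen/grant_table.h>

struct demo_backend {
	struct gnttab_page_cache free_pages;
};

static void demo_setup(struct demo_backend *be)
{
	gnttab_page_cache_init(&be->free_pages);
}

static int demo_get_page(struct demo_backend *be, struct page **pg)
{
	/* reuses a cached page or falls back to gnttab_alloc_pages() */
	return gnttab_page_cache_get(&be->free_pages, pg);
}

static void demo_put_pages(struct demo_backend *be, struct page **pg,
			   unsigned int n)
{
	gnttab_page_cache_put(&be->free_pages, pg, n);
}

static void demo_trim(struct demo_backend *be)
{
	/* keep at most 1024 cached pages around when idle */
	gnttab_page_cache_shrink(&be->free_pages, 1024);
}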
diff --git a/drivers/xen/mcelog.c b/drivers/xen/mcelog.c
index b8bf61a..e9ac3b8 100644
--- a/drivers/xen/mcelog.c
+++ b/drivers/xen/mcelog.c
@@ -222,7 +222,7 @@
struct mcinfo_global *mc_global;
struct mcinfo_bank *mc_bank;
struct xen_mce m;
- uint32_t i;
+ unsigned int i, j;
mic = NULL;
x86_mcinfo_lookup(&mic, mi, MC_TYPE_GLOBAL);
@@ -248,7 +248,17 @@
m.socketid = g_physinfo[i].mc_chipid;
m.cpu = m.extcpu = g_physinfo[i].mc_cpunr;
m.cpuvendor = (__u8)g_physinfo[i].mc_vendor;
- m.mcgcap = g_physinfo[i].mc_msrvalues[__MC_MSR_MCGCAP].value;
+ for (j = 0; j < g_physinfo[i].mc_nmsrvals; ++j)
+ switch (g_physinfo[i].mc_msrvalues[j].reg) {
+ case MSR_IA32_MCG_CAP:
+ m.mcgcap = g_physinfo[i].mc_msrvalues[j].value;
+ break;
+
+ case MSR_PPIN:
+ case MSR_AMD_PPIN:
+ m.ppin = g_physinfo[i].mc_msrvalues[j].value;
+ break;
+ }
mic = NULL;
x86_mcinfo_lookup(&mic, mi, MC_TYPE_BANK);
diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c
index c456464..9db557b 100644
--- a/drivers/xen/platform-pci.c
+++ b/drivers/xen/platform-pci.c
@@ -74,7 +74,7 @@
"xen-platform-pci", pdev);
}
-static int platform_pci_resume(struct pci_dev *pdev)
+static int platform_pci_resume(struct device *dev)
{
int err;
@@ -83,7 +83,7 @@
err = xen_set_callback_via(callback_via);
if (err) {
- dev_err(&pdev->dev, "platform_pci_resume failure!\n");
+ dev_err(dev, "platform_pci_resume failure!\n");
return err;
}
return 0;
@@ -167,13 +167,17 @@
{0,}
};
+static const struct dev_pm_ops platform_pm_ops = {
+ .resume_noirq = platform_pci_resume,
+};
+
static struct pci_driver platform_driver = {
.name = DRV_NAME,
.probe = platform_pci_probe,
.id_table = platform_pci_tbl,
-#ifdef CONFIG_PM
- .resume_early = platform_pci_resume,
-#endif
+ .driver = {
+ .pm = &platform_pm_ops,
+ },
};
builtin_pci_driver(platform_driver);
diff --git a/drivers/xen/preempt.c b/drivers/xen/preempt.c
deleted file mode 100644
index 98a9d68..0000000
--- a/drivers/xen/preempt.c
+++ /dev/null
@@ -1,42 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Preemptible hypercalls
- *
- * Copyright (C) 2014 Citrix Systems R&D ltd.
- */
-
-#include <linux/sched.h>
-#include <xen/xen-ops.h>
-
-#ifndef CONFIG_PREEMPT
-
-/*
- * Some hypercalls issued by the toolstack can take many 10s of
- * seconds. Allow tasks running hypercalls via the privcmd driver to
- * be voluntarily preempted even if full kernel preemption is
- * disabled.
- *
- * Such preemptible hypercalls are bracketed by
- * xen_preemptible_hcall_begin() and xen_preemptible_hcall_end()
- * calls.
- */
-
-DEFINE_PER_CPU(bool, xen_in_preemptible_hcall);
-EXPORT_SYMBOL_GPL(xen_in_preemptible_hcall);
-
-asmlinkage __visible void xen_maybe_preempt_hcall(void)
-{
- if (unlikely(__this_cpu_read(xen_in_preemptible_hcall)
- && need_resched() && !preempt_count())) {
- /*
- * Clear flag as we may be rescheduled on a different
- * cpu.
- */
- __this_cpu_write(xen_in_preemptible_hcall, false);
- local_irq_enable();
- cond_resched();
- local_irq_disable();
- __this_cpu_write(xen_in_preemptible_hcall, true);
- }
-}
-#endif /* CONFIG_PREEMPT */
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index 9c9422e..fe8df32 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -25,9 +25,6 @@
#include <linux/miscdevice.h>
#include <linux/moduleparam.h>
-#include <asm/pgalloc.h>
-#include <asm/pgtable.h>
-#include <asm/tlb.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
@@ -278,7 +275,7 @@
if (rc || list_empty(&pagelist))
goto out;
- down_write(&mm->mmap_sem);
+ mmap_write_lock(mm);
{
struct page *page = list_first_entry(&pagelist,
@@ -303,7 +300,7 @@
out_up:
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
out:
free_page_list(&pagelist);
@@ -427,7 +424,7 @@
if (pages == NULL)
return -ENOMEM;
- rc = alloc_xenballooned_pages(numpgs, pages);
+ rc = xen_alloc_unpopulated_pages(numpgs, pages);
if (rc != 0) {
pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__,
numpgs, rc);
@@ -499,7 +496,7 @@
}
}
- down_write(&mm->mmap_sem);
+ mmap_write_lock(mm);
vma = find_vma(mm, m.addr);
if (!vma ||
@@ -555,7 +552,7 @@
BUG_ON(traverse_pages_block(m.num, sizeof(xen_pfn_t),
&pagelist, mmap_batch_fn, &state));
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
if (state.global_error) {
/* Write back errors in second pass. */
@@ -576,19 +573,19 @@
return ret;
out_unlock:
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
goto out;
}
static int lock_pages(
struct privcmd_dm_op_buf kbufs[], unsigned int num,
- struct page *pages[], unsigned int nr_pages)
+ struct page *pages[], unsigned int nr_pages, unsigned int *pinned)
{
unsigned int i;
for (i = 0; i < num; i++) {
unsigned int requested;
- int pinned;
+ int page_count;
requested = DIV_ROUND_UP(
offset_in_page(kbufs[i].uptr) + kbufs[i].size,
@@ -596,14 +593,15 @@
if (requested > nr_pages)
return -ENOSPC;
- pinned = get_user_pages_fast(
+ page_count = pin_user_pages_fast(
(unsigned long) kbufs[i].uptr,
requested, FOLL_WRITE, pages);
- if (pinned < 0)
- return pinned;
+ if (page_count < 0)
+ return page_count;
- nr_pages -= pinned;
- pages += pinned;
+ *pinned += page_count;
+ nr_pages -= page_count;
+ pages += page_count;
}
return 0;
@@ -611,15 +609,7 @@
static void unlock_pages(struct page *pages[], unsigned int nr_pages)
{
- unsigned int i;
-
- if (!pages)
- return;
-
- for (i = 0; i < nr_pages; i++) {
- if (pages[i])
- put_page(pages[i]);
- }
+ unpin_user_pages_dirty_lock(pages, nr_pages, true);
}
static long privcmd_ioctl_dm_op(struct file *file, void __user *udata)
@@ -632,6 +622,7 @@
struct xen_dm_op_buf *xbufs = NULL;
unsigned int i;
long rc;
+ unsigned int pinned = 0;
if (copy_from_user(&kdata, udata, sizeof(kdata)))
return -EFAULT;
@@ -685,9 +676,11 @@
goto out;
}
- rc = lock_pages(kbufs, kdata.num, pages, nr_pages);
- if (rc)
+ rc = lock_pages(kbufs, kdata.num, pages, nr_pages, &pinned);
+ if (rc < 0) {
+ nr_pages = pinned;
goto out;
+ }
for (i = 0; i < kdata.num; i++) {
set_xen_guest_handle(xbufs[i].h, kbufs[i].uptr);
@@ -758,7 +751,7 @@
return __put_user(xdata.nr_frames, &udata->num);
}
- down_write(&mm->mmap_sem);
+ mmap_write_lock(mm);
vma = find_vma(mm, kdata.addr);
if (!vma || vma->vm_ops != &privcmd_vm_ops) {
@@ -810,11 +803,12 @@
unsigned int domid =
(xdata.flags & XENMEM_rsrc_acq_caller_owned) ?
DOMID_SELF : kdata.dom;
- int num;
+ int num, *errs = (int *)pfns;
+ BUILD_BUG_ON(sizeof(*errs) > sizeof(*pfns));
num = xen_remap_domain_mfn_array(vma,
kdata.addr & PAGE_MASK,
- pfns, kdata.num, (int *)pfns,
+ pfns, kdata.num, errs,
vma->vm_page_prot,
domid,
vma->vm_private_data);
@@ -824,7 +818,7 @@
unsigned int i;
for (i = 0; i < num; i++) {
- rc = pfns[i];
+ rc = errs[i];
if (rc < 0)
break;
}
@@ -833,7 +827,7 @@
}
out:
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
kfree(pfns);
return rc;
@@ -915,7 +909,7 @@
rc = xen_unmap_domain_gfn_range(vma, numgfns, pages);
if (rc == 0)
- free_xenballooned_pages(numpgs, pages);
+ xen_free_unpopulated_pages(numpgs, pages);
else
pr_crit("unable to unmap MFN range: leaking %d pages. rc=%d\n",
numpgs, rc);
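
Editor's note: the privcmd lock_pages()/unlock_pages() rework above moves from get_user_pages_fast()/put_page() to the pin_user_pages API and reports how many pages were actually pinned, so a partial failure can be unwound precisely. The pairing looks roughly like this; the function and buffer names are illustrative.

#include <linux/mm.h>

static long demo_with_user_buffer(unsigned long uaddr, unsigned int nr_pages,
				  struct page **pages)
{
	int pinned;

	pinned = pin_user_pages_fast(uaddr, nr_pages, FOLL_WRITE, pages);
	if (pinned < 0)
		return pinned;	/* nothing was pinned, nothing to undo */

	/* ... use pages[0..pinned-1]; pinned may be short of nr_pages ... */

	/* drop the pins and mark the pages dirty in one call */
	unpin_user_pages_dirty_lock(pages, pinned, true);
	return 0;
}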
diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c
index 9439de2..a7d293f 100644
--- a/drivers/xen/pvcalls-back.c
+++ b/drivers/xen/pvcalls-back.c
@@ -24,7 +24,7 @@
#define PVCALLS_VERSIONS "1"
#define MAX_RING_ORDER XENBUS_MAX_RING_GRANT_ORDER
-struct pvcalls_back_global {
+static struct pvcalls_back_global {
struct list_head frontends;
struct semaphore frontends_lock;
} pvcalls_back_global;
@@ -313,7 +313,7 @@
struct pvcalls_fedata *fedata,
uint64_t id,
grant_ref_t ref,
- uint32_t evtchn,
+ evtchn_port_t evtchn,
struct socket *sock)
{
int ret;
@@ -921,7 +921,8 @@
static int backend_connect(struct xenbus_device *dev)
{
- int err, evtchn;
+ int err;
+ evtchn_port_t evtchn;
grant_ref_t ring_ref;
struct pvcalls_fedata *fedata = NULL;
diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
index 57592a6..bbe337d 100644
--- a/drivers/xen/pvcalls-front.c
+++ b/drivers/xen/pvcalls-front.c
@@ -337,8 +337,8 @@
if (!map->active.ring)
return;
- free_pages((unsigned long)map->active.data.in,
- map->active.ring->ring_order);
+ free_pages_exact(map->active.data.in,
+ PAGE_SIZE << map->active.ring->ring_order);
free_page((unsigned long)map->active.ring);
}
@@ -352,8 +352,8 @@
goto out;
map->active.ring->ring_order = PVCALLS_RING_ORDER;
- bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
- PVCALLS_RING_ORDER);
+ bytes = alloc_pages_exact(PAGE_SIZE << PVCALLS_RING_ORDER,
+ GFP_KERNEL | __GFP_ZERO);
if (!bytes)
goto out;
@@ -368,12 +368,12 @@
return -ENOMEM;
}
-static int create_active(struct sock_mapping *map, int *evtchn)
+static int create_active(struct sock_mapping *map, evtchn_port_t *evtchn)
{
void *bytes;
- int ret = -ENOMEM, irq = -1, i;
+ int ret, irq = -1, i;
- *evtchn = -1;
+ *evtchn = 0;
init_waitqueue_head(&map->active.inflight_conn_req);
bytes = map->active.data.in;
@@ -404,7 +404,7 @@
return 0;
out_error:
- if (*evtchn >= 0)
+ if (*evtchn > 0)
xenbus_free_evtchn(pvcalls_front_dev, *evtchn);
return ret;
}
@@ -415,7 +415,8 @@
struct pvcalls_bedata *bedata;
struct sock_mapping *map = NULL;
struct xen_pvcalls_request *req;
- int notify, req_id, ret, evtchn;
+ int notify, req_id, ret;
+ evtchn_port_t evtchn;
if (addr->sa_family != AF_INET || sock->type != SOCK_STREAM)
return -EOPNOTSUPP;
@@ -765,7 +766,8 @@
struct sock_mapping *map;
struct sock_mapping *map2 = NULL;
struct xen_pvcalls_request *req;
- int notify, req_id, ret, evtchn, nonblock;
+ int notify, req_id, ret, nonblock;
+ evtchn_port_t evtchn;
map = pvcalls_enter_sock(sock);
if (IS_ERR(map))
@@ -1125,7 +1127,8 @@
static int pvcalls_front_probe(struct xenbus_device *dev,
const struct xenbus_device_id *id)
{
- int ret = -ENOMEM, evtchn, i;
+ int ret = -ENOMEM, i;
+ evtchn_port_t evtchn;
unsigned int max_page_order, function_calls, len;
char *versions;
grant_ref_t gref_head = 0;
@@ -1260,7 +1263,7 @@
if (dev->state == XenbusStateClosed)
break;
/* Missed the backend's CLOSING state */
- /* fall through */
+ fallthrough;
case XenbusStateClosing:
xenbus_frontend_closed(dev);
break;
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 0634642..2b385c1 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -28,7 +28,7 @@
#include <linux/memblock.h>
#include <linux/dma-direct.h>
-#include <linux/dma-noncoherent.h>
+#include <linux/dma-map-ops.h>
#include <linux/export.h>
#include <xen/swiotlb-xen.h>
#include <xen/page.h>
@@ -52,37 +52,39 @@
* Quick lookup value of the bus address of the IOTLB.
*/
-static u64 start_dma_addr;
-
-/*
- * Both of these functions should avoid XEN_PFN_PHYS because phys_addr_t
- * can be 32bit when dma_addr_t is 64bit leading to a loss in
- * information if the shift is done before casting to 64bit.
- */
-static inline dma_addr_t xen_phys_to_bus(phys_addr_t paddr)
+static inline phys_addr_t xen_phys_to_bus(struct device *dev, phys_addr_t paddr)
{
unsigned long bfn = pfn_to_bfn(XEN_PFN_DOWN(paddr));
- dma_addr_t dma = (dma_addr_t)bfn << XEN_PAGE_SHIFT;
+ phys_addr_t baddr = (phys_addr_t)bfn << XEN_PAGE_SHIFT;
- dma |= paddr & ~XEN_PAGE_MASK;
-
- return dma;
+ baddr |= paddr & ~XEN_PAGE_MASK;
+ return baddr;
}
-static inline phys_addr_t xen_bus_to_phys(dma_addr_t baddr)
+static inline dma_addr_t xen_phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+ return phys_to_dma(dev, xen_phys_to_bus(dev, paddr));
+}
+
+static inline phys_addr_t xen_bus_to_phys(struct device *dev,
+ phys_addr_t baddr)
{
unsigned long xen_pfn = bfn_to_pfn(XEN_PFN_DOWN(baddr));
- dma_addr_t dma = (dma_addr_t)xen_pfn << XEN_PAGE_SHIFT;
- phys_addr_t paddr = dma;
-
- paddr |= baddr & ~XEN_PAGE_MASK;
+ phys_addr_t paddr = (xen_pfn << XEN_PAGE_SHIFT) |
+ (baddr & ~XEN_PAGE_MASK);
return paddr;
}
-static inline dma_addr_t xen_virt_to_bus(void *address)
+static inline phys_addr_t xen_dma_to_phys(struct device *dev,
+ dma_addr_t dma_addr)
{
- return xen_phys_to_bus(virt_to_phys(address));
+ return xen_bus_to_phys(dev, dma_to_phys(dev, dma_addr));
+}
+
+static inline dma_addr_t xen_virt_to_bus(struct device *dev, void *address)
+{
+ return xen_phys_to_dma(dev, virt_to_phys(address));
}
static inline int range_straddles_page_boundary(phys_addr_t p, size_t size)
@@ -99,11 +101,11 @@
return 0;
}
-static int is_xen_swiotlb_buffer(dma_addr_t dma_addr)
+static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr)
{
- unsigned long bfn = XEN_PFN_DOWN(dma_addr);
+ unsigned long bfn = XEN_PFN_DOWN(dma_to_phys(dev, dma_addr));
unsigned long xen_pfn = bfn_to_local_pfn(bfn);
- phys_addr_t paddr = XEN_PFN_PHYS(xen_pfn);
+ phys_addr_t paddr = (phys_addr_t)xen_pfn << XEN_PAGE_SHIFT;
/* If the address is outside our domain, it CAN
* have the same virtual address as another address
@@ -241,7 +243,6 @@
m_ret = XEN_SWIOTLB_EFIXUP;
goto error;
}
- start_dma_addr = xen_virt_to_bus(xen_io_tlb_start);
if (early) {
if (swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs,
verbose))
@@ -307,12 +308,12 @@
if (hwdev && hwdev->coherent_dma_mask)
dma_mask = hwdev->coherent_dma_mask;
- /* At this point dma_handle is the physical address, next we are
+ /* At this point dma_handle is the dma address, next we are
* going to set it to the machine address.
* Do not use virt_to_phys(ret) because on ARM it doesn't correspond
* to *dma_handle. */
- phys = *dma_handle;
- dev_addr = xen_phys_to_bus(phys);
+ phys = dma_to_phys(hwdev, *dma_handle);
+ dev_addr = xen_phys_to_dma(hwdev, phys);
if (((dev_addr + size - 1 <= dma_mask)) &&
!range_straddles_page_boundary(phys, size))
*dma_handle = dev_addr;
@@ -322,6 +323,7 @@
xen_free_coherent_pages(hwdev, size, ret, (dma_addr_t)phys, attrs);
return NULL;
}
+ *dma_handle = phys_to_dma(hwdev, *dma_handle);
SetPageXenRemapped(virt_to_page(ret));
}
memset(ret, 0, size);
@@ -342,7 +344,7 @@
/* do not use virt_to_phys because on ARM it doesn't return you the
* physical address */
- phys = xen_bus_to_phys(dev_addr);
+ phys = xen_dma_to_phys(hwdev, dev_addr);
/* Convert the size to actually allocated. */
size = 1UL << (order + XEN_PAGE_SHIFT);
@@ -357,7 +359,8 @@
TestClearPageXenRemapped(page))
xen_destroy_contiguous_region(phys, order);
- xen_free_coherent_pages(hwdev, size, vaddr, (dma_addr_t)phys, attrs);
+ xen_free_coherent_pages(hwdev, size, vaddr, phys_to_dma(hwdev, phys),
+ attrs);
}
/*
@@ -373,7 +376,7 @@
unsigned long attrs)
{
phys_addr_t map, phys = page_to_phys(page) + offset;
- dma_addr_t dev_addr = xen_phys_to_bus(phys);
+ dma_addr_t dev_addr = xen_phys_to_dma(dev, phys);
BUG_ON(dir == DMA_NONE);
/*
@@ -381,7 +384,7 @@
* we can safely return the device addr and not worry about bounce
* buffering it.
*/
- if (dma_capable(dev, dev_addr, size) &&
+ if (dma_capable(dev, dev_addr, size, true) &&
!range_straddles_page_boundary(phys, size) &&
!xen_arch_need_swiotlb(dev, phys, dev_addr) &&
swiotlb_force != SWIOTLB_FORCE)
@@ -392,26 +395,29 @@
*/
trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
- map = swiotlb_tbl_map_single(dev, start_dma_addr, phys,
- size, size, dir, attrs);
+ map = swiotlb_tbl_map_single(dev, phys, size, size, dir, attrs);
if (map == (phys_addr_t)DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;
phys = map;
- dev_addr = xen_phys_to_bus(map);
+ dev_addr = xen_phys_to_dma(dev, map);
/*
* Ensure that the address returned is DMA'ble
*/
- if (unlikely(!dma_capable(dev, dev_addr, size))) {
+ if (unlikely(!dma_capable(dev, dev_addr, size, true))) {
swiotlb_tbl_unmap_single(dev, map, size, size, dir,
attrs | DMA_ATTR_SKIP_CPU_SYNC);
return DMA_MAPPING_ERROR;
}
done:
- if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
- xen_dma_sync_for_device(dev, dev_addr, phys, size, dir);
+ if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
+ if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dev_addr))))
+ arch_sync_dma_for_device(phys, size, dir);
+ else
+ xen_dma_sync_for_device(dev, dev_addr, size, dir);
+ }
return dev_addr;
}
@@ -426,15 +432,19 @@
static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
size_t size, enum dma_data_direction dir, unsigned long attrs)
{
- phys_addr_t paddr = xen_bus_to_phys(dev_addr);
+ phys_addr_t paddr = xen_dma_to_phys(hwdev, dev_addr);
BUG_ON(dir == DMA_NONE);
- if (!dev_is_dma_coherent(hwdev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
- xen_dma_sync_for_cpu(hwdev, dev_addr, paddr, size, dir);
+ if (!dev_is_dma_coherent(hwdev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
+ if (pfn_valid(PFN_DOWN(dma_to_phys(hwdev, dev_addr))))
+ arch_sync_dma_for_cpu(paddr, size, dir);
+ else
+ xen_dma_sync_for_cpu(hwdev, dev_addr, size, dir);
+ }
/* NOTE: We use dev_addr here, not paddr! */
- if (is_xen_swiotlb_buffer(dev_addr))
+ if (is_xen_swiotlb_buffer(hwdev, dev_addr))
swiotlb_tbl_unmap_single(hwdev, paddr, size, size, dir, attrs);
}
@@ -442,12 +452,16 @@
xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr,
size_t size, enum dma_data_direction dir)
{
- phys_addr_t paddr = xen_bus_to_phys(dma_addr);
+ phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr);
- if (!dev_is_dma_coherent(dev))
- xen_dma_sync_for_cpu(dev, dma_addr, paddr, size, dir);
+ if (!dev_is_dma_coherent(dev)) {
+ if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
+ arch_sync_dma_for_cpu(paddr, size, dir);
+ else
+ xen_dma_sync_for_cpu(dev, dma_addr, size, dir);
+ }
- if (is_xen_swiotlb_buffer(dma_addr))
+ if (is_xen_swiotlb_buffer(dev, dma_addr))
swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU);
}
@@ -455,13 +469,17 @@
xen_swiotlb_sync_single_for_device(struct device *dev, dma_addr_t dma_addr,
size_t size, enum dma_data_direction dir)
{
- phys_addr_t paddr = xen_bus_to_phys(dma_addr);
+ phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr);
- if (is_xen_swiotlb_buffer(dma_addr))
+ if (is_xen_swiotlb_buffer(dev, dma_addr))
swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE);
- if (!dev_is_dma_coherent(dev))
- xen_dma_sync_for_device(dev, dma_addr, paddr, size, dir);
+ if (!dev_is_dma_coherent(dev)) {
+ if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
+ arch_sync_dma_for_device(paddr, size, dir);
+ else
+ xen_dma_sync_for_device(dev, dma_addr, size, dir);
+ }
}
/*
@@ -542,7 +560,7 @@
static int
xen_swiotlb_dma_supported(struct device *hwdev, u64 mask)
{
- return xen_virt_to_bus(xen_io_tlb_end - 1) <= mask;
+ return xen_virt_to_bus(hwdev, xen_io_tlb_end - 1) <= mask;
}
const struct dma_map_ops xen_swiotlb_dma_ops = {
@@ -559,4 +577,6 @@
.dma_supported = xen_swiotlb_dma_supported,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
+ .alloc_pages = dma_common_alloc_pages,
+ .free_pages = dma_common_free_pages,
};
diff --git a/drivers/xen/time.c b/drivers/xen/time.c
index 0968859..108edbc 100644
--- a/drivers/xen/time.c
+++ b/drivers/xen/time.c
@@ -64,7 +64,7 @@
do {
state_time = get64(&state->state_entry_time);
rmb(); /* Hypervisor might update data. */
- *res = READ_ONCE(*state);
+ *res = __READ_ONCE(*state);
rmb(); /* Hypervisor might update data. */
} while (get64(&state->state_entry_time) != state_time ||
(state_time & XEN_RUNSTATE_UPDATE));
diff --git a/drivers/xen/unpopulated-alloc.c b/drivers/xen/unpopulated-alloc.c
new file mode 100644
index 0000000..87e6b7d
--- /dev/null
+++ b/drivers/xen/unpopulated-alloc.c
@@ -0,0 +1,204 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/errno.h>
+#include <linux/gfp.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/memremap.h>
+#include <linux/slab.h>
+
+#include <asm/page.h>
+
+#include <xen/page.h>
+#include <xen/xen.h>
+
+static DEFINE_MUTEX(list_lock);
+static struct page *page_list;
+static unsigned int list_count;
+
+static int fill_list(unsigned int nr_pages)
+{
+ struct dev_pagemap *pgmap;
+ struct resource *res;
+ void *vaddr;
+ unsigned int i, alloc_pages = round_up(nr_pages, PAGES_PER_SECTION);
+ int ret = -ENOMEM;
+
+ res = kzalloc(sizeof(*res), GFP_KERNEL);
+ if (!res)
+ return -ENOMEM;
+
+ res->name = "Xen scratch";
+ res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+
+ ret = allocate_resource(&iomem_resource, res,
+ alloc_pages * PAGE_SIZE, 0, -1,
+ PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
+ if (ret < 0) {
+ pr_err("Cannot allocate new IOMEM resource\n");
+ goto err_resource;
+ }
+
+ pgmap = kzalloc(sizeof(*pgmap), GFP_KERNEL);
+ if (!pgmap) {
+ ret = -ENOMEM;
+ goto err_pgmap;
+ }
+
+ pgmap->type = MEMORY_DEVICE_GENERIC;
+ pgmap->range = (struct range) {
+ .start = res->start,
+ .end = res->end,
+ };
+ pgmap->nr_range = 1;
+ pgmap->owner = res;
+
+#ifdef CONFIG_XEN_HAVE_PVMMU
+ /*
+ * memremap will build page tables for the new memory so
+ * the p2m must contain invalid entries so the correct
+ * non-present PTEs will be written.
+ *
+ * If a failure occurs, the original (identity) p2m entries
+ * are not restored since this region is now known not to
+ * conflict with any devices.
+ */
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ xen_pfn_t pfn = PFN_DOWN(res->start);
+
+ for (i = 0; i < alloc_pages; i++) {
+ if (!set_phys_to_machine(pfn + i, INVALID_P2M_ENTRY)) {
+ pr_warn("set_phys_to_machine() failed, no memory added\n");
+ ret = -ENOMEM;
+ goto err_memremap;
+ }
+ }
+ }
+#endif
+
+ vaddr = memremap_pages(pgmap, NUMA_NO_NODE);
+ if (IS_ERR(vaddr)) {
+ pr_err("Cannot remap memory range\n");
+ ret = PTR_ERR(vaddr);
+ goto err_memremap;
+ }
+
+ for (i = 0; i < alloc_pages; i++) {
+ struct page *pg = virt_to_page(vaddr + PAGE_SIZE * i);
+
+ BUG_ON(!virt_addr_valid(vaddr + PAGE_SIZE * i));
+ pg->zone_device_data = page_list;
+ page_list = pg;
+ list_count++;
+ }
+
+ return 0;
+
+err_memremap:
+ kfree(pgmap);
+err_pgmap:
+ release_resource(res);
+err_resource:
+ kfree(res);
+ return ret;
+}
+
+/**
+ * xen_alloc_unpopulated_pages - alloc unpopulated pages
+ * @nr_pages: Number of pages
+ * @pages: pages returned
+ * @return 0 on success, error otherwise
+ */
+int xen_alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages)
+{
+ unsigned int i;
+ int ret = 0;
+
+ mutex_lock(&list_lock);
+ if (list_count < nr_pages) {
+ ret = fill_list(nr_pages - list_count);
+ if (ret)
+ goto out;
+ }
+
+ for (i = 0; i < nr_pages; i++) {
+ struct page *pg = page_list;
+
+ BUG_ON(!pg);
+ page_list = pg->zone_device_data;
+ list_count--;
+ pages[i] = pg;
+
+#ifdef CONFIG_XEN_HAVE_PVMMU
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ ret = xen_alloc_p2m_entry(page_to_pfn(pg));
+ if (ret < 0) {
+ unsigned int j;
+
+ for (j = 0; j <= i; j++) {
+ pages[j]->zone_device_data = page_list;
+ page_list = pages[j];
+ list_count++;
+ }
+ goto out;
+ }
+ }
+#endif
+ }
+
+out:
+ mutex_unlock(&list_lock);
+ return ret;
+}
+EXPORT_SYMBOL(xen_alloc_unpopulated_pages);
+
+/**
+ * xen_free_unpopulated_pages - return unpopulated pages
+ * @nr_pages: Number of pages
+ * @pages: pages to return
+ */
+void xen_free_unpopulated_pages(unsigned int nr_pages, struct page **pages)
+{
+ unsigned int i;
+
+ mutex_lock(&list_lock);
+ for (i = 0; i < nr_pages; i++) {
+ pages[i]->zone_device_data = page_list;
+ page_list = pages[i];
+ list_count++;
+ }
+ mutex_unlock(&list_lock);
+}
+EXPORT_SYMBOL(xen_free_unpopulated_pages);
+
+#ifdef CONFIG_XEN_PV
+static int __init init(void)
+{
+ unsigned int i;
+
+ if (!xen_domain())
+ return -ENODEV;
+
+ if (!xen_pv_domain())
+ return 0;
+
+ /*
+ * Initialize with pages from the extra memory regions (see
+ * arch/x86/xen/setup.c).
+ */
+ for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
+ unsigned int j;
+
+ for (j = 0; j < xen_extra_mem[i].n_pfns; j++) {
+ struct page *pg =
+ pfn_to_page(xen_extra_mem[i].start_pfn + j);
+
+ pg->zone_device_data = page_list;
+ page_list = pg;
+ list_count++;
+ }
+ }
+
+ return 0;
+}
+subsys_initcall(init);
+#endif
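
Editor's note: the new allocator above hands out device-backed (unpopulated) page structures so foreign frames or grants can be mapped without consuming ballooned RAM; gnttab_alloc_pages() and privcmd are switched over to it elsewhere in this patch. A rough caller-side sketch, assuming the prototypes are exported via <xen/xen.h> as in mainline; the helper name and page count are illustrative.

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <xen/xen.h>

static int demo_map_foreign(unsigned int nr)
{
	struct page *pages[16];
	int ret;

	if (nr > ARRAY_SIZE(pages))
		return -EINVAL;

	ret = xen_alloc_unpopulated_pages(nr, pages);
	if (ret < 0)
		return ret;

	/* map foreign frames or grant references into these pages here */

	xen_free_unpopulated_pages(nr, pages);
	return 0;
}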
diff --git a/drivers/xen/xen-acpi-memhotplug.c b/drivers/xen/xen-acpi-memhotplug.c
index 7457213..f914b72 100644
--- a/drivers/xen/xen-acpi-memhotplug.c
+++ b/drivers/xen/xen-acpi-memhotplug.c
@@ -229,7 +229,7 @@
case ACPI_NOTIFY_BUS_CHECK:
ACPI_DEBUG_PRINT((ACPI_DB_INFO,
"\nReceived BUS CHECK notification for device\n"));
- /* Fall Through */
+ fallthrough;
case ACPI_NOTIFY_DEVICE_CHECK:
if (event == ACPI_NOTIFY_DEVICE_CHECK)
ACPI_DEBUG_PRINT((ACPI_DB_INFO,
diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c
index 6011171..059de92 100644
--- a/drivers/xen/xen-pciback/conf_space.c
+++ b/drivers/xen/xen-pciback/conf_space.c
@@ -10,6 +10,8 @@
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
*/
+#define dev_fmt(fmt) DRV_NAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/moduleparam.h>
#include <linux/pci.h>
@@ -154,9 +156,7 @@
* (as if device didn't respond) */
u32 value = 0, tmp_val;
- if (unlikely(verbose_request))
- printk(KERN_DEBUG DRV_NAME ": %s: read %d bytes at 0x%x\n",
- pci_name(dev), size, offset);
+ dev_dbg(&dev->dev, "read %d bytes at 0x%x\n", size, offset);
if (!valid_request(offset, size)) {
err = XEN_PCI_ERR_invalid_offset;
@@ -195,9 +195,7 @@
}
out:
- if (unlikely(verbose_request))
- printk(KERN_DEBUG DRV_NAME ": %s: read %d bytes at 0x%x = %x\n",
- pci_name(dev), size, offset, value);
+ dev_dbg(&dev->dev, "read %d bytes at 0x%x = %x\n", size, offset, value);
*ret_val = value;
return xen_pcibios_err_to_errno(err);
@@ -212,10 +210,8 @@
u32 tmp_val;
int field_start, field_end;
- if (unlikely(verbose_request))
- printk(KERN_DEBUG
- DRV_NAME ": %s: write request %d bytes at 0x%x = %x\n",
- pci_name(dev), size, offset, value);
+ dev_dbg(&dev->dev, "write request %d bytes at 0x%x = %x\n",
+ size, offset, value);
if (!valid_request(offset, size))
return XEN_PCI_ERR_invalid_offset;
@@ -286,6 +282,43 @@
return xen_pcibios_err_to_errno(err);
}
+int xen_pcibk_get_interrupt_type(struct pci_dev *dev)
+{
+ int err;
+ u16 val;
+ int ret = 0;
+
+ err = pci_read_config_word(dev, PCI_COMMAND, &val);
+ if (err)
+ return err;
+ if (!(val & PCI_COMMAND_INTX_DISABLE))
+ ret |= INTERRUPT_TYPE_INTX;
+
+ /*
+ * Do not trust dev->msi(x)_enabled here, as enabling could be done
+ * bypassing the pci_*msi* functions, by the qemu.
+ */
+ if (dev->msi_cap) {
+ err = pci_read_config_word(dev,
+ dev->msi_cap + PCI_MSI_FLAGS,
+ &val);
+ if (err)
+ return err;
+ if (val & PCI_MSI_FLAGS_ENABLE)
+ ret |= INTERRUPT_TYPE_MSI;
+ }
+ if (dev->msix_cap) {
+ err = pci_read_config_word(dev,
+ dev->msix_cap + PCI_MSIX_FLAGS,
+ &val);
+ if (err)
+ return err;
+ if (val & PCI_MSIX_FLAGS_ENABLE)
+ ret |= INTERRUPT_TYPE_MSIX;
+ }
+ return ret ?: INTERRUPT_TYPE_NONE;
+}
+
void xen_pcibk_config_free_dyn_fields(struct pci_dev *dev)
{
struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
diff --git a/drivers/xen/xen-pciback/conf_space.h b/drivers/xen/xen-pciback/conf_space.h
index 22db630..5fe431c 100644
--- a/drivers/xen/xen-pciback/conf_space.h
+++ b/drivers/xen/xen-pciback/conf_space.h
@@ -65,6 +65,11 @@
void *data;
};
+#define INTERRUPT_TYPE_NONE (0)
+#define INTERRUPT_TYPE_INTX (1<<0)
+#define INTERRUPT_TYPE_MSI (1<<1)
+#define INTERRUPT_TYPE_MSIX (1<<2)
+
extern bool xen_pcibk_permissive;
#define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset)
@@ -126,4 +131,6 @@
int xen_pcibk_config_header_add_fields(struct pci_dev *dev);
int xen_pcibk_config_capability_add_fields(struct pci_dev *dev);
+int xen_pcibk_get_interrupt_type(struct pci_dev *dev);
+
#endif /* __XEN_PCIBACK_CONF_SPACE_H__ */
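
Editor's note: xen_pcibk_get_interrupt_type() returns a bitmask of the INTERRUPT_TYPE_* values above, and the config-space handlers added later in this patch use it to keep INTx, MSI and MSI-X mutually exclusive. The check has roughly this shape; demo_may_enable is an illustrative wrapper, not a pciback function.

#include <linux/errno.h>
#include <linux/pci.h>

#include "conf_space.h"

static int demo_may_enable(struct pci_dev *dev, int wanted_type)
{
	int cur = xen_pcibk_get_interrupt_type(dev);

	/* allow if nothing is enabled yet, or only the requested type is */
	if (cur == INTERRUPT_TYPE_NONE || cur == wanted_type)
		return 0;

	return -EBUSY;
}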
diff --git a/drivers/xen/xen-pciback/conf_space_capability.c b/drivers/xen/xen-pciback/conf_space_capability.c
index e569413..5e53b48 100644
--- a/drivers/xen/xen-pciback/conf_space_capability.c
+++ b/drivers/xen/xen-pciback/conf_space_capability.c
@@ -160,7 +160,7 @@
}
out:
- return ERR_PTR(err);
+ return err ? ERR_PTR(err) : NULL;
}
static const struct config_field caplist_pm[] = {
@@ -189,6 +189,85 @@
{}
};
+static struct msi_msix_field_config {
+ u16 enable_bit; /* bit for enabling MSI/MSI-X */
+ unsigned int int_type; /* interrupt type for exclusiveness check */
+} msi_field_config = {
+ .enable_bit = PCI_MSI_FLAGS_ENABLE,
+ .int_type = INTERRUPT_TYPE_MSI,
+}, msix_field_config = {
+ .enable_bit = PCI_MSIX_FLAGS_ENABLE,
+ .int_type = INTERRUPT_TYPE_MSIX,
+};
+
+static void *msi_field_init(struct pci_dev *dev, int offset)
+{
+ return &msi_field_config;
+}
+
+static void *msix_field_init(struct pci_dev *dev, int offset)
+{
+ return &msix_field_config;
+}
+
+static int msi_msix_flags_write(struct pci_dev *dev, int offset, u16 new_value,
+ void *data)
+{
+ int err;
+ u16 old_value;
+ const struct msi_msix_field_config *field_config = data;
+ const struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
+
+ if (xen_pcibk_permissive || dev_data->permissive)
+ goto write;
+
+ err = pci_read_config_word(dev, offset, &old_value);
+ if (err)
+ return err;
+
+ if (new_value == old_value)
+ return 0;
+
+ if (!dev_data->allow_interrupt_control ||
+ (new_value ^ old_value) & ~field_config->enable_bit)
+ return PCIBIOS_SET_FAILED;
+
+ if (new_value & field_config->enable_bit) {
+ /* don't allow enabling together with other interrupt types */
+ int int_type = xen_pcibk_get_interrupt_type(dev);
+
+ if (int_type == INTERRUPT_TYPE_NONE ||
+ int_type == field_config->int_type)
+ goto write;
+ return PCIBIOS_SET_FAILED;
+ }
+
+write:
+ return pci_write_config_word(dev, offset, new_value);
+}
+
+static const struct config_field caplist_msix[] = {
+ {
+ .offset = PCI_MSIX_FLAGS,
+ .size = 2,
+ .init = msix_field_init,
+ .u.w.read = xen_pcibk_read_config_word,
+ .u.w.write = msi_msix_flags_write,
+ },
+ {}
+};
+
+static const struct config_field caplist_msi[] = {
+ {
+ .offset = PCI_MSI_FLAGS,
+ .size = 2,
+ .init = msi_field_init,
+ .u.w.read = xen_pcibk_read_config_word,
+ .u.w.write = msi_msix_flags_write,
+ },
+ {}
+};
+
static struct xen_pcibk_config_capability xen_pcibk_config_capability_pm = {
.capability = PCI_CAP_ID_PM,
.fields = caplist_pm,
@@ -197,11 +276,21 @@
.capability = PCI_CAP_ID_VPD,
.fields = caplist_vpd,
};
+static struct xen_pcibk_config_capability xen_pcibk_config_capability_msi = {
+ .capability = PCI_CAP_ID_MSI,
+ .fields = caplist_msi,
+};
+static struct xen_pcibk_config_capability xen_pcibk_config_capability_msix = {
+ .capability = PCI_CAP_ID_MSIX,
+ .fields = caplist_msix,
+};
int xen_pcibk_config_capability_init(void)
{
register_capability(&xen_pcibk_config_capability_vpd);
register_capability(&xen_pcibk_config_capability_pm);
+ register_capability(&xen_pcibk_config_capability_msi);
+ register_capability(&xen_pcibk_config_capability_msix);
return 0;
}
diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c
index 10ae24b..ac45cdc 100644
--- a/drivers/xen/xen-pciback/conf_space_header.c
+++ b/drivers/xen/xen-pciback/conf_space_header.c
@@ -6,6 +6,7 @@
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#define dev_fmt pr_fmt
#include <linux/kernel.h>
#include <linux/pci.h>
@@ -67,56 +68,61 @@
dev_data = pci_get_drvdata(dev);
if (!pci_is_enabled(dev) && is_enable_cmd(value)) {
- if (unlikely(verbose_request))
- printk(KERN_DEBUG DRV_NAME ": %s: enable\n",
- pci_name(dev));
+ dev_dbg(&dev->dev, "enable\n");
err = pci_enable_device(dev);
if (err)
return err;
if (dev_data)
dev_data->enable_intx = 1;
} else if (pci_is_enabled(dev) && !is_enable_cmd(value)) {
- if (unlikely(verbose_request))
- printk(KERN_DEBUG DRV_NAME ": %s: disable\n",
- pci_name(dev));
+ dev_dbg(&dev->dev, "disable\n");
pci_disable_device(dev);
if (dev_data)
dev_data->enable_intx = 0;
}
if (!dev->is_busmaster && is_master_cmd(value)) {
- if (unlikely(verbose_request))
- printk(KERN_DEBUG DRV_NAME ": %s: set bus master\n",
- pci_name(dev));
+ dev_dbg(&dev->dev, "set bus master\n");
pci_set_master(dev);
} else if (dev->is_busmaster && !is_master_cmd(value)) {
- if (unlikely(verbose_request))
- printk(KERN_DEBUG DRV_NAME ": %s: clear bus master\n",
- pci_name(dev));
+ dev_dbg(&dev->dev, "clear bus master\n");
pci_clear_master(dev);
}
if (!(cmd->val & PCI_COMMAND_INVALIDATE) &&
(value & PCI_COMMAND_INVALIDATE)) {
- if (unlikely(verbose_request))
- printk(KERN_DEBUG
- DRV_NAME ": %s: enable memory-write-invalidate\n",
- pci_name(dev));
+ dev_dbg(&dev->dev, "enable memory-write-invalidate\n");
err = pci_set_mwi(dev);
if (err) {
- pr_warn("%s: cannot enable memory-write-invalidate (%d)\n",
- pci_name(dev), err);
+ dev_warn(&dev->dev, "cannot enable memory-write-invalidate (%d)\n",
+ err);
value &= ~PCI_COMMAND_INVALIDATE;
}
} else if ((cmd->val & PCI_COMMAND_INVALIDATE) &&
!(value & PCI_COMMAND_INVALIDATE)) {
- if (unlikely(verbose_request))
- printk(KERN_DEBUG
- DRV_NAME ": %s: disable memory-write-invalidate\n",
- pci_name(dev));
+ dev_dbg(&dev->dev, "disable memory-write-invalidate\n");
pci_clear_mwi(dev);
}
+ if (dev_data && dev_data->allow_interrupt_control) {
+ if ((cmd->val ^ value) & PCI_COMMAND_INTX_DISABLE) {
+ if (value & PCI_COMMAND_INTX_DISABLE) {
+ pci_intx(dev, 0);
+ } else {
+ /* Do not allow enabling INTx together with MSI or MSI-X. */
+ switch (xen_pcibk_get_interrupt_type(dev)) {
+ case INTERRUPT_TYPE_NONE:
+ pci_intx(dev, 1);
+ break;
+ case INTERRUPT_TYPE_INTX:
+ break;
+ default:
+ return PCIBIOS_SET_FAILED;
+ }
+ }
+ }
+ }
+
cmd->val = value;
if (!xen_pcibk_permissive && (!dev_data || !dev_data->permissive))
@@ -138,8 +144,7 @@
struct pci_bar_info *bar = data;
if (unlikely(!bar)) {
- pr_warn(DRV_NAME ": driver data not found for %s\n",
- pci_name(dev));
+ dev_warn(&dev->dev, "driver data not found\n");
return XEN_PCI_ERR_op_failed;
}
@@ -175,8 +180,7 @@
u32 mask;
if (unlikely(!bar)) {
- pr_warn(DRV_NAME ": driver data not found for %s\n",
- pci_name(dev));
+ dev_warn(&dev->dev, "driver data not found\n");
return XEN_PCI_ERR_op_failed;
}
@@ -209,8 +213,7 @@
struct pci_bar_info *bar = data;
if (unlikely(!bar)) {
- pr_warn(DRV_NAME ": driver data not found for %s\n",
- pci_name(dev));
+ dev_warn(&dev->dev, "driver data not found\n");
return XEN_PCI_ERR_op_failed;
}
@@ -414,8 +417,8 @@
default:
err = -EINVAL;
- pr_err("%s: Unsupported header type %d!\n",
- pci_name(dev), dev->hdr_type);
+ dev_err(&dev->dev, "Unsupported header type %d!\n",
+ dev->hdr_type);
break;
}
diff --git a/drivers/xen/xen-pciback/conf_space_quirks.c b/drivers/xen/xen-pciback/conf_space_quirks.c
index ed593d1..7dc2810 100644
--- a/drivers/xen/xen-pciback/conf_space_quirks.c
+++ b/drivers/xen/xen-pciback/conf_space_quirks.c
@@ -6,6 +6,8 @@
* Author: Chris Bookholt <hap10@epoch.ncsc.mil>
*/
+#define dev_fmt(fmt) DRV_NAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/pci.h>
#include "pciback.h"
@@ -35,8 +37,8 @@
if (match_one_device(&tmp_quirk->devid, dev) != NULL)
goto out;
tmp_quirk = NULL;
- printk(KERN_DEBUG DRV_NAME
- ": quirk didn't match any device known\n");
+ dev_printk(KERN_DEBUG, &dev->dev,
+ "quirk didn't match any device known\n");
out:
return tmp_quirk;
}
diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c
index adf3aae..cb904ac 100644
--- a/drivers/xen/xen-pciback/pci_stub.c
+++ b/drivers/xen/xen-pciback/pci_stub.c
@@ -6,6 +6,7 @@
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#define dev_fmt pr_fmt
#include <linux/module.h>
#include <linux/init.h>
@@ -304,6 +305,8 @@
xen_pcibk_config_reset_dev(dev);
xen_pcibk_config_free_dyn_fields(dev);
+ dev_data->allow_interrupt_control = 0;
+
xen_unregister_device_domain_owner(dev);
spin_lock_irqsave(&found_psdev->lock, flags);
@@ -624,11 +627,11 @@
if (found_psdev->pdev) {
int domid = xen_find_device_domain_owner(dev);
- pr_warn("****** removing device %s while still in-use by domain %d! ******\n",
+ dev_warn(&dev->dev, "****** removing device %s while still in-use by domain %d! ******\n",
pci_name(found_psdev->dev), domid);
- pr_warn("****** driver domain may still access this device's i/o resources!\n");
- pr_warn("****** shutdown driver domain before binding device\n");
- pr_warn("****** to other drivers or domains\n");
+ dev_warn(&dev->dev, "****** driver domain may still access this device's i/o resources!\n");
+ dev_warn(&dev->dev, "****** shutdown driver domain before binding device\n");
+ dev_warn(&dev->dev, "****** to other drivers or domains\n");
/* N.B. This ends up calling pcistub_put_pci_dev which ends up
* doing the FLR. */
@@ -709,14 +712,12 @@
ret = xen_pcibk_get_pcifront_dev(psdev->dev, psdev->pdev,
&aer_op->domain, &aer_op->bus, &aer_op->devfn);
if (!ret) {
- dev_err(&psdev->dev->dev,
- DRV_NAME ": failed to get pcifront device\n");
+ dev_err(&psdev->dev->dev, "failed to get pcifront device\n");
return PCI_ERS_RESULT_NONE;
}
wmb();
- dev_dbg(&psdev->dev->dev,
- DRV_NAME ": aer_op %x dom %x bus %x devfn %x\n",
+ dev_dbg(&psdev->dev->dev, "aer_op %x dom %x bus %x devfn %x\n",
aer_cmd, aer_op->domain, aer_op->bus, aer_op->devfn);
/*local flag to mark there's aer request, xen_pcibk callback will use
* this flag to judge whether we need to check pci-front give aer
@@ -784,13 +785,12 @@
PCI_FUNC(dev->devfn));
if (!psdev || !psdev->pdev) {
- dev_err(&dev->dev,
- DRV_NAME " device is not found/assigned\n");
+ dev_err(&dev->dev, "device is not found/assigned\n");
goto end;
}
if (!psdev->pdev->sh_info) {
- dev_err(&dev->dev, DRV_NAME " device is not connected or owned"
+ dev_err(&dev->dev, "device is not connected or owned"
" by HVM, kill it\n");
kill_domain_by_device(psdev);
goto end;
@@ -842,13 +842,12 @@
PCI_FUNC(dev->devfn));
if (!psdev || !psdev->pdev) {
- dev_err(&dev->dev,
- DRV_NAME " device is not found/assigned\n");
+ dev_err(&dev->dev, "device is not found/assigned\n");
goto end;
}
if (!psdev->pdev->sh_info) {
- dev_err(&dev->dev, DRV_NAME " device is not connected or owned"
+ dev_err(&dev->dev, "device is not connected or owned"
" by HVM, kill it\n");
kill_domain_by_device(psdev);
goto end;
@@ -900,13 +899,12 @@
PCI_FUNC(dev->devfn));
if (!psdev || !psdev->pdev) {
- dev_err(&dev->dev,
- DRV_NAME " device is not found/assigned\n");
+ dev_err(&dev->dev, "device is not found/assigned\n");
goto end;
}
if (!psdev->pdev->sh_info) {
- dev_err(&dev->dev, DRV_NAME " device is not connected or owned"
+ dev_err(&dev->dev, "device is not connected or owned"
" by HVM, kill it\n");
kill_domain_by_device(psdev);
goto end;
@@ -954,13 +952,12 @@
PCI_FUNC(dev->devfn));
if (!psdev || !psdev->pdev) {
- dev_err(&dev->dev,
- DRV_NAME " device is not found/assigned\n");
+ dev_err(&dev->dev, "device is not found/assigned\n");
goto end;
}
if (!psdev->pdev->sh_info) {
- dev_err(&dev->dev, DRV_NAME " device is not connected or owned"
+ dev_err(&dev->dev, "device is not connected or owned"
" by HVM, kill it\n");
kill_domain_by_device(psdev);
goto end;
@@ -1431,6 +1428,65 @@
}
static DRIVER_ATTR_RW(permissive);
+static ssize_t allow_interrupt_control_store(struct device_driver *drv,
+ const char *buf, size_t count)
+{
+ int domain, bus, slot, func;
+ int err;
+ struct pcistub_device *psdev;
+ struct xen_pcibk_dev_data *dev_data;
+
+ err = str_to_slot(buf, &domain, &bus, &slot, &func);
+ if (err)
+ goto out;
+
+ psdev = pcistub_device_find(domain, bus, slot, func);
+ if (!psdev) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ dev_data = pci_get_drvdata(psdev->dev);
+ /* the driver data for a device should never be null at this point */
+ if (!dev_data) {
+ err = -ENXIO;
+ goto release;
+ }
+ dev_data->allow_interrupt_control = 1;
+release:
+ pcistub_device_put(psdev);
+out:
+ if (!err)
+ err = count;
+ return err;
+}
+
+static ssize_t allow_interrupt_control_show(struct device_driver *drv,
+ char *buf)
+{
+ struct pcistub_device *psdev;
+ struct xen_pcibk_dev_data *dev_data;
+ size_t count = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pcistub_devices_lock, flags);
+ list_for_each_entry(psdev, &pcistub_devices, dev_list) {
+ if (count >= PAGE_SIZE)
+ break;
+ if (!psdev->dev)
+ continue;
+ dev_data = pci_get_drvdata(psdev->dev);
+ if (!dev_data || !dev_data->allow_interrupt_control)
+ continue;
+ count +=
+ scnprintf(buf + count, PAGE_SIZE - count, "%s\n",
+ pci_name(psdev->dev));
+ }
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+ return count;
+}
+static DRIVER_ATTR_RW(allow_interrupt_control);
+
static void pcistub_exit(void)
{
driver_remove_file(&xen_pcibk_pci_driver.driver, &driver_attr_new_slot);
@@ -1441,6 +1497,8 @@
driver_remove_file(&xen_pcibk_pci_driver.driver,
&driver_attr_permissive);
driver_remove_file(&xen_pcibk_pci_driver.driver,
+ &driver_attr_allow_interrupt_control);
+ driver_remove_file(&xen_pcibk_pci_driver.driver,
&driver_attr_irq_handlers);
driver_remove_file(&xen_pcibk_pci_driver.driver,
&driver_attr_irq_handler_state);
@@ -1530,6 +1588,9 @@
if (!err)
err = driver_create_file(&xen_pcibk_pci_driver.driver,
&driver_attr_permissive);
+ if (!err)
+ err = driver_create_file(&xen_pcibk_pci_driver.driver,
+ &driver_attr_allow_interrupt_control);
if (!err)
err = driver_create_file(&xen_pcibk_pci_driver.driver,
diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h
index 235cdfe..95e28ee 100644
--- a/drivers/xen/xen-pciback/pciback.h
+++ b/drivers/xen/xen-pciback/pciback.h
@@ -48,13 +48,14 @@
struct list_head config_fields;
struct pci_saved_state *pci_saved_state;
unsigned int permissive:1;
+ unsigned int allow_interrupt_control:1;
unsigned int warned_on_write:1;
unsigned int enable_intx:1;
unsigned int isr_on:1; /* Whether the IRQ handler is installed. */
unsigned int ack_intr:1; /* .. and ACK-ing */
unsigned long handled;
unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */
- char irq_name[0]; /* xen-pcibk[000:04:00.0] */
+ char irq_name[]; /* xen-pcibk[000:04:00.0] */
};
/* Used by XenBus and xen_pcibk_ops.c */
@@ -194,8 +195,6 @@
int xen_pcibk_xenbus_register(void);
void xen_pcibk_xenbus_unregister(void);
-
-extern int verbose_request;
#endif
/* Handles shared IRQs that can to device domain and control domain. */
diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c
index c4ed2c6..3fbc214 100644
--- a/drivers/xen/xen-pciback/pciback_ops.c
+++ b/drivers/xen/xen-pciback/pciback_ops.c
@@ -6,6 +6,7 @@
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#define dev_fmt pr_fmt
#include <linux/moduleparam.h>
#include <linux/wait.h>
@@ -14,9 +15,6 @@
#include <linux/sched.h>
#include "pciback.h"
-int verbose_request;
-module_param(verbose_request, int, 0644);
-
static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id);
/* Ensure a device is has the fake IRQ handler "turned on/off" and is
@@ -147,9 +145,6 @@
struct xen_pcibk_dev_data *dev_data;
int status;
- if (unlikely(verbose_request))
- printk(KERN_DEBUG DRV_NAME ": %s: enable MSI\n", pci_name(dev));
-
if (dev->msi_enabled)
status = -EALREADY;
else if (dev->msix_enabled)
@@ -158,9 +153,8 @@
status = pci_enable_msi(dev);
if (status) {
- pr_warn_ratelimited("%s: error enabling MSI for guest %u: err %d\n",
- pci_name(dev), pdev->xdev->otherend_id,
- status);
+ dev_warn_ratelimited(&dev->dev, "error enabling MSI for guest %u: err %d\n",
+ pdev->xdev->otherend_id, status);
op->value = 0;
return XEN_PCI_ERR_op_failed;
}
@@ -169,9 +163,8 @@
* the local domain's IRQ number. */
op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
- if (unlikely(verbose_request))
- printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev),
- op->value);
+
+ dev_dbg(&dev->dev, "MSI: %d\n", op->value);
dev_data = pci_get_drvdata(dev);
if (dev_data)
@@ -184,10 +177,6 @@
int xen_pcibk_disable_msi(struct xen_pcibk_device *pdev,
struct pci_dev *dev, struct xen_pci_op *op)
{
- if (unlikely(verbose_request))
- printk(KERN_DEBUG DRV_NAME ": %s: disable MSI\n",
- pci_name(dev));
-
if (dev->msi_enabled) {
struct xen_pcibk_dev_data *dev_data;
@@ -198,9 +187,9 @@
dev_data->ack_intr = 1;
}
op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
- if (unlikely(verbose_request))
- printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev),
- op->value);
+
+ dev_dbg(&dev->dev, "MSI: %d\n", op->value);
+
return 0;
}
@@ -213,9 +202,7 @@
struct msix_entry *entries;
u16 cmd;
- if (unlikely(verbose_request))
- printk(KERN_DEBUG DRV_NAME ": %s: enable MSI-X\n",
- pci_name(dev));
+ dev_dbg(&dev->dev, "enable MSI-X\n");
if (op->value > SH_INFO_MAX_VEC)
return -EINVAL;
@@ -248,17 +235,13 @@
if (entries[i].vector) {
op->msix_entries[i].vector =
xen_pirq_from_irq(entries[i].vector);
- if (unlikely(verbose_request))
- printk(KERN_DEBUG DRV_NAME ": %s: " \
- "MSI-X[%d]: %d\n",
- pci_name(dev), i,
- op->msix_entries[i].vector);
+ dev_dbg(&dev->dev, "MSI-X[%d]: %d\n", i,
+ op->msix_entries[i].vector);
}
}
} else
- pr_warn_ratelimited("%s: error enabling MSI-X for guest %u: err %d!\n",
- pci_name(dev), pdev->xdev->otherend_id,
- result);
+ dev_warn_ratelimited(&dev->dev, "error enabling MSI-X for guest %u: err %d!\n",
+ pdev->xdev->otherend_id, result);
kfree(entries);
op->value = result;
@@ -273,10 +256,6 @@
int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev,
struct pci_dev *dev, struct xen_pci_op *op)
{
- if (unlikely(verbose_request))
- printk(KERN_DEBUG DRV_NAME ": %s: disable MSI-X\n",
- pci_name(dev));
-
if (dev->msix_enabled) {
struct xen_pcibk_dev_data *dev_data;
@@ -291,9 +270,9 @@
* an undefined IRQ value of zero.
*/
op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
- if (unlikely(verbose_request))
- printk(KERN_DEBUG DRV_NAME ": %s: MSI-X: %d\n",
- pci_name(dev), op->value);
+
+ dev_dbg(&dev->dev, "MSI-X: %d\n", op->value);
+
return 0;
}
#endif
@@ -452,7 +431,7 @@
dev_data->handled++;
if ((dev_data->handled % 1000) == 0) {
if (xen_test_irq_shared(irq)) {
- pr_info("%s IRQ line is not shared "
+ dev_info(&dev->dev, "%s IRQ line is not shared "
"with other domains. Turning ISR off\n",
dev_data->irq_name);
dev_data->ack_intr = 0;
diff --git a/drivers/xen/xen-pciback/vpci.c b/drivers/xen/xen-pciback/vpci.c
index 3031308..1221cfd 100644
--- a/drivers/xen/xen-pciback/vpci.c
+++ b/drivers/xen/xen-pciback/vpci.c
@@ -7,6 +7,7 @@
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#define dev_fmt pr_fmt
#include <linux/list.h>
#include <linux/slab.h>
@@ -109,9 +110,8 @@
continue;
if (match_slot(dev, t->dev)) {
- pr_info("vpci: %s: assign to virtual slot %d func %d\n",
- pci_name(dev), slot,
- func);
+ dev_info(&dev->dev, "vpci: assign to virtual slot %d func %d\n",
+ slot, func);
list_add_tail(&dev_entry->list,
&vpci_dev->dev_list[slot]);
goto unlock;
@@ -122,8 +122,8 @@
/* Assign to a new slot on the virtual PCI bus */
for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
if (list_empty(&vpci_dev->dev_list[slot])) {
- pr_info("vpci: %s: assign to virtual slot %d\n",
- pci_name(dev), slot);
+ dev_info(&dev->dev, "vpci: assign to virtual slot %d\n",
+ slot);
list_add_tail(&dev_entry->list,
&vpci_dev->dev_list[slot]);
goto unlock;
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index a255b08..cad56ea 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -105,13 +105,13 @@
}
static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
- int remote_evtchn)
+ evtchn_port_t remote_evtchn)
{
int err = 0;
void *vaddr;
dev_dbg(&pdev->xdev->dev,
- "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
+ "Attaching to frontend resources - gnt_ref=%d evtchn=%u\n",
gnt_ref, remote_evtchn);
err = xenbus_map_ring_valloc(pdev->xdev, &gnt_ref, 1, &vaddr);
@@ -142,7 +142,8 @@
static int xen_pcibk_attach(struct xen_pcibk_device *pdev)
{
int err = 0;
- int gnt_ref, remote_evtchn;
+ int gnt_ref;
+ evtchn_port_t remote_evtchn;
char *magic = NULL;
@@ -547,7 +548,7 @@
xenbus_switch_state(xdev, XenbusStateClosed);
if (xenbus_dev_is_online(xdev))
break;
- /* fall through - if not online */
+ fallthrough; /* if not online */
case XenbusStateUnknown:
dev_dbg(&xdev->dev, "frontend is gone! unregister device\n");
device_unregister(&xdev->dev);
diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
index 32aba2e..9cd4fe8 100644
--- a/drivers/xen/xen-scsiback.c
+++ b/drivers/xen/xen-scsiback.c
@@ -99,6 +99,8 @@
struct list_head v2p_entry_lists;
wait_queue_head_t waiting_to_free;
+
+ struct gnttab_page_cache free_pages;
};
/* theoretical maximum of grants for one request */
@@ -188,10 +190,6 @@
MODULE_PARM_DESC(max_buffer_pages,
"Maximum number of free pages to keep in backend buffer");
-static DEFINE_SPINLOCK(free_pages_lock);
-static int free_pages_num;
-static LIST_HEAD(scsiback_free_pages);
-
/* Global spinlock to protect scsiback TPG list */
static DEFINE_MUTEX(scsiback_mutex);
static LIST_HEAD(scsiback_list);
@@ -207,41 +205,6 @@
wake_up(&info->waiting_to_free);
}
-static void put_free_pages(struct page **page, int num)
-{
- unsigned long flags;
- int i = free_pages_num + num, n = num;
-
- if (num == 0)
- return;
- if (i > scsiback_max_buffer_pages) {
- n = min(num, i - scsiback_max_buffer_pages);
- gnttab_free_pages(n, page + num - n);
- n = num - n;
- }
- spin_lock_irqsave(&free_pages_lock, flags);
- for (i = 0; i < n; i++)
- list_add(&page[i]->lru, &scsiback_free_pages);
- free_pages_num += n;
- spin_unlock_irqrestore(&free_pages_lock, flags);
-}
-
-static int get_free_page(struct page **page)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&free_pages_lock, flags);
- if (list_empty(&scsiback_free_pages)) {
- spin_unlock_irqrestore(&free_pages_lock, flags);
- return gnttab_alloc_pages(1, page);
- }
- page[0] = list_first_entry(&scsiback_free_pages, struct page, lru);
- list_del(&page[0]->lru);
- free_pages_num--;
- spin_unlock_irqrestore(&free_pages_lock, flags);
- return 0;
-}
-
static unsigned long vaddr_page(struct page *page)
{
unsigned long pfn = page_to_pfn(page);
@@ -302,7 +265,8 @@
BUG_ON(err);
}
- put_free_pages(req->pages, req->n_grants);
+ gnttab_page_cache_put(&req->info->free_pages, req->pages,
+ req->n_grants);
req->n_grants = 0;
}
@@ -445,8 +409,8 @@
struct vscsibk_info *info = pending_req->info;
for (i = 0; i < cnt; i++) {
- if (get_free_page(pg + mapcount)) {
- put_free_pages(pg, mapcount);
+ if (gnttab_page_cache_get(&info->free_pages, pg + mapcount)) {
+ gnttab_page_cache_put(&info->free_pages, pg, mapcount);
pr_err("no grant page\n");
return -ENOMEM;
}
@@ -796,6 +760,8 @@
cond_resched();
}
+ gnttab_page_cache_shrink(&info->free_pages, scsiback_max_buffer_pages);
+
RING_FINAL_CHECK_FOR_REQUESTS(&info->ring, more_to_do);
return more_to_do;
}
@@ -858,7 +824,8 @@
static int scsiback_map(struct vscsibk_info *info)
{
struct xenbus_device *dev = info->dev;
- unsigned int ring_ref, evtchn;
+ unsigned int ring_ref;
+ evtchn_port_t evtchn;
int err;
err = xenbus_gather(XBT_NIL, dev->otherend,
@@ -1188,7 +1155,7 @@
xenbus_switch_state(dev, XenbusStateClosed);
if (xenbus_dev_is_online(dev))
break;
- /* fall through - if not online */
+ fallthrough; /* if not online */
case XenbusStateUnknown:
device_unregister(&dev->dev);
break;
@@ -1232,6 +1199,8 @@
scsiback_release_translation_entry(info);
+ gnttab_page_cache_shrink(&info->free_pages, 0);
+
dev_set_drvdata(&dev->dev, NULL);
return 0;
@@ -1262,6 +1231,7 @@
info->irq = 0;
INIT_LIST_HEAD(&info->v2p_entry_lists);
spin_lock_init(&info->v2p_lock);
+ gnttab_page_cache_init(&info->free_pages);
err = xenbus_printf(XBT_NIL, dev->nodename, "feature-sg-grant", "%u",
SG_ALL);
@@ -1878,13 +1848,6 @@
static void __exit scsiback_exit(void)
{
- struct page *page;
-
- while (free_pages_num) {
- if (get_free_page(&page))
- BUG();
- gnttab_free_pages(1, &page);
- }
target_unregister_template(&scsiback_ops);
xenbus_unregister_driver(&scsiback_driver);
}
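
For orientation (editorial sketch, not part of the patch): the scsiback hunks above drop the driver's private free-page list (free_pages_lock, scsiback_free_pages, get_free_page()/put_free_pages()) in favour of the common gnttab page cache, now embedded per backend instance in struct vscsibk_info. The usage pattern of that API, with a made-up my_backend structure, looks roughly like this:

#include <linux/errno.h>
#include <linux/mm.h>
#include <xen/grant_table.h>

struct my_backend {
	struct gnttab_page_cache free_pages;	/* replaces an open-coded page list */
};

static void my_backend_init(struct my_backend *be)
{
	gnttab_page_cache_init(&be->free_pages);
}

static int my_backend_get_pages(struct my_backend *be, struct page **pg,
				unsigned int nr)
{
	unsigned int i;

	for (i = 0; i < nr; i++) {
		/* Reuse a cached page if available, else allocate a new granted page. */
		if (gnttab_page_cache_get(&be->free_pages, pg + i)) {
			/* On failure, return whatever was obtained so far to the cache. */
			gnttab_page_cache_put(&be->free_pages, pg, i);
			return -ENOMEM;
		}
	}
	return 0;
}

static void my_backend_trim(struct my_backend *be, unsigned int keep)
{
	/* Drop cached pages above the limit; 0 releases the whole cache. */
	gnttab_page_cache_shrink(&be->free_pages, keep);
}
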
diff --git a/drivers/xen/xenbus/xenbus.h b/drivers/xen/xenbus/xenbus.h
index 88516a8..2a93b7c 100644
--- a/drivers/xen/xenbus/xenbus.h
+++ b/drivers/xen/xenbus/xenbus.h
@@ -118,8 +118,6 @@
void xenbus_dev_changed(const char *node, struct xen_bus_type *bus);
-void xenbus_dev_shutdown(struct device *_dev);
-
int xenbus_dev_suspend(struct device *dev);
int xenbus_dev_resume(struct device *dev);
int xenbus_dev_cancel(struct device *dev);
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index 81eddb8..16cfef0 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -69,11 +69,24 @@
unsigned int nr_handles;
};
+struct map_ring_valloc {
+ struct xenbus_map_node *node;
+
+ /* Why do we need two arrays? See comment of __xenbus_map_ring */
+ unsigned long addrs[XENBUS_MAX_RING_GRANTS];
+ phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS];
+
+ struct gnttab_map_grant_ref map[XENBUS_MAX_RING_GRANTS];
+ struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS];
+
+ unsigned int idx;
+};
+
static DEFINE_SPINLOCK(xenbus_valloc_lock);
static LIST_HEAD(xenbus_valloc_pages);
struct xenbus_ring_ops {
- int (*map)(struct xenbus_device *dev,
+ int (*map)(struct xenbus_device *dev, struct map_ring_valloc *info,
grant_ref_t *gnt_refs, unsigned int nr_grefs,
void **vaddr);
int (*unmap)(struct xenbus_device *dev, void *vaddr);
@@ -366,7 +379,14 @@
unsigned int nr_pages, grant_ref_t *grefs)
{
int err;
- int i, j;
+ unsigned int i;
+ grant_ref_t gref_head;
+
+ err = gnttab_alloc_grant_references(nr_pages, &gref_head);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "granting access to ring page");
+ return err;
+ }
for (i = 0; i < nr_pages; i++) {
unsigned long gfn;
@@ -376,23 +396,14 @@
else
gfn = virt_to_gfn(vaddr);
- err = gnttab_grant_foreign_access(dev->otherend_id, gfn, 0);
- if (err < 0) {
- xenbus_dev_fatal(dev, err,
- "granting access to ring page");
- goto fail;
- }
- grefs[i] = err;
+ grefs[i] = gnttab_claim_grant_reference(&gref_head);
+ gnttab_grant_foreign_access_ref(grefs[i], dev->otherend_id,
+ gfn, 0);
vaddr = vaddr + XEN_PAGE_SIZE;
}
return 0;
-
-fail:
- for (j = 0; j < i; j++)
- gnttab_end_foreign_access_ref(grefs[j], 0);
- return err;
}
EXPORT_SYMBOL_GPL(xenbus_grant_ring);
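
A brief aside on the xenbus_grant_ring() rewrite above (illustrative only): all grant references are now reserved up front with gnttab_alloc_grant_references(), so granting each page can no longer fail part-way through and the old partial-rollback path disappears. The reserve-then-claim pattern, with my_grant_buffer() as a hypothetical caller-side helper, looks roughly like this:

#include <xen/grant_table.h>
#include <xen/page.h>

static int my_grant_buffer(domid_t otherend_id, void *vaddr,
			   unsigned int nr_pages, grant_ref_t *grefs)
{
	grant_ref_t gref_head;
	unsigned int i;
	int err;

	/* Reserve every reference first; claiming below can then not fail. */
	err = gnttab_alloc_grant_references(nr_pages, &gref_head);
	if (err)
		return err;

	for (i = 0; i < nr_pages; i++) {
		grefs[i] = gnttab_claim_grant_reference(&gref_head);
		gnttab_grant_foreign_access_ref(grefs[i], otherend_id,
						virt_to_gfn(vaddr), 0);
		vaddr = vaddr + XEN_PAGE_SIZE;
	}
	return 0;
}
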
@@ -403,7 +414,7 @@
* error, the device will switch to XenbusStateClosing, and the error will be
* saved in the store.
*/
-int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port)
+int xenbus_alloc_evtchn(struct xenbus_device *dev, evtchn_port_t *port)
{
struct evtchn_alloc_unbound alloc_unbound;
int err;
@@ -426,7 +437,7 @@
/**
* Free an existing event channel. Returns 0 on success or -errno on error.
*/
-int xenbus_free_evtchn(struct xenbus_device *dev, int port)
+int xenbus_free_evtchn(struct xenbus_device *dev, evtchn_port_t port)
{
struct evtchn_close close;
int err;
@@ -435,7 +446,7 @@
err = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
if (err)
- xenbus_dev_error(dev, err, "freeing event channel %d", port);
+ xenbus_dev_error(dev, err, "freeing event channel %u", port);
return err;
}
@@ -452,8 +463,7 @@
* Map @nr_grefs pages of memory into this domain from another
* domain's grant table. xenbus_map_ring_valloc allocates @nr_grefs
* pages of virtual address space, maps the pages to that address, and
- * sets *vaddr to that address. Returns 0 on success, and GNTST_*
- * (see xen/include/interface/grant_table.h) or -ENOMEM / -EINVAL on
+ * sets *vaddr to that address. Returns 0 on success, and -errno on
* error. If an error is returned, device will switch to
* XenbusStateClosing and the error message will be saved in XenStore.
*/
@@ -461,12 +471,25 @@
unsigned int nr_grefs, void **vaddr)
{
int err;
+ struct map_ring_valloc *info;
- err = ring_ops->map(dev, gnt_refs, nr_grefs, vaddr);
- /* Some hypervisors are buggy and can return 1. */
- if (err > 0)
- err = GNTST_general_error;
+ *vaddr = NULL;
+ if (nr_grefs > XENBUS_MAX_RING_GRANTS)
+ return -EINVAL;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
+ info->node = kzalloc(sizeof(*info->node), GFP_KERNEL);
+ if (!info->node)
+ err = -ENOMEM;
+ else
+ err = ring_ops->map(dev, info, gnt_refs, nr_grefs, vaddr);
+
+ kfree(info->node);
+ kfree(info);
return err;
}
EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
@@ -478,79 +501,107 @@
grant_ref_t *gnt_refs,
unsigned int nr_grefs,
grant_handle_t *handles,
- phys_addr_t *addrs,
+ struct map_ring_valloc *info,
unsigned int flags,
bool *leaked)
{
- struct gnttab_map_grant_ref map[XENBUS_MAX_RING_GRANTS];
- struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS];
int i, j;
- int err = GNTST_okay;
if (nr_grefs > XENBUS_MAX_RING_GRANTS)
return -EINVAL;
for (i = 0; i < nr_grefs; i++) {
- memset(&map[i], 0, sizeof(map[i]));
- gnttab_set_map_op(&map[i], addrs[i], flags, gnt_refs[i],
- dev->otherend_id);
+ gnttab_set_map_op(&info->map[i], info->phys_addrs[i], flags,
+ gnt_refs[i], dev->otherend_id);
handles[i] = INVALID_GRANT_HANDLE;
}
- gnttab_batch_map(map, i);
+ gnttab_batch_map(info->map, i);
for (i = 0; i < nr_grefs; i++) {
- if (map[i].status != GNTST_okay) {
- err = map[i].status;
- xenbus_dev_fatal(dev, map[i].status,
+ if (info->map[i].status != GNTST_okay) {
+ xenbus_dev_fatal(dev, info->map[i].status,
"mapping in shared page %d from domain %d",
gnt_refs[i], dev->otherend_id);
goto fail;
} else
- handles[i] = map[i].handle;
+ handles[i] = info->map[i].handle;
}
- return GNTST_okay;
+ return 0;
fail:
for (i = j = 0; i < nr_grefs; i++) {
if (handles[i] != INVALID_GRANT_HANDLE) {
- memset(&unmap[j], 0, sizeof(unmap[j]));
- gnttab_set_unmap_op(&unmap[j], (phys_addr_t)addrs[i],
+ gnttab_set_unmap_op(&info->unmap[j],
+ info->phys_addrs[i],
GNTMAP_host_map, handles[i]);
j++;
}
}
- if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, j))
+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, info->unmap, j))
BUG();
*leaked = false;
for (i = 0; i < j; i++) {
- if (unmap[i].status != GNTST_okay) {
+ if (info->unmap[i].status != GNTST_okay) {
*leaked = true;
break;
}
}
+ return -ENOENT;
+}
+
+/**
+ * xenbus_unmap_ring
+ * @dev: xenbus device
+ * @handles: grant handle array
+ * @nr_handles: number of handles in the array
+ * @vaddrs: addresses to unmap
+ *
+ * Unmap memory in this domain that was imported from another domain.
+ * Returns 0 on success and returns GNTST_* on error
+ * (see xen/include/interface/grant_table.h).
+ */
+static int xenbus_unmap_ring(struct xenbus_device *dev, grant_handle_t *handles,
+ unsigned int nr_handles, unsigned long *vaddrs)
+{
+ struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS];
+ int i;
+ int err;
+
+ if (nr_handles > XENBUS_MAX_RING_GRANTS)
+ return -EINVAL;
+
+ for (i = 0; i < nr_handles; i++)
+ gnttab_set_unmap_op(&unmap[i], vaddrs[i],
+ GNTMAP_host_map, handles[i]);
+
+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i))
+ BUG();
+
+ err = GNTST_okay;
+ for (i = 0; i < nr_handles; i++) {
+ if (unmap[i].status != GNTST_okay) {
+ xenbus_dev_error(dev, unmap[i].status,
+ "unmapping page at handle %d error %d",
+ handles[i], unmap[i].status);
+ err = unmap[i].status;
+ break;
+ }
+ }
+
return err;
}
-struct map_ring_valloc_hvm
-{
- unsigned int idx;
-
- /* Why do we need two arrays? See comment of __xenbus_map_ring */
- phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS];
- unsigned long addrs[XENBUS_MAX_RING_GRANTS];
-};
-
static void xenbus_map_ring_setup_grant_hvm(unsigned long gfn,
unsigned int goffset,
unsigned int len,
void *data)
{
- struct map_ring_valloc_hvm *info = data;
+ struct map_ring_valloc *info = data;
unsigned long vaddr = (unsigned long)gfn_to_virt(gfn);
info->phys_addrs[info->idx] = vaddr;
@@ -559,39 +610,28 @@
info->idx++;
}
-static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
- grant_ref_t *gnt_ref,
- unsigned int nr_grefs,
- void **vaddr)
+static int xenbus_map_ring_hvm(struct xenbus_device *dev,
+ struct map_ring_valloc *info,
+ grant_ref_t *gnt_ref,
+ unsigned int nr_grefs,
+ void **vaddr)
{
- struct xenbus_map_node *node;
+ struct xenbus_map_node *node = info->node;
int err;
void *addr;
bool leaked = false;
- struct map_ring_valloc_hvm info = {
- .idx = 0,
- };
unsigned int nr_pages = XENBUS_PAGES(nr_grefs);
- if (nr_grefs > XENBUS_MAX_RING_GRANTS)
- return -EINVAL;
-
- *vaddr = NULL;
-
- node = kzalloc(sizeof(*node), GFP_KERNEL);
- if (!node)
- return -ENOMEM;
-
- err = alloc_xenballooned_pages(nr_pages, node->hvm.pages);
+ err = xen_alloc_unpopulated_pages(nr_pages, node->hvm.pages);
if (err)
goto out_err;
gnttab_foreach_grant(node->hvm.pages, nr_grefs,
xenbus_map_ring_setup_grant_hvm,
- &info);
+ info);
err = __xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handles,
- info.phys_addrs, GNTMAP_host_map, &leaked);
+ info, GNTMAP_host_map, &leaked);
node->nr_handles = nr_grefs;
if (err)
@@ -611,61 +651,23 @@
spin_unlock(&xenbus_valloc_lock);
*vaddr = addr;
+ info->node = NULL;
+
return 0;
out_xenbus_unmap_ring:
if (!leaked)
- xenbus_unmap_ring(dev, node->handles, nr_grefs, info.addrs);
+ xenbus_unmap_ring(dev, node->handles, nr_grefs, info->addrs);
else
pr_alert("leaking %p size %u page(s)",
addr, nr_pages);
out_free_ballooned_pages:
if (!leaked)
- free_xenballooned_pages(nr_pages, node->hvm.pages);
+ xen_free_unpopulated_pages(nr_pages, node->hvm.pages);
out_err:
- kfree(node);
return err;
}
-
-/**
- * xenbus_map_ring
- * @dev: xenbus device
- * @gnt_refs: grant reference array
- * @nr_grefs: number of grant reference
- * @handles: pointer to grant handle to be filled
- * @vaddrs: addresses to be mapped to
- * @leaked: fail to clean up a failed map, caller should not free vaddr
- *
- * Map pages of memory into this domain from another domain's grant table.
- * xenbus_map_ring does not allocate the virtual address space (you must do
- * this yourself!). It only maps in the pages to the specified address.
- * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
- * or -ENOMEM / -EINVAL on error. If an error is returned, device will switch to
- * XenbusStateClosing and the first error message will be saved in XenStore.
- * Further more if we fail to map the ring, caller should check @leaked.
- * If @leaked is not zero it means xenbus_map_ring fails to clean up, caller
- * should not free the address space of @vaddr.
- */
-int xenbus_map_ring(struct xenbus_device *dev, grant_ref_t *gnt_refs,
- unsigned int nr_grefs, grant_handle_t *handles,
- unsigned long *vaddrs, bool *leaked)
-{
- phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS];
- int i;
-
- if (nr_grefs > XENBUS_MAX_RING_GRANTS)
- return -EINVAL;
-
- for (i = 0; i < nr_grefs; i++)
- phys_addrs[i] = (unsigned long)vaddrs[i];
-
- return __xenbus_map_ring(dev, gnt_refs, nr_grefs, handles,
- phys_addrs, GNTMAP_host_map, leaked);
-}
-EXPORT_SYMBOL_GPL(xenbus_map_ring);
-
-
/**
* xenbus_unmap_ring_vfree
* @dev: xenbus device
@@ -685,40 +687,33 @@
EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
#ifdef CONFIG_XEN_PV
-static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
- grant_ref_t *gnt_refs,
- unsigned int nr_grefs,
- void **vaddr)
+static int map_ring_apply(pte_t *pte, unsigned long addr, void *data)
{
- struct xenbus_map_node *node;
+ struct map_ring_valloc *info = data;
+
+ info->phys_addrs[info->idx++] = arbitrary_virt_to_machine(pte).maddr;
+ return 0;
+}
+
+static int xenbus_map_ring_pv(struct xenbus_device *dev,
+ struct map_ring_valloc *info,
+ grant_ref_t *gnt_refs,
+ unsigned int nr_grefs,
+ void **vaddr)
+{
+ struct xenbus_map_node *node = info->node;
struct vm_struct *area;
- pte_t *ptes[XENBUS_MAX_RING_GRANTS];
- phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS];
- int err = GNTST_okay;
- int i;
- bool leaked;
+ bool leaked = false;
+ int err = -ENOMEM;
- *vaddr = NULL;
-
- if (nr_grefs > XENBUS_MAX_RING_GRANTS)
- return -EINVAL;
-
- node = kzalloc(sizeof(*node), GFP_KERNEL);
- if (!node)
+ area = get_vm_area(XEN_PAGE_SIZE * nr_grefs, VM_IOREMAP);
+ if (!area)
return -ENOMEM;
-
- area = alloc_vm_area(XEN_PAGE_SIZE * nr_grefs, ptes);
- if (!area) {
- kfree(node);
- return -ENOMEM;
- }
-
- for (i = 0; i < nr_grefs; i++)
- phys_addrs[i] = arbitrary_virt_to_machine(ptes[i]).maddr;
-
+ if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
+ XEN_PAGE_SIZE * nr_grefs, map_ring_apply, info))
+ goto failed;
err = __xenbus_map_ring(dev, gnt_refs, nr_grefs, node->handles,
- phys_addrs,
- GNTMAP_host_map | GNTMAP_contains_pte,
+ info, GNTMAP_host_map | GNTMAP_contains_pte,
&leaked);
if (err)
goto failed;
@@ -731,6 +726,8 @@
spin_unlock(&xenbus_valloc_lock);
*vaddr = area->addr;
+ info->node = NULL;
+
return 0;
failed:
@@ -739,11 +736,10 @@
else
pr_alert("leaking VM area %p size %u page(s)", area, nr_grefs);
- kfree(node);
return err;
}
-static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)
+static int xenbus_unmap_ring_pv(struct xenbus_device *dev, void *vaddr)
{
struct xenbus_map_node *node;
struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS];
@@ -807,12 +803,12 @@
}
static const struct xenbus_ring_ops ring_ops_pv = {
- .map = xenbus_map_ring_valloc_pv,
- .unmap = xenbus_unmap_ring_vfree_pv,
+ .map = xenbus_map_ring_pv,
+ .unmap = xenbus_unmap_ring_pv,
};
#endif
-struct unmap_ring_vfree_hvm
+struct unmap_ring_hvm
{
unsigned int idx;
unsigned long addrs[XENBUS_MAX_RING_GRANTS];
@@ -823,19 +819,19 @@
unsigned int len,
void *data)
{
- struct unmap_ring_vfree_hvm *info = data;
+ struct unmap_ring_hvm *info = data;
info->addrs[info->idx] = (unsigned long)gfn_to_virt(gfn);
info->idx++;
}
-static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
+static int xenbus_unmap_ring_hvm(struct xenbus_device *dev, void *vaddr)
{
int rv;
struct xenbus_map_node *node;
void *addr;
- struct unmap_ring_vfree_hvm info = {
+ struct unmap_ring_hvm info = {
.idx = 0,
};
unsigned int nr_pages;
@@ -868,7 +864,7 @@
info.addrs);
if (!rv) {
vunmap(vaddr);
- free_xenballooned_pages(nr_pages, node->hvm.pages);
+ xen_free_unpopulated_pages(nr_pages, node->hvm.pages);
}
else
WARN(1, "Leaking %p, size %u page(s)\n", vaddr, nr_pages);
@@ -878,51 +874,6 @@
}
/**
- * xenbus_unmap_ring
- * @dev: xenbus device
- * @handles: grant handle array
- * @nr_handles: number of handles in the array
- * @vaddrs: addresses to unmap
- *
- * Unmap memory in this domain that was imported from another domain.
- * Returns 0 on success and returns GNTST_* on error
- * (see xen/include/interface/grant_table.h).
- */
-int xenbus_unmap_ring(struct xenbus_device *dev,
- grant_handle_t *handles, unsigned int nr_handles,
- unsigned long *vaddrs)
-{
- struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS];
- int i;
- int err;
-
- if (nr_handles > XENBUS_MAX_RING_GRANTS)
- return -EINVAL;
-
- for (i = 0; i < nr_handles; i++)
- gnttab_set_unmap_op(&unmap[i], vaddrs[i],
- GNTMAP_host_map, handles[i]);
-
- if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i))
- BUG();
-
- err = GNTST_okay;
- for (i = 0; i < nr_handles; i++) {
- if (unmap[i].status != GNTST_okay) {
- xenbus_dev_error(dev, unmap[i].status,
- "unmapping page at handle %d error %d",
- handles[i], unmap[i].status);
- err = unmap[i].status;
- break;
- }
- }
-
- return err;
-}
-EXPORT_SYMBOL_GPL(xenbus_unmap_ring);
-
-
-/**
* xenbus_read_driver_state
* @path: path for driver
*
@@ -941,8 +892,8 @@
EXPORT_SYMBOL_GPL(xenbus_read_driver_state);
static const struct xenbus_ring_ops ring_ops_hvm = {
- .map = xenbus_map_ring_valloc_hvm,
- .unmap = xenbus_unmap_ring_vfree_hvm,
+ .map = xenbus_map_ring_hvm,
+ .unmap = xenbus_unmap_ring_hvm,
};
void __init xenbus_ring_ops_init(void)
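
One more illustrative note (not part of the patch): the hunks above, like the xlate_mmu.c change further down, replace alloc_xenballooned_pages()/free_xenballooned_pages() with xen_alloc_unpopulated_pages()/xen_free_unpopulated_pages(), which hand out guest-physical ranges with no RAM behind them, intended as scratch space for mapping foreign memory or grants. The lifecycle, sketched with invented helper names, is roughly:

#include <linux/mm.h>
#include <xen/xen.h>

/* my_get_scratch_range()/my_put_scratch_range() are hypothetical helpers. */
static int my_get_scratch_range(unsigned int nr_pages, struct page **pages)
{
	int rc;

	/* Obtain guest-physical space without RAM backing it. */
	rc = xen_alloc_unpopulated_pages(nr_pages, pages);
	if (rc)
		return rc;

	/* ... map foreign frames or grants into these pages here ... */
	return 0;
}

static void my_put_scratch_range(unsigned int nr_pages, struct page **pages)
{
	/* Callers unmap first, then hand the range back. */
	xen_free_unpopulated_pages(nr_pages, pages);
}
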
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 652894d..98d8706 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -31,6 +31,7 @@
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#define dev_fmt pr_fmt
#define DPRINTK(fmt, args...) \
pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \
@@ -51,7 +52,6 @@
#include <linux/module.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
#include <asm/xen/hypervisor.h>
#include <xen/xen.h>
@@ -233,9 +233,18 @@
return err;
}
- err = drv->probe(dev, id);
- if (err)
+ if (!try_module_get(drv->driver.owner)) {
+ dev_warn(&dev->dev, "failed to acquire module reference on '%s'\n",
+ drv->driver.name);
+ err = -ESRCH;
goto fail;
+ }
+
+ down(&dev->reclaim_sem);
+ err = drv->probe(dev, id);
+ up(&dev->reclaim_sem);
+ if (err)
+ goto fail_put;
err = watch_otherend(dev);
if (err) {
@@ -245,9 +254,10 @@
}
return 0;
+fail_put:
+ module_put(drv->driver.owner);
fail:
xenbus_dev_error(dev, err, "xenbus_dev_probe on %s", dev->nodename);
- xenbus_switch_state(dev, XenbusStateClosed);
return err;
}
EXPORT_SYMBOL_GPL(xenbus_dev_probe);
@@ -261,39 +271,30 @@
free_otherend_watch(dev);
- if (drv->remove)
+ if (drv->remove) {
+ down(&dev->reclaim_sem);
drv->remove(dev);
+ up(&dev->reclaim_sem);
+ }
+
+ module_put(drv->driver.owner);
free_otherend_details(dev);
- xenbus_switch_state(dev, XenbusStateClosed);
+ /*
+ * If the toolstack has forced the device state to closing then set
+ * the state to closed now to allow it to be cleaned up.
+ * Similarly, if the driver does not support re-bind, set the
+ * closed.
+ */
+ if (!drv->allow_rebind ||
+ xenbus_read_driver_state(dev->nodename) == XenbusStateClosing)
+ xenbus_switch_state(dev, XenbusStateClosed);
+
return 0;
}
EXPORT_SYMBOL_GPL(xenbus_dev_remove);
-void xenbus_dev_shutdown(struct device *_dev)
-{
- struct xenbus_device *dev = to_xenbus_device(_dev);
- unsigned long timeout = 5*HZ;
-
- DPRINTK("%s", dev->nodename);
-
- get_device(&dev->dev);
- if (dev->state != XenbusStateConnected) {
- pr_info("%s: %s: %s != Connected, skipping\n",
- __func__, dev->nodename, xenbus_strstate(dev->state));
- goto out;
- }
- xenbus_switch_state(dev, XenbusStateClosing);
- timeout = wait_for_completion_timeout(&dev->down, timeout);
- if (!timeout)
- pr_info("%s: %s timeout closing device\n",
- __func__, dev->nodename);
- out:
- put_device(&dev->dev);
-}
-EXPORT_SYMBOL_GPL(xenbus_dev_shutdown);
-
int xenbus_register_driver_common(struct xenbus_driver *drv,
struct xen_bus_type *bus,
struct module *owner, const char *mod_name)
@@ -473,6 +474,7 @@
goto fail;
dev_set_name(&xendev->dev, "%s", devname);
+ sema_init(&xendev->reclaim_sem, 1);
/* Register with generic device framework. */
err = device_register(&xendev->dev);
@@ -607,7 +609,7 @@
if (drv->suspend)
err = drv->suspend(xdev);
if (err)
- pr_warn("suspend %s failed: %i\n", dev_name(dev), err);
+ dev_warn(dev, "suspend failed: %i\n", err);
return 0;
}
EXPORT_SYMBOL_GPL(xenbus_dev_suspend);
@@ -626,8 +628,7 @@
drv = to_xenbus_driver(dev->driver);
err = talk_to_otherend(xdev);
if (err) {
- pr_warn("resume (talk_to_otherend) %s failed: %i\n",
- dev_name(dev), err);
+ dev_warn(dev, "resume (talk_to_otherend) failed: %i\n", err);
return err;
}
@@ -636,15 +637,14 @@
if (drv->resume) {
err = drv->resume(xdev);
if (err) {
- pr_warn("resume %s failed: %i\n", dev_name(dev), err);
+ dev_warn(dev, "resume failed: %i\n", err);
return err;
}
}
err = watch_otherend(xdev);
if (err) {
- pr_warn("resume (watch_otherend) %s failed: %d.\n",
- dev_name(dev), err);
+ dev_warn(dev, "resume (watch_otherend) failed: %d\n", err);
return err;
}
@@ -846,7 +846,7 @@
static int __init xenbus_init(void)
{
- int err = 0;
+ int err;
uint64_t v = 0;
xen_store_domain_type = XS_UNKNOWN;
@@ -886,6 +886,29 @@
err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
if (err)
goto out_error;
+ /*
+ * Uninitialized hvm_params are zero and return no error.
+ * Although it is theoretically possible to have
+ * HVM_PARAM_STORE_PFN set to zero on purpose, in reality it is
+ * not zero when valid. If zero, it means that Xenstore hasn't
+ * been properly initialized. Instead of attempting to map a
+ * wrong guest physical address return error.
+ *
+ * Also recognize all bits set as an invalid value.
+ */
+ if (!v || !~v) {
+ err = -ENOENT;
+ goto out_error;
+ }
+ /* Avoid truncation on 32-bit. */
+#if BITS_PER_LONG == 32
+ if (v > ULONG_MAX) {
+ pr_err("%s: cannot handle HVM_PARAM_STORE_PFN=%llx > ULONG_MAX\n",
+ __func__, v);
+ err = -EINVAL;
+ goto out_error;
+ }
+#endif
xen_store_gfn = (unsigned long)v;
xen_store_interface =
xen_remap(xen_store_gfn << XEN_PAGE_SHIFT,
@@ -920,8 +943,10 @@
*/
proc_create_mount_point("xen");
#endif
+ return 0;
out_error:
+ xen_store_domain_type = XS_UNKNOWN;
return err;
}
diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c
index 4bb6030..5abded9 100644
--- a/drivers/xen/xenbus/xenbus_probe_backend.c
+++ b/drivers/xen/xenbus/xenbus_probe_backend.c
@@ -45,9 +45,9 @@
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/export.h>
+#include <linux/semaphore.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
#include <asm/xen/hypervisor.h>
#include <asm/hypervisor.h>
#include <xen/xenbus.h>
@@ -205,7 +205,6 @@
.uevent = xenbus_uevent_backend,
.probe = xenbus_dev_probe,
.remove = xenbus_dev_remove,
- .shutdown = xenbus_dev_shutdown,
.dev_groups = xenbus_dev_groups,
},
};
@@ -255,6 +254,41 @@
return NOTIFY_DONE;
}
+static int backend_reclaim_memory(struct device *dev, void *data)
+{
+ const struct xenbus_driver *drv;
+ struct xenbus_device *xdev;
+
+ if (!dev->driver)
+ return 0;
+ drv = to_xenbus_driver(dev->driver);
+ if (drv && drv->reclaim_memory) {
+ xdev = to_xenbus_device(dev);
+ if (down_trylock(&xdev->reclaim_sem))
+ return 0;
+ drv->reclaim_memory(xdev);
+ up(&xdev->reclaim_sem);
+ }
+ return 0;
+}
+
+/*
+ * Returns 0 always because we are using shrinker to only detect memory
+ * pressure.
+ */
+static unsigned long backend_shrink_memory_count(struct shrinker *shrinker,
+ struct shrink_control *sc)
+{
+ bus_for_each_dev(&xenbus_backend.bus, NULL, NULL,
+ backend_reclaim_memory);
+ return 0;
+}
+
+static struct shrinker backend_memory_shrinker = {
+ .count_objects = backend_shrink_memory_count,
+ .seeks = DEFAULT_SEEKS,
+};
+
static int __init xenbus_probe_backend_init(void)
{
static struct notifier_block xenstore_notifier = {
@@ -271,6 +305,9 @@
register_xenstore_notifier(&xenstore_notifier);
+ if (register_shrinker(&backend_memory_shrinker))
+ pr_warn("shrinker registration failed\n");
+
return 0;
}
subsys_initcall(xenbus_probe_backend_init);
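
To put the new shrinker above in context (editorial sketch, not part of the patch): count_objects is used purely as a memory-pressure notification and always returns 0, so the shrinker core never invokes a scan callback; instead every bound backend that implements the reclaim_memory hook in struct xenbus_driver is asked to release reclaimable memory, serialized against probe()/remove() by reclaim_sem as shown in xenbus_probe.c. A hypothetical backend would opt in roughly like this (my_backend_* names are invented):

#include <xen/xenbus.h>

static const struct xenbus_device_id my_backend_ids[] = {
	{ "my-device" },
	{ "" }
};

static void my_backend_reclaim(struct xenbus_device *dev)
{
	/* Called under dev->reclaim_sem on memory pressure: drop caches
	 * (e.g. unused granted pages) that can be rebuilt later. */
}

static struct xenbus_driver my_backend_driver = {
	.ids = my_backend_ids,
	.reclaim_memory = my_backend_reclaim,
	/* .probe, .remove, .otherend_changed etc. omitted in this sketch;
	 * the driver would be registered with xenbus_register_backend(). */
};
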
diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c
index a7d90a7..4809446 100644
--- a/drivers/xen/xenbus/xenbus_probe_frontend.c
+++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -19,7 +19,6 @@
#include <linux/module.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
#include <asm/xen/hypervisor.h>
#include <xen/xenbus.h>
#include <xen/events.h>
@@ -126,6 +125,28 @@
return xenbus_dev_probe(dev);
}
+static void xenbus_frontend_dev_shutdown(struct device *_dev)
+{
+ struct xenbus_device *dev = to_xenbus_device(_dev);
+ unsigned long timeout = 5*HZ;
+
+ DPRINTK("%s", dev->nodename);
+
+ get_device(&dev->dev);
+ if (dev->state != XenbusStateConnected) {
+ pr_info("%s: %s: %s != Connected, skipping\n",
+ __func__, dev->nodename, xenbus_strstate(dev->state));
+ goto out;
+ }
+ xenbus_switch_state(dev, XenbusStateClosing);
+ timeout = wait_for_completion_timeout(&dev->down, timeout);
+ if (!timeout)
+ pr_info("%s: %s timeout closing device\n",
+ __func__, dev->nodename);
+ out:
+ put_device(&dev->dev);
+}
+
static const struct dev_pm_ops xenbus_pm_ops = {
.suspend = xenbus_dev_suspend,
.resume = xenbus_frontend_dev_resume,
@@ -146,7 +167,7 @@
.uevent = xenbus_uevent_frontend,
.probe = xenbus_frontend_dev_probe,
.remove = xenbus_dev_remove,
- .shutdown = xenbus_dev_shutdown,
+ .shutdown = xenbus_frontend_dev_shutdown,
.dev_groups = xenbus_dev_groups,
.pm = &xenbus_pm_ops,
@@ -380,12 +401,12 @@
case XenbusStateConnected:
xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosing);
xenbus_reset_wait_for_backend(be, XenbusStateClosing);
- /* fall through */
+ fallthrough;
case XenbusStateClosing:
xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosed);
xenbus_reset_wait_for_backend(be, XenbusStateClosed);
- /* fall through */
+ fallthrough;
case XenbusStateClosed:
xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateInitialising);
diff --git a/drivers/xen/xlate_mmu.c b/drivers/xen/xlate_mmu.c
index 7b1077f..34742c6 100644
--- a/drivers/xen/xlate_mmu.c
+++ b/drivers/xen/xlate_mmu.c
@@ -232,7 +232,7 @@
kfree(pages);
return -ENOMEM;
}
- rc = alloc_xenballooned_pages(nr_pages, pages);
+ rc = xen_alloc_unpopulated_pages(nr_pages, pages);
if (rc) {
pr_warn("%s Couldn't balloon alloc %ld pages rc:%d\n", __func__,
nr_pages, rc);
@@ -249,7 +249,7 @@
if (!vaddr) {
pr_warn("%s Couldn't map %ld pages rc:%d\n", __func__,
nr_pages, rc);
- free_xenballooned_pages(nr_pages, pages);
+ xen_free_unpopulated_pages(nr_pages, pages);
kfree(pages);
kfree(pfns);
return -ENOMEM;