Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
index 97954f5..79e5356 100644
--- a/drivers/hv/Kconfig
+++ b/drivers/hv/Kconfig
@@ -4,14 +4,15 @@
config HYPERV
tristate "Microsoft Hyper-V client drivers"
- depends on X86 && ACPI && PCI && X86_LOCAL_APIC && HYPERVISOR_GUEST
+ depends on X86 && ACPI && X86_LOCAL_APIC && HYPERVISOR_GUEST
select PARAVIRT
+ select X86_HV_CALLBACK_VECTOR
help
Select this option to run Linux as a Hyper-V client operating
system.
-config HYPERV_TSCPAGE
- def_bool HYPERV && X86_64
+config HYPERV_TIMER
+ def_bool HYPERV
config HYPERV_UTILS
tristate "Microsoft Hyper-V Utilities driver"
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 2f164bd..23f358c 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -1,19 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2009, Microsoft Corporation.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
* Authors:
* Haiyang Zhang <haiyangz@microsoft.com>
* Hank Janssen <hjanssen@microsoft.com>
@@ -38,7 +26,7 @@
static unsigned long virt_to_hvpfn(void *addr)
{
- unsigned long paddr;
+ phys_addr_t paddr;
if (is_vmalloc_addr(addr))
paddr = page_to_phys(vmalloc_to_page(addr)) +
@@ -79,85 +67,96 @@
}
EXPORT_SYMBOL_GPL(vmbus_setevent);
-/*
- * vmbus_open - Open the specified channel.
- */
-int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
- u32 recv_ringbuffer_size, void *userdata, u32 userdatalen,
- void (*onchannelcallback)(void *context), void *context)
+/* vmbus_free_ring - drop mapping of ring buffer */
+void vmbus_free_ring(struct vmbus_channel *channel)
+{
+ hv_ringbuffer_cleanup(&channel->outbound);
+ hv_ringbuffer_cleanup(&channel->inbound);
+
+ if (channel->ringbuffer_page) {
+ __free_pages(channel->ringbuffer_page,
+ get_order(channel->ringbuffer_pagecount
+ << PAGE_SHIFT));
+ channel->ringbuffer_page = NULL;
+ }
+}
+EXPORT_SYMBOL_GPL(vmbus_free_ring);
+
+/* vmbus_alloc_ring - allocate and map pages for ring buffer */
+int vmbus_alloc_ring(struct vmbus_channel *newchannel,
+ u32 send_size, u32 recv_size)
+{
+ struct page *page;
+ int order;
+
+ if (send_size % PAGE_SIZE || recv_size % PAGE_SIZE)
+ return -EINVAL;
+
+ /* Allocate the ring buffer */
+ order = get_order(send_size + recv_size);
+ page = alloc_pages_node(cpu_to_node(newchannel->target_cpu),
+ GFP_KERNEL|__GFP_ZERO, order);
+
+ if (!page)
+ page = alloc_pages(GFP_KERNEL|__GFP_ZERO, order);
+
+ if (!page)
+ return -ENOMEM;
+
+ newchannel->ringbuffer_page = page;
+ newchannel->ringbuffer_pagecount = (send_size + recv_size) >> PAGE_SHIFT;
+ newchannel->ringbuffer_send_offset = send_size >> PAGE_SHIFT;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(vmbus_alloc_ring);
+
+static int __vmbus_open(struct vmbus_channel *newchannel,
+ void *userdata, u32 userdatalen,
+ void (*onchannelcallback)(void *context), void *context)
{
struct vmbus_channel_open_channel *open_msg;
struct vmbus_channel_msginfo *open_info = NULL;
+ struct page *page = newchannel->ringbuffer_page;
+ u32 send_pages, recv_pages;
unsigned long flags;
- int ret, err = 0;
- struct page *page;
+ int err;
- if (send_ringbuffer_size % PAGE_SIZE ||
- recv_ringbuffer_size % PAGE_SIZE)
+ if (userdatalen > MAX_USER_DEFINED_BYTES)
return -EINVAL;
+ send_pages = newchannel->ringbuffer_send_offset;
+ recv_pages = newchannel->ringbuffer_pagecount - send_pages;
+
spin_lock_irqsave(&newchannel->lock, flags);
- if (newchannel->state == CHANNEL_OPEN_STATE) {
- newchannel->state = CHANNEL_OPENING_STATE;
- } else {
+ if (newchannel->state != CHANNEL_OPEN_STATE) {
spin_unlock_irqrestore(&newchannel->lock, flags);
return -EINVAL;
}
spin_unlock_irqrestore(&newchannel->lock, flags);
+ newchannel->state = CHANNEL_OPENING_STATE;
newchannel->onchannel_callback = onchannelcallback;
newchannel->channel_callback_context = context;
- /* Allocate the ring buffer */
- page = alloc_pages_node(cpu_to_node(newchannel->target_cpu),
- GFP_KERNEL|__GFP_ZERO,
- get_order(send_ringbuffer_size +
- recv_ringbuffer_size));
+ err = hv_ringbuffer_init(&newchannel->outbound, page, send_pages);
+ if (err)
+ goto error_clean_ring;
- if (!page)
- page = alloc_pages(GFP_KERNEL|__GFP_ZERO,
- get_order(send_ringbuffer_size +
- recv_ringbuffer_size));
-
- if (!page) {
- err = -ENOMEM;
- goto error_set_chnstate;
- }
-
- newchannel->ringbuffer_pages = page_address(page);
- newchannel->ringbuffer_pagecount = (send_ringbuffer_size +
- recv_ringbuffer_size) >> PAGE_SHIFT;
-
- ret = hv_ringbuffer_init(&newchannel->outbound, page,
- send_ringbuffer_size >> PAGE_SHIFT);
-
- if (ret != 0) {
- err = ret;
- goto error_free_pages;
- }
-
- ret = hv_ringbuffer_init(&newchannel->inbound,
- &page[send_ringbuffer_size >> PAGE_SHIFT],
- recv_ringbuffer_size >> PAGE_SHIFT);
- if (ret != 0) {
- err = ret;
- goto error_free_pages;
- }
-
+ err = hv_ringbuffer_init(&newchannel->inbound,
+ &page[send_pages], recv_pages);
+ if (err)
+ goto error_clean_ring;
/* Establish the gpadl for the ring buffer */
newchannel->ringbuffer_gpadlhandle = 0;
- ret = vmbus_establish_gpadl(newchannel,
- page_address(page),
- send_ringbuffer_size +
- recv_ringbuffer_size,
+ err = vmbus_establish_gpadl(newchannel,
+ page_address(newchannel->ringbuffer_page),
+ (send_pages + recv_pages) << PAGE_SHIFT,
&newchannel->ringbuffer_gpadlhandle);
-
- if (ret != 0) {
- err = ret;
- goto error_free_pages;
- }
+ if (err)
+ goto error_clean_ring;
/* Create and init the channel open message */
open_info = kmalloc(sizeof(*open_info) +
@@ -176,15 +175,9 @@
open_msg->openid = newchannel->offermsg.child_relid;
open_msg->child_relid = newchannel->offermsg.child_relid;
open_msg->ringbuffer_gpadlhandle = newchannel->ringbuffer_gpadlhandle;
- open_msg->downstream_ringbuffer_pageoffset = send_ringbuffer_size >>
- PAGE_SHIFT;
+ open_msg->downstream_ringbuffer_pageoffset = newchannel->ringbuffer_send_offset;
open_msg->target_vp = newchannel->target_vp;
- if (userdatalen > MAX_USER_DEFINED_BYTES) {
- err = -EINVAL;
- goto error_free_gpadl;
- }
-
if (userdatalen)
memcpy(open_msg->userdata, userdata, userdatalen);
@@ -195,18 +188,16 @@
if (newchannel->rescind) {
err = -ENODEV;
- goto error_free_gpadl;
+ goto error_free_info;
}
- ret = vmbus_post_msg(open_msg,
+ err = vmbus_post_msg(open_msg,
sizeof(struct vmbus_channel_open_channel), true);
- trace_vmbus_open(open_msg, ret);
+ trace_vmbus_open(open_msg, err);
- if (ret != 0) {
- err = ret;
+ if (err != 0)
goto error_clean_msglist;
- }
wait_for_completion(&open_info->waitevent);
@@ -216,12 +207,12 @@
if (newchannel->rescind) {
err = -ENODEV;
- goto error_free_gpadl;
+ goto error_free_info;
}
if (open_info->response.open_result.status) {
err = -EAGAIN;
- goto error_free_gpadl;
+ goto error_free_info;
}
newchannel->state = CHANNEL_OPENED_STATE;
@@ -232,24 +223,55 @@
spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
list_del(&open_info->msglistentry);
spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
-
+error_free_info:
+ kfree(open_info);
error_free_gpadl:
vmbus_teardown_gpadl(newchannel, newchannel->ringbuffer_gpadlhandle);
- kfree(open_info);
-error_free_pages:
+ newchannel->ringbuffer_gpadlhandle = 0;
+error_clean_ring:
hv_ringbuffer_cleanup(&newchannel->outbound);
hv_ringbuffer_cleanup(&newchannel->inbound);
- __free_pages(page,
- get_order(send_ringbuffer_size + recv_ringbuffer_size));
-error_set_chnstate:
newchannel->state = CHANNEL_OPEN_STATE;
return err;
}
+
+/*
+ * vmbus_connect_ring - Open the channel but reuse ring buffer
+ */
+int vmbus_connect_ring(struct vmbus_channel *newchannel,
+ void (*onchannelcallback)(void *context), void *context)
+{
+ return __vmbus_open(newchannel, NULL, 0, onchannelcallback, context);
+}
+EXPORT_SYMBOL_GPL(vmbus_connect_ring);
+
+/*
+ * vmbus_open - Open the specified channel.
+ */
+int vmbus_open(struct vmbus_channel *newchannel,
+ u32 send_ringbuffer_size, u32 recv_ringbuffer_size,
+ void *userdata, u32 userdatalen,
+ void (*onchannelcallback)(void *context), void *context)
+{
+ int err;
+
+ err = vmbus_alloc_ring(newchannel, send_ringbuffer_size,
+ recv_ringbuffer_size);
+ if (err)
+ return err;
+
+ err = __vmbus_open(newchannel, userdata, userdatalen,
+ onchannelcallback, context);
+ if (err)
+ vmbus_free_ring(newchannel);
+
+ return err;
+}
EXPORT_SYMBOL_GPL(vmbus_open);
/* Used for Hyper-V Socket: a guest client's connect() to the host */
-int vmbus_send_tl_connect_request(const uuid_le *shv_guest_servie_id,
- const uuid_le *shv_host_servie_id)
+int vmbus_send_tl_connect_request(const guid_t *shv_guest_servie_id,
+ const guid_t *shv_host_servie_id)
{
struct vmbus_channel_tl_connect_request conn_msg;
int ret;
@@ -620,10 +642,8 @@
* in Hyper-V Manager), the driver's remove() invokes vmbus_close():
* here we should skip most of the below cleanup work.
*/
- if (channel->state != CHANNEL_OPENED_STATE) {
- ret = -EINVAL;
- goto out;
- }
+ if (channel->state != CHANNEL_OPENED_STATE)
+ return -EINVAL;
channel->state = CHANNEL_OPEN_STATE;
@@ -645,11 +665,10 @@
* If we failed to post the close msg,
* it is perhaps better to leak memory.
*/
- goto out;
}
/* Tear down the gpadl for the channel's ring buffer */
- if (channel->ringbuffer_gpadlhandle) {
+ else if (channel->ringbuffer_gpadlhandle) {
ret = vmbus_teardown_gpadl(channel,
channel->ringbuffer_gpadlhandle);
if (ret) {
@@ -658,74 +677,70 @@
* If we failed to teardown gpadl,
* it is perhaps better to leak memory.
*/
- goto out;
}
+
+ channel->ringbuffer_gpadlhandle = 0;
}
- /* Cleanup the ring buffers for this channel */
- hv_ringbuffer_cleanup(&channel->outbound);
- hv_ringbuffer_cleanup(&channel->inbound);
-
- free_pages((unsigned long)channel->ringbuffer_pages,
- get_order(channel->ringbuffer_pagecount * PAGE_SIZE));
-
-out:
return ret;
}
+/* disconnect ring - close all channels */
+int vmbus_disconnect_ring(struct vmbus_channel *channel)
+{
+ struct vmbus_channel *cur_channel, *tmp;
+ int ret;
+
+ if (channel->primary_channel != NULL)
+ return -EINVAL;
+
+ list_for_each_entry_safe(cur_channel, tmp, &channel->sc_list, sc_list) {
+ if (cur_channel->rescind)
+ wait_for_completion(&cur_channel->rescind_event);
+
+ mutex_lock(&vmbus_connection.channel_mutex);
+ if (vmbus_close_internal(cur_channel) == 0) {
+ vmbus_free_ring(cur_channel);
+
+ if (cur_channel->rescind)
+ hv_process_channel_removal(cur_channel);
+ }
+ mutex_unlock(&vmbus_connection.channel_mutex);
+ }
+
+ /*
+ * Now close the primary.
+ */
+ mutex_lock(&vmbus_connection.channel_mutex);
+ ret = vmbus_close_internal(channel);
+ mutex_unlock(&vmbus_connection.channel_mutex);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(vmbus_disconnect_ring);
+
/*
* vmbus_close - Close the specified channel
*/
void vmbus_close(struct vmbus_channel *channel)
{
- struct list_head *cur, *tmp;
- struct vmbus_channel *cur_channel;
-
- if (channel->primary_channel != NULL) {
- /*
- * We will only close sub-channels when
- * the primary is closed.
- */
- return;
- }
- /*
- * Close all the sub-channels first and then close the
- * primary channel.
- */
- list_for_each_safe(cur, tmp, &channel->sc_list) {
- cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
- if (cur_channel->rescind) {
- wait_for_completion(&cur_channel->rescind_event);
- mutex_lock(&vmbus_connection.channel_mutex);
- vmbus_close_internal(cur_channel);
- hv_process_channel_removal(
- cur_channel->offermsg.child_relid);
- } else {
- mutex_lock(&vmbus_connection.channel_mutex);
- vmbus_close_internal(cur_channel);
- }
- mutex_unlock(&vmbus_connection.channel_mutex);
- }
- /*
- * Now close the primary.
- */
- mutex_lock(&vmbus_connection.channel_mutex);
- vmbus_close_internal(channel);
- mutex_unlock(&vmbus_connection.channel_mutex);
+ if (vmbus_disconnect_ring(channel) == 0)
+ vmbus_free_ring(channel);
}
EXPORT_SYMBOL_GPL(vmbus_close);
/**
* vmbus_sendpacket() - Send the specified buffer on the given channel
- * @channel: Pointer to vmbus_channel structure.
- * @buffer: Pointer to the buffer you want to receive the data into.
- * @bufferlen: Maximum size of what the the buffer will hold
+ * @channel: Pointer to vmbus_channel structure
+ * @buffer: Pointer to the buffer you want to send the data from.
+ * @bufferlen: Maximum size of what the buffer holds.
* @requestid: Identifier of the request
- * @type: Type of packet that is being send e.g. negotiate, time
- * packet etc.
+ * @type: Type of packet that is being sent e.g. negotiate, time
+ * packet etc.
+ * @flags: 0 or VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED
*
- * Sends data in @buffer directly to hyper-v via the vmbus
- * This will send the data unparsed to hyper-v.
+ * Sends data in @buffer directly to Hyper-V via the vmbus.
+ * This will send the data unparsed to Hyper-V.
*
* Mainly used by Hyper-V drivers.
*/
@@ -858,12 +873,13 @@
EXPORT_SYMBOL_GPL(vmbus_sendpacket_mpb_desc);
/**
- * vmbus_recvpacket() - Retrieve the user packet on the specified channel
- * @channel: Pointer to vmbus_channel structure.
+ * __vmbus_recvpacket() - Retrieve the user packet on the specified channel
+ * @channel: Pointer to vmbus_channel structure
* @buffer: Pointer to the buffer you want to receive the data into.
- * @bufferlen: Maximum size of what the the buffer will hold
- * @buffer_actual_len: The actual size of the data after it was received
+ * @bufferlen: Maximum size of what the buffer can hold.
+ * @buffer_actual_len: The actual size of the data after it was received.
* @requestid: Identifier of the request
+ * @raw: true means keep the vmpacket_descriptor header in the received data.
*
* Receives directly from the hyper-v vmbus and puts the data it received
* into Buffer. This will receive the data unparsed from hyper-v.
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 16eb9b3..8eb1675 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -1,19 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2009, Microsoft Corporation.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
* Authors:
* Haiyang Zhang <haiyangz@microsoft.com>
* Hank Janssen <hjanssen@microsoft.com>
@@ -141,7 +129,7 @@
};
static const struct {
- uuid_le guid;
+ guid_t guid;
} vmbus_unsupported_devs[] = {
{ HV_AVMA1_GUID },
{ HV_AVMA2_GUID },
@@ -171,26 +159,26 @@
spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}
-static bool is_unsupported_vmbus_devs(const uuid_le *guid)
+static bool is_unsupported_vmbus_devs(const guid_t *guid)
{
int i;
for (i = 0; i < ARRAY_SIZE(vmbus_unsupported_devs); i++)
- if (!uuid_le_cmp(*guid, vmbus_unsupported_devs[i].guid))
+ if (guid_equal(guid, &vmbus_unsupported_devs[i].guid))
return true;
return false;
}
static u16 hv_get_dev_type(const struct vmbus_channel *channel)
{
- const uuid_le *guid = &channel->offermsg.offer.if_type;
+ const guid_t *guid = &channel->offermsg.offer.if_type;
u16 i;
if (is_hvsock_channel(channel) || is_unsupported_vmbus_devs(guid))
return HV_UNKNOWN;
for (i = HV_IDE; i < HV_UNKNOWN; i++) {
- if (!uuid_le_cmp(*guid, vmbus_devs[i].guid))
+ if (guid_equal(guid, &vmbus_devs[i].guid))
return i;
}
pr_info("Unknown GUID: %pUl\n", guid);
@@ -198,24 +186,19 @@
}
/**
- * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message
+ * vmbus_prep_negotiate_resp() - Create default response for Negotiate message
* @icmsghdrp: Pointer to msg header structure
- * @icmsg_negotiate: Pointer to negotiate message structure
* @buf: Raw buffer channel data
+ * @fw_version: The framework versions we can support.
+ * @fw_vercnt: The size of @fw_version.
+ * @srv_version: The service versions we can support.
+ * @srv_vercnt: The size of @srv_version.
+ * @nego_fw_version: The selected framework version.
+ * @nego_srv_version: The selected service version.
*
- * @icmsghdrp is of type &struct icmsg_hdr.
+ * Note: Versions are given in decreasing order.
+ *
* Set up and fill in default negotiate response message.
- *
- * The fw_version and fw_vercnt specifies the framework version that
- * we can support.
- *
- * The srv_version and srv_vercnt specifies the service
- * versions we can support.
- *
- * Versions are given in decreasing order.
- *
- * nego_fw_version and nego_srv_version store the selected protocol versions.
- *
* Mainly used by Hyper-V drivers.
*/
bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp,
@@ -341,6 +324,8 @@
tasklet_init(&channel->callback_event,
vmbus_on_event, (unsigned long)channel);
+ hv_ringbuffer_pre_init(channel);
+
return channel;
}
@@ -350,6 +335,7 @@
static void free_channel(struct vmbus_channel *channel)
{
tasklet_kill(&channel->callback_event);
+ vmbus_remove_channel_attr_group(channel);
kobject_put(&channel->kobj);
}
@@ -385,21 +371,14 @@
trace_vmbus_release_relid(&msg, ret);
}
-void hv_process_channel_removal(u32 relid)
+void hv_process_channel_removal(struct vmbus_channel *channel)
{
+ struct vmbus_channel *primary_channel;
unsigned long flags;
- struct vmbus_channel *primary_channel, *channel;
BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
-
- /*
- * Make sure channel is valid as we may have raced.
- */
- channel = relid2channel(relid);
- if (!channel)
- return;
-
BUG_ON(!channel->rescind);
+
if (channel->target_cpu != get_cpu()) {
put_cpu();
smp_call_function_single(channel->target_cpu,
@@ -417,7 +396,6 @@
primary_channel = channel->primary_channel;
spin_lock_irqsave(&primary_channel->lock, flags);
list_del(&channel->sc_list);
- primary_channel->num_sc--;
spin_unlock_irqrestore(&primary_channel->lock, flags);
}
@@ -429,7 +407,15 @@
cpumask_clear_cpu(channel->target_cpu,
&primary_channel->alloced_cpus_in_node);
- vmbus_release_relid(relid);
+ /*
+ * Upon suspend, an in-use hv_sock channel is marked as "rescinded" and
+ * the relid is invalidated; after hibernation, when the user-space app
+ * destroys the channel, the relid is INVALID_RELID, and in this case
+ * it's unnecessary and unsafe to release the old relid, since the same
+ * relid can refer to a completely different channel now.
+ */
+ if (channel->offermsg.child_relid != INVALID_RELID)
+ vmbus_release_relid(channel->offermsg.child_relid);
free_channel(channel);
}
@@ -567,6 +553,10 @@
mutex_lock(&vmbus_connection.channel_mutex);
+ /* Remember the channels that should be cleaned up upon suspend. */
+ if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel))
+ atomic_inc(&vmbus_connection.nr_chan_close_on_suspend);
+
/*
* Now that we have acquired the channel_mutex,
* we can release the potentially racing rescind thread.
@@ -574,10 +564,10 @@
atomic_dec(&vmbus_connection.offer_in_progress);
list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
- if (!uuid_le_cmp(channel->offermsg.offer.if_type,
- newchannel->offermsg.offer.if_type) &&
- !uuid_le_cmp(channel->offermsg.offer.if_instance,
- newchannel->offermsg.offer.if_instance)) {
+ if (guid_equal(&channel->offermsg.offer.if_type,
+ &newchannel->offermsg.offer.if_type) &&
+ guid_equal(&channel->offermsg.offer.if_instance,
+ &newchannel->offermsg.offer.if_instance)) {
fnew = false;
break;
}
@@ -869,6 +859,67 @@
vmbus_wait_for_unload();
}
+static void check_ready_for_resume_event(void)
+{
+ /*
+ * If all the old primary channels have been fixed up, then it's safe
+ * to resume.
+ */
+ if (atomic_dec_and_test(&vmbus_connection.nr_chan_fixup_on_resume))
+ complete(&vmbus_connection.ready_for_resume_event);
+}
+
+static void vmbus_setup_channel_state(struct vmbus_channel *channel,
+ struct vmbus_channel_offer_channel *offer)
+{
+ /*
+ * Setup state for signalling the host.
+ */
+ channel->sig_event = VMBUS_EVENT_CONNECTION_ID;
+
+ if (vmbus_proto_version != VERSION_WS2008) {
+ channel->is_dedicated_interrupt =
+ (offer->is_dedicated_interrupt != 0);
+ channel->sig_event = offer->connection_id;
+ }
+
+ memcpy(&channel->offermsg, offer,
+ sizeof(struct vmbus_channel_offer_channel));
+ channel->monitor_grp = (u8)offer->monitorid / 32;
+ channel->monitor_bit = (u8)offer->monitorid % 32;
+}
+
+/*
+ * find_primary_channel_by_offer - Get the channel object given the new offer.
+ * This is only used in the resume path of hibernation.
+ */
+static struct vmbus_channel *
+find_primary_channel_by_offer(const struct vmbus_channel_offer_channel *offer)
+{
+ struct vmbus_channel *channel = NULL, *iter;
+ const guid_t *inst1, *inst2;
+
+ /* Ignore sub-channel offers. */
+ if (offer->offer.sub_channel_index != 0)
+ return NULL;
+
+ mutex_lock(&vmbus_connection.channel_mutex);
+
+ list_for_each_entry(iter, &vmbus_connection.chn_list, listentry) {
+ inst1 = &iter->offermsg.offer.if_instance;
+ inst2 = &offer->offer.if_instance;
+
+ if (guid_equal(inst1, inst2)) {
+ channel = iter;
+ break;
+ }
+ }
+
+ mutex_unlock(&vmbus_connection.channel_mutex);
+
+ return channel;
+}
+
/*
* vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
*
@@ -876,12 +927,58 @@
static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
{
struct vmbus_channel_offer_channel *offer;
- struct vmbus_channel *newchannel;
+ struct vmbus_channel *oldchannel, *newchannel;
+ size_t offer_sz;
offer = (struct vmbus_channel_offer_channel *)hdr;
trace_vmbus_onoffer(offer);
+ oldchannel = find_primary_channel_by_offer(offer);
+
+ if (oldchannel != NULL) {
+ atomic_dec(&vmbus_connection.offer_in_progress);
+
+ /*
+ * We're resuming from hibernation: all the sub-channel and
+ * hv_sock channels we had before the hibernation should have
+ * been cleaned up, and now we must be seeing a re-offered
+ * primary channel that we had before the hibernation.
+ */
+
+ WARN_ON(oldchannel->offermsg.child_relid != INVALID_RELID);
+ /* Fix up the relid. */
+ oldchannel->offermsg.child_relid = offer->child_relid;
+
+ offer_sz = sizeof(*offer);
+ if (memcmp(offer, &oldchannel->offermsg, offer_sz) == 0) {
+ check_ready_for_resume_event();
+ return;
+ }
+
+ /*
+ * This is not an error, since the host can also change the
+ * other field(s) of the offer, e.g. on WS RS5 (Build 17763),
+ * the offer->connection_id of the Mellanox VF vmbus device
+ * can change when the host reoffers the device upon resume.
+ */
+ pr_debug("vmbus offer changed: relid=%d\n",
+ offer->child_relid);
+
+ print_hex_dump_debug("Old vmbus offer: ", DUMP_PREFIX_OFFSET,
+ 16, 4, &oldchannel->offermsg, offer_sz,
+ false);
+ print_hex_dump_debug("New vmbus offer: ", DUMP_PREFIX_OFFSET,
+ 16, 4, offer, offer_sz, false);
+
+ /* Fix up the old channel. */
+ vmbus_setup_channel_state(oldchannel, offer);
+
+ check_ready_for_resume_event();
+
+ return;
+ }
+
/* Allocate the channel object and save this offer. */
newchannel = alloc_channel();
if (!newchannel) {
@@ -891,25 +988,21 @@
return;
}
- /*
- * Setup state for signalling the host.
- */
- newchannel->sig_event = VMBUS_EVENT_CONNECTION_ID;
-
- if (vmbus_proto_version != VERSION_WS2008) {
- newchannel->is_dedicated_interrupt =
- (offer->is_dedicated_interrupt != 0);
- newchannel->sig_event = offer->connection_id;
- }
-
- memcpy(&newchannel->offermsg, offer,
- sizeof(struct vmbus_channel_offer_channel));
- newchannel->monitor_grp = (u8)offer->monitorid / 32;
- newchannel->monitor_bit = (u8)offer->monitorid % 32;
+ vmbus_setup_channel_state(newchannel, offer);
vmbus_process_offer(newchannel);
}
+static void check_ready_for_suspend_event(void)
+{
+ /*
+ * If all the sub-channels or hv_sock channels have been cleaned up,
+ * then it's safe to suspend.
+ */
+ if (atomic_dec_and_test(&vmbus_connection.nr_chan_close_on_suspend))
+ complete(&vmbus_connection.ready_for_suspend_event);
+}
+
/*
* vmbus_onoffer_rescind - Rescind offer handler.
*
@@ -920,6 +1013,7 @@
struct vmbus_channel_rescind_offer *rescind;
struct vmbus_channel *channel;
struct device *dev;
+ bool clean_up_chan_for_suspend;
rescind = (struct vmbus_channel_rescind_offer *)hdr;
@@ -959,6 +1053,8 @@
return;
}
+ clean_up_chan_for_suspend = is_hvsock_channel(channel) ||
+ is_sub_channel(channel);
/*
* Before setting channel->rescind in vmbus_rescind_cleanup(), we
* should make sure the channel callback is not running any more.
@@ -984,6 +1080,10 @@
if (channel->device_obj) {
if (channel->chn_rescind_callback) {
channel->chn_rescind_callback(channel);
+
+ if (clean_up_chan_for_suspend)
+ check_ready_for_suspend_event();
+
return;
}
/*
@@ -1010,12 +1110,17 @@
* The channel is currently not open;
* it is safe for us to cleanup the channel.
*/
- hv_process_channel_removal(rescind->child_relid);
+ hv_process_channel_removal(channel);
} else {
complete(&channel->rescind_event);
}
mutex_unlock(&vmbus_connection.channel_mutex);
}
+
+ /* The "channel" may have been freed. Do not access it any longer. */
+
+ if (clean_up_chan_for_suspend)
+ check_ready_for_suspend_event();
}
void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
@@ -1314,49 +1419,6 @@
return ret;
}
-/*
- * Retrieve the (sub) channel on which to send an outgoing request.
- * When a primary channel has multiple sub-channels, we try to
- * distribute the load equally amongst all available channels.
- */
-struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary)
-{
- struct list_head *cur, *tmp;
- int cur_cpu;
- struct vmbus_channel *cur_channel;
- struct vmbus_channel *outgoing_channel = primary;
- int next_channel;
- int i = 1;
-
- if (list_empty(&primary->sc_list))
- return outgoing_channel;
-
- next_channel = primary->next_oc++;
-
- if (next_channel > (primary->num_sc)) {
- primary->next_oc = 0;
- return outgoing_channel;
- }
-
- cur_cpu = hv_cpu_number_to_vp_number(smp_processor_id());
- list_for_each_safe(cur, tmp, &primary->sc_list) {
- cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
- if (cur_channel->state != CHANNEL_OPENED_STATE)
- continue;
-
- if (cur_channel->target_vp == cur_cpu)
- return cur_channel;
-
- if (i == next_channel)
- return cur_channel;
-
- i++;
- }
-
- return outgoing_channel;
-}
-EXPORT_SYMBOL_GPL(vmbus_get_outgoing_channel);
-
static void invoke_sc_cb(struct vmbus_channel *primary_channel)
{
struct list_head *cur, *tmp;
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 4fe117b..6e4c015 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -1,24 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
*
* Copyright (c) 2009, Microsoft Corporation.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
* Authors:
* Haiyang Zhang <haiyangz@microsoft.com>
* Hank Janssen <hjanssen@microsoft.com>
- *
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -39,6 +26,11 @@
struct vmbus_connection vmbus_connection = {
.conn_state = DISCONNECTED,
.next_gpadl_handle = ATOMIC_INIT(0xE1E10),
+
+ .ready_for_suspend_event= COMPLETION_INITIALIZER(
+ vmbus_connection.ready_for_suspend_event),
+ .ready_for_resume_event = COMPLETION_INITIALIZER(
+ vmbus_connection.ready_for_resume_event),
};
EXPORT_SYMBOL_GPL(vmbus_connection);
@@ -72,8 +64,7 @@
}
}
-static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo,
- __u32 version)
+int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version)
{
int ret = 0;
unsigned int cur_cpu;
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 748a1c4..fcc5279 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -1,23 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2009, Microsoft Corporation.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
* Authors:
* Haiyang Zhang <haiyangz@microsoft.com>
* Hank Janssen <hjanssen@microsoft.com>
- *
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -29,28 +16,12 @@
#include <linux/version.h>
#include <linux/random.h>
#include <linux/clockchips.h>
+#include <clocksource/hyperv_timer.h>
#include <asm/mshyperv.h>
#include "hyperv_vmbus.h"
/* The one and only */
-struct hv_context hv_context = {
- .synic_initialized = false,
-};
-
-/*
- * If false, we're using the old mechanism for stimer0 interrupts
- * where it sends a VMbus message when it expires. The old
- * mechanism is used when running on older versions of Hyper-V
- * that don't support Direct Mode. While Hyper-V provides
- * four stimer's per CPU, Linux uses only stimer0.
- */
-static bool direct_mode_enabled;
-static int stimer0_irq;
-static int stimer0_vector;
-
-#define HV_TIMER_FREQUENCY (10 * 1000 * 1000) /* 100ns period */
-#define HV_MAX_MAX_DELTA_TICKS 0xffffffff
-#define HV_MIN_DELTA_TICKS 1
+struct hv_context hv_context;
/*
* hv_init - Main initialization routine.
@@ -62,9 +33,6 @@
hv_context.cpu_context = alloc_percpu(struct hv_per_cpu_context);
if (!hv_context.cpu_context)
return -ENOMEM;
-
- direct_mode_enabled = ms_hyperv.misc_features &
- HV_STIMER_DIRECT_MODE_AVAILABLE;
return 0;
}
@@ -103,92 +71,20 @@
return status & 0xFFFF;
}
-/*
- * ISR for when stimer0 is operating in Direct Mode. Direct Mode
- * does not use VMbus or any VMbus messages, so process here and not
- * in the VMbus driver code.
- */
-
-static void hv_stimer0_isr(void)
-{
- struct hv_per_cpu_context *hv_cpu;
-
- hv_cpu = this_cpu_ptr(hv_context.cpu_context);
- hv_cpu->clk_evt->event_handler(hv_cpu->clk_evt);
- add_interrupt_randomness(stimer0_vector, 0);
-}
-
-static int hv_ce_set_next_event(unsigned long delta,
- struct clock_event_device *evt)
-{
- u64 current_tick;
-
- WARN_ON(!clockevent_state_oneshot(evt));
-
- current_tick = hyperv_cs->read(NULL);
- current_tick += delta;
- hv_init_timer(0, current_tick);
- return 0;
-}
-
-static int hv_ce_shutdown(struct clock_event_device *evt)
-{
- hv_init_timer(0, 0);
- hv_init_timer_config(0, 0);
- if (direct_mode_enabled)
- hv_disable_stimer0_percpu_irq(stimer0_irq);
-
- return 0;
-}
-
-static int hv_ce_set_oneshot(struct clock_event_device *evt)
-{
- union hv_timer_config timer_cfg;
-
- timer_cfg.as_uint64 = 0;
- timer_cfg.enable = 1;
- timer_cfg.auto_enable = 1;
- if (direct_mode_enabled) {
- /*
- * When it expires, the timer will directly interrupt
- * on the specified hardware vector/IRQ.
- */
- timer_cfg.direct_mode = 1;
- timer_cfg.apic_vector = stimer0_vector;
- hv_enable_stimer0_percpu_irq(stimer0_irq);
- } else {
- /*
- * When it expires, the timer will generate a VMbus message,
- * to be handled by the normal VMbus interrupt handler.
- */
- timer_cfg.direct_mode = 0;
- timer_cfg.sintx = VMBUS_MESSAGE_SINT;
- }
- hv_init_timer_config(0, timer_cfg.as_uint64);
- return 0;
-}
-
-static void hv_init_clockevent_device(struct clock_event_device *dev, int cpu)
-{
- dev->name = "Hyper-V clockevent";
- dev->features = CLOCK_EVT_FEAT_ONESHOT;
- dev->cpumask = cpumask_of(cpu);
- dev->rating = 1000;
- /*
- * Avoid settint dev->owner = THIS_MODULE deliberately as doing so will
- * result in clockevents_config_and_register() taking additional
- * references to the hv_vmbus module making it impossible to unload.
- */
-
- dev->set_state_shutdown = hv_ce_shutdown;
- dev->set_state_oneshot = hv_ce_set_oneshot;
- dev->set_next_event = hv_ce_set_next_event;
-}
-
-
int hv_synic_alloc(void)
{
int cpu;
+ struct hv_per_cpu_context *hv_cpu;
+
+ /*
+ * First, zero all per-cpu memory areas so hv_synic_free() can
+ * detect what memory has been allocated and cleanup properly
+ * after any failures.
+ */
+ for_each_present_cpu(cpu) {
+ hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);
+ memset(hv_cpu, 0, sizeof(*hv_cpu));
+ }
hv_context.hv_numa_map = kcalloc(nr_node_ids, sizeof(struct cpumask),
GFP_KERNEL);
@@ -198,21 +94,11 @@
}
for_each_present_cpu(cpu) {
- struct hv_per_cpu_context *hv_cpu
- = per_cpu_ptr(hv_context.cpu_context, cpu);
+ hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);
- memset(hv_cpu, 0, sizeof(*hv_cpu));
tasklet_init(&hv_cpu->msg_dpc,
vmbus_on_msg_dpc, (unsigned long) hv_cpu);
- hv_cpu->clk_evt = kzalloc(sizeof(struct clock_event_device),
- GFP_KERNEL);
- if (hv_cpu->clk_evt == NULL) {
- pr_err("Unable to allocate clock event device\n");
- goto err;
- }
- hv_init_clockevent_device(hv_cpu->clk_evt, cpu);
-
hv_cpu->synic_message_page =
(void *)get_zeroed_page(GFP_ATOMIC);
if (hv_cpu->synic_message_page == NULL) {
@@ -235,11 +121,6 @@
INIT_LIST_HEAD(&hv_cpu->chan_list);
}
- if (direct_mode_enabled &&
- hv_setup_stimer0_irq(&stimer0_irq, &stimer0_vector,
- hv_stimer0_isr))
- goto err;
-
return 0;
err:
/*
@@ -258,7 +139,6 @@
struct hv_per_cpu_context *hv_cpu
= per_cpu_ptr(hv_context.cpu_context, cpu);
- kfree(hv_cpu->clk_evt);
free_page((unsigned long)hv_cpu->synic_event_page);
free_page((unsigned long)hv_cpu->synic_message_page);
free_page((unsigned long)hv_cpu->post_msg_page);
@@ -274,7 +154,7 @@
* retrieve the initialized message and event pages. Otherwise, we create and
* initialize the message and event pages.
*/
-int hv_synic_init(unsigned int cpu)
+void hv_synic_enable_regs(unsigned int cpu)
{
struct hv_per_cpu_context *hv_cpu
= per_cpu_ptr(hv_context.cpu_context, cpu);
@@ -316,57 +196,59 @@
sctrl.enable = 1;
hv_set_synic_state(sctrl.as_uint64);
-
- hv_context.synic_initialized = true;
-
- /*
- * Register the per-cpu clockevent source.
- */
- if (ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE)
- clockevents_config_and_register(hv_cpu->clk_evt,
- HV_TIMER_FREQUENCY,
- HV_MIN_DELTA_TICKS,
- HV_MAX_MAX_DELTA_TICKS);
- return 0;
}
-/*
- * hv_synic_clockevents_cleanup - Cleanup clockevent devices
- */
-void hv_synic_clockevents_cleanup(void)
+int hv_synic_init(unsigned int cpu)
{
- int cpu;
+ hv_synic_enable_regs(cpu);
- if (!(ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE))
- return;
+ hv_stimer_init(cpu);
- if (direct_mode_enabled)
- hv_remove_stimer0_irq(stimer0_irq);
-
- for_each_present_cpu(cpu) {
- struct hv_per_cpu_context *hv_cpu
- = per_cpu_ptr(hv_context.cpu_context, cpu);
-
- clockevents_unbind_device(hv_cpu->clk_evt, cpu);
- }
+ return 0;
}
/*
* hv_synic_cleanup - Cleanup routine for hv_synic_init().
*/
-int hv_synic_cleanup(unsigned int cpu)
+void hv_synic_disable_regs(unsigned int cpu)
{
union hv_synic_sint shared_sint;
union hv_synic_simp simp;
union hv_synic_siefp siefp;
union hv_synic_scontrol sctrl;
+
+ hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
+
+ shared_sint.masked = 1;
+
+ /* Need to correctly cleanup in the case of SMP!!! */
+ /* Disable the interrupt */
+ hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
+
+ hv_get_simp(simp.as_uint64);
+ simp.simp_enabled = 0;
+ simp.base_simp_gpa = 0;
+
+ hv_set_simp(simp.as_uint64);
+
+ hv_get_siefp(siefp.as_uint64);
+ siefp.siefp_enabled = 0;
+ siefp.base_siefp_gpa = 0;
+
+ hv_set_siefp(siefp.as_uint64);
+
+ /* Disable the global synic bit */
+ hv_get_synic_state(sctrl.as_uint64);
+ sctrl.enable = 0;
+ hv_set_synic_state(sctrl.as_uint64);
+}
+
+int hv_synic_cleanup(unsigned int cpu)
+{
struct vmbus_channel *channel, *sc;
bool channel_found = false;
unsigned long flags;
- if (!hv_context.synic_initialized)
- return -EFAULT;
-
/*
* Search for channels which are bound to the CPU we're about to
* cleanup. In case we find one and vmbus is still connected we need to
@@ -395,40 +277,9 @@
if (channel_found && vmbus_connection.conn_state == CONNECTED)
return -EBUSY;
- /* Turn off clockevent device */
- if (ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE) {
- struct hv_per_cpu_context *hv_cpu
- = this_cpu_ptr(hv_context.cpu_context);
+ hv_stimer_cleanup(cpu);
- clockevents_unbind_device(hv_cpu->clk_evt, cpu);
- hv_ce_shutdown(hv_cpu->clk_evt);
- put_cpu_ptr(hv_cpu);
- }
-
- hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
-
- shared_sint.masked = 1;
-
- /* Need to correctly cleanup in the case of SMP!!! */
- /* Disable the interrupt */
- hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
-
- hv_get_simp(simp.as_uint64);
- simp.simp_enabled = 0;
- simp.base_simp_gpa = 0;
-
- hv_set_simp(simp.as_uint64);
-
- hv_get_siefp(siefp.as_uint64);
- siefp.siefp_enabled = 0;
- siefp.base_siefp_gpa = 0;
-
- hv_set_siefp(siefp.as_uint64);
-
- /* Disable the global synic bit */
- hv_get_synic_state(sctrl.as_uint64);
- sctrl.enable = 0;
- hv_set_synic_state(sctrl.as_uint64);
+ hv_synic_disable_regs(cpu);
return 0;
}
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index b1b7880..34bd735 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -1,19 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2012, Microsoft Corporation.
*
* Author:
* K. Y. Srinivasan <kys@microsoft.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -504,7 +494,7 @@
static __u8 recv_buffer[PAGE_SIZE];
-static __u8 *send_buffer;
+static __u8 balloon_up_send_buffer[PAGE_SIZE];
#define PAGES_IN_2M 512
#define HA_CHUNK (32 * 1024)
@@ -681,15 +671,20 @@
/* Check if the particular page is backed and can be onlined and online it. */
static void hv_page_online_one(struct hv_hotadd_state *has, struct page *pg)
{
- if (!has_pfn_is_backed(has, page_to_pfn(pg)))
+ if (!has_pfn_is_backed(has, page_to_pfn(pg))) {
+ if (!PageOffline(pg))
+ __SetPageOffline(pg);
return;
+ }
+ if (PageOffline(pg))
+ __ClearPageOffline(pg);
/* This frame is currently backed; online the page. */
__online_page_set_limits(pg);
__online_page_increment_counters(pg);
__online_page_free(pg);
- WARN_ON_ONCE(!spin_is_locked(&dm_device.ha_lock));
+ lockdep_assert_held(&dm_device.ha_lock);
dm_device.num_pages_onlined++;
}
@@ -771,7 +766,7 @@
}
}
-static void hv_online_page(struct page *pg)
+static void hv_online_page(struct page *pg, unsigned int order)
{
struct hv_hotadd_state *has;
unsigned long flags;
@@ -780,10 +775,11 @@
spin_lock_irqsave(&dm_device.ha_lock, flags);
list_for_each_entry(has, &dm_device.ha_region_list, list) {
/* The page belongs to a different HAS. */
- if ((pfn < has->start_pfn) || (pfn >= has->end_pfn))
+ if ((pfn < has->start_pfn) ||
+ (pfn + (1UL << order) > has->end_pfn))
continue;
- hv_page_online_one(has, pg);
+ hv_bring_pgs_online(has, pfn, 1UL << order);
break;
}
spin_unlock_irqrestore(&dm_device.ha_lock, flags);
@@ -888,12 +884,14 @@
pfn_cnt -= pgs_ol;
/*
* Check if the corresponding memory block is already
- * online by checking its last previously backed page.
- * In case it is we need to bring rest (which was not
- * backed previously) online too.
+ * online. It is possible to observe struct pages still
+ * being uninitialized here so check section instead.
+ * In case the section is online we need to bring the
+ * rest of pfns (which were not backed previously)
+ * online too.
*/
if (start_pfn > has->start_pfn &&
- !PageReserved(pfn_to_page(start_pfn - 1)))
+ online_section_nr(pfn_to_section_nr(start_pfn)))
hv_bring_pgs_online(has, start_pfn, pgs_ol);
}
@@ -1090,6 +1088,7 @@
static unsigned long compute_balloon_floor(void)
{
unsigned long min_pages;
+ unsigned long nr_pages = totalram_pages();
#define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
/* Simple continuous piecewiese linear function:
* max MiB -> min MiB gradient
@@ -1102,16 +1101,16 @@
* 8192 744 (1/16)
* 32768 1512 (1/32)
*/
- if (totalram_pages < MB2PAGES(128))
- min_pages = MB2PAGES(8) + (totalram_pages >> 1);
- else if (totalram_pages < MB2PAGES(512))
- min_pages = MB2PAGES(40) + (totalram_pages >> 2);
- else if (totalram_pages < MB2PAGES(2048))
- min_pages = MB2PAGES(104) + (totalram_pages >> 3);
- else if (totalram_pages < MB2PAGES(8192))
- min_pages = MB2PAGES(232) + (totalram_pages >> 4);
+ if (nr_pages < MB2PAGES(128))
+ min_pages = MB2PAGES(8) + (nr_pages >> 1);
+ else if (nr_pages < MB2PAGES(512))
+ min_pages = MB2PAGES(40) + (nr_pages >> 2);
+ else if (nr_pages < MB2PAGES(2048))
+ min_pages = MB2PAGES(104) + (nr_pages >> 3);
+ else if (nr_pages < MB2PAGES(8192))
+ min_pages = MB2PAGES(232) + (nr_pages >> 4);
else
- min_pages = MB2PAGES(488) + (totalram_pages >> 5);
+ min_pages = MB2PAGES(488) + (nr_pages >> 5);
#undef MB2PAGES
return min_pages;
}
@@ -1198,6 +1197,7 @@
for (i = 0; i < num_pages; i++) {
pg = pfn_to_page(i + start_frame);
+ __ClearPageOffline(pg);
__free_page(pg);
dm->num_pages_ballooned--;
}
@@ -1210,7 +1210,7 @@
struct dm_balloon_response *bl_resp,
int alloc_unit)
{
- unsigned int i = 0;
+ unsigned int i, j;
struct page *pg;
if (num_pages < alloc_unit)
@@ -1242,6 +1242,10 @@
if (alloc_unit != 1)
split_page(pg, get_order(alloc_unit << PAGE_SHIFT));
+ /* mark all pages offline */
+ for (j = 0; j < (1 << get_order(alloc_unit << PAGE_SHIFT)); j++)
+ __SetPageOffline(pg + j);
+
bl_resp->range_count++;
bl_resp->range_array[i].finfo.start_page =
page_to_pfn(pg);
@@ -1288,8 +1292,8 @@
}
while (!done) {
- bl_resp = (struct dm_balloon_response *)send_buffer;
- memset(send_buffer, 0, PAGE_SIZE);
+ memset(balloon_up_send_buffer, 0, PAGE_SIZE);
+ bl_resp = (struct dm_balloon_response *)balloon_up_send_buffer;
bl_resp->hdr.type = DM_BALLOON_RESPONSE;
bl_resp->hdr.size = sizeof(struct dm_balloon_response);
bl_resp->more_pages = 1;
@@ -1560,58 +1564,18 @@
}
-static int balloon_probe(struct hv_device *dev,
- const struct hv_vmbus_device_id *dev_id)
+static int balloon_connect_vsp(struct hv_device *dev)
{
- int ret;
- unsigned long t;
struct dm_version_request version_req;
struct dm_capabilities cap_msg;
-
-#ifdef CONFIG_MEMORY_HOTPLUG
- do_hot_add = hot_add;
-#else
- do_hot_add = false;
-#endif
-
- /*
- * First allocate a send buffer.
- */
-
- send_buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
- if (!send_buffer)
- return -ENOMEM;
+ unsigned long t;
+ int ret;
ret = vmbus_open(dev->channel, dm_ring_size, dm_ring_size, NULL, 0,
- balloon_onchannelcallback, dev);
-
+ balloon_onchannelcallback, dev);
if (ret)
- goto probe_error0;
+ return ret;
- dm_device.dev = dev;
- dm_device.state = DM_INITIALIZING;
- dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN8;
- init_completion(&dm_device.host_event);
- init_completion(&dm_device.config_event);
- INIT_LIST_HEAD(&dm_device.ha_region_list);
- spin_lock_init(&dm_device.ha_lock);
- INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
- INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req);
- dm_device.host_specified_ha_region = false;
-
- dm_device.thread =
- kthread_run(dm_thread_func, &dm_device, "hv_balloon");
- if (IS_ERR(dm_device.thread)) {
- ret = PTR_ERR(dm_device.thread);
- goto probe_error1;
- }
-
-#ifdef CONFIG_MEMORY_HOTPLUG
- set_online_page_callback(&hv_online_page);
- register_memory_notifier(&hv_memory_nb);
-#endif
-
- hv_set_drvdata(dev, &dm_device);
/*
* Initiate the hand shake with the host and negotiate
* a version that the host can support. We start with the
@@ -1627,16 +1591,15 @@
dm_device.version = version_req.version.version;
ret = vmbus_sendpacket(dev->channel, &version_req,
- sizeof(struct dm_version_request),
- (unsigned long)NULL,
- VM_PKT_DATA_INBAND, 0);
+ sizeof(struct dm_version_request),
+ (unsigned long)NULL, VM_PKT_DATA_INBAND, 0);
if (ret)
- goto probe_error2;
+ goto out;
t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ);
if (t == 0) {
ret = -ETIMEDOUT;
- goto probe_error2;
+ goto out;
}
/*
@@ -1644,8 +1607,8 @@
* fail the probe function.
*/
if (dm_device.state == DM_INIT_ERROR) {
- ret = -ETIMEDOUT;
- goto probe_error2;
+ ret = -EPROTO;
+ goto out;
}
pr_info("Using Dynamic Memory protocol version %u.%u\n",
@@ -1678,16 +1641,15 @@
cap_msg.max_page_number = -1;
ret = vmbus_sendpacket(dev->channel, &cap_msg,
- sizeof(struct dm_capabilities),
- (unsigned long)NULL,
- VM_PKT_DATA_INBAND, 0);
+ sizeof(struct dm_capabilities),
+ (unsigned long)NULL, VM_PKT_DATA_INBAND, 0);
if (ret)
- goto probe_error2;
+ goto out;
t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ);
if (t == 0) {
ret = -ETIMEDOUT;
- goto probe_error2;
+ goto out;
}
/*
@@ -1695,25 +1657,65 @@
* fail the probe function.
*/
if (dm_device.state == DM_INIT_ERROR) {
- ret = -ETIMEDOUT;
- goto probe_error2;
+ ret = -EPROTO;
+ goto out;
}
+ return 0;
+out:
+ vmbus_close(dev->channel);
+ return ret;
+}
+
+static int balloon_probe(struct hv_device *dev,
+ const struct hv_vmbus_device_id *dev_id)
+{
+ int ret;
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+ do_hot_add = hot_add;
+#else
+ do_hot_add = false;
+#endif
+ dm_device.dev = dev;
+ dm_device.state = DM_INITIALIZING;
+ dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN8;
+ init_completion(&dm_device.host_event);
+ init_completion(&dm_device.config_event);
+ INIT_LIST_HEAD(&dm_device.ha_region_list);
+ spin_lock_init(&dm_device.ha_lock);
+ INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
+ INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req);
+ dm_device.host_specified_ha_region = false;
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+ set_online_page_callback(&hv_online_page);
+ register_memory_notifier(&hv_memory_nb);
+#endif
+
+ hv_set_drvdata(dev, &dm_device);
+
+ ret = balloon_connect_vsp(dev);
+ if (ret != 0)
+ return ret;
+
dm_device.state = DM_INITIALIZED;
- last_post_time = jiffies;
+
+ dm_device.thread =
+ kthread_run(dm_thread_func, &dm_device, "hv_balloon");
+ if (IS_ERR(dm_device.thread)) {
+ ret = PTR_ERR(dm_device.thread);
+ goto probe_error;
+ }
return 0;
-probe_error2:
+probe_error:
+ vmbus_close(dev->channel);
#ifdef CONFIG_MEMORY_HOTPLUG
+ unregister_memory_notifier(&hv_memory_nb);
restore_online_page_callback(&hv_online_page);
#endif
- kthread_stop(dm_device.thread);
-
-probe_error1:
- vmbus_close(dev->channel);
-probe_error0:
- kfree(send_buffer);
return ret;
}
@@ -1730,12 +1732,11 @@
cancel_work_sync(&dm->balloon_wrk.wrk);
cancel_work_sync(&dm->ha_wrk.wrk);
- vmbus_close(dev->channel);
kthread_stop(dm->thread);
- kfree(send_buffer);
+ vmbus_close(dev->channel);
#ifdef CONFIG_MEMORY_HOTPLUG
- restore_online_page_callback(&hv_online_page);
unregister_memory_notifier(&hv_memory_nb);
+ restore_online_page_callback(&hv_online_page);
#endif
spin_lock_irqsave(&dm_device.ha_lock, flags);
list_for_each_entry_safe(has, tmp, &dm->ha_region_list, list) {
diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c
index 2364281..7e30ae0 100644
--- a/drivers/hv/hv_fcopy.c
+++ b/drivers/hv/hv_fcopy.c
@@ -1,20 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* An implementation of file copy service.
*
* Copyright (C) 2014, Microsoft, Inc.
*
* Author : K. Y. Srinivasan <ksrinivasan@novell.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index 5eed1e7..5054d11 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -353,7 +353,9 @@
out->body.kvp_ip_val.dhcp_enabled = in->kvp_ip_val.dhcp_enabled;
- default:
+ /* fallthrough */
+
+ case KVP_OP_GET_IP_INFO:
utf16s_to_utf8s((wchar_t *)in->kvp_ip_val.adapter_id,
MAX_ADAPTER_ID_SIZE,
UTF16_LITTLE_ENDIAN,
@@ -406,6 +408,10 @@
process_ib_ipinfo(in_msg, message, KVP_OP_SET_IP_INFO);
break;
case KVP_OP_GET_IP_INFO:
+ /*
+ * We only need to pass on the info of operation, adapter_id
+ * and addr_family to the userland kvp daemon.
+ */
process_ib_ipinfo(in_msg, message, KVP_OP_GET_IP_INFO);
break;
case KVP_OP_SET:
@@ -421,7 +427,7 @@
UTF16_LITTLE_ENDIAN,
message->body.kvp_set.data.value,
HV_KVP_EXCHANGE_MAX_VALUE_SIZE - 1) + 1;
- break;
+ break;
case REG_U32:
/*
@@ -431,7 +437,7 @@
val32 = in_msg->body.kvp_set.data.value_u32;
message->body.kvp_set.data.value_size =
sprintf(message->body.kvp_set.data.value,
- "%d", val32) + 1;
+ "%u", val32) + 1;
break;
case REG_U64:
@@ -446,7 +452,10 @@
break;
}
- case KVP_OP_GET:
+
+ /*
+ * The key is always a string - utf16 encoding.
+ */
message->body.kvp_set.data.key_size =
utf16s_to_utf8s(
(wchar_t *)in_msg->body.kvp_set.data.key,
@@ -454,7 +463,18 @@
UTF16_LITTLE_ENDIAN,
message->body.kvp_set.data.key,
HV_KVP_EXCHANGE_MAX_KEY_SIZE - 1) + 1;
- break;
+
+ break;
+
+ case KVP_OP_GET:
+ message->body.kvp_get.data.key_size =
+ utf16s_to_utf8s(
+ (wchar_t *)in_msg->body.kvp_get.data.key,
+ in_msg->body.kvp_get.data.key_size,
+ UTF16_LITTLE_ENDIAN,
+ message->body.kvp_get.data.key,
+ HV_KVP_EXCHANGE_MAX_KEY_SIZE - 1) + 1;
+ break;
case KVP_OP_DELETE:
message->body.kvp_delete.key_size =
@@ -464,12 +484,12 @@
UTF16_LITTLE_ENDIAN,
message->body.kvp_delete.key,
HV_KVP_EXCHANGE_MAX_KEY_SIZE - 1) + 1;
- break;
+ break;
case KVP_OP_ENUMERATE:
message->body.kvp_enum_data.index =
in_msg->body.kvp_enum_data.index;
- break;
+ break;
}
kvp_transaction.state = HVUTIL_USERSPACE_REQ;
diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c
index 6831efd..20ba95b 100644
--- a/drivers/hv/hv_snapshot.c
+++ b/drivers/hv/hv_snapshot.c
@@ -1,20 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* An implementation of host initiated guest snapshot.
*
- *
* Copyright (C) 2013, Microsoft, Inc.
* Author : K. Y. Srinivasan <kys@microsoft.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/hv/hv_trace.h b/drivers/hv/hv_trace.h
index 999f80a..e70783e 100644
--- a/drivers/hv/hv_trace.h
+++ b/drivers/hv/hv_trace.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM hyperv
diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c
index 4232050..e32681e 100644
--- a/drivers/hv/hv_util.c
+++ b/drivers/hv/hv_util.c
@@ -1,19 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2010, Microsoft Corporation.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
* Authors:
* Haiyang Zhang <haiyangz@microsoft.com>
* Hank Janssen <hjanssen@microsoft.com>
@@ -29,6 +17,7 @@
#include <linux/hyperv.h>
#include <linux/clockchips.h>
#include <linux/ptp_clock_kernel.h>
+#include <clocksource/hyperv_timer.h>
#include <asm/mshyperv.h>
#include "hyperv_vmbus.h"
@@ -483,7 +472,7 @@
/* The one and only one */
static struct hv_driver util_drv = {
- .name = "hv_util",
+ .name = "hv_utils",
.id_table = id_table,
.probe = util_probe,
.remove = util_remove,
diff --git a/drivers/hv/hv_utils_transport.c b/drivers/hv/hv_utils_transport.c
index 8327775..eb2833d 100644
--- a/drivers/hv/hv_utils_transport.c
+++ b/drivers/hv/hv_utils_transport.c
@@ -1,18 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Kernel/userspace transport abstraction for Hyper-V util driver.
*
* Copyright (C) 2015, Vitaly Kuznetsov <vkuznets@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
*/
#include <linux/slab.h>
diff --git a/drivers/hv/hv_utils_transport.h b/drivers/hv/hv_utils_transport.h
index 79afb62..1c16239 100644
--- a/drivers/hv/hv_utils_transport.h
+++ b/drivers/hv/hv_utils_transport.h
@@ -1,18 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Kernel/userspace transport abstraction for Hyper-V util driver.
*
* Copyright (C) 2015, Vitaly Kuznetsov <vkuznets@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
*/
#ifndef _HV_UTILS_TRANSPORT_H
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 87d3d7d..af9379a 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -1,25 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
*
* Copyright (c) 2011, Microsoft Corporation.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
* Authors:
* Haiyang Zhang <haiyangz@microsoft.com>
* Hank Janssen <hjanssen@microsoft.com>
* K. Y. Srinivasan <kys@microsoft.com>
- *
*/
#ifndef _HYPERV_VMBUS_H
@@ -44,74 +31,6 @@
*/
#define HV_UTIL_NEGO_TIMEOUT 55
-/* Define synthetic interrupt controller flag constants. */
-#define HV_EVENT_FLAGS_COUNT (256 * 8)
-#define HV_EVENT_FLAGS_LONG_COUNT (256 / sizeof(unsigned long))
-
-/*
- * Timer configuration register.
- */
-union hv_timer_config {
- u64 as_uint64;
- struct {
- u64 enable:1;
- u64 periodic:1;
- u64 lazy:1;
- u64 auto_enable:1;
- u64 apic_vector:8;
- u64 direct_mode:1;
- u64 reserved_z0:3;
- u64 sintx:4;
- u64 reserved_z1:44;
- };
-};
-
-
-/* Define the synthetic interrupt controller event flags format. */
-union hv_synic_event_flags {
- unsigned long flags[HV_EVENT_FLAGS_LONG_COUNT];
-};
-
-/* Define SynIC control register. */
-union hv_synic_scontrol {
- u64 as_uint64;
- struct {
- u64 enable:1;
- u64 reserved:63;
- };
-};
-
-/* Define synthetic interrupt source. */
-union hv_synic_sint {
- u64 as_uint64;
- struct {
- u64 vector:8;
- u64 reserved1:8;
- u64 masked:1;
- u64 auto_eoi:1;
- u64 reserved2:46;
- };
-};
-
-/* Define the format of the SIMP register */
-union hv_synic_simp {
- u64 as_uint64;
- struct {
- u64 simp_enabled:1;
- u64 preserved:11;
- u64 base_simp_gpa:52;
- };
-};
-
-/* Define the format of the SIEFP register */
-union hv_synic_siefp {
- u64 as_uint64;
- struct {
- u64 siefp_enabled:1;
- u64 preserved:11;
- u64 base_siefp_gpa:52;
- };
-};
/* Definitions for the monitored notification facility */
union hv_monitor_trigger_group {
@@ -219,7 +138,6 @@
* per-cpu list of the channels based on their CPU affinity.
*/
struct list_head chan_list;
- struct clock_event_device *clk_evt;
};
struct hv_context {
@@ -228,10 +146,6 @@
*/
u64 guestid;
- void *tsc_page;
-
- bool synic_initialized;
-
struct hv_per_cpu_context __percpu *cpu_context;
/*
@@ -255,14 +169,15 @@
extern void hv_synic_free(void);
+extern void hv_synic_enable_regs(unsigned int cpu);
extern int hv_synic_init(unsigned int cpu);
+extern void hv_synic_disable_regs(unsigned int cpu);
extern int hv_synic_cleanup(unsigned int cpu);
-extern void hv_synic_clockevents_cleanup(void);
-
/* Interface */
+void hv_ringbuffer_pre_init(struct vmbus_channel *channel);
int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
struct page *pages, u32 pagecnt);
@@ -277,11 +192,11 @@
u64 *requestid, bool raw);
/*
- * Maximum channels is determined by the size of the interrupt page
- * which is PAGE_SIZE. 1/2 of PAGE_SIZE is for send endpoint interrupt
- * and the other is receive endpoint interrupt
+ * The Maximum number of channels (16348) is determined by the size of the
+ * interrupt page, which is HV_HYP_PAGE_SIZE. 1/2 of HV_HYP_PAGE_SIZE is to
+ * send endpoint interrupts, and the other is to receive endpoint interrupts.
*/
-#define MAX_NUM_CHANNELS ((PAGE_SIZE >> 1) << 3) /* 16348 channels */
+#define MAX_NUM_CHANNELS ((HV_HYP_PAGE_SIZE >> 1) << 3)
/* The value here must be in multiple of 32 */
/* TODO: Need to make this configurable */
@@ -343,6 +258,32 @@
struct workqueue_struct *work_queue;
struct workqueue_struct *handle_primary_chan_wq;
struct workqueue_struct *handle_sub_chan_wq;
+
+ /*
+ * The number of sub-channels and hv_sock channels that should be
+ * cleaned up upon suspend: sub-channels will be re-created upon
+ * resume, and hv_sock channels should not survive suspend.
+ */
+ atomic_t nr_chan_close_on_suspend;
+ /*
+ * vmbus_bus_suspend() waits for "nr_chan_close_on_suspend" to
+ * drop to zero.
+ */
+ struct completion ready_for_suspend_event;
+
+ /*
+ * The number of primary channels that should be "fixed up"
+ * upon resume: these channels are re-offered upon resume, and some
+ * fields of the channel offers (i.e. child_relid and connection_id)
+ * can change, so the old offermsg must be fixed up, before the resume
+ * callbacks of the VSC drivers start to further touch the channels.
+ */
+ atomic_t nr_chan_fixup_on_resume;
+ /*
+ * vmbus_bus_resume() waits for "nr_chan_fixup_on_resume" to
+ * drop to zero.
+ */
+ struct completion ready_for_resume_event;
};
@@ -357,6 +298,8 @@
extern struct vmbus_connection vmbus_connection;
+int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version);
+
static inline void vmbus_send_interrupt(u32 relid)
{
sync_set_bit(relid, vmbus_connection.send_int_page);
@@ -382,8 +325,8 @@
/* General vmbus interface */
-struct hv_device *vmbus_device_create(const uuid_le *type,
- const uuid_le *instance,
+struct hv_device *vmbus_device_create(const guid_t *type,
+ const guid_t *instance,
struct vmbus_channel *channel);
int vmbus_device_register(struct hv_device *child_device_obj);
@@ -391,6 +334,8 @@
int vmbus_add_channel_kobj(struct hv_device *device_obj,
struct vmbus_channel *channel);
+void vmbus_remove_channel_attr_group(struct vmbus_channel *channel);
+
struct vmbus_channel *relid2channel(u32 relid);
void vmbus_free_channels(void);
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index 3e90eb9..9a03b16 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -1,25 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
*
* Copyright (c) 2009, Microsoft Corporation.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
* Authors:
* Haiyang Zhang <haiyangz@microsoft.com>
* Hank Janssen <hjanssen@microsoft.com>
* K. Y. Srinivasan <kys@microsoft.com>
- *
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -74,8 +61,10 @@
* This is the only case we need to signal when the
* ring transitions from being empty to non-empty.
*/
- if (old_write == READ_ONCE(rbi->ring_buffer->read_index))
+ if (old_write == READ_ONCE(rbi->ring_buffer->read_index)) {
+ ++channel->intr_out_empty;
vmbus_setevent(channel);
+ }
}
/* Get the next write location for the specified ring buffer. */
@@ -164,29 +153,41 @@
}
/* Get various debug metrics for the specified ring buffer. */
-void hv_ringbuffer_get_debuginfo(const struct hv_ring_buffer_info *ring_info,
- struct hv_ring_buffer_debug_info *debug_info)
+int hv_ringbuffer_get_debuginfo(struct hv_ring_buffer_info *ring_info,
+ struct hv_ring_buffer_debug_info *debug_info)
{
u32 bytes_avail_towrite;
u32 bytes_avail_toread;
- if (ring_info->ring_buffer) {
- hv_get_ringbuffer_availbytes(ring_info,
- &bytes_avail_toread,
- &bytes_avail_towrite);
+ mutex_lock(&ring_info->ring_buffer_mutex);
- debug_info->bytes_avail_toread = bytes_avail_toread;
- debug_info->bytes_avail_towrite = bytes_avail_towrite;
- debug_info->current_read_index =
- ring_info->ring_buffer->read_index;
- debug_info->current_write_index =
- ring_info->ring_buffer->write_index;
- debug_info->current_interrupt_mask =
- ring_info->ring_buffer->interrupt_mask;
+ if (!ring_info->ring_buffer) {
+ mutex_unlock(&ring_info->ring_buffer_mutex);
+ return -EINVAL;
}
+
+ hv_get_ringbuffer_availbytes(ring_info,
+ &bytes_avail_toread,
+ &bytes_avail_towrite);
+ debug_info->bytes_avail_toread = bytes_avail_toread;
+ debug_info->bytes_avail_towrite = bytes_avail_towrite;
+ debug_info->current_read_index = ring_info->ring_buffer->read_index;
+ debug_info->current_write_index = ring_info->ring_buffer->write_index;
+ debug_info->current_interrupt_mask
+ = ring_info->ring_buffer->interrupt_mask;
+ mutex_unlock(&ring_info->ring_buffer_mutex);
+
+ return 0;
}
EXPORT_SYMBOL_GPL(hv_ringbuffer_get_debuginfo);
+/* Initialize a channel's ring buffer info mutex locks */
+void hv_ringbuffer_pre_init(struct vmbus_channel *channel)
+{
+ mutex_init(&channel->inbound.ring_buffer_mutex);
+ mutex_init(&channel->outbound.ring_buffer_mutex);
+}
+
/* Initialize the ring buffer. */
int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
struct page *pages, u32 page_cnt)
@@ -196,8 +197,6 @@
BUILD_BUG_ON((sizeof(struct hv_ring_buffer) != PAGE_SIZE));
- memset(ring_info, 0, sizeof(struct hv_ring_buffer_info));
-
/*
* First page holds struct hv_ring_buffer, do wraparound mapping for
* the rest.
@@ -231,6 +230,7 @@
reciprocal_value(ring_info->ring_size / 10);
ring_info->ring_datasize = ring_info->ring_size -
sizeof(struct hv_ring_buffer);
+ ring_info->priv_read_index = 0;
spin_lock_init(&ring_info->ring_lock);
@@ -240,7 +240,10 @@
/* Cleanup the ring buffer. */
void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info)
{
+ mutex_lock(&ring_info->ring_buffer_mutex);
vunmap(ring_info->ring_buffer);
+ ring_info->ring_buffer = NULL;
+ mutex_unlock(&ring_info->ring_buffer_mutex);
}
/* Write to the ring buffer. */
@@ -272,10 +275,19 @@
* is empty since the read index == write index.
*/
if (bytes_avail_towrite <= totalbytes_towrite) {
+ ++channel->out_full_total;
+
+ if (!channel->out_full_flag) {
+ ++channel->out_full_first;
+ channel->out_full_flag = true;
+ }
+
spin_unlock_irqrestore(&outring_info->ring_lock, flags);
return -EAGAIN;
}
+ channel->out_full_flag = false;
+
/* Write to the ring buffer */
next_write_location = hv_get_next_write_location(outring_info);
@@ -530,6 +542,7 @@
if (curr_write_sz <= pending_sz)
return;
+ ++channel->intr_in_full;
vmbus_setevent(channel);
}
EXPORT_SYMBOL_GPL(hv_pkt_iter_close);
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 2c6d5c7..53a60c8 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1,24 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2009, Microsoft Corporation.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
* Authors:
* Haiyang Zhang <haiyangz@microsoft.com>
* Hank Janssen <hjanssen@microsoft.com>
* K. Y. Srinivasan <kys@microsoft.com>
- *
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -37,12 +24,15 @@
#include <linux/sched/task_stack.h>
#include <asm/mshyperv.h>
+#include <linux/delay.h>
#include <linux/notifier.h>
#include <linux/ptrace.h>
#include <linux/screen_info.h>
#include <linux/kdebug.h>
#include <linux/efi.h>
#include <linux/random.h>
+#include <linux/syscore_ops.h>
+#include <clocksource/hyperv_timer.h>
#include "hyperv_vmbus.h"
struct vmbus_dynid {
@@ -234,7 +224,7 @@
return -ENODEV;
return sprintf(buf, "%d\n",
channel_pending(hv_dev->channel,
- vmbus_connection.monitor_pages[1]));
+ vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_pending);
@@ -313,12 +303,16 @@
{
struct hv_device *hv_dev = device_to_hv_device(dev);
struct hv_ring_buffer_debug_info outbound;
+ int ret;
if (!hv_dev->channel)
return -ENODEV;
- if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
- return -EINVAL;
- hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
+
+ ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
+ &outbound);
+ if (ret < 0)
+ return ret;
+
return sprintf(buf, "%d\n", outbound.current_interrupt_mask);
}
static DEVICE_ATTR_RO(out_intr_mask);
@@ -328,12 +322,15 @@
{
struct hv_device *hv_dev = device_to_hv_device(dev);
struct hv_ring_buffer_debug_info outbound;
+ int ret;
if (!hv_dev->channel)
return -ENODEV;
- if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
- return -EINVAL;
- hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
+
+ ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
+ &outbound);
+ if (ret < 0)
+ return ret;
return sprintf(buf, "%d\n", outbound.current_read_index);
}
static DEVICE_ATTR_RO(out_read_index);
@@ -344,12 +341,15 @@
{
struct hv_device *hv_dev = device_to_hv_device(dev);
struct hv_ring_buffer_debug_info outbound;
+ int ret;
if (!hv_dev->channel)
return -ENODEV;
- if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
- return -EINVAL;
- hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
+
+ ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
+ &outbound);
+ if (ret < 0)
+ return ret;
return sprintf(buf, "%d\n", outbound.current_write_index);
}
static DEVICE_ATTR_RO(out_write_index);
@@ -360,12 +360,15 @@
{
struct hv_device *hv_dev = device_to_hv_device(dev);
struct hv_ring_buffer_debug_info outbound;
+ int ret;
if (!hv_dev->channel)
return -ENODEV;
- if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
- return -EINVAL;
- hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
+
+ ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
+ &outbound);
+ if (ret < 0)
+ return ret;
return sprintf(buf, "%d\n", outbound.bytes_avail_toread);
}
static DEVICE_ATTR_RO(out_read_bytes_avail);
@@ -376,12 +379,15 @@
{
struct hv_device *hv_dev = device_to_hv_device(dev);
struct hv_ring_buffer_debug_info outbound;
+ int ret;
if (!hv_dev->channel)
return -ENODEV;
- if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
- return -EINVAL;
- hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
+
+ ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
+ &outbound);
+ if (ret < 0)
+ return ret;
return sprintf(buf, "%d\n", outbound.bytes_avail_towrite);
}
static DEVICE_ATTR_RO(out_write_bytes_avail);
@@ -391,12 +397,15 @@
{
struct hv_device *hv_dev = device_to_hv_device(dev);
struct hv_ring_buffer_debug_info inbound;
+ int ret;
if (!hv_dev->channel)
return -ENODEV;
- if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
- return -EINVAL;
- hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
+
+ ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
+ if (ret < 0)
+ return ret;
+
return sprintf(buf, "%d\n", inbound.current_interrupt_mask);
}
static DEVICE_ATTR_RO(in_intr_mask);
@@ -406,12 +415,15 @@
{
struct hv_device *hv_dev = device_to_hv_device(dev);
struct hv_ring_buffer_debug_info inbound;
+ int ret;
if (!hv_dev->channel)
return -ENODEV;
- if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
- return -EINVAL;
- hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
+
+ ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
+ if (ret < 0)
+ return ret;
+
return sprintf(buf, "%d\n", inbound.current_read_index);
}
static DEVICE_ATTR_RO(in_read_index);
@@ -421,12 +433,15 @@
{
struct hv_device *hv_dev = device_to_hv_device(dev);
struct hv_ring_buffer_debug_info inbound;
+ int ret;
if (!hv_dev->channel)
return -ENODEV;
- if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
- return -EINVAL;
- hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
+
+ ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
+ if (ret < 0)
+ return ret;
+
return sprintf(buf, "%d\n", inbound.current_write_index);
}
static DEVICE_ATTR_RO(in_write_index);
@@ -437,12 +452,15 @@
{
struct hv_device *hv_dev = device_to_hv_device(dev);
struct hv_ring_buffer_debug_info inbound;
+ int ret;
if (!hv_dev->channel)
return -ENODEV;
- if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
- return -EINVAL;
- hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
+
+ ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
+ if (ret < 0)
+ return ret;
+
return sprintf(buf, "%d\n", inbound.bytes_avail_toread);
}
static DEVICE_ATTR_RO(in_read_bytes_avail);
@@ -453,12 +471,15 @@
{
struct hv_device *hv_dev = device_to_hv_device(dev);
struct hv_ring_buffer_debug_info inbound;
+ int ret;
if (!hv_dev->channel)
return -ENODEV;
- if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
- return -EINVAL;
- hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
+
+ ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
+ if (ret < 0)
+ return ret;
+
return sprintf(buf, "%d\n", inbound.bytes_avail_towrite);
}
static DEVICE_ATTR_RO(in_write_bytes_avail);
@@ -518,6 +539,54 @@
}
static DEVICE_ATTR_RO(device);
+static ssize_t driver_override_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct hv_device *hv_dev = device_to_hv_device(dev);
+ char *driver_override, *old, *cp;
+
+ /* We need to keep extra room for a newline */
+ if (count >= (PAGE_SIZE - 1))
+ return -EINVAL;
+
+ driver_override = kstrndup(buf, count, GFP_KERNEL);
+ if (!driver_override)
+ return -ENOMEM;
+
+ cp = strchr(driver_override, '\n');
+ if (cp)
+ *cp = '\0';
+
+ device_lock(dev);
+ old = hv_dev->driver_override;
+ if (strlen(driver_override)) {
+ hv_dev->driver_override = driver_override;
+ } else {
+ kfree(driver_override);
+ hv_dev->driver_override = NULL;
+ }
+ device_unlock(dev);
+
+ kfree(old);
+
+ return count;
+}
+
+static ssize_t driver_override_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hv_device *hv_dev = device_to_hv_device(dev);
+ ssize_t len;
+
+ device_lock(dev);
+ len = snprintf(buf, PAGE_SIZE, "%s\n", hv_dev->driver_override);
+ device_unlock(dev);
+
+ return len;
+}
+static DEVICE_ATTR_RW(driver_override);
+
/* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */
static struct attribute *vmbus_dev_attrs[] = {
&dev_attr_id.attr,
@@ -548,9 +617,39 @@
&dev_attr_channel_vp_mapping.attr,
&dev_attr_vendor.attr,
&dev_attr_device.attr,
+ &dev_attr_driver_override.attr,
NULL,
};
-ATTRIBUTE_GROUPS(vmbus_dev);
+
+/*
+ * Device-level attribute_group callback function. Returns the permission for
+ * each attribute, and returns 0 if an attribute is not visible.
+ */
+static umode_t vmbus_dev_attr_is_visible(struct kobject *kobj,
+ struct attribute *attr, int idx)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ const struct hv_device *hv_dev = device_to_hv_device(dev);
+
+ /* Hide the monitor attributes if the monitor mechanism is not used. */
+ if (!hv_dev->channel->offermsg.monitor_allocated &&
+ (attr == &dev_attr_monitor_id.attr ||
+ attr == &dev_attr_server_monitor_pending.attr ||
+ attr == &dev_attr_client_monitor_pending.attr ||
+ attr == &dev_attr_server_monitor_latency.attr ||
+ attr == &dev_attr_client_monitor_latency.attr ||
+ attr == &dev_attr_server_monitor_conn_id.attr ||
+ attr == &dev_attr_client_monitor_conn_id.attr))
+ return 0;
+
+ return attr->mode;
+}
+
+static const struct attribute_group vmbus_dev_group = {
+ .attrs = vmbus_dev_attrs,
+ .is_visible = vmbus_dev_attr_is_visible
+};
+__ATTRIBUTE_GROUPS(vmbus_dev);
/*
* vmbus_uevent - add uevent for our device
@@ -574,51 +673,67 @@
return ret;
}
-static const uuid_le null_guid;
-
-static inline bool is_null_guid(const uuid_le *guid)
+static const struct hv_vmbus_device_id *
+hv_vmbus_dev_match(const struct hv_vmbus_device_id *id, const guid_t *guid)
{
- if (uuid_le_cmp(*guid, null_guid))
- return false;
- return true;
+ if (id == NULL)
+ return NULL; /* empty device table */
+
+ for (; !guid_is_null(&id->guid); id++)
+ if (guid_equal(&id->guid, guid))
+ return id;
+
+ return NULL;
}
-/*
- * Return a matching hv_vmbus_device_id pointer.
- * If there is no match, return NULL.
- */
-static const struct hv_vmbus_device_id *hv_vmbus_get_id(struct hv_driver *drv,
- const uuid_le *guid)
+static const struct hv_vmbus_device_id *
+hv_vmbus_dynid_match(struct hv_driver *drv, const guid_t *guid)
{
const struct hv_vmbus_device_id *id = NULL;
struct vmbus_dynid *dynid;
- /* Look at the dynamic ids first, before the static ones */
spin_lock(&drv->dynids.lock);
list_for_each_entry(dynid, &drv->dynids.list, node) {
- if (!uuid_le_cmp(dynid->id.guid, *guid)) {
+ if (guid_equal(&dynid->id.guid, guid)) {
id = &dynid->id;
break;
}
}
spin_unlock(&drv->dynids.lock);
- if (id)
- return id;
+ return id;
+}
- id = drv->id_table;
- if (id == NULL)
- return NULL; /* empty device table */
+static const struct hv_vmbus_device_id vmbus_device_null;
- for (; !is_null_guid(&id->guid); id++)
- if (!uuid_le_cmp(id->guid, *guid))
- return id;
+/*
+ * Return a matching hv_vmbus_device_id pointer.
+ * If there is no match, return NULL.
+ */
+static const struct hv_vmbus_device_id *hv_vmbus_get_id(struct hv_driver *drv,
+ struct hv_device *dev)
+{
+ const guid_t *guid = &dev->dev_type;
+ const struct hv_vmbus_device_id *id;
- return NULL;
+ /* When driver_override is set, only bind to the matching driver */
+ if (dev->driver_override && strcmp(dev->driver_override, drv->name))
+ return NULL;
+
+ /* Look at the dynamic ids first, before the static ones */
+ id = hv_vmbus_dynid_match(drv, guid);
+ if (!id)
+ id = hv_vmbus_dev_match(drv->id_table, guid);
+
+ /* driver_override will always match, send a dummy id */
+ if (!id && dev->driver_override)
+ id = &vmbus_device_null;
+
+ return id;
}
/* vmbus_add_dynid - add a new device ID to this driver and re-probe devices */
-static int vmbus_add_dynid(struct hv_driver *drv, uuid_le *guid)
+static int vmbus_add_dynid(struct hv_driver *drv, guid_t *guid)
{
struct vmbus_dynid *dynid;
@@ -656,14 +771,14 @@
size_t count)
{
struct hv_driver *drv = drv_to_hv_drv(driver);
- uuid_le guid;
+ guid_t guid;
ssize_t retval;
- retval = uuid_le_to_bin(buf, &guid);
+ retval = guid_parse(buf, &guid);
if (retval)
return retval;
- if (hv_vmbus_get_id(drv, &guid))
+ if (hv_vmbus_dynid_match(drv, &guid))
return -EEXIST;
retval = vmbus_add_dynid(drv, &guid);
@@ -683,10 +798,10 @@
{
struct hv_driver *drv = drv_to_hv_drv(driver);
struct vmbus_dynid *dynid, *n;
- uuid_le guid;
+ guid_t guid;
ssize_t retval;
- retval = uuid_le_to_bin(buf, &guid);
+ retval = guid_parse(buf, &guid);
if (retval)
return retval;
@@ -695,7 +810,7 @@
list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) {
struct hv_vmbus_device_id *id = &dynid->id;
- if (!uuid_le_cmp(id->guid, guid)) {
+ if (guid_equal(&id->guid, &guid)) {
list_del(&dynid->node);
kfree(dynid);
retval = count;
@@ -728,7 +843,7 @@
if (is_hvsock_channel(hv_dev->channel))
return drv->hvsock;
- if (hv_vmbus_get_id(drv, &hv_dev->dev_type))
+ if (hv_vmbus_get_id(drv, hv_dev))
return 1;
return 0;
@@ -745,7 +860,7 @@
struct hv_device *dev = device_to_hv_device(child_device);
const struct hv_vmbus_device_id *dev_id;
- dev_id = hv_vmbus_get_id(drv, &dev->dev_type);
+ dev_id = hv_vmbus_get_id(drv, dev);
if (drv->probe) {
ret = drv->probe(dev, dev_id);
if (ret != 0)
@@ -797,6 +912,45 @@
drv->shutdown(dev);
}
+#ifdef CONFIG_PM_SLEEP
+/*
+ * vmbus_suspend - Suspend a vmbus device
+ */
+static int vmbus_suspend(struct device *child_device)
+{
+ struct hv_driver *drv;
+ struct hv_device *dev = device_to_hv_device(child_device);
+
+ /* The device may not be attached yet */
+ if (!child_device->driver)
+ return 0;
+
+ drv = drv_to_hv_drv(child_device->driver);
+ if (!drv->suspend)
+ return -EOPNOTSUPP;
+
+ return drv->suspend(dev);
+}
+
+/*
+ * vmbus_resume - Resume a vmbus device
+ */
+static int vmbus_resume(struct device *child_device)
+{
+ struct hv_driver *drv;
+ struct hv_device *dev = device_to_hv_device(child_device);
+
+ /* The device may not be attached yet */
+ if (!child_device->driver)
+ return 0;
+
+ drv = drv_to_hv_drv(child_device->driver);
+ if (!drv->resume)
+ return -EOPNOTSUPP;
+
+ return drv->resume(dev);
+}
+#endif /* CONFIG_PM_SLEEP */
/*
* vmbus_device_release - Final callback release of the vmbus child device
@@ -807,12 +961,19 @@
struct vmbus_channel *channel = hv_dev->channel;
mutex_lock(&vmbus_connection.channel_mutex);
- hv_process_channel_removal(channel->offermsg.child_relid);
+ hv_process_channel_removal(channel);
mutex_unlock(&vmbus_connection.channel_mutex);
kfree(hv_dev);
-
}
+/*
+ * Note: we must use SET_NOIRQ_SYSTEM_SLEEP_PM_OPS rather than
+ * SET_SYSTEM_SLEEP_PM_OPS: see the comment before vmbus_bus_pm.
+ */
+static const struct dev_pm_ops vmbus_pm = {
+ SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(vmbus_suspend, vmbus_resume)
+};
+
/* The one and only one */
static struct bus_type hv_bus = {
.name = "vmbus",
@@ -823,6 +984,7 @@
.uevent = vmbus_uevent,
.dev_groups = vmbus_dev_groups,
.drv_groups = vmbus_drv_groups,
+ .pm = &vmbus_pm,
};
struct onmessage_work_context {
@@ -844,17 +1006,6 @@
kfree(ctx);
}
-static void hv_process_timer_expiration(struct hv_message *msg,
- struct hv_per_cpu_context *hv_cpu)
-{
- struct clock_event_device *dev = hv_cpu->clk_evt;
-
- if (dev->event_handler)
- dev->event_handler(dev);
-
- vmbus_signal_eom(msg, HVMSG_TIMER_EXPIRED);
-}
-
void vmbus_on_msg_dpc(unsigned long data)
{
struct hv_per_cpu_context *hv_cpu = (void *)data;
@@ -921,6 +1072,43 @@
vmbus_signal_eom(msg, message_type);
}
+#ifdef CONFIG_PM_SLEEP
+/*
+ * Fake RESCIND_CHANNEL messages to clean up hv_sock channels by force for
+ * hibernation, because hv_sock connections can not persist across hibernation.
+ */
+static void vmbus_force_channel_rescinded(struct vmbus_channel *channel)
+{
+ struct onmessage_work_context *ctx;
+ struct vmbus_channel_rescind_offer *rescind;
+
+ WARN_ON(!is_hvsock_channel(channel));
+
+ /*
+ * sizeof(*ctx) is small and the allocation should really not fail,
+ * otherwise the state of the hv_sock connections ends up in limbo.
+ */
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL | __GFP_NOFAIL);
+
+ /*
+ * So far, these are not really used by Linux. Just set them to the
+ * reasonable values conforming to the definitions of the fields.
+ */
+ ctx->msg.header.message_type = 1;
+ ctx->msg.header.payload_size = sizeof(*rescind);
+
+ /* These values are actually used by Linux. */
+ rescind = (struct vmbus_channel_rescind_offer *)ctx->msg.u.payload;
+ rescind->header.msgtype = CHANNELMSG_RESCIND_CHANNELOFFER;
+ rescind->child_relid = channel->offermsg.child_relid;
+
+ INIT_WORK(&ctx->work, vmbus_onmessage_work);
+
+ queue_work_on(vmbus_connection.connect_cpu,
+ vmbus_connection.work_queue,
+ &ctx->work);
+}
+#endif /* CONFIG_PM_SLEEP */
/*
* Direct callback for channels using other deferred processing
@@ -1048,9 +1236,10 @@
/* Check if there are actual msgs to be processed */
if (msg->header.message_type != HVMSG_NONE) {
- if (msg->header.message_type == HVMSG_TIMER_EXPIRED)
- hv_process_timer_expiration(msg, hv_cpu);
- else
+ if (msg->header.message_type == HVMSG_TIMER_EXPIRED) {
+ hv_stimer0_isr();
+ vmbus_signal_eom(msg, HVMSG_TIMER_EXPIRED);
+ } else
tasklet_schedule(&hv_cpu->msg_dpc);
}
@@ -1095,8 +1284,6 @@
};
static struct ctl_table_header *hv_ctl_table_hdr;
-static int zero;
-static int one = 1;
/*
* sysctl option to allow the user to control whether kmsg data should be
@@ -1109,8 +1296,8 @@
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE
},
{}
};
@@ -1152,14 +1339,19 @@
ret = hv_synic_alloc();
if (ret)
goto err_alloc;
+
+ ret = hv_stimer_alloc(VMBUS_MESSAGE_SINT);
+ if (ret < 0)
+ goto err_alloc;
+
/*
- * Initialize the per-cpu interrupt state and
- * connect to the host.
+ * Initialize the per-cpu interrupt state and stimer state.
+ * Then connect to the host.
*/
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online",
hv_synic_init, hv_synic_cleanup);
if (ret < 0)
- goto err_alloc;
+ goto err_cpuhp;
hyperv_cpuhp_online = ret;
ret = vmbus_connect();
@@ -1207,6 +1399,8 @@
err_connect:
cpuhp_remove_state(hyperv_cpuhp_online);
+err_cpuhp:
+ hv_stimer_free();
err_alloc:
hv_synic_free();
hv_remove_vmbus_irq();
@@ -1286,7 +1480,7 @@
struct vmbus_chan_attribute {
struct attribute attr;
- ssize_t (*show)(const struct vmbus_channel *chan, char *buf);
+ ssize_t (*show)(struct vmbus_channel *chan, char *buf);
ssize_t (*store)(struct vmbus_channel *chan,
const char *buf, size_t count);
};
@@ -1305,15 +1499,12 @@
{
const struct vmbus_chan_attribute *attribute
= container_of(attr, struct vmbus_chan_attribute, attr);
- const struct vmbus_channel *chan
+ struct vmbus_channel *chan
= container_of(kobj, struct vmbus_channel, kobj);
if (!attribute->show)
return -EIO;
- if (chan->state != CHANNEL_OPENED_STATE)
- return -EINVAL;
-
return attribute->show(chan, buf);
}
@@ -1321,45 +1512,81 @@
.show = vmbus_chan_attr_show,
};
-static ssize_t out_mask_show(const struct vmbus_channel *channel, char *buf)
+static ssize_t out_mask_show(struct vmbus_channel *channel, char *buf)
{
- const struct hv_ring_buffer_info *rbi = &channel->outbound;
+ struct hv_ring_buffer_info *rbi = &channel->outbound;
+ ssize_t ret;
- return sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask);
+ mutex_lock(&rbi->ring_buffer_mutex);
+ if (!rbi->ring_buffer) {
+ mutex_unlock(&rbi->ring_buffer_mutex);
+ return -EINVAL;
+ }
+
+ ret = sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask);
+ mutex_unlock(&rbi->ring_buffer_mutex);
+ return ret;
}
static VMBUS_CHAN_ATTR_RO(out_mask);
-static ssize_t in_mask_show(const struct vmbus_channel *channel, char *buf)
+static ssize_t in_mask_show(struct vmbus_channel *channel, char *buf)
{
- const struct hv_ring_buffer_info *rbi = &channel->inbound;
+ struct hv_ring_buffer_info *rbi = &channel->inbound;
+ ssize_t ret;
- return sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask);
+ mutex_lock(&rbi->ring_buffer_mutex);
+ if (!rbi->ring_buffer) {
+ mutex_unlock(&rbi->ring_buffer_mutex);
+ return -EINVAL;
+ }
+
+ ret = sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask);
+ mutex_unlock(&rbi->ring_buffer_mutex);
+ return ret;
}
static VMBUS_CHAN_ATTR_RO(in_mask);
-static ssize_t read_avail_show(const struct vmbus_channel *channel, char *buf)
+static ssize_t read_avail_show(struct vmbus_channel *channel, char *buf)
{
- const struct hv_ring_buffer_info *rbi = &channel->inbound;
+ struct hv_ring_buffer_info *rbi = &channel->inbound;
+ ssize_t ret;
- return sprintf(buf, "%u\n", hv_get_bytes_to_read(rbi));
+ mutex_lock(&rbi->ring_buffer_mutex);
+ if (!rbi->ring_buffer) {
+ mutex_unlock(&rbi->ring_buffer_mutex);
+ return -EINVAL;
+ }
+
+ ret = sprintf(buf, "%u\n", hv_get_bytes_to_read(rbi));
+ mutex_unlock(&rbi->ring_buffer_mutex);
+ return ret;
}
static VMBUS_CHAN_ATTR_RO(read_avail);
-static ssize_t write_avail_show(const struct vmbus_channel *channel, char *buf)
+static ssize_t write_avail_show(struct vmbus_channel *channel, char *buf)
{
- const struct hv_ring_buffer_info *rbi = &channel->outbound;
+ struct hv_ring_buffer_info *rbi = &channel->outbound;
+ ssize_t ret;
- return sprintf(buf, "%u\n", hv_get_bytes_to_write(rbi));
+ mutex_lock(&rbi->ring_buffer_mutex);
+ if (!rbi->ring_buffer) {
+ mutex_unlock(&rbi->ring_buffer_mutex);
+ return -EINVAL;
+ }
+
+ ret = sprintf(buf, "%u\n", hv_get_bytes_to_write(rbi));
+ mutex_unlock(&rbi->ring_buffer_mutex);
+ return ret;
}
static VMBUS_CHAN_ATTR_RO(write_avail);
-static ssize_t show_target_cpu(const struct vmbus_channel *channel, char *buf)
+static ssize_t show_target_cpu(struct vmbus_channel *channel, char *buf)
{
return sprintf(buf, "%u\n", channel->target_cpu);
}
static VMBUS_CHAN_ATTR(cpu, S_IRUGO, show_target_cpu, NULL);
-static ssize_t channel_pending_show(const struct vmbus_channel *channel,
+static ssize_t channel_pending_show(struct vmbus_channel *channel,
char *buf)
{
return sprintf(buf, "%d\n",
@@ -1368,7 +1595,7 @@
}
static VMBUS_CHAN_ATTR(pending, S_IRUGO, channel_pending_show, NULL);
-static ssize_t channel_latency_show(const struct vmbus_channel *channel,
+static ssize_t channel_latency_show(struct vmbus_channel *channel,
char *buf)
{
return sprintf(buf, "%d\n",
@@ -1377,26 +1604,58 @@
}
static VMBUS_CHAN_ATTR(latency, S_IRUGO, channel_latency_show, NULL);
-static ssize_t channel_interrupts_show(const struct vmbus_channel *channel, char *buf)
+static ssize_t channel_interrupts_show(struct vmbus_channel *channel, char *buf)
{
return sprintf(buf, "%llu\n", channel->interrupts);
}
static VMBUS_CHAN_ATTR(interrupts, S_IRUGO, channel_interrupts_show, NULL);
-static ssize_t channel_events_show(const struct vmbus_channel *channel, char *buf)
+static ssize_t channel_events_show(struct vmbus_channel *channel, char *buf)
{
return sprintf(buf, "%llu\n", channel->sig_events);
}
static VMBUS_CHAN_ATTR(events, S_IRUGO, channel_events_show, NULL);
-static ssize_t subchannel_monitor_id_show(const struct vmbus_channel *channel,
+static ssize_t channel_intr_in_full_show(struct vmbus_channel *channel,
+ char *buf)
+{
+ return sprintf(buf, "%llu\n",
+ (unsigned long long)channel->intr_in_full);
+}
+static VMBUS_CHAN_ATTR(intr_in_full, 0444, channel_intr_in_full_show, NULL);
+
+static ssize_t channel_intr_out_empty_show(struct vmbus_channel *channel,
+ char *buf)
+{
+ return sprintf(buf, "%llu\n",
+ (unsigned long long)channel->intr_out_empty);
+}
+static VMBUS_CHAN_ATTR(intr_out_empty, 0444, channel_intr_out_empty_show, NULL);
+
+static ssize_t channel_out_full_first_show(struct vmbus_channel *channel,
+ char *buf)
+{
+ return sprintf(buf, "%llu\n",
+ (unsigned long long)channel->out_full_first);
+}
+static VMBUS_CHAN_ATTR(out_full_first, 0444, channel_out_full_first_show, NULL);
+
+static ssize_t channel_out_full_total_show(struct vmbus_channel *channel,
+ char *buf)
+{
+ return sprintf(buf, "%llu\n",
+ (unsigned long long)channel->out_full_total);
+}
+static VMBUS_CHAN_ATTR(out_full_total, 0444, channel_out_full_total_show, NULL);
+
+static ssize_t subchannel_monitor_id_show(struct vmbus_channel *channel,
char *buf)
{
return sprintf(buf, "%u\n", channel->offermsg.monitorid);
}
static VMBUS_CHAN_ATTR(monitor_id, S_IRUGO, subchannel_monitor_id_show, NULL);
-static ssize_t subchannel_id_show(const struct vmbus_channel *channel,
+static ssize_t subchannel_id_show(struct vmbus_channel *channel,
char *buf)
{
return sprintf(buf, "%u\n",
@@ -1414,15 +1673,43 @@
&chan_attr_latency.attr,
&chan_attr_interrupts.attr,
&chan_attr_events.attr,
+ &chan_attr_intr_in_full.attr,
+ &chan_attr_intr_out_empty.attr,
+ &chan_attr_out_full_first.attr,
+ &chan_attr_out_full_total.attr,
&chan_attr_monitor_id.attr,
&chan_attr_subchannel_id.attr,
NULL
};
+/*
+ * Channel-level attribute_group callback function. Returns the permission for
+ * each attribute, and returns 0 if an attribute is not visible.
+ */
+static umode_t vmbus_chan_attr_is_visible(struct kobject *kobj,
+ struct attribute *attr, int idx)
+{
+ const struct vmbus_channel *channel =
+ container_of(kobj, struct vmbus_channel, kobj);
+
+ /* Hide the monitor attributes if the monitor mechanism is not used. */
+ if (!channel->offermsg.monitor_allocated &&
+ (attr == &chan_attr_pending.attr ||
+ attr == &chan_attr_latency.attr ||
+ attr == &chan_attr_monitor_id.attr))
+ return 0;
+
+ return attr->mode;
+}
+
+static struct attribute_group vmbus_chan_group = {
+ .attrs = vmbus_chan_attrs,
+ .is_visible = vmbus_chan_attr_is_visible
+};
+
static struct kobj_type vmbus_chan_ktype = {
.sysfs_ops = &vmbus_chan_sysfs_ops,
.release = vmbus_chan_release,
- .default_attrs = vmbus_chan_attrs,
};
/*
@@ -1430,6 +1717,7 @@
*/
int vmbus_add_channel_kobj(struct hv_device *dev, struct vmbus_channel *channel)
{
+ const struct device *device = &dev->device;
struct kobject *kobj = &channel->kobj;
u32 relid = channel->offermsg.child_relid;
int ret;
@@ -1440,17 +1728,36 @@
if (ret)
return ret;
+ ret = sysfs_create_group(kobj, &vmbus_chan_group);
+
+ if (ret) {
+ /*
+ * The calling functions' error handling paths will cleanup the
+ * empty channel directory.
+ */
+ dev_err(device, "Unable to set up channel sysfs files\n");
+ return ret;
+ }
+
kobject_uevent(kobj, KOBJ_ADD);
return 0;
}
/*
+ * vmbus_remove_channel_attr_group - remove the channel's attribute group
+ */
+void vmbus_remove_channel_attr_group(struct vmbus_channel *channel)
+{
+ sysfs_remove_group(&channel->kobj, &vmbus_chan_group);
+}
+
+/*
* vmbus_device_create - Creates and registers a new child device
* on the vmbus.
*/
-struct hv_device *vmbus_device_create(const uuid_le *type,
- const uuid_le *instance,
+struct hv_device *vmbus_device_create(const guid_t *type,
+ const guid_t *instance,
struct vmbus_channel *channel)
{
struct hv_device *child_device_obj;
@@ -1462,12 +1769,10 @@
}
child_device_obj->channel = channel;
- memcpy(&child_device_obj->dev_type, type, sizeof(uuid_le));
- memcpy(&child_device_obj->dev_instance, instance,
- sizeof(uuid_le));
+ guid_copy(&child_device_obj->dev_type, type);
+ guid_copy(&child_device_obj->dev_instance, instance);
child_device_obj->vendor_id = 0x1414; /* MSFT vendor ID */
-
return child_device_obj;
}
@@ -1824,6 +2129,131 @@
return ret_val;
}
+#ifdef CONFIG_PM_SLEEP
+static int vmbus_bus_suspend(struct device *dev)
+{
+ struct vmbus_channel *channel, *sc;
+ unsigned long flags;
+
+ while (atomic_read(&vmbus_connection.offer_in_progress) != 0) {
+ /*
+ * We wait here until the completion of any channel
+ * offers that are currently in progress.
+ */
+ msleep(1);
+ }
+
+ mutex_lock(&vmbus_connection.channel_mutex);
+ list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
+ if (!is_hvsock_channel(channel))
+ continue;
+
+ vmbus_force_channel_rescinded(channel);
+ }
+ mutex_unlock(&vmbus_connection.channel_mutex);
+
+ /*
+ * Wait until all the sub-channels and hv_sock channels have been
+ * cleaned up. Sub-channels should be destroyed upon suspend, otherwise
+ * they would conflict with the new sub-channels that will be created
+ * in the resume path. hv_sock channels should also be destroyed, but
+ * a hv_sock channel of an established hv_sock connection can not be
+ * really destroyed since it may still be referenced by the userspace
+ * application, so we just force the hv_sock channel to be rescinded
+ * by vmbus_force_channel_rescinded(), and the userspace application
+ * will thoroughly destroy the channel after hibernation.
+ *
+ * Note: the counter nr_chan_close_on_suspend may never go above 0 if
+ * the VM has no sub-channel and hv_sock channel, e.g. a 1-vCPU VM.
+ */
+ if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0)
+ wait_for_completion(&vmbus_connection.ready_for_suspend_event);
+
+ WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) != 0);
+
+ mutex_lock(&vmbus_connection.channel_mutex);
+
+ list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
+ /*
+ * Invalidate the field. Upon resume, vmbus_onoffer() will fix
+ * up the field, and the other fields (if necessary).
+ */
+ channel->offermsg.child_relid = INVALID_RELID;
+
+ if (is_hvsock_channel(channel)) {
+ if (!channel->rescind) {
+ pr_err("hv_sock channel not rescinded!\n");
+ WARN_ON_ONCE(1);
+ }
+ continue;
+ }
+
+ spin_lock_irqsave(&channel->lock, flags);
+ list_for_each_entry(sc, &channel->sc_list, sc_list) {
+ pr_err("Sub-channel not deleted!\n");
+ WARN_ON_ONCE(1);
+ }
+ spin_unlock_irqrestore(&channel->lock, flags);
+
+ atomic_inc(&vmbus_connection.nr_chan_fixup_on_resume);
+ }
+
+ mutex_unlock(&vmbus_connection.channel_mutex);
+
+ vmbus_initiate_unload(false);
+
+ vmbus_connection.conn_state = DISCONNECTED;
+
+ /* Reset the event for the next resume. */
+ reinit_completion(&vmbus_connection.ready_for_resume_event);
+
+ return 0;
+}
+
+static int vmbus_bus_resume(struct device *dev)
+{
+ struct vmbus_channel_msginfo *msginfo;
+ size_t msgsize;
+ int ret;
+
+ /*
+ * We only use the 'vmbus_proto_version', which was in use before
+ * hibernation, to re-negotiate with the host.
+ */
+ if (vmbus_proto_version == VERSION_INVAL ||
+ vmbus_proto_version == 0) {
+ pr_err("Invalid proto version = 0x%x\n", vmbus_proto_version);
+ return -EINVAL;
+ }
+
+ msgsize = sizeof(*msginfo) +
+ sizeof(struct vmbus_channel_initiate_contact);
+
+ msginfo = kzalloc(msgsize, GFP_KERNEL);
+
+ if (msginfo == NULL)
+ return -ENOMEM;
+
+ ret = vmbus_negotiate_version(msginfo, vmbus_proto_version);
+
+ kfree(msginfo);
+
+ if (ret != 0)
+ return ret;
+
+ WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) == 0);
+
+ vmbus_request_offers();
+
+ wait_for_completion(&vmbus_connection.ready_for_resume_event);
+
+ /* Reset the event for the next suspend. */
+ reinit_completion(&vmbus_connection.ready_for_suspend_event);
+
+ return 0;
+}
+#endif /* CONFIG_PM_SLEEP */
+
static const struct acpi_device_id vmbus_acpi_device_ids[] = {
{"VMBUS", 0},
{"VMBus", 0},
@@ -1831,6 +2261,19 @@
};
MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids);
+/*
+ * Note: we must use SET_NOIRQ_SYSTEM_SLEEP_PM_OPS rather than
+ * SET_SYSTEM_SLEEP_PM_OPS, otherwise NIC SR-IOV can not work, because the
+ * "pci_dev_pm_ops" uses the "noirq" callbacks: in the resume path, the
+ * pci "noirq" restore callback runs before "non-noirq" callbacks (see
+ * resume_target_kernel() -> dpm_resume_start(), and hibernation_restore() ->
+ * dpm_resume_end()). This means vmbus_bus_resume() and the pci-hyperv's
+ * resume callback must also run via the "noirq" callbacks.
+ */
+static const struct dev_pm_ops vmbus_bus_pm = {
+ SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(vmbus_bus_suspend, vmbus_bus_resume)
+};
+
static struct acpi_driver vmbus_acpi_driver = {
.name = "vmbus",
.ids = vmbus_acpi_device_ids,
@@ -1838,11 +2281,12 @@
.add = vmbus_acpi_add,
.remove = vmbus_acpi_remove,
},
+ .drv.pm = &vmbus_bus_pm,
};
static void hv_kexec_handler(void)
{
- hv_synic_clockevents_cleanup();
+ hv_stimer_global_cleanup();
vmbus_initiate_unload(false);
vmbus_connection.conn_state = DISCONNECTED;
/* Make sure conn_state is set as hv_synic_cleanup checks for it */
@@ -1853,6 +2297,8 @@
static void hv_crash_handler(struct pt_regs *regs)
{
+ int cpu;
+
vmbus_initiate_unload(true);
/*
* In crash handler we can't schedule synic cleanup for all CPUs,
@@ -1860,10 +2306,53 @@
* for kdump.
*/
vmbus_connection.conn_state = DISCONNECTED;
- hv_synic_cleanup(smp_processor_id());
+ cpu = smp_processor_id();
+ hv_stimer_cleanup(cpu);
+ hv_synic_cleanup(cpu);
hyperv_cleanup();
};
+static int hv_synic_suspend(void)
+{
+ /*
+ * When we reach here, all the non-boot CPUs have been offlined, and
+ * the stimers on them have been unbound in hv_synic_cleanup() ->
+ * hv_stimer_cleanup() -> clockevents_unbind_device().
+ *
+ * hv_synic_suspend() only runs on CPU0 with interrupts disabled. Here
+ * we do not unbind the stimer on CPU0 because: 1) it's unnecessary
+ * because the interrupts remain disabled between syscore_suspend()
+ * and syscore_resume(): see create_image() and resume_target_kernel();
+ * 2) the stimer on CPU0 is automatically disabled later by
+ * syscore_suspend() -> timekeeping_suspend() -> tick_suspend() -> ...
+ * -> clockevents_shutdown() -> ... -> hv_ce_shutdown(); 3) a warning
+ * would be triggered if we call clockevents_unbind_device(), which
+ * may sleep, in an interrupts-disabled context. So, we intentionally
+ * don't call hv_stimer_cleanup(0) here.
+ */
+
+ hv_synic_disable_regs(0);
+
+ return 0;
+}
+
+static void hv_synic_resume(void)
+{
+ hv_synic_enable_regs(0);
+
+ /*
+ * Note: we don't need to call hv_stimer_init(0), because the timer
+ * on CPU0 is not unbound in hv_synic_suspend(), and the timer is
+ * automatically re-enabled in timekeeping_resume().
+ */
+}
+
+/* The callbacks run only on CPU0, with irqs_disabled. */
+static struct syscore_ops hv_synic_syscore_ops = {
+ .suspend = hv_synic_suspend,
+ .resume = hv_synic_resume,
+};
+
static int __init hv_acpi_init(void)
{
int ret, t;
@@ -1894,6 +2383,8 @@
hv_setup_kexec_handler(hv_kexec_handler);
hv_setup_crash_handler(hv_crash_handler);
+ register_syscore_ops(&hv_synic_syscore_ops);
+
return 0;
cleanup:
@@ -1906,10 +2397,12 @@
{
int cpu;
+ unregister_syscore_ops(&hv_synic_syscore_ops);
+
hv_remove_kexec_handler();
hv_remove_crash_handler();
vmbus_connection.conn_state = DISCONNECTED;
- hv_synic_clockevents_cleanup();
+ hv_stimer_global_cleanup();
vmbus_disconnect();
hv_remove_vmbus_irq();
for_each_online_cpu(cpu) {
@@ -1939,6 +2432,7 @@
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Microsoft Hyper-V VMBus Driver");
subsys_initcall(hv_acpi_init);
module_exit(vmbus_exit);