Update Linux to v5.10.109
Sourced from [1].
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
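This update pulls in the hv_netvsc changes from the 5.10.109 stable tree: XDP/BPF support (the new netvsc_bpf.c, a per-channel BPF program, and an xdp_drop counter), receive hashing via NETIF_F_RXHASH and the NBL_HASH_VALUE per-packet info, validation of host-supplied lengths and offsets across the NVSP and RNDIS receive paths, page-buffer arithmetic converted from the guest PAGE_SIZE to the fixed Hyper-V page size, hibernation support (netvsc_suspend/netvsc_resume), AF_PACKET 802.1Q transmit handling, and an ethtool register dump of the send indirection table.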
diff --git a/drivers/net/hyperv/Makefile b/drivers/net/hyperv/Makefile
index 3a2aa07..0db7cca 100644
--- a/drivers/net/hyperv/Makefile
+++ b/drivers/net/hyperv/Makefile
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_HYPERV_NET) += hv_netvsc.o
-hv_netvsc-y := netvsc_drv.o netvsc.o rndis_filter.o netvsc_trace.o
+hv_netvsc-y := netvsc_drv.o netvsc.o rndis_filter.o netvsc_trace.o netvsc_bpf.o
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index e74f2d1..a0f338c 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -142,6 +142,8 @@
u32 send_section_size;
u32 recv_section_size;
+ struct bpf_prog *bprog;
+
u8 rss_key[NETVSC_HASH_KEYLEN];
};
@@ -189,7 +191,8 @@
struct hv_netvsc_packet *packet,
struct rndis_message *rndis_msg,
struct hv_page_buffer *page_buffer,
- struct sk_buff *skb);
+ struct sk_buff *skb,
+ bool xdp_tx);
void netvsc_linkstatus_callback(struct net_device *net,
struct rndis_message *resp);
int netvsc_recv_callback(struct net_device *net,
@@ -198,6 +201,16 @@
void netvsc_channel_cb(void *context);
int netvsc_poll(struct napi_struct *napi, int budget);
+u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan,
+ struct xdp_buff *xdp);
+unsigned int netvsc_xdp_fraglen(unsigned int len);
+struct bpf_prog *netvsc_xdp_get(struct netvsc_device *nvdev);
+int netvsc_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+ struct netlink_ext_ack *extack,
+ struct netvsc_device *nvdev);
+int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog);
+int netvsc_bpf(struct net_device *dev, struct netdev_bpf *bpf);
+
int rndis_set_subchannel(struct net_device *ndev,
struct netvsc_device *nvdev,
struct netvsc_device_info *dev_info);
@@ -822,7 +835,8 @@
#define NETVSC_SUPPORTED_HW_FEATURES (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | \
NETIF_F_TSO | NETIF_F_IPV6_CSUM | \
- NETIF_F_TSO6 | NETIF_F_LRO | NETIF_F_SG)
+ NETIF_F_TSO6 | NETIF_F_LRO | \
+ NETIF_F_SG | NETIF_F_RXHASH)
#define VRSS_SEND_TAB_SIZE 16 /* must be power of 2 */
#define VRSS_CHANNEL_MAX 64
@@ -831,6 +845,12 @@
#define RNDIS_MAX_PKT_DEFAULT 8
#define RNDIS_PKT_ALIGN_DEFAULT 8
+#define NETVSC_XDP_HDRM 256
+
+#define NETVSC_XFER_HEADER_SIZE(rng_cnt) \
+ (offsetof(struct vmtransfer_page_packet_header, ranges) + \
+ (rng_cnt) * sizeof(struct vmtransfer_page_range))
+
struct multi_send_data {
struct sk_buff *skb; /* skb containing the pkt */
struct hv_netvsc_packet *pkt; /* netvsc pkt pending */
@@ -853,6 +873,7 @@
struct nvsc_rsc {
const struct ndis_pkt_8021q_info *vlan;
const struct ndis_tcp_ip_checksum_info *csum_info;
+ const u32 *hash_info;
u8 is_last; /* last RNDIS msg in a vmtransfer_page */
u32 cnt; /* #fragments in an RSC packet */
u32 pktlen; /* Full packet length */
@@ -865,6 +886,7 @@
u64 bytes;
u64 broadcast;
u64 multicast;
+ u64 xdp_drop;
struct u64_stats_sync syncp;
};
@@ -879,6 +901,7 @@
unsigned long rx_no_memory;
unsigned long stop_queue;
unsigned long wake_queue;
+ unsigned long vlan_error;
};
struct netvsc_ethtool_pcpu_stats {
@@ -954,6 +977,12 @@
u32 vf_alloc;
/* Serial number of the VF to team with */
u32 vf_serial;
+
+ /* Is the current data path through the VF NIC? */
+ bool data_path_is_vf;
+
+ /* Used to temporarily save the config info across hibernation */
+ struct netvsc_device_info *saved_netvsc_dev_info;
};
/* Per channel data */
@@ -967,6 +996,9 @@
atomic_t queue_sends;
struct nvsc_rsc rsc;
+ struct bpf_prog __rcu *bpf_prog;
+ struct xdp_rxq_info xdp_rxq;
+
struct netvsc_stats tx_stats;
struct netvsc_stats rx_stats;
};
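The new NETVSC_XFER_HEADER_SIZE(rng_cnt) macro gives the minimum size of a transfer-page packet header carrying rng_cnt receive ranges; netvsc_receive() below uses it to reject descriptors whose advertised range count does not fit in the header area. A minimal sketch of the arithmetic follows, using stand-in struct layouts (the real definitions live in the VMBus headers):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct vmtransfer_page_range {            /* stand-in layout */
	uint32_t byte_count;
	uint32_t byte_offset;
};

struct vmtransfer_page_packet_header {    /* stand-in layout */
	uint64_t descriptor[2];               /* placeholder for struct vmpacket_descriptor */
	uint16_t xfer_pageset_id;
	uint8_t  sender_owns_set;
	uint8_t  reserved;
	uint32_t range_cnt;
	struct vmtransfer_page_range ranges[];
};

#define NETVSC_XFER_HEADER_SIZE(rng_cnt) \
	(offsetof(struct vmtransfer_page_packet_header, ranges) + \
	 (rng_cnt) * sizeof(struct vmtransfer_page_range))

int main(void)
{
	/* A packet advertising 4 ranges needs at least this many header
	 * bytes; netvsc_receive() drops it if desc->offset8 << 3 is smaller.
	 */
	printf("%zu\n", NETVSC_XFER_HEADER_SIZE(4));
	return 0;
}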
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 6c0732f..6a7ab93 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -122,8 +122,10 @@
vfree(nvdev->send_buf);
kfree(nvdev->send_section_map);
- for (i = 0; i < VRSS_CHANNEL_MAX; i++)
+ for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
+ xdp_rxq_info_unreg(&nvdev->chan_table[i].xdp_rxq);
vfree(nvdev->chan_table[i].mrc.slots);
+ }
kfree(nvdev);
}
@@ -386,10 +388,20 @@
net_device->recv_section_size = resp->sections[0].sub_alloc_size;
net_device->recv_section_cnt = resp->sections[0].num_sub_allocs;
- /* Setup receive completion ring */
- net_device->recv_completion_cnt
- = round_up(net_device->recv_section_cnt + 1,
- PAGE_SIZE / sizeof(u64));
+ /* Ensure buffer will not overflow */
+ if (net_device->recv_section_size < NETVSC_MTU_MIN || (u64)net_device->recv_section_size *
+ (u64)net_device->recv_section_cnt > (u64)buf_size) {
+ netdev_err(ndev, "invalid recv_section_size %u\n",
+ net_device->recv_section_size);
+ ret = -EINVAL;
+ goto cleanup;
+ }
+
+ /* Setup receive completion ring.
+ * Add 1 to the recv_section_cnt because at least one entry in a
+ * ring buffer has to be empty.
+ */
+ net_device->recv_completion_cnt = net_device->recv_section_cnt + 1;
ret = netvsc_alloc_recv_comp_ring(net_device, 0);
if (ret)
goto cleanup;
@@ -457,6 +469,12 @@
/* Parse the response */
net_device->send_section_size = init_packet->msg.
v1_msg.send_send_buf_complete.section_size;
+ if (net_device->send_section_size < NETVSC_MTU_MIN) {
+ netdev_err(ndev, "invalid send_section_size %u\n",
+ net_device->send_section_size);
+ ret = -EINVAL;
+ goto cleanup;
+ }
/* Section count is simply the size divided by the section size. */
net_device->send_section_cnt = buf_size / net_device->send_section_size;
@@ -633,9 +651,12 @@
RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
- /* And disassociate NAPI context from device */
- for (i = 0; i < net_device->num_chn; i++)
+ /* Disable NAPI and disassociate its context from the device. */
+ for (i = 0; i < net_device->num_chn; i++) {
+ /* See also vmbus_reset_channel_cb(). */
+ napi_disable(&net_device->chan_table[i].napi);
netif_napi_del(&net_device->chan_table[i].napi);
+ }
/*
* At this point, no one should be accessing net_device
@@ -725,12 +746,49 @@
int budget)
{
const struct nvsp_message *nvsp_packet = hv_pkt_data(desc);
+ u32 msglen = hv_pkt_datalen(desc);
+
+ /* Ensure packet is big enough to read header fields */
+ if (msglen < sizeof(struct nvsp_message_header)) {
+ netdev_err(ndev, "nvsp_message length too small: %u\n", msglen);
+ return;
+ }
switch (nvsp_packet->hdr.msg_type) {
case NVSP_MSG_TYPE_INIT_COMPLETE:
+ if (msglen < sizeof(struct nvsp_message_header) +
+ sizeof(struct nvsp_message_init_complete)) {
+ netdev_err(ndev, "nvsp_msg length too small: %u\n",
+ msglen);
+ return;
+ }
+ fallthrough;
+
case NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE:
+ if (msglen < sizeof(struct nvsp_message_header) +
+ sizeof(struct nvsp_1_message_send_receive_buffer_complete)) {
+ netdev_err(ndev, "nvsp_msg1 length too small: %u\n",
+ msglen);
+ return;
+ }
+ fallthrough;
+
case NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE:
+ if (msglen < sizeof(struct nvsp_message_header) +
+ sizeof(struct nvsp_1_message_send_send_buffer_complete)) {
+ netdev_err(ndev, "nvsp_msg1 length too small: %u\n",
+ msglen);
+ return;
+ }
+ fallthrough;
+
case NVSP_MSG5_TYPE_SUBCHANNEL:
+ if (msglen < sizeof(struct nvsp_message_header) +
+ sizeof(struct nvsp_5_subchannel_complete)) {
+ netdev_err(ndev, "nvsp_msg5 length too small: %u\n",
+ msglen);
+ return;
+ }
/* Copy the response back */
memcpy(&net_device->channel_init_pkt, nvsp_packet,
sizeof(struct nvsp_message));
@@ -788,7 +846,7 @@
}
for (i = 0; i < page_count; i++) {
- char *src = phys_to_virt(pb[i].pfn << PAGE_SHIFT);
+ char *src = phys_to_virt(pb[i].pfn << HV_HYP_PAGE_SHIFT);
u32 offset = pb[i].offset;
u32 len = pb[i].len;
@@ -900,7 +958,8 @@
struct hv_netvsc_packet *packet,
struct rndis_message *rndis_msg,
struct hv_page_buffer *pb,
- struct sk_buff *skb)
+ struct sk_buff *skb,
+ bool xdp_tx)
{
struct net_device_context *ndev_ctx = netdev_priv(ndev);
struct netvsc_device *net_device
@@ -923,10 +982,11 @@
packet->send_buf_index = NETVSC_INVALID_INDEX;
packet->cp_partial = false;
- /* Send control message directly without accessing msd (Multi-Send
- * Data) field which may be changed during data packet processing.
+ /* Send a control message or XDP packet directly without accessing
+ * msd (Multi-Send Data) field which may be changed during data packet
+ * processing.
*/
- if (!skb)
+ if (!skb || xdp_tx)
return netvsc_send_pkt(device, packet, net_device, pb, skb);
/* batch packets in send buffer if possible */
@@ -1109,19 +1169,28 @@
static int netvsc_receive(struct net_device *ndev,
struct netvsc_device *net_device,
struct netvsc_channel *nvchan,
- const struct vmpacket_descriptor *desc,
- const struct nvsp_message *nvsp)
+ const struct vmpacket_descriptor *desc)
{
struct net_device_context *net_device_ctx = netdev_priv(ndev);
struct vmbus_channel *channel = nvchan->channel;
const struct vmtransfer_page_packet_header *vmxferpage_packet
= container_of(desc, const struct vmtransfer_page_packet_header, d);
+ const struct nvsp_message *nvsp = hv_pkt_data(desc);
+ u32 msglen = hv_pkt_datalen(desc);
u16 q_idx = channel->offermsg.offer.sub_channel_index;
char *recv_buf = net_device->recv_buf;
u32 status = NVSP_STAT_SUCCESS;
int i;
int count = 0;
+ /* Ensure packet is big enough to read header fields */
+ if (msglen < sizeof(struct nvsp_message_header)) {
+ netif_err(net_device_ctx, rx_err, ndev,
+ "invalid nvsp header, length too small: %u\n",
+ msglen);
+ return 0;
+ }
+
/* Make sure this is a valid nvsp packet */
if (unlikely(nvsp->hdr.msg_type != NVSP_MSG1_TYPE_SEND_RNDIS_PKT)) {
netif_err(net_device_ctx, rx_err, ndev,
@@ -1130,6 +1199,14 @@
return 0;
}
+ /* Validate xfer page pkt header */
+ if ((desc->offset8 << 3) < sizeof(struct vmtransfer_page_packet_header)) {
+ netif_err(net_device_ctx, rx_err, ndev,
+ "Invalid xfer page pkt, offset too small: %u\n",
+ desc->offset8 << 3);
+ return 0;
+ }
+
if (unlikely(vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID)) {
netif_err(net_device_ctx, rx_err, ndev,
"Invalid xfer page set id - expecting %x got %x\n",
@@ -1140,6 +1217,14 @@
count = vmxferpage_packet->range_cnt;
+ /* Check count for a valid value */
+ if (NETVSC_XFER_HEADER_SIZE(count) > desc->offset8 << 3) {
+ netif_err(net_device_ctx, rx_err, ndev,
+ "Range count is not valid: %d\n",
+ count);
+ return 0;
+ }
+
/* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */
for (i = 0; i < count; i++) {
u32 offset = vmxferpage_packet->ranges[i].byte_offset;
@@ -1147,7 +1232,8 @@
void *data;
int ret;
- if (unlikely(offset + buflen > net_device->recv_buf_size)) {
+ if (unlikely(offset > net_device->recv_buf_size ||
+ buflen > net_device->recv_buf_size - offset)) {
nvchan->rsc.cnt = 0;
status = NVSP_STAT_FAIL;
netif_err(net_device_ctx, rx_err, ndev,
@@ -1167,8 +1253,11 @@
ret = rndis_filter_receive(ndev, net_device,
nvchan, data, buflen);
- if (unlikely(ret != NVSP_STAT_SUCCESS))
+ if (unlikely(ret != NVSP_STAT_SUCCESS)) {
+ /* Drop incomplete packet */
+ nvchan->rsc.cnt = 0;
status = NVSP_STAT_FAIL;
+ }
}
enq_receive_complete(ndev, net_device, q_idx,
@@ -1186,6 +1275,13 @@
u32 count, offset, *tab;
int i;
+ /* Ensure packet is big enough to read send_table fields */
+ if (msglen < sizeof(struct nvsp_message_header) +
+ sizeof(struct nvsp_5_send_indirect_table)) {
+ netdev_err(ndev, "nvsp_v5_msg length too small: %u\n", msglen);
+ return;
+ }
+
count = nvmsg->msg.v5_msg.send_table.count;
offset = nvmsg->msg.v5_msg.send_table.offset;
@@ -1217,10 +1313,18 @@
}
static void netvsc_send_vf(struct net_device *ndev,
- const struct nvsp_message *nvmsg)
+ const struct nvsp_message *nvmsg,
+ u32 msglen)
{
struct net_device_context *net_device_ctx = netdev_priv(ndev);
+ /* Ensure packet is big enough to read its fields */
+ if (msglen < sizeof(struct nvsp_message_header) +
+ sizeof(struct nvsp_4_send_vf_association)) {
+ netdev_err(ndev, "nvsp_v4_msg length too small: %u\n", msglen);
+ return;
+ }
+
net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;
netdev_info(ndev, "VF slot %u %s\n",
@@ -1230,16 +1334,24 @@
static void netvsc_receive_inband(struct net_device *ndev,
struct netvsc_device *nvscdev,
- const struct nvsp_message *nvmsg,
- u32 msglen)
+ const struct vmpacket_descriptor *desc)
{
+ const struct nvsp_message *nvmsg = hv_pkt_data(desc);
+ u32 msglen = hv_pkt_datalen(desc);
+
+ /* Ensure packet is big enough to read header fields */
+ if (msglen < sizeof(struct nvsp_message_header)) {
+ netdev_err(ndev, "inband nvsp_message length too small: %u\n", msglen);
+ return;
+ }
+
switch (nvmsg->hdr.msg_type) {
case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE:
netvsc_send_table(ndev, nvscdev, nvmsg, msglen);
break;
case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION:
- netvsc_send_vf(ndev, nvmsg);
+ netvsc_send_vf(ndev, nvmsg, msglen);
break;
}
}
@@ -1253,23 +1365,20 @@
{
struct vmbus_channel *channel = nvchan->channel;
const struct nvsp_message *nvmsg = hv_pkt_data(desc);
- u32 msglen = hv_pkt_datalen(desc);
trace_nvsp_recv(ndev, channel, nvmsg);
switch (desc->type) {
case VM_PKT_COMP:
- netvsc_send_completion(ndev, net_device, channel,
- desc, budget);
+ netvsc_send_completion(ndev, net_device, channel, desc, budget);
break;
case VM_PKT_DATA_USING_XFER_PAGES:
- return netvsc_receive(ndev, net_device, nvchan,
- desc, nvmsg);
+ return netvsc_receive(ndev, net_device, nvchan, desc);
break;
case VM_PKT_DATA_INBAND:
- netvsc_receive_inband(ndev, net_device, nvmsg, msglen);
+ netvsc_receive_inband(ndev, net_device, desc);
break;
default:
@@ -1392,6 +1501,21 @@
nvchan->net_device = net_device;
u64_stats_init(&nvchan->tx_stats.syncp);
u64_stats_init(&nvchan->rx_stats.syncp);
+
+ ret = xdp_rxq_info_reg(&nvchan->xdp_rxq, ndev, i);
+
+ if (ret) {
+ netdev_err(ndev, "xdp_rxq_info_reg fail: %d\n", ret);
+ goto cleanup2;
+ }
+
+ ret = xdp_rxq_info_reg_mem_model(&nvchan->xdp_rxq,
+ MEM_TYPE_PAGE_SHARED, NULL);
+
+ if (ret) {
+ netdev_err(ndev, "xdp reg_mem_model fail: %d\n", ret);
+ goto cleanup2;
+ }
}
/* Enable NAPI handler before init callbacks */
@@ -1437,6 +1561,8 @@
cleanup:
netif_napi_del(&net_device->chan_table[0].napi);
+
+cleanup2:
free_netvsc_device(&net_device->rcu);
return ERR_PTR(ret);
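Note the shape of the rewritten receive-range check above: the old test "offset + buflen > recv_buf_size" can wrap around in u32 arithmetic when the host supplies a huge buflen, silently passing the check, while "offset > size || buflen > size - offset" never overflows. A small standalone demonstration with made-up values:

/* Demonstrates why the bounds check was rewritten: with u32 arithmetic,
 * offset + buflen can wrap and defeat the naive test. Values are made up.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t recv_buf_size = 16 * 1024 * 1024;   /* 16 MiB receive buffer */
	uint32_t offset = 8;
	uint32_t buflen = UINT32_MAX - 4;            /* hostile length */

	/* Old check: offset + buflen wraps to 3, so the test passes. */
	if (offset + buflen > recv_buf_size)
		puts("old check: rejected");
	else
		puts("old check: ACCEPTED (overflow!)");

	/* New check: no addition, so no wraparound. */
	if (offset > recv_buf_size || buflen > recv_buf_size - offset)
		puts("new check: rejected");
	else
		puts("new check: accepted");

	return 0;
}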
diff --git a/drivers/net/hyperv/netvsc_bpf.c b/drivers/net/hyperv/netvsc_bpf.c
new file mode 100644
index 0000000..440486d
--- /dev/null
+++ b/drivers/net/hyperv/netvsc_bpf.c
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2019, Microsoft Corporation.
+ *
+ * Author:
+ * Haiyang Zhang <haiyangz@microsoft.com>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
+#include <linux/kernel.h>
+#include <net/xdp.h>
+
+#include <linux/mutex.h>
+#include <linux/rtnetlink.h>
+
+#include "hyperv_net.h"
+
+u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan,
+ struct xdp_buff *xdp)
+{
+ void *data = nvchan->rsc.data[0];
+ u32 len = nvchan->rsc.len[0];
+ struct page *page = NULL;
+ struct bpf_prog *prog;
+ u32 act = XDP_PASS;
+
+ xdp->data_hard_start = NULL;
+
+ rcu_read_lock();
+ prog = rcu_dereference(nvchan->bpf_prog);
+
+ if (!prog)
+ goto out;
+
+ /* allocate page buffer for data */
+ page = alloc_page(GFP_ATOMIC);
+ if (!page) {
+ act = XDP_DROP;
+ goto out;
+ }
+
+ xdp->data_hard_start = page_address(page);
+ xdp->data = xdp->data_hard_start + NETVSC_XDP_HDRM;
+ xdp_set_data_meta_invalid(xdp);
+ xdp->data_end = xdp->data + len;
+ xdp->rxq = &nvchan->xdp_rxq;
+ xdp->frame_sz = PAGE_SIZE;
+
+ memcpy(xdp->data, data, len);
+
+ act = bpf_prog_run_xdp(prog, xdp);
+
+ switch (act) {
+ case XDP_PASS:
+ case XDP_TX:
+ case XDP_DROP:
+ break;
+
+ case XDP_ABORTED:
+ trace_xdp_exception(ndev, prog, act);
+ break;
+
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ }
+
+out:
+ rcu_read_unlock();
+
+ if (page && act != XDP_PASS && act != XDP_TX) {
+ __free_page(page);
+ xdp->data_hard_start = NULL;
+ }
+
+ return act;
+}
+
+unsigned int netvsc_xdp_fraglen(unsigned int len)
+{
+ return SKB_DATA_ALIGN(len) +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+}
+
+struct bpf_prog *netvsc_xdp_get(struct netvsc_device *nvdev)
+{
+ return rtnl_dereference(nvdev->chan_table[0].bpf_prog);
+}
+
+int netvsc_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+ struct netlink_ext_ack *extack,
+ struct netvsc_device *nvdev)
+{
+ struct bpf_prog *old_prog;
+ int buf_max, i;
+
+ old_prog = netvsc_xdp_get(nvdev);
+
+ if (!old_prog && !prog)
+ return 0;
+
+ buf_max = NETVSC_XDP_HDRM + netvsc_xdp_fraglen(dev->mtu + ETH_HLEN);
+ if (prog && buf_max > PAGE_SIZE) {
+ netdev_err(dev, "XDP: mtu:%u too large, buf_max:%u\n",
+ dev->mtu, buf_max);
+ NL_SET_ERR_MSG_MOD(extack, "XDP: mtu too large");
+
+ return -EOPNOTSUPP;
+ }
+
+ if (prog && (dev->features & NETIF_F_LRO)) {
+ netdev_err(dev, "XDP: not support LRO\n");
+ NL_SET_ERR_MSG_MOD(extack, "XDP: not support LRO");
+
+ return -EOPNOTSUPP;
+ }
+
+ if (prog)
+ bpf_prog_add(prog, nvdev->num_chn - 1);
+
+ for (i = 0; i < nvdev->num_chn; i++)
+ rcu_assign_pointer(nvdev->chan_table[i].bpf_prog, prog);
+
+ if (old_prog)
+ for (i = 0; i < nvdev->num_chn; i++)
+ bpf_prog_put(old_prog);
+
+ return 0;
+}
+
+int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog)
+{
+ struct netdev_bpf xdp;
+ bpf_op_t ndo_bpf;
+ int ret;
+
+ ASSERT_RTNL();
+
+ if (!vf_netdev)
+ return 0;
+
+ ndo_bpf = vf_netdev->netdev_ops->ndo_bpf;
+ if (!ndo_bpf)
+ return 0;
+
+ memset(&xdp, 0, sizeof(xdp));
+
+ if (prog)
+ bpf_prog_inc(prog);
+
+ xdp.command = XDP_SETUP_PROG;
+ xdp.prog = prog;
+
+ ret = ndo_bpf(vf_netdev, &xdp);
+
+ if (ret && prog)
+ bpf_prog_put(prog);
+
+ return ret;
+}
+
+int netvsc_bpf(struct net_device *dev, struct netdev_bpf *bpf)
+{
+ struct net_device_context *ndevctx = netdev_priv(dev);
+ struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
+ struct net_device *vf_netdev = rtnl_dereference(ndevctx->vf_netdev);
+ struct netlink_ext_ack *extack = bpf->extack;
+ int ret;
+
+ if (!nvdev || nvdev->destroy) {
+ return -ENODEV;
+ }
+
+ switch (bpf->command) {
+ case XDP_SETUP_PROG:
+ ret = netvsc_xdp_set(dev, bpf->prog, extack, nvdev);
+
+ if (ret)
+ return ret;
+
+ ret = netvsc_vf_setxdp(vf_netdev, bpf->prog);
+
+ if (ret) {
+ netdev_err(dev, "vf_setxdp failed:%d\n", ret);
+ NL_SET_ERR_MSG_MOD(extack, "vf_setxdp failed");
+
+ netvsc_xdp_set(dev, NULL, extack, nvdev);
+ }
+
+ return ret;
+
+ default:
+ return -EINVAL;
+ }
+}
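As the new file shows, netvsc_run_xdp() copies each received frame into a freshly allocated page with NETVSC_XDP_HDRM bytes of headroom before running the attached program, and netvsc_bpf() mirrors any attach onto the teamed VF NIC via netvsc_vf_setxdp(). A minimal, hypothetical XDP program that this path could run — it simply passes every frame up the stack (the device name eth0 in the comment is an assumption):

// SPDX-License-Identifier: GPL-2.0
/* Hypothetical minimal XDP program for exercising the netvsc XDP path.
 * Build: clang -O2 -target bpf -c xdp_pass.c -o xdp_pass.o
 * Attach: ip link set dev eth0 xdp obj xdp_pass.o sec xdp
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int xdp_pass_all(struct xdp_md *ctx)
{
	/* XDP_PASS hands the frame to the regular netvsc receive path;
	 * returning XDP_DROP instead would show up in the per-queue
	 * rx_queue_N_xdp_drop ethtool counter added by this patch.
	 */
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";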
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 362b7ca..e367638 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -25,6 +25,7 @@
#include <linux/slab.h>
#include <linux/rtnetlink.h>
#include <linux/netpoll.h>
+#include <linux/bpf.h>
#include <net/arp.h>
#include <net/route.h>
@@ -372,32 +373,29 @@
return txq;
}
-static u32 fill_pg_buf(struct page *page, u32 offset, u32 len,
+static u32 fill_pg_buf(unsigned long hvpfn, u32 offset, u32 len,
struct hv_page_buffer *pb)
{
int j = 0;
- /* Deal with compound pages by ignoring unused part
- * of the page.
- */
- page += (offset >> PAGE_SHIFT);
- offset &= ~PAGE_MASK;
+ hvpfn += offset >> HV_HYP_PAGE_SHIFT;
+ offset = offset & ~HV_HYP_PAGE_MASK;
while (len > 0) {
unsigned long bytes;
- bytes = PAGE_SIZE - offset;
+ bytes = HV_HYP_PAGE_SIZE - offset;
if (bytes > len)
bytes = len;
- pb[j].pfn = page_to_pfn(page);
+ pb[j].pfn = hvpfn;
pb[j].offset = offset;
pb[j].len = bytes;
offset += bytes;
len -= bytes;
- if (offset == PAGE_SIZE && len) {
- page++;
+ if (offset == HV_HYP_PAGE_SIZE && len) {
+ hvpfn++;
offset = 0;
j++;
}
@@ -420,23 +418,26 @@
* 2. skb linear data
* 3. skb fragment data
*/
- slots_used += fill_pg_buf(virt_to_page(hdr),
- offset_in_page(hdr),
- len, &pb[slots_used]);
+ slots_used += fill_pg_buf(virt_to_hvpfn(hdr),
+ offset_in_hvpage(hdr),
+ len,
+ &pb[slots_used]);
packet->rmsg_size = len;
packet->rmsg_pgcnt = slots_used;
- slots_used += fill_pg_buf(virt_to_page(data),
- offset_in_page(data),
- skb_headlen(skb), &pb[slots_used]);
+ slots_used += fill_pg_buf(virt_to_hvpfn(data),
+ offset_in_hvpage(data),
+ skb_headlen(skb),
+ &pb[slots_used]);
for (i = 0; i < frags; i++) {
skb_frag_t *frag = skb_shinfo(skb)->frags + i;
- slots_used += fill_pg_buf(skb_frag_page(frag),
- skb_frag_off(frag),
- skb_frag_size(frag), &pb[slots_used]);
+ slots_used += fill_pg_buf(page_to_hvpfn(skb_frag_page(frag)),
+ skb_frag_off(frag),
+ skb_frag_size(frag),
+ &pb[slots_used]);
}
return slots_used;
}
@@ -452,8 +453,8 @@
unsigned long offset = skb_frag_off(frag);
/* Skip unused frames from start of page */
- offset &= ~PAGE_MASK;
- pages += PFN_UP(offset + size);
+ offset &= ~HV_HYP_PAGE_MASK;
+ pages += HVPFN_UP(offset + size);
}
return pages;
}
@@ -461,12 +462,12 @@
static int netvsc_get_slots(struct sk_buff *skb)
{
char *data = skb->data;
- unsigned int offset = offset_in_page(data);
+ unsigned int offset = offset_in_hvpage(data);
unsigned int len = skb_headlen(skb);
int slots;
int frag_slots;
- slots = DIV_ROUND_UP(offset + len, PAGE_SIZE);
+ slots = DIV_ROUND_UP(offset + len, HV_HYP_PAGE_SIZE);
frag_slots = count_skb_frag_slots(skb);
return slots + frag_slots;
}
@@ -519,7 +520,7 @@
return rc;
}
-static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
+static int netvsc_xmit(struct sk_buff *skb, struct net_device *net, bool xdp_tx)
{
struct net_device_context *net_device_ctx = netdev_priv(net);
struct hv_netvsc_packet *packet = NULL;
@@ -572,7 +573,7 @@
/* Use the skb control buffer for building up the packet */
BUILD_BUG_ON(sizeof(struct hv_netvsc_packet) >
- FIELD_SIZEOF(struct sk_buff, cb));
+ sizeof_field(struct sk_buff, cb));
packet = (struct hv_netvsc_packet *)skb->cb;
packet->q_idx = skb_get_queue_mapping(skb);
@@ -605,6 +606,29 @@
*hash_info = hash;
}
+ /* When using AF_PACKET we need to drop VLAN header from
+ * the frame and update the SKB to allow the HOST OS
+ * to transmit the 802.1Q packet
+ */
+ if (skb->protocol == htons(ETH_P_8021Q)) {
+ u16 vlan_tci;
+
+ skb_reset_mac_header(skb);
+ if (eth_type_vlan(eth_hdr(skb)->h_proto)) {
+ if (unlikely(__skb_vlan_pop(skb, &vlan_tci) != 0)) {
+ ++net_device_ctx->eth_stats.vlan_error;
+ goto drop;
+ }
+
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci);
+ /* Update the NDIS header pkt lengths */
+ packet->total_data_buflen -= VLAN_HLEN;
+ packet->total_bytes -= VLAN_HLEN;
+ rndis_msg->msg_len = packet->total_data_buflen;
+ rndis_msg->msg.pkt.data_len = packet->total_data_buflen;
+ }
+ }
+
if (skb_vlan_tag_present(skb)) {
struct ndis_pkt_8021q_info *vlan;
@@ -638,10 +662,7 @@
} else {
lso_info->lso_v2_transmit.ip_version =
NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6;
- ipv6_hdr(skb)->payload_len = 0;
- tcp_hdr(skb)->check =
- ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
- &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
+ tcp_v6_gso_csum_prep(skb);
}
lso_info->lso_v2_transmit.tcp_header_offset = skb_transport_offset(skb);
lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size;
@@ -687,7 +708,7 @@
/* timestamp packet in software */
skb_tx_timestamp(skb);
- ret = netvsc_send(net, packet, rndis_msg, pb, skb);
+ ret = netvsc_send(net, packet, rndis_msg, pb, skb, xdp_tx);
if (likely(ret == 0))
return NETDEV_TX_OK;
@@ -710,6 +731,12 @@
goto drop;
}
+static netdev_tx_t netvsc_start_xmit(struct sk_buff *skb,
+ struct net_device *ndev)
+{
+ return netvsc_xmit(skb, ndev, false);
+}
+
/*
* netvsc_linkstatus_callback - Link up/down notification
*/
@@ -721,6 +748,13 @@
struct netvsc_reconfig *event;
unsigned long flags;
+ /* Ensure the packet is big enough to access its fields */
+ if (resp->msg_len - RNDIS_HEADER_SIZE < sizeof(struct rndis_indicate_status)) {
+ netdev_err(net, "invalid rndis_indicate_status packet, len: %u\n",
+ resp->msg_len);
+ return;
+ }
+
/* Update the physical link speed when changing to another vSwitch */
if (indicate->status == RNDIS_STATUS_LINK_SPEED_CHANGE) {
u32 speed;
@@ -752,6 +786,22 @@
schedule_delayed_work(&ndev_ctx->dwork, 0);
}
+static void netvsc_xdp_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+ int rc;
+
+ skb->queue_mapping = skb_get_rx_queue(skb);
+ __skb_push(skb, ETH_HLEN);
+
+ rc = netvsc_xmit(skb, ndev, true);
+
+ if (dev_xmit_complete(rc))
+ return;
+
+ dev_kfree_skb_any(skb);
+ ndev->stats.tx_dropped++;
+}
+
static void netvsc_comp_ipcsum(struct sk_buff *skb)
{
struct iphdr *iph = (struct iphdr *)skb->data;
@@ -761,25 +811,46 @@
}
static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
- struct netvsc_channel *nvchan)
+ struct netvsc_channel *nvchan,
+ struct xdp_buff *xdp)
{
struct napi_struct *napi = &nvchan->napi;
const struct ndis_pkt_8021q_info *vlan = nvchan->rsc.vlan;
const struct ndis_tcp_ip_checksum_info *csum_info =
nvchan->rsc.csum_info;
+ const u32 *hash_info = nvchan->rsc.hash_info;
struct sk_buff *skb;
+ void *xbuf = xdp->data_hard_start;
int i;
- skb = napi_alloc_skb(napi, nvchan->rsc.pktlen);
- if (!skb)
- return skb;
+ if (xbuf) {
+ unsigned int hdroom = xdp->data - xdp->data_hard_start;
+ unsigned int xlen = xdp->data_end - xdp->data;
+ unsigned int frag_size = xdp->frame_sz;
- /*
- * Copy to skb. This copy is needed here since the memory pointed by
- * hv_netvsc_packet cannot be deallocated
- */
- for (i = 0; i < nvchan->rsc.cnt; i++)
- skb_put_data(skb, nvchan->rsc.data[i], nvchan->rsc.len[i]);
+ skb = build_skb(xbuf, frag_size);
+
+ if (!skb) {
+ __free_page(virt_to_page(xbuf));
+ return NULL;
+ }
+
+ skb_reserve(skb, hdroom);
+ skb_put(skb, xlen);
+ skb->dev = napi->dev;
+ } else {
+ skb = napi_alloc_skb(napi, nvchan->rsc.pktlen);
+
+ if (!skb)
+ return NULL;
+
+ /* Copy to skb. This copy is needed here since the memory
+ * pointed by hv_netvsc_packet cannot be deallocated.
+ */
+ for (i = 0; i < nvchan->rsc.cnt; i++)
+ skb_put_data(skb, nvchan->rsc.data[i],
+ nvchan->rsc.len[i]);
+ }
skb->protocol = eth_type_trans(skb, net);
@@ -803,6 +874,9 @@
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
+ if (hash_info && (net->features & NETIF_F_RXHASH))
+ skb_set_hash(skb, *hash_info, PKT_HASH_TYPE_L4);
+
if (vlan) {
u16 vlan_tci = vlan->vlanid | (vlan->pri << VLAN_PRIO_SHIFT) |
(vlan->cfi ? VLAN_CFI_MASK : 0);
@@ -826,13 +900,25 @@
struct vmbus_channel *channel = nvchan->channel;
u16 q_idx = channel->offermsg.offer.sub_channel_index;
struct sk_buff *skb;
- struct netvsc_stats *rx_stats;
+ struct netvsc_stats *rx_stats = &nvchan->rx_stats;
+ struct xdp_buff xdp;
+ u32 act;
if (net->reg_state != NETREG_REGISTERED)
return NVSP_STAT_FAIL;
+ act = netvsc_run_xdp(net, nvchan, &xdp);
+
+ if (act != XDP_PASS && act != XDP_TX) {
+ u64_stats_update_begin(&rx_stats->syncp);
+ rx_stats->xdp_drop++;
+ u64_stats_update_end(&rx_stats->syncp);
+
+ return NVSP_STAT_SUCCESS; /* consumed by XDP */
+ }
+
/* Allocate a skb - TODO direct I/O to pages? */
- skb = netvsc_alloc_recv_skb(net, nvchan);
+ skb = netvsc_alloc_recv_skb(net, nvchan, &xdp);
if (unlikely(!skb)) {
++net_device_ctx->eth_stats.rx_no_memory;
@@ -846,7 +932,6 @@
* on the synthetic device because modifying the VF device
* statistics will not work correctly.
*/
- rx_stats = &nvchan->rx_stats;
u64_stats_update_begin(&rx_stats->syncp);
rx_stats->packets++;
rx_stats->bytes += nvchan->rsc.pktlen;
@@ -857,6 +942,11 @@
++rx_stats->multicast;
u64_stats_update_end(&rx_stats->syncp);
+ if (act == XDP_TX) {
+ netvsc_xdp_xmit(skb, net);
+ return NVSP_STAT_SUCCESS;
+ }
+
napi_gro_receive(&nvchan->napi, skb);
return NVSP_STAT_SUCCESS;
}
@@ -883,10 +973,11 @@
/* Alloc struct netvsc_device_info, and initialize it from either existing
* struct netvsc_device, or from default values.
*/
-static struct netvsc_device_info *netvsc_devinfo_get
- (struct netvsc_device *nvdev)
+static
+struct netvsc_device_info *netvsc_devinfo_get(struct netvsc_device *nvdev)
{
struct netvsc_device_info *dev_info;
+ struct bpf_prog *prog;
dev_info = kzalloc(sizeof(*dev_info), GFP_ATOMIC);
@@ -894,6 +985,8 @@
return NULL;
if (nvdev) {
+ ASSERT_RTNL();
+
dev_info->num_chn = nvdev->num_chn;
dev_info->send_sections = nvdev->send_section_cnt;
dev_info->send_section_size = nvdev->send_section_size;
@@ -902,6 +995,12 @@
memcpy(dev_info->rss_key, nvdev->extension->rss_key,
NETVSC_HASH_KEYLEN);
+
+ prog = netvsc_xdp_get(nvdev);
+ if (prog) {
+ bpf_prog_inc(prog);
+ dev_info->bprog = prog;
+ }
} else {
dev_info->num_chn = VRSS_CHANNEL_DEFAULT;
dev_info->send_sections = NETVSC_DEFAULT_TX;
@@ -913,6 +1012,17 @@
return dev_info;
}
+/* Free struct netvsc_device_info */
+static void netvsc_devinfo_put(struct netvsc_device_info *dev_info)
+{
+ if (dev_info->bprog) {
+ ASSERT_RTNL();
+ bpf_prog_put(dev_info->bprog);
+ }
+
+ kfree(dev_info);
+}
+
static int netvsc_detach(struct net_device *ndev,
struct netvsc_device *nvdev)
{
@@ -924,6 +1034,8 @@
if (cancel_work_sync(&nvdev->subchan_work))
nvdev->num_chn = 1;
+ netvsc_xdp_set(ndev, NULL, NULL, nvdev);
+
/* If device was up (receiving) then shutdown */
if (netif_running(ndev)) {
netvsc_tx_disable(nvdev, ndev);
@@ -957,7 +1069,8 @@
struct hv_device *hdev = ndev_ctx->device_ctx;
struct netvsc_device *nvdev;
struct rndis_device *rdev;
- int ret;
+ struct bpf_prog *prog;
+ int ret = 0;
nvdev = rndis_filter_device_add(hdev, dev_info);
if (IS_ERR(nvdev))
@@ -973,6 +1086,16 @@
}
}
+ prog = dev_info->bprog;
+ if (prog) {
+ bpf_prog_inc(prog);
+ ret = netvsc_xdp_set(ndev, prog, NULL, nvdev);
+ if (ret) {
+ bpf_prog_put(prog);
+ goto err1;
+ }
+ }
+
/* In any case device is now ready */
nvdev->tx_disable = false;
netif_device_attach(ndev);
@@ -983,7 +1106,7 @@
if (netif_running(ndev)) {
ret = rndis_filter_open(nvdev);
if (ret)
- goto err;
+ goto err2;
rdev = nvdev->extension;
if (!rdev->link_state)
@@ -992,9 +1115,10 @@
return 0;
-err:
+err2:
netif_device_detach(ndev);
+err1:
rndis_filter_device_remove(hdev, nvdev);
return ret;
@@ -1044,27 +1168,10 @@
}
out:
- kfree(device_info);
+ netvsc_devinfo_put(device_info);
return ret;
}
-static bool
-netvsc_validate_ethtool_ss_cmd(const struct ethtool_link_ksettings *cmd)
-{
- struct ethtool_link_ksettings diff1 = *cmd;
- struct ethtool_link_ksettings diff2 = {};
-
- diff1.base.speed = 0;
- diff1.base.duplex = 0;
- /* advertising and cmd are usually set */
- ethtool_link_ksettings_zero_link_mode(&diff1, advertising);
- diff1.base.cmd = 0;
- /* We set port to PORT_OTHER */
- diff2.base.port = PORT_OTHER;
-
- return !memcmp(&diff1, &diff2, sizeof(diff1));
-}
-
static void netvsc_init_settings(struct net_device *dev)
{
struct net_device_context *ndc = netdev_priv(dev);
@@ -1081,6 +1188,12 @@
struct ethtool_link_ksettings *cmd)
{
struct net_device_context *ndc = netdev_priv(dev);
+ struct net_device *vf_netdev;
+
+ vf_netdev = rtnl_dereference(ndc->vf_netdev);
+
+ if (vf_netdev)
+ return __ethtool_get_link_ksettings(vf_netdev, cmd);
cmd->base.speed = ndc->speed;
cmd->base.duplex = ndc->duplex;
@@ -1093,18 +1206,18 @@
const struct ethtool_link_ksettings *cmd)
{
struct net_device_context *ndc = netdev_priv(dev);
- u32 speed;
+ struct net_device *vf_netdev = rtnl_dereference(ndc->vf_netdev);
- speed = cmd->base.speed;
- if (!ethtool_validate_speed(speed) ||
- !ethtool_validate_duplex(cmd->base.duplex) ||
- !netvsc_validate_ethtool_ss_cmd(cmd))
- return -EINVAL;
+ if (vf_netdev) {
+ if (!vf_netdev->ethtool_ops->set_link_ksettings)
+ return -EOPNOTSUPP;
- ndc->speed = speed;
- ndc->duplex = cmd->base.duplex;
+ return vf_netdev->ethtool_ops->set_link_ksettings(vf_netdev,
+ cmd);
+ }
- return 0;
+ return ethtool_virtdev_set_link_ksettings(dev, cmd,
+ &ndc->speed, &ndc->duplex);
}
static int netvsc_change_mtu(struct net_device *ndev, int mtu)
@@ -1151,7 +1264,7 @@
dev_set_mtu(vf_netdev, orig_mtu);
out:
- kfree(device_info);
+ netvsc_devinfo_put(device_info);
return ret;
}
@@ -1345,6 +1458,7 @@
{ "rx_no_memory", offsetof(struct netvsc_ethtool_stats, rx_no_memory) },
{ "stop_queue", offsetof(struct netvsc_ethtool_stats, stop_queue) },
{ "wake_queue", offsetof(struct netvsc_ethtool_stats, wake_queue) },
+ { "vlan_error", offsetof(struct netvsc_ethtool_stats, vlan_error) },
}, pcpu_stats[] = {
{ "cpu%u_rx_packets",
offsetof(struct netvsc_ethtool_pcpu_stats, rx_packets) },
@@ -1376,8 +1490,8 @@
/* statistics per queue (rx/tx packets/bytes) */
#define NETVSC_PCPU_STATS_LEN (num_present_cpus() * ARRAY_SIZE(pcpu_stats))
-/* 4 statistics per queue (rx/tx packets/bytes) */
-#define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 4)
+/* 5 statistics per queue (rx/tx packets/bytes, rx xdp_drop) */
+#define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 5)
static int netvsc_get_sset_count(struct net_device *dev, int string_set)
{
@@ -1409,6 +1523,7 @@
struct netvsc_ethtool_pcpu_stats *pcpu_sum;
unsigned int start;
u64 packets, bytes;
+ u64 xdp_drop;
int i, j, cpu;
if (!nvdev)
@@ -1437,14 +1552,19 @@
start = u64_stats_fetch_begin_irq(&qstats->syncp);
packets = qstats->packets;
bytes = qstats->bytes;
+ xdp_drop = qstats->xdp_drop;
} while (u64_stats_fetch_retry_irq(&qstats->syncp, start));
data[i++] = packets;
data[i++] = bytes;
+ data[i++] = xdp_drop;
}
pcpu_sum = kvmalloc_array(num_possible_cpus(),
sizeof(struct netvsc_ethtool_pcpu_stats),
GFP_KERNEL);
+ if (!pcpu_sum)
+ return;
+
netvsc_get_pcpu_stats(dev, pcpu_sum);
for_each_present_cpu(cpu) {
struct netvsc_ethtool_pcpu_stats *this_sum = &pcpu_sum[cpu];
@@ -1487,6 +1607,8 @@
p += ETH_GSTRING_LEN;
sprintf(p, "rx_queue_%u_bytes", i);
p += ETH_GSTRING_LEN;
+ sprintf(p, "rx_queue_%u_xdp_drop", i);
+ p += ETH_GSTRING_LEN;
}
for_each_present_cpu(cpu) {
@@ -1783,10 +1905,27 @@
}
out:
- kfree(device_info);
+ netvsc_devinfo_put(device_info);
return ret;
}
+static netdev_features_t netvsc_fix_features(struct net_device *ndev,
+ netdev_features_t features)
+{
+ struct net_device_context *ndevctx = netdev_priv(ndev);
+ struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
+
+ if (!nvdev || nvdev->destroy)
+ return features;
+
+ if ((features & NETIF_F_LRO) && netvsc_xdp_get(nvdev)) {
+ features ^= NETIF_F_LRO;
+ netdev_info(ndev, "Skip LRO - unsupported with XDP\n");
+ }
+
+ return features;
+}
+
static int netvsc_set_features(struct net_device *ndev,
netdev_features_t features)
{
@@ -1830,6 +1969,23 @@
return ret;
}
+static int netvsc_get_regs_len(struct net_device *netdev)
+{
+ return VRSS_SEND_TAB_SIZE * sizeof(u32);
+}
+
+static void netvsc_get_regs(struct net_device *netdev,
+ struct ethtool_regs *regs, void *p)
+{
+ struct net_device_context *ndc = netdev_priv(netdev);
+ u32 *regs_buff = p;
+
+ /* increase the version, if buffer format is changed. */
+ regs->version = 1;
+
+ memcpy(regs_buff, ndc->tx_table, VRSS_SEND_TAB_SIZE * sizeof(u32));
+}
+
static u32 netvsc_get_msglevel(struct net_device *ndev)
{
struct net_device_context *ndev_ctx = netdev_priv(ndev);
@@ -1846,6 +2002,8 @@
static const struct ethtool_ops ethtool_ops = {
.get_drvinfo = netvsc_get_drvinfo,
+ .get_regs_len = netvsc_get_regs_len,
+ .get_regs = netvsc_get_regs,
.get_msglevel = netvsc_get_msglevel,
.set_msglevel = netvsc_set_msglevel,
.get_link = ethtool_op_get_link,
@@ -1873,12 +2031,14 @@
.ndo_start_xmit = netvsc_start_xmit,
.ndo_change_rx_flags = netvsc_change_rx_flags,
.ndo_set_rx_mode = netvsc_set_rx_mode,
+ .ndo_fix_features = netvsc_fix_features,
.ndo_set_features = netvsc_set_features,
.ndo_change_mtu = netvsc_change_mtu,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = netvsc_set_mac_addr,
.ndo_select_queue = netvsc_select_queue,
.ndo_get_stats64 = netvsc_get_stats64,
+ .ndo_bpf = netvsc_bpf,
};
/*
@@ -2165,6 +2325,7 @@
{
struct net_device_context *net_device_ctx;
struct netvsc_device *netvsc_dev;
+ struct bpf_prog *prog;
struct net_device *ndev;
int ret;
@@ -2209,10 +2370,22 @@
vf_netdev->wanted_features = ndev->features;
netdev_update_features(vf_netdev);
+ prog = netvsc_xdp_get(netvsc_dev);
+ netvsc_vf_setxdp(vf_netdev, prog);
+
return NOTIFY_OK;
}
-/* VF up/down change detected, schedule to change data path */
+/* Change the data path when VF UP/DOWN/CHANGE are detected.
+ *
+ * Typically a UP or DOWN event is followed by a CHANGE event, so
+ * net_device_ctx->data_path_is_vf is used to cache the current data path
+ * to avoid the duplicate call of netvsc_switch_datapath() and the duplicate
+ * message.
+ *
+ * During hibernation, if a VF NIC driver (e.g. mlx5) preserves the network
+ * interface, there is only the CHANGE event and no UP or DOWN event.
+ */
static int netvsc_vf_changed(struct net_device *vf_netdev)
{
struct net_device_context *net_device_ctx;
@@ -2229,6 +2402,10 @@
if (!netvsc_dev)
return NOTIFY_DONE;
+ if (net_device_ctx->data_path_is_vf == vf_is_up)
+ return NOTIFY_OK;
+ net_device_ctx->data_path_is_vf = vf_is_up;
+
netvsc_switch_datapath(ndev, vf_is_up);
netdev_info(ndev, "Data path switched %s VF: %s\n",
vf_is_up ? "to" : "from", vf_netdev->name);
@@ -2250,6 +2427,8 @@
netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
+ netvsc_vf_setxdp(vf_netdev, NULL);
+
netdev_rx_handler_unregister(vf_netdev);
netdev_upper_dev_unlink(vf_netdev, ndev);
RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL);
@@ -2345,6 +2524,8 @@
NETIF_F_HW_VLAN_CTAG_RX;
net->vlan_features = net->features;
+ netdev_lockdep_set_classes(net);
+
/* MTU range: 68 - 1500 or 65521 */
net->min_mtu = NETVSC_MTU_MIN;
if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2)
@@ -2363,14 +2544,14 @@
list_add(&net_device_ctx->list, &netvsc_dev_list);
rtnl_unlock();
- kfree(device_info);
+ netvsc_devinfo_put(device_info);
return 0;
register_failed:
rtnl_unlock();
rndis_filter_device_remove(dev, nvdev);
rndis_failed:
- kfree(device_info);
+ netvsc_devinfo_put(device_info);
devinfo_failed:
free_percpu(net_device_ctx->vf_stats);
no_stats:
@@ -2398,8 +2579,10 @@
rtnl_lock();
nvdev = rtnl_dereference(ndev_ctx->nvdev);
- if (nvdev)
+ if (nvdev) {
cancel_work_sync(&nvdev->subchan_work);
+ netvsc_xdp_set(net, NULL, NULL, nvdev);
+ }
/*
* Call to the vsc driver to let it know that the device is being
@@ -2424,6 +2607,63 @@
return 0;
}
+static int netvsc_suspend(struct hv_device *dev)
+{
+ struct net_device_context *ndev_ctx;
+ struct netvsc_device *nvdev;
+ struct net_device *net;
+ int ret;
+
+ net = hv_get_drvdata(dev);
+
+ ndev_ctx = netdev_priv(net);
+ cancel_delayed_work_sync(&ndev_ctx->dwork);
+
+ rtnl_lock();
+
+ nvdev = rtnl_dereference(ndev_ctx->nvdev);
+ if (nvdev == NULL) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ /* Save the current config info */
+ ndev_ctx->saved_netvsc_dev_info = netvsc_devinfo_get(nvdev);
+
+ ret = netvsc_detach(net, nvdev);
+out:
+ rtnl_unlock();
+
+ return ret;
+}
+
+static int netvsc_resume(struct hv_device *dev)
+{
+ struct net_device *net = hv_get_drvdata(dev);
+ struct net_device_context *net_device_ctx;
+ struct netvsc_device_info *device_info;
+ int ret;
+
+ rtnl_lock();
+
+ net_device_ctx = netdev_priv(net);
+
+ /* Reset the data path to the netvsc NIC before re-opening the vmbus
+ * channel. Later netvsc_netdev_event() will switch the data path to
+ * the VF upon the UP or CHANGE event.
+ */
+ net_device_ctx->data_path_is_vf = false;
+ device_info = net_device_ctx->saved_netvsc_dev_info;
+
+ ret = netvsc_attach(net, device_info);
+
+ netvsc_devinfo_put(device_info);
+ net_device_ctx->saved_netvsc_dev_info = NULL;
+
+ rtnl_unlock();
+
+ return ret;
+}
static const struct hv_vmbus_device_id id_table[] = {
/* Network guid */
{ HV_NIC_GUID, },
@@ -2438,6 +2678,8 @@
.id_table = id_table,
.probe = netvsc_probe,
.remove = netvsc_remove,
+ .suspend = netvsc_suspend,
+ .resume = netvsc_resume,
.driver = {
.probe_type = PROBE_FORCE_SYNCHRONOUS,
},
@@ -2478,6 +2720,7 @@
return netvsc_unregister_vf(event_dev);
case NETDEV_UP:
case NETDEV_DOWN:
+ case NETDEV_CHANGE:
return netvsc_vf_changed(event_dev);
default:
return NOTIFY_DONE;
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index b9e44bb..90bc000 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -25,7 +25,7 @@
static void rndis_set_multicast(struct work_struct *w);
-#define RNDIS_EXT_LEN PAGE_SIZE
+#define RNDIS_EXT_LEN HV_HYP_PAGE_SIZE
struct rndis_request {
struct list_head list_ent;
struct completion wait_event;
@@ -215,18 +215,17 @@
packet->page_buf_cnt = 1;
pb[0].pfn = virt_to_phys(&req->request_msg) >>
- PAGE_SHIFT;
+ HV_HYP_PAGE_SHIFT;
pb[0].len = req->request_msg.msg_len;
- pb[0].offset =
- (unsigned long)&req->request_msg & (PAGE_SIZE - 1);
+ pb[0].offset = offset_in_hvpage(&req->request_msg);
/* Add one page_buf when request_msg crossing page boundary */
- if (pb[0].offset + pb[0].len > PAGE_SIZE) {
+ if (pb[0].offset + pb[0].len > HV_HYP_PAGE_SIZE) {
packet->page_buf_cnt++;
- pb[0].len = PAGE_SIZE -
+ pb[0].len = HV_HYP_PAGE_SIZE -
pb[0].offset;
pb[1].pfn = virt_to_phys((void *)&req->request_msg
- + pb[0].len) >> PAGE_SHIFT;
+ + pb[0].len) >> HV_HYP_PAGE_SHIFT;
pb[1].offset = 0;
pb[1].len = req->request_msg.msg_len -
pb[0].len;
@@ -235,7 +234,7 @@
trace_rndis_send(dev->ndev, 0, &req->request_msg);
rcu_read_lock_bh();
- ret = netvsc_send(dev->ndev, packet, NULL, pb, NULL);
+ ret = netvsc_send(dev->ndev, packet, NULL, pb, NULL, false);
rcu_read_unlock_bh();
return ret;
@@ -275,6 +274,16 @@
return;
}
+ /* Ensure the packet is big enough to read req_id. Req_id is the 1st
+ * field in any request/response message, so the payload should have at
+ * least sizeof(u32) bytes
+ */
+ if (resp->msg_len - RNDIS_HEADER_SIZE < sizeof(u32)) {
+ netdev_err(ndev, "rndis msg_len too small: %u\n",
+ resp->msg_len);
+ return;
+ }
+
spin_lock_irqsave(&dev->request_lock, flags);
list_for_each_entry(request, &dev->req_list, list_ent) {
/*
@@ -331,8 +340,9 @@
* Get the Per-Packet-Info with the specified type
* return NULL if not found.
*/
-static inline void *rndis_get_ppi(struct rndis_packet *rpkt,
- u32 type, u8 internal)
+static inline void *rndis_get_ppi(struct net_device *ndev,
+ struct rndis_packet *rpkt,
+ u32 rpkt_len, u32 type, u8 internal)
{
struct rndis_per_packet_info *ppi;
int len;
@@ -340,11 +350,36 @@
if (rpkt->per_pkt_info_offset == 0)
return NULL;
+ /* Validate info_offset and info_len */
+ if (rpkt->per_pkt_info_offset < sizeof(struct rndis_packet) ||
+ rpkt->per_pkt_info_offset > rpkt_len) {
+ netdev_err(ndev, "Invalid per_pkt_info_offset: %u\n",
+ rpkt->per_pkt_info_offset);
+ return NULL;
+ }
+
+ if (rpkt->per_pkt_info_len > rpkt_len - rpkt->per_pkt_info_offset) {
+ netdev_err(ndev, "Invalid per_pkt_info_len: %u\n",
+ rpkt->per_pkt_info_len);
+ return NULL;
+ }
+
ppi = (struct rndis_per_packet_info *)((ulong)rpkt +
rpkt->per_pkt_info_offset);
len = rpkt->per_pkt_info_len;
while (len > 0) {
+ /* Validate ppi_offset and ppi_size */
+ if (ppi->size > len) {
+ netdev_err(ndev, "Invalid ppi size: %u\n", ppi->size);
+ continue;
+ }
+
+ if (ppi->ppi_offset >= ppi->size) {
+ netdev_err(ndev, "Invalid ppi_offset: %u\n", ppi->ppi_offset);
+ continue;
+ }
+
if (ppi->type == type && ppi->internal == internal)
return (void *)((ulong)ppi + ppi->ppi_offset);
len -= ppi->size;
@@ -358,6 +393,7 @@
void rsc_add_data(struct netvsc_channel *nvchan,
const struct ndis_pkt_8021q_info *vlan,
const struct ndis_tcp_ip_checksum_info *csum_info,
+ const u32 *hash_info,
void *data, u32 len)
{
u32 cnt = nvchan->rsc.cnt;
@@ -368,6 +404,7 @@
nvchan->rsc.vlan = vlan;
nvchan->rsc.csum_info = csum_info;
nvchan->rsc.pktlen = len;
+ nvchan->rsc.hash_info = hash_info;
}
nvchan->rsc.data[cnt] = data;
@@ -385,14 +422,30 @@
const struct ndis_tcp_ip_checksum_info *csum_info;
const struct ndis_pkt_8021q_info *vlan;
const struct rndis_pktinfo_id *pktinfo_id;
- u32 data_offset;
+ const u32 *hash_info;
+ u32 data_offset, rpkt_len;
void *data;
bool rsc_more = false;
int ret;
+ /* Ensure data_buflen is big enough to read header fields */
+ if (data_buflen < RNDIS_HEADER_SIZE + sizeof(struct rndis_packet)) {
+ netdev_err(ndev, "invalid rndis pkt, data_buflen too small: %u\n",
+ data_buflen);
+ return NVSP_STAT_FAIL;
+ }
+
+ /* Validate rndis_pkt offset */
+ if (rndis_pkt->data_offset >= data_buflen - RNDIS_HEADER_SIZE) {
+ netdev_err(ndev, "invalid rndis packet offset: %u\n",
+ rndis_pkt->data_offset);
+ return NVSP_STAT_FAIL;
+ }
+
/* Remove the rndis header and pass it back up the stack */
data_offset = RNDIS_HEADER_SIZE + rndis_pkt->data_offset;
+ rpkt_len = data_buflen - RNDIS_HEADER_SIZE;
data_buflen -= data_offset;
/*
@@ -407,11 +460,13 @@
return NVSP_STAT_FAIL;
}
- vlan = rndis_get_ppi(rndis_pkt, IEEE_8021Q_INFO, 0);
+ vlan = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, IEEE_8021Q_INFO, 0);
- csum_info = rndis_get_ppi(rndis_pkt, TCPIP_CHKSUM_PKTINFO, 0);
+ csum_info = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, TCPIP_CHKSUM_PKTINFO, 0);
- pktinfo_id = rndis_get_ppi(rndis_pkt, RNDIS_PKTINFO_ID, 1);
+ hash_info = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, NBL_HASH_VALUE, 0);
+
+ pktinfo_id = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, RNDIS_PKTINFO_ID, 1);
data = (void *)msg + data_offset;
@@ -441,7 +496,8 @@
* rndis_pkt->data_len tell us the real data length, we only copy
* the data packet to the stack, without the rndis trailer padding
*/
- rsc_add_data(nvchan, vlan, csum_info, data, rndis_pkt->data_len);
+ rsc_add_data(nvchan, vlan, csum_info, hash_info,
+ data, rndis_pkt->data_len);
if (rsc_more)
return NVSP_STAT_SUCCESS;
@@ -452,8 +508,6 @@
return ret;
drop:
- /* Drop incomplete packet */
- nvchan->rsc.cnt = 0;
return NVSP_STAT_FAIL;
}
@@ -468,6 +522,14 @@
if (netif_msg_rx_status(net_device_ctx))
dump_rndis_message(ndev, rndis_msg);
+ /* Validate incoming rndis_message packet */
+ if (buflen < RNDIS_HEADER_SIZE || rndis_msg->msg_len < RNDIS_HEADER_SIZE ||
+ buflen < rndis_msg->msg_len) {
+ netdev_err(ndev, "Invalid rndis_msg (buflen: %u, msg_len: %u)\n",
+ buflen, rndis_msg->msg_len);
+ return NVSP_STAT_FAIL;
+ }
+
switch (rndis_msg->ndis_msg_type) {
case RNDIS_MSG_PACKET:
return rndis_filter_receive_data(ndev, net_dev, nvchan,
@@ -1209,6 +1271,7 @@
/* Compute tx offload settings based on hw capabilities */
net->hw_features |= NETIF_F_RXCSUM;
net->hw_features |= NETIF_F_SG;
+ net->hw_features |= NETIF_F_RXHASH;
if ((hwcaps.csum.ip4_txcsum & NDIS_TXCSUM_ALL_TCP4) == NDIS_TXCSUM_ALL_TCP4) {
/* Can checksum TCP */