Update Linux to v5.4.2
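
Among other changes, this uprev brings drivers/net/virtio_net.c up to
v5.4.2. Notable changes in that driver:

* Convert the license boilerplate to an SPDX identifier and drop the
  unused <linux/pci.h> include.
* Enable napi_tx by default.
* Tag xdp_frame pointers queued on the TX ring with VIRTIO_XDP_FLAG so
  the completion paths (free_old_xmit_skbs(), virtnet_xdp_xmit(),
  free_unused_bufs()) can tell sk_buffs from XDP frames, account TX
  bytes/packets for XDP, and free unused frames with xdp_return_frame().
* Skip XDP TX queues in NAPI TX polling via is_xdp_raw_buffer_queue()
  and pass an in_napi flag to free_old_xmit_skbs() so completions go
  through napi_consume_skb().
* Record the RX queue on received skbs, cap the RX refill threshold by
  the NAPI budget, switch to netdev_xmit_more() and nf_reset_ct(), and
  drop the unused cpu argument from virtnet_clean_affinity().
* Add ethtool {get,set}_coalesce so tx-frames can toggle napi_tx, and
  expose NETIF_F_LRO through ndo_set_features backed by the new
  guest_offloads_capable mask.
* Rework XDP program attach/detach ordering around queue
  reconfiguration, including disabling TX NAPI while programs are
  swapped.

For reference, the TX virtqueue cookie now carries either an sk_buff or
an xdp_frame, distinguished by the pointer's low bit (VIRTIO_XDP_FLAG).
The standalone sketch below is purely illustrative (not part of the
kernel change) and only assumes that allocations are at least 2-byte
aligned, so bit 0 of a valid pointer is always clear:

  /* Minimal demonstration of the low-bit pointer-tagging idea used by
   * VIRTIO_XDP_FLAG in this patch.
   */
  #include <stdint.h>
  #include <stdio.h>
  #include <stdlib.h>

  #define XDP_FLAG 0x1UL

  static void *tag_xdp(void *p)   { return (void *)((uintptr_t)p | XDP_FLAG); }
  static int is_xdp(void *p)      { return ((uintptr_t)p & XDP_FLAG) != 0; }
  static void *untag_xdp(void *p) { return (void *)((uintptr_t)p & ~XDP_FLAG); }

  int main(void)
  {
          void *frame = malloc(64);      /* stands in for a struct xdp_frame */
          void *cookie = tag_xdp(frame); /* what would be queued on the ring */

          printf("tagged: %d, round-trip ok: %d\n",
                 is_xdp(cookie), untag_xdp(cookie) == frame);
          free(frame);
          return 0;
  }
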
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index ad14fbf..5a635f0 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1,19 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* A network driver using virtio.
*
* Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
//#define DEBUG
#include <linux/netdevice.h>
@@ -31,7 +19,6 @@
#include <linux/average.h>
#include <linux/filter.h>
#include <linux/kernel.h>
-#include <linux/pci.h>
#include <net/route.h>
#include <net/xdp.h>
#include <net/net_failover.h>
@@ -39,7 +26,7 @@
static int napi_weight = NAPI_POLL_WEIGHT;
module_param(napi_weight, int, 0444);
-static bool csum = true, gso = true, napi_tx;
+static bool csum = true, gso = true, napi_tx = true;
module_param(csum, bool, 0444);
module_param(gso, bool, 0444);
module_param(napi_tx, bool, 0644);
@@ -57,6 +44,8 @@
#define VIRTIO_XDP_TX BIT(0)
#define VIRTIO_XDP_REDIR BIT(1)
+#define VIRTIO_XDP_FLAG BIT(0)
+
/* RX packet size EWMA. The average packet size is used to determine the packet
* buffer size when refilling RX rings. As the entire RX ring may be refilled
* at once, the weight is chosen so that the EWMA will be insensitive to short-
@@ -236,6 +225,7 @@
u32 speed;
unsigned long guest_offloads;
+ unsigned long guest_offloads_capable;
/* failover when STANDBY feature enabled */
struct failover *failover;
@@ -251,6 +241,21 @@
char padding[4];
};
+static bool is_xdp_frame(void *ptr)
+{
+ return (unsigned long)ptr & VIRTIO_XDP_FLAG;
+}
+
+static void *xdp_to_ptr(struct xdp_frame *ptr)
+{
+ return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG);
+}
+
+static struct xdp_frame *ptr_to_xdp(void *ptr)
+{
+ return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG);
+}
+
/* Converting between virtqueue no. and kernel tx/rx queue no.
* 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
*/
@@ -461,7 +466,8 @@
sg_init_one(sq->sg, xdpf->data, xdpf->len);
- err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdpf, GFP_ATOMIC);
+ err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp_to_ptr(xdpf),
+ GFP_ATOMIC);
if (unlikely(err))
return -ENOSPC; /* Caller handle free/refcnt */
@@ -481,15 +487,24 @@
{
struct virtnet_info *vi = netdev_priv(dev);
struct receive_queue *rq = vi->rq;
- struct xdp_frame *xdpf_sent;
struct bpf_prog *xdp_prog;
struct send_queue *sq;
unsigned int len;
+ int packets = 0;
+ int bytes = 0;
int drops = 0;
int kicks = 0;
int ret, err;
+ void *ptr;
int i;
+ /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
+ * indicates XDP resources have been successfully allocated.
+ */
+ xdp_prog = rcu_dereference(rq->xdp_prog);
+ if (!xdp_prog)
+ return -ENXIO;
+
sq = virtnet_xdp_sq(vi);
if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
@@ -498,19 +513,21 @@
goto out;
}
- /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
- * indicate XDP resources have been successfully allocated.
- */
- xdp_prog = rcu_dereference(rq->xdp_prog);
- if (!xdp_prog) {
- ret = -ENXIO;
- drops = n;
- goto out;
- }
-
/* Free up any pending old buffers before queueing new ones. */
- while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
- xdp_return_frame(xdpf_sent);
+ while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
+ if (likely(is_xdp_frame(ptr))) {
+ struct xdp_frame *frame = ptr_to_xdp(ptr);
+
+ bytes += frame->len;
+ xdp_return_frame(frame);
+ } else {
+ struct sk_buff *skb = ptr;
+
+ bytes += skb->len;
+ napi_consume_skb(skb, false);
+ }
+ packets++;
+ }
for (i = 0; i < n; i++) {
struct xdp_frame *xdpf = frames[i];
@@ -529,6 +546,8 @@
}
out:
u64_stats_update_begin(&sq->stats.syncp);
+ sq->stats.bytes += bytes;
+ sq->stats.packets += packets;
sq->stats.xdp_tx += n;
sq->stats.xdp_tx_drops += drops;
sq->stats.kicks += kicks;
@@ -1034,6 +1053,7 @@
goto frame_err;
}
+ skb_record_rx_queue(skb, vq2rxq(rq->vq));
skb->protocol = eth_type_trans(skb, dev);
pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
ntohs(skb->protocol), skb->len, skb->pkt_type);
@@ -1311,7 +1331,7 @@
}
}
- if (rq->vq->num_free > virtqueue_get_vring_size(rq->vq) / 2) {
+ if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) {
if (!try_fill_recv(vi, rq, GFP_ATOMIC))
schedule_delayed_work(&vi->refill, 0);
}
@@ -1329,20 +1349,28 @@
return stats.packets;
}
-static void free_old_xmit_skbs(struct send_queue *sq)
+static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi)
{
- struct sk_buff *skb;
unsigned int len;
unsigned int packets = 0;
unsigned int bytes = 0;
+ void *ptr;
- while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
- pr_debug("Sent skb %p\n", skb);
+ while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
+ if (likely(!is_xdp_frame(ptr))) {
+ struct sk_buff *skb = ptr;
- bytes += skb->len;
+ pr_debug("Sent skb %p\n", skb);
+
+ bytes += skb->len;
+ napi_consume_skb(skb, in_napi);
+ } else {
+ struct xdp_frame *frame = ptr_to_xdp(ptr);
+
+ bytes += frame->len;
+ xdp_return_frame(frame);
+ }
packets++;
-
- dev_consume_skb_any(skb);
}
/* Avoid overhead when no packets have been processed
@@ -1357,6 +1385,16 @@
u64_stats_update_end(&sq->stats.syncp);
}
+static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
+{
+ if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
+ return false;
+ else if (q < vi->curr_queue_pairs)
+ return true;
+ else
+ return false;
+}
+
static void virtnet_poll_cleantx(struct receive_queue *rq)
{
struct virtnet_info *vi = rq->vq->vdev->priv;
@@ -1364,11 +1402,11 @@
struct send_queue *sq = &vi->sq[index];
struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index);
- if (!sq->napi.weight)
+ if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index))
return;
if (__netif_tx_trylock(txq)) {
- free_old_xmit_skbs(sq);
+ free_old_xmit_skbs(sq, true);
__netif_tx_unlock(txq);
}
@@ -1441,10 +1479,18 @@
{
struct send_queue *sq = container_of(napi, struct send_queue, napi);
struct virtnet_info *vi = sq->vq->vdev->priv;
- struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, vq2txq(sq->vq));
+ unsigned int index = vq2txq(sq->vq);
+ struct netdev_queue *txq;
+ if (unlikely(is_xdp_raw_buffer_queue(vi, index))) {
+ /* We don't need to enable cb for XDP */
+ napi_complete_done(napi, 0);
+ return 0;
+ }
+
+ txq = netdev_get_tx_queue(vi->dev, index);
__netif_tx_lock(txq, raw_smp_processor_id());
- free_old_xmit_skbs(sq);
+ free_old_xmit_skbs(sq, true);
__netif_tx_unlock(txq);
virtqueue_napi_complete(napi, sq->vq, 0);
@@ -1509,11 +1555,11 @@
struct send_queue *sq = &vi->sq[qnum];
int err;
struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
- bool kick = !skb->xmit_more;
+ bool kick = !netdev_xmit_more();
bool use_napi = sq->napi.weight;
/* Free up any pending old buffers before queueing new ones. */
- free_old_xmit_skbs(sq);
+ free_old_xmit_skbs(sq, false);
if (use_napi && kick)
virtqueue_enable_cb_delayed(sq->vq);
@@ -1529,7 +1575,8 @@
dev->stats.tx_fifo_errors++;
if (net_ratelimit())
dev_warn(&dev->dev,
- "Unexpected TXQ (%d) queue failure: %d\n", qnum, err);
+ "Unexpected TXQ (%d) queue failure: %d\n",
+ qnum, err);
dev->stats.tx_dropped++;
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
@@ -1538,7 +1585,7 @@
/* Don't wait up for transmitted skbs to be freed. */
if (!use_napi) {
skb_orphan(skb);
- nf_reset(skb);
+ nf_reset_ct(skb);
}
/* If running out of space, stop queue to avoid getting packets that we
@@ -1556,7 +1603,7 @@
if (!use_napi &&
unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
/* More just got used, free them then recheck. */
- free_old_xmit_skbs(sq);
+ free_old_xmit_skbs(sq, false);
if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
netif_start_subqueue(dev, qnum);
virtqueue_disable_cb(sq->vq);
@@ -1866,7 +1913,7 @@
return 0;
}
-static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu)
+static void virtnet_clean_affinity(struct virtnet_info *vi)
{
int i;
@@ -1890,7 +1937,7 @@
int stride;
if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
- virtnet_clean_affinity(vi, -1);
+ virtnet_clean_affinity(vi);
return;
}
@@ -1940,7 +1987,7 @@
struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
node);
- virtnet_clean_affinity(vi, cpu);
+ virtnet_clean_affinity(vi);
return 0;
}
@@ -2175,6 +2222,53 @@
return 0;
}
+static int virtnet_set_coalesce(struct net_device *dev,
+ struct ethtool_coalesce *ec)
+{
+ struct ethtool_coalesce ec_default = {
+ .cmd = ETHTOOL_SCOALESCE,
+ .rx_max_coalesced_frames = 1,
+ };
+ struct virtnet_info *vi = netdev_priv(dev);
+ int i, napi_weight;
+
+ if (ec->tx_max_coalesced_frames > 1)
+ return -EINVAL;
+
+ ec_default.tx_max_coalesced_frames = ec->tx_max_coalesced_frames;
+ napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0;
+
+ /* disallow changes to fields not explicitly tested above */
+ if (memcmp(ec, &ec_default, sizeof(ec_default)))
+ return -EINVAL;
+
+ if (napi_weight ^ vi->sq[0].napi.weight) {
+ if (dev->flags & IFF_UP)
+ return -EBUSY;
+ for (i = 0; i < vi->max_queue_pairs; i++)
+ vi->sq[i].napi.weight = napi_weight;
+ }
+
+ return 0;
+}
+
+static int virtnet_get_coalesce(struct net_device *dev,
+ struct ethtool_coalesce *ec)
+{
+ struct ethtool_coalesce ec_default = {
+ .cmd = ETHTOOL_GCOALESCE,
+ .rx_max_coalesced_frames = 1,
+ };
+ struct virtnet_info *vi = netdev_priv(dev);
+
+ memcpy(ec, &ec_default, sizeof(ec_default));
+
+ if (vi->sq[0].napi.weight)
+ ec->tx_max_coalesced_frames = 1;
+
+ return 0;
+}
+
static void virtnet_init_settings(struct net_device *dev)
{
struct virtnet_info *vi = netdev_priv(dev);
@@ -2213,6 +2307,8 @@
.get_ts_info = ethtool_op_get_ts_info,
.get_link_ksettings = virtnet_get_link_ksettings,
.set_link_ksettings = virtnet_set_link_ksettings,
+ .set_coalesce = virtnet_set_coalesce,
+ .get_coalesce = virtnet_get_coalesce,
};
static void virtnet_freeze_down(struct virtio_device *vdev)
@@ -2276,7 +2372,7 @@
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS,
VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) {
- dev_warn(&vi->dev->dev, "Fail to set guest offload. \n");
+ dev_warn(&vi->dev->dev, "Fail to set guest offload.\n");
return -EINVAL;
}
@@ -2345,6 +2441,10 @@
return -ENOMEM;
}
+ old_prog = rtnl_dereference(vi->rq[0].xdp_prog);
+ if (!prog && !old_prog)
+ return 0;
+
if (prog) {
prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
if (IS_ERR(prog))
@@ -2352,36 +2452,62 @@
}
/* Make sure NAPI is not using any XDP TX queues for RX. */
- if (netif_running(dev))
- for (i = 0; i < vi->max_queue_pairs; i++)
+ if (netif_running(dev)) {
+ for (i = 0; i < vi->max_queue_pairs; i++) {
napi_disable(&vi->rq[i].napi);
+ virtnet_napi_tx_disable(&vi->sq[i].napi);
+ }
+ }
- netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
+ if (!prog) {
+ for (i = 0; i < vi->max_queue_pairs; i++) {
+ rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
+ if (i == 0)
+ virtnet_restore_guest_offloads(vi);
+ }
+ synchronize_net();
+ }
+
err = _virtnet_set_queues(vi, curr_qp + xdp_qp);
if (err)
goto err;
+ netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
vi->xdp_queue_pairs = xdp_qp;
- for (i = 0; i < vi->max_queue_pairs; i++) {
- old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
- rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
- if (i == 0) {
- if (!old_prog)
+ if (prog) {
+ for (i = 0; i < vi->max_queue_pairs; i++) {
+ rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
+ if (i == 0 && !old_prog)
virtnet_clear_guest_offloads(vi);
- if (!prog)
- virtnet_restore_guest_offloads(vi);
}
+ }
+
+ for (i = 0; i < vi->max_queue_pairs; i++) {
if (old_prog)
bpf_prog_put(old_prog);
- if (netif_running(dev))
+ if (netif_running(dev)) {
virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
+ virtnet_napi_tx_enable(vi, vi->sq[i].vq,
+ &vi->sq[i].napi);
+ }
}
return 0;
err:
- for (i = 0; i < vi->max_queue_pairs; i++)
- virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
+ if (!prog) {
+ virtnet_clear_guest_offloads(vi);
+ for (i = 0; i < vi->max_queue_pairs; i++)
+ rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog);
+ }
+
+ if (netif_running(dev)) {
+ for (i = 0; i < vi->max_queue_pairs; i++) {
+ virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
+ virtnet_napi_tx_enable(vi, vi->sq[i].vq,
+ &vi->sq[i].napi);
+ }
+ }
if (prog)
bpf_prog_sub(prog, vi->max_queue_pairs - 1);
return err;
@@ -2430,6 +2556,31 @@
return 0;
}
+static int virtnet_set_features(struct net_device *dev,
+ netdev_features_t features)
+{
+ struct virtnet_info *vi = netdev_priv(dev);
+ u64 offloads;
+ int err;
+
+ if ((dev->features ^ features) & NETIF_F_LRO) {
+ if (vi->xdp_queue_pairs)
+ return -EBUSY;
+
+ if (features & NETIF_F_LRO)
+ offloads = vi->guest_offloads_capable;
+ else
+ offloads = 0;
+
+ err = virtnet_set_guest_offloads(vi, offloads);
+ if (err)
+ return err;
+ vi->guest_offloads = offloads;
+ }
+
+ return 0;
+}
+
static const struct net_device_ops virtnet_netdev = {
.ndo_open = virtnet_open,
.ndo_stop = virtnet_close,
@@ -2444,6 +2595,7 @@
.ndo_xdp_xmit = virtnet_xdp_xmit,
.ndo_features_check = passthru_features_check,
.ndo_get_phys_port_name = virtnet_get_phys_port_name,
+ .ndo_set_features = virtnet_set_features,
};
static void virtnet_config_changed_work(struct work_struct *work)
@@ -2537,16 +2689,6 @@
put_page(vi->rq[i].alloc_frag.page);
}
-static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
-{
- if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
- return false;
- else if (q < vi->curr_queue_pairs)
- return true;
- else
- return false;
-}
-
static void free_unused_bufs(struct virtnet_info *vi)
{
void *buf;
@@ -2555,10 +2697,10 @@
for (i = 0; i < vi->max_queue_pairs; i++) {
struct virtqueue *vq = vi->sq[i].vq;
while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
- if (!is_xdp_raw_buffer_queue(vi, i))
+ if (!is_xdp_frame(buf))
dev_kfree_skb(buf);
else
- put_page(virt_to_head_page(buf));
+ xdp_return_frame(ptr_to_xdp(buf));
}
}
@@ -2581,7 +2723,7 @@
{
struct virtio_device *vdev = vi->vdev;
- virtnet_clean_affinity(vi, -1);
+ virtnet_clean_affinity(vi);
vdev->config->del_vqs(vdev);
@@ -2902,6 +3044,11 @@
}
if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM))
dev->features |= NETIF_F_RXCSUM;
+ if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
+ virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6))
+ dev->features |= NETIF_F_LRO;
+ if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS))
+ dev->hw_features |= NETIF_F_LRO;
dev->vlan_features = dev->features;
@@ -2956,8 +3103,9 @@
/* Should never trigger: MTU was previously validated
* in virtnet_validate.
*/
- dev_err(&vdev->dev, "device MTU appears to have changed "
- "it is now %d < %d", mtu, dev->min_mtu);
+ dev_err(&vdev->dev,
+ "device MTU appears to have changed it is now %d < %d",
+ mtu, dev->min_mtu);
goto free;
}
@@ -3031,6 +3179,7 @@
for (i = 0; i < ARRAY_SIZE(guest_offloads); i++)
if (virtio_has_feature(vi->vdev, guest_offloads[i]))
set_bit(guest_offloads[i], &vi->guest_offloads);
+ vi->guest_offloads_capable = vi->guest_offloads;
pr_debug("virtnet: registered device %s with %d RX and TX vq's\n",
dev->name, max_queue_pairs);