Update Linux to v5.10.157
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.157.tar.xz
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
Change-Id: I7b30d9e98d8c465d6b44de8e7433b4a40b3289ba
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index d12c9a8..64a94c9 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -278,9 +278,7 @@
return 0;
out_free_newdev:
- if (new_dev->reg_state == NETREG_UNINITIALIZED ||
- new_dev->reg_state == NETREG_UNREGISTERED)
- free_netdev(new_dev);
+ free_netdev(new_dev);
return err;
}
diff --git a/net/9p/client.c b/net/9p/client.c
index bf6ed00..e8862cd 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -893,16 +893,13 @@
struct p9_fid *fid;
p9_debug(P9_DEBUG_FID, "clnt %p\n", clnt);
- fid = kmalloc(sizeof(struct p9_fid), GFP_KERNEL);
+ fid = kzalloc(sizeof(struct p9_fid), GFP_KERNEL);
if (!fid)
return NULL;
- memset(&fid->qid, 0, sizeof(struct p9_qid));
fid->mode = -1;
fid->uid = current_fsuid();
fid->clnt = clnt;
- fid->rdir = NULL;
- fid->fid = 0;
idr_preload(GFP_KERNEL);
spin_lock_irq(&clnt->lock);
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 8f528e7..4002198 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -200,11 +200,15 @@
list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
list_move(&req->req_list, &cancel_list);
+ req->status = REQ_STATUS_ERROR;
}
list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) {
list_move(&req->req_list, &cancel_list);
+ req->status = REQ_STATUS_ERROR;
}
+ spin_unlock(&m->client->lock);
+
list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req);
list_del(&req->req_list);
@@ -212,7 +216,6 @@
req->t_err = err;
p9_client_cb(m->client, req, REQ_STATUS_ERROR);
}
- spin_unlock(&m->client->lock);
}
static __poll_t
@@ -820,11 +823,14 @@
goto out_free_ts;
if (!(ts->rd->f_mode & FMODE_READ))
goto out_put_rd;
+ /* prevent workers from hanging on IO when fd is a pipe */
+ ts->rd->f_flags |= O_NONBLOCK;
ts->wr = fget(wfd);
if (!ts->wr)
goto out_put_rd;
if (!(ts->wr->f_mode & FMODE_WRITE))
goto out_put_wr;
+ ts->wr->f_flags |= O_NONBLOCK;
client->trans = ts;
client->status = Connected;
diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c
index 829db9e..aaf64b9 100644
--- a/net/atm/mpoa_proc.c
+++ b/net/atm/mpoa_proc.c
@@ -219,11 +219,12 @@
if (!page)
return -ENOMEM;
- for (p = page, len = 0; len < nbytes; p++, len++) {
+ for (p = page, len = 0; len < nbytes; p++) {
if (get_user(*p, buff++)) {
free_page((unsigned long)page);
return -EFAULT;
}
+ len += 1;
if (*p == '\0' || *p == '\n')
break;
}
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 9e0eef7..a1f4cb8 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -89,17 +89,21 @@
sk = s->sk;
if (!sk) {
spin_unlock_bh(&ax25_list_lock);
- s->ax25_dev = NULL;
ax25_disconnect(s, ENETUNREACH);
+ s->ax25_dev = NULL;
spin_lock_bh(&ax25_list_lock);
goto again;
}
sock_hold(sk);
spin_unlock_bh(&ax25_list_lock);
lock_sock(sk);
- s->ax25_dev = NULL;
- release_sock(sk);
ax25_disconnect(s, ENETUNREACH);
+ s->ax25_dev = NULL;
+ if (sk->sk_socket) {
+ dev_put(ax25_dev->dev);
+ ax25_dev_put(ax25_dev);
+ }
+ release_sock(sk);
spin_lock_bh(&ax25_list_lock);
sock_put(sk);
/* The entry could have been deleted from the
@@ -365,21 +369,25 @@
if (copy_from_user(&ax25_ctl, arg, sizeof(ax25_ctl)))
return -EFAULT;
- if ((ax25_dev = ax25_addr_ax25dev(&ax25_ctl.port_addr)) == NULL)
- return -ENODEV;
-
if (ax25_ctl.digi_count > AX25_MAX_DIGIS)
return -EINVAL;
if (ax25_ctl.arg > ULONG_MAX / HZ && ax25_ctl.cmd != AX25_KILL)
return -EINVAL;
+ ax25_dev = ax25_addr_ax25dev(&ax25_ctl.port_addr);
+ if (!ax25_dev)
+ return -ENODEV;
+
digi.ndigi = ax25_ctl.digi_count;
for (k = 0; k < digi.ndigi; k++)
digi.calls[k] = ax25_ctl.digi_addr[k];
- if ((ax25 = ax25_find_cb(&ax25_ctl.source_addr, &ax25_ctl.dest_addr, &digi, ax25_dev->dev)) == NULL)
+ ax25 = ax25_find_cb(&ax25_ctl.source_addr, &ax25_ctl.dest_addr, &digi, ax25_dev->dev);
+ if (!ax25) {
+ ax25_dev_put(ax25_dev);
return -ENOTCONN;
+ }
switch (ax25_ctl.cmd) {
case AX25_KILL:
@@ -446,6 +454,7 @@
}
out_put:
+ ax25_dev_put(ax25_dev);
ax25_cb_put(ax25);
return ret;
@@ -971,14 +980,16 @@
{
struct sock *sk = sock->sk;
ax25_cb *ax25;
+ ax25_dev *ax25_dev;
if (sk == NULL)
return 0;
sock_hold(sk);
- sock_orphan(sk);
lock_sock(sk);
+ sock_orphan(sk);
ax25 = sk_to_ax25(sk);
+ ax25_dev = ax25->ax25_dev;
if (sk->sk_type == SOCK_SEQPACKET) {
switch (ax25->state) {
@@ -1040,6 +1051,15 @@
sk->sk_state_change(sk);
ax25_destroy_socket(ax25);
}
+ if (ax25_dev) {
+ del_timer_sync(&ax25->timer);
+ del_timer_sync(&ax25->t1timer);
+ del_timer_sync(&ax25->t2timer);
+ del_timer_sync(&ax25->t3timer);
+ del_timer_sync(&ax25->idletimer);
+ dev_put(ax25_dev->dev);
+ ax25_dev_put(ax25_dev);
+ }
sock->sk = NULL;
release_sock(sk);
@@ -1116,8 +1136,10 @@
}
}
- if (ax25_dev != NULL)
+ if (ax25_dev) {
ax25_fillin_cb(ax25, ax25_dev);
+ dev_hold(ax25_dev->dev);
+ }
done:
ax25_cb_add(ax25);
@@ -1631,9 +1653,12 @@
int flags)
{
struct sock *sk = sock->sk;
- struct sk_buff *skb;
+ struct sk_buff *skb, *last;
+ struct sk_buff_head *sk_queue;
int copied;
int err = 0;
+ int off = 0;
+ long timeo;
lock_sock(sk);
/*
@@ -1645,11 +1670,29 @@
goto out;
}
- /* Now we can treat all alike */
- skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
- flags & MSG_DONTWAIT, &err);
- if (skb == NULL)
- goto out;
+ /* We need support for non-blocking reads. */
+ sk_queue = &sk->sk_receive_queue;
+ skb = __skb_try_recv_datagram(sk, sk_queue, flags, &off, &err, &last);
+ /* If no packet is available, release_sock(sk) and try again. */
+ if (!skb) {
+ if (err != -EAGAIN)
+ goto out;
+ release_sock(sk);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+ while (timeo && !__skb_wait_for_more_packets(sk, sk_queue, &err,
+ &timeo, last)) {
+ skb = __skb_try_recv_datagram(sk, sk_queue, flags, &off,
+ &err, &last);
+ if (skb)
+ break;
+
+ if (err != -EAGAIN)
+ goto done;
+ }
+ if (!skb)
+ goto done;
+ lock_sock(sk);
+ }
if (!sk_to_ax25(sk)->pidincl)
skb_pull(skb, 1); /* Remove PID */
@@ -1696,6 +1739,7 @@
out:
release_sock(sk);
+done:
return err;
}
diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
index 4ac2e08..d2e0cc6 100644
--- a/net/ax25/ax25_dev.c
+++ b/net/ax25/ax25_dev.c
@@ -37,6 +37,7 @@
for (ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next)
if (ax25cmp(addr, (ax25_address *)ax25_dev->dev->dev_addr) == 0) {
res = ax25_dev;
+ ax25_dev_hold(ax25_dev);
}
spin_unlock_bh(&ax25_dev_lock);
@@ -56,6 +57,7 @@
return;
}
+ refcount_set(&ax25_dev->refcount, 1);
dev->ax25_ptr = ax25_dev;
ax25_dev->dev = dev;
dev_hold(dev);
@@ -84,6 +86,7 @@
ax25_dev->next = ax25_dev_list;
ax25_dev_list = ax25_dev;
spin_unlock_bh(&ax25_dev_lock);
+ ax25_dev_hold(ax25_dev);
ax25_register_dev_sysctl(ax25_dev);
}
@@ -113,9 +116,10 @@
if ((s = ax25_dev_list) == ax25_dev) {
ax25_dev_list = s->next;
spin_unlock_bh(&ax25_dev_lock);
+ ax25_dev_put(ax25_dev);
dev->ax25_ptr = NULL;
dev_put(dev);
- kfree(ax25_dev);
+ ax25_dev_put(ax25_dev);
return;
}
@@ -123,9 +127,10 @@
if (s->next == ax25_dev) {
s->next = ax25_dev->next;
spin_unlock_bh(&ax25_dev_lock);
+ ax25_dev_put(ax25_dev);
dev->ax25_ptr = NULL;
dev_put(dev);
- kfree(ax25_dev);
+ ax25_dev_put(ax25_dev);
return;
}
@@ -133,6 +138,7 @@
}
spin_unlock_bh(&ax25_dev_lock);
dev->ax25_ptr = NULL;
+ ax25_dev_put(ax25_dev);
}
int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd)
@@ -144,20 +150,32 @@
switch (cmd) {
case SIOCAX25ADDFWD:
- if ((fwd_dev = ax25_addr_ax25dev(&fwd->port_to)) == NULL)
+ fwd_dev = ax25_addr_ax25dev(&fwd->port_to);
+ if (!fwd_dev) {
+ ax25_dev_put(ax25_dev);
return -EINVAL;
- if (ax25_dev->forward != NULL)
+ }
+ if (ax25_dev->forward) {
+ ax25_dev_put(fwd_dev);
+ ax25_dev_put(ax25_dev);
return -EINVAL;
+ }
ax25_dev->forward = fwd_dev->dev;
+ ax25_dev_put(fwd_dev);
+ ax25_dev_put(ax25_dev);
break;
case SIOCAX25DELFWD:
- if (ax25_dev->forward == NULL)
+ if (!ax25_dev->forward) {
+ ax25_dev_put(ax25_dev);
return -EINVAL;
+ }
ax25_dev->forward = NULL;
+ ax25_dev_put(ax25_dev);
break;
default:
+ ax25_dev_put(ax25_dev);
return -EINVAL;
}
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index b40e0bc..dc2168d 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -75,11 +75,13 @@
ax25_dev *ax25_dev;
int i;
- if ((ax25_dev = ax25_addr_ax25dev(&route->port_addr)) == NULL)
- return -EINVAL;
if (route->digi_count > AX25_MAX_DIGIS)
return -EINVAL;
+ ax25_dev = ax25_addr_ax25dev(&route->port_addr);
+ if (!ax25_dev)
+ return -EINVAL;
+
write_lock_bh(&ax25_route_lock);
ax25_rt = ax25_route_list;
@@ -91,6 +93,7 @@
if (route->digi_count != 0) {
if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) {
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return -ENOMEM;
}
ax25_rt->digipeat->lastrepeat = -1;
@@ -101,6 +104,7 @@
}
}
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return 0;
}
ax25_rt = ax25_rt->next;
@@ -108,6 +112,7 @@
if ((ax25_rt = kmalloc(sizeof(ax25_route), GFP_ATOMIC)) == NULL) {
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return -ENOMEM;
}
@@ -120,6 +125,7 @@
if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) {
write_unlock_bh(&ax25_route_lock);
kfree(ax25_rt);
+ ax25_dev_put(ax25_dev);
return -ENOMEM;
}
ax25_rt->digipeat->lastrepeat = -1;
@@ -132,6 +138,7 @@
ax25_rt->next = ax25_route_list;
ax25_route_list = ax25_rt;
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return 0;
}
@@ -173,6 +180,7 @@
}
}
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return 0;
}
@@ -215,6 +223,7 @@
out:
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return err;
}
diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c
index 15ab812..3a476e4 100644
--- a/net/ax25/ax25_subr.c
+++ b/net/ax25/ax25_subr.c
@@ -261,12 +261,20 @@
{
ax25_clear_queues(ax25);
- if (!ax25->sk || !sock_flag(ax25->sk, SOCK_DESTROY))
- ax25_stop_heartbeat(ax25);
- ax25_stop_t1timer(ax25);
- ax25_stop_t2timer(ax25);
- ax25_stop_t3timer(ax25);
- ax25_stop_idletimer(ax25);
+ if (reason == ENETUNREACH) {
+ del_timer_sync(&ax25->timer);
+ del_timer_sync(&ax25->t1timer);
+ del_timer_sync(&ax25->t2timer);
+ del_timer_sync(&ax25->t3timer);
+ del_timer_sync(&ax25->idletimer);
+ } else {
+ if (!ax25->sk || !sock_flag(ax25->sk, SOCK_DESTROY))
+ ax25_stop_heartbeat(ax25);
+ ax25_stop_t1timer(ax25);
+ ax25_stop_t2timer(ax25);
+ ax25_stop_t3timer(ax25);
+ ax25_stop_idletimer(ax25);
+ }
ax25->state = AX25_STATE_0;
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index ee9cead..986f707 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -164,6 +164,9 @@
*/
static void batadv_backbone_gw_put(struct batadv_bla_backbone_gw *backbone_gw)
{
+ if (!backbone_gw)
+ return;
+
kref_put(&backbone_gw->refcount, batadv_backbone_gw_release);
}
@@ -199,6 +202,9 @@
*/
static void batadv_claim_put(struct batadv_bla_claim *claim)
{
+ if (!claim)
+ return;
+
kref_put(&claim->refcount, batadv_claim_release);
}
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 0e6e53e..338e4e9 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -128,6 +128,9 @@
*/
static void batadv_dat_entry_put(struct batadv_dat_entry *dat_entry)
{
+ if (!dat_entry)
+ return;
+
kref_put(&dat_entry->refcount, batadv_dat_entry_release);
}
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 1f1f5b0..895d834 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -478,6 +478,17 @@
goto free_skb;
}
+ /* GRO might have added fragments to the fragment list instead of
+ * frags[]. But this is not handled by skb_split and must be
+ * linearized to avoid incorrect length information after all
+ * batman-adv fragments were created and submitted to the
+ * hard-interface
+ */
+ if (skb_has_frag_list(skb) && __skb_linearize(skb)) {
+ ret = -ENOMEM;
+ goto free_skb;
+ }
+
/* Create one header to be copied to all fragments */
frag_header.packet_type = BATADV_UNICAST_FRAG;
frag_header.version = BATADV_COMPAT_VERSION;
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index ef3f85b..62f6f13 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -60,7 +60,7 @@
* after rcu grace period
* @ref: kref pointer of the gw_node
*/
-static void batadv_gw_node_release(struct kref *ref)
+void batadv_gw_node_release(struct kref *ref)
{
struct batadv_gw_node *gw_node;
@@ -71,16 +71,6 @@
}
/**
- * batadv_gw_node_put() - decrement the gw_node refcounter and possibly release
- * it
- * @gw_node: gateway node to free
- */
-void batadv_gw_node_put(struct batadv_gw_node *gw_node)
-{
- kref_put(&gw_node->refcount, batadv_gw_node_release);
-}
-
-/**
* batadv_gw_get_selected_gw_node() - Get currently selected gateway
* @bat_priv: the bat priv with all the soft interface information
*
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index 88b5dba..c5b1de5 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -9,6 +9,7 @@
#include "main.h"
+#include <linux/kref.h>
#include <linux/netlink.h>
#include <linux/seq_file.h>
#include <linux/skbuff.h>
@@ -28,7 +29,7 @@
void batadv_gw_node_delete(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node);
void batadv_gw_node_free(struct batadv_priv *bat_priv);
-void batadv_gw_node_put(struct batadv_gw_node *gw_node);
+void batadv_gw_node_release(struct kref *ref);
struct batadv_gw_node *
batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv);
int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset);
@@ -40,4 +41,17 @@
struct batadv_gw_node *batadv_gw_node_get(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node);
+/**
+ * batadv_gw_node_put() - decrement the gw_node refcounter and possibly release
+ * it
+ * @gw_node: gateway node to free
+ */
+static inline void batadv_gw_node_put(struct batadv_gw_node *gw_node)
+{
+ if (!gw_node)
+ return;
+
+ kref_put(&gw_node->refcount, batadv_gw_node_release);
+}
+
#endif /* _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ */
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index b1855d9..ba5850c 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -113,6 +113,9 @@
*/
static inline void batadv_hardif_put(struct batadv_hard_iface *hard_iface)
{
+ if (!hard_iface)
+ return;
+
kref_put(&hard_iface->refcount, batadv_hardif_release);
}
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 139894c..c8a341c 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -136,7 +136,7 @@
{
struct inet6_dev *in6_dev = __in6_dev_get(dev);
- if (in6_dev && in6_dev->cnf.mc_forwarding)
+ if (in6_dev && atomic_read(&in6_dev->cnf.mc_forwarding))
return BATADV_NO_FLAGS;
else
return BATADV_MCAST_WANT_NO_RTR6;
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 35b3e03..1481b80 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -222,6 +222,9 @@
*/
static void batadv_nc_node_put(struct batadv_nc_node *nc_node)
{
+ if (!nc_node)
+ return;
+
kref_put(&nc_node->refcount, batadv_nc_node_release);
}
@@ -246,6 +249,9 @@
*/
static void batadv_nc_path_put(struct batadv_nc_path *nc_path)
{
+ if (!nc_path)
+ return;
+
kref_put(&nc_path->refcount, batadv_nc_path_release);
}
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 805d896..2d38a09 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -178,7 +178,7 @@
* and queue for free after rcu grace period
* @ref: kref pointer of the originator-vlan object
*/
-static void batadv_orig_node_vlan_release(struct kref *ref)
+void batadv_orig_node_vlan_release(struct kref *ref)
{
struct batadv_orig_node_vlan *orig_vlan;
@@ -188,16 +188,6 @@
}
/**
- * batadv_orig_node_vlan_put() - decrement the refcounter and possibly release
- * the originator-vlan object
- * @orig_vlan: the originator-vlan object to release
- */
-void batadv_orig_node_vlan_put(struct batadv_orig_node_vlan *orig_vlan)
-{
- kref_put(&orig_vlan->refcount, batadv_orig_node_vlan_release);
-}
-
-/**
* batadv_originator_init() - Initialize all originator structures
* @bat_priv: the bat priv with all the soft interface information
*
@@ -232,7 +222,7 @@
* free after rcu grace period
* @ref: kref pointer of the neigh_ifinfo
*/
-static void batadv_neigh_ifinfo_release(struct kref *ref)
+void batadv_neigh_ifinfo_release(struct kref *ref)
{
struct batadv_neigh_ifinfo *neigh_ifinfo;
@@ -245,21 +235,11 @@
}
/**
- * batadv_neigh_ifinfo_put() - decrement the refcounter and possibly release
- * the neigh_ifinfo
- * @neigh_ifinfo: the neigh_ifinfo object to release
- */
-void batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo)
-{
- kref_put(&neigh_ifinfo->refcount, batadv_neigh_ifinfo_release);
-}
-
-/**
* batadv_hardif_neigh_release() - release hardif neigh node from lists and
* queue for free after rcu grace period
* @ref: kref pointer of the neigh_node
*/
-static void batadv_hardif_neigh_release(struct kref *ref)
+void batadv_hardif_neigh_release(struct kref *ref)
{
struct batadv_hardif_neigh_node *hardif_neigh;
@@ -275,21 +255,11 @@
}
/**
- * batadv_hardif_neigh_put() - decrement the hardif neighbors refcounter
- * and possibly release it
- * @hardif_neigh: hardif neigh neighbor to free
- */
-void batadv_hardif_neigh_put(struct batadv_hardif_neigh_node *hardif_neigh)
-{
- kref_put(&hardif_neigh->refcount, batadv_hardif_neigh_release);
-}
-
-/**
* batadv_neigh_node_release() - release neigh_node from lists and queue for
* free after rcu grace period
* @ref: kref pointer of the neigh_node
*/
-static void batadv_neigh_node_release(struct kref *ref)
+void batadv_neigh_node_release(struct kref *ref)
{
struct hlist_node *node_tmp;
struct batadv_neigh_node *neigh_node;
@@ -310,16 +280,6 @@
}
/**
- * batadv_neigh_node_put() - decrement the neighbors refcounter and possibly
- * release it
- * @neigh_node: neigh neighbor to free
- */
-void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node)
-{
- kref_put(&neigh_node->refcount, batadv_neigh_node_release);
-}
-
-/**
* batadv_orig_router_get() - router to the originator depending on iface
* @orig_node: the orig node for the router
* @if_outgoing: the interface where the payload packet has been received or
@@ -851,7 +811,7 @@
* free after rcu grace period
* @ref: kref pointer of the orig_ifinfo
*/
-static void batadv_orig_ifinfo_release(struct kref *ref)
+void batadv_orig_ifinfo_release(struct kref *ref)
{
struct batadv_orig_ifinfo *orig_ifinfo;
struct batadv_neigh_node *router;
@@ -870,16 +830,6 @@
}
/**
- * batadv_orig_ifinfo_put() - decrement the refcounter and possibly release
- * the orig_ifinfo
- * @orig_ifinfo: the orig_ifinfo object to release
- */
-void batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo)
-{
- kref_put(&orig_ifinfo->refcount, batadv_orig_ifinfo_release);
-}
-
-/**
* batadv_orig_node_free_rcu() - free the orig_node
* @rcu: rcu pointer of the orig_node
*/
@@ -902,7 +852,7 @@
* free after rcu grace period
* @ref: kref pointer of the orig_node
*/
-static void batadv_orig_node_release(struct kref *ref)
+void batadv_orig_node_release(struct kref *ref)
{
struct hlist_node *node_tmp;
struct batadv_neigh_node *neigh_node;
@@ -949,16 +899,6 @@
}
/**
- * batadv_orig_node_put() - decrement the orig node refcounter and possibly
- * release it
- * @orig_node: the orig node to free
- */
-void batadv_orig_node_put(struct batadv_orig_node *orig_node)
-{
- kref_put(&orig_node->refcount, batadv_orig_node_release);
-}
-
-/**
* batadv_originator_free() - Free all originator structures
* @bat_priv: the bat priv with all the soft interface information
*/
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index 7bc01c1..3b824a7 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -12,6 +12,7 @@
#include <linux/compiler.h>
#include <linux/if_ether.h>
#include <linux/jhash.h>
+#include <linux/kref.h>
#include <linux/netlink.h>
#include <linux/seq_file.h>
#include <linux/skbuff.h>
@@ -21,19 +22,18 @@
int batadv_originator_init(struct batadv_priv *bat_priv);
void batadv_originator_free(struct batadv_priv *bat_priv);
void batadv_purge_orig_ref(struct batadv_priv *bat_priv);
-void batadv_orig_node_put(struct batadv_orig_node *orig_node);
+void batadv_orig_node_release(struct kref *ref);
struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
const u8 *addr);
struct batadv_hardif_neigh_node *
batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface,
const u8 *neigh_addr);
-void
-batadv_hardif_neigh_put(struct batadv_hardif_neigh_node *hardif_neigh);
+void batadv_hardif_neigh_release(struct kref *ref);
struct batadv_neigh_node *
batadv_neigh_node_get_or_create(struct batadv_orig_node *orig_node,
struct batadv_hard_iface *hard_iface,
const u8 *neigh_addr);
-void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node);
+void batadv_neigh_node_release(struct kref *ref);
struct batadv_neigh_node *
batadv_orig_router_get(struct batadv_orig_node *orig_node,
const struct batadv_hard_iface *if_outgoing);
@@ -43,7 +43,7 @@
struct batadv_neigh_ifinfo *
batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh,
struct batadv_hard_iface *if_outgoing);
-void batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo);
+void batadv_neigh_ifinfo_release(struct kref *ref);
int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb);
int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset);
@@ -54,7 +54,7 @@
struct batadv_orig_ifinfo *
batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node,
struct batadv_hard_iface *if_outgoing);
-void batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo);
+void batadv_orig_ifinfo_release(struct kref *ref);
int batadv_orig_seq_print_text(struct seq_file *seq, void *offset);
int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb);
@@ -65,7 +65,7 @@
struct batadv_orig_node_vlan *
batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node,
unsigned short vid);
-void batadv_orig_node_vlan_put(struct batadv_orig_node_vlan *orig_vlan);
+void batadv_orig_node_vlan_release(struct kref *ref);
/**
* batadv_choose_orig() - Return the index of the orig entry in the hash table
@@ -86,4 +86,86 @@
struct batadv_orig_node *
batadv_orig_hash_find(struct batadv_priv *bat_priv, const void *data);
+/**
+ * batadv_orig_node_vlan_put() - decrement the refcounter and possibly release
+ * the originator-vlan object
+ * @orig_vlan: the originator-vlan object to release
+ */
+static inline void
+batadv_orig_node_vlan_put(struct batadv_orig_node_vlan *orig_vlan)
+{
+ if (!orig_vlan)
+ return;
+
+ kref_put(&orig_vlan->refcount, batadv_orig_node_vlan_release);
+}
+
+/**
+ * batadv_neigh_ifinfo_put() - decrement the refcounter and possibly release
+ * the neigh_ifinfo
+ * @neigh_ifinfo: the neigh_ifinfo object to release
+ */
+static inline void
+batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo)
+{
+ if (!neigh_ifinfo)
+ return;
+
+ kref_put(&neigh_ifinfo->refcount, batadv_neigh_ifinfo_release);
+}
+
+/**
+ * batadv_hardif_neigh_put() - decrement the hardif neighbors refcounter
+ * and possibly release it
+ * @hardif_neigh: hardif neigh neighbor to free
+ */
+static inline void
+batadv_hardif_neigh_put(struct batadv_hardif_neigh_node *hardif_neigh)
+{
+ if (!hardif_neigh)
+ return;
+
+ kref_put(&hardif_neigh->refcount, batadv_hardif_neigh_release);
+}
+
+/**
+ * batadv_neigh_node_put() - decrement the neighbors refcounter and possibly
+ * release it
+ * @neigh_node: neigh neighbor to free
+ */
+static inline void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node)
+{
+ if (!neigh_node)
+ return;
+
+ kref_put(&neigh_node->refcount, batadv_neigh_node_release);
+}
+
+/**
+ * batadv_orig_ifinfo_put() - decrement the refcounter and possibly release
+ * the orig_ifinfo
+ * @orig_ifinfo: the orig_ifinfo object to release
+ */
+static inline void
+batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo)
+{
+ if (!orig_ifinfo)
+ return;
+
+ kref_put(&orig_ifinfo->refcount, batadv_orig_ifinfo_release);
+}
+
+/**
+ * batadv_orig_node_put() - decrement the orig node refcounter and possibly
+ * release it
+ * @orig_node: the orig node to free
+ */
+static inline void batadv_orig_node_put(struct batadv_orig_node *orig_node)
+{
+ if (!orig_node)
+ return;
+
+ kref_put(&orig_node->refcount, batadv_orig_node_release);
+}
+
#endif /* _NET_BATMAN_ADV_ORIGINATOR_H_ */
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 7496047..8f7c778 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -512,7 +512,7 @@
* after rcu grace period
* @ref: kref pointer of the vlan object
*/
-static void batadv_softif_vlan_release(struct kref *ref)
+void batadv_softif_vlan_release(struct kref *ref)
{
struct batadv_softif_vlan *vlan;
@@ -526,19 +526,6 @@
}
/**
- * batadv_softif_vlan_put() - decrease the vlan object refcounter and
- * possibly release it
- * @vlan: the vlan object to release
- */
-void batadv_softif_vlan_put(struct batadv_softif_vlan *vlan)
-{
- if (!vlan)
- return;
-
- kref_put(&vlan->refcount, batadv_softif_vlan_release);
-}
-
-/**
* batadv_softif_vlan_get() - get the vlan object for a specific vid
* @bat_priv: the bat priv with all the soft interface information
* @vid: the identifier of the vlan object to retrieve
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index 534e08d..53aba17 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -9,6 +9,7 @@
#include "main.h"
+#include <linux/kref.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/types.h>
@@ -24,8 +25,21 @@
bool batadv_softif_is_valid(const struct net_device *net_dev);
extern struct rtnl_link_ops batadv_link_ops;
int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid);
-void batadv_softif_vlan_put(struct batadv_softif_vlan *softif_vlan);
+void batadv_softif_vlan_release(struct kref *ref);
struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv,
unsigned short vid);
+/**
+ * batadv_softif_vlan_put() - decrease the vlan object refcounter and
+ * possibly release it
+ * @vlan: the vlan object to release
+ */
+static inline void batadv_softif_vlan_put(struct batadv_softif_vlan *vlan)
+{
+ if (!vlan)
+ return;
+
+ kref_put(&vlan->refcount, batadv_softif_vlan_release);
+}
+
#endif /* _NET_BATMAN_ADV_SOFT_INTERFACE_H_ */
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index db7e377..00d62a6 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -357,6 +357,9 @@
*/
static void batadv_tp_vars_put(struct batadv_tp_vars *tp_vars)
{
+ if (!tp_vars)
+ return;
+
kref_put(&tp_vars->refcount, batadv_tp_vars_release);
}
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index de946ea..5f990a2 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -248,6 +248,9 @@
static void
batadv_tt_local_entry_put(struct batadv_tt_local_entry *tt_local_entry)
{
+ if (!tt_local_entry)
+ return;
+
kref_put(&tt_local_entry->common.refcount,
batadv_tt_local_entry_release);
}
@@ -271,7 +274,7 @@
* queue for free after rcu grace period
* @ref: kref pointer of the nc_node
*/
-static void batadv_tt_global_entry_release(struct kref *ref)
+void batadv_tt_global_entry_release(struct kref *ref)
{
struct batadv_tt_global_entry *tt_global_entry;
@@ -284,17 +287,6 @@
}
/**
- * batadv_tt_global_entry_put() - decrement the tt_global_entry refcounter and
- * possibly release it
- * @tt_global_entry: tt_global_entry to be free'd
- */
-void batadv_tt_global_entry_put(struct batadv_tt_global_entry *tt_global_entry)
-{
- kref_put(&tt_global_entry->common.refcount,
- batadv_tt_global_entry_release);
-}
-
-/**
* batadv_tt_global_hash_count() - count the number of orig entries
* @bat_priv: the bat priv with all the soft interface information
* @addr: the mac address of the client to count entries for
@@ -453,6 +445,9 @@
static void
batadv_tt_orig_list_entry_put(struct batadv_tt_orig_list_entry *orig_entry)
{
+ if (!orig_entry)
+ return;
+
kref_put(&orig_entry->refcount, batadv_tt_orig_list_entry_release);
}
@@ -2818,6 +2813,9 @@
*/
static void batadv_tt_req_node_put(struct batadv_tt_req_node *tt_req_node)
{
+ if (!tt_req_node)
+ return;
+
kref_put(&tt_req_node->refcount, batadv_tt_req_node_release);
}
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index b24d35b..63cc8fd 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -9,6 +9,7 @@
#include "main.h"
+#include <linux/kref.h>
#include <linux/netdevice.h>
#include <linux/netlink.h>
#include <linux/seq_file.h>
@@ -31,7 +32,7 @@
struct batadv_tt_global_entry *
batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const u8 *addr,
unsigned short vid);
-void batadv_tt_global_entry_put(struct batadv_tt_global_entry *tt_global_entry);
+void batadv_tt_global_entry_release(struct kref *ref);
int batadv_tt_global_hash_count(struct batadv_priv *bat_priv,
const u8 *addr, unsigned short vid);
struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv,
@@ -58,4 +59,19 @@
int batadv_tt_cache_init(void);
void batadv_tt_cache_destroy(void);
+/**
+ * batadv_tt_global_entry_put() - decrement the tt_global_entry refcounter and
+ * possibly release it
+ * @tt_global_entry: tt_global_entry to be free'd
+ */
+static inline void
+batadv_tt_global_entry_put(struct batadv_tt_global_entry *tt_global_entry)
+{
+ if (!tt_global_entry)
+ return;
+
+ kref_put(&tt_global_entry->common.refcount,
+ batadv_tt_global_entry_release);
+}
+
#endif /* _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ */
diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c
index 6a23a56..99fc48e 100644
--- a/net/batman-adv/tvlv.c
+++ b/net/batman-adv/tvlv.c
@@ -50,6 +50,9 @@
*/
static void batadv_tvlv_handler_put(struct batadv_tvlv_handler *tvlv_handler)
{
+ if (!tvlv_handler)
+ return;
+
kref_put(&tvlv_handler->refcount, batadv_tvlv_handler_release);
}
@@ -106,6 +109,9 @@
*/
static void batadv_tvlv_container_put(struct batadv_tvlv_container *tvlv)
{
+ if (!tvlv)
+ return;
+
kref_put(&tvlv->refcount, batadv_tvlv_container_release);
}
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 1c5a0a6..140d976 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -240,7 +240,7 @@
{
BT_DBG("hcon %p", conn);
- /* When we are master of an established connection and it enters
+ /* When we are central of an established connection and it enters
* the disconnect timeout, then go ahead and try to read the
* current clock offset. Processing of the result is done
* within the event handling and hci_clock_offset_evt function.
@@ -508,7 +508,9 @@
if (conn->role == HCI_ROLE_SLAVE) {
/* Disable LE Advertising */
le_disable_advertising(hdev);
+ hci_dev_lock(hdev);
hci_le_conn_failed(conn, HCI_ERROR_ADVERTISING_TIMEOUT);
+ hci_dev_unlock(hdev);
return;
}
@@ -1063,16 +1065,16 @@
hci_req_init(&req, hdev);
- /* Disable advertising if we're active. For master role
+ /* Disable advertising if we're active. For central role
* connections most controllers will refuse to connect if
- * advertising is enabled, and for slave role connections we
+ * advertising is enabled, and for peripheral role connections we
* anyway have to disable it in order to start directed
* advertising.
*/
if (hci_dev_test_flag(hdev, HCI_LE_ADV))
__hci_req_disable_advertising(&req);
- /* If requested to connect as slave use directed advertising */
+ /* If requested to connect as peripheral use directed advertising */
if (conn->role == HCI_ROLE_SLAVE) {
/* If we're active scanning most controllers are unable
* to initiate advertising. Simply reject the attempt.
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 2e7998b..866eb22 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -742,14 +742,14 @@
}
if (hdev->commands[26] & 0x40) {
- /* Read LE White List Size */
- hci_req_add(req, HCI_OP_LE_READ_WHITE_LIST_SIZE,
+ /* Read LE Accept List Size */
+ hci_req_add(req, HCI_OP_LE_READ_ACCEPT_LIST_SIZE,
0, NULL);
}
if (hdev->commands[26] & 0x80) {
- /* Clear LE White List */
- hci_req_add(req, HCI_OP_LE_CLEAR_WHITE_LIST, 0, NULL);
+ /* Clear LE Accept List */
+ hci_req_add(req, HCI_OP_LE_CLEAR_ACCEPT_LIST, 0, NULL);
}
if (hdev->commands[34] & 0x40) {
@@ -3548,13 +3548,13 @@
/* Suspend consists of two actions:
* - First, disconnect everything and make the controller not
* connectable (disabling scanning)
- * - Second, program event filter/whitelist and enable scan
+ * - Second, program event filter/accept list and enable scan
*/
ret = hci_change_suspend_state(hdev, BT_SUSPEND_DISCONNECT);
if (!ret)
state = BT_SUSPEND_DISCONNECT;
- /* Only configure whitelist if disconnect succeeded and wake
+ /* Only configure accept list if disconnect succeeded and wake
* isn't being prevented.
*/
if (!ret && !(hdev->prevent_wake && hdev->prevent_wake(hdev))) {
@@ -3606,6 +3606,9 @@
hdev->cur_adv_instance = 0x00;
hdev->adv_instance_timeout = 0;
+ hdev->advmon_allowlist_duration = 300;
+ hdev->advmon_no_filter_duration = 500;
+
hdev->sniff_max_interval = 800;
hdev->sniff_min_interval = 80;
@@ -3654,14 +3657,14 @@
mutex_init(&hdev->req_lock);
INIT_LIST_HEAD(&hdev->mgmt_pending);
- INIT_LIST_HEAD(&hdev->blacklist);
- INIT_LIST_HEAD(&hdev->whitelist);
+ INIT_LIST_HEAD(&hdev->reject_list);
+ INIT_LIST_HEAD(&hdev->accept_list);
INIT_LIST_HEAD(&hdev->uuids);
INIT_LIST_HEAD(&hdev->link_keys);
INIT_LIST_HEAD(&hdev->long_term_keys);
INIT_LIST_HEAD(&hdev->identity_resolving_keys);
INIT_LIST_HEAD(&hdev->remote_oob_data);
- INIT_LIST_HEAD(&hdev->le_white_list);
+ INIT_LIST_HEAD(&hdev->le_accept_list);
INIT_LIST_HEAD(&hdev->le_resolv_list);
INIT_LIST_HEAD(&hdev->le_conn_params);
INIT_LIST_HEAD(&hdev->pend_le_conns);
@@ -3718,10 +3721,10 @@
*/
switch (hdev->dev_type) {
case HCI_PRIMARY:
- id = ida_simple_get(&hci_index_ida, 0, 0, GFP_KERNEL);
+ id = ida_simple_get(&hci_index_ida, 0, HCI_MAX_ID, GFP_KERNEL);
break;
case HCI_AMP:
- id = ida_simple_get(&hci_index_ida, 1, 0, GFP_KERNEL);
+ id = ida_simple_get(&hci_index_ida, 1, HCI_MAX_ID, GFP_KERNEL);
break;
default:
return -EINVAL;
@@ -3730,7 +3733,7 @@
if (id < 0)
return id;
- sprintf(hdev->name, "hci%d", id);
+ snprintf(hdev->name, sizeof(hdev->name), "hci%d", id);
hdev->id = id;
BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus);
@@ -3877,8 +3880,8 @@
destroy_workqueue(hdev->req_workqueue);
hci_dev_lock(hdev);
- hci_bdaddr_list_clear(&hdev->blacklist);
- hci_bdaddr_list_clear(&hdev->whitelist);
+ hci_bdaddr_list_clear(&hdev->reject_list);
+ hci_bdaddr_list_clear(&hdev->accept_list);
hci_uuids_clear(hdev);
hci_link_keys_clear(hdev);
hci_smp_ltks_clear(hdev);
@@ -3886,7 +3889,7 @@
hci_remote_oob_data_clear(hdev);
hci_adv_instances_clear(hdev);
hci_adv_monitors_clear(hdev);
- hci_bdaddr_list_clear(&hdev->le_white_list);
+ hci_bdaddr_list_clear(&hdev->le_accept_list);
hci_bdaddr_list_clear(&hdev->le_resolv_list);
hci_conn_params_clear_all(hdev);
hci_discovery_filter_clear(hdev);
@@ -4479,15 +4482,27 @@
return DIV_ROUND_UP(skb->len - HCI_ACL_HDR_SIZE, hdev->block_len);
}
-static void __check_timeout(struct hci_dev *hdev, unsigned int cnt)
+static void __check_timeout(struct hci_dev *hdev, unsigned int cnt, u8 type)
{
- if (!hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
- /* ACL tx timeout must be longer than maximum
- * link supervision timeout (40.9 seconds) */
- if (!cnt && time_after(jiffies, hdev->acl_last_tx +
- HCI_ACL_TX_TIMEOUT))
- hci_link_tx_to(hdev, ACL_LINK);
+ unsigned long last_tx;
+
+ if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED))
+ return;
+
+ switch (type) {
+ case LE_LINK:
+ last_tx = hdev->le_last_tx;
+ break;
+ default:
+ last_tx = hdev->acl_last_tx;
+ break;
}
+
+ /* tx timeout must be longer than maximum link supervision timeout
+ * (40.9 seconds)
+ */
+ if (!cnt && time_after(jiffies, last_tx + HCI_ACL_TX_TIMEOUT))
+ hci_link_tx_to(hdev, type);
}
/* Schedule SCO */
@@ -4545,7 +4560,7 @@
struct sk_buff *skb;
int quote;
- __check_timeout(hdev, cnt);
+ __check_timeout(hdev, cnt, ACL_LINK);
while (hdev->acl_cnt &&
(chan = hci_chan_sent(hdev, ACL_LINK, "e))) {
@@ -4588,8 +4603,6 @@
int quote;
u8 type;
- __check_timeout(hdev, cnt);
-
BT_DBG("%s", hdev->name);
if (hdev->dev_type == HCI_AMP)
@@ -4597,6 +4610,8 @@
else
type = ACL_LINK;
+ __check_timeout(hdev, cnt, type);
+
while (hdev->block_cnt > 0 &&
(chan = hci_chan_sent(hdev, type, "e))) {
u32 priority = (skb_peek(&chan->data_q))->priority;
@@ -4670,7 +4685,7 @@
cnt = hdev->le_pkts ? hdev->le_cnt : hdev->acl_cnt;
- __check_timeout(hdev, cnt);
+ __check_timeout(hdev, cnt, LE_LINK);
tmp = cnt;
while (cnt && (chan = hci_chan_sent(hdev, LE_LINK, "e))) {
diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c
index 5e8af26..338833f 100644
--- a/net/bluetooth/hci_debugfs.c
+++ b/net/bluetooth/hci_debugfs.c
@@ -125,7 +125,7 @@
struct bdaddr_list *b;
hci_dev_lock(hdev);
- list_for_each_entry(b, &hdev->whitelist, list)
+ list_for_each_entry(b, &hdev->accept_list, list)
seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type);
list_for_each_entry(p, &hdev->le_conn_params, list) {
seq_printf(f, "%pMR (type %u) %u\n", &p->addr, p->addr_type,
@@ -144,7 +144,7 @@
struct bdaddr_list *b;
hci_dev_lock(hdev);
- list_for_each_entry(b, &hdev->blacklist, list)
+ list_for_each_entry(b, &hdev->reject_list, list)
seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type);
hci_dev_unlock(hdev);
@@ -734,7 +734,7 @@
struct bdaddr_list *b;
hci_dev_lock(hdev);
- list_for_each_entry(b, &hdev->le_white_list, list)
+ list_for_each_entry(b, &hdev->le_accept_list, list)
seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type);
hci_dev_unlock(hdev);
@@ -1145,7 +1145,7 @@
&force_static_address_fops);
debugfs_create_u8("white_list_size", 0444, hdev->debugfs,
- &hdev->le_white_list_size);
+ &hdev->le_accept_list_size);
debugfs_create_file("white_list", 0444, hdev->debugfs, hdev,
&white_list_fops);
debugfs_create_u8("resolv_list_size", 0444, hdev->debugfs,
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 72b4127..954b296 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -236,7 +236,7 @@
hdev->ssp_debug_mode = 0;
- hci_bdaddr_list_clear(&hdev->le_white_list);
+ hci_bdaddr_list_clear(&hdev->le_accept_list);
hci_bdaddr_list_clear(&hdev->le_resolv_list);
}
@@ -1456,36 +1456,22 @@
hdev->le_num_of_adv_sets = rp->num_of_sets;
}
-static void hci_cc_le_read_white_list_size(struct hci_dev *hdev,
- struct sk_buff *skb)
+static void hci_cc_le_read_accept_list_size(struct hci_dev *hdev,
+ struct sk_buff *skb)
{
- struct hci_rp_le_read_white_list_size *rp = (void *) skb->data;
+ struct hci_rp_le_read_accept_list_size *rp = (void *)skb->data;
BT_DBG("%s status 0x%2.2x size %u", hdev->name, rp->status, rp->size);
if (rp->status)
return;
- hdev->le_white_list_size = rp->size;
+ hdev->le_accept_list_size = rp->size;
}
-static void hci_cc_le_clear_white_list(struct hci_dev *hdev,
- struct sk_buff *skb)
-{
- __u8 status = *((__u8 *) skb->data);
-
- BT_DBG("%s status 0x%2.2x", hdev->name, status);
-
- if (status)
- return;
-
- hci_bdaddr_list_clear(&hdev->le_white_list);
-}
-
-static void hci_cc_le_add_to_white_list(struct hci_dev *hdev,
+static void hci_cc_le_clear_accept_list(struct hci_dev *hdev,
struct sk_buff *skb)
{
- struct hci_cp_le_add_to_white_list *sent;
__u8 status = *((__u8 *) skb->data);
BT_DBG("%s status 0x%2.2x", hdev->name, status);
@@ -1493,18 +1479,13 @@
if (status)
return;
- sent = hci_sent_cmd_data(hdev, HCI_OP_LE_ADD_TO_WHITE_LIST);
- if (!sent)
- return;
-
- hci_bdaddr_list_add(&hdev->le_white_list, &sent->bdaddr,
- sent->bdaddr_type);
+ hci_bdaddr_list_clear(&hdev->le_accept_list);
}
-static void hci_cc_le_del_from_white_list(struct hci_dev *hdev,
- struct sk_buff *skb)
+static void hci_cc_le_add_to_accept_list(struct hci_dev *hdev,
+ struct sk_buff *skb)
{
- struct hci_cp_le_del_from_white_list *sent;
+ struct hci_cp_le_add_to_accept_list *sent;
__u8 status = *((__u8 *) skb->data);
BT_DBG("%s status 0x%2.2x", hdev->name, status);
@@ -1512,11 +1493,30 @@
if (status)
return;
- sent = hci_sent_cmd_data(hdev, HCI_OP_LE_DEL_FROM_WHITE_LIST);
+ sent = hci_sent_cmd_data(hdev, HCI_OP_LE_ADD_TO_ACCEPT_LIST);
if (!sent)
return;
- hci_bdaddr_list_del(&hdev->le_white_list, &sent->bdaddr,
+ hci_bdaddr_list_add(&hdev->le_accept_list, &sent->bdaddr,
+ sent->bdaddr_type);
+}
+
+static void hci_cc_le_del_from_accept_list(struct hci_dev *hdev,
+ struct sk_buff *skb)
+{
+ struct hci_cp_le_del_from_accept_list *sent;
+ __u8 status = *((__u8 *) skb->data);
+
+ BT_DBG("%s status 0x%2.2x", hdev->name, status);
+
+ if (status)
+ return;
+
+ sent = hci_sent_cmd_data(hdev, HCI_OP_LE_DEL_FROM_ACCEPT_LIST);
+ if (!sent)
+ return;
+
+ hci_bdaddr_list_del(&hdev->le_accept_list, &sent->bdaddr,
sent->bdaddr_type);
}
@@ -2331,7 +2331,7 @@
/* We don't want the connection attempt to stick around
* indefinitely since LE doesn't have a page timeout concept
* like BR/EDR. Set a timer for any connection that doesn't use
- * the white list for connecting.
+ * the accept list for connecting.
*/
if (filter_policy == HCI_LE_USE_PEER_ADDR)
queue_delayed_work(conn->hdev->workqueue,
@@ -2587,7 +2587,7 @@
* only used during suspend.
*/
if (ev->link_type == ACL_LINK &&
- hci_bdaddr_list_lookup_with_flags(&hdev->whitelist,
+ hci_bdaddr_list_lookup_with_flags(&hdev->accept_list,
&ev->bdaddr,
BDADDR_BREDR)) {
conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr,
@@ -2709,28 +2709,28 @@
return;
}
- if (hci_bdaddr_list_lookup(&hdev->blacklist, &ev->bdaddr,
+ hci_dev_lock(hdev);
+
+ if (hci_bdaddr_list_lookup(&hdev->reject_list, &ev->bdaddr,
BDADDR_BREDR)) {
hci_reject_conn(hdev, &ev->bdaddr);
- return;
+ goto unlock;
}
- /* Require HCI_CONNECTABLE or a whitelist entry to accept the
+ /* Require HCI_CONNECTABLE or an accept list entry to accept the
* connection. These features are only touched through mgmt so
* only do the checks if HCI_MGMT is set.
*/
if (hci_dev_test_flag(hdev, HCI_MGMT) &&
!hci_dev_test_flag(hdev, HCI_CONNECTABLE) &&
- !hci_bdaddr_list_lookup_with_flags(&hdev->whitelist, &ev->bdaddr,
+ !hci_bdaddr_list_lookup_with_flags(&hdev->accept_list, &ev->bdaddr,
BDADDR_BREDR)) {
hci_reject_conn(hdev, &ev->bdaddr);
- return;
+ goto unlock;
}
/* Connection accepted */
- hci_dev_lock(hdev);
-
ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr);
if (ie)
memcpy(ie->data.dev_class, ev->dev_class, 3);
@@ -2742,8 +2742,7 @@
HCI_ROLE_SLAVE);
if (!conn) {
bt_dev_err(hdev, "no memory for new connection");
- hci_dev_unlock(hdev);
- return;
+ goto unlock;
}
}
@@ -2759,9 +2758,9 @@
bacpy(&cp.bdaddr, &ev->bdaddr);
if (lmp_rswitch_capable(hdev) && (mask & HCI_LM_MASTER))
- cp.role = 0x00; /* Become master */
+ cp.role = 0x00; /* Become central */
else
- cp.role = 0x01; /* Remain slave */
+ cp.role = 0x01; /* Remain peripheral */
hci_send_cmd(hdev, HCI_OP_ACCEPT_CONN_REQ, sizeof(cp), &cp);
} else if (!(flags & HCI_PROTO_DEFER)) {
@@ -2783,6 +2782,10 @@
conn->state = BT_CONNECT2;
hci_connect_cfm(conn, 0);
}
+
+ return;
+unlock:
+ hci_dev_unlock(hdev);
}
static u8 hci_to_mgmt_reason(u8 err)
@@ -3481,20 +3484,20 @@
hci_cc_le_set_scan_enable(hdev, skb);
break;
- case HCI_OP_LE_READ_WHITE_LIST_SIZE:
- hci_cc_le_read_white_list_size(hdev, skb);
+ case HCI_OP_LE_READ_ACCEPT_LIST_SIZE:
+ hci_cc_le_read_accept_list_size(hdev, skb);
break;
- case HCI_OP_LE_CLEAR_WHITE_LIST:
- hci_cc_le_clear_white_list(hdev, skb);
+ case HCI_OP_LE_CLEAR_ACCEPT_LIST:
+ hci_cc_le_clear_accept_list(hdev, skb);
break;
- case HCI_OP_LE_ADD_TO_WHITE_LIST:
- hci_cc_le_add_to_white_list(hdev, skb);
+ case HCI_OP_LE_ADD_TO_ACCEPT_LIST:
+ hci_cc_le_add_to_accept_list(hdev, skb);
break;
- case HCI_OP_LE_DEL_FROM_WHITE_LIST:
- hci_cc_le_del_from_white_list(hdev, skb);
+ case HCI_OP_LE_DEL_FROM_ACCEPT_LIST:
+ hci_cc_le_del_from_accept_list(hdev, skb);
break;
case HCI_OP_LE_READ_SUPPORTED_STATES:
@@ -5061,8 +5064,9 @@
hci_dev_lock(hdev);
hcon = hci_conn_hash_lookup_handle(hdev, ev->phy_handle);
- if (hcon) {
+ if (hcon && hcon->type == AMP_LINK) {
hcon->state = BT_CLOSED;
+ hci_disconn_cfm(hcon, ev->reason);
hci_conn_del(hcon);
}
@@ -5152,8 +5156,8 @@
conn->dst_type = bdaddr_type;
/* If we didn't have a hci_conn object previously
- * but we're in master role this must be something
- * initiated using a white list. Since white list based
+ * but we're in central role this must be something
+ * initiated using an accept list. Since accept list based
* connections are not "first class citizens" we don't
* have full tracking of them. Therefore, we go ahead
* with a "best effort" approach of determining the
@@ -5203,7 +5207,7 @@
addr_type = BDADDR_LE_RANDOM;
/* Drop the connection if the device is blocked */
- if (hci_bdaddr_list_lookup(&hdev->blacklist, &conn->dst, addr_type)) {
+ if (hci_bdaddr_list_lookup(&hdev->reject_list, &conn->dst, addr_type)) {
hci_conn_drop(conn);
goto unlock;
}
@@ -5371,7 +5375,7 @@
return NULL;
/* Ignore if the device is blocked */
- if (hci_bdaddr_list_lookup(&hdev->blacklist, addr, addr_type))
+ if (hci_bdaddr_list_lookup(&hdev->reject_list, addr, addr_type))
return NULL;
/* Most controller will fail if we try to create new connections
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index d965b7c..a0f980e 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -382,6 +382,53 @@
hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type);
}
+static void start_interleave_scan(struct hci_dev *hdev)
+{
+ hdev->interleave_scan_state = INTERLEAVE_SCAN_NO_FILTER;
+ queue_delayed_work(hdev->req_workqueue,
+ &hdev->interleave_scan, 0);
+}
+
+static bool is_interleave_scanning(struct hci_dev *hdev)
+{
+ return hdev->interleave_scan_state != INTERLEAVE_SCAN_NONE;
+}
+
+static void cancel_interleave_scan(struct hci_dev *hdev)
+{
+ bt_dev_dbg(hdev, "cancelling interleave scan");
+
+ cancel_delayed_work_sync(&hdev->interleave_scan);
+
+ hdev->interleave_scan_state = INTERLEAVE_SCAN_NONE;
+}
+
+/* Return true if interleave_scan wasn't started until exiting this function,
+ * otherwise, return false
+ */
+static bool __hci_update_interleaved_scan(struct hci_dev *hdev)
+{
+ /* If there is at least one ADV monitors and one pending LE connection
+ * or one device to be scanned for, we should alternate between
+ * allowlist scan and one without any filters to save power.
+ */
+ bool use_interleaving = hci_is_adv_monitoring(hdev) &&
+ !(list_empty(&hdev->pend_le_conns) &&
+ list_empty(&hdev->pend_le_reports));
+ bool is_interleaving = is_interleave_scanning(hdev);
+
+ if (use_interleaving && !is_interleaving) {
+ start_interleave_scan(hdev);
+ bt_dev_dbg(hdev, "starting interleave scan");
+ return true;
+ }
+
+ if (!use_interleaving && is_interleaving)
+ cancel_interleave_scan(hdev);
+
+ return false;
+}
+
/* This function controls the background scanning based on hdev->pend_le_conns
* list. If there are pending LE connection we start the background scanning,
* otherwise we stop it.
@@ -454,8 +501,7 @@
hci_req_add_le_scan_disable(req, false);
hci_req_add_le_passive_scan(req);
-
- BT_DBG("%s starting background scanning", hdev->name);
+ bt_dev_dbg(hdev, "starting background scanning");
}
}
@@ -690,17 +736,17 @@
}
}
-static void del_from_white_list(struct hci_request *req, bdaddr_t *bdaddr,
- u8 bdaddr_type)
+static void del_from_accept_list(struct hci_request *req, bdaddr_t *bdaddr,
+ u8 bdaddr_type)
{
- struct hci_cp_le_del_from_white_list cp;
+ struct hci_cp_le_del_from_accept_list cp;
cp.bdaddr_type = bdaddr_type;
bacpy(&cp.bdaddr, bdaddr);
- bt_dev_dbg(req->hdev, "Remove %pMR (0x%x) from whitelist", &cp.bdaddr,
+ bt_dev_dbg(req->hdev, "Remove %pMR (0x%x) from accept list", &cp.bdaddr,
cp.bdaddr_type);
- hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST, sizeof(cp), &cp);
+ hci_req_add(req, HCI_OP_LE_DEL_FROM_ACCEPT_LIST, sizeof(cp), &cp);
if (use_ll_privacy(req->hdev) &&
hci_dev_test_flag(req->hdev, HCI_ENABLE_LL_PRIVACY)) {
@@ -719,31 +765,31 @@
}
}
-/* Adds connection to white list if needed. On error, returns -1. */
-static int add_to_white_list(struct hci_request *req,
- struct hci_conn_params *params, u8 *num_entries,
- bool allow_rpa)
+/* Adds connection to accept list if needed. On error, returns -1. */
+static int add_to_accept_list(struct hci_request *req,
+ struct hci_conn_params *params, u8 *num_entries,
+ bool allow_rpa)
{
- struct hci_cp_le_add_to_white_list cp;
+ struct hci_cp_le_add_to_accept_list cp;
struct hci_dev *hdev = req->hdev;
- /* Already in white list */
- if (hci_bdaddr_list_lookup(&hdev->le_white_list, ¶ms->addr,
+ /* Already in accept list */
+ if (hci_bdaddr_list_lookup(&hdev->le_accept_list, ¶ms->addr,
params->addr_type))
return 0;
/* Select filter policy to accept all advertising */
- if (*num_entries >= hdev->le_white_list_size)
+ if (*num_entries >= hdev->le_accept_list_size)
return -1;
- /* White list can not be used with RPAs */
+ /* Accept list can not be used with RPAs */
if (!allow_rpa &&
!hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY) &&
hci_find_irk_by_addr(hdev, ¶ms->addr, params->addr_type)) {
return -1;
}
- /* During suspend, only wakeable devices can be in whitelist */
+ /* During suspend, only wakeable devices can be in accept list */
if (hdev->suspended && !hci_conn_test_flag(HCI_CONN_FLAG_REMOTE_WAKEUP,
params->current_flags))
return 0;
@@ -752,9 +798,9 @@
cp.bdaddr_type = params->addr_type;
bacpy(&cp.bdaddr, ¶ms->addr);
- bt_dev_dbg(hdev, "Add %pMR (0x%x) to whitelist", &cp.bdaddr,
+ bt_dev_dbg(hdev, "Add %pMR (0x%x) to accept list", &cp.bdaddr,
cp.bdaddr_type);
- hci_req_add(req, HCI_OP_LE_ADD_TO_WHITE_LIST, sizeof(cp), &cp);
+ hci_req_add(req, HCI_OP_LE_ADD_TO_ACCEPT_LIST, sizeof(cp), &cp);
if (use_ll_privacy(hdev) &&
hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY)) {
@@ -782,27 +828,31 @@
return 0;
}
-static u8 update_white_list(struct hci_request *req)
+static u8 update_accept_list(struct hci_request *req)
{
struct hci_dev *hdev = req->hdev;
struct hci_conn_params *params;
struct bdaddr_list *b;
u8 num_entries = 0;
bool pend_conn, pend_report;
- /* We allow whitelisting even with RPAs in suspend. In the worst case,
- * we won't be able to wake from devices that use the privacy1.2
+ /* We allow usage of accept list even with RPAs in suspend. In the worst
+ * case, we won't be able to wake from devices that use the privacy1.2
* features. Additionally, once we support privacy1.2 and IRK
* offloading, we can update this to also check for those conditions.
*/
bool allow_rpa = hdev->suspended;
- /* Go through the current white list programmed into the
+ if (use_ll_privacy(hdev) &&
+ hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY))
+ allow_rpa = true;
+
+ /* Go through the current accept list programmed into the
* controller one by one and check if that address is still
* in the list of pending connections or list of devices to
* report. If not present in either list, then queue the
* command to remove it from the controller.
*/
- list_for_each_entry(b, &hdev->le_white_list, list) {
+ list_for_each_entry(b, &hdev->le_accept_list, list) {
pend_conn = hci_pend_le_action_lookup(&hdev->pend_le_conns,
&b->bdaddr,
b->bdaddr_type);
@@ -811,14 +861,14 @@
b->bdaddr_type);
/* If the device is not likely to connect or report,
- * remove it from the whitelist.
+ * remove it from the accept list.
*/
if (!pend_conn && !pend_report) {
- del_from_white_list(req, &b->bdaddr, b->bdaddr_type);
+ del_from_accept_list(req, &b->bdaddr, b->bdaddr_type);
continue;
}
- /* White list can not be used with RPAs */
+ /* Accept list can not be used with RPAs */
if (!allow_rpa &&
!hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY) &&
hci_find_irk_by_addr(hdev, &b->bdaddr, b->bdaddr_type)) {
@@ -828,39 +878,44 @@
num_entries++;
}
- /* Since all no longer valid white list entries have been
+ /* Since all no longer valid accept list entries have been
* removed, walk through the list of pending connections
* and ensure that any new device gets programmed into
* the controller.
*
* If the list of the devices is larger than the list of
- * available white list entries in the controller, then
+ * available accept list entries in the controller, then
* just abort and return filer policy value to not use the
- * white list.
+ * accept list.
*/
list_for_each_entry(params, &hdev->pend_le_conns, action) {
- if (add_to_white_list(req, params, &num_entries, allow_rpa))
+ if (add_to_accept_list(req, params, &num_entries, allow_rpa))
return 0x00;
}
/* After adding all new pending connections, walk through
* the list of pending reports and also add these to the
- * white list if there is still space. Abort if space runs out.
+ * accept list if there is still space. Abort if space runs out.
*/
list_for_each_entry(params, &hdev->pend_le_reports, action) {
- if (add_to_white_list(req, params, &num_entries, allow_rpa))
+ if (add_to_accept_list(req, params, &num_entries, allow_rpa))
return 0x00;
}
- /* Once the controller offloading of advertisement monitor is in place,
- * the if condition should include the support of MSFT extension
- * support. If suspend is ongoing, whitelist should be the default to
- * prevent waking by random advertisements.
+ /* Use the allowlist unless the following conditions are all true:
+ * - We are not currently suspending
+ * - There are 1 or more ADV monitors registered
+ * - Interleaved scanning is not currently using the allowlist
+ *
+ * Once the controller offloading of advertisement monitor is in place,
+ * the above condition should include the support of MSFT extension
+ * support.
*/
- if (!idr_is_empty(&hdev->adv_monitors_idr) && !hdev->suspended)
+ if (!idr_is_empty(&hdev->adv_monitors_idr) && !hdev->suspended &&
+ hdev->interleave_scan_state != INTERLEAVE_SCAN_ALLOWLIST)
return 0x00;
- /* Select filter policy to use white list */
+ /* Select filter policy to use accept list */
return 0x01;
}
@@ -1010,20 +1065,24 @@
&own_addr_type))
return;
- /* Adding or removing entries from the white list must
+ if (__hci_update_interleaved_scan(hdev))
+ return;
+
+ bt_dev_dbg(hdev, "interleave state %d", hdev->interleave_scan_state);
+ /* Adding or removing entries from the accept list must
* happen before enabling scanning. The controller does
- * not allow white list modification while scanning.
+ * not allow accept list modification while scanning.
*/
- filter_policy = update_white_list(req);
+ filter_policy = update_accept_list(req);
/* When the controller is using random resolvable addresses and
* with that having LE privacy enabled, then controllers with
* Extended Scanner Filter Policies support can now enable support
* for handling directed advertising.
*
- * So instead of using filter polices 0x00 (no whitelist)
- * and 0x01 (whitelist enabled) use the new filter policies
- * 0x02 (no whitelist) and 0x03 (whitelist enabled).
+ * So instead of using filter polices 0x00 (no accept list)
+ * and 0x01 (accept list enabled) use the new filter policies
+ * 0x02 (no accept list) and 0x03 (accept list enabled).
*/
if (hci_dev_test_flag(hdev, HCI_PRIVACY) &&
(hdev->le_features[0] & HCI_LE_EXT_SCAN_POLICY))
@@ -1043,7 +1102,8 @@
interval = hdev->le_scan_interval;
}
- bt_dev_dbg(hdev, "LE passive scan with whitelist = %d", filter_policy);
+ bt_dev_dbg(hdev, "LE passive scan with accept list = %d",
+ filter_policy);
hci_req_start_scan(req, LE_SCAN_PASSIVE, interval, window,
own_addr_type, filter_policy, addr_resolv);
}
@@ -1091,7 +1151,7 @@
/* Always clear event filter when starting */
hci_req_clear_event_filter(req);
- list_for_each_entry(b, &hdev->whitelist, list) {
+ list_for_each_entry(b, &hdev->accept_list, list) {
if (!hci_conn_test_flag(HCI_CONN_FLAG_REMOTE_WAKEUP,
b->current_flags))
continue;
@@ -1884,6 +1944,62 @@
hci_dev_unlock(hdev);
}
+static int hci_req_add_le_interleaved_scan(struct hci_request *req,
+ unsigned long opt)
+{
+ struct hci_dev *hdev = req->hdev;
+ int ret = 0;
+
+ hci_dev_lock(hdev);
+
+ if (hci_dev_test_flag(hdev, HCI_LE_SCAN))
+ hci_req_add_le_scan_disable(req, false);
+ hci_req_add_le_passive_scan(req);
+
+ switch (hdev->interleave_scan_state) {
+ case INTERLEAVE_SCAN_ALLOWLIST:
+ bt_dev_dbg(hdev, "next state: allowlist");
+ hdev->interleave_scan_state = INTERLEAVE_SCAN_NO_FILTER;
+ break;
+ case INTERLEAVE_SCAN_NO_FILTER:
+ bt_dev_dbg(hdev, "next state: no filter");
+ hdev->interleave_scan_state = INTERLEAVE_SCAN_ALLOWLIST;
+ break;
+ case INTERLEAVE_SCAN_NONE:
+ BT_ERR("unexpected error");
+ ret = -1;
+ }
+
+ hci_dev_unlock(hdev);
+
+ return ret;
+}
+
+static void interleave_scan_work(struct work_struct *work)
+{
+ struct hci_dev *hdev = container_of(work, struct hci_dev,
+ interleave_scan.work);
+ u8 status;
+ unsigned long timeout;
+
+ if (hdev->interleave_scan_state == INTERLEAVE_SCAN_ALLOWLIST) {
+ timeout = msecs_to_jiffies(hdev->advmon_allowlist_duration);
+ } else if (hdev->interleave_scan_state == INTERLEAVE_SCAN_NO_FILTER) {
+ timeout = msecs_to_jiffies(hdev->advmon_no_filter_duration);
+ } else {
+ bt_dev_err(hdev, "unexpected error");
+ return;
+ }
+
+ hci_req_sync(hdev, hci_req_add_le_interleaved_scan, 0,
+ HCI_CMD_TIMEOUT, &status);
+
+ /* Don't continue interleaving if it was canceled */
+ if (is_interleave_scanning(hdev))
+ queue_delayed_work(hdev->req_workqueue,
+ &hdev->interleave_scan, timeout);
+}
+
int hci_get_random_address(struct hci_dev *hdev, bool require_privacy,
bool use_rpa, struct adv_info *adv_instance,
u8 *own_addr_type, bdaddr_t *rand_addr)
@@ -2447,11 +2563,11 @@
return 0;
}
-static bool disconnected_whitelist_entries(struct hci_dev *hdev)
+static bool disconnected_accept_list_entries(struct hci_dev *hdev)
{
struct bdaddr_list *b;
- list_for_each_entry(b, &hdev->whitelist, list) {
+ list_for_each_entry(b, &hdev->accept_list, list) {
struct hci_conn *conn;
conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &b->bdaddr);
@@ -2483,7 +2599,7 @@
return;
if (hci_dev_test_flag(hdev, HCI_CONNECTABLE) ||
- disconnected_whitelist_entries(hdev))
+ disconnected_accept_list_entries(hdev))
scan = SCAN_PAGE;
else
scan = SCAN_DISABLED;
@@ -2972,7 +3088,7 @@
uint16_t interval = opt;
struct hci_dev *hdev = req->hdev;
u8 own_addr_type;
- /* White list is not used for discovery */
+ /* Accept list is not used for discovery */
u8 filter_policy = 0x00;
/* Discovery doesn't require controller address resolution */
bool addr_resolv = false;
@@ -3311,6 +3427,7 @@
INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work);
INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work);
INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire);
+ INIT_DELAYED_WORK(&hdev->interleave_scan, interleave_scan_work);
}
void hci_request_cancel_all(struct hci_dev *hdev)
@@ -3330,4 +3447,6 @@
cancel_delayed_work_sync(&hdev->adv_instance_expire);
hdev->adv_instance_timeout = 0;
}
+
+ cancel_interleave_scan(hdev);
}
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 53f85d7..71d18d3 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -897,7 +897,7 @@
return 0;
}
-static int hci_sock_blacklist_add(struct hci_dev *hdev, void __user *arg)
+static int hci_sock_reject_list_add(struct hci_dev *hdev, void __user *arg)
{
bdaddr_t bdaddr;
int err;
@@ -907,14 +907,14 @@
hci_dev_lock(hdev);
- err = hci_bdaddr_list_add(&hdev->blacklist, &bdaddr, BDADDR_BREDR);
+ err = hci_bdaddr_list_add(&hdev->reject_list, &bdaddr, BDADDR_BREDR);
hci_dev_unlock(hdev);
return err;
}
-static int hci_sock_blacklist_del(struct hci_dev *hdev, void __user *arg)
+static int hci_sock_reject_list_del(struct hci_dev *hdev, void __user *arg)
{
bdaddr_t bdaddr;
int err;
@@ -924,7 +924,7 @@
hci_dev_lock(hdev);
- err = hci_bdaddr_list_del(&hdev->blacklist, &bdaddr, BDADDR_BREDR);
+ err = hci_bdaddr_list_del(&hdev->reject_list, &bdaddr, BDADDR_BREDR);
hci_dev_unlock(hdev);
@@ -964,12 +964,12 @@
case HCIBLOCKADDR:
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- return hci_sock_blacklist_add(hdev, (void __user *)arg);
+ return hci_sock_reject_list_add(hdev, (void __user *)arg);
case HCIUNBLOCKADDR:
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- return hci_sock_blacklist_del(hdev, (void __user *)arg);
+ return hci_sock_reject_list_del(hdev, (void __user *)arg);
}
return -ENOIOCTLCMD;
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index b69d88b..ccd2c37 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -48,6 +48,9 @@
BT_DBG("conn %p", conn);
+ if (device_is_registered(&conn->dev))
+ return;
+
dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle);
if (device_add(&conn->dev) < 0) {
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 0ddbc41..c5e4d2b 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -61,6 +61,9 @@
static void l2cap_tx(struct l2cap_chan *chan, struct l2cap_ctrl *control,
struct sk_buff_head *skbs, u8 event);
+static void l2cap_retrans_timeout(struct work_struct *work);
+static void l2cap_monitor_timeout(struct work_struct *work);
+static void l2cap_ack_timeout(struct work_struct *work);
static inline u8 bdaddr_type(u8 link_type, u8 bdaddr_type)
{
@@ -111,7 +114,8 @@
}
/* Find channel with given SCID.
- * Returns locked channel. */
+ * Returns a reference locked channel.
+ */
static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn,
u16 cid)
{
@@ -119,15 +123,19 @@
mutex_lock(&conn->chan_lock);
c = __l2cap_get_chan_by_scid(conn, cid);
- if (c)
- l2cap_chan_lock(c);
+ if (c) {
+ /* Only lock if chan reference is not 0 */
+ c = l2cap_chan_hold_unless_zero(c);
+ if (c)
+ l2cap_chan_lock(c);
+ }
mutex_unlock(&conn->chan_lock);
return c;
}
/* Find channel with given DCID.
- * Returns locked channel.
+ * Returns a reference locked channel.
*/
static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn,
u16 cid)
@@ -136,8 +144,12 @@
mutex_lock(&conn->chan_lock);
c = __l2cap_get_chan_by_dcid(conn, cid);
- if (c)
- l2cap_chan_lock(c);
+ if (c) {
+ /* Only lock if chan reference is not 0 */
+ c = l2cap_chan_hold_unless_zero(c);
+ if (c)
+ l2cap_chan_lock(c);
+ }
mutex_unlock(&conn->chan_lock);
return c;
@@ -162,8 +174,12 @@
mutex_lock(&conn->chan_lock);
c = __l2cap_get_chan_by_ident(conn, ident);
- if (c)
- l2cap_chan_lock(c);
+ if (c) {
+ /* Only lock if chan reference is not 0 */
+ c = l2cap_chan_hold_unless_zero(c);
+ if (c)
+ l2cap_chan_lock(c);
+ }
mutex_unlock(&conn->chan_lock);
return c;
@@ -463,6 +479,9 @@
write_unlock(&chan_list_lock);
INIT_DELAYED_WORK(&chan->chan_timer, l2cap_chan_timeout);
+ INIT_DELAYED_WORK(&chan->retrans_timer, l2cap_retrans_timeout);
+ INIT_DELAYED_WORK(&chan->monitor_timer, l2cap_monitor_timeout);
+ INIT_DELAYED_WORK(&chan->ack_timer, l2cap_ack_timeout);
chan->state = BT_OPEN;
@@ -497,6 +516,16 @@
kref_get(&c->kref);
}
+struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c)
+{
+ BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref));
+
+ if (!kref_get_unless_zero(&c->kref))
+ return NULL;
+
+ return c;
+}
+
void l2cap_chan_put(struct l2cap_chan *c)
{
BT_DBG("chan %p orig refcnt %d", c, kref_read(&c->kref));
@@ -1438,6 +1467,7 @@
l2cap_ecred_init(chan, 0);
+ memset(&data, 0, sizeof(data));
data.pdu.req.psm = chan->psm;
data.pdu.req.mtu = cpu_to_le16(chan->imtu);
data.pdu.req.mps = cpu_to_le16(chan->mps);
@@ -1687,8 +1717,8 @@
if (hcon->out)
smp_conn_security(hcon, hcon->pending_sec_level);
- /* For LE slave connections, make sure the connection interval
- * is in the range of the minium and maximum interval that has
+ /* For LE peripheral connections, make sure the connection interval
+ * is in the range of the minimum and maximum interval that has
* been configured for this connection. If not, then trigger
* the connection update procedure.
*/
@@ -1942,11 +1972,11 @@
bdaddr_t *dst,
u8 link_type)
{
- struct l2cap_chan *c, *c1 = NULL;
+ struct l2cap_chan *c, *tmp, *c1 = NULL;
read_lock(&chan_list_lock);
- list_for_each_entry(c, &chan_list, global_l) {
+ list_for_each_entry_safe(c, tmp, &chan_list, global_l) {
if (state && c->state != state)
continue;
@@ -1956,7 +1986,7 @@
if (link_type == LE_LINK && c->src_type == BDADDR_BREDR)
continue;
- if (c->psm == psm) {
+ if (c->chan_type != L2CAP_CHAN_FIXED && c->psm == psm) {
int src_match, dst_match;
int src_any, dst_any;
@@ -1964,7 +1994,9 @@
src_match = !bacmp(&c->src, src);
dst_match = !bacmp(&c->dst, dst);
if (src_match && dst_match) {
- l2cap_chan_hold(c);
+ if (!l2cap_chan_hold_unless_zero(c))
+ continue;
+
read_unlock(&chan_list_lock);
return c;
}
@@ -1979,7 +2011,7 @@
}
if (c1)
- l2cap_chan_hold(c1);
+ c1 = l2cap_chan_hold_unless_zero(c1);
read_unlock(&chan_list_lock);
@@ -3290,10 +3322,6 @@
chan->rx_state = L2CAP_RX_STATE_RECV;
chan->tx_state = L2CAP_TX_STATE_XMIT;
- INIT_DELAYED_WORK(&chan->retrans_timer, l2cap_retrans_timeout);
- INIT_DELAYED_WORK(&chan->monitor_timer, l2cap_monitor_timeout);
- INIT_DELAYED_WORK(&chan->ack_timer, l2cap_ack_timeout);
-
skb_queue_head_init(&chan->srej_q);
err = l2cap_seq_list_init(&chan->srej_list, chan->tx_win);
@@ -3732,7 +3760,8 @@
l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC,
sizeof(rfc), (unsigned long) &rfc, endptr - ptr);
- if (test_bit(FLAG_EFS_ENABLE, &chan->flags)) {
+ if (remote_efs &&
+ test_bit(FLAG_EFS_ENABLE, &chan->flags)) {
chan->remote_id = efs.id;
chan->remote_stype = efs.stype;
chan->remote_msdu = le16_to_cpu(efs.msdu);
@@ -4277,6 +4306,12 @@
}
}
+ chan = l2cap_chan_hold_unless_zero(chan);
+ if (!chan) {
+ err = -EBADSLT;
+ goto unlock;
+ }
+
err = 0;
l2cap_chan_lock(chan);
@@ -4306,6 +4341,7 @@
}
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
unlock:
mutex_unlock(&conn->chan_lock);
@@ -4459,6 +4495,7 @@
unlock:
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return err;
}
@@ -4572,6 +4609,7 @@
done:
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return err;
}
@@ -5299,6 +5337,7 @@
l2cap_send_move_chan_rsp(chan, result);
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return 0;
}
@@ -5391,6 +5430,7 @@
}
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
}
static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid,
@@ -5420,6 +5460,7 @@
l2cap_send_move_chan_cfm(chan, L2CAP_MC_UNCONFIRMED);
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
}
static int l2cap_move_channel_rsp(struct l2cap_conn *conn,
@@ -5483,6 +5524,7 @@
l2cap_send_move_chan_cfm_rsp(conn, cmd->ident, icid);
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return 0;
}
@@ -5518,6 +5560,7 @@
}
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return 0;
}
@@ -5766,6 +5809,19 @@
BT_DBG("psm 0x%2.2x scid 0x%4.4x mtu %u mps %u", __le16_to_cpu(psm),
scid, mtu, mps);
+ /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 3, Part A
+ * page 1059:
+ *
+ * Valid range: 0x0001-0x00ff
+ *
+ * Table 4.15: L2CAP_LE_CREDIT_BASED_CONNECTION_REQ SPSM ranges
+ */
+ if (!psm || __le16_to_cpu(psm) > L2CAP_PSM_LE_DYN_END) {
+ result = L2CAP_CR_LE_BAD_PSM;
+ chan = NULL;
+ goto response;
+ }
+
/* Check if we have socket listening on psm */
pchan = l2cap_global_chan_by_psm(BT_LISTEN, psm, &conn->hcon->src,
&conn->hcon->dst, LE_LINK);
@@ -5890,12 +5946,11 @@
if (credits > max_credits) {
BT_ERR("LE credits overflow");
l2cap_send_disconn_req(chan, ECONNRESET);
- l2cap_chan_unlock(chan);
/* Return 0 so that we don't trigger an unnecessary
* command reject packet.
*/
- return 0;
+ goto unlock;
}
chan->tx_credits += credits;
@@ -5906,7 +5961,9 @@
if (chan->tx_credits)
chan->ops->resume(chan);
+unlock:
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return 0;
}
@@ -5945,6 +6002,18 @@
psm = req->psm;
+ /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 3, Part A
+ * page 1059:
+ *
+ * Valid range: 0x0001-0x00ff
+ *
+ * Table 4.15: L2CAP_LE_CREDIT_BASED_CONNECTION_REQ SPSM ranges
+ */
+ if (!psm || __le16_to_cpu(psm) > L2CAP_PSM_LE_DYN_END) {
+ result = L2CAP_CR_LE_BAD_PSM;
+ goto response;
+ }
+
BT_DBG("psm 0x%2.2x mtu %u mps %u", __le16_to_cpu(psm), mtu, mps);
memset(&pdu, 0, sizeof(pdu));
@@ -6831,6 +6900,7 @@
struct l2cap_ctrl *control,
struct sk_buff *skb, u8 event)
{
+ struct l2cap_ctrl local_control;
int err = 0;
bool skb_in_use = false;
@@ -6855,15 +6925,32 @@
chan->buffer_seq = chan->expected_tx_seq;
skb_in_use = true;
+ /* l2cap_reassemble_sdu may free skb, hence invalidate
+ * control, so make a copy in advance to use it after
+ * l2cap_reassemble_sdu returns and to avoid the race
+ * condition, for example:
+ *
+ * The current thread calls:
+ * l2cap_reassemble_sdu
+ * chan->ops->recv == l2cap_sock_recv_cb
+ * __sock_queue_rcv_skb
+ * Another thread calls:
+ * bt_sock_recvmsg
+ * skb_recv_datagram
+ * skb_free_datagram
+ * Then the current thread tries to access control, but
+ * it was freed by skb_free_datagram.
+ */
+ local_control = *control;
err = l2cap_reassemble_sdu(chan, skb, control);
if (err)
break;
- if (control->final) {
+ if (local_control.final) {
if (!test_and_clear_bit(CONN_REJ_ACT,
&chan->conn_state)) {
- control->final = 0;
- l2cap_retransmit_all(chan, control);
+ local_control.final = 0;
+ l2cap_retransmit_all(chan, &local_control);
l2cap_ertm_send(chan);
}
}
@@ -7243,11 +7330,27 @@
static int l2cap_stream_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control,
struct sk_buff *skb)
{
+ /* l2cap_reassemble_sdu may free skb, hence invalidate control, so store
+ * the txseq field in advance to use it after l2cap_reassemble_sdu
+ * returns and to avoid the race condition, for example:
+ *
+ * The current thread calls:
+ * l2cap_reassemble_sdu
+ * chan->ops->recv == l2cap_sock_recv_cb
+ * __sock_queue_rcv_skb
+ * Another thread calls:
+ * bt_sock_recvmsg
+ * skb_recv_datagram
+ * skb_free_datagram
+ * Then the current thread tries to access control, but it was freed by
+ * skb_free_datagram.
+ */
+ u16 txseq = control->txseq;
+
BT_DBG("chan %p, control %p, skb %p, state %d", chan, control, skb,
chan->rx_state);
- if (l2cap_classify_txseq(chan, control->txseq) ==
- L2CAP_TXSEQ_EXPECTED) {
+ if (l2cap_classify_txseq(chan, txseq) == L2CAP_TXSEQ_EXPECTED) {
l2cap_pass_to_tx(chan, control);
BT_DBG("buffer_seq %d->%d", chan->buffer_seq,
@@ -7270,8 +7373,8 @@
}
}
- chan->last_acked_seq = control->txseq;
- chan->expected_tx_seq = __next_seq(chan, control->txseq);
+ chan->last_acked_seq = txseq;
+ chan->expected_tx_seq = __next_seq(chan, txseq);
return 0;
}
@@ -7527,6 +7630,7 @@
return;
}
+ l2cap_chan_hold(chan);
l2cap_chan_lock(chan);
} else {
BT_DBG("unknown cid 0x%4.4x", cid);
@@ -7539,7 +7643,7 @@
BT_DBG("chan %p, len %d", chan, skb->len);
/* If we receive data on a fixed channel before the info req/rsp
- * procdure is done simply assume that the channel is supported
+ * procedure is done simply assume that the channel is supported
* and mark it as ready.
*/
if (chan->chan_type == L2CAP_CHAN_FIXED)
@@ -7586,6 +7690,7 @@
done:
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
}
static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm,
@@ -7651,7 +7756,7 @@
* at least ensure that we ignore incoming data from them.
*/
if (hcon->type == LE_LINK &&
- hci_bdaddr_list_lookup(&hcon->hdev->blacklist, &hcon->dst,
+ hci_bdaddr_list_lookup(&hcon->hdev->reject_list, &hcon->dst,
bdaddr_dst_type(hcon))) {
kfree_skb(skb);
return;
@@ -8073,7 +8178,7 @@
if (src_type != c->src_type)
continue;
- l2cap_chan_hold(c);
+ c = l2cap_chan_hold_unless_zero(c);
read_unlock(&chan_list_lock);
return c;
}
@@ -8107,7 +8212,7 @@
dst_type = bdaddr_dst_type(hcon);
/* If device is blocked, do not create channels for it */
- if (hci_bdaddr_list_lookup(&hdev->blacklist, &hcon->dst, dst_type))
+ if (hci_bdaddr_list_lookup(&hdev->reject_list, &hcon->dst, dst_type))
return;
/* Find fixed channels and notify them of the new connection. We
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 08f67f9..878bf73 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -4041,7 +4041,7 @@
memset(&rp, 0, sizeof(rp));
if (cp->addr.type == BDADDR_BREDR) {
- br_params = hci_bdaddr_list_lookup_with_flags(&hdev->whitelist,
+ br_params = hci_bdaddr_list_lookup_with_flags(&hdev->accept_list,
&cp->addr.bdaddr,
cp->addr.type);
if (!br_params)
@@ -4109,7 +4109,7 @@
hci_dev_lock(hdev);
if (cp->addr.type == BDADDR_BREDR) {
- br_params = hci_bdaddr_list_lookup_with_flags(&hdev->whitelist,
+ br_params = hci_bdaddr_list_lookup_with_flags(&hdev->accept_list,
&cp->addr.bdaddr,
cp->addr.type);
@@ -4979,7 +4979,7 @@
hci_dev_lock(hdev);
- err = hci_bdaddr_list_add(&hdev->blacklist, &cp->addr.bdaddr,
+ err = hci_bdaddr_list_add(&hdev->reject_list, &cp->addr.bdaddr,
cp->addr.type);
if (err < 0) {
status = MGMT_STATUS_FAILED;
@@ -5015,7 +5015,7 @@
hci_dev_lock(hdev);
- err = hci_bdaddr_list_del(&hdev->blacklist, &cp->addr.bdaddr,
+ err = hci_bdaddr_list_del(&hdev->reject_list, &cp->addr.bdaddr,
cp->addr.type);
if (err < 0) {
status = MGMT_STATUS_INVALID_PARAMS;
@@ -6506,7 +6506,7 @@
goto unlock;
}
- err = hci_bdaddr_list_add_with_flags(&hdev->whitelist,
+ err = hci_bdaddr_list_add_with_flags(&hdev->accept_list,
&cp->addr.bdaddr,
cp->addr.type, 0);
if (err)
@@ -6604,7 +6604,7 @@
}
if (cp->addr.type == BDADDR_BREDR) {
- err = hci_bdaddr_list_del(&hdev->whitelist,
+ err = hci_bdaddr_list_del(&hdev->accept_list,
&cp->addr.bdaddr,
cp->addr.type);
if (err) {
@@ -6675,7 +6675,7 @@
goto unlock;
}
- list_for_each_entry_safe(b, btmp, &hdev->whitelist, list) {
+ list_for_each_entry_safe(b, btmp, &hdev->accept_list, list) {
device_removed(sk, hdev, &b->bdaddr, b->bdaddr_type);
list_del(&b->list);
kfree(b);
diff --git a/net/bluetooth/mgmt_config.c b/net/bluetooth/mgmt_config.c
index b30b571..2d3ad28 100644
--- a/net/bluetooth/mgmt_config.c
+++ b/net/bluetooth/mgmt_config.c
@@ -67,6 +67,8 @@
HDEV_PARAM_U16(0x001a, le_supv_timeout),
HDEV_PARAM_U16_JIFFIES_TO_MSECS(0x001b,
def_le_autoconnect_timeout),
+ HDEV_PARAM_U16(0x001d, advmon_allowlist_duration),
+ HDEV_PARAM_U16(0x001e, advmon_no_filter_duration),
};
struct mgmt_rp_read_def_system_config *rp = (void *)params;
@@ -138,6 +140,8 @@
case 0x0019:
case 0x001a:
case 0x001b:
+ case 0x001d:
+ case 0x001e:
if (len != sizeof(u16)) {
bt_dev_warn(hdev, "invalid length %d, exp %zu for type %d",
len, sizeof(u16), type);
@@ -251,6 +255,12 @@
hdev->def_le_autoconnect_timeout =
msecs_to_jiffies(TLV_GET_LE16(buffer));
break;
+ case 0x0001d:
+ hdev->advmon_allowlist_duration = TLV_GET_LE16(buffer);
+ break;
+ case 0x0001e:
+ hdev->advmon_no_filter_duration = TLV_GET_LE16(buffer);
+ break;
default:
bt_dev_warn(hdev, "unsupported parameter %u", type);
break;
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index f2bacb4..7324764 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -549,22 +549,58 @@
return dlc;
}
-int rfcomm_dlc_send(struct rfcomm_dlc *d, struct sk_buff *skb)
+static int rfcomm_dlc_send_frag(struct rfcomm_dlc *d, struct sk_buff *frag)
{
- int len = skb->len;
-
- if (d->state != BT_CONNECTED)
- return -ENOTCONN;
+ int len = frag->len;
BT_DBG("dlc %p mtu %d len %d", d, d->mtu, len);
if (len > d->mtu)
return -EINVAL;
- rfcomm_make_uih(skb, d->addr);
- skb_queue_tail(&d->tx_queue, skb);
+ rfcomm_make_uih(frag, d->addr);
+ __skb_queue_tail(&d->tx_queue, frag);
- if (!test_bit(RFCOMM_TX_THROTTLED, &d->flags))
+ return len;
+}
+
+int rfcomm_dlc_send(struct rfcomm_dlc *d, struct sk_buff *skb)
+{
+ unsigned long flags;
+ struct sk_buff *frag, *next;
+ int len;
+
+ if (d->state != BT_CONNECTED)
+ return -ENOTCONN;
+
+ frag = skb_shinfo(skb)->frag_list;
+ skb_shinfo(skb)->frag_list = NULL;
+
+ /* Queue all fragments atomically. */
+ spin_lock_irqsave(&d->tx_queue.lock, flags);
+
+ len = rfcomm_dlc_send_frag(d, skb);
+ if (len < 0 || !frag)
+ goto unlock;
+
+ for (; frag; frag = next) {
+ int ret;
+
+ next = frag->next;
+
+ ret = rfcomm_dlc_send_frag(d, frag);
+ if (ret < 0) {
+ kfree_skb(frag);
+ goto unlock;
+ }
+
+ len += ret;
+ }
+
+unlock:
+ spin_unlock_irqrestore(&d->tx_queue.lock, flags);
+
+ if (len > 0 && !test_bit(RFCOMM_TX_THROTTLED, &d->flags))
rfcomm_schedule();
return len;
}
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index ae6f807..4cf1fa9 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -575,47 +575,21 @@
lock_sock(sk);
sent = bt_sock_wait_ready(sk, msg->msg_flags);
- if (sent)
- goto done;
- while (len) {
- size_t size = min_t(size_t, len, d->mtu);
- int err;
-
- skb = sock_alloc_send_skb(sk, size + RFCOMM_SKB_RESERVE,
- msg->msg_flags & MSG_DONTWAIT, &err);
- if (!skb) {
- if (sent == 0)
- sent = err;
- break;
- }
- skb_reserve(skb, RFCOMM_SKB_HEAD_RESERVE);
-
- err = memcpy_from_msg(skb_put(skb, size), msg, size);
- if (err) {
- kfree_skb(skb);
- if (sent == 0)
- sent = err;
- break;
- }
-
- skb->priority = sk->sk_priority;
-
- err = rfcomm_dlc_send(d, skb);
- if (err < 0) {
- kfree_skb(skb);
- if (sent == 0)
- sent = err;
- break;
- }
-
- sent += size;
- len -= size;
- }
-
-done:
release_sock(sk);
+ if (sent)
+ return sent;
+
+ skb = bt_skb_sendmmsg(sk, msg, len, d->mtu, RFCOMM_SKB_HEAD_RESERVE,
+ RFCOMM_SKB_TAIL_RESERVE);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+ sent = rfcomm_dlc_send(d, skb);
+ if (sent < 0)
+ kfree_skb(skb);
+
return sent;
}
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 2f2b8dd..8244d3a 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -280,12 +280,10 @@
return err;
}
-static int sco_send_frame(struct sock *sk, void *buf, int len,
- unsigned int msg_flags)
+static int sco_send_frame(struct sock *sk, struct sk_buff *skb)
{
struct sco_conn *conn = sco_pi(sk)->conn;
- struct sk_buff *skb;
- int err;
+ int len = skb->len;
/* Check outgoing MTU */
if (len > conn->mtu)
@@ -293,11 +291,6 @@
BT_DBG("sk %p len %d", sk, len);
- skb = bt_skb_send_alloc(sk, len, msg_flags & MSG_DONTWAIT, &err);
- if (!skb)
- return err;
-
- memcpy(skb_put(skb, len), buf, len);
hci_send_sco(conn->hcon, skb);
return len;
@@ -575,19 +568,24 @@
addr->sa_family != AF_BLUETOOTH)
return -EINVAL;
- if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND)
- return -EBADFD;
+ lock_sock(sk);
+ if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) {
+ err = -EBADFD;
+ goto done;
+ }
- if (sk->sk_type != SOCK_SEQPACKET)
- return -EINVAL;
+ if (sk->sk_type != SOCK_SEQPACKET) {
+ err = -EINVAL;
+ goto done;
+ }
hdev = hci_get_route(&sa->sco_bdaddr, &sco_pi(sk)->src, BDADDR_BREDR);
- if (!hdev)
- return -EHOSTUNREACH;
+ if (!hdev) {
+ err = -EHOSTUNREACH;
+ goto done;
+ }
hci_dev_lock(hdev);
- lock_sock(sk);
-
/* Set destination address and psm */
bacpy(&sco_pi(sk)->dst, &sa->sco_bdaddr);
@@ -722,7 +720,7 @@
size_t len)
{
struct sock *sk = sock->sk;
- void *buf;
+ struct sk_buff *skb;
int err;
BT_DBG("sock %p, sk %p", sock, sk);
@@ -734,24 +732,21 @@
if (msg->msg_flags & MSG_OOB)
return -EOPNOTSUPP;
- buf = kmalloc(len, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- if (memcpy_from_msg(buf, msg, len)) {
- kfree(buf);
- return -EFAULT;
- }
+ skb = bt_skb_sendmsg(sk, msg, len, len, 0, 0);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
lock_sock(sk);
if (sk->sk_state == BT_CONNECTED)
- err = sco_send_frame(sk, buf, len, msg->msg_flags);
+ err = sco_send_frame(sk, skb);
else
err = -ENOTCONN;
release_sock(sk);
- kfree(buf);
+
+ if (err < 0)
+ kfree_skb(skb);
return err;
}
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 2b7879a..b7374db 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -909,8 +909,8 @@
hcon->pending_sec_level = BT_SECURITY_HIGH;
}
- /* If both devices have Keyoard-Display I/O, the master
- * Confirms and the slave Enters the passkey.
+ /* If both devices have Keyboard-Display I/O, the initiator
+ * Confirms and the responder Enters the passkey.
*/
if (smp->method == OVERLAP) {
if (hcon->role == HCI_ROLE_MASTER)
@@ -3076,7 +3076,7 @@
if (!test_bit(HCI_CONN_ENCRYPT, &hcon->flags))
return;
- /* Only master may initiate SMP over BR/EDR */
+ /* Only initiator may initiate SMP over BR/EDR */
if (hcon->role != HCI_ROLE_MASTER)
return;
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index eb684f3..717b01f 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -10,20 +10,86 @@
#include <net/bpf_sk_storage.h>
#include <net/sock.h>
#include <net/tcp.h>
+#include <net/net_namespace.h>
#include <linux/error-injection.h>
#include <linux/smp.h>
+#include <linux/sock_diag.h>
#define CREATE_TRACE_POINTS
#include <trace/events/bpf_test_run.h>
+struct bpf_test_timer {
+ enum { NO_PREEMPT, NO_MIGRATE } mode;
+ u32 i;
+ u64 time_start, time_spent;
+};
+
+static void bpf_test_timer_enter(struct bpf_test_timer *t)
+ __acquires(rcu)
+{
+ rcu_read_lock();
+ if (t->mode == NO_PREEMPT)
+ preempt_disable();
+ else
+ migrate_disable();
+
+ t->time_start = ktime_get_ns();
+}
+
+static void bpf_test_timer_leave(struct bpf_test_timer *t)
+ __releases(rcu)
+{
+ t->time_start = 0;
+
+ if (t->mode == NO_PREEMPT)
+ preempt_enable();
+ else
+ migrate_enable();
+ rcu_read_unlock();
+}
+
+static bool bpf_test_timer_continue(struct bpf_test_timer *t, u32 repeat, int *err, u32 *duration)
+ __must_hold(rcu)
+{
+ t->i++;
+ if (t->i >= repeat) {
+ /* We're done. */
+ t->time_spent += ktime_get_ns() - t->time_start;
+ do_div(t->time_spent, t->i);
+ *duration = t->time_spent > U32_MAX ? U32_MAX : (u32)t->time_spent;
+ *err = 0;
+ goto reset;
+ }
+
+ if (signal_pending(current)) {
+ /* During iteration: we've been cancelled, abort. */
+ *err = -EINTR;
+ goto reset;
+ }
+
+ if (need_resched()) {
+ /* During iteration: we need to reschedule between runs. */
+ t->time_spent += ktime_get_ns() - t->time_start;
+ bpf_test_timer_leave(t);
+ cond_resched();
+ bpf_test_timer_enter(t);
+ }
+
+ /* Do another round. */
+ return true;
+
+reset:
+ t->i = 0;
+ return false;
+}
+
static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
u32 *retval, u32 *time, bool xdp)
{
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL };
+ struct bpf_test_timer t = { NO_MIGRATE };
enum bpf_cgroup_storage_type stype;
- u64 time_start, time_spent = 0;
- int ret = 0;
- u32 i;
+ int ret;
for_each_cgroup_storage_type(stype) {
storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
@@ -38,10 +104,8 @@
if (!repeat)
repeat = 1;
- rcu_read_lock();
- migrate_disable();
- time_start = ktime_get_ns();
- for (i = 0; i < repeat; i++) {
+ bpf_test_timer_enter(&t);
+ do {
ret = bpf_cgroup_storage_set(storage);
if (ret)
break;
@@ -53,29 +117,8 @@
bpf_cgroup_storage_unset();
- if (signal_pending(current)) {
- ret = -EINTR;
- break;
- }
-
- if (need_resched()) {
- time_spent += ktime_get_ns() - time_start;
- migrate_enable();
- rcu_read_unlock();
-
- cond_resched();
-
- rcu_read_lock();
- migrate_disable();
- time_start = ktime_get_ns();
- }
- }
- time_spent += ktime_get_ns() - time_start;
- migrate_enable();
- rcu_read_unlock();
-
- do_div(time_spent, repeat);
- *time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
+ } while (bpf_test_timer_continue(&t, repeat, &ret, time));
+ bpf_test_timer_leave(&t);
for_each_cgroup_storage_type(stype)
bpf_cgroup_storage_free(storage[stype]);
@@ -188,6 +231,7 @@
if (user_size > size)
return ERR_PTR(-EMSGSIZE);
+ size = SKB_DATA_ALIGN(size);
data = kzalloc(size + headroom + tailroom, GFP_USER);
if (!data)
return ERR_PTR(-ENOMEM);
@@ -398,6 +442,9 @@
{
struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb;
+ if (!skb->len)
+ return -EINVAL;
+
if (!__skb)
return 0;
@@ -688,18 +735,17 @@
const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
+ struct bpf_test_timer t = { NO_PREEMPT };
u32 size = kattr->test.data_size_in;
struct bpf_flow_dissector ctx = {};
u32 repeat = kattr->test.repeat;
struct bpf_flow_keys *user_ctx;
struct bpf_flow_keys flow_keys;
- u64 time_start, time_spent = 0;
const struct ethhdr *eth;
unsigned int flags = 0;
u32 retval, duration;
void *data;
int ret;
- u32 i;
if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR)
return -EINVAL;
@@ -735,39 +781,15 @@
ctx.data = data;
ctx.data_end = (__u8 *)data + size;
- rcu_read_lock();
- preempt_disable();
- time_start = ktime_get_ns();
- for (i = 0; i < repeat; i++) {
+ bpf_test_timer_enter(&t);
+ do {
retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN,
size, flags);
+ } while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
+ bpf_test_timer_leave(&t);
- if (signal_pending(current)) {
- preempt_enable();
- rcu_read_unlock();
-
- ret = -EINTR;
- goto out;
- }
-
- if (need_resched()) {
- time_spent += ktime_get_ns() - time_start;
- preempt_enable();
- rcu_read_unlock();
-
- cond_resched();
-
- rcu_read_lock();
- preempt_disable();
- time_start = ktime_get_ns();
- }
- }
- time_spent += ktime_get_ns() - time_start;
- preempt_enable();
- rcu_read_unlock();
-
- do_div(time_spent, repeat);
- duration = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
+ if (ret < 0)
+ goto out;
ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys),
retval, duration);
@@ -780,3 +802,106 @@
kfree(data);
return ret;
}
+
+int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr,
+ union bpf_attr __user *uattr)
+{
+ struct bpf_test_timer t = { NO_PREEMPT };
+ struct bpf_prog_array *progs = NULL;
+ struct bpf_sk_lookup_kern ctx = {};
+ u32 repeat = kattr->test.repeat;
+ struct bpf_sk_lookup *user_ctx;
+ u32 retval, duration;
+ int ret = -EINVAL;
+
+ if (prog->type != BPF_PROG_TYPE_SK_LOOKUP)
+ return -EINVAL;
+
+ if (kattr->test.flags || kattr->test.cpu)
+ return -EINVAL;
+
+ if (kattr->test.data_in || kattr->test.data_size_in || kattr->test.data_out ||
+ kattr->test.data_size_out)
+ return -EINVAL;
+
+ if (!repeat)
+ repeat = 1;
+
+ user_ctx = bpf_ctx_init(kattr, sizeof(*user_ctx));
+ if (IS_ERR(user_ctx))
+ return PTR_ERR(user_ctx);
+
+ if (!user_ctx)
+ return -EINVAL;
+
+ if (user_ctx->sk)
+ goto out;
+
+ if (!range_is_zero(user_ctx, offsetofend(typeof(*user_ctx), local_port), sizeof(*user_ctx)))
+ goto out;
+
+ if (user_ctx->local_port > U16_MAX || user_ctx->remote_port > U16_MAX) {
+ ret = -ERANGE;
+ goto out;
+ }
+
+ ctx.family = (u16)user_ctx->family;
+ ctx.protocol = (u16)user_ctx->protocol;
+ ctx.dport = (u16)user_ctx->local_port;
+ ctx.sport = (__force __be16)user_ctx->remote_port;
+
+ switch (ctx.family) {
+ case AF_INET:
+ ctx.v4.daddr = (__force __be32)user_ctx->local_ip4;
+ ctx.v4.saddr = (__force __be32)user_ctx->remote_ip4;
+ break;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ ctx.v6.daddr = (struct in6_addr *)user_ctx->local_ip6;
+ ctx.v6.saddr = (struct in6_addr *)user_ctx->remote_ip6;
+ break;
+#endif
+
+ default:
+ ret = -EAFNOSUPPORT;
+ goto out;
+ }
+
+ progs = bpf_prog_array_alloc(1, GFP_KERNEL);
+ if (!progs) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ progs->items[0].prog = prog;
+
+ bpf_test_timer_enter(&t);
+ do {
+ ctx.selected_sk = NULL;
+ retval = BPF_PROG_SK_LOOKUP_RUN_ARRAY(progs, ctx, BPF_PROG_RUN);
+ } while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
+ bpf_test_timer_leave(&t);
+
+ if (ret < 0)
+ goto out;
+
+ user_ctx->cookie = 0;
+ if (ctx.selected_sk) {
+ if (ctx.selected_sk->sk_reuseport && !ctx.no_reuseport) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
+ user_ctx->cookie = sock_gen_cookie(ctx.selected_sk);
+ }
+
+ ret = bpf_test_finish(kattr, uattr, NULL, 0, retval, duration);
+ if (!ret)
+ ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx));
+
+out:
+ bpf_prog_array_free(progs);
+ kfree(user_ctx);
+ return ret;
+}
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 59a318b..bf5bf14 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -43,6 +43,13 @@
u64_stats_update_end(&brstats->syncp);
vg = br_vlan_group_rcu(br);
+
+ /* Reset the offload_fwd_mark because there could be a stacked
+ * bridge above, and it should not think this bridge it doing
+ * that bridge's work forwarding out its ports.
+ */
+ br_switchdev_frame_unmark(skb);
+
/* Bridge is just like any other port. Make sure the
* packet is allowed except in promisc modue when someone
* may be running packet capture.
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 68c0d0f..a718204 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -384,6 +384,7 @@
/* - Bridged-and-DNAT'ed traffic doesn't
* require ip_forwarding. */
if (rt->dst.dev == dev) {
+ skb_dst_drop(skb);
skb_dst_set(skb, &rt->dst);
goto bridged_dnat;
}
@@ -413,6 +414,7 @@
kfree_skb(skb);
return 0;
}
+ skb_dst_drop(skb);
skb_dst_set_noref(skb, &rt->dst);
}
@@ -1012,9 +1014,24 @@
return okfn(net, sk, skb);
ops = nf_hook_entries_get_hook_ops(e);
- for (i = 0; i < e->num_hook_entries &&
- ops[i]->priority <= NF_BR_PRI_BRNF; i++)
- ;
+ for (i = 0; i < e->num_hook_entries; i++) {
+ /* These hooks have already been called */
+ if (ops[i]->priority < NF_BR_PRI_BRNF)
+ continue;
+
+ /* These hooks have not been called yet, run them. */
+ if (ops[i]->priority > NF_BR_PRI_BRNF)
+ break;
+
+ /* take a closer look at NF_BR_PRI_BRNF. */
+ if (ops[i]->hook == br_nf_pre_routing) {
+ /* This hook diverted the skb to this function,
+ * hooks after this have not been run yet.
+ */
+ i++;
+ break;
+ }
+ }
nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev,
sk, net, okfn);
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
index e4e0c83..6b07f30 100644
--- a/net/bridge/br_netfilter_ipv6.c
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -197,6 +197,7 @@
kfree_skb(skb);
return 0;
}
+ skb_dst_drop(skb);
skb_dst_set_noref(skb, &rt->dst);
}
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 852f4b5..1dc5db0 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -855,26 +855,37 @@
int __br_vlan_set_proto(struct net_bridge *br, __be16 proto)
{
+ struct switchdev_attr attr = {
+ .orig_dev = br->dev,
+ .id = SWITCHDEV_ATTR_ID_BRIDGE_VLAN_PROTOCOL,
+ .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP,
+ .u.vlan_protocol = ntohs(proto),
+ };
int err = 0;
struct net_bridge_port *p;
struct net_bridge_vlan *vlan;
struct net_bridge_vlan_group *vg;
- __be16 oldproto;
+ __be16 oldproto = br->vlan_proto;
if (br->vlan_proto == proto)
return 0;
+ err = switchdev_port_attr_set(br->dev, &attr);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
/* Add VLANs for the new proto to the device filter. */
list_for_each_entry(p, &br->port_list, list) {
vg = nbp_vlan_group(p);
list_for_each_entry(vlan, &vg->vlan_list, vlist) {
+ if (vlan->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV)
+ continue;
err = vlan_vid_add(p->dev, proto, vlan->vid);
if (err)
goto err_filt;
}
}
- oldproto = br->vlan_proto;
br->vlan_proto = proto;
recalculate_group_addr(br);
@@ -883,20 +894,32 @@
/* Delete VLANs for the old proto from the device filter. */
list_for_each_entry(p, &br->port_list, list) {
vg = nbp_vlan_group(p);
- list_for_each_entry(vlan, &vg->vlan_list, vlist)
+ list_for_each_entry(vlan, &vg->vlan_list, vlist) {
+ if (vlan->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV)
+ continue;
vlan_vid_del(p->dev, oldproto, vlan->vid);
+ }
}
return 0;
err_filt:
- list_for_each_entry_continue_reverse(vlan, &vg->vlan_list, vlist)
+ attr.u.vlan_protocol = ntohs(oldproto);
+ switchdev_port_attr_set(br->dev, &attr);
+
+ list_for_each_entry_continue_reverse(vlan, &vg->vlan_list, vlist) {
+ if (vlan->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV)
+ continue;
vlan_vid_del(p->dev, proto, vlan->vid);
+ }
list_for_each_entry_continue_reverse(p, &br->port_list, list) {
vg = nbp_vlan_group(p);
- list_for_each_entry(vlan, &vg->vlan_list, vlist)
+ list_for_each_entry(vlan, &vg->vlan_list, vlist) {
+ if (vlan->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV)
+ continue;
vlan_vid_del(p->dev, proto, vlan->vid);
+ }
}
return err;
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 32bc282..57f91ef 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -36,18 +36,10 @@
.entries = (char *)&initial_chain,
};
-static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
-{
- if (valid_hooks & ~(1 << NF_BR_BROUTING))
- return -EINVAL;
- return 0;
-}
-
static const struct ebt_table broute_table = {
.name = "broute",
.table = &initial_table,
.valid_hooks = 1 << NF_BR_BROUTING,
- .check = check,
.me = THIS_MODULE,
};
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index bcf982e..7f2e620 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -43,18 +43,10 @@
.entries = (char *)initial_chains,
};
-static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
-{
- if (valid_hooks & ~FILTER_VALID_HOOKS)
- return -EINVAL;
- return 0;
-}
-
static const struct ebt_table frame_filter = {
.name = "filter",
.table = &initial_table,
.valid_hooks = FILTER_VALID_HOOKS,
- .check = check,
.me = THIS_MODULE,
};
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 0d09277..1743a10 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -43,18 +43,10 @@
.entries = (char *)initial_chains,
};
-static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
-{
- if (valid_hooks & ~NAT_VALID_HOOKS)
- return -EINVAL;
- return 0;
-}
-
static const struct ebt_table frame_nat = {
.name = "nat",
.table = &initial_table,
.valid_hooks = NAT_VALID_HOOKS,
- .check = check,
.me = THIS_MODULE,
};
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index d481ff2..06b80b5 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -999,9 +999,10 @@
goto free_iterate;
}
- /* the table doesn't like it */
- if (t->check && (ret = t->check(newinfo, repl->valid_hooks)))
+ if (repl->valid_hooks != t->valid_hooks) {
+ ret = -EINVAL;
goto free_unlock;
+ }
if (repl->num_counters && repl->num_counters != t->private->nentries) {
ret = -EINVAL;
@@ -1186,11 +1187,6 @@
if (ret != 0)
goto free_chainstack;
- if (table->check && table->check(newinfo, table->valid_hooks)) {
- ret = -EINVAL;
- goto free_chainstack;
- }
-
table->private = newinfo;
rwlock_init(&table->lock);
mutex_lock(&ebt_mutex);
diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c
index 8e8ffac..97805ec 100644
--- a/net/bridge/netfilter/nft_meta_bridge.c
+++ b/net/bridge/netfilter/nft_meta_bridge.c
@@ -87,9 +87,8 @@
return nft_meta_get_init(ctx, expr, tb);
}
- priv->dreg = nft_parse_register(tb[NFTA_META_DREG]);
- return nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, len);
+ return nft_parse_register_store(ctx, tb[NFTA_META_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, len);
}
static struct nft_expr_type nft_meta_bridge_type;
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 42dc080..806fb4d 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -315,9 +315,6 @@
if (result == 0) {
pr_debug("connect timeout\n");
- caif_disconnect_client(dev_net(dev), &priv->chnl);
- priv->state = CAIF_DISCONNECTED;
- pr_debug("state disconnected\n");
result = -ETIMEDOUT;
goto error;
}
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 1c95ede..cf554e8 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -451,7 +451,7 @@
/* insert new receiver (dev,canid,mask) -> (func,data) */
- if (dev && dev->type != ARPHRD_CAN)
+ if (dev && (dev->type != ARPHRD_CAN || !can_get_ml_priv(dev)))
return -ENODEV;
if (dev && !net_eq(net, dev_net(dev)))
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 0928a39..afa82ad 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -100,6 +100,7 @@
struct bcm_op {
struct list_head list;
+ struct rcu_head rcu;
int ifindex;
canid_t can_id;
u32 flags;
@@ -273,6 +274,7 @@
struct sk_buff *skb;
struct net_device *dev;
struct canfd_frame *cf = op->frames + op->cfsiz * op->currframe;
+ int err;
/* no target device? => exit */
if (!op->ifindex)
@@ -297,11 +299,11 @@
/* send with loopback */
skb->dev = dev;
can_skb_set_owner(skb, op->sk);
- can_send(skb, 1);
+ err = can_send(skb, 1);
+ if (!err)
+ op->frames_abs++;
- /* update statistics */
op->currframe++;
- op->frames_abs++;
/* reached last frame? */
if (op->currframe >= op->nframes)
@@ -718,10 +720,9 @@
return NULL;
}
-static void bcm_remove_op(struct bcm_op *op)
+static void bcm_free_op_rcu(struct rcu_head *rcu_head)
{
- hrtimer_cancel(&op->timer);
- hrtimer_cancel(&op->thrtimer);
+ struct bcm_op *op = container_of(rcu_head, struct bcm_op, rcu);
if ((op->frames) && (op->frames != &op->sframe))
kfree(op->frames);
@@ -732,6 +733,14 @@
kfree(op);
}
+static void bcm_remove_op(struct bcm_op *op)
+{
+ hrtimer_cancel(&op->timer);
+ hrtimer_cancel(&op->thrtimer);
+
+ call_rcu(&op->rcu, bcm_free_op_rcu);
+}
+
static void bcm_rx_unreg(struct net_device *dev, struct bcm_op *op)
{
if (op->rx_reg_dev == dev) {
@@ -757,6 +766,9 @@
if ((op->can_id == mh->can_id) && (op->ifindex == ifindex) &&
(op->flags & CAN_FD_FRAME) == (mh->flags & CAN_FD_FRAME)) {
+ /* disable automatic timer on frame reception */
+ op->flags |= RX_NO_AUTOTIMER;
+
/*
* Don't care if we're bound or not (due to netdev
* problems) can_rx_unregister() is always a save
@@ -785,7 +797,6 @@
bcm_rx_handler, op);
list_del(&op->list);
- synchronize_rcu();
bcm_remove_op(op);
return 1; /* done */
}
diff --git a/net/can/isotp.c b/net/can/isotp.c
index d0581dc..f2f0bc7 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -141,6 +141,7 @@
struct can_isotp_options opt;
struct can_isotp_fc_options rxfc, txfc;
struct can_isotp_ll_options ll;
+ u32 frame_txtime;
u32 force_tx_stmin;
u32 force_rx_stmin;
struct tpcon rx, tx;
@@ -360,7 +361,7 @@
so->tx_gap = ktime_set(0, 0);
/* add transmission time for CAN frame N_As */
- so->tx_gap = ktime_add_ns(so->tx_gap, so->opt.frame_txtime);
+ so->tx_gap = ktime_add_ns(so->tx_gap, so->frame_txtime);
/* add waiting time for consecutive frames N_Cs */
if (so->opt.flags & CAN_ISOTP_FORCE_TXSTMIN)
so->tx_gap = ktime_add_ns(so->tx_gap,
@@ -863,6 +864,7 @@
struct canfd_frame *cf;
int ae = (so->opt.flags & CAN_ISOTP_EXTEND_ADDR) ? 1 : 0;
int wait_tx_done = (so->opt.flags & CAN_ISOTP_WAIT_TX_DONE) ? 1 : 0;
+ s64 hrtimer_sec = 0;
int off;
int err;
@@ -961,7 +963,9 @@
isotp_create_fframe(cf, so, ae);
/* start timeout for FC */
- hrtimer_start(&so->txtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT);
+ hrtimer_sec = 1;
+ hrtimer_start(&so->txtimer, ktime_set(hrtimer_sec, 0),
+ HRTIMER_MODE_REL_SOFT);
}
/* send the first or only CAN frame */
@@ -974,6 +978,11 @@
if (err) {
pr_notice_once("can-isotp: %s: can_send_ret %d\n",
__func__, err);
+
+ /* no transmission -> no timeout monitoring */
+ if (hrtimer_sec)
+ hrtimer_cancel(&so->txtimer);
+
goto err_out_drop;
}
@@ -1003,26 +1012,29 @@
{
struct sock *sk = sock->sk;
struct sk_buff *skb;
- int err = 0;
- int noblock;
+ struct isotp_sock *so = isotp_sk(sk);
+ int noblock = flags & MSG_DONTWAIT;
+ int ret = 0;
- noblock = flags & MSG_DONTWAIT;
+ if (flags & ~(MSG_DONTWAIT | MSG_TRUNC | MSG_PEEK))
+ return -EINVAL;
+
+ if (!so->bound)
+ return -EADDRNOTAVAIL;
+
flags &= ~MSG_DONTWAIT;
-
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, noblock, &ret);
if (!skb)
- return err;
+ return ret;
if (size < skb->len)
msg->msg_flags |= MSG_TRUNC;
else
size = skb->len;
- err = memcpy_to_msg(msg, skb->data, size);
- if (err < 0) {
- skb_free_datagram(sk, skb);
- return err;
- }
+ ret = memcpy_to_msg(msg, skb->data, size);
+ if (ret < 0)
+ goto out_err;
sock_recv_timestamp(msg, sk, skb);
@@ -1032,9 +1044,13 @@
memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
}
+ /* set length of return value */
+ ret = (flags & MSG_TRUNC) ? skb->len : size;
+
+out_err:
skb_free_datagram(sk, skb);
- return size;
+ return ret;
}
static int isotp_release(struct socket *sock)
@@ -1102,6 +1118,7 @@
struct net *net = sock_net(sk);
int ifindex;
struct net_device *dev;
+ canid_t tx_id, rx_id;
int err = 0;
int notify_enetdown = 0;
int do_rx_reg = 1;
@@ -1109,35 +1126,38 @@
if (len < ISOTP_MIN_NAMELEN)
return -EINVAL;
- if (addr->can_addr.tp.tx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG))
- return -EADDRNOTAVAIL;
+ /* sanitize tx/rx CAN identifiers */
+ tx_id = addr->can_addr.tp.tx_id;
+ if (tx_id & CAN_EFF_FLAG)
+ tx_id &= (CAN_EFF_FLAG | CAN_EFF_MASK);
+ else
+ tx_id &= CAN_SFF_MASK;
+
+ rx_id = addr->can_addr.tp.rx_id;
+ if (rx_id & CAN_EFF_FLAG)
+ rx_id &= (CAN_EFF_FLAG | CAN_EFF_MASK);
+ else
+ rx_id &= CAN_SFF_MASK;
if (!addr->can_ifindex)
return -ENODEV;
lock_sock(sk);
+ if (so->bound) {
+ err = -EINVAL;
+ goto out;
+ }
+
/* do not register frame reception for functional addressing */
if (so->opt.flags & CAN_ISOTP_SF_BROADCAST)
do_rx_reg = 0;
/* do not validate rx address for functional addressing */
- if (do_rx_reg) {
- if (addr->can_addr.tp.rx_id == addr->can_addr.tp.tx_id) {
- err = -EADDRNOTAVAIL;
- goto out;
- }
-
- if (addr->can_addr.tp.rx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG)) {
- err = -EADDRNOTAVAIL;
- goto out;
- }
- }
-
- if (so->bound && addr->can_ifindex == so->ifindex &&
- addr->can_addr.tp.rx_id == so->rxid &&
- addr->can_addr.tp.tx_id == so->txid)
+ if (do_rx_reg && rx_id == tx_id) {
+ err = -EADDRNOTAVAIL;
goto out;
+ }
dev = dev_get_by_index(net, addr->can_ifindex);
if (!dev) {
@@ -1160,29 +1180,15 @@
ifindex = dev->ifindex;
if (do_rx_reg)
- can_rx_register(net, dev, addr->can_addr.tp.rx_id,
- SINGLE_MASK(addr->can_addr.tp.rx_id),
+ can_rx_register(net, dev, rx_id, SINGLE_MASK(rx_id),
isotp_rcv, sk, "isotp", sk);
dev_put(dev);
- if (so->bound && do_rx_reg) {
- /* unregister old filter */
- if (so->ifindex) {
- dev = dev_get_by_index(net, so->ifindex);
- if (dev) {
- can_rx_unregister(net, dev, so->rxid,
- SINGLE_MASK(so->rxid),
- isotp_rcv, sk);
- dev_put(dev);
- }
- }
- }
-
/* switch to new settings */
so->ifindex = ifindex;
- so->rxid = addr->can_addr.tp.rx_id;
- so->txid = addr->can_addr.tp.tx_id;
+ so->rxid = rx_id;
+ so->txid = tx_id;
so->bound = 1;
out:
@@ -1236,6 +1242,14 @@
/* no separate rx_ext_address is given => use ext_address */
if (!(so->opt.flags & CAN_ISOTP_RX_EXT_ADDR))
so->opt.rx_ext_address = so->opt.ext_address;
+
+ /* check for frame_txtime changes (0 => no changes) */
+ if (so->opt.frame_txtime) {
+ if (so->opt.frame_txtime == CAN_ISOTP_FRAME_TXTIME_ZERO)
+ so->frame_txtime = 0;
+ else
+ so->frame_txtime = so->opt.frame_txtime;
+ }
break;
case CAN_ISOTP_RECV_FC:
@@ -1437,6 +1451,7 @@
so->opt.rxpad_content = CAN_ISOTP_DEFAULT_PAD_CONTENT;
so->opt.txpad_content = CAN_ISOTP_DEFAULT_PAD_CONTENT;
so->opt.frame_txtime = CAN_ISOTP_DEFAULT_FRAME_TXTIME;
+ so->frame_txtime = CAN_ISOTP_DEFAULT_FRAME_TXTIME;
so->rxfc.bs = CAN_ISOTP_DEFAULT_RECV_BS;
so->rxfc.stmin = CAN_ISOTP_DEFAULT_RECV_STMIN;
so->rxfc.wftmax = CAN_ISOTP_DEFAULT_RECV_WFTMAX;
diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
index ca75d1b..9da8fbc 100644
--- a/net/can/j1939/main.c
+++ b/net/can/j1939/main.c
@@ -332,6 +332,9 @@
/* re-claim the CAN_HDR from the SKB */
cf = skb_push(skb, J1939_CAN_HDR);
+ /* initialize header structure */
+ memset(cf, 0, J1939_CAN_HDR);
+
/* make it a full can frame again */
skb_put(skb, J1939_CAN_FTR + (8 - dlc));
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index e1a3998..709141a 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -178,7 +178,10 @@
if (!first)
return;
- if (WARN_ON_ONCE(j1939_session_activate(first))) {
+ if (j1939_session_activate(first)) {
+ netdev_warn_once(first->priv->ndev,
+ "%s: 0x%p: Identical session is already activated.\n",
+ __func__, first);
first->err = -EBUSY;
goto activate_next;
} else {
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index 9c39b0f..78f6a91 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -260,6 +260,8 @@
static void j1939_session_destroy(struct j1939_session *session)
{
+ struct sk_buff *skb;
+
if (session->err)
j1939_sk_errqueue(session, J1939_ERRQUEUE_ABORT);
else
@@ -270,7 +272,11 @@
WARN_ON_ONCE(!list_empty(&session->sk_session_queue_entry));
WARN_ON_ONCE(!list_empty(&session->active_session_list_entry));
- skb_queue_purge(&session->skb_queue);
+ while ((skb = skb_dequeue(&session->skb_queue)) != NULL) {
+ /* drop ref taken in j1939_session_skb_queue() */
+ skb_unref(skb);
+ kfree_skb(skb);
+ }
__j1939_session_drop(session);
j1939_priv_put(session->priv);
kfree(session);
@@ -332,10 +338,12 @@
__skb_unlink(do_skb, &session->skb_queue);
/* drop ref taken in j1939_session_skb_queue() */
skb_unref(do_skb);
+ spin_unlock_irqrestore(&session->skb_queue.lock, flags);
kfree_skb(do_skb);
+ } else {
+ spin_unlock_irqrestore(&session->skb_queue.lock, flags);
}
- spin_unlock_irqrestore(&session->skb_queue.lock, flags);
}
void j1939_session_skb_queue(struct j1939_session *session,
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 7901ab6..1e9fab7 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -537,43 +537,6 @@
target_init(&req->r_t);
}
-/*
- * This is ugly, but it allows us to reuse linger registration and ping
- * requests, keeping the structure of the code around send_linger{_ping}()
- * reasonable. Setting up a min_nr=2 mempool for each linger request
- * and dealing with copying ops (this blasts req only, watch op remains
- * intact) isn't any better.
- */
-static void request_reinit(struct ceph_osd_request *req)
-{
- struct ceph_osd_client *osdc = req->r_osdc;
- bool mempool = req->r_mempool;
- unsigned int num_ops = req->r_num_ops;
- u64 snapid = req->r_snapid;
- struct ceph_snap_context *snapc = req->r_snapc;
- bool linger = req->r_linger;
- struct ceph_msg *request_msg = req->r_request;
- struct ceph_msg *reply_msg = req->r_reply;
-
- dout("%s req %p\n", __func__, req);
- WARN_ON(kref_read(&req->r_kref) != 1);
- request_release_checks(req);
-
- WARN_ON(kref_read(&request_msg->kref) != 1);
- WARN_ON(kref_read(&reply_msg->kref) != 1);
- target_destroy(&req->r_t);
-
- request_init(req);
- req->r_osdc = osdc;
- req->r_mempool = mempool;
- req->r_num_ops = num_ops;
- req->r_snapid = snapid;
- req->r_snapc = snapc;
- req->r_linger = linger;
- req->r_request = request_msg;
- req->r_reply = reply_msg;
-}
-
struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
struct ceph_snap_context *snapc,
unsigned int num_ops,
@@ -918,14 +881,30 @@
* @watch_opcode: CEPH_OSD_WATCH_OP_*
*/
static void osd_req_op_watch_init(struct ceph_osd_request *req, int which,
- u64 cookie, u8 watch_opcode)
+ u8 watch_opcode, u64 cookie, u32 gen)
{
struct ceph_osd_req_op *op;
op = osd_req_op_init(req, which, CEPH_OSD_OP_WATCH, 0);
op->watch.cookie = cookie;
op->watch.op = watch_opcode;
- op->watch.gen = 0;
+ op->watch.gen = gen;
+}
+
+/*
+ * prot_ver, timeout and notify payload (may be empty) should already be
+ * encoded in @request_pl
+ */
+static void osd_req_op_notify_init(struct ceph_osd_request *req, int which,
+ u64 cookie, struct ceph_pagelist *request_pl)
+{
+ struct ceph_osd_req_op *op;
+
+ op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0);
+ op->notify.cookie = cookie;
+
+ ceph_osd_data_pagelist_init(&op->notify.request_data, request_pl);
+ op->indata_len = request_pl->length;
}
/*
@@ -2727,10 +2706,13 @@
WARN_ON(!list_empty(&lreq->pending_lworks));
WARN_ON(lreq->osd);
- if (lreq->reg_req)
- ceph_osdc_put_request(lreq->reg_req);
- if (lreq->ping_req)
- ceph_osdc_put_request(lreq->ping_req);
+ if (lreq->request_pl)
+ ceph_pagelist_release(lreq->request_pl);
+ if (lreq->notify_id_pages)
+ ceph_release_page_vector(lreq->notify_id_pages, 1);
+
+ ceph_osdc_put_request(lreq->reg_req);
+ ceph_osdc_put_request(lreq->ping_req);
target_destroy(&lreq->t);
kfree(lreq);
}
@@ -2999,6 +2981,12 @@
struct ceph_osd_linger_request *lreq = req->r_priv;
mutex_lock(&lreq->lock);
+ if (req != lreq->reg_req) {
+ dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n",
+ __func__, lreq, lreq->linger_id, req, lreq->reg_req);
+ goto out;
+ }
+
dout("%s lreq %p linger_id %llu result %d\n", __func__, lreq,
lreq->linger_id, req->r_result);
linger_reg_commit_complete(lreq, req->r_result);
@@ -3022,6 +3010,7 @@
}
}
+out:
mutex_unlock(&lreq->lock);
linger_put(lreq);
}
@@ -3044,6 +3033,12 @@
struct ceph_osd_linger_request *lreq = req->r_priv;
mutex_lock(&lreq->lock);
+ if (req != lreq->reg_req) {
+ dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n",
+ __func__, lreq, lreq->linger_id, req, lreq->reg_req);
+ goto out;
+ }
+
dout("%s lreq %p linger_id %llu result %d last_error %d\n", __func__,
lreq, lreq->linger_id, req->r_result, lreq->last_error);
if (req->r_result < 0) {
@@ -3053,46 +3048,64 @@
}
}
+out:
mutex_unlock(&lreq->lock);
linger_put(lreq);
}
static void send_linger(struct ceph_osd_linger_request *lreq)
{
- struct ceph_osd_request *req = lreq->reg_req;
- struct ceph_osd_req_op *op = &req->r_ops[0];
+ struct ceph_osd_client *osdc = lreq->osdc;
+ struct ceph_osd_request *req;
+ int ret;
- verify_osdc_wrlocked(req->r_osdc);
+ verify_osdc_wrlocked(osdc);
+ mutex_lock(&lreq->lock);
dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
- if (req->r_osd)
- cancel_linger_request(req);
+ if (lreq->reg_req) {
+ if (lreq->reg_req->r_osd)
+ cancel_linger_request(lreq->reg_req);
+ ceph_osdc_put_request(lreq->reg_req);
+ }
- request_reinit(req);
+ req = ceph_osdc_alloc_request(osdc, NULL, 1, true, GFP_NOIO);
+ BUG_ON(!req);
+
target_copy(&req->r_t, &lreq->t);
req->r_mtime = lreq->mtime;
- mutex_lock(&lreq->lock);
if (lreq->is_watch && lreq->committed) {
- WARN_ON(op->op != CEPH_OSD_OP_WATCH ||
- op->watch.cookie != lreq->linger_id);
- op->watch.op = CEPH_OSD_WATCH_OP_RECONNECT;
- op->watch.gen = ++lreq->register_gen;
+ osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_RECONNECT,
+ lreq->linger_id, ++lreq->register_gen);
dout("lreq %p reconnect register_gen %u\n", lreq,
- op->watch.gen);
+ req->r_ops[0].watch.gen);
req->r_callback = linger_reconnect_cb;
} else {
- if (!lreq->is_watch)
+ if (lreq->is_watch) {
+ osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_WATCH,
+ lreq->linger_id, 0);
+ } else {
lreq->notify_id = 0;
- else
- WARN_ON(op->watch.op != CEPH_OSD_WATCH_OP_WATCH);
+
+ refcount_inc(&lreq->request_pl->refcnt);
+ osd_req_op_notify_init(req, 0, lreq->linger_id,
+ lreq->request_pl);
+ ceph_osd_data_pages_init(
+ osd_req_op_data(req, 0, notify, response_data),
+ lreq->notify_id_pages, PAGE_SIZE, 0, false, false);
+ }
dout("lreq %p register\n", lreq);
req->r_callback = linger_commit_cb;
}
- mutex_unlock(&lreq->lock);
+
+ ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
+ BUG_ON(ret);
req->r_priv = linger_get(lreq);
req->r_linger = true;
+ lreq->reg_req = req;
+ mutex_unlock(&lreq->lock);
submit_request(req, true);
}
@@ -3102,6 +3115,12 @@
struct ceph_osd_linger_request *lreq = req->r_priv;
mutex_lock(&lreq->lock);
+ if (req != lreq->ping_req) {
+ dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n",
+ __func__, lreq, lreq->linger_id, req, lreq->ping_req);
+ goto out;
+ }
+
dout("%s lreq %p linger_id %llu result %d ping_sent %lu last_error %d\n",
__func__, lreq, lreq->linger_id, req->r_result, lreq->ping_sent,
lreq->last_error);
@@ -3117,6 +3136,7 @@
lreq->register_gen, req->r_ops[0].watch.gen);
}
+out:
mutex_unlock(&lreq->lock);
linger_put(lreq);
}
@@ -3124,8 +3144,8 @@
static void send_linger_ping(struct ceph_osd_linger_request *lreq)
{
struct ceph_osd_client *osdc = lreq->osdc;
- struct ceph_osd_request *req = lreq->ping_req;
- struct ceph_osd_req_op *op = &req->r_ops[0];
+ struct ceph_osd_request *req;
+ int ret;
if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD)) {
dout("%s PAUSERD\n", __func__);
@@ -3137,19 +3157,26 @@
__func__, lreq, lreq->linger_id, lreq->ping_sent,
lreq->register_gen);
- if (req->r_osd)
- cancel_linger_request(req);
+ if (lreq->ping_req) {
+ if (lreq->ping_req->r_osd)
+ cancel_linger_request(lreq->ping_req);
+ ceph_osdc_put_request(lreq->ping_req);
+ }
- request_reinit(req);
+ req = ceph_osdc_alloc_request(osdc, NULL, 1, true, GFP_NOIO);
+ BUG_ON(!req);
+
target_copy(&req->r_t, &lreq->t);
-
- WARN_ON(op->op != CEPH_OSD_OP_WATCH ||
- op->watch.cookie != lreq->linger_id ||
- op->watch.op != CEPH_OSD_WATCH_OP_PING);
- op->watch.gen = lreq->register_gen;
+ osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_PING, lreq->linger_id,
+ lreq->register_gen);
req->r_callback = linger_ping_cb;
+
+ ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
+ BUG_ON(ret);
+
req->r_priv = linger_get(lreq);
req->r_linger = true;
+ lreq->ping_req = req;
ceph_osdc_get_request(req);
account_request(req);
@@ -3165,12 +3192,6 @@
down_write(&osdc->lock);
linger_register(lreq);
- if (lreq->is_watch) {
- lreq->reg_req->r_ops[0].watch.cookie = lreq->linger_id;
- lreq->ping_req->r_ops[0].watch.cookie = lreq->linger_id;
- } else {
- lreq->reg_req->r_ops[0].notify.cookie = lreq->linger_id;
- }
calc_target(osdc, &lreq->t, false);
osd = lookup_create_osd(osdc, lreq->t.osd, true);
@@ -3202,9 +3223,9 @@
*/
static void __linger_cancel(struct ceph_osd_linger_request *lreq)
{
- if (lreq->is_watch && lreq->ping_req->r_osd)
+ if (lreq->ping_req && lreq->ping_req->r_osd)
cancel_linger_request(lreq->ping_req);
- if (lreq->reg_req->r_osd)
+ if (lreq->reg_req && lreq->reg_req->r_osd)
cancel_linger_request(lreq->reg_req);
cancel_linger_map_check(lreq);
unlink_linger(lreq->osd, lreq);
@@ -4651,43 +4672,6 @@
}
EXPORT_SYMBOL(ceph_osdc_sync);
-static struct ceph_osd_request *
-alloc_linger_request(struct ceph_osd_linger_request *lreq)
-{
- struct ceph_osd_request *req;
-
- req = ceph_osdc_alloc_request(lreq->osdc, NULL, 1, false, GFP_NOIO);
- if (!req)
- return NULL;
-
- ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid);
- ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc);
- return req;
-}
-
-static struct ceph_osd_request *
-alloc_watch_request(struct ceph_osd_linger_request *lreq, u8 watch_opcode)
-{
- struct ceph_osd_request *req;
-
- req = alloc_linger_request(lreq);
- if (!req)
- return NULL;
-
- /*
- * Pass 0 for cookie because we don't know it yet, it will be
- * filled in by linger_submit().
- */
- osd_req_op_watch_init(req, 0, 0, watch_opcode);
-
- if (ceph_osdc_alloc_messages(req, GFP_NOIO)) {
- ceph_osdc_put_request(req);
- return NULL;
- }
-
- return req;
-}
-
/*
* Returns a handle, caller owns a ref.
*/
@@ -4717,18 +4701,6 @@
lreq->t.flags = CEPH_OSD_FLAG_WRITE;
ktime_get_real_ts64(&lreq->mtime);
- lreq->reg_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_WATCH);
- if (!lreq->reg_req) {
- ret = -ENOMEM;
- goto err_put_lreq;
- }
-
- lreq->ping_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_PING);
- if (!lreq->ping_req) {
- ret = -ENOMEM;
- goto err_put_lreq;
- }
-
linger_submit(lreq);
ret = linger_reg_commit_wait(lreq);
if (ret) {
@@ -4766,8 +4738,8 @@
ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc);
req->r_flags = CEPH_OSD_FLAG_WRITE;
ktime_get_real_ts64(&req->r_mtime);
- osd_req_op_watch_init(req, 0, lreq->linger_id,
- CEPH_OSD_WATCH_OP_UNWATCH);
+ osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_UNWATCH,
+ lreq->linger_id, 0);
ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
if (ret)
@@ -4853,35 +4825,6 @@
}
EXPORT_SYMBOL(ceph_osdc_notify_ack);
-static int osd_req_op_notify_init(struct ceph_osd_request *req, int which,
- u64 cookie, u32 prot_ver, u32 timeout,
- void *payload, u32 payload_len)
-{
- struct ceph_osd_req_op *op;
- struct ceph_pagelist *pl;
- int ret;
-
- op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0);
- op->notify.cookie = cookie;
-
- pl = ceph_pagelist_alloc(GFP_NOIO);
- if (!pl)
- return -ENOMEM;
-
- ret = ceph_pagelist_encode_32(pl, 1); /* prot_ver */
- ret |= ceph_pagelist_encode_32(pl, timeout);
- ret |= ceph_pagelist_encode_32(pl, payload_len);
- ret |= ceph_pagelist_append(pl, payload, payload_len);
- if (ret) {
- ceph_pagelist_release(pl);
- return -ENOMEM;
- }
-
- ceph_osd_data_pagelist_init(&op->notify.request_data, pl);
- op->indata_len = pl->length;
- return 0;
-}
-
/*
* @timeout: in seconds
*
@@ -4900,7 +4843,6 @@
size_t *preply_len)
{
struct ceph_osd_linger_request *lreq;
- struct page **pages;
int ret;
WARN_ON(!timeout);
@@ -4913,6 +4855,29 @@
if (!lreq)
return -ENOMEM;
+ lreq->request_pl = ceph_pagelist_alloc(GFP_NOIO);
+ if (!lreq->request_pl) {
+ ret = -ENOMEM;
+ goto out_put_lreq;
+ }
+
+ ret = ceph_pagelist_encode_32(lreq->request_pl, 1); /* prot_ver */
+ ret |= ceph_pagelist_encode_32(lreq->request_pl, timeout);
+ ret |= ceph_pagelist_encode_32(lreq->request_pl, payload_len);
+ ret |= ceph_pagelist_append(lreq->request_pl, payload, payload_len);
+ if (ret) {
+ ret = -ENOMEM;
+ goto out_put_lreq;
+ }
+
+ /* for notify_id */
+ lreq->notify_id_pages = ceph_alloc_page_vector(1, GFP_NOIO);
+ if (IS_ERR(lreq->notify_id_pages)) {
+ ret = PTR_ERR(lreq->notify_id_pages);
+ lreq->notify_id_pages = NULL;
+ goto out_put_lreq;
+ }
+
lreq->preply_pages = preply_pages;
lreq->preply_len = preply_len;
@@ -4920,35 +4885,6 @@
ceph_oloc_copy(&lreq->t.base_oloc, oloc);
lreq->t.flags = CEPH_OSD_FLAG_READ;
- lreq->reg_req = alloc_linger_request(lreq);
- if (!lreq->reg_req) {
- ret = -ENOMEM;
- goto out_put_lreq;
- }
-
- /*
- * Pass 0 for cookie because we don't know it yet, it will be
- * filled in by linger_submit().
- */
- ret = osd_req_op_notify_init(lreq->reg_req, 0, 0, 1, timeout,
- payload, payload_len);
- if (ret)
- goto out_put_lreq;
-
- /* for notify_id */
- pages = ceph_alloc_page_vector(1, GFP_NOIO);
- if (IS_ERR(pages)) {
- ret = PTR_ERR(pages);
- goto out_put_lreq;
- }
- ceph_osd_data_pages_init(osd_req_op_data(lreq->reg_req, 0, notify,
- response_data),
- pages, PAGE_SIZE, 0, false, true);
-
- ret = ceph_osdc_alloc_messages(lreq->reg_req, GFP_NOIO);
- if (ret)
- goto out_put_lreq;
-
linger_submit(lreq);
ret = linger_reg_commit_wait(lreq);
if (!ret)
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index c907f0d..d67d06d 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -15,18 +15,6 @@
DEFINE_BPF_STORAGE_CACHE(sk_cache);
-static int omem_charge(struct sock *sk, unsigned int size)
-{
- /* same check as in sock_kmalloc() */
- if (size <= sysctl_optmem_max &&
- atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
- atomic_add(size, &sk->sk_omem_alloc);
- return 0;
- }
-
- return -ENOMEM;
-}
-
static struct bpf_local_storage_data *
sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit)
{
@@ -316,7 +304,17 @@
static int sk_storage_charge(struct bpf_local_storage_map *smap,
void *owner, u32 size)
{
- return omem_charge(owner, size);
+ int optmem_max = READ_ONCE(sysctl_optmem_max);
+ struct sock *sk = (struct sock *)owner;
+
+ /* same check as in sock_kmalloc() */
+ if (size <= optmem_max &&
+ atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
+ atomic_add(size, &sk->sk_omem_alloc);
+ return 0;
+ }
+
+ return -ENOMEM;
}
static void sk_storage_uncharge(struct bpf_local_storage_map *smap,
@@ -794,10 +792,18 @@
{
struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data;
+ bpf_map_inc_with_uref(aux->map);
seq_info->map = aux->map;
return 0;
}
+static void bpf_iter_fini_sk_storage_map(void *priv_data)
+{
+ struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data;
+
+ bpf_map_put_with_uref(seq_info->map);
+}
+
static int bpf_iter_attach_map(struct bpf_prog *prog,
union bpf_iter_link_info *linfo,
struct bpf_iter_aux_info *aux)
@@ -815,7 +821,7 @@
if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
goto put_map;
- if (prog->aux->max_rdonly_access > map->value_size) {
+ if (prog->aux->max_rdwr_access > map->value_size) {
err = -EACCES;
goto put_map;
}
@@ -843,7 +849,7 @@
static const struct bpf_iter_seq_info iter_seq_info = {
.seq_ops = &bpf_sk_storage_map_seq_ops,
.init_seq_private = bpf_iter_init_sk_storage_map,
- .fini_seq_private = NULL,
+ .fini_seq_private = bpf_iter_fini_sk_storage_map,
.seq_priv_size = sizeof(struct bpf_iter_seq_sk_storage_map_info),
};
diff --git a/net/core/dev.c b/net/core/dev.c
index 0bab2ac..34b5aab 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3241,11 +3241,15 @@
}
offset = skb_checksum_start_offset(skb);
- BUG_ON(offset >= skb_headlen(skb));
+ ret = -EINVAL;
+ if (WARN_ON_ONCE(offset >= skb_headlen(skb)))
+ goto out;
+
csum = skb_checksum(skb, offset, skb->len - offset, 0);
offset += skb->csum_offset;
- BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
+ if (WARN_ON_ONCE(offset + sizeof(__sum16) > skb_headlen(skb)))
+ goto out;
ret = skb_ensure_writable(skb, offset + sizeof(__sum16));
if (ret)
@@ -4093,6 +4097,7 @@
bool again = false;
skb_reset_mac_header(skb);
+ skb_assert_len(skb);
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP))
__skb_tstamp_tx(skb, NULL, skb->sk, SCM_TSTAMP_SCHED);
@@ -4512,7 +4517,7 @@
struct softnet_data *sd;
unsigned int old_flow, new_flow;
- if (qlen < (netdev_max_backlog >> 1))
+ if (qlen < (READ_ONCE(netdev_max_backlog) >> 1))
return false;
sd = this_cpu_ptr(&softnet_data);
@@ -4560,7 +4565,7 @@
if (!netif_running(skb->dev))
goto drop;
qlen = skb_queue_len(&sd->input_pkt_queue);
- if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
+ if (qlen <= READ_ONCE(netdev_max_backlog) && !skb_flow_limit(skb, qlen)) {
if (qlen) {
enqueue:
__skb_queue_tail(&sd->input_pkt_queue, skb);
@@ -4791,7 +4796,7 @@
{
int ret;
- net_timestamp_check(netdev_tstamp_prequeue, skb);
+ net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
trace_netif_rx(skb);
@@ -5152,7 +5157,7 @@
int ret = NET_RX_DROP;
__be16 type;
- net_timestamp_check(!netdev_tstamp_prequeue, skb);
+ net_timestamp_check(!READ_ONCE(netdev_tstamp_prequeue), skb);
trace_netif_receive_skb(skb);
@@ -5554,7 +5559,7 @@
{
int ret;
- net_timestamp_check(netdev_tstamp_prequeue, skb);
+ net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
if (skb_defer_rx_timestamp(skb))
return NET_RX_SUCCESS;
@@ -5584,7 +5589,7 @@
INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
- net_timestamp_check(netdev_tstamp_prequeue, skb);
+ net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
skb_list_del_init(skb);
if (!skb_defer_rx_timestamp(skb))
list_add_tail(&skb->list, &sublist);
@@ -5746,7 +5751,7 @@
}
/* we can have in flight packet[s] on the cpus we are not flushing,
- * synchronize_net() in rollback_registered_many() will take care of
+ * synchronize_net() in unregister_netdevice_many() will take care of
* them
*/
for_each_cpu(cpu, &flush_cpus)
@@ -6367,7 +6372,7 @@
net_rps_action_and_irq_enable(sd);
}
- napi->weight = dev_rx_weight;
+ napi->weight = READ_ONCE(dev_rx_weight);
while (again) {
struct sk_buff *skb;
@@ -6875,8 +6880,8 @@
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
unsigned long time_limit = jiffies +
- usecs_to_jiffies(netdev_budget_usecs);
- int budget = netdev_budget;
+ usecs_to_jiffies(READ_ONCE(netdev_budget_usecs));
+ int budget = READ_ONCE(netdev_budget);
LIST_HEAD(list);
LIST_HEAD(repoll);
@@ -9504,106 +9509,6 @@
dev_net(dev)->dev_unreg_count++;
}
-static void rollback_registered_many(struct list_head *head)
-{
- struct net_device *dev, *tmp;
- LIST_HEAD(close_head);
-
- BUG_ON(dev_boot_phase);
- ASSERT_RTNL();
-
- list_for_each_entry_safe(dev, tmp, head, unreg_list) {
- /* Some devices call without registering
- * for initialization unwind. Remove those
- * devices and proceed with the remaining.
- */
- if (dev->reg_state == NETREG_UNINITIALIZED) {
- pr_debug("unregister_netdevice: device %s/%p never was registered\n",
- dev->name, dev);
-
- WARN_ON(1);
- list_del(&dev->unreg_list);
- continue;
- }
- dev->dismantle = true;
- BUG_ON(dev->reg_state != NETREG_REGISTERED);
- }
-
- /* If device is running, close it first. */
- list_for_each_entry(dev, head, unreg_list)
- list_add_tail(&dev->close_list, &close_head);
- dev_close_many(&close_head, true);
-
- list_for_each_entry(dev, head, unreg_list) {
- /* And unlink it from device chain. */
- unlist_netdevice(dev);
-
- dev->reg_state = NETREG_UNREGISTERING;
- }
- flush_all_backlogs();
-
- synchronize_net();
-
- list_for_each_entry(dev, head, unreg_list) {
- struct sk_buff *skb = NULL;
-
- /* Shutdown queueing discipline. */
- dev_shutdown(dev);
-
- dev_xdp_uninstall(dev);
-
- /* Notify protocols, that we are about to destroy
- * this device. They should clean all the things.
- */
- call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
-
- if (!dev->rtnl_link_ops ||
- dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
- skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
- GFP_KERNEL, NULL, 0);
-
- /*
- * Flush the unicast and multicast chains
- */
- dev_uc_flush(dev);
- dev_mc_flush(dev);
-
- netdev_name_node_alt_flush(dev);
- netdev_name_node_free(dev->name_node);
-
- if (dev->netdev_ops->ndo_uninit)
- dev->netdev_ops->ndo_uninit(dev);
-
- if (skb)
- rtmsg_ifinfo_send(skb, dev, GFP_KERNEL);
-
- /* Notifier chain MUST detach us all upper devices. */
- WARN_ON(netdev_has_any_upper_dev(dev));
- WARN_ON(netdev_has_any_lower_dev(dev));
-
- /* Remove entries from kobject tree */
- netdev_unregister_kobject(dev);
-#ifdef CONFIG_XPS
- /* Remove XPS queueing entries */
- netif_reset_xps_queues_gt(dev, 0);
-#endif
- }
-
- synchronize_net();
-
- list_for_each_entry(dev, head, unreg_list)
- dev_put(dev);
-}
-
-static void rollback_registered(struct net_device *dev)
-{
- LIST_HEAD(single);
-
- list_add(&dev->unreg_list, &single);
- rollback_registered_many(&single);
- list_del(&single);
-}
-
static netdev_features_t netdev_sync_upper_features(struct net_device *lower,
struct net_device *upper, netdev_features_t features)
{
@@ -10140,17 +10045,10 @@
ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
ret = notifier_to_errno(ret);
if (ret) {
- rollback_registered(dev);
- rcu_barrier();
-
- dev->reg_state = NETREG_UNREGISTERED;
- /* We should put the kobject that hold in
- * netdev_unregister_kobject(), otherwise
- * the net device cannot be freed when
- * driver calls free_netdev(), because the
- * kobject is being hold.
- */
- kobject_put(&dev->dev.kobj);
+ /* Expect explicit free_netdev() on failure */
+ dev->needs_free_netdev = false;
+ unregister_netdevice_queue(dev, NULL);
+ goto out;
}
/*
* Prevent userspace races by waiting until the network
@@ -10679,6 +10577,17 @@
struct napi_struct *p, *n;
might_sleep();
+
+ /* When called immediately after register_netdevice() failed the unwind
+ * handling may still be dismantling the device. Handle that case by
+ * deferring the free.
+ */
+ if (dev->reg_state == NETREG_UNREGISTERING) {
+ ASSERT_RTNL();
+ dev->needs_free_netdev = true;
+ return;
+ }
+
netif_free_tx_queues(dev);
netif_free_rx_queues(dev);
@@ -10745,9 +10654,10 @@
if (head) {
list_move_tail(&dev->unreg_list, head);
} else {
- rollback_registered(dev);
- /* Finish processing unregister after unlock */
- net_set_todo(dev);
+ LIST_HEAD(single);
+
+ list_add(&dev->unreg_list, &single);
+ unregister_netdevice_many(&single);
}
}
EXPORT_SYMBOL(unregister_netdevice_queue);
@@ -10761,14 +10671,100 @@
*/
void unregister_netdevice_many(struct list_head *head)
{
- struct net_device *dev;
+ struct net_device *dev, *tmp;
+ LIST_HEAD(close_head);
- if (!list_empty(head)) {
- rollback_registered_many(head);
- list_for_each_entry(dev, head, unreg_list)
- net_set_todo(dev);
- list_del(head);
+ BUG_ON(dev_boot_phase);
+ ASSERT_RTNL();
+
+ if (list_empty(head))
+ return;
+
+ list_for_each_entry_safe(dev, tmp, head, unreg_list) {
+ /* Some devices call without registering
+ * for initialization unwind. Remove those
+ * devices and proceed with the remaining.
+ */
+ if (dev->reg_state == NETREG_UNINITIALIZED) {
+ pr_debug("unregister_netdevice: device %s/%p never was registered\n",
+ dev->name, dev);
+
+ WARN_ON(1);
+ list_del(&dev->unreg_list);
+ continue;
+ }
+ dev->dismantle = true;
+ BUG_ON(dev->reg_state != NETREG_REGISTERED);
}
+
+ /* If device is running, close it first. */
+ list_for_each_entry(dev, head, unreg_list)
+ list_add_tail(&dev->close_list, &close_head);
+ dev_close_many(&close_head, true);
+
+ list_for_each_entry(dev, head, unreg_list) {
+ /* And unlink it from device chain. */
+ unlist_netdevice(dev);
+
+ dev->reg_state = NETREG_UNREGISTERING;
+ }
+ flush_all_backlogs();
+
+ synchronize_net();
+
+ list_for_each_entry(dev, head, unreg_list) {
+ struct sk_buff *skb = NULL;
+
+ /* Shutdown queueing discipline. */
+ dev_shutdown(dev);
+
+ dev_xdp_uninstall(dev);
+
+ /* Notify protocols, that we are about to destroy
+ * this device. They should clean all the things.
+ */
+ call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+
+ if (!dev->rtnl_link_ops ||
+ dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
+ skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
+ GFP_KERNEL, NULL, 0);
+
+ /*
+ * Flush the unicast and multicast chains
+ */
+ dev_uc_flush(dev);
+ dev_mc_flush(dev);
+
+ netdev_name_node_alt_flush(dev);
+ netdev_name_node_free(dev->name_node);
+
+ if (dev->netdev_ops->ndo_uninit)
+ dev->netdev_ops->ndo_uninit(dev);
+
+ if (skb)
+ rtmsg_ifinfo_send(skb, dev, GFP_KERNEL);
+
+ /* Notifier chain MUST detach us all upper devices. */
+ WARN_ON(netdev_has_any_upper_dev(dev));
+ WARN_ON(netdev_has_any_lower_dev(dev));
+
+ /* Remove entries from kobject tree */
+ netdev_unregister_kobject(dev);
+#ifdef CONFIG_XPS
+ /* Remove XPS queueing entries */
+ netif_reset_xps_queues_gt(dev, 0);
+#endif
+ }
+
+ synchronize_net();
+
+ list_for_each_entry(dev, head, unreg_list) {
+ dev_put(dev);
+ net_set_todo(dev);
+ }
+
+ list_del(head);
}
EXPORT_SYMBOL(unregister_netdevice_many);
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 54fb18b..993420d 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/kmod.h>
#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
#include <linux/etherdevice.h>
#include <linux/rtnetlink.h>
#include <linux/net_tstamp.h>
@@ -25,26 +26,6 @@
return netdev_get_name(net, ifr->ifr_name, ifr->ifr_ifindex);
}
-static gifconf_func_t *gifconf_list[NPROTO];
-
-/**
- * register_gifconf - register a SIOCGIF handler
- * @family: Address family
- * @gifconf: Function handler
- *
- * Register protocol dependent address dumping routines. The handler
- * that is passed must not be freed or reused until it has been replaced
- * by another handler.
- */
-int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
-{
- if (family >= NPROTO)
- return -EINVAL;
- gifconf_list[family] = gifconf;
- return 0;
-}
-EXPORT_SYMBOL(register_gifconf);
-
/*
* Perform a SIOCGIFCONF call. This structure will change
* size eventually, and there is nothing I can do about it.
@@ -57,7 +38,6 @@
char __user *pos;
int len;
int total;
- int i;
/*
* Fetch the caller's info block.
@@ -72,19 +52,15 @@
total = 0;
for_each_netdev(net, dev) {
- for (i = 0; i < NPROTO; i++) {
- if (gifconf_list[i]) {
- int done;
- if (!pos)
- done = gifconf_list[i](dev, NULL, 0, size);
- else
- done = gifconf_list[i](dev, pos + total,
- len - total, size);
- if (done < 0)
- return -EFAULT;
- total += done;
- }
- }
+ int done;
+ if (!pos)
+ done = inet_gifconf(dev, NULL, 0, size);
+ else
+ done = inet_gifconf(dev, pos + total,
+ len - total, size);
+ if (done < 0)
+ return -EFAULT;
+ total += done;
}
/*
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 646d90f..7204775 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -3620,7 +3620,7 @@
const struct devlink_param *param,
struct devlink_param_gset_ctx *ctx)
{
- if (!param->get)
+ if (!param->get || devlink->reload_failed)
return -EOPNOTSUPP;
return param->get(devlink, param->id, ctx);
}
@@ -3629,7 +3629,7 @@
const struct devlink_param *param,
struct devlink_param_gset_ctx *ctx)
{
- if (!param->set)
+ if (!param->set || devlink->reload_failed)
return -EOPNOTSUPP;
return param->set(devlink, param->id, ctx);
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 659a328..4c22e6d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1212,10 +1212,11 @@
static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
u32 filter_size = bpf_prog_size(fp->prog->len);
+ int optmem_max = READ_ONCE(sysctl_optmem_max);
/* same check as in sock_kmalloc() */
- if (filter_size <= sysctl_optmem_max &&
- atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) {
+ if (filter_size <= optmem_max &&
+ atomic_read(&sk->sk_omem_alloc) + filter_size < optmem_max) {
atomic_add(filter_size, &sk->sk_omem_alloc);
return true;
}
@@ -1547,7 +1548,7 @@
if (IS_ERR(prog))
return PTR_ERR(prog);
- if (bpf_prog_size(prog->len) > sysctl_optmem_max)
+ if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max))
err = -ENOMEM;
else
err = reuseport_attach_prog(sk, prog);
@@ -1614,7 +1615,7 @@
}
} else {
/* BPF_PROG_TYPE_SOCKET_FILTER */
- if (bpf_prog_size(prog->len) > sysctl_optmem_max) {
+ if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max)) {
err = -ENOMEM;
goto err_prog_put;
}
@@ -1687,7 +1688,7 @@
if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
return -EINVAL;
- if (unlikely(offset > 0xffff))
+ if (unlikely(offset > INT_MAX))
return -EFAULT;
if (unlikely(bpf_try_make_writable(skb, offset + len)))
return -EFAULT;
@@ -1722,7 +1723,7 @@
{
void *ptr;
- if (unlikely(offset > 0xffff))
+ if (unlikely(offset > INT_MAX))
goto err_clear;
ptr = skb_header_pointer(skb, offset, len, to);
@@ -4713,14 +4714,14 @@
/* Only some socketops are supported */
switch (optname) {
case SO_RCVBUF:
- val = min_t(u32, val, sysctl_rmem_max);
+ val = min_t(u32, val, READ_ONCE(sysctl_rmem_max));
val = min_t(int, val, INT_MAX / 2);
sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
WRITE_ONCE(sk->sk_rcvbuf,
max_t(int, val * 2, SOCK_MIN_RCVBUF));
break;
case SO_SNDBUF:
- val = min_t(u32, val, sysctl_wmem_max);
+ val = min_t(u32, val, READ_ONCE(sysctl_wmem_max));
val = min_t(int, val, INT_MAX / 2);
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
WRITE_ONCE(sk->sk_sndbuf,
@@ -5624,7 +5625,6 @@
if (err)
return err;
- ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
return seg6_lookup_nexthop(skb, NULL, 0);
@@ -5982,10 +5982,21 @@
ifindex, proto, netns_id, flags);
if (sk) {
- sk = sk_to_full_sk(sk);
- if (!sk_fullsock(sk)) {
+ struct sock *sk2 = sk_to_full_sk(sk);
+
+ /* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk
+ * sock refcnt is decremented to prevent a request_sock leak.
+ */
+ if (!sk_fullsock(sk2))
+ sk2 = NULL;
+ if (sk2 != sk) {
sock_gen_put(sk);
- return NULL;
+ /* Ensure there is no need to bump sk2 refcnt */
+ if (unlikely(sk2 && !sock_flag(sk2, SOCK_RCU_FREE))) {
+ WARN_ONCE(1, "Found non-RCU, unreferenced socket!");
+ return NULL;
+ }
+ sk = sk2;
}
}
@@ -6019,10 +6030,21 @@
flags);
if (sk) {
- sk = sk_to_full_sk(sk);
- if (!sk_fullsock(sk)) {
+ struct sock *sk2 = sk_to_full_sk(sk);
+
+ /* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk
+ * sock refcnt is decremented to prevent a request_sock leak.
+ */
+ if (!sk_fullsock(sk2))
+ sk2 = NULL;
+ if (sk2 != sk) {
sock_gen_put(sk);
- return NULL;
+ /* Ensure there is no need to bump sk2 refcnt */
+ if (unlikely(sk2 && !sock_flag(sk2, SOCK_RCU_FREE))) {
+ WARN_ONCE(1, "Found non-RCU, unreferenced socket!");
+ return NULL;
+ }
+ sk = sk2;
}
}
@@ -6486,30 +6508,39 @@
if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
return -EINVAL;
- if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies))
return -EINVAL;
if (!th->ack || th->rst || th->syn)
return -ENOENT;
+ if (unlikely(iph_len < sizeof(struct iphdr)))
+ return -EINVAL;
+
if (tcp_synq_no_recent_overflow(sk))
return -ENOENT;
cookie = ntohl(th->ack_seq) - 1;
- switch (sk->sk_family) {
- case AF_INET:
- if (unlikely(iph_len < sizeof(struct iphdr)))
+ /* Both struct iphdr and struct ipv6hdr have the version field at the
+ * same offset so we can cast to the shorter header (struct iphdr).
+ */
+ switch (((struct iphdr *)iph)->version) {
+ case 4:
+ if (sk->sk_family == AF_INET6 && ipv6_only_sock(sk))
return -EINVAL;
ret = __cookie_v4_check((struct iphdr *)iph, th, cookie);
break;
#if IS_BUILTIN(CONFIG_IPV6)
- case AF_INET6:
+ case 6:
if (unlikely(iph_len < sizeof(struct ipv6hdr)))
return -EINVAL;
+ if (sk->sk_family != AF_INET6)
+ return -EINVAL;
+
ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie);
break;
#endif /* CONFIG_IPV6 */
@@ -6552,7 +6583,7 @@
if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
return -EINVAL;
- if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies))
return -ENOENT;
if (!th->syn || th->ack || th->fin || th->rst)
@@ -7709,6 +7740,7 @@
struct bpf_insn_access_aux *info)
{
const int size_default = sizeof(__u32);
+ int field_size;
if (off < 0 || off >= sizeof(struct bpf_sock))
return false;
@@ -7720,7 +7752,6 @@
case offsetof(struct bpf_sock, family):
case offsetof(struct bpf_sock, type):
case offsetof(struct bpf_sock, protocol):
- case offsetof(struct bpf_sock, dst_port):
case offsetof(struct bpf_sock, src_port):
case offsetof(struct bpf_sock, rx_queue_mapping):
case bpf_ctx_range(struct bpf_sock, src_ip4):
@@ -7729,6 +7760,14 @@
case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
bpf_ctx_record_field_size(info, size_default);
return bpf_ctx_narrow_access_ok(off, size, size_default);
+ case bpf_ctx_range(struct bpf_sock, dst_port):
+ field_size = size == size_default ?
+ size_default : sizeof_field(struct bpf_sock, dst_port);
+ bpf_ctx_record_field_size(info, field_size);
+ return bpf_ctx_narrow_access_ok(off, size, field_size);
+ case offsetofend(struct bpf_sock, dst_port) ...
+ offsetof(struct bpf_sock, dst_ip4) - 1:
+ return false;
}
return size == size_default;
@@ -10296,6 +10335,7 @@
}
const struct bpf_prog_ops sk_lookup_prog_ops = {
+ .test_run = bpf_prog_test_run_sk_lookup,
};
const struct bpf_verifier_ops sk_lookup_verifier_ops = {
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 813c709..b8d082f 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -263,7 +263,7 @@
key->ct_zone = ct->zone.id;
#endif
#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
- key->ct_mark = ct->mark;
+ key->ct_mark = READ_ONCE(ct->mark);
#endif
cl = nf_ct_labels_find(ct);
@@ -1171,6 +1171,7 @@
VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
}
key_vlan->vlan_tpid = saved_vlan_tpid;
+ key_vlan->vlan_eth_type = proto;
}
fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
@@ -1484,7 +1485,7 @@
}
EXPORT_SYMBOL(flow_get_u32_dst);
-/* Sort the source and destination IP (and the ports if the IP are the same),
+/* Sort the source and destination IP and the ports,
* to have consistent hash within the two directions
*/
static inline void __flow_hash_consistentify(struct flow_keys *keys)
@@ -1493,13 +1494,12 @@
switch (keys->control.addr_type) {
case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
- addr_diff = (__force u32)keys->addrs.v4addrs.dst -
- (__force u32)keys->addrs.v4addrs.src;
- if ((addr_diff < 0) ||
- (addr_diff == 0 &&
- ((__force u16)keys->ports.dst <
- (__force u16)keys->ports.src))) {
+ if ((__force u32)keys->addrs.v4addrs.dst <
+ (__force u32)keys->addrs.v4addrs.src)
swap(keys->addrs.v4addrs.src, keys->addrs.v4addrs.dst);
+
+ if ((__force u16)keys->ports.dst <
+ (__force u16)keys->ports.src) {
swap(keys->ports.src, keys->ports.dst);
}
break;
@@ -1507,13 +1507,13 @@
addr_diff = memcmp(&keys->addrs.v6addrs.dst,
&keys->addrs.v6addrs.src,
sizeof(keys->addrs.v6addrs.dst));
- if ((addr_diff < 0) ||
- (addr_diff == 0 &&
- ((__force u16)keys->ports.dst <
- (__force u16)keys->ports.src))) {
+ if (addr_diff < 0) {
for (i = 0; i < 4; i++)
swap(keys->addrs.v6addrs.src.s6_addr32[i],
keys->addrs.v6addrs.dst.s6_addr32[i]);
+ }
+ if ((__force u16)keys->ports.dst <
+ (__force u16)keys->ports.src) {
swap(keys->ports.src, keys->ports.dst);
}
break;
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index e3f0d59..8d95829 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -566,3 +566,9 @@
return list_empty(&bo->cb_list) ? -EOPNOTSUPP : 0;
}
EXPORT_SYMBOL(flow_indr_dev_setup_offload);
+
+bool flow_indr_dev_exists(void)
+{
+ return !list_empty(&flow_block_indr_dev_list);
+}
+EXPORT_SYMBOL(flow_indr_dev_exists);
diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c
index 6eb2e5e..2f66f3f 100644
--- a/net/core/gro_cells.c
+++ b/net/core/gro_cells.c
@@ -26,7 +26,7 @@
cell = this_cpu_ptr(gcells->cells);
- if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) {
+ if (skb_queue_len(&cell->napi_skbs) > READ_ONCE(netdev_max_backlog)) {
drop:
atomic_long_inc(&dev->rx_dropped);
kfree_skb(skb);
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index 2f7940b..3fd207f 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -158,10 +158,8 @@
return dst->lwtstate->orig_output(net, sk, skb);
}
-static int xmit_check_hhlen(struct sk_buff *skb)
+static int xmit_check_hhlen(struct sk_buff *skb, int hh_len)
{
- int hh_len = skb_dst(skb)->dev->hard_header_len;
-
if (skb_headroom(skb) < hh_len) {
int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb));
@@ -273,6 +271,7 @@
bpf = bpf_lwt_lwtunnel(dst->lwtstate);
if (bpf->xmit.prog) {
+ int hh_len = dst->dev->hard_header_len;
__be16 proto = skb->protocol;
int ret;
@@ -290,7 +289,7 @@
/* If the header was expanded, headroom might be too
* small for L2 header to come, expand as needed.
*/
- ret = xmit_check_hhlen(skb);
+ ret = xmit_check_hhlen(skb, hh_len);
if (unlikely(ret))
return ret;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 52a1c87..f6f580e 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -280,11 +280,26 @@
return 0;
}
-static void pneigh_queue_purge(struct sk_buff_head *list)
+static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net)
{
+ struct sk_buff_head tmp;
+ unsigned long flags;
struct sk_buff *skb;
- while ((skb = skb_dequeue(list)) != NULL) {
+ skb_queue_head_init(&tmp);
+ spin_lock_irqsave(&list->lock, flags);
+ skb = skb_peek(list);
+ while (skb != NULL) {
+ struct sk_buff *skb_next = skb_peek_next(skb, list);
+ if (net == NULL || net_eq(dev_net(skb->dev), net)) {
+ __skb_unlink(skb, list);
+ __skb_queue_tail(&tmp, skb);
+ }
+ skb = skb_next;
+ }
+ spin_unlock_irqrestore(&list->lock, flags);
+
+ while ((skb = __skb_dequeue(&tmp))) {
dev_put(skb->dev);
kfree_skb(skb);
}
@@ -358,9 +373,9 @@
write_lock_bh(&tbl->lock);
neigh_flush_dev(tbl, dev, skip_perm);
pneigh_ifdown_and_unlock(tbl, dev);
-
- del_timer_sync(&tbl->proxy_timer);
- pneigh_queue_purge(&tbl->proxy_queue);
+ pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL);
+ if (skb_queue_empty_lockless(&tbl->proxy_queue))
+ del_timer_sync(&tbl->proxy_timer);
return 0;
}
@@ -1743,7 +1758,7 @@
/* It is not clean... Fix it to unload IPv6 module safely */
cancel_delayed_work_sync(&tbl->gc_work);
del_timer_sync(&tbl->proxy_timer);
- pneigh_queue_purge(&tbl->proxy_queue);
+ pneigh_queue_purge(&tbl->proxy_queue, NULL);
neigh_ifdown(tbl, NULL);
if (atomic_read(&tbl->entries))
pr_crit("neighbour leakage\n");
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index cbff7d9..a3b7d96 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -135,6 +135,7 @@
static int ops_init(const struct pernet_operations *ops, struct net *net)
{
+ struct net_generic *ng;
int err = -ENOMEM;
void *data = NULL;
@@ -153,7 +154,13 @@
if (!err)
return 0;
+ if (ops->id && ops->size) {
cleanup:
+ ng = rcu_dereference_protected(net->gen,
+ lockdep_is_held(&pernet_ops_rwsem));
+ ng->ptr[*ops->id] = NULL;
+ }
+
kfree(data);
out:
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 9ff6d41..3c9c2d6 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3442,26 +3442,15 @@
dev->ifindex = ifm->ifi_index;
- if (ops->newlink) {
+ if (ops->newlink)
err = ops->newlink(link_net ? : net, dev, tb, data, extack);
- /* Drivers should call free_netdev() in ->destructor
- * and unregister it on failure after registration
- * so that device could be finally freed in rtnl_unlock.
- */
- if (err < 0) {
- /* If device is not registered at all, free it now */
- if (dev->reg_state == NETREG_UNINITIALIZED ||
- dev->reg_state == NETREG_UNREGISTERED)
- free_netdev(dev);
- goto out;
- }
- } else {
+ else
err = register_netdevice(dev);
- if (err < 0) {
- free_netdev(dev);
- goto out;
- }
+ if (err < 0) {
+ free_netdev(dev);
+ goto out;
}
+
err = rtnl_configure_link(dev, ifm);
if (err < 0)
goto out_unregister;
@@ -3626,13 +3615,24 @@
bool *changed, struct netlink_ext_ack *extack)
{
char *alt_ifname;
+ size_t size;
int err;
err = nla_validate(attr, attr->nla_len, IFLA_MAX, ifla_policy, extack);
if (err)
return err;
- alt_ifname = nla_strdup(attr, GFP_KERNEL);
+ if (cmd == RTM_NEWLINKPROP) {
+ size = rtnl_prop_list_size(dev);
+ size += nla_total_size(ALTIFNAMSIZ);
+ if (size >= U16_MAX) {
+ NL_SET_ERR_MSG(extack,
+ "effective property list too long");
+ return -EINVAL;
+ }
+ }
+
+ alt_ifname = nla_strdup(attr, GFP_KERNEL_ACCOUNT);
if (!alt_ifname)
return -ENOMEM;
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index b5bc680..189eea1 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -22,6 +22,8 @@
static siphash_key_t net_secret __read_mostly;
static siphash_key_t ts_secret __read_mostly;
+#define EPHEMERAL_PORT_SHUFFLE_PERIOD (10 * HZ)
+
static __always_inline void net_secret_init(void)
{
net_get_random_once(&net_secret, sizeof(net_secret));
@@ -62,7 +64,7 @@
.daddr = *(struct in6_addr *)daddr,
};
- if (net->ipv4.sysctl_tcp_timestamps != 1)
+ if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
return 0;
ts_secret_init();
@@ -94,17 +96,19 @@
}
EXPORT_SYMBOL(secure_tcpv6_seq);
-u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
+u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
__be16 dport)
{
const struct {
struct in6_addr saddr;
struct in6_addr daddr;
+ unsigned int timeseed;
__be16 dport;
} __aligned(SIPHASH_ALIGNMENT) combined = {
.saddr = *(struct in6_addr *)saddr,
.daddr = *(struct in6_addr *)daddr,
- .dport = dport
+ .timeseed = jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD,
+ .dport = dport,
};
net_secret_init();
return siphash(&combined, offsetofend(typeof(combined), dport),
@@ -116,7 +120,7 @@
#ifdef CONFIG_INET
u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr)
{
- if (net->ipv4.sysctl_tcp_timestamps != 1)
+ if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
return 0;
ts_secret_init();
@@ -142,11 +146,13 @@
}
EXPORT_SYMBOL_GPL(secure_tcp_seq);
-u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
+u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
{
net_secret_init();
- return siphash_3u32((__force u32)saddr, (__force u32)daddr,
- (__force u16)dport, &net_secret);
+ return siphash_4u32((__force u32)saddr, (__force u32)daddr,
+ (__force u16)dport,
+ jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD,
+ &net_secret);
}
EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral);
#endif
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 48b6438..0616988 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3809,23 +3809,25 @@
int i = 0;
int pos;
- if (list_skb && !list_skb->head_frag && skb_headlen(list_skb) &&
- (skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY)) {
- /* gso_size is untrusted, and we have a frag_list with a linear
- * non head_frag head.
- *
- * (we assume checking the first list_skb member suffices;
- * i.e if either of the list_skb members have non head_frag
- * head, then the first one has too).
- *
- * If head_skb's headlen does not fit requested gso_size, it
- * means that the frag_list members do NOT terminate on exact
- * gso_size boundaries. Hence we cannot perform skb_frag_t page
- * sharing. Therefore we must fallback to copying the frag_list
- * skbs; we do so by disabling SG.
- */
- if (mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb))
- features &= ~NETIF_F_SG;
+ if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) &&
+ mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) {
+ struct sk_buff *check_skb;
+
+ for (check_skb = list_skb; check_skb; check_skb = check_skb->next) {
+ if (skb_headlen(check_skb) && !check_skb->head_frag) {
+ /* gso_size is untrusted, and we have a frag_list with
+ * a linear non head_frag item.
+ *
+ * If head_skb's headlen does not fit requested gso_size,
+ * it means that the frag_list members do NOT terminate
+ * on exact gso_size boundaries. Hence we cannot perform
+ * skb_frag_t page sharing. Therefore we must fallback to
+ * copying the frag_list skbs; we do so by disabling SG.
+ */
+ features &= ~NETIF_F_SG;
+ break;
+ }
+ }
}
__skb_push(head_skb, doffset);
@@ -3986,9 +3988,8 @@
SKB_GSO_CB(nskb)->csum_start =
skb_headroom(nskb) + doffset;
} else {
- skb_copy_bits(head_skb, offset,
- skb_put(nskb, len),
- len);
+ if (skb_copy_bits(head_skb, offset, skb_put(nskb, len), len))
+ goto err;
}
continue;
}
@@ -4691,7 +4692,7 @@
{
bool ret;
- if (likely(sysctl_tstamp_allow_data || tsonly))
+ if (likely(READ_ONCE(sysctl_tstamp_allow_data) || tsonly))
return true;
read_lock_bh(&sk->sk_callback_lock);
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index e4bb895..bb4fbc6 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -27,6 +27,7 @@
int elem_first_coalesce)
{
struct page_frag *pfrag = sk_page_frag(sk);
+ u32 osize = msg->sg.size;
int ret = 0;
len -= msg->sg.size;
@@ -35,13 +36,17 @@
u32 orig_offset;
int use, i;
- if (!sk_page_frag_refill(sk, pfrag))
- return -ENOMEM;
+ if (!sk_page_frag_refill(sk, pfrag)) {
+ ret = -ENOMEM;
+ goto msg_trim;
+ }
orig_offset = pfrag->offset;
use = min_t(int, len, pfrag->size - orig_offset);
- if (!sk_wmem_schedule(sk, use))
- return -ENOMEM;
+ if (!sk_wmem_schedule(sk, use)) {
+ ret = -ENOMEM;
+ goto msg_trim;
+ }
i = msg->sg.end;
sk_msg_iter_var_prev(i);
@@ -71,6 +76,10 @@
}
return ret;
+
+msg_trim:
+ sk_msg_trim(sk, msg, osize);
+ return ret;
}
EXPORT_SYMBOL_GPL(sk_msg_alloc);
@@ -603,7 +612,9 @@
sk_psock_set_state(psock, SK_PSOCK_TX_ENABLED);
refcount_set(&psock->refcnt, 1);
- rcu_assign_sk_user_data_nocopy(sk, psock);
+ __rcu_assign_sk_user_data_with_flags(sk, psock,
+ SK_USER_DATA_NOCOPY |
+ SK_USER_DATA_PSOCK);
sock_hold(sk);
out:
diff --git a/net/core/sock.c b/net/core/sock.c
index 6d9af4e..1bb6a00 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -887,7 +887,7 @@
* play 'guess the biggest size' games. RCVBUF/SNDBUF
* are treated in BSD as hints
*/
- val = min_t(u32, val, sysctl_wmem_max);
+ val = min_t(u32, val, READ_ONCE(sysctl_wmem_max));
set_sndbuf:
/* Ensure val * 2 fits into an int, to prevent max_t()
* from treating it as a negative value.
@@ -919,7 +919,7 @@
* play 'guess the biggest size' games. RCVBUF/SNDBUF
* are treated in BSD as hints
*/
- __sock_set_rcvbuf(sk, min_t(u32, val, sysctl_rmem_max));
+ __sock_set_rcvbuf(sk, min_t(u32, val, READ_ONCE(sysctl_rmem_max)));
break;
case SO_RCVBUFFORCE:
@@ -2219,7 +2219,7 @@
/* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
- sysctl_optmem_max)
+ READ_ONCE(sysctl_optmem_max))
return NULL;
skb = alloc_skb(size, priority);
@@ -2237,8 +2237,10 @@
*/
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{
- if ((unsigned int)size <= sysctl_optmem_max &&
- atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
+ int optmem_max = READ_ONCE(sysctl_optmem_max);
+
+ if ((unsigned int)size <= optmem_max &&
+ atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
void *mem;
/* First do the add, to avoid the race if kmalloc
* might sleep.
@@ -2974,8 +2976,8 @@
timer_setup(&sk->sk_timer, NULL, 0);
sk->sk_allocation = GFP_KERNEL;
- sk->sk_rcvbuf = sysctl_rmem_default;
- sk->sk_sndbuf = sysctl_wmem_default;
+ sk->sk_rcvbuf = READ_ONCE(sysctl_rmem_default);
+ sk->sk_sndbuf = READ_ONCE(sysctl_wmem_default);
sk->sk_state = TCP_CLOSE;
sk_set_socket(sk, sock);
@@ -3030,7 +3032,7 @@
#ifdef CONFIG_NET_RX_BUSY_POLL
sk->sk_napi_id = 0;
- sk->sk_ll_usec = sysctl_net_busy_read;
+ sk->sk_ll_usec = READ_ONCE(sysctl_net_busy_read);
#endif
sk->sk_max_pacing_rate = ~0UL;
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 4ea5bc6..cbf4184 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -815,13 +815,22 @@
{
struct sock_map_seq_info *info = priv_data;
+ bpf_map_inc_with_uref(aux->map);
info->map = aux->map;
return 0;
}
+static void sock_map_fini_seq_private(void *priv_data)
+{
+ struct sock_map_seq_info *info = priv_data;
+
+ bpf_map_put_with_uref(info->map);
+}
+
static const struct bpf_iter_seq_info sock_map_iter_seq_info = {
.seq_ops = &sock_map_seq_ops,
.init_seq_private = sock_map_init_seq_private,
+ .fini_seq_private = sock_map_fini_seq_private,
.seq_priv_size = sizeof(struct sock_map_seq_info),
};
@@ -1422,18 +1431,27 @@
};
static int sock_hash_init_seq_private(void *priv_data,
- struct bpf_iter_aux_info *aux)
+ struct bpf_iter_aux_info *aux)
{
struct sock_hash_seq_info *info = priv_data;
+ bpf_map_inc_with_uref(aux->map);
info->map = aux->map;
info->htab = container_of(aux->map, struct bpf_shtab, map);
return 0;
}
+static void sock_hash_fini_seq_private(void *priv_data)
+{
+ struct sock_hash_seq_info *info = priv_data;
+
+ bpf_map_put_with_uref(info->map);
+}
+
static const struct bpf_iter_seq_info sock_hash_iter_seq_info = {
.seq_ops = &sock_hash_seq_ops,
.init_seq_private = sock_hash_init_seq_private,
+ .fini_seq_private = sock_hash_fini_seq_private,
.seq_priv_size = sizeof(struct sock_hash_seq_info),
};
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index b065f0a..49f9c2c 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -18,6 +18,22 @@
static DEFINE_IDA(reuseport_ida);
+void reuseport_has_conns_set(struct sock *sk)
+{
+ struct sock_reuseport *reuse;
+
+ if (!rcu_access_pointer(sk->sk_reuseport_cb))
+ return;
+
+ spin_lock_bh(&reuseport_lock);
+ reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
+ lockdep_is_held(&reuseport_lock));
+ if (likely(reuse))
+ reuse->has_conns = 1;
+ spin_unlock_bh(&reuseport_lock);
+}
+EXPORT_SYMBOL(reuseport_has_conns_set);
+
static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks)
{
unsigned int size = sizeof(struct sock_reuseport) +
diff --git a/net/core/stream.c b/net/core/stream.c
index a166a32..a611305 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -159,7 +159,8 @@
*timeo_p = current_timeo;
}
out:
- remove_wait_queue(sk_sleep(sk), &wait);
+ if (!sock_flag(sk, SOCK_DEAD))
+ remove_wait_queue(sk_sleep(sk), &wait);
return err;
do_error:
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 2e0a437..0dfe9f2 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -235,14 +235,17 @@
static int proc_do_dev_weight(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
- int ret;
+ static DEFINE_MUTEX(dev_weight_mutex);
+ int ret, weight;
+ mutex_lock(&dev_weight_mutex);
ret = proc_dointvec(table, write, buffer, lenp, ppos);
- if (ret != 0)
- return ret;
-
- dev_rx_weight = weight_p * dev_weight_rx_bias;
- dev_tx_weight = weight_p * dev_weight_tx_bias;
+ if (!ret && write) {
+ weight = READ_ONCE(weight_p);
+ WRITE_ONCE(dev_rx_weight, weight * dev_weight_rx_bias);
+ WRITE_ONCE(dev_tx_weight, weight * dev_weight_tx_bias);
+ }
+ mutex_unlock(&dev_weight_mutex);
return ret;
}
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index b0b6e6a..a2a8b95 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -130,6 +130,8 @@
* This unhashes the socket and releases the local port, if necessary.
*/
dccp_set_state(sk, DCCP_CLOSED);
+ if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
+ inet_reset_saddr(sk);
ip_rt_put(rt);
sk->sk_route_caps = 0;
inet->inet_dport = 0;
@@ -464,7 +466,7 @@
.fl4_dport = dccp_hdr(skb)->dccph_sport,
};
- security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
+ security_skb_classify_flow(skb, flowi4_to_flowi_common(&fl4));
rt = ip_route_output_flow(net, &fl4, sk);
if (IS_ERR(rt)) {
IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 49f4034..21c61a9 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -203,7 +203,7 @@
fl6.flowi6_oif = ireq->ir_iif;
fl6.fl6_dport = ireq->ir_rmt_port;
fl6.fl6_sport = htons(ireq->ir_num);
- security_req_classify_flow(req, flowi6_to_flowi(&fl6));
+ security_req_classify_flow(req, flowi6_to_flowi_common(&fl6));
rcu_read_lock();
@@ -279,7 +279,7 @@
fl6.flowi6_oif = inet6_iif(rxskb);
fl6.fl6_dport = dccp_hdr(skb)->dccph_dport;
fl6.fl6_sport = dccp_hdr(skb)->dccph_sport;
- security_skb_classify_flow(rxskb, flowi6_to_flowi(&fl6));
+ security_skb_classify_flow(rxskb, flowi6_to_flowi_common(&fl6));
/* sk = NULL, but it is safe for now. RST socket required. */
dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
@@ -912,7 +912,7 @@
fl6.flowi6_oif = sk->sk_bound_dev_if;
fl6.fl6_dport = usin->sin6_port;
fl6.fl6_sport = inet->inet_sport;
- security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+ security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
final_p = fl6_update_dst(&fl6, opt, &final);
@@ -957,6 +957,8 @@
late_failure:
dccp_set_state(sk, DCCP_CLOSED);
+ if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
+ inet_reset_saddr(sk);
__sk_dst_reset(sk);
failure:
inet->inet_dport = 0;
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 548cf01..65e81e0 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -747,11 +747,6 @@
lock_sock(sk);
- if (dccp_qpolicy_full(sk)) {
- rc = -EAGAIN;
- goto out_release;
- }
-
timeo = sock_sndtimeo(sk, noblock);
/*
@@ -770,6 +765,11 @@
if (skb == NULL)
goto out_release;
+ if (dccp_qpolicy_full(sk)) {
+ rc = -EAGAIN;
+ goto out_discard;
+ }
+
if (sk->sk_state == DCCP_CLOSED) {
rc = -ENOTCONN;
goto out_discard;
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index dc92a67..7d542eb 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -480,8 +480,8 @@
sk->sk_family = PF_DECnet;
sk->sk_protocol = 0;
sk->sk_allocation = gfp;
- sk->sk_sndbuf = sysctl_decnet_wmem[1];
- sk->sk_rcvbuf = sysctl_decnet_rmem[1];
+ sk->sk_sndbuf = READ_ONCE(sysctl_decnet_wmem[1]);
+ sk->sk_rcvbuf = READ_ONCE(sysctl_decnet_rmem[1]);
/* Initialization of DECnet Session Control Port */
scp = DN_SK(sk);
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 73569c9..c9d552c 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -721,8 +721,10 @@
if (ds->ops->phylink_mac_link_down)
ds->ops->phylink_mac_link_down(ds, port,
MLO_AN_FIXED, PHY_INTERFACE_MODE_NA);
+ of_node_put(phy_np);
return dsa_port_phylink_register(dp);
}
+ of_node_put(phy_np);
return 0;
}
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index baf4765..908324b 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -108,15 +108,15 @@
struct hsr_port *port)
{
if (!frame->skb_std) {
- if (frame->skb_hsr) {
+ if (frame->skb_hsr)
frame->skb_std =
create_stripped_skb_hsr(frame->skb_hsr, frame);
- } else {
- /* Unexpected */
- WARN_ONCE(1, "%s:%d: Unexpected frame received (port_src %s)\n",
- __FILE__, __LINE__, port->dev->name);
+ else
+ netdev_warn_once(port->dev,
+ "Unexpected frame received in hsr_get_untagged_frame()\n");
+
+ if (!frame->skb_std)
return NULL;
- }
}
return skb_clone(frame->skb_std, GFP_ATOMIC);
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index c25f761..d4c275e 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -201,8 +201,9 @@
int err = 0;
struct net_device *dev = NULL;
- if (len < sizeof(*uaddr))
- return -EINVAL;
+ err = ieee802154_sockaddr_check_size(uaddr, len);
+ if (err < 0)
+ return err;
uaddr = (struct sockaddr_ieee802154 *)_uaddr;
if (uaddr->family != AF_IEEE802154)
@@ -272,6 +273,10 @@
err = -EMSGSIZE;
goto out_dev;
}
+ if (!size) {
+ err = 0;
+ goto out_dev;
+ }
hlen = LL_RESERVED_SPACE(dev);
tlen = dev->needed_tailroom;
@@ -494,11 +499,14 @@
ro->bound = 0;
- if (len < sizeof(*addr))
+ err = ieee802154_sockaddr_check_size(addr, len);
+ if (err < 0)
goto out;
- if (addr->family != AF_IEEE802154)
+ if (addr->family != AF_IEEE802154) {
+ err = -EINVAL;
goto out;
+ }
ieee802154_addr_from_sa(&haddr, &addr->addr);
dev = ieee802154_get_dev(sock_net(sk), &haddr);
@@ -565,8 +573,9 @@
struct dgram_sock *ro = dgram_sk(sk);
int err = 0;
- if (len < sizeof(*addr))
- return -EINVAL;
+ err = ieee802154_sockaddr_check_size(addr, len);
+ if (err < 0)
+ return err;
if (addr->family != AF_IEEE802154)
return -EINVAL;
@@ -605,6 +614,7 @@
struct ieee802154_mac_cb *cb;
struct dgram_sock *ro = dgram_sk(sk);
struct ieee802154_addr dst_addr;
+ DECLARE_SOCKADDR(struct sockaddr_ieee802154*, daddr, msg->msg_name);
int hlen, tlen;
int err;
@@ -613,10 +623,20 @@
return -EOPNOTSUPP;
}
- if (!ro->connected && !msg->msg_name)
- return -EDESTADDRREQ;
- else if (ro->connected && msg->msg_name)
- return -EISCONN;
+ if (msg->msg_name) {
+ if (ro->connected)
+ return -EISCONN;
+ if (msg->msg_namelen < IEEE802154_MIN_NAMELEN)
+ return -EINVAL;
+ err = ieee802154_sockaddr_check_size(daddr, msg->msg_namelen);
+ if (err < 0)
+ return err;
+ ieee802154_addr_from_sa(&dst_addr, &daddr->addr);
+ } else {
+ if (!ro->connected)
+ return -EDESTADDRREQ;
+ dst_addr = ro->dst_addr;
+ }
if (!ro->bound)
dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154);
@@ -652,16 +672,6 @@
cb = mac_cb_init(skb);
cb->type = IEEE802154_FC_TYPE_DATA;
cb->ackreq = ro->want_ack;
-
- if (msg->msg_name) {
- DECLARE_SOCKADDR(struct sockaddr_ieee802154*,
- daddr, msg->msg_name);
-
- ieee802154_addr_from_sa(&dst_addr, &daddr->addr);
- } else {
- dst_addr = ro->dst_addr;
- }
-
cb->secen = ro->secen;
cb->secen_override = ro->secen_override;
cb->seclevel = ro->seclevel;
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 87983e7..23b0606 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -403,6 +403,16 @@
If unsure, say Y.
+config INET_TABLE_PERTURB_ORDER
+ int "INET: Source port perturbation table size (as power of 2)" if EXPERT
+ default 16
+ help
+ Source port perturbation table size (as power of 2) for
+ RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm.
+
+ The default is almost always what you want.
+ Only change this if you know what you are doing.
+
config INET_XFRM_TUNNEL
tristate
select INET_TUNNEL
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 7422185..48223c2 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -158,7 +158,7 @@
kfree(rcu_dereference_protected(inet->inet_opt, 1));
dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1));
- dst_release(sk->sk_rx_dst);
+ dst_release(rcu_dereference_protected(sk->sk_rx_dst, 1));
sk_refcnt_debug_dec(sk);
}
EXPORT_SYMBOL(inet_sock_destruct);
@@ -220,7 +220,7 @@
* because the socket was in TCP_LISTEN state previously but
* was shutdown() rather than close().
*/
- tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+ tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen);
if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
(tcp_fastopen & TFO_SERVER_ENABLE) &&
!inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
@@ -338,7 +338,7 @@
inet->hdrincl = 1;
}
- if (net->ipv4.sysctl_ip_no_pmtu_disc)
+ if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc))
inet->pmtudisc = IP_PMTUDISC_DONT;
else
inet->pmtudisc = IP_PMTUDISC_WANT;
@@ -1245,7 +1245,7 @@
if (new_saddr == old_saddr)
return 0;
- if (sock_net(sk)->ipv4.sysctl_ip_dynaddr > 1) {
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) > 1) {
pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n",
__func__, &old_saddr, &new_saddr);
}
@@ -1300,7 +1300,7 @@
* Other protocols have to map its equivalent state to TCP_SYN_SENT.
* DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme
*/
- if (!sock_net(sk)->ipv4.sysctl_ip_dynaddr ||
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) ||
sk->sk_state != TCP_SYN_SENT ||
(sk->sk_userlocks & SOCK_BINDADDR_LOCK) ||
(err = inet_sk_reselect_saddr(sk)) != 0)
@@ -1726,12 +1726,7 @@
};
#endif
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct net_protocol tcp_protocol = {
- .early_demux = tcp_v4_early_demux,
- .early_demux_handler = tcp_v4_early_demux,
+static const struct net_protocol tcp_protocol = {
.handler = tcp_v4_rcv,
.err_handler = tcp_v4_err,
.no_policy = 1,
@@ -1739,12 +1734,7 @@
.icmp_strict_tag_validation = 1,
};
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct net_protocol udp_protocol = {
- .early_demux = udp_v4_early_demux,
- .early_demux_handler = udp_v4_early_demux,
+static const struct net_protocol udp_protocol = {
.handler = udp_rcv,
.err_handler = udp_err,
.no_policy = 1,
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 922dd73..83a4799 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1116,13 +1116,18 @@
return err;
}
-static int arp_invalidate(struct net_device *dev, __be32 ip)
+int arp_invalidate(struct net_device *dev, __be32 ip, bool force)
{
struct neighbour *neigh = neigh_lookup(&arp_tbl, &ip, dev);
int err = -ENXIO;
struct neigh_table *tbl = &arp_tbl;
if (neigh) {
+ if ((neigh->nud_state & NUD_VALID) && !force) {
+ neigh_release(neigh);
+ return 0;
+ }
+
if (neigh->nud_state & ~NUD_NOARP)
err = neigh_update(neigh, NULL, NUD_FAILED,
NEIGH_UPDATE_F_OVERRIDE|
@@ -1169,7 +1174,7 @@
if (!dev)
return -EINVAL;
}
- return arp_invalidate(dev, ip);
+ return arp_invalidate(dev, ip, true);
}
/*
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index ca217a6..d4a4160 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -240,7 +240,7 @@
struct cipso_v4_map_cache_entry *prev_entry = NULL;
u32 hash;
- if (!cipso_v4_cache_enabled)
+ if (!READ_ONCE(cipso_v4_cache_enabled))
return -ENOENT;
hash = cipso_v4_map_cache_hash(key, key_len);
@@ -297,13 +297,14 @@
int cipso_v4_cache_add(const unsigned char *cipso_ptr,
const struct netlbl_lsm_secattr *secattr)
{
+ int bkt_size = READ_ONCE(cipso_v4_cache_bucketsize);
int ret_val = -EPERM;
u32 bkt;
struct cipso_v4_map_cache_entry *entry = NULL;
struct cipso_v4_map_cache_entry *old_entry = NULL;
u32 cipso_ptr_len;
- if (!cipso_v4_cache_enabled || cipso_v4_cache_bucketsize <= 0)
+ if (!READ_ONCE(cipso_v4_cache_enabled) || bkt_size <= 0)
return 0;
cipso_ptr_len = cipso_ptr[1];
@@ -323,7 +324,7 @@
bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETS - 1);
spin_lock_bh(&cipso_v4_cache[bkt].lock);
- if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) {
+ if (cipso_v4_cache[bkt].size < bkt_size) {
list_add(&entry->list, &cipso_v4_cache[bkt].list);
cipso_v4_cache[bkt].size += 1;
} else {
@@ -1200,7 +1201,8 @@
/* This will send packets using the "optimized" format when
* possible as specified in section 3.4.2.6 of the
* CIPSO draft. */
- if (cipso_v4_rbm_optfmt && ret_val > 0 && ret_val <= 10)
+ if (READ_ONCE(cipso_v4_rbm_optfmt) && ret_val > 0 &&
+ ret_val <= 10)
tag_len = 14;
else
tag_len = 4 + ret_val;
@@ -1604,7 +1606,7 @@
* all the CIPSO validations here but it doesn't
* really specify _exactly_ what we need to validate
* ... so, just make it a sysctl tunable. */
- if (cipso_v4_rbm_strictvalid) {
+ if (READ_ONCE(cipso_v4_rbm_strictvalid)) {
if (cipso_v4_map_lvl_valid(doi_def,
tag[3]) < 0) {
err_offset = opt_iter + 3;
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 4a8550c..112c6e8 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -70,7 +70,7 @@
}
inet->inet_daddr = fl4->daddr;
inet->inet_dport = usin->sin_port;
- reuseport_has_conns(sk, true);
+ reuseport_has_conns_set(sk);
sk->sk_state = TCP_ESTABLISHED;
sk_set_txhash(sk);
inet->inet_id = prandom_u32();
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 148ef48..88b6120 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1244,7 +1244,7 @@
return ret;
}
-static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
+int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
{
struct in_device *in_dev = __in_dev_get_rtnl(dev);
const struct in_ifaddr *ifa;
@@ -2668,23 +2668,27 @@
#endif
if (!net_eq(net, &init_net)) {
- if (IS_ENABLED(CONFIG_SYSCTL) &&
- sysctl_devconf_inherit_init_net == 3) {
+ switch (net_inherit_devconf()) {
+ case 3:
/* copy from the current netns */
memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
sizeof(ipv4_devconf));
memcpy(dflt,
current->nsproxy->net_ns->ipv4.devconf_dflt,
sizeof(ipv4_devconf_dflt));
- } else if (!IS_ENABLED(CONFIG_SYSCTL) ||
- sysctl_devconf_inherit_init_net != 2) {
- /* inherit == 0 or 1: copy from init_net */
+ break;
+ case 0:
+ case 1:
+ /* copy from init_net */
memcpy(all, init_net.ipv4.devconf_all,
sizeof(ipv4_devconf));
memcpy(dflt, init_net.ipv4.devconf_dflt,
sizeof(ipv4_devconf_dflt));
+ break;
+ case 2:
+ /* use compiled values */
+ break;
}
- /* else inherit == 2: use compiled values */
}
#ifdef CONFIG_SYSCTL
@@ -2762,8 +2766,6 @@
INIT_HLIST_HEAD(&inet_addr_lst[i]);
register_pernet_subsys(&devinet_ops);
-
- register_gifconf(PF_INET, inet_gifconf);
register_netdevice_notifier(&ip_netdev_notifier);
queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 9aae821..20d7381 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -448,7 +448,6 @@
struct page *page;
struct sk_buff *trailer;
int tailen = esp->tailen;
- unsigned int allocsz;
/* this is non-NULL only with TCP/UDP Encapsulation */
if (x->encap) {
@@ -458,8 +457,8 @@
return err;
}
- allocsz = ALIGN(skb->data_len + tailen, L1_CACHE_BYTES);
- if (allocsz > ESP_SKB_FRAG_MAXSIZE)
+ if (ALIGN(tailen, L1_CACHE_BYTES) > PAGE_SIZE ||
+ ALIGN(skb->data_len, L1_CACHE_BYTES) > PAGE_SIZE)
goto cow;
if (!skb_cloned(skb)) {
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 3450c9b..8425767 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -312,6 +312,9 @@
xo->seq.low += skb_shinfo(skb)->gso_segs;
}
+ if (xo->seq.low < seq)
+ xo->seq.hi++;
+
esp.seqno = cpu_to_be64(seq + ((u64)xo->seq.hi << 32));
ip_hdr(skb)->tot_len = htons(skb->len);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 917ea95..af8a425 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -389,7 +389,7 @@
dev_match = dev_match || (res.type == RTN_LOCAL &&
dev == net->loopback_dev);
if (dev_match) {
- ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_HOST;
+ ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_LINK;
return ret;
}
if (no_addr)
@@ -401,7 +401,7 @@
ret = 0;
if (fib_lookup(net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE) == 0) {
if (res.type == RTN_UNICAST)
- ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_HOST;
+ ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_LINK;
}
return ret;
@@ -1112,9 +1112,11 @@
return;
/* Add broadcast address, if it is explicitly assigned. */
- if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
+ if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) {
fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32,
prim, 0);
+ arp_invalidate(dev, ifa->ifa_broadcast, false);
+ }
if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) &&
(prefix != addr || ifa->ifa_prefixlen < 32)) {
@@ -1130,6 +1132,7 @@
prim, 0);
fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask,
32, prim, 0);
+ arp_invalidate(dev, prefix | ~mask, false);
}
}
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 838a876..3824b7a 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -888,8 +888,13 @@
}
if (cfg->fc_oif || cfg->fc_gw_family) {
- struct fib_nh *nh = fib_info_nh(fi, 0);
+ struct fib_nh *nh;
+ /* cannot match on nexthop object attributes */
+ if (fi->nh)
+ return 1;
+
+ nh = fib_info_nh(fi, 0);
if (cfg->fc_encap) {
if (fib_encap_match(net, cfg->fc_encap_type,
cfg->fc_encap, nh, cfg, extack))
@@ -1224,7 +1229,7 @@
nh->fib_nh_dev = in_dev->dev;
dev_hold(nh->fib_nh_dev);
- nh->fib_nh_scope = RT_SCOPE_HOST;
+ nh->fib_nh_scope = RT_SCOPE_LINK;
if (!netif_carrier_ok(nh->fib_nh_dev))
nh->fib_nh_flags |= RTNH_F_LINKDOWN;
err = 0;
@@ -1826,7 +1831,7 @@
goto nla_put_failure;
if (nexthop_is_blackhole(fi->nh))
rtm->rtm_type = RTN_BLACKHOLE;
- if (!fi->fib_net->ipv4.sysctl_nexthop_compat_mode)
+ if (!READ_ONCE(fi->fib_net->ipv4.sysctl_nexthop_compat_mode))
goto offload;
}
@@ -2227,7 +2232,7 @@
}
change_nexthops(fi) {
- if (net->ipv4.sysctl_fib_multipath_use_neigh) {
+ if (READ_ONCE(net->ipv4.sysctl_fib_multipath_use_neigh)) {
if (!fib_good_nh(nexthop_nh))
continue;
if (!first) {
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index ffc5332..d11fb16 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -497,7 +497,7 @@
tn = container_of(head, struct tnode, rcu)->kv;
}
- if (tnode_free_size >= sysctl_fib_sync_mem) {
+ if (tnode_free_size >= READ_ONCE(sysctl_fib_sync_mem)) {
tnode_free_size = 0;
synchronize_rcu();
}
@@ -1331,8 +1331,10 @@
/* The alias was already inserted, so the node must exist. */
l = l ? l : fib_find_node(t, &tp, key);
- if (WARN_ON_ONCE(!l))
+ if (WARN_ON_ONCE(!l)) {
+ err = -ENOENT;
goto out_free_new_fa;
+ }
if (fib_find_alias(&l->leaf, new_fa->fa_slen, 0, 0, tb->tb_id, true) ==
new_fa) {
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index b71b836..a1aacf5 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -261,11 +261,12 @@
spin_lock(&icmp_global.lock);
delta = min_t(u32, now - icmp_global.stamp, HZ);
if (delta >= HZ / 50) {
- incr = sysctl_icmp_msgs_per_sec * delta / HZ ;
+ incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ;
if (incr)
WRITE_ONCE(icmp_global.stamp, now);
}
- credit = min_t(u32, icmp_global.credit + incr, sysctl_icmp_msgs_burst);
+ credit = min_t(u32, icmp_global.credit + incr,
+ READ_ONCE(sysctl_icmp_msgs_burst));
if (credit) {
/* We want to use a credit of one in average, but need to randomize
* it for security reasons.
@@ -289,7 +290,7 @@
return true;
/* Limit if icmp type is enabled in ratemask. */
- if (!((1 << type) & net->ipv4.sysctl_icmp_ratemask))
+ if (!((1 << type) & READ_ONCE(net->ipv4.sysctl_icmp_ratemask)))
return true;
return false;
@@ -327,7 +328,8 @@
vif = l3mdev_master_ifindex(dst->dev);
peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1);
- rc = inet_peer_xrlim_allow(peer, net->ipv4.sysctl_icmp_ratelimit);
+ rc = inet_peer_xrlim_allow(peer,
+ READ_ONCE(net->ipv4.sysctl_icmp_ratelimit));
if (peer)
inet_putpeer(peer);
out:
@@ -447,7 +449,7 @@
fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
fl4.flowi4_proto = IPPROTO_ICMP;
fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev);
- security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
+ security_skb_classify_flow(skb, flowi4_to_flowi_common(&fl4));
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt))
goto out_unlock;
@@ -503,7 +505,7 @@
route_lookup_dev = icmp_get_route_lookup_dev(skb_in);
fl4->flowi4_oif = l3mdev_master_ifindex(route_lookup_dev);
- security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
+ security_skb_classify_flow(skb_in, flowi4_to_flowi_common(fl4));
rt = ip_route_output_key_hash(net, fl4, skb_in);
if (IS_ERR(rt))
return rt;
@@ -885,7 +887,7 @@
* values please see
* Documentation/networking/ip-sysctl.rst
*/
- switch (net->ipv4.sysctl_ip_no_pmtu_disc) {
+ switch (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) {
default:
net_dbg_ratelimited("%pI4: fragmentation needed and DF set\n",
&iph->daddr);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 0c32199..c71b863 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -467,7 +467,8 @@
if (pmc->multiaddr == IGMP_ALL_HOSTS)
return skb;
- if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(pmc->multiaddr) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return skb;
mtu = READ_ONCE(dev->mtu);
@@ -593,7 +594,7 @@
if (pmc->multiaddr == IGMP_ALL_HOSTS)
continue;
if (ipv4_is_local_multicast(pmc->multiaddr) &&
- !net->ipv4.sysctl_igmp_llm_reports)
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
continue;
spin_lock_bh(&pmc->lock);
if (pmc->sfcount[MCAST_EXCLUDE])
@@ -736,7 +737,8 @@
if (type == IGMPV3_HOST_MEMBERSHIP_REPORT)
return igmpv3_send_report(in_dev, pmc);
- if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(group) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return 0;
if (type == IGMP_HOST_LEAVE_MESSAGE)
@@ -825,7 +827,7 @@
struct net *net = dev_net(in_dev->dev);
if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev))
return;
- WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv);
+ WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv));
igmp_ifc_start_timer(in_dev, 1);
}
@@ -920,7 +922,8 @@
if (group == IGMP_ALL_HOSTS)
return false;
- if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(group) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return false;
rcu_read_lock();
@@ -1006,7 +1009,7 @@
* received value was zero, use the default or statically
* configured value.
*/
- in_dev->mr_qrv = ih3->qrv ?: net->ipv4.sysctl_igmp_qrv;
+ in_dev->mr_qrv = ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL;
/* RFC3376, 8.3. Query Response Interval:
@@ -1045,7 +1048,7 @@
if (im->multiaddr == IGMP_ALL_HOSTS)
continue;
if (ipv4_is_local_multicast(im->multiaddr) &&
- !net->ipv4.sysctl_igmp_llm_reports)
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
continue;
spin_lock_bh(&im->lock);
if (im->tm_running)
@@ -1186,7 +1189,7 @@
pmc->interface = im->interface;
in_dev_hold(in_dev);
pmc->multiaddr = im->multiaddr;
- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
pmc->sfmode = im->sfmode;
if (pmc->sfmode == MCAST_INCLUDE) {
struct ip_sf_list *psf;
@@ -1237,9 +1240,11 @@
swap(im->tomb, pmc->tomb);
swap(im->sources, pmc->sources);
for (psf = im->sources; psf; psf = psf->sf_next)
- psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ psf->sf_crcount = in_dev->mr_qrv ?:
+ READ_ONCE(net->ipv4.sysctl_igmp_qrv);
} else {
- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ im->crcount = in_dev->mr_qrv ?:
+ READ_ONCE(net->ipv4.sysctl_igmp_qrv);
}
in_dev_put(pmc->interface);
kfree_pmc(pmc);
@@ -1296,7 +1301,8 @@
#ifdef CONFIG_IP_MULTICAST
if (im->multiaddr == IGMP_ALL_HOSTS)
return;
- if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(im->multiaddr) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return;
reporter = im->reporter;
@@ -1338,13 +1344,14 @@
#ifdef CONFIG_IP_MULTICAST
if (im->multiaddr == IGMP_ALL_HOSTS)
return;
- if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(im->multiaddr) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return;
if (in_dev->dead)
return;
- im->unsolicit_count = net->ipv4.sysctl_igmp_qrv;
+ im->unsolicit_count = READ_ONCE(net->ipv4.sysctl_igmp_qrv);
if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) {
spin_lock_bh(&im->lock);
igmp_start_timer(im, IGMP_INITIAL_REPORT_DELAY);
@@ -1358,7 +1365,7 @@
* IN() to IN(A).
*/
if (im->sfmode == MCAST_EXCLUDE)
- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ im->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
igmp_ifc_event(in_dev);
#endif
@@ -1642,7 +1649,7 @@
if (im->multiaddr == IGMP_ALL_HOSTS)
continue;
if (ipv4_is_local_multicast(im->multiaddr) &&
- !net->ipv4.sysctl_igmp_llm_reports)
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
continue;
/* a failover is happening and switches
@@ -1749,7 +1756,7 @@
in_dev->mr_qi = IGMP_QUERY_INTERVAL;
in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL;
- in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
+ in_dev->mr_qrv = READ_ONCE(net->ipv4.sysctl_igmp_qrv);
}
#else
static void ip_mc_reset(struct in_device *in_dev)
@@ -1883,7 +1890,7 @@
#ifdef CONFIG_IP_MULTICAST
if (psf->sf_oldin &&
!IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) {
- psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ psf->sf_crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
psf->sf_next = pmc->tomb;
pmc->tomb = psf;
rv = 1;
@@ -1947,7 +1954,7 @@
/* filter mode change */
pmc->sfmode = MCAST_INCLUDE;
#ifdef CONFIG_IP_MULTICAST
- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
for (psf = pmc->sources; psf; psf = psf->sf_next)
psf->sf_crcount = 0;
@@ -2126,7 +2133,7 @@
#ifdef CONFIG_IP_MULTICAST
/* else no filters; keep old mode for reports */
- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
for (psf = pmc->sources; psf; psf = psf->sf_next)
psf->sf_crcount = 0;
@@ -2192,7 +2199,7 @@
count++;
}
err = -ENOBUFS;
- if (count >= net->ipv4.sysctl_igmp_max_memberships)
+ if (count >= READ_ONCE(net->ipv4.sysctl_igmp_max_memberships))
goto done;
iml = sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL);
if (!iml)
@@ -2379,7 +2386,7 @@
}
/* else, add a new source to the filter */
- if (psl && psl->sl_count >= net->ipv4.sysctl_igmp_max_msf) {
+ if (psl && psl->sl_count >= READ_ONCE(net->ipv4.sysctl_igmp_max_msf)) {
err = -ENOBUFS;
goto done;
}
@@ -2401,9 +2408,10 @@
newpsl->sl_addr[i] = psl->sl_addr[i];
/* decrease mem now to avoid the memleak warning */
atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
- kfree_rcu(psl, rcu);
}
rcu_assign_pointer(pmc->sflist, newpsl);
+ if (psl)
+ kfree_rcu(psl, rcu);
psl = newpsl;
}
rv = 1; /* > 0 for insert logic below if sl_count is 0 */
@@ -2501,11 +2509,13 @@
psl->sl_count, psl->sl_addr, 0);
/* decrease mem now to avoid the memleak warning */
atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
- kfree_rcu(psl, rcu);
- } else
+ } else {
(void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
0, NULL, 0);
+ }
rcu_assign_pointer(pmc->sflist, newpsl);
+ if (psl)
+ kfree_rcu(psl, rcu);
pmc->sfmode = msf->imsf_fmode;
err = 0;
done:
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index addd595..4d97133 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -251,7 +251,7 @@
goto other_half_scan;
}
- if (net->ipv4.sysctl_ip_autobind_reuse && !relax) {
+ if (READ_ONCE(net->ipv4.sysctl_ip_autobind_reuse) && !relax) {
/* We still have a chance to connect to different destinations */
relax = true;
goto ports_exhausted;
@@ -602,7 +602,7 @@
(opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
ireq->ir_loc_addr, ireq->ir_rmt_port,
htons(ireq->ir_num), sk->sk_uid);
- security_req_classify_flow(req, flowi4_to_flowi(fl4));
+ security_req_classify_flow(req, flowi4_to_flowi_common(fl4));
rt = ip_route_output_flow(net, fl4, sk);
if (IS_ERR(rt))
goto no_route;
@@ -640,7 +640,7 @@
(opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
ireq->ir_loc_addr, ireq->ir_rmt_port,
htons(ireq->ir_num), sk->sk_uid);
- security_req_classify_flow(req, flowi4_to_flowi(fl4));
+ security_req_classify_flow(req, flowi4_to_flowi_common(fl4));
rt = ip_route_output_flow(net, fl4, sk);
if (IS_ERR(rt))
goto no_route;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index e093847..c68a1da 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -410,13 +410,11 @@
sk_nulls_for_each_rcu(sk, node, &head->chain) {
if (sk->sk_hash != hash)
continue;
- if (likely(INET_MATCH(sk, net, acookie,
- saddr, daddr, ports, dif, sdif))) {
+ if (likely(INET_MATCH(net, sk, acookie, ports, dif, sdif))) {
if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
goto out;
- if (unlikely(!INET_MATCH(sk, net, acookie,
- saddr, daddr, ports,
- dif, sdif))) {
+ if (unlikely(!INET_MATCH(net, sk, acookie,
+ ports, dif, sdif))) {
sock_gen_put(sk);
goto begin;
}
@@ -465,8 +463,7 @@
if (sk2->sk_hash != hash)
continue;
- if (likely(INET_MATCH(sk2, net, acookie,
- saddr, daddr, ports, dif, sdif))) {
+ if (likely(INET_MATCH(net, sk2, acookie, ports, dif, sdif))) {
if (sk2->sk_state == TCP_TIME_WAIT) {
tw = inet_twsk(sk2);
if (twsk_unique(sk, sk2, twp))
@@ -504,7 +501,7 @@
return -EADDRNOTAVAIL;
}
-static u32 inet_sk_port_offset(const struct sock *sk)
+static u64 inet_sk_port_offset(const struct sock *sk)
{
const struct inet_sock *inet = inet_sk(sk);
@@ -532,16 +529,14 @@
if (esk->sk_hash != sk->sk_hash)
continue;
if (sk->sk_family == AF_INET) {
- if (unlikely(INET_MATCH(esk, net, acookie,
- sk->sk_daddr,
- sk->sk_rcv_saddr,
+ if (unlikely(INET_MATCH(net, esk, acookie,
ports, dif, sdif))) {
return true;
}
}
#if IS_ENABLED(CONFIG_IPV6)
else if (sk->sk_family == AF_INET6) {
- if (unlikely(INET6_MATCH(esk, net,
+ if (unlikely(inet6_match(net, esk,
&sk->sk_v6_daddr,
&sk->sk_v6_rcv_saddr,
ports, dif, sdif))) {
@@ -637,7 +632,9 @@
int err = 0;
if (sk->sk_state != TCP_LISTEN) {
+ local_bh_disable();
inet_ehash_nolisten(sk, osk, NULL);
+ local_bh_enable();
return 0;
}
WARN_ON(!sk_unhashed(sk));
@@ -669,50 +666,72 @@
{
int err = 0;
- if (sk->sk_state != TCP_CLOSE) {
- local_bh_disable();
+ if (sk->sk_state != TCP_CLOSE)
err = __inet_hash(sk, NULL);
- local_bh_enable();
- }
return err;
}
EXPORT_SYMBOL_GPL(inet_hash);
-void inet_unhash(struct sock *sk)
+static void __inet_unhash(struct sock *sk, struct inet_listen_hashbucket *ilb)
{
- struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
- struct inet_listen_hashbucket *ilb = NULL;
- spinlock_t *lock;
-
if (sk_unhashed(sk))
return;
- if (sk->sk_state == TCP_LISTEN) {
- ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
- lock = &ilb->lock;
- } else {
- lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
- }
- spin_lock_bh(lock);
- if (sk_unhashed(sk))
- goto unlock;
-
if (rcu_access_pointer(sk->sk_reuseport_cb))
reuseport_detach_sock(sk);
if (ilb) {
+ struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
+
inet_unhash2(hashinfo, sk);
ilb->count--;
}
__sk_nulls_del_node_init_rcu(sk);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
-unlock:
- spin_unlock_bh(lock);
+}
+
+void inet_unhash(struct sock *sk)
+{
+ struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
+
+ if (sk_unhashed(sk))
+ return;
+
+ if (sk->sk_state == TCP_LISTEN) {
+ struct inet_listen_hashbucket *ilb;
+
+ ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
+ /* Don't disable bottom halves while acquiring the lock to
+ * avoid circular locking dependency on PREEMPT_RT.
+ */
+ spin_lock(&ilb->lock);
+ __inet_unhash(sk, ilb);
+ spin_unlock(&ilb->lock);
+ } else {
+ spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
+
+ spin_lock_bh(lock);
+ __inet_unhash(sk, NULL);
+ spin_unlock_bh(lock);
+ }
}
EXPORT_SYMBOL_GPL(inet_unhash);
+/* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm
+ * Note that we use 32bit integers (vs RFC 'short integers')
+ * because 2^16 is not a multiple of num_ephemeral and this
+ * property might be used by clever attacker.
+ *
+ * RFC claims using TABLE_LENGTH=10 buckets gives an improvement, though
+ * attacks were since demonstrated, thus we use 65536 by default instead
+ * to really give more isolation and privacy, at the expense of 256kB
+ * of kernel memory.
+ */
+#define INET_TABLE_PERTURB_SIZE (1 << CONFIG_INET_TABLE_PERTURB_ORDER)
+static u32 *table_perturb;
+
int __inet_hash_connect(struct inet_timewait_death_row *death_row,
- struct sock *sk, u32 port_offset,
+ struct sock *sk, u64 port_offset,
int (*check_established)(struct inet_timewait_death_row *,
struct sock *, __u16, struct inet_timewait_sock **))
{
@@ -724,8 +743,8 @@
struct inet_bind_bucket *tb;
u32 remaining, offset;
int ret, i, low, high;
- static u32 hint;
int l3mdev;
+ u32 index;
if (port) {
head = &hinfo->bhash[inet_bhashfn(net, port,
@@ -752,7 +771,13 @@
if (likely(remaining > 1))
remaining &= ~1U;
- offset = (hint + port_offset) % remaining;
+ get_random_slow_once(table_perturb,
+ INET_TABLE_PERTURB_SIZE * sizeof(*table_perturb));
+ index = port_offset & (INET_TABLE_PERTURB_SIZE - 1);
+
+ offset = READ_ONCE(table_perturb[index]) + (port_offset >> 32);
+ offset %= remaining;
+
/* In first pass we try ports of @low parity.
* inet_csk_get_port() does the opposite choice.
*/
@@ -806,7 +831,13 @@
return -EADDRNOTAVAIL;
ok:
- hint += i + 2;
+ /* Here we want to add a little bit of randomness to the next source
+ * port that will be chosen. We use a max() with a random here so that
+ * on low contention the randomness is maximal and on high contention
+ * it may be inexistent.
+ */
+ i = max_t(int, i, (prandom_u32() & 7) * 2);
+ WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);
/* Head lock still held and bh's disabled */
inet_bind_hash(sk, tb, port);
@@ -829,7 +860,7 @@
int inet_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk)
{
- u32 port_offset = 0;
+ u64 port_offset = 0;
if (!inet_sk(sk)->inet_num)
port_offset = inet_sk_port_offset(sk);
@@ -879,6 +910,12 @@
low_limit,
high_limit);
init_hashinfo_lhash2(h);
+
+ /* this one is used for source ports of outgoing connections */
+ table_perturb = kmalloc_array(INET_TABLE_PERTURB_SIZE,
+ sizeof(*table_perturb), GFP_KERNEL);
+ if (!table_perturb)
+ panic("TCP: failed to alloc table_perturb");
}
int inet_hashinfo2_init_mod(struct inet_hashinfo *h)
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index ff327a6..a186685 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -148,16 +148,20 @@
struct inet_peer *gc_stack[],
unsigned int gc_cnt)
{
+ int peer_threshold, peer_maxttl, peer_minttl;
struct inet_peer *p;
__u32 delta, ttl;
int i;
- if (base->total >= inet_peer_threshold)
+ peer_threshold = READ_ONCE(inet_peer_threshold);
+ peer_maxttl = READ_ONCE(inet_peer_maxttl);
+ peer_minttl = READ_ONCE(inet_peer_minttl);
+
+ if (base->total >= peer_threshold)
ttl = 0; /* be aggressive */
else
- ttl = inet_peer_maxttl
- - (inet_peer_maxttl - inet_peer_minttl) / HZ *
- base->total / inet_peer_threshold * HZ;
+ ttl = peer_maxttl - (peer_maxttl - peer_minttl) / HZ *
+ base->total / peer_threshold * HZ;
for (i = 0; i < gc_cnt; i++) {
p = gc_stack[i];
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 00ec819..29730ed 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -151,7 +151,7 @@
!skb_sec_path(skb))
ip_rt_send_redirect(skb);
- if (net->ipv4.sysctl_ip_fwd_update_priority)
+ if (READ_ONCE(net->ipv4.sysctl_ip_fwd_update_priority))
skb->priority = rt_tos2priority(iph->tos);
return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD,
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index e4504dd..6ab5c50 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -454,14 +454,12 @@
__be16 proto)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
-
- if (tunnel->parms.o_flags & TUNNEL_SEQ)
- tunnel->o_seqno++;
+ __be16 flags = tunnel->parms.o_flags;
/* Push GRE header. */
gre_build_header(skb, tunnel->tun_hlen,
- tunnel->parms.o_flags, proto, tunnel->parms.o_key,
- htonl(tunnel->o_seqno));
+ flags, proto, tunnel->parms.o_key,
+ (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}
@@ -499,7 +497,7 @@
(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
gre_build_header(skb, tunnel_hlen, flags, proto,
tunnel_id_to_key32(tun_info->key.tun_id),
- (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
+ (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
@@ -521,7 +519,6 @@
int tunnel_hlen;
int version;
int nhoff;
- int thoff;
tun_info = skb_tunnel_info(skb);
if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
@@ -555,10 +552,16 @@
(ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
truncate = true;
- thoff = skb_transport_header(skb) - skb_mac_header(skb);
- if (skb->protocol == htons(ETH_P_IPV6) &&
- (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff))
- truncate = true;
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ int thoff;
+
+ if (skb_transport_header_was_set(skb))
+ thoff = skb_transport_header(skb) - skb_mac_header(skb);
+ else
+ thoff = nhoff + sizeof(struct ipv6hdr);
+ if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)
+ truncate = true;
+ }
if (version == 1) {
erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
@@ -576,7 +579,7 @@
}
gre_build_header(skb, 8, TUNNEL_SEQ,
- proto, 0, htonl(tunnel->o_seqno++));
+ proto, 0, htonl(atomic_fetch_inc(&tunnel->o_seqno)));
ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
@@ -626,21 +629,20 @@
}
if (dev->header_ops) {
- const int pull_len = tunnel->hlen + sizeof(struct iphdr);
-
if (skb_cow_head(skb, 0))
goto free_skb;
tnl_params = (const struct iphdr *)skb->data;
- if (pull_len > skb_transport_offset(skb))
- goto free_skb;
-
/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
* to gre header.
*/
- skb_pull(skb, pull_len);
+ skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
skb_reset_mac_header(skb);
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL &&
+ skb_checksum_start(skb) < skb->data)
+ goto free_skb;
} else {
if (skb_cow_head(skb, dev->needed_headroom))
goto free_skb;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index b0c244a..eccd789 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -309,14 +309,13 @@
ip_hdr(hint)->tos == iph->tos;
}
-INDIRECT_CALLABLE_DECLARE(int udp_v4_early_demux(struct sk_buff *));
-INDIRECT_CALLABLE_DECLARE(int tcp_v4_early_demux(struct sk_buff *));
+int tcp_v4_early_demux(struct sk_buff *skb);
+int udp_v4_early_demux(struct sk_buff *skb);
static int ip_rcv_finish_core(struct net *net, struct sock *sk,
struct sk_buff *skb, struct net_device *dev,
const struct sk_buff *hint)
{
const struct iphdr *iph = ip_hdr(skb);
- int (*edemux)(struct sk_buff *skb);
struct rtable *rt;
int err;
@@ -327,21 +326,29 @@
goto drop_error;
}
- if (net->ipv4.sysctl_ip_early_demux &&
+ if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) &&
!skb_dst(skb) &&
!skb->sk &&
!ip_is_fragment(iph)) {
- const struct net_protocol *ipprot;
- int protocol = iph->protocol;
+ switch (iph->protocol) {
+ case IPPROTO_TCP:
+ if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux)) {
+ tcp_v4_early_demux(skb);
- ipprot = rcu_dereference(inet_protos[protocol]);
- if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
- err = INDIRECT_CALL_2(edemux, tcp_v4_early_demux,
- udp_v4_early_demux, skb);
- if (unlikely(err))
- goto drop_error;
- /* must reload iph, skb->head might have changed */
- iph = ip_hdr(skb);
+ /* must reload iph, skb->head might have changed */
+ iph = ip_hdr(skb);
+ }
+ break;
+ case IPPROTO_UDP:
+ if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) {
+ err = udp_v4_early_demux(skb);
+ if (unlikely(err))
+ goto drop_error;
+
+ /* must reload iph, skb->head might have changed */
+ iph = ip_hdr(skb);
+ }
+ break;
}
}
@@ -354,6 +361,11 @@
iph->tos, dev);
if (unlikely(err))
goto drop_error;
+ } else {
+ struct in_device *in_dev = __in_dev_get_rcu(dev);
+
+ if (in_dev && IN_DEV_ORCONF(in_dev, NOPOLICY))
+ IPCB(skb)->flags |= IPSKB_NOPOLICY;
}
#ifdef CONFIG_IP_ROUTE_CLASSID
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 5e48b3d..0dbf950 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1712,7 +1712,7 @@
daddr, saddr,
tcp_hdr(skb)->source, tcp_hdr(skb)->dest,
arg->uid);
- security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
+ security_skb_classify_flow(skb, flowi4_to_flowi_common(&fl4));
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt))
return;
@@ -1721,7 +1721,7 @@
sk->sk_protocol = ip_hdr(skb)->protocol;
sk->sk_bound_dev_if = arg->bound_dev_if;
- sk->sk_sndbuf = sysctl_wmem_default;
+ sk->sk_sndbuf = READ_ONCE(sysctl_wmem_default);
ipc.sockc.mark = fl4.flowi4_mark;
err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base,
len, 0, &ipc, &rt, MSG_DONTWAIT);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index ec60367..4cc39c6 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -773,7 +773,7 @@
if (optlen < GROUP_FILTER_SIZE(0))
return -EINVAL;
- if (optlen > sysctl_optmem_max)
+ if (optlen > READ_ONCE(sysctl_optmem_max))
return -ENOBUFS;
gsf = memdup_sockptr(optval, optlen);
@@ -783,7 +783,7 @@
/* numsrc >= (4G-140)/128 overflow in 32 bits */
err = -ENOBUFS;
if (gsf->gf_numsrc >= 0x1ffffff ||
- gsf->gf_numsrc > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
+ gsf->gf_numsrc > READ_ONCE(sock_net(sk)->ipv4.sysctl_igmp_max_msf))
goto out_free_gsf;
err = -EINVAL;
@@ -808,7 +808,7 @@
if (optlen < size0)
return -EINVAL;
- if (optlen > sysctl_optmem_max - 4)
+ if (optlen > READ_ONCE(sysctl_optmem_max) - 4)
return -ENOBUFS;
p = kmalloc(optlen + 4, GFP_KERNEL);
@@ -832,7 +832,7 @@
/* numsrc >= (4G-140)/128 overflow in 32 bits */
err = -ENOBUFS;
- if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
+ if (n > READ_ONCE(sock_net(sk)->ipv4.sysctl_igmp_max_msf))
goto out_free_gsf;
err = set_mcast_msfilter(sk, gf32->gf_interface, n, gf32->gf_fmode,
&gf32->gf_group, gf32->gf_slist);
@@ -1231,7 +1231,7 @@
if (optlen < IP_MSFILTER_SIZE(0))
goto e_inval;
- if (optlen > sysctl_optmem_max) {
+ if (optlen > READ_ONCE(sysctl_optmem_max)) {
err = -ENOBUFS;
break;
}
@@ -1242,7 +1242,7 @@
}
/* numsrc >= (1G-4) overflow in 32 bits */
if (msf->imsf_numsrc >= 0x3ffffffcU ||
- msf->imsf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
+ msf->imsf_numsrc > READ_ONCE(net->ipv4.sysctl_igmp_max_msf)) {
kfree(msf);
err = -ENOBUFS;
break;
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index e25be2d..4b74c67 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -410,7 +410,7 @@
u32 mtu = dst_mtu(encap_dst) - headroom;
if ((skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) ||
- (!skb_is_gso(skb) && (skb->len - skb_mac_header_len(skb)) <= mtu))
+ (!skb_is_gso(skb) && (skb->len - skb_network_offset(skb)) <= mtu))
return 0;
skb_dst_update_pmtu_no_confirm(skb, mtu);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 1088564..77e3b67 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -424,7 +424,7 @@
switch (ctinfo) {
case IP_CT_NEW:
- ct->mark = hash;
+ WRITE_ONCE(ct->mark, hash);
break;
case IP_CT_RELATED:
case IP_CT_RELATED_REPLY:
@@ -441,7 +441,7 @@
#ifdef DEBUG
nf_ct_dump_tuple_ip(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
#endif
- pr_debug("hash=%u ct_hash=%u ", hash, ct->mark);
+ pr_debug("hash=%u ct_hash=%u ", hash, READ_ONCE(ct->mark));
if (!clusterip_responsible(cipinfo->config, hash)) {
pr_debug("not responsible\n");
return NF_DROP;
diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c
index bcdb37f..aeb6317 100644
--- a/net/ipv4/netfilter/nft_dup_ipv4.c
+++ b/net/ipv4/netfilter/nft_dup_ipv4.c
@@ -13,8 +13,8 @@
#include <net/netfilter/ipv4/nf_dup_ipv4.h>
struct nft_dup_ipv4 {
- enum nft_registers sreg_addr:8;
- enum nft_registers sreg_dev:8;
+ u8 sreg_addr;
+ u8 sreg_dev;
};
static void nft_dup_ipv4_eval(const struct nft_expr *expr,
@@ -40,16 +40,16 @@
if (tb[NFTA_DUP_SREG_ADDR] == NULL)
return -EINVAL;
- priv->sreg_addr = nft_parse_register(tb[NFTA_DUP_SREG_ADDR]);
- err = nft_validate_register_load(priv->sreg_addr, sizeof(struct in_addr));
+ err = nft_parse_register_load(tb[NFTA_DUP_SREG_ADDR], &priv->sreg_addr,
+ sizeof(struct in_addr));
if (err < 0)
return err;
- if (tb[NFTA_DUP_SREG_DEV] != NULL) {
- priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]);
- return nft_validate_register_load(priv->sreg_dev, sizeof(int));
- }
- return 0;
+ if (tb[NFTA_DUP_SREG_DEV])
+ err = nft_parse_register_load(tb[NFTA_DUP_SREG_DEV],
+ &priv->sreg_dev, sizeof(int));
+
+ return err;
}
static int nft_dup_ipv4_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
index 03df986..9e6f0f1 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -83,6 +83,9 @@
else
oif = NULL;
+ if (priv->flags & NFTA_FIB_F_IIF)
+ fl4.flowi4_oif = l3mdev_master_ifindex_rcu(oif);
+
if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
nft_fib_store_result(dest, priv, nft_in(pkt));
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 8bd3f5e..7a0102a 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -882,7 +882,7 @@
/* __ip6_del_rt does a release, so do a hold here */
fib6_info_hold(f6i);
ipv6_stub->ip6_del_rt(net, f6i,
- !net->ipv4.sysctl_nexthop_compat_mode);
+ !READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode));
}
}
@@ -1194,7 +1194,8 @@
if (!rc) {
nh_base_seq_inc(net);
nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo);
- if (replace_notify && net->ipv4.sysctl_nexthop_compat_mode)
+ if (replace_notify &&
+ READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode))
nexthop_replace_notify(net, new_nh, &cfg->nlinfo);
}
@@ -1345,7 +1346,7 @@
if (!err) {
nh->nh_flags = fib_nh->fib_nh_flags;
fib_info_update_nhc_saddr(net, &fib_nh->nh_common,
- fib_nh->fib_nh_scope);
+ !fib_nh->fib_nh_scope ? 0 : fib_nh->fib_nh_scope - 1);
} else {
fib_nh_release(net, fib_nh);
}
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index e60ca03..1bad851 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -305,6 +305,7 @@
struct net *net = sock_net(sk);
if (sk->sk_family == AF_INET) {
struct sockaddr_in *addr = (struct sockaddr_in *) uaddr;
+ u32 tb_id = RT_TABLE_LOCAL;
int chk_addr_ret;
if (addr_len < sizeof(*addr))
@@ -320,8 +321,10 @@
if (addr->sin_addr.s_addr == htonl(INADDR_ANY))
chk_addr_ret = RTN_LOCAL;
- else
- chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr);
+ else {
+ tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id;
+ chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id);
+ }
if ((!inet_can_nonlocal_bind(net, isk) &&
chk_addr_ret != RTN_LOCAL) ||
@@ -359,6 +362,14 @@
return -ENODEV;
}
}
+
+ if (!dev && sk->sk_bound_dev_if) {
+ dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
+ if (!dev) {
+ rcu_read_unlock();
+ return -ENODEV;
+ }
+ }
has_addr = pingv6_ops.ipv6_chk_addr(net, &addr->sin6_addr, dev,
scoped);
rcu_read_unlock();
@@ -785,7 +796,7 @@
fl4.fl4_icmp_type = user_icmph.type;
fl4.fl4_icmp_code = user_icmph.code;
- security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
+ security_sk_classify_flow(sk, flowi4_to_flowi_common(&fl4));
rt = ip_route_output_flow(net, &fl4, sk);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 5d95f80..4899ebe 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -640,7 +640,7 @@
goto done;
}
- security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
+ security_sk_classify_flow(sk, flowi4_to_flowi_common(&fl4));
rt = ip_route_output_flow(net, &fl4, sk);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ce787c3..3746476 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -529,6 +529,15 @@
}
EXPORT_SYMBOL(__ip_select_ident);
+static void ip_rt_fix_tos(struct flowi4 *fl4)
+{
+ __u8 tos = RT_FL_TOS(fl4);
+
+ fl4->flowi4_tos = tos & IPTOS_RT_MASK;
+ fl4->flowi4_scope = tos & RTO_ONLINK ?
+ RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
+}
+
static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
const struct sock *sk,
const struct iphdr *iph,
@@ -853,6 +862,7 @@
rt = (struct rtable *) dst;
__build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
+ ip_rt_fix_tos(&fl4);
__ip_do_redirect(rt, skb, &fl4, true);
}
@@ -1077,6 +1087,7 @@
struct flowi4 fl4;
ip_rt_build_flow_key(&fl4, sk, skb);
+ ip_rt_fix_tos(&fl4);
/* Don't make lookup fail for bridged encapsulations */
if (skb && netif_is_any_bridge_port(skb->dev))
@@ -1151,6 +1162,8 @@
goto out;
new = true;
+ } else {
+ ip_rt_fix_tos(&fl4);
}
__ip_rt_update_pmtu((struct rtable *)xfrm_dst_path(&rt->dst), &fl4, mtu);
@@ -1429,7 +1442,7 @@
struct fib_info *fi = res->fi;
u32 mtu = 0;
- if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu ||
+ if (READ_ONCE(dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu) ||
fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU))
mtu = fi->fib_mtu;
@@ -1752,6 +1765,7 @@
struct in_device *in_dev = __in_dev_get_rcu(dev);
unsigned int flags = RTCF_MULTICAST;
struct rtable *rth;
+ bool no_policy;
u32 itag = 0;
int err;
@@ -1762,8 +1776,12 @@
if (our)
flags |= RTCF_LOCAL;
+ no_policy = IN_DEV_ORCONF(in_dev, NOPOLICY);
+ if (no_policy)
+ IPCB(skb)->flags |= IPSKB_NOPOLICY;
+
rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
- IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
+ no_policy, false);
if (!rth)
return -ENOBUFS;
@@ -1779,6 +1797,7 @@
#endif
RT_CACHE_STAT_INC(in_slow_mc);
+ skb_dst_drop(skb);
skb_dst_set(skb, &rth->dst);
return 0;
}
@@ -1821,7 +1840,7 @@
struct rtable *rth;
int err;
struct in_device *out_dev;
- bool do_cache;
+ bool do_cache, no_policy;
u32 itag = 0;
/* get a working reference to the output device */
@@ -1866,6 +1885,10 @@
}
}
+ no_policy = IN_DEV_ORCONF(in_dev, NOPOLICY);
+ if (no_policy)
+ IPCB(skb)->flags |= IPSKB_NOPOLICY;
+
fnhe = find_exception(nhc, daddr);
if (do_cache) {
if (fnhe)
@@ -1878,9 +1901,8 @@
}
}
- rth = rt_dst_alloc(out_dev->dev, 0, res->type,
- IN_DEV_CONF_GET(in_dev, NOPOLICY),
- IN_DEV_CONF_GET(out_dev, NOXFRM));
+ rth = rt_dst_alloc(out_dev->dev, 0, res->type, no_policy,
+ IN_DEV_ORCONF(out_dev, NOXFRM));
if (!rth) {
err = -ENOBUFS;
goto cleanup;
@@ -2131,6 +2153,7 @@
struct rtable *rth;
struct flowi4 fl4;
bool do_cache = true;
+ bool no_policy;
/* IP on this device is disabled. */
@@ -2248,6 +2271,10 @@
RT_CACHE_STAT_INC(in_brd);
local_input:
+ no_policy = IN_DEV_ORCONF(in_dev, NOPOLICY);
+ if (no_policy)
+ IPCB(skb)->flags |= IPSKB_NOPOLICY;
+
do_cache &= res->fi && !itag;
if (do_cache) {
struct fib_nh_common *nhc = FIB_RES_NHC(*res);
@@ -2262,7 +2289,7 @@
rth = rt_dst_alloc(ip_rt_get_dev(net, res),
flags | RTCF_LOCAL, res->type,
- IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
+ no_policy, false);
if (!rth)
goto e_nobufs;
@@ -2485,8 +2512,8 @@
add:
rth = rt_dst_alloc(dev_out, flags, type,
- IN_DEV_CONF_GET(in_dev, NOPOLICY),
- IN_DEV_CONF_GET(in_dev, NOXFRM));
+ IN_DEV_ORCONF(in_dev, NOPOLICY),
+ IN_DEV_ORCONF(in_dev, NOXFRM));
if (!rth)
return ERR_PTR(-ENOBUFS);
@@ -2524,7 +2551,6 @@
struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
const struct sk_buff *skb)
{
- __u8 tos = RT_FL_TOS(fl4);
struct fib_result res = {
.type = RTN_UNSPEC,
.fi = NULL,
@@ -2534,9 +2560,7 @@
struct rtable *rth;
fl4->flowi4_iif = LOOPBACK_IFINDEX;
- fl4->flowi4_tos = tos & IPTOS_RT_MASK;
- fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
- RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
+ ip_rt_fix_tos(fl4);
rcu_read_lock();
rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 00dc3f9..41afc91 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -249,12 +249,12 @@
return true;
}
- if (!net->ipv4.sysctl_tcp_timestamps)
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_timestamps))
return false;
tcp_opt->sack_ok = (options & TS_OPT_SACK) ? TCP_SACK_SEEN : 0;
- if (tcp_opt->sack_ok && !net->ipv4.sysctl_tcp_sack)
+ if (tcp_opt->sack_ok && !READ_ONCE(net->ipv4.sysctl_tcp_sack))
return false;
if ((options & TS_OPT_WSCALE_MASK) == TS_OPT_WSCALE_MASK)
@@ -263,7 +263,7 @@
tcp_opt->wscale_ok = 1;
tcp_opt->snd_wscale = options & TS_OPT_WSCALE_MASK;
- return net->ipv4.sysctl_tcp_window_scaling != 0;
+ return READ_ONCE(net->ipv4.sysctl_tcp_window_scaling) != 0;
}
EXPORT_SYMBOL(cookie_timestamp_decode);
@@ -283,6 +283,7 @@
EXPORT_SYMBOL(cookie_ecn_ok);
struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
+ const struct tcp_request_sock_ops *af_ops,
struct sock *sk,
struct sk_buff *skb)
{
@@ -299,6 +300,10 @@
return NULL;
treq = tcp_rsk(req);
+
+ /* treq->af_specific might be used to perform TCP_MD5 lookup */
+ treq->af_specific = af_ops;
+
treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield;
#if IS_ENABLED(CONFIG_MPTCP)
treq->is_mptcp = sk_is_mptcp(sk);
@@ -337,7 +342,8 @@
struct flowi4 fl4;
u32 tsoff = 0;
- if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) ||
+ !th->ack || th->rst)
goto out;
if (tcp_synq_no_recent_overflow(sk))
@@ -366,7 +372,8 @@
goto out;
ret = NULL;
- req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, sk, skb);
+ req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops,
+ &tcp_request_sock_ipv4_ops, sk, skb);
if (!req)
goto out;
@@ -418,7 +425,7 @@
inet_sk_flowi_flags(sk),
opt->srr ? opt->faddr : ireq->ir_rmt_addr,
ireq->ir_loc_addr, th->source, th->dest, sk->sk_uid);
- security_req_classify_flow(req, flowi4_to_flowi(&fl4));
+ security_req_classify_flow(req, flowi4_to_flowi_common(&fl4));
rt = ip_route_output_key(sock_net(sk), &fl4);
if (IS_ERR(rt)) {
reqsk_free(req);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 0882980..439970e 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -95,7 +95,7 @@
* port limit.
*/
if ((range[1] < range[0]) ||
- (range[0] < net->ipv4.sysctl_ip_prot_sock))
+ (range[0] < READ_ONCE(net->ipv4.sysctl_ip_prot_sock)))
ret = -EINVAL;
else
set_local_port_range(net, range);
@@ -121,7 +121,7 @@
.extra2 = &ip_privileged_port_max,
};
- pports = net->ipv4.sysctl_ip_prot_sock;
+ pports = READ_ONCE(net->ipv4.sysctl_ip_prot_sock);
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
@@ -133,7 +133,7 @@
if (range[0] < pports)
ret = -EINVAL;
else
- net->ipv4.sysctl_ip_prot_sock = pports;
+ WRITE_ONCE(net->ipv4.sysctl_ip_prot_sock, pports);
}
return ret;
@@ -361,61 +361,6 @@
return ret;
}
-static void proc_configure_early_demux(int enabled, int protocol)
-{
- struct net_protocol *ipprot;
-#if IS_ENABLED(CONFIG_IPV6)
- struct inet6_protocol *ip6prot;
-#endif
-
- rcu_read_lock();
-
- ipprot = rcu_dereference(inet_protos[protocol]);
- if (ipprot)
- ipprot->early_demux = enabled ? ipprot->early_demux_handler :
- NULL;
-
-#if IS_ENABLED(CONFIG_IPV6)
- ip6prot = rcu_dereference(inet6_protos[protocol]);
- if (ip6prot)
- ip6prot->early_demux = enabled ? ip6prot->early_demux_handler :
- NULL;
-#endif
- rcu_read_unlock();
-}
-
-static int proc_tcp_early_demux(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- int ret = 0;
-
- ret = proc_dointvec(table, write, buffer, lenp, ppos);
-
- if (write && !ret) {
- int enabled = init_net.ipv4.sysctl_tcp_early_demux;
-
- proc_configure_early_demux(enabled, IPPROTO_TCP);
- }
-
- return ret;
-}
-
-static int proc_udp_early_demux(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- int ret = 0;
-
- ret = proc_dointvec(table, write, buffer, lenp, ppos);
-
- if (write && !ret) {
- int enabled = init_net.ipv4.sysctl_udp_early_demux;
-
- proc_configure_early_demux(enabled, IPPROTO_UDP);
- }
-
- return ret;
-}
-
static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
int write, void *buffer,
size_t *lenp, loff_t *ppos)
@@ -685,14 +630,14 @@
.data = &init_net.ipv4.sysctl_udp_early_demux,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_udp_early_demux
+ .proc_handler = proc_douintvec_minmax,
},
{
.procname = "tcp_early_demux",
.data = &init_net.ipv4.sysctl_tcp_early_demux,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_tcp_early_demux
+ .proc_handler = proc_douintvec_minmax,
},
{
.procname = "nexthop_compat_mode",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a3ec2a0..cc588bc 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -440,7 +440,7 @@
tp->snd_cwnd_clamp = ~0;
tp->mss_cache = TCP_MSS_DEFAULT;
- tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
+ tp->reordering = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering);
tcp_assign_congestion_control(sk);
tp->tsoffset = 0;
@@ -451,8 +451,8 @@
icsk->icsk_sync_mss = tcp_sync_mss;
- WRITE_ONCE(sk->sk_sndbuf, sock_net(sk)->ipv4.sysctl_tcp_wmem[1]);
- WRITE_ONCE(sk->sk_rcvbuf, sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);
+ WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]));
+ WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]));
sk_sockets_allocated_inc(sk);
sk->sk_route_forced_caps = NETIF_F_GSO;
@@ -698,7 +698,7 @@
int size_goal)
{
return skb->len < size_goal &&
- sock_net(sk)->ipv4.sysctl_tcp_autocorking &&
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_autocorking) &&
!tcp_rtx_queue_empty(sk) &&
refcount_read(&sk->sk_wmem_alloc) > skb->truesize;
}
@@ -1148,7 +1148,8 @@
struct sockaddr *uaddr = msg->msg_name;
int err, flags;
- if (!(sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
+ if (!(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) &
+ TFO_CLIENT_ENABLE) ||
(uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) &&
uaddr->sa_family == AF_UNSPEC))
return -EOPNOTSUPP;
@@ -1710,7 +1711,7 @@
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
cap = sk->sk_rcvbuf >> 1;
else
- cap = sock_net(sk)->ipv4.sysctl_tcp_rmem[2] >> 1;
+ cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
val = min(val, cap);
WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
@@ -2490,7 +2491,8 @@
static bool tcp_too_many_orphans(int shift)
{
- return READ_ONCE(tcp_orphan_cache) << shift > sysctl_tcp_max_orphans;
+ return READ_ONCE(tcp_orphan_cache) << shift >
+ READ_ONCE(sysctl_tcp_max_orphans);
}
bool tcp_check_oom(struct sock *sk, int shift)
@@ -2794,6 +2796,8 @@
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
tp->snd_cwnd = TCP_INIT_CWND;
tp->snd_cwnd_cnt = 0;
+ tp->is_cwnd_limited = 0;
+ tp->max_packets_out = 0;
tp->window_clamp = 0;
tp->delivered = 0;
tp->delivered_ce = 0;
@@ -2812,8 +2816,7 @@
icsk->icsk_ack.rcv_mss = TCP_MIN_MSS;
memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
__sk_dst_reset(sk);
- dst_release(sk->sk_rx_dst);
- sk->sk_rx_dst = NULL;
+ dst_release(xchg((__force struct dst_entry **)&sk->sk_rx_dst, NULL));
tcp_saved_syn_free(tp);
tp->compressed_ack = 0;
tp->segs_in = 0;
@@ -3288,7 +3291,7 @@
case TCP_REPAIR_OPTIONS:
if (!tp->repair)
err = -EINVAL;
- else if (sk->sk_state == TCP_ESTABLISHED)
+ else if (sk->sk_state == TCP_ESTABLISHED && !tp->bytes_sent)
err = tcp_repair_options_est(sk, optval, optlen);
else
err = -EPERM;
@@ -3389,7 +3392,8 @@
case TCP_FASTOPEN_CONNECT:
if (val > 1 || val < 0) {
err = -EINVAL;
- } else if (net->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
+ } else if (READ_ONCE(net->ipv4.sysctl_tcp_fastopen) &
+ TFO_CLIENT_ENABLE) {
if (sk->sk_state == TCP_CLOSE)
tp->fastopen_connect = val;
else
@@ -3726,7 +3730,7 @@
case TCP_LINGER2:
val = tp->linger2;
if (val >= 0)
- val = (val ? : net->ipv4.sysctl_tcp_fin_timeout) / HZ;
+ val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ;
break;
case TCP_DEFER_ACCEPT:
val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
@@ -4038,12 +4042,16 @@
* to memory. See smp_rmb() in tcp_get_md5sig_pool()
*/
smp_wmb();
- tcp_md5sig_pool_populated = true;
+ /* Paired with READ_ONCE() from tcp_alloc_md5sig_pool()
+ * and tcp_get_md5sig_pool().
+ */
+ WRITE_ONCE(tcp_md5sig_pool_populated, true);
}
bool tcp_alloc_md5sig_pool(void)
{
- if (unlikely(!tcp_md5sig_pool_populated)) {
+ /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
+ if (unlikely(!READ_ONCE(tcp_md5sig_pool_populated))) {
mutex_lock(&tcp_md5sig_mutex);
if (!tcp_md5sig_pool_populated) {
@@ -4054,7 +4062,8 @@
mutex_unlock(&tcp_md5sig_mutex);
}
- return tcp_md5sig_pool_populated;
+ /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
+ return READ_ONCE(tcp_md5sig_pool_populated);
}
EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
@@ -4070,7 +4079,8 @@
{
local_bh_disable();
- if (tcp_md5sig_pool_populated) {
+ /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
+ if (READ_ONCE(tcp_md5sig_pool_populated)) {
/* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */
smp_rmb();
return this_cpu_ptr(&tcp_md5sig_pool);
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 6b745ce..809ee0f 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -218,10 +218,9 @@
struct sk_psock *psock = sk_psock_get(sk);
int ret;
- if (unlikely(!psock)) {
- sk_msg_free(sk, msg);
- return 0;
- }
+ if (unlikely(!psock))
+ return -EPIPE;
+
ret = ingress ? bpf_tcp_ingress(sk, psock, msg, bytes, flags) :
tcp_bpf_push_locked(sk, msg, bytes, flags, false);
sk_psock_put(sk, psock);
@@ -316,7 +315,7 @@
{
bool cork = false, enospc = sk_msg_full(msg);
struct sock *sk_redir;
- u32 tosend, delta = 0;
+ u32 tosend, origsize, sent, delta = 0;
u32 eval = __SK_NONE;
int ret;
@@ -374,7 +373,9 @@
sk_msg_return(sk, msg, tosend);
release_sock(sk);
+ origsize = msg->sg.size;
ret = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags);
+ sent = origsize - msg->sg.size;
if (eval == __SK_REDIRECT)
sock_put(sk_redir);
@@ -411,8 +412,11 @@
}
if (msg &&
msg->sg.data[msg->sg.start].page_link &&
- msg->sg.data[msg->sg.start].length)
+ msg->sg.data[msg->sg.start].length) {
+ if (eval == __SK_REDIRECT)
+ sk_mem_charge(sk, tosend - sent);
goto more_data;
+ }
}
return ret;
}
diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c
index 709d238..56dede4 100644
--- a/net/ipv4/tcp_cdg.c
+++ b/net/ipv4/tcp_cdg.c
@@ -375,6 +375,7 @@
struct cdg *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
+ ca->gradients = NULL;
/* We silently fall back to window = 1 if allocation fails. */
if (window > 1)
ca->gradients = kcalloc(window, sizeof(ca->gradients[0]),
@@ -388,6 +389,7 @@
struct cdg *ca = inet_csk_ca(sk);
kfree(ca->gradients);
+ ca->gradients = NULL;
}
static struct tcp_congestion_ops tcp_cdg __read_mostly = {
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 1071119..39fb037 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -349,7 +349,7 @@
const struct dst_entry *dst,
int flag)
{
- return (sock_net(sk)->ipv4.sysctl_tcp_fastopen & flag) ||
+ return (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) & flag) ||
tcp_sk(sk)->fastopen_no_cookie ||
(dst && dst_metric(dst, RTAX_FASTOPEN_NO_COOKIE));
}
@@ -364,7 +364,7 @@
const struct dst_entry *dst)
{
bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
- int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+ int tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen);
struct tcp_fastopen_cookie valid_foc = { .len = -1 };
struct sock *child;
int ret = 0;
@@ -506,7 +506,7 @@
{
struct net *net = sock_net(sk);
- if (!sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout))
return;
/* Paired with READ_ONCE() in tcp_fastopen_active_should_disable() */
@@ -527,7 +527,8 @@
*/
bool tcp_fastopen_active_should_disable(struct sock *sk)
{
- unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout;
+ unsigned int tfo_bh_timeout =
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout);
unsigned long timeout;
int tfo_da_times;
int multiplier;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 12dd08a..541758c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -425,7 +425,7 @@
if (sk->sk_sndbuf < sndmem)
WRITE_ONCE(sk->sk_sndbuf,
- min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]));
+ min(sndmem, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[2])));
}
/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@ -454,12 +454,13 @@
*/
/* Slow part of check#2. */
-static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
+static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb,
+ unsigned int skbtruesize)
{
struct tcp_sock *tp = tcp_sk(sk);
/* Optimize this! */
- int truesize = tcp_win_from_space(sk, skb->truesize) >> 1;
- int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
+ int truesize = tcp_win_from_space(sk, skbtruesize) >> 1;
+ int window = tcp_win_from_space(sk, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])) >> 1;
while (tp->rcv_ssthresh <= window) {
if (truesize <= skb->len)
@@ -471,7 +472,27 @@
return 0;
}
-static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
+/* Even if skb appears to have a bad len/truesize ratio, TCP coalescing
+ * can play nice with us, as sk_buff and skb->head might be either
+ * freed or shared with up to MAX_SKB_FRAGS segments.
+ * Only give a boost to drivers using page frag(s) to hold the frame(s),
+ * and if no payload was pulled in skb->head before reaching us.
+ */
+static u32 truesize_adjust(bool adjust, const struct sk_buff *skb)
+{
+ u32 truesize = skb->truesize;
+
+ if (adjust && !skb_headlen(skb)) {
+ truesize -= SKB_TRUESIZE(skb_end_offset(skb));
+ /* paranoid check, some drivers might be buggy */
+ if (unlikely((int)truesize < (int)skb->len))
+ truesize = skb->truesize;
+ }
+ return truesize;
+}
+
+static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb,
+ bool adjust)
{
struct tcp_sock *tp = tcp_sk(sk);
int room;
@@ -480,15 +501,16 @@
/* Check #1 */
if (room > 0 && !tcp_under_memory_pressure(sk)) {
+ unsigned int truesize = truesize_adjust(adjust, skb);
int incr;
/* Check #2. Increase window, if skb with such overhead
* will fit to rcvbuf in future.
*/
- if (tcp_win_from_space(sk, skb->truesize) <= skb->len)
+ if (tcp_win_from_space(sk, truesize) <= skb->len)
incr = 2 * tp->advmss;
else
- incr = __tcp_grow_window(sk, skb);
+ incr = __tcp_grow_window(sk, skb, truesize);
if (incr) {
incr = max_t(int, incr, 2 * skb->len);
@@ -503,7 +525,7 @@
*/
static void tcp_init_buffer_space(struct sock *sk)
{
- int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
+ int tcp_app_win = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_app_win);
struct tcp_sock *tp = tcp_sk(sk);
int maxwin;
@@ -543,16 +565,17 @@
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
struct net *net = sock_net(sk);
+ int rmem2;
icsk->icsk_ack.quick = 0;
+ rmem2 = READ_ONCE(net->ipv4.sysctl_tcp_rmem[2]);
- if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] &&
+ if (sk->sk_rcvbuf < rmem2 &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
!tcp_under_memory_pressure(sk) &&
sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
WRITE_ONCE(sk->sk_rcvbuf,
- min(atomic_read(&sk->sk_rmem_alloc),
- net->ipv4.sysctl_tcp_rmem[2]));
+ min(atomic_read(&sk->sk_rmem_alloc), rmem2));
}
if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
@@ -693,7 +716,7 @@
* <prev RTT . ><current RTT .. ><next RTT .... >
*/
- if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
int rcvmem, rcvbuf;
u64 rcvwin, grow;
@@ -714,7 +737,7 @@
do_div(rcvwin, tp->advmss);
rcvbuf = min_t(u64, rcvwin * rcvmem,
- sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
if (rcvbuf > sk->sk_rcvbuf) {
WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
@@ -782,7 +805,7 @@
tcp_ecn_check_ce(sk, skb);
if (skb->len >= 128)
- tcp_grow_window(sk, skb);
+ tcp_grow_window(sk, skb, true);
}
/* Called to compute a smoothed rtt estimate. The data fed to this
@@ -1011,7 +1034,7 @@
tp->undo_marker ? tp->undo_retrans : 0);
#endif
tp->reordering = min_t(u32, (metric + mss - 1) / mss,
- sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering));
}
/* This exciting event is worth to be remembered. 8) */
@@ -1990,7 +2013,7 @@
return;
tp->reordering = min_t(u32, tp->packets_out + addend,
- sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering));
tp->reord_seen++;
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
}
@@ -2055,7 +2078,8 @@
static bool tcp_is_rack(const struct sock *sk)
{
- return sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION;
+ return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
+ TCP_RACK_LOSS_DETECTION;
}
/* If we detect SACK reneging, forget all SACK information
@@ -2099,6 +2123,7 @@
struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
+ u8 reordering;
tcp_timeout_mark_lost(sk);
@@ -2119,10 +2144,12 @@
/* Timeout in disordered state after receiving substantial DUPACKs
* suggests that the degree of reordering is over-estimated.
*/
+ reordering = READ_ONCE(net->ipv4.sysctl_tcp_reordering);
if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
- tp->sacked_out >= net->ipv4.sysctl_tcp_reordering)
+ tp->sacked_out >= reordering)
tp->reordering = min_t(unsigned int, tp->reordering,
- net->ipv4.sysctl_tcp_reordering);
+ reordering);
+
tcp_set_ca_state(sk, TCP_CA_Loss);
tp->high_seq = tp->snd_nxt;
tcp_ecn_queue_cwr(tp);
@@ -2131,7 +2158,7 @@
* loss recovery is underway except recurring timeout(s) on
* the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
*/
- tp->frto = net->ipv4.sysctl_tcp_frto &&
+ tp->frto = READ_ONCE(net->ipv4.sysctl_tcp_frto) &&
(new_recovery || icsk->icsk_retransmits) &&
!inet_csk(sk)->icsk_mtup.probe_size;
}
@@ -2148,7 +2175,8 @@
*/
static bool tcp_check_sack_reneging(struct sock *sk, int flag)
{
- if (flag & FLAG_SACK_RENEGING) {
+ if (flag & FLAG_SACK_RENEGING &&
+ flag & FLAG_SND_UNA_ADVANCED) {
struct tcp_sock *tp = tcp_sk(sk);
unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4),
msecs_to_jiffies(10));
@@ -2469,6 +2497,21 @@
return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
}
+static bool tcp_is_non_sack_preventing_reopen(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
+ /* Hold old state until something *above* high_seq
+ * is ACKed. For Reno it is MUST to prevent false
+ * fast retransmits (RFC2582). SACK TCP is safe. */
+ if (!tcp_any_retrans_done(sk))
+ tp->retrans_stamp = 0;
+ return true;
+ }
+ return false;
+}
+
/* People celebrate: "We love our President!" */
static bool tcp_try_undo_recovery(struct sock *sk)
{
@@ -2491,14 +2534,8 @@
} else if (tp->rack.reo_wnd_persist) {
tp->rack.reo_wnd_persist--;
}
- if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
- /* Hold old state until something *above* high_seq
- * is ACKed. For Reno it is MUST to prevent false
- * fast retransmits (RFC2582). SACK TCP is safe. */
- if (!tcp_any_retrans_done(sk))
- tp->retrans_stamp = 0;
+ if (tcp_is_non_sack_preventing_reopen(sk))
return true;
- }
tcp_set_ca_state(sk, TCP_CA_Open);
tp->is_sack_reneg = 0;
return false;
@@ -2534,6 +2571,8 @@
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPSPURIOUSRTOS);
inet_csk(sk)->icsk_retransmits = 0;
+ if (tcp_is_non_sack_preventing_reopen(sk))
+ return true;
if (frto_undo || tcp_is_sack(tp)) {
tcp_set_ca_state(sk, TCP_CA_Open);
tp->is_sack_reneg = 0;
@@ -2667,12 +2706,15 @@
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
+ u64 val;
- /* FIXME: breaks with very large cwnd */
tp->prior_ssthresh = tcp_current_ssthresh(sk);
- tp->snd_cwnd = tp->snd_cwnd *
- tcp_mss_to_mtu(sk, tp->mss_cache) /
- icsk->icsk_mtup.probe_size;
+
+ val = (u64)tp->snd_cwnd * tcp_mss_to_mtu(sk, tp->mss_cache);
+ do_div(val, icsk->icsk_mtup.probe_size);
+ WARN_ON_ONCE((u32)val != val);
+ tp->snd_cwnd = max_t(u32, 1U, val);
+
tp->snd_cwnd_cnt = 0;
tp->snd_cwnd_stamp = tcp_jiffies32;
tp->snd_ssthresh = tcp_current_ssthresh(sk);
@@ -2997,7 +3039,7 @@
static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag)
{
- u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
+ u32 wlen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen) * HZ;
struct tcp_sock *tp = tcp_sk(sk);
if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) {
@@ -3408,7 +3450,8 @@
* new SACK or ECE mark may first advance cwnd here and later reduce
* cwnd in tcp_fastretrans_alert() based on more states.
*/
- if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering)
+ if (tcp_sk(sk)->reordering >
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering))
return flag & FLAG_FORWARD_PROGRESS;
return flag & FLAG_DATA_ACKED;
@@ -3520,7 +3563,8 @@
if (*last_oow_ack_time) {
s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
- if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
+ if (0 <= elapsed &&
+ elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) {
NET_INC_STATS(net, mib_idx);
return true; /* rate-limited: don't send yet! */
}
@@ -3567,11 +3611,11 @@
/* Then check host-wide RFC 5961 rate limit. */
now = jiffies / HZ;
- if (now != challenge_timestamp) {
- u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
+ if (now != READ_ONCE(challenge_timestamp)) {
+ u32 ack_limit = READ_ONCE(net->ipv4.sysctl_tcp_challenge_ack_limit);
u32 half = (ack_limit + 1) >> 1;
- challenge_timestamp = now;
+ WRITE_ONCE(challenge_timestamp, now);
WRITE_ONCE(challenge_count, half + prandom_u32_max(ack_limit));
}
count = READ_ONCE(challenge_count);
@@ -3814,7 +3858,8 @@
tcp_process_tlp_ack(sk, ack, flag);
if (tcp_ack_is_dubious(sk, flag)) {
- if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP))) {
+ if (!(flag & (FLAG_SND_UNA_ADVANCED |
+ FLAG_NOT_DUP | FLAG_DSACKING_ACK))) {
num_dupack = 1;
/* Consider if pure acks were aggregated in tcp_add_backlog() */
if (!(flag & FLAG_DATA))
@@ -3999,7 +4044,7 @@
break;
case TCPOPT_WINDOW:
if (opsize == TCPOLEN_WINDOW && th->syn &&
- !estab && net->ipv4.sysctl_tcp_window_scaling) {
+ !estab && READ_ONCE(net->ipv4.sysctl_tcp_window_scaling)) {
__u8 snd_wscale = *(__u8 *)ptr;
opt_rx->wscale_ok = 1;
if (snd_wscale > TCP_MAX_WSCALE) {
@@ -4015,7 +4060,7 @@
case TCPOPT_TIMESTAMP:
if ((opsize == TCPOLEN_TIMESTAMP) &&
((estab && opt_rx->tstamp_ok) ||
- (!estab && net->ipv4.sysctl_tcp_timestamps))) {
+ (!estab && READ_ONCE(net->ipv4.sysctl_tcp_timestamps)))) {
opt_rx->saw_tstamp = 1;
opt_rx->rcv_tsval = get_unaligned_be32(ptr);
opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
@@ -4023,7 +4068,7 @@
break;
case TCPOPT_SACK_PERM:
if (opsize == TCPOLEN_SACK_PERM && th->syn &&
- !estab && net->ipv4.sysctl_tcp_sack) {
+ !estab && READ_ONCE(net->ipv4.sysctl_tcp_sack)) {
opt_rx->sack_ok = TCP_SACK_SEEN;
tcp_sack_reset(opt_rx);
}
@@ -4358,7 +4403,7 @@
{
struct tcp_sock *tp = tcp_sk(sk);
- if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
+ if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
int mib_idx;
if (before(seq, tp->rcv_nxt))
@@ -4405,7 +4450,7 @@
NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
- if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
+ if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
u32 end_seq = TCP_SKB_CB(skb)->end_seq;
tcp_rcv_spurious_retrans(sk, skb);
@@ -4751,7 +4796,7 @@
* and trigger fast retransmit.
*/
if (tcp_is_sack(tp))
- tcp_grow_window(sk, skb);
+ tcp_grow_window(sk, skb, true);
kfree_skb_partial(skb, fragstolen);
skb = NULL;
goto add_sack;
@@ -4839,7 +4884,7 @@
* and trigger fast retransmit.
*/
if (tcp_is_sack(tp))
- tcp_grow_window(sk, skb);
+ tcp_grow_window(sk, skb, false);
skb_condense(skb);
skb_set_owner_r(skb, sk);
}
@@ -5370,7 +5415,17 @@
sk->sk_write_space(sk);
}
-static void tcp_check_space(struct sock *sk)
+/* Caller made space either from:
+ * 1) Freeing skbs in rtx queues (after tp->snd_una has advanced)
+ * 2) Sent skbs from output queue (and thus advancing tp->snd_nxt)
+ *
+ * We might be able to generate EPOLLOUT to the application if:
+ * 1) Space consumed in output/rtx queues is below sk->sk_sndbuf/2
+ * 2) notsent amount (tp->write_seq - tp->snd_nxt) became
+ * small enough that tcp_stream_memory_free() decides it
+ * is time to generate EPOLLOUT.
+ */
+void tcp_check_space(struct sock *sk)
{
/* pairs with tcp_poll() */
smp_mb();
@@ -5420,7 +5475,7 @@
}
if (!tcp_is_sack(tp) ||
- tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
+ tp->compressed_ack >= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr))
goto send_now;
if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
@@ -5441,11 +5496,12 @@
if (tp->srtt_us && tp->srtt_us < rtt)
rtt = tp->srtt_us;
- delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
+ delay = min_t(unsigned long,
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns),
rtt * (NSEC_PER_USEC >> 3)/20);
sock_hold(sk);
hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
- sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns,
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns),
HRTIMER_MODE_REL_PINNED_SOFT);
}
@@ -5473,7 +5529,7 @@
struct tcp_sock *tp = tcp_sk(sk);
u32 ptr = ntohs(th->urg_ptr);
- if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg)
+ if (ptr && !READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_stdurg))
ptr--;
ptr += ntohl(th->seq);
@@ -5722,7 +5778,7 @@
trace_tcp_probe(sk, skb);
tcp_mstamp_refresh(tp);
- if (unlikely(!sk->sk_rx_dst))
+ if (unlikely(!rcu_access_pointer(sk->sk_rx_dst)))
inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
/*
* Header prediction.
@@ -6669,11 +6725,14 @@
{
struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
const char *msg = "Dropping request";
- bool want_cookie = false;
struct net *net = sock_net(sk);
+ bool want_cookie = false;
+ u8 syncookies;
+
+ syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
#ifdef CONFIG_SYN_COOKIES
- if (net->ipv4.sysctl_tcp_syncookies) {
+ if (syncookies) {
msg = "Sending cookies";
want_cookie = true;
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
@@ -6681,8 +6740,7 @@
#endif
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
- if (!queue->synflood_warned &&
- net->ipv4.sysctl_tcp_syncookies != 2 &&
+ if (!queue->synflood_warned && syncookies != 2 &&
xchg(&queue->synflood_warned, 1) == 0)
net_info_ratelimited("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
proto, sk->sk_num, msg);
@@ -6731,7 +6789,7 @@
struct tcp_sock *tp = tcp_sk(sk);
u16 mss;
- if (sock_net(sk)->ipv4.sysctl_tcp_syncookies != 2 &&
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) != 2 &&
!inet_csk_reqsk_queue_is_full(sk))
return 0;
@@ -6765,13 +6823,15 @@
bool want_cookie = false;
struct dst_entry *dst;
struct flowi fl;
+ u8 syncookies;
+
+ syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
/* TW buckets are converted to open requests without
* limitations, they conserve resources and peer is
* evidently real one.
*/
- if ((net->ipv4.sysctl_tcp_syncookies == 2 ||
- inet_csk_reqsk_queue_is_full(sk)) && !isn) {
+ if ((syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) {
want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name);
if (!want_cookie)
goto drop;
@@ -6825,10 +6885,12 @@
goto drop_and_free;
if (!want_cookie && !isn) {
+ int max_syn_backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog);
+
/* Kill the following clause, if you dislike this way. */
- if (!net->ipv4.sysctl_tcp_syncookies &&
- (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
- (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
+ if (!syncookies &&
+ (max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
+ (max_syn_backlog >> 2)) &&
!tcp_peer_is_proven(req, dst)) {
/* Without syncookies last quarter of
* backlog is filled with destinations,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 017cd66..8bd7b1e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -106,10 +106,10 @@
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
+ int reuse = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tw_reuse);
const struct inet_timewait_sock *tw = inet_twsk(sktw);
const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
struct tcp_sock *tp = tcp_sk(sk);
- int reuse = sock_net(sk)->ipv4.sysctl_tcp_tw_reuse;
if (reuse == 2) {
/* Still does not detect *everything* that goes through
@@ -322,6 +322,8 @@
* if necessary.
*/
tcp_set_state(sk, TCP_CLOSE);
+ if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
+ inet_reset_saddr(sk);
ip_rt_put(rt);
sk->sk_route_caps = 0;
inet->inet_dport = 0;
@@ -983,7 +985,7 @@
if (skb) {
__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
- tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
+ tos = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
(inet_sk(sk)->tos & INET_ECN_MASK) :
inet_sk(sk)->tos;
@@ -1558,7 +1560,7 @@
/* Set ToS of the new socket based upon the value of incoming SYN.
* ECT bits are set later in tcp_init_transfer().
*/
- if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
newinet->tos = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
if (!dst) {
@@ -1670,15 +1672,18 @@
struct sock *rsk;
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
- struct dst_entry *dst = sk->sk_rx_dst;
+ struct dst_entry *dst;
+
+ dst = rcu_dereference_protected(sk->sk_rx_dst,
+ lockdep_sock_is_held(sk));
sock_rps_save_rxhash(sk, skb);
sk_mark_napi_id(sk, skb);
if (dst) {
if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
!dst->ops->check(dst, 0)) {
+ RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
dst_release(dst);
- sk->sk_rx_dst = NULL;
}
}
tcp_rcv_established(sk, skb);
@@ -1753,7 +1758,7 @@
skb->sk = sk;
skb->destructor = sock_edemux;
if (sk_fullsock(sk)) {
- struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
+ struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
if (dst)
dst = dst_check(dst, 0);
@@ -1767,8 +1772,7 @@
bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
{
- u32 limit = READ_ONCE(sk->sk_rcvbuf) + READ_ONCE(sk->sk_sndbuf);
- u32 tail_gso_size, tail_gso_segs;
+ u32 limit, tail_gso_size, tail_gso_segs;
struct skb_shared_info *shinfo;
const struct tcphdr *th;
struct tcphdr *thtail;
@@ -1871,11 +1875,13 @@
__skb_push(skb, hdrlen);
no_coalesce:
+ limit = (u32)READ_ONCE(sk->sk_rcvbuf) + (u32)(READ_ONCE(sk->sk_sndbuf) >> 1);
+
/* Only socket owner can try to collapse/prune rx queues
* to reduce memory overhead, so add a little headroom here.
* Few sockets backlog are possibly concurrently non empty.
*/
- limit += 64*1024;
+ limit += 64 * 1024;
if (unlikely(sk_add_backlog(sk, skb, limit))) {
bh_unlock_sock(sk);
@@ -1980,7 +1986,8 @@
struct sock *nsk;
sk = req->rsk_listener;
- if (unlikely(tcp_v4_inbound_md5_hash(sk, skb, dif, sdif))) {
+ if (unlikely(!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb) ||
+ tcp_v4_inbound_md5_hash(sk, skb, dif, sdif))) {
sk_drops_add(sk, skb);
reqsk_put(req);
goto discard_it;
@@ -2019,6 +2026,7 @@
}
goto discard_and_relse;
}
+ nf_reset_ct(skb);
if (nsk == sk) {
reqsk_put(req);
tcp_v4_restore_cb(skb);
@@ -2160,7 +2168,7 @@
struct dst_entry *dst = skb_dst(skb);
if (dst && dst_hold_safe(dst)) {
- sk->sk_rx_dst = dst;
+ rcu_assign_pointer(sk->sk_rx_dst, dst);
inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
}
}
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 6b27c48..f3ca6ee 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -329,7 +329,7 @@
int m;
sk_dst_confirm(sk);
- if (net->ipv4.sysctl_tcp_nometrics_save || !dst)
+ if (READ_ONCE(net->ipv4.sysctl_tcp_nometrics_save) || !dst)
return;
rcu_read_lock();
@@ -385,7 +385,7 @@
if (tcp_in_initial_slowstart(tp)) {
/* Slow start still did not finish. */
- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
if (val && (tp->snd_cwnd >> 1) > val)
@@ -401,7 +401,7 @@
} else if (!tcp_in_slow_start(tp) &&
icsk->icsk_ca_state == TCP_CA_Open) {
/* Cong. avoidance phase, cwnd is reliable. */
- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
max(tp->snd_cwnd >> 1, tp->snd_ssthresh));
@@ -418,7 +418,7 @@
tcp_metric_set(tm, TCP_METRIC_CWND,
(val + tp->snd_ssthresh) >> 1);
}
- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
if (val && tp->snd_ssthresh > val)
@@ -428,7 +428,8 @@
if (!tcp_metric_locked(tm, TCP_METRIC_REORDERING)) {
val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
if (val < tp->reordering &&
- tp->reordering != net->ipv4.sysctl_tcp_reordering)
+ tp->reordering !=
+ READ_ONCE(net->ipv4.sysctl_tcp_reordering))
tcp_metric_set(tm, TCP_METRIC_REORDERING,
tp->reordering);
}
@@ -462,7 +463,7 @@
if (tcp_metric_locked(tm, TCP_METRIC_CWND))
tp->snd_cwnd_clamp = tcp_metric_get(tm, TCP_METRIC_CWND);
- val = net->ipv4.sysctl_tcp_no_ssthresh_metrics_save ?
+ val = READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) ?
0 : tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
if (val) {
tp->snd_ssthresh = val;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f0f67b2..e423123 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -180,7 +180,7 @@
* Oh well... nobody has a sufficient solution to this
* protocol bug yet.
*/
- if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) {
+ if (!READ_ONCE(twsk_net(tw)->ipv4.sysctl_tcp_rfc1337)) {
kill:
inet_twsk_deschedule_put(tw);
return TCP_TW_SUCCESS;
@@ -538,7 +538,7 @@
newtp->tsoffset = treq->ts_off;
#ifdef CONFIG_TCP_MD5SIG
newtp->md5sig_info = NULL; /*XXX*/
- if (newtp->af_specific->md5_lookup(sk, newsk))
+ if (treq->af_specific->req_md5_lookup(sk, req_to_sk(req)))
newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
#endif
if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 19ef457..eefd032 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -82,6 +82,7 @@
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
tcp_skb_pcount(skb));
+ tcp_check_space(sk);
}
/* SND.NXT, if window was not shrunk or the amount of shrunk was less than one
@@ -166,16 +167,13 @@
if (tcp_packets_in_flight(tp) == 0)
tcp_ca_event(sk, CA_EVENT_TX_START);
- /* If this is the first data packet sent in response to the
- * previous received data,
- * and it is a reply for ato after last received packet,
- * increase pingpong count.
- */
- if (before(tp->lsndtime, icsk->icsk_ack.lrcvtime) &&
- (u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
- inet_csk_inc_pingpong_cnt(sk);
-
tp->lsndtime = now;
+
+ /* If it is a reply for ato after last received
+ * packet, enter pingpong mode.
+ */
+ if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
+ inet_csk_enter_pingpong_mode(sk);
}
/* Account for an ACK we sent. */
@@ -240,8 +238,8 @@
*rcv_wscale = 0;
if (wscale_ok) {
/* Set window scaling on max possible window */
- space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
- space = max_t(u32, space, sysctl_rmem_max);
+ space = max_t(u32, space, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
+ space = max_t(u32, space, READ_ONCE(sysctl_rmem_max));
space = min_t(u32, space, *window_clamp);
*rcv_wscale = clamp_t(int, ilog2(space) - 15,
0, TCP_MAX_WSCALE);
@@ -788,18 +786,18 @@
opts->mss = tcp_advertise_mss(sk);
remaining -= TCPOLEN_MSS_ALIGNED;
- if (likely(sock_net(sk)->ipv4.sysctl_tcp_timestamps && !*md5)) {
+ if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps) && !*md5)) {
opts->options |= OPTION_TS;
opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
opts->tsecr = tp->rx_opt.ts_recent;
remaining -= TCPOLEN_TSTAMP_ALIGNED;
}
- if (likely(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) {
+ if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling))) {
opts->ws = tp->rx_opt.rcv_wscale;
opts->options |= OPTION_WSCALE;
remaining -= TCPOLEN_WSCALE_ALIGNED;
}
- if (likely(sock_net(sk)->ipv4.sysctl_tcp_sack)) {
+ if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_sack))) {
opts->options |= OPTION_SACK_ADVERTISE;
if (unlikely(!(OPTION_TS & opts->options)))
remaining -= TCPOLEN_SACKPERM_ALIGNED;
@@ -1719,7 +1717,8 @@
mss_now -= icsk->icsk_ext_hdr_len;
/* Then reserve room for full set of TCP options and 8 bytes of data */
- mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss);
+ mss_now = max(mss_now,
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss));
return mss_now;
}
@@ -1762,10 +1761,10 @@
struct inet_connection_sock *icsk = inet_csk(sk);
struct net *net = sock_net(sk);
- icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1;
+ icsk->icsk_mtup.enabled = READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing) > 1;
icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
icsk->icsk_af_ops->net_header_len;
- icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
+ icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, READ_ONCE(net->ipv4.sysctl_tcp_base_mss));
icsk->icsk_mtup.probe_size = 0;
if (icsk->icsk_mtup.enabled)
icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
@@ -1877,15 +1876,20 @@
const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
struct tcp_sock *tp = tcp_sk(sk);
- /* Track the maximum number of outstanding packets in each
- * window, and remember whether we were cwnd-limited then.
+ /* Track the strongest available signal of the degree to which the cwnd
+ * is fully utilized. If cwnd-limited then remember that fact for the
+ * current window. If not cwnd-limited then track the maximum number of
+ * outstanding packets in the current window. (If cwnd-limited then we
+ * chose to not update tp->max_packets_out to avoid an extra else
+ * clause with no functional impact.)
*/
- if (!before(tp->snd_una, tp->max_packets_seq) ||
- tp->packets_out > tp->max_packets_out ||
- is_cwnd_limited) {
- tp->max_packets_out = tp->packets_out;
- tp->max_packets_seq = tp->snd_nxt;
+ if (!before(tp->snd_una, tp->cwnd_usage_seq) ||
+ is_cwnd_limited ||
+ (!tp->is_cwnd_limited &&
+ tp->packets_out > tp->max_packets_out)) {
tp->is_cwnd_limited = is_cwnd_limited;
+ tp->max_packets_out = tp->packets_out;
+ tp->cwnd_usage_seq = tp->snd_nxt;
}
if (tcp_is_cwnd_limited(sk)) {
@@ -1897,7 +1901,7 @@
if (tp->packets_out > tp->snd_cwnd_used)
tp->snd_cwnd_used = tp->packets_out;
- if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle &&
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) &&
(s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto &&
!ca_ops->cong_control)
tcp_cwnd_application_limited(sk);
@@ -1985,7 +1989,7 @@
min_tso = ca_ops->min_tso_segs ?
ca_ops->min_tso_segs(sk) :
- sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
return min_t(u32, tso_segs, sk->sk_gso_max_segs);
@@ -2276,7 +2280,7 @@
u32 interval;
s32 delta;
- interval = net->ipv4.sysctl_tcp_probe_interval;
+ interval = READ_ONCE(net->ipv4.sysctl_tcp_probe_interval);
delta = tcp_jiffies32 - icsk->icsk_mtup.probe_timestamp;
if (unlikely(delta >= interval * HZ)) {
int mss = tcp_current_mss(sk);
@@ -2358,7 +2362,7 @@
* probing process by not resetting search range to its orignal.
*/
if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) ||
- interval < net->ipv4.sysctl_tcp_probe_threshold) {
+ interval < READ_ONCE(net->ipv4.sysctl_tcp_probe_threshold)) {
/* Check whether enough time has elaplased for
* another round of probing.
*/
@@ -2500,7 +2504,7 @@
sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift));
if (sk->sk_pacing_status == SK_PACING_NONE)
limit = min_t(unsigned long, limit,
- sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes));
limit <<= factor;
if (static_branch_unlikely(&tcp_tx_delay_enabled) &&
@@ -2733,7 +2737,7 @@
if (rcu_access_pointer(tp->fastopen_rsk))
return false;
- early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
+ early_retrans = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_early_retrans);
/* Schedule a loss probe in 2*RTT for SACK capable connections
* not in loss recovery, that are either limited by cwnd or application.
*/
@@ -3098,7 +3102,7 @@
struct sk_buff *skb = to, *tmp;
bool first = true;
- if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse))
return;
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
return;
@@ -3138,7 +3142,7 @@
struct tcp_sock *tp = tcp_sk(sk);
unsigned int cur_mss;
int diff, len, err;
-
+ int avail_wnd;
/* Inconclusive MTU probe */
if (icsk->icsk_mtup.probe_size)
@@ -3168,17 +3172,25 @@
return -EHOSTUNREACH; /* Routing failure or similar. */
cur_mss = tcp_current_mss(sk);
+ avail_wnd = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
/* If receiver has shrunk his window, and skb is out of
* new window, do not retransmit it. The exception is the
* case, when window is shrunk to zero. In this case
- * our retransmit serves as a zero window probe.
+ * our retransmit of one segment serves as a zero window probe.
*/
- if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) &&
- TCP_SKB_CB(skb)->seq != tp->snd_una)
- return -EAGAIN;
+ if (avail_wnd <= 0) {
+ if (TCP_SKB_CB(skb)->seq != tp->snd_una)
+ return -EAGAIN;
+ avail_wnd = cur_mss;
+ }
len = cur_mss * segs;
+ if (len > avail_wnd) {
+ len = rounddown(avail_wnd, cur_mss);
+ if (!len)
+ len = avail_wnd;
+ }
if (skb->len > len) {
if (tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, len,
cur_mss, GFP_ATOMIC))
@@ -3192,8 +3204,9 @@
diff -= tcp_skb_pcount(skb);
if (diff)
tcp_adjust_pcount(sk, skb, diff);
- if (skb->len < cur_mss)
- tcp_retrans_try_collapse(sk, skb, cur_mss);
+ avail_wnd = min_t(int, avail_wnd, cur_mss);
+ if (skb->len < avail_wnd)
+ tcp_retrans_try_collapse(sk, skb, avail_wnd);
}
/* RFC3168, section 6.1.1.1. ECN fallback */
@@ -3364,11 +3377,12 @@
*/
void sk_forced_mem_schedule(struct sock *sk, int size)
{
- int amt;
+ int delta, amt;
- if (size <= sk->sk_forward_alloc)
+ delta = size - sk->sk_forward_alloc;
+ if (delta <= 0)
return;
- amt = sk_mem_pages(size);
+ amt = sk_mem_pages(delta);
sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
sk_memory_allocated_add(sk, amt);
@@ -3646,7 +3660,7 @@
* See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
*/
tp->tcp_header_len = sizeof(struct tcphdr);
- if (sock_net(sk)->ipv4.sysctl_tcp_timestamps)
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps))
tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
@@ -3682,7 +3696,7 @@
tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
&tp->rcv_wnd,
&tp->window_clamp,
- sock_net(sk)->ipv4.sysctl_tcp_window_scaling,
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling),
&rcv_wscale,
rcv_wnd);
@@ -3733,6 +3747,7 @@
*/
static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
{
+ struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_fastopen_request *fo = tp->fastopen_req;
int space, err = 0;
@@ -3747,8 +3762,10 @@
* private TCP options. The cost is reduced data space in SYN :(
*/
tp->rx_opt.mss_clamp = tcp_mss_clamp(tp, tp->rx_opt.mss_clamp);
+ /* Sync mss_cache after updating the mss_clamp */
+ tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
- space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
+ space = __tcp_mtu_to_mss(sk, icsk->icsk_pmtu_cookie) -
MAX_TCP_OPTION_SPACE;
space = min_t(size_t, space, fo->size);
@@ -4087,7 +4104,7 @@
icsk->icsk_probes_out++;
if (err <= 0) {
- if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2)
+ if (icsk->icsk_backoff < READ_ONCE(net->ipv4.sysctl_tcp_retries2))
icsk->icsk_backoff++;
timeout = tcp_probe0_when(sk, TCP_RTO_MAX);
} else {
@@ -4111,8 +4128,8 @@
res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL,
NULL);
if (!res) {
- __TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
if (unlikely(tcp_passive_fastopen(sk)))
tcp_sk(sk)->total_retrans++;
trace_tcp_retransmit_synack(sk, req);
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index 0de6935..6ab1979 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -73,26 +73,31 @@
*
* If an ACK (s)acks multiple skbs (e.g., stretched-acks), this function is
* called multiple times. We favor the information from the most recently
- * sent skb, i.e., the skb with the highest prior_delivered count.
+ * sent skb, i.e., the skb with the most recently sent time and the highest
+ * sequence.
*/
void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
struct rate_sample *rs)
{
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
+ u64 tx_tstamp;
if (!scb->tx.delivered_mstamp)
return;
+ tx_tstamp = tcp_skb_timestamp_us(skb);
if (!rs->prior_delivered ||
- after(scb->tx.delivered, rs->prior_delivered)) {
+ tcp_skb_sent_after(tx_tstamp, tp->first_tx_mstamp,
+ scb->end_seq, rs->last_end_seq)) {
rs->prior_delivered = scb->tx.delivered;
rs->prior_mstamp = scb->tx.delivered_mstamp;
rs->is_app_limited = scb->tx.is_app_limited;
rs->is_retrans = scb->sacked & TCPCB_RETRANS;
+ rs->last_end_seq = scb->end_seq;
/* Record send time of most recently ACKed packet: */
- tp->first_tx_mstamp = tcp_skb_timestamp_us(skb);
+ tp->first_tx_mstamp = tx_tstamp;
/* Find the duration of the "send phase" of this window: */
rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp,
scb->tx.first_tx_mstamp);
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index 31fc178..21fc985 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -19,7 +19,8 @@
return 0;
if (tp->sacked_out >= tp->reordering &&
- !(sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_NO_DUPTHRESH))
+ !(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
+ TCP_RACK_NO_DUPTHRESH))
return 0;
}
@@ -190,7 +191,8 @@
{
struct tcp_sock *tp = tcp_sk(sk);
- if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_STATIC_REO_WND ||
+ if ((READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
+ TCP_RACK_STATIC_REO_WND) ||
!rs->prior_delivered)
return;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 4ef0807..888683f 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -143,7 +143,7 @@
*/
static int tcp_orphan_retries(struct sock *sk, bool alive)
{
- int retries = sock_net(sk)->ipv4.sysctl_tcp_orphan_retries; /* May be zero. */
+ int retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_orphan_retries); /* May be zero. */
/* We know from an ICMP that something is wrong. */
if (sk->sk_err_soft && !alive)
@@ -163,7 +163,7 @@
int mss;
/* Black hole detection */
- if (!net->ipv4.sysctl_tcp_mtu_probing)
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing))
return;
if (!icsk->icsk_mtup.enabled) {
@@ -171,9 +171,9 @@
icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
} else {
mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
- mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
- mss = max(mss, net->ipv4.sysctl_tcp_mtu_probe_floor);
- mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss);
+ mss = min(READ_ONCE(net->ipv4.sysctl_tcp_base_mss), mss);
+ mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_mtu_probe_floor));
+ mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_min_snd_mss));
icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
}
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
@@ -242,14 +242,14 @@
retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
expired = icsk->icsk_retransmits >= retry_until;
} else {
- if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0)) {
+ if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1), 0)) {
/* Black hole detection */
tcp_mtu_probing(icsk, sk);
__dst_negative_advice(sk);
}
- retry_until = net->ipv4.sysctl_tcp_retries2;
+ retry_until = READ_ONCE(net->ipv4.sysctl_tcp_retries2);
if (sock_flag(sk, SOCK_DEAD)) {
const bool alive = icsk->icsk_rto < TCP_RTO_MAX;
@@ -380,7 +380,7 @@
msecs_to_jiffies(icsk->icsk_user_timeout))
goto abort;
- max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2;
+ max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2);
if (sock_flag(sk, SOCK_DEAD)) {
const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;
@@ -574,7 +574,7 @@
* linear-timeout retransmissions into a black hole
*/
if (sk->sk_state == TCP_ESTABLISHED &&
- (tp->thin_lto || net->ipv4.sysctl_tcp_thin_linear_timeouts) &&
+ (tp->thin_lto || READ_ONCE(net->ipv4.sysctl_tcp_thin_linear_timeouts)) &&
tcp_stream_is_thin(tp) &&
icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
icsk->icsk_backoff = 0;
@@ -585,7 +585,7 @@
}
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
tcp_clamp_rto_to_user_timeout(sk), TCP_RTO_MAX);
- if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0))
+ if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1) + 1, 0))
__sk_dst_reset(sk);
out:;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ef2068a..b093daa 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -446,7 +446,7 @@
result = lookup_reuseport(net, sk, skb,
saddr, sport, daddr, hnum);
/* Fall back to scoring if group has connections */
- if (result && !reuseport_has_conns(sk, false))
+ if (result && !reuseport_has_conns(sk))
return result;
result = result ? : sk;
@@ -598,6 +598,12 @@
}
EXPORT_SYMBOL(udp_encap_enable);
+void udp_encap_disable(void)
+{
+ static_branch_dec(&udp_encap_needed_key);
+}
+EXPORT_SYMBOL(udp_encap_disable);
+
/* Handler for tunnels with arbitrary destination ports: no socket lookup, go
* through error handlers in encapsulations looking for a match.
*/
@@ -1198,7 +1204,7 @@
faddr, saddr, dport, inet->inet_sport,
sk->sk_uid);
- security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
+ security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4));
rt = ip_route_output_flow(net, fl4, sk);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
@@ -2187,7 +2193,7 @@
struct dst_entry *old;
if (dst_hold_safe(dst)) {
- old = xchg(&sk->sk_rx_dst, dst);
+ old = xchg((__force struct dst_entry **)&sk->sk_rx_dst, dst);
dst_release(old);
return old != dst;
}
@@ -2377,7 +2383,7 @@
struct dst_entry *dst = skb_dst(skb);
int ret;
- if (unlikely(sk->sk_rx_dst != dst))
+ if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst))
udp_sk_rx_dst_set(sk, dst);
ret = udp_unicast_rcv_skb(sk, skb, uh);
@@ -2484,8 +2490,7 @@
struct sock *sk;
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
- if (INET_MATCH(sk, net, acookie, rmt_addr,
- loc_addr, ports, dif, sdif))
+ if (INET_MATCH(net, sk, acookie, ports, dif, sdif))
return sk;
/* Only check first socket in chain */
break;
@@ -2536,7 +2541,7 @@
skb->sk = sk;
skb->destructor = sock_efree;
- dst = READ_ONCE(sk->sk_rx_dst);
+ dst = rcu_dereference(sk->sk_rx_dst);
if (dst)
dst = dst_check(dst, 0);
diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c
index ea595c8..cfd4622 100644
--- a/net/ipv4/xfrm4_protocol.c
+++ b/net/ipv4/xfrm4_protocol.c
@@ -307,4 +307,3 @@
{
xfrm_input_register_afinfo(&xfrm4_input_afinfo);
}
-EXPORT_SYMBOL(xfrm4_protocol_init);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 7c5bf39..ed1e5bf 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -542,7 +542,7 @@
#ifdef CONFIG_IPV6_MROUTE
if ((all || type == NETCONFA_MC_FORWARDING) &&
nla_put_s32(skb, NETCONFA_MC_FORWARDING,
- devconf->mc_forwarding) < 0)
+ atomic_read(&devconf->mc_forwarding)) < 0)
goto nla_put_failure;
#endif
if ((all || type == NETCONFA_PROXY_NEIGH) &&
@@ -789,6 +789,7 @@
{
struct net_device *dev;
struct inet6_ifaddr *ifa;
+ LIST_HEAD(tmp_addr_list);
if (!idev)
return;
@@ -807,14 +808,24 @@
}
}
+ read_lock_bh(&idev->lock);
list_for_each_entry(ifa, &idev->addr_list, if_list) {
if (ifa->flags&IFA_F_TENTATIVE)
continue;
+ list_add_tail(&ifa->if_list_aux, &tmp_addr_list);
+ }
+ read_unlock_bh(&idev->lock);
+
+ while (!list_empty(&tmp_addr_list)) {
+ ifa = list_first_entry(&tmp_addr_list,
+ struct inet6_ifaddr, if_list_aux);
+ list_del(&ifa->if_list_aux);
if (idev->cnf.forwarding)
addrconf_join_anycast(ifa);
else
addrconf_leave_anycast(ifa);
}
+
inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
NETCONFA_FORWARDING,
dev->ifindex, &idev->cnf);
@@ -1091,10 +1102,6 @@
goto out;
}
- if (net->ipv6.devconf_all->disable_policy ||
- idev->cnf.disable_policy)
- f6i->dst_nopolicy = true;
-
neigh_parms_data_state_setall(idev->nd_parms);
ifa->addr = *cfg->pfx;
@@ -3710,7 +3717,8 @@
unsigned long event = unregister ? NETDEV_UNREGISTER : NETDEV_DOWN;
struct net *net = dev_net(dev);
struct inet6_dev *idev;
- struct inet6_ifaddr *ifa, *tmp;
+ struct inet6_ifaddr *ifa;
+ LIST_HEAD(tmp_addr_list);
bool keep_addr = false;
bool was_ready;
int state, i;
@@ -3802,16 +3810,23 @@
write_lock_bh(&idev->lock);
}
- list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
+ list_for_each_entry(ifa, &idev->addr_list, if_list)
+ list_add_tail(&ifa->if_list_aux, &tmp_addr_list);
+ write_unlock_bh(&idev->lock);
+
+ while (!list_empty(&tmp_addr_list)) {
struct fib6_info *rt = NULL;
bool keep;
+ ifa = list_first_entry(&tmp_addr_list,
+ struct inet6_ifaddr, if_list_aux);
+ list_del(&ifa->if_list_aux);
+
addrconf_del_dad_work(ifa);
keep = keep_addr && (ifa->flags & IFA_F_PERMANENT) &&
!addr_is_local(&ifa->addr);
- write_unlock_bh(&idev->lock);
spin_lock_bh(&ifa->lock);
if (keep) {
@@ -3842,15 +3857,14 @@
addrconf_leave_solict(ifa->idev, &ifa->addr);
}
- write_lock_bh(&idev->lock);
if (!keep) {
+ write_lock_bh(&idev->lock);
list_del_rcu(&ifa->if_list);
+ write_unlock_bh(&idev->lock);
in6_ifa_put(ifa);
}
}
- write_unlock_bh(&idev->lock);
-
/* Step 5: Discard anycast and multicast list */
if (unregister) {
ipv6_ac_destroy_dev(idev);
@@ -4182,7 +4196,8 @@
send_rs = send_mld &&
ipv6_accept_ra(ifp->idev) &&
ifp->idev->cnf.rtr_solicits != 0 &&
- (dev->flags&IFF_LOOPBACK) == 0;
+ (dev->flags & IFF_LOOPBACK) == 0 &&
+ (dev->type != ARPHRD_TUNNEL);
read_unlock_bh(&ifp->idev->lock);
/* While dad is in progress mld report's source address is in6_addrany.
@@ -5515,7 +5530,7 @@
array[DEVCONF_USE_OPTIMISTIC] = cnf->use_optimistic;
#endif
#ifdef CONFIG_IPV6_MROUTE
- array[DEVCONF_MC_FORWARDING] = cnf->mc_forwarding;
+ array[DEVCONF_MC_FORWARDING] = atomic_read(&cnf->mc_forwarding);
#endif
array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6;
array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad;
@@ -7027,9 +7042,8 @@
if (!dflt)
goto err_alloc_dflt;
- if (IS_ENABLED(CONFIG_SYSCTL) &&
- !net_eq(net, &init_net)) {
- switch (sysctl_devconf_inherit_init_net) {
+ if (!net_eq(net, &init_net)) {
+ switch (net_inherit_devconf()) {
case 1: /* copy from init_net */
memcpy(all, init_net.ipv6.devconf_all,
sizeof(ipv6_devconf));
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 8a22486..17ac45a 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -437,6 +437,7 @@
{
struct ifaddrlblmsg *ifal = nlmsg_data(nlh);
ifal->ifal_family = AF_INET6;
+ ifal->__ifal_reserved = 0;
ifal->ifal_prefixlen = prefixlen;
ifal->ifal_flags = 0;
ifal->ifal_index = ifindex;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 0905753..d30c9d9 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -225,7 +225,7 @@
inet->mc_list = NULL;
inet->rcv_tos = 0;
- if (net->ipv4.sysctl_ip_no_pmtu_disc)
+ if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc))
inet->pmtudisc = IP_PMTUDISC_DONT;
else
inet->pmtudisc = IP_PMTUDISC_WANT;
@@ -819,7 +819,7 @@
fl6.fl6_dport = inet->inet_dport;
fl6.fl6_sport = inet->inet_sport;
fl6.flowi6_uid = sk->sk_uid;
- security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+ security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
rcu_read_lock();
final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt),
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index cc8ad7d..f4559e5 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -60,7 +60,7 @@
if (!fl6->flowi6_oif && ipv6_addr_is_multicast(&fl6->daddr))
fl6->flowi6_oif = np->mcast_oif;
- security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
+ security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
}
int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr)
@@ -256,7 +256,7 @@
goto out;
}
- reuseport_has_conns(sk, true);
+ reuseport_has_conns_set(sk);
sk->sk_state = TCP_ESTABLISHED;
sk_set_txhash(sk);
out:
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 20c7bef..cb28f89 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -483,7 +483,6 @@
struct page *page;
struct sk_buff *trailer;
int tailen = esp->tailen;
- unsigned int allocsz;
if (x->encap) {
int err = esp6_output_encap(x, skb, esp);
@@ -492,8 +491,8 @@
return err;
}
- allocsz = ALIGN(skb->data_len + tailen, L1_CACHE_BYTES);
- if (allocsz > ESP_SKB_FRAG_MAXSIZE)
+ if (ALIGN(tailen, L1_CACHE_BYTES) > PAGE_SIZE ||
+ ALIGN(skb->data_len, L1_CACHE_BYTES) > PAGE_SIZE)
goto cow;
if (!skb_cloned(skb)) {
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 1c3f02d..7608be0 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -343,6 +343,9 @@
xo->seq.low += skb_shinfo(skb)->gso_segs;
}
+ if (xo->seq.low < seq)
+ xo->seq.hi++;
+
esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
len = skb->len - sizeof(struct ipv6hdr);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index cbab41d..fd1f896 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -573,7 +573,7 @@
fl6.fl6_icmp_code = code;
fl6.flowi6_uid = sock_net_uid(net, NULL);
fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
- security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
+ security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
np = inet6_sk(sk);
@@ -755,7 +755,7 @@
fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
fl6.flowi6_mark = mark;
fl6.flowi6_uid = sock_net_uid(net, NULL);
- security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
+ security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
local_bh_disable();
sk = icmpv6_xmit_lock(net);
@@ -1008,7 +1008,7 @@
fl6->fl6_icmp_type = type;
fl6->fl6_icmp_code = 0;
fl6->flowi6_oif = oif;
- security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
+ security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
}
static void __net_exit icmpv6_sk_exit(struct net *net)
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index e315526..5a9f4d7 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -46,7 +46,7 @@
fl6->fl6_dport = ireq->ir_rmt_port;
fl6->fl6_sport = htons(ireq->ir_num);
fl6->flowi6_uid = sk->sk_uid;
- security_req_classify_flow(req, flowi6_to_flowi(fl6));
+ security_req_classify_flow(req, flowi6_to_flowi_common(fl6));
dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p);
if (IS_ERR(dst))
@@ -95,7 +95,7 @@
fl6->fl6_sport = inet->inet_sport;
fl6->fl6_dport = inet->inet_dport;
fl6->flowi6_uid = sk->sk_uid;
- security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
+ security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
rcu_read_lock();
final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 67c9114..b4a5e01 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -71,12 +71,12 @@
sk_nulls_for_each_rcu(sk, node, &head->chain) {
if (sk->sk_hash != hash)
continue;
- if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif, sdif))
+ if (!inet6_match(net, sk, saddr, daddr, ports, dif, sdif))
continue;
if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
goto out;
- if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif, sdif))) {
+ if (unlikely(!inet6_match(net, sk, saddr, daddr, ports, dif, sdif))) {
sock_gen_put(sk);
goto begin;
}
@@ -269,7 +269,7 @@
if (sk2->sk_hash != hash)
continue;
- if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports,
+ if (likely(inet6_match(net, sk2, saddr, daddr, ports,
dif, sdif))) {
if (sk2->sk_state == TCP_TIME_WAIT) {
tw = inet_twsk(sk2);
@@ -308,7 +308,7 @@
return -EADDRNOTAVAIL;
}
-static u32 inet6_sk_port_offset(const struct sock *sk)
+static u64 inet6_sk_port_offset(const struct sock *sk)
{
const struct inet_sock *inet = inet_sk(sk);
@@ -320,7 +320,7 @@
int inet6_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk)
{
- u32 port_offset = 0;
+ u64 port_offset = 0;
if (!inet_sk(sk)->inet_num)
port_offset = inet6_sk_port_offset(sk);
@@ -333,11 +333,8 @@
{
int err = 0;
- if (sk->sk_state != TCP_CLOSE) {
- local_bh_disable();
+ if (sk->sk_state != TCP_CLOSE)
err = __inet_hash(sk, NULL);
- local_bh_enable();
- }
return err;
}
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 9a0263f..0010f9e 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -724,6 +724,7 @@
{
struct ip6_tnl *tunnel = netdev_priv(dev);
__be16 protocol;
+ __be16 flags;
if (dev->type == ARPHRD_ETHER)
IPCB(skb)->flags = 0;
@@ -733,16 +734,13 @@
else
fl6->daddr = tunnel->parms.raddr;
- if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen))
- return -ENOMEM;
-
/* Push GRE header. */
protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
if (tunnel->parms.collect_md) {
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
- __be16 flags;
+ int tun_hlen;
tun_info = skb_tunnel_info_txcheck(skb);
if (IS_ERR(tun_info) ||
@@ -760,21 +758,27 @@
dsfield = key->tos;
flags = key->tun_flags &
(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
- tunnel->tun_hlen = gre_calc_hlen(flags);
+ tun_hlen = gre_calc_hlen(flags);
- gre_build_header(skb, tunnel->tun_hlen,
+ if (skb_cow_head(skb, dev->needed_headroom ?: tun_hlen + tunnel->encap_hlen))
+ return -ENOMEM;
+
+ gre_build_header(skb, tun_hlen,
flags, protocol,
tunnel_id_to_key32(tun_info->key.tun_id),
- (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++)
+ (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno))
: 0);
} else {
- if (tunnel->parms.o_flags & TUNNEL_SEQ)
- tunnel->o_seqno++;
+ if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen))
+ return -ENOMEM;
- gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
+ flags = tunnel->parms.o_flags;
+
+ gre_build_header(skb, tunnel->tun_hlen, flags,
protocol, tunnel->parms.o_key,
- htonl(tunnel->o_seqno));
+ (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno))
+ : 0);
}
return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
@@ -940,7 +944,6 @@
__be16 proto;
__u32 mtu;
int nhoff;
- int thoff;
if (!pskb_inet_may_pull(skb))
goto tx_err;
@@ -961,10 +964,16 @@
(ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
truncate = true;
- thoff = skb_transport_header(skb) - skb_mac_header(skb);
- if (skb->protocol == htons(ETH_P_IPV6) &&
- (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff))
- truncate = true;
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ int thoff;
+
+ if (skb_transport_header_was_set(skb))
+ thoff = skb_transport_header(skb) - skb_mac_header(skb);
+ else
+ thoff = nhoff + sizeof(struct ipv6hdr);
+ if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)
+ truncate = true;
+ }
if (skb_cow_head(skb, dev->needed_headroom ?: t->hlen))
goto tx_err;
@@ -1052,7 +1061,7 @@
/* Push GRE header. */
proto = (t->parms.erspan_ver == 1) ? htons(ETH_P_ERSPAN)
: htons(ETH_P_ERSPAN2);
- gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(t->o_seqno++));
+ gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(atomic_fetch_inc(&t->o_seqno)));
/* TooBig packet may have updated dst->dev's mtu */
if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
@@ -1144,14 +1153,16 @@
dev->needed_headroom = dst_len;
if (set_mtu) {
- dev->mtu = rt->dst.dev->mtu - t_hlen;
- if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
- dev->mtu -= 8;
- if (dev->type == ARPHRD_ETHER)
- dev->mtu -= ETH_HLEN;
+ int mtu = rt->dst.dev->mtu - t_hlen;
- if (dev->mtu < IPV6_MIN_MTU)
- dev->mtu = IPV6_MIN_MTU;
+ if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ mtu -= 8;
+ if (dev->type == ARPHRD_ETHER)
+ mtu -= ETH_HLEN;
+
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+ WRITE_ONCE(dev->mtu, mtu);
}
}
ip6_rt_put(rt);
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 06d6066..4eb9fbf 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -44,21 +44,25 @@
#include <net/inet_ecn.h>
#include <net/dst_metadata.h>
-INDIRECT_CALLABLE_DECLARE(void udp_v6_early_demux(struct sk_buff *));
-INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *));
+void udp_v6_early_demux(struct sk_buff *);
+void tcp_v6_early_demux(struct sk_buff *);
static void ip6_rcv_finish_core(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
- void (*edemux)(struct sk_buff *skb);
-
- if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
- const struct inet6_protocol *ipprot;
-
- ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
- if (ipprot && (edemux = READ_ONCE(ipprot->early_demux)))
- INDIRECT_CALL_2(edemux, tcp_v6_early_demux,
- udp_v6_early_demux, skb);
+ if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) &&
+ !skb_dst(skb) && !skb->sk) {
+ switch (ipv6_hdr(skb)->nexthdr) {
+ case IPPROTO_TCP:
+ if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux))
+ tcp_v6_early_demux(skb);
+ break;
+ case IPPROTO_UDP:
+ if (READ_ONCE(net->ipv4.sysctl_udp_early_demux))
+ udp_v6_early_demux(skb);
+ break;
+ }
}
+
if (!skb_valid_dst(skb))
ip6_route_input(skb);
}
@@ -509,7 +513,7 @@
/*
* IPv6 multicast router mode is now supported ;)
*/
- if (dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding &&
+ if (atomic_read(&dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding) &&
!(ipv6_addr_type(&hdr->daddr) &
(IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL)) &&
likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) {
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 2aa39ce..fadad8e 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -508,7 +508,7 @@
goto drop;
if (!net->ipv6.devconf_all->disable_policy &&
- !idev->cnf.disable_policy &&
+ (!idev || !idev->cnf.disable_policy) &&
!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
goto drop;
@@ -1313,8 +1313,7 @@
fl6.daddr = info->key.u.ipv6.dst;
fl6.saddr = info->key.u.ipv6.src;
prio = info->key.tos;
- fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio),
- info->key.label);
+ fl6.flowlabel = ip6_make_flowinfo(prio, info->key.label);
dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
NULL);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 3a27415..0d4cab9 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1476,8 +1476,8 @@
struct net_device *tdev = NULL;
struct __ip6_tnl_parm *p = &t->parms;
struct flowi6 *fl6 = &t->fl.u.ip6;
- unsigned int mtu;
int t_hlen;
+ int mtu;
memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
@@ -1524,12 +1524,13 @@
dev->hard_header_len = tdev->hard_header_len + t_hlen;
mtu = min_t(unsigned int, tdev->mtu, IP6_MAX_MTU);
- dev->mtu = mtu - t_hlen;
+ mtu = mtu - t_hlen;
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
- dev->mtu -= 8;
+ mtu -= 8;
- if (dev->mtu < IPV6_MIN_MTU)
- dev->mtu = IPV6_MIN_MTU;
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+ WRITE_ONCE(dev->mtu, mtu);
}
}
}
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 41cb348..5f0ac47 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -740,7 +740,7 @@
in6_dev = __in6_dev_get(dev);
if (in6_dev) {
- in6_dev->cnf.mc_forwarding--;
+ atomic_dec(&in6_dev->cnf.mc_forwarding);
inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
NETCONFA_MC_FORWARDING,
dev->ifindex, &in6_dev->cnf);
@@ -908,7 +908,7 @@
in6_dev = __in6_dev_get(dev);
if (in6_dev) {
- in6_dev->cnf.mc_forwarding++;
+ atomic_inc(&in6_dev->cnf.mc_forwarding);
inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
NETCONFA_MC_FORWARDING,
dev->ifindex, &in6_dev->cnf);
@@ -1558,7 +1558,7 @@
} else {
rcu_assign_pointer(mrt->mroute_sk, sk);
sock_set_flag(sk, SOCK_RCU_FREE);
- net->ipv6.devconf_all->mc_forwarding++;
+ atomic_inc(&net->ipv6.devconf_all->mc_forwarding);
}
write_unlock_bh(&mrt_lock);
@@ -1591,7 +1591,7 @@
* so the RCU grace period before sk freeing
* is guaranteed by sk_destruct()
*/
- net->ipv6.devconf_all->mc_forwarding--;
+ atomic_dec(&net->ipv6.devconf_all->mc_forwarding);
write_unlock_bh(&mrt_lock);
inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
NETCONFA_MC_FORWARDING,
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 43a894b..2017257 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -208,7 +208,7 @@
if (optlen < GROUP_FILTER_SIZE(0))
return -EINVAL;
- if (optlen > sysctl_optmem_max)
+ if (optlen > READ_ONCE(sysctl_optmem_max))
return -ENOBUFS;
gsf = memdup_sockptr(optval, optlen);
@@ -242,7 +242,7 @@
if (optlen < size0)
return -EINVAL;
- if (optlen > sysctl_optmem_max - 4)
+ if (optlen > READ_ONCE(sysctl_optmem_max) - 4)
return -ENOBUFS;
p = kmalloc(optlen + 4, GFP_KERNEL);
@@ -417,6 +417,12 @@
rtnl_lock();
lock_sock(sk);
+ /* Another thread has converted the socket into IPv4 with
+ * IPV6_ADDRFORM concurrently.
+ */
+ if (unlikely(sk->sk_family != AF_INET6))
+ goto unlock;
+
switch (optname) {
case IPV6_ADDRFORM:
@@ -976,6 +982,7 @@
break;
}
+unlock:
release_sock(sk);
if (needs_rtnl)
rtnl_unlock();
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index 4aef6ba..bf95513 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -179,7 +179,7 @@
fl6.flowi6_oif = l3mdev_master_ifindex(skb_dst(oldskb)->dev);
fl6.flowi6_mark = IP6_REPLY_MARK(net, oldskb->mark);
- security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6));
+ security_skb_classify_flow(oldskb, flowi6_to_flowi_common(&fl6));
dst = ip6_route_output(net, NULL, &fl6);
if (dst->error) {
dst_release(dst);
diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c
index 8b5193e..3a00d95 100644
--- a/net/ipv6/netfilter/nft_dup_ipv6.c
+++ b/net/ipv6/netfilter/nft_dup_ipv6.c
@@ -13,8 +13,8 @@
#include <net/netfilter/ipv6/nf_dup_ipv6.h>
struct nft_dup_ipv6 {
- enum nft_registers sreg_addr:8;
- enum nft_registers sreg_dev:8;
+ u8 sreg_addr;
+ u8 sreg_dev;
};
static void nft_dup_ipv6_eval(const struct nft_expr *expr,
@@ -38,16 +38,16 @@
if (tb[NFTA_DUP_SREG_ADDR] == NULL)
return -EINVAL;
- priv->sreg_addr = nft_parse_register(tb[NFTA_DUP_SREG_ADDR]);
- err = nft_validate_register_load(priv->sreg_addr, sizeof(struct in6_addr));
+ err = nft_parse_register_load(tb[NFTA_DUP_SREG_ADDR], &priv->sreg_addr,
+ sizeof(struct in6_addr));
if (err < 0)
return err;
- if (tb[NFTA_DUP_SREG_DEV] != NULL) {
- priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]);
- return nft_validate_register_load(priv->sreg_dev, sizeof(int));
- }
- return 0;
+ if (tb[NFTA_DUP_SREG_DEV])
+ err = nft_parse_register_load(tb[NFTA_DUP_SREG_DEV],
+ &priv->sreg_dev, sizeof(int));
+
+ return err;
}
static int nft_dup_ipv6_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index 92f3235..602743f 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -37,6 +37,9 @@
if (ipv6_addr_type(&fl6->daddr) & IPV6_ADDR_LINKLOCAL) {
lookup_flags |= RT6_LOOKUP_F_IFACE;
fl6->flowi6_oif = get_ifindex(dev ? dev : pkt->skb->dev);
+ } else if ((priv->flags & NFTA_FIB_F_IIF) &&
+ (netif_is_l3_master(dev) || netif_is_l3_slave(dev))) {
+ fl6->flowi6_oif = dev->ifindex;
}
if (ipv6_addr_type(&fl6->saddr) & IPV6_ADDR_UNICAST)
@@ -193,7 +196,8 @@
if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL))
goto put_rt_err;
- if (oif && oif != rt->rt6i_idev->dev)
+ if (oif && oif != rt->rt6i_idev->dev &&
+ l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) != oif->ifindex)
goto put_rt_err;
nft_fib_store_result(dest, priv, rt->rt6i_idev->dev);
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 6caa062..135e3a0 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -22,6 +22,11 @@
#include <linux/proc_fs.h>
#include <net/ping.h>
+static void ping_v6_destroy(struct sock *sk)
+{
+ inet6_destroy_sock(sk);
+}
+
/* Compatibility glue so we can support IPv6 when it's compiled as a module */
static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len,
int *addr_len)
@@ -111,7 +116,7 @@
fl6.flowi6_uid = sk->sk_uid;
fl6.fl6_icmp_type = user_icmph.icmp6_type;
fl6.fl6_icmp_code = user_icmph.icmp6_code;
- security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+ security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
ipcm6_init_sk(&ipc6, np);
ipc6.sockc.mark = sk->sk_mark;
@@ -166,6 +171,7 @@
.owner = THIS_MODULE,
.init = ping_init_sock,
.close = ping_close,
+ .destroy = ping_v6_destroy,
.connect = ip6_datagram_connect_v6_only,
.disconnect = __udp_disconnect,
.setsockopt = ipv6_setsockopt,
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 3834905..31eb54e 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -915,7 +915,7 @@
fl6.flowi6_oif = np->mcast_oif;
else if (!fl6.flowi6_oif)
fl6.flowi6_oif = np->ucast_oif;
- security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+ security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
if (hdrincl)
fl6.flowi6_flags |= FLOWI_FLAG_KNOWN_NH;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 352e645..803d1aa 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3192,6 +3192,7 @@
int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
+ unsigned int val;
int entries;
entries = dst_entries_get_fast(ops);
@@ -3202,13 +3203,13 @@
entries <= rt_max_size)
goto out;
- net->ipv6.ip6_rt_gc_expire++;
- fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
+ fib6_run_gc(atomic_inc_return(&net->ipv6.ip6_rt_gc_expire), net, true);
entries = dst_entries_get_slow(ops);
if (entries < ops->gc_thresh)
- net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
+ atomic_set(&net->ipv6.ip6_rt_gc_expire, rt_gc_timeout >> 1);
out:
- net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
+ val = atomic_read(&net->ipv6.ip6_rt_gc_expire);
+ atomic_set(&net->ipv6.ip6_rt_gc_expire, val - (val >> rt_elasticity));
return entries > rt_max_size;
}
@@ -4398,7 +4399,7 @@
struct inet6_dev *idev;
int type;
- if (netif_is_l3_master(skb->dev) &&
+ if (netif_is_l3_master(skb->dev) ||
dst->dev == net->loopback_dev)
idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
else
@@ -4478,8 +4479,15 @@
}
f6i = ip6_route_info_create(&cfg, gfp_flags, NULL);
- if (!IS_ERR(f6i))
+ if (!IS_ERR(f6i)) {
f6i->dst_nocount = true;
+
+ if (!anycast &&
+ (net->ipv6.devconf_all->disable_policy ||
+ idev->cnf.disable_policy))
+ f6i->dst_nopolicy = true;
+ }
+
return f6i;
}
@@ -5633,7 +5641,7 @@
if (nexthop_is_blackhole(rt->nh))
rtm->rtm_type = RTN_BLACKHOLE;
- if (net->ipv4.sysctl_nexthop_compat_mode &&
+ if (READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode) &&
rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0)
goto nla_put_failure;
@@ -6363,7 +6371,7 @@
net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
net->ipv6.sysctl.skip_notify_on_dev_down = 0;
- net->ipv6.ip6_rt_gc_expire = 30*HZ;
+ atomic_set(&net->ipv6.ip6_rt_gc_expire, 30*HZ);
ret = 0;
out:
@@ -6397,10 +6405,16 @@
static int __net_init ip6_route_net_init_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
- proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops,
- sizeof(struct ipv6_route_iter));
- proc_create_net_single("rt6_stats", 0444, net->proc_net,
- rt6_stats_seq_show, NULL);
+ if (!proc_create_net("ipv6_route", 0, net->proc_net,
+ &ipv6_route_seq_ops,
+ sizeof(struct ipv6_route_iter)))
+ return -ENOMEM;
+
+ if (!proc_create_net_single("rt6_stats", 0444, net->proc_net,
+ rt6_stats_seq_show, NULL)) {
+ remove_proc_entry("ipv6_route", net->proc_net);
+ return -ENOMEM;
+ }
#endif
return 0;
}
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index d2f8138..2278c02 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -135,6 +135,11 @@
goto out_unlock;
}
+ if (slen > nla_len(info->attrs[SEG6_ATTR_SECRET])) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
if (hinfo) {
err = seg6_hmac_info_del(net, hmackeyid);
if (err)
diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
index 85dddfe..552bce1 100644
--- a/net/ipv6/seg6_hmac.c
+++ b/net/ipv6/seg6_hmac.c
@@ -400,7 +400,6 @@
{
return seg6_hmac_init_algo();
}
-EXPORT_SYMBOL(seg6_hmac_init);
int __net_init seg6_hmac_net_init(struct net *net)
{
@@ -410,7 +409,6 @@
return 0;
}
-EXPORT_SYMBOL(seg6_hmac_net_init);
void seg6_hmac_exit(void)
{
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 4d4399c..40ac232 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -188,6 +188,8 @@
}
#endif
+ hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+
skb_postpush_rcsum(skb, hdr, tot_len);
return 0;
@@ -240,6 +242,8 @@
}
#endif
+ hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+
skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen);
return 0;
@@ -301,7 +305,6 @@
break;
}
- ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
return 0;
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index eba2327..11f7da4 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -435,7 +435,6 @@
if (err)
goto drop;
- ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
seg6_lookup_nexthop(skb, NULL, 0);
@@ -467,7 +466,6 @@
if (err)
goto drop;
- ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
seg6_lookup_nexthop(skb, NULL, 0);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index bab0e99..1ce486a 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -321,9 +321,7 @@
kcalloc(cmax, sizeof(*kp), GFP_KERNEL | __GFP_NOWARN) :
NULL;
- rcu_read_lock();
-
- ca = t->prl_count < cmax ? t->prl_count : cmax;
+ ca = min(t->prl_count, cmax);
if (!kp) {
/* We don't try hard to allocate much memory for
@@ -338,7 +336,7 @@
}
}
- c = 0;
+ rcu_read_lock();
for_each_prl_rcu(t->prl) {
if (c >= cmax)
break;
@@ -350,7 +348,7 @@
if (kprl.addr != htonl(INADDR_ANY))
break;
}
-out:
+
rcu_read_unlock();
len = sizeof(*kp) * c;
@@ -359,7 +357,7 @@
ret = -EFAULT;
kfree(kp);
-
+out:
return ret;
}
@@ -1125,10 +1123,12 @@
if (tdev && !netif_is_l3_master(tdev)) {
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
+ int mtu;
- dev->mtu = tdev->mtu - t_hlen;
- if (dev->mtu < IPV6_MIN_MTU)
- dev->mtu = IPV6_MIN_MTU;
+ mtu = tdev->mtu - t_hlen;
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+ WRITE_ONCE(dev->mtu, mtu);
}
}
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 9b6cae1..12ae817 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -141,7 +141,8 @@
__u8 rcv_wscale;
u32 tsoff = 0;
- if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) ||
+ !th->ack || th->rst)
goto out;
if (tcp_synq_no_recent_overflow(sk))
@@ -170,7 +171,8 @@
goto out;
ret = NULL;
- req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, sk, skb);
+ req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops,
+ &tcp_request_sock_ipv6_ops, sk, skb);
if (!req)
goto out;
@@ -233,7 +235,7 @@
fl6.fl6_dport = ireq->ir_rmt_port;
fl6.fl6_sport = inet_sk(sk)->inet_sport;
fl6.flowi6_uid = sk->sk_uid;
- security_req_classify_flow(req, flowi6_to_flowi(&fl6));
+ security_req_classify_flow(req, flowi6_to_flowi_common(&fl6));
dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
if (IS_ERR(dst))
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index df33145..c599e14 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -107,7 +107,7 @@
if (dst && dst_hold_safe(dst)) {
const struct rt6_info *rt = (const struct rt6_info *)dst;
- sk->sk_rx_dst = dst;
+ rcu_assign_pointer(sk->sk_rx_dst, dst);
inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
}
@@ -278,7 +278,7 @@
opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
final_p = fl6_update_dst(&fl6, opt, &final);
- security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+ security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
if (IS_ERR(dst)) {
@@ -339,6 +339,8 @@
late_failure:
tcp_set_state(sk, TCP_CLOSE);
+ if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
+ inet_reset_saddr(sk);
failure:
inet->inet_dport = 0;
sk->sk_route_caps = 0;
@@ -542,7 +544,7 @@
if (np->repflow && ireq->pktopts)
fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
- tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
+ tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
(np->tclass & INET_ECN_MASK) :
np->tclass;
@@ -975,7 +977,7 @@
fl6.fl6_dport = t1->dest;
fl6.fl6_sport = t1->source;
fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
- security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
+ security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
/* Pass a socket to ip6_dst_lookup either it is for RST
* Underlying function will use this to retrieve the network
@@ -1344,7 +1346,7 @@
/* Set ToS of the new socket based upon the value of incoming SYN.
* ECT bits are set later in tcp_init_transfer().
*/
- if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
/* Clone native IPv6 options from listening socket (if any)
@@ -1482,15 +1484,18 @@
opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
- struct dst_entry *dst = sk->sk_rx_dst;
+ struct dst_entry *dst;
+
+ dst = rcu_dereference_protected(sk->sk_rx_dst,
+ lockdep_sock_is_held(sk));
sock_rps_save_rxhash(sk, skb);
sk_mark_napi_id(sk, skb);
if (dst) {
if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
+ RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
dst_release(dst);
- sk->sk_rx_dst = NULL;
}
}
@@ -1815,7 +1820,7 @@
goto discard_it;
}
-INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
+void tcp_v6_early_demux(struct sk_buff *skb)
{
const struct ipv6hdr *hdr;
const struct tcphdr *th;
@@ -1842,7 +1847,7 @@
skb->sk = sk;
skb->destructor = sock_edemux;
if (sk_fullsock(sk)) {
- struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
+ struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
if (dst)
dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
@@ -2166,12 +2171,7 @@
};
EXPORT_SYMBOL_GPL(tcpv6_prot);
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct inet6_protocol tcpv6_protocol = {
- .early_demux = tcp_v6_early_demux,
- .early_demux_handler = tcp_v6_early_demux,
+static const struct inet6_protocol tcpv6_protocol = {
.handler = tcp_v6_rcv,
.err_handler = tcp_v6_err,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 069551a..1805cc5 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -179,7 +179,7 @@
result = lookup_reuseport(net, sk, skb,
saddr, sport, daddr, hnum);
/* Fall back to scoring if group has connections */
- if (result && !reuseport_has_conns(sk, false))
+ if (result && !reuseport_has_conns(sk))
return result;
result = result ? : sk;
@@ -941,7 +941,7 @@
struct dst_entry *dst = skb_dst(skb);
int ret;
- if (unlikely(sk->sk_rx_dst != dst))
+ if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst))
udp6_sk_rx_dst_set(sk, dst);
if (!uh->check && !udp_sk(sk)->no_check6_rx) {
@@ -1019,7 +1019,7 @@
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
if (sk->sk_state == TCP_ESTABLISHED &&
- INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif, sdif))
+ inet6_match(net, sk, rmt_addr, loc_addr, ports, dif, sdif))
return sk;
/* Only check first socket in chain */
break;
@@ -1027,7 +1027,7 @@
return NULL;
}
-INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb)
+void udp_v6_early_demux(struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
const struct udphdr *uh;
@@ -1055,7 +1055,7 @@
skb->sk = sk;
skb->destructor = sock_efree;
- dst = READ_ONCE(sk->sk_rx_dst);
+ dst = rcu_dereference(sk->sk_rx_dst);
if (dst)
dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
@@ -1497,7 +1497,7 @@
} else if (!fl6.flowi6_oif)
fl6.flowi6_oif = np->ucast_oif;
- security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+ security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
if (ipc6.tclass < 0)
ipc6.tclass = np->tclass;
@@ -1610,8 +1610,10 @@
if (encap_destroy)
encap_destroy(sk);
}
- if (up->encap_enabled)
+ if (up->encap_enabled) {
static_branch_dec(&udpv6_encap_needed_key);
+ udp_encap_disable();
+ }
}
inet6_destroy_sock(sk);
@@ -1638,12 +1640,7 @@
return ipv6_getsockopt(sk, level, optname, optval, optlen);
}
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct inet6_protocol udpv6_protocol = {
- .early_demux = udp_v6_early_demux,
- .early_demux_handler = udp_v6_early_demux,
+static const struct inet6_protocol udpv6_protocol = {
.handler = udpv6_rcv,
.err_handler = udpv6_err,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 6abb45a..ee349c2 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -52,6 +52,19 @@
return xfrm_output(sk, skb);
}
+static int xfrm6_noneed_fragment(struct sk_buff *skb)
+{
+ struct frag_hdr *fh;
+ u8 prevhdr = ipv6_hdr(skb)->nexthdr;
+
+ if (prevhdr != NEXTHDR_FRAGMENT)
+ return 0;
+ fh = (struct frag_hdr *)(skb->data + sizeof(struct ipv6hdr));
+ if (fh->nexthdr == NEXTHDR_ESP || fh->nexthdr == NEXTHDR_AUTH)
+ return 1;
+ return 0;
+}
+
static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
@@ -80,6 +93,9 @@
xfrm6_local_rxpmtu(skb, mtu);
kfree_skb(skb);
return -EMSGSIZE;
+ } else if (toobig && xfrm6_noneed_fragment(skb)) {
+ skb->ignore_df = 1;
+ goto skip_frag;
} else if (!skb->ignore_df && toobig && skb->sk) {
xfrm_local_error(skb, mtu);
kfree_skb(skb);
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index af7a4b8..247296e 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -289,9 +289,13 @@
if (ret)
goto out_state;
- register_pernet_subsys(&xfrm6_net_ops);
+ ret = register_pernet_subsys(&xfrm6_net_ops);
+ if (ret)
+ goto out_protocol;
out:
return ret;
+out_protocol:
+ xfrm6_protocol_fini();
out_state:
xfrm6_state_fini();
out_policy:
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 56dad95..32b516a 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -161,7 +161,8 @@
/* Buffer limit is okay now, add to ready list */
list_add_tail(&kcm->wait_rx_list,
&kcm->mux->kcm_rx_waiters);
- kcm->rx_wait = true;
+ /* paired with lockless reads in kcm_rfree() */
+ WRITE_ONCE(kcm->rx_wait, true);
}
static void kcm_rfree(struct sk_buff *skb)
@@ -177,7 +178,7 @@
/* For reading rx_wait and rx_psock without holding lock */
smp_mb__after_atomic();
- if (!kcm->rx_wait && !kcm->rx_psock &&
+ if (!READ_ONCE(kcm->rx_wait) && !READ_ONCE(kcm->rx_psock) &&
sk_rmem_alloc_get(sk) < sk->sk_rcvlowat) {
spin_lock_bh(&mux->rx_lock);
kcm_rcv_ready(kcm);
@@ -220,7 +221,7 @@
struct sk_buff *skb;
struct kcm_sock *kcm;
- while ((skb = __skb_dequeue(head))) {
+ while ((skb = skb_dequeue(head))) {
/* Reset destructor to avoid calling kcm_rcv_ready */
skb->destructor = sock_rfree;
skb_orphan(skb);
@@ -236,7 +237,8 @@
if (kcm_queue_rcv_skb(&kcm->sk, skb)) {
/* Should mean socket buffer full */
list_del(&kcm->wait_rx_list);
- kcm->rx_wait = false;
+ /* paired with lockless reads in kcm_rfree() */
+ WRITE_ONCE(kcm->rx_wait, false);
/* Commit rx_wait to read in kcm_free */
smp_wmb();
@@ -279,10 +281,12 @@
kcm = list_first_entry(&mux->kcm_rx_waiters,
struct kcm_sock, wait_rx_list);
list_del(&kcm->wait_rx_list);
- kcm->rx_wait = false;
+ /* paired with lockless reads in kcm_rfree() */
+ WRITE_ONCE(kcm->rx_wait, false);
psock->rx_kcm = kcm;
- kcm->rx_psock = psock;
+ /* paired with lockless reads in kcm_rfree() */
+ WRITE_ONCE(kcm->rx_psock, psock);
spin_unlock_bh(&mux->rx_lock);
@@ -309,7 +313,8 @@
spin_lock_bh(&mux->rx_lock);
psock->rx_kcm = NULL;
- kcm->rx_psock = NULL;
+ /* paired with lockless reads in kcm_rfree() */
+ WRITE_ONCE(kcm->rx_psock, NULL);
/* Commit kcm->rx_psock before sk_rmem_alloc_get to sync with
* kcm_rfree
@@ -1079,53 +1084,18 @@
return err;
}
-static struct sk_buff *kcm_wait_data(struct sock *sk, int flags,
- long timeo, int *err)
-{
- struct sk_buff *skb;
-
- while (!(skb = skb_peek(&sk->sk_receive_queue))) {
- if (sk->sk_err) {
- *err = sock_error(sk);
- return NULL;
- }
-
- if (sock_flag(sk, SOCK_DONE))
- return NULL;
-
- if ((flags & MSG_DONTWAIT) || !timeo) {
- *err = -EAGAIN;
- return NULL;
- }
-
- sk_wait_data(sk, &timeo, NULL);
-
- /* Handle signals */
- if (signal_pending(current)) {
- *err = sock_intr_errno(timeo);
- return NULL;
- }
- }
-
- return skb;
-}
-
static int kcm_recvmsg(struct socket *sock, struct msghdr *msg,
size_t len, int flags)
{
+ int noblock = flags & MSG_DONTWAIT;
struct sock *sk = sock->sk;
struct kcm_sock *kcm = kcm_sk(sk);
int err = 0;
- long timeo;
struct strp_msg *stm;
int copied = 0;
struct sk_buff *skb;
- timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
-
- lock_sock(sk);
-
- skb = kcm_wait_data(sk, flags, timeo, &err);
+ skb = skb_recv_datagram(sk, flags, noblock, &err);
if (!skb)
goto out;
@@ -1156,14 +1126,11 @@
/* Finished with message */
msg->msg_flags |= MSG_EOR;
KCM_STATS_INCR(kcm->stats.rx_msgs);
- skb_unlink(skb, &sk->sk_receive_queue);
- kfree_skb(skb);
}
}
out:
- release_sock(sk);
-
+ skb_free_datagram(sk, skb);
return copied ? : err;
}
@@ -1171,9 +1138,9 @@
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
{
+ int noblock = flags & MSG_DONTWAIT;
struct sock *sk = sock->sk;
struct kcm_sock *kcm = kcm_sk(sk);
- long timeo;
struct strp_msg *stm;
int err = 0;
ssize_t copied;
@@ -1181,11 +1148,7 @@
/* Only support splice for SOCKSEQPACKET */
- timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
-
- lock_sock(sk);
-
- skb = kcm_wait_data(sk, flags, timeo, &err);
+ skb = skb_recv_datagram(sk, flags, noblock, &err);
if (!skb)
goto err_out;
@@ -1213,13 +1176,11 @@
* finish reading the message.
*/
- release_sock(sk);
-
+ skb_free_datagram(sk, skb);
return copied;
err_out:
- release_sock(sk);
-
+ skb_free_datagram(sk, skb);
return err;
}
@@ -1239,7 +1200,8 @@
if (!kcm->rx_psock) {
if (kcm->rx_wait) {
list_del(&kcm->wait_rx_list);
- kcm->rx_wait = false;
+ /* paired with lockless reads in kcm_rfree() */
+ WRITE_ONCE(kcm->rx_wait, false);
}
requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue);
@@ -1411,12 +1373,6 @@
psock->sk = csk;
psock->bpf_prog = prog;
- err = strp_init(&psock->strp, csk, &cb);
- if (err) {
- kmem_cache_free(kcm_psockp, psock);
- goto out;
- }
-
write_lock_bh(&csk->sk_callback_lock);
/* Check if sk_user_data is aready by KCM or someone else.
@@ -1424,13 +1380,18 @@
*/
if (csk->sk_user_data) {
write_unlock_bh(&csk->sk_callback_lock);
- strp_stop(&psock->strp);
- strp_done(&psock->strp);
kmem_cache_free(kcm_psockp, psock);
err = -EALREADY;
goto out;
}
+ err = strp_init(&psock->strp, csk, &cb);
+ if (err) {
+ write_unlock_bh(&csk->sk_callback_lock);
+ kmem_cache_free(kcm_psockp, psock);
+ goto out;
+ }
+
psock->save_data_ready = csk->sk_data_ready;
psock->save_write_space = csk->sk_write_space;
psock->save_state_change = csk->sk_state_change;
@@ -1793,7 +1754,8 @@
if (kcm->rx_wait) {
list_del(&kcm->wait_rx_list);
- kcm->rx_wait = false;
+ /* paired with lockless reads in kcm_rfree() */
+ WRITE_ONCE(kcm->rx_wait, false);
}
/* Move any pending receive messages to other kcm sockets */
requeue_rx_msgs(mux, &sk->sk_receive_queue);
@@ -1838,10 +1800,10 @@
kcm = kcm_sk(sk);
mux = kcm->mux;
+ lock_sock(sk);
sock_orphan(sk);
kfree_skb(kcm->seq_skb);
- lock_sock(sk);
/* Purge queue under lock to avoid race condition with tx_work trying
* to act when queue is nonempty. If tx_work runs after this point
* it will just return.
diff --git a/net/key/af_key.c b/net/key/af_key.c
index d1364b8..8bc7d39 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1701,9 +1701,12 @@
pfk->registered |= (1<<hdr->sadb_msg_satype);
}
+ mutex_lock(&pfkey_mutex);
xfrm_probe_algs();
- supp_skb = compose_sadb_supported(hdr, GFP_KERNEL);
+ supp_skb = compose_sadb_supported(hdr, GFP_KERNEL | __GFP_ZERO);
+ mutex_unlock(&pfkey_mutex);
+
if (!supp_skb) {
if (hdr->sadb_msg_satype != SADB_SATYPE_UNSPEC)
pfk->registered &= ~(1<<hdr->sadb_msg_satype);
@@ -2830,6 +2833,10 @@
void *ext_hdrs[SADB_EXT_MAX];
int err;
+ /* Non-zero return value of pfkey_broadcast() does not always signal
+ * an error and even on an actual error we may still want to process
+ * the message so rather ignore the return value.
+ */
pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL,
BROADCAST_PROMISC_ONLY, NULL, sock_net(sk));
@@ -2938,9 +2945,10 @@
return sz + sizeof(struct sadb_prop);
}
-static void dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
+static int dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
{
struct sadb_prop *p;
+ int sz = 0;
int i;
p = skb_put(skb, sizeof(struct sadb_prop));
@@ -2968,13 +2976,17 @@
c->sadb_comb_soft_addtime = 20*60*60;
c->sadb_comb_hard_usetime = 8*60*60;
c->sadb_comb_soft_usetime = 7*60*60;
+ sz += sizeof(*c);
}
}
+
+ return sz + sizeof(*p);
}
-static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
+static int dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
{
struct sadb_prop *p;
+ int sz = 0;
int i, k;
p = skb_put(skb, sizeof(struct sadb_prop));
@@ -3016,8 +3028,11 @@
c->sadb_comb_soft_addtime = 20*60*60;
c->sadb_comb_hard_usetime = 8*60*60;
c->sadb_comb_soft_usetime = 7*60*60;
+ sz += sizeof(*c);
}
}
+
+ return sz + sizeof(*p);
}
static int key_notify_policy_expire(struct xfrm_policy *xp, const struct km_event *c)
@@ -3147,6 +3162,7 @@
struct sadb_x_sec_ctx *sec_ctx;
struct xfrm_sec_ctx *xfrm_ctx;
int ctx_size = 0;
+ int alg_size = 0;
sockaddr_size = pfkey_sockaddr_size(x->props.family);
if (!sockaddr_size)
@@ -3158,16 +3174,16 @@
sizeof(struct sadb_x_policy);
if (x->id.proto == IPPROTO_AH)
- size += count_ah_combs(t);
+ alg_size = count_ah_combs(t);
else if (x->id.proto == IPPROTO_ESP)
- size += count_esp_combs(t);
+ alg_size = count_esp_combs(t);
if ((xfrm_ctx = x->security)) {
ctx_size = PFKEY_ALIGN8(xfrm_ctx->ctx_len);
size += sizeof(struct sadb_x_sec_ctx) + ctx_size;
}
- skb = alloc_skb(size + 16, GFP_ATOMIC);
+ skb = alloc_skb(size + alg_size + 16, GFP_ATOMIC);
if (skb == NULL)
return -ENOMEM;
@@ -3221,10 +3237,13 @@
pol->sadb_x_policy_priority = xp->priority;
/* Set sadb_comb's. */
+ alg_size = 0;
if (x->id.proto == IPPROTO_AH)
- dump_ah_combs(skb, t);
+ alg_size = dump_ah_combs(skb, t);
else if (x->id.proto == IPPROTO_ESP)
- dump_esp_combs(skb, t);
+ alg_size = dump_esp_combs(skb, t);
+
+ hdr->sadb_msg_len += alg_size / 8;
/* security context */
if (xfrm_ctx) {
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 561b6d6..dc8987e 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1480,11 +1480,15 @@
tunnel->l2tp_net = net;
pn = l2tp_pernet(net);
+ sk = sock->sk;
+ sock_hold(sk);
+ tunnel->sock = sk;
+
spin_lock_bh(&pn->l2tp_tunnel_list_lock);
list_for_each_entry(tunnel_walk, &pn->l2tp_tunnel_list, list) {
if (tunnel_walk->tunnel_id == tunnel->tunnel_id) {
spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
-
+ sock_put(sk);
ret = -EEXIST;
goto err_sock;
}
@@ -1492,10 +1496,6 @@
list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list);
spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
- sk = sock->sk;
- sock_hold(sk);
- tunnel->sock = sk;
-
if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
struct udp_tunnel_sock_cfg udp_cfg = {
.sk_user_data = tunnel,
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index e5e5036..d54dbd0 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -502,14 +502,15 @@
struct ipcm6_cookie ipc6;
int addr_len = msg->msg_namelen;
int transhdrlen = 4; /* zero session-id */
- int ulen = len + transhdrlen;
+ int ulen;
int err;
/* Rough check on arithmetic overflow,
* better check is made in ip6_append_data().
*/
- if (len > INT_MAX)
+ if (len > INT_MAX - transhdrlen)
return -EMSGSIZE;
+ ulen = len + transhdrlen;
/* Mirror BSD error message compatibility */
if (msg->msg_flags & MSG_OOB)
@@ -606,7 +607,7 @@
else if (!fl6.flowi6_oif)
fl6.flowi6_oif = np->ucast_oif;
- security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+ security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
if (ipc6.tclass < 0)
ipc6.tclass = np->tclass;
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index 864326f..f2c3a61 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -147,7 +147,7 @@
dev = dev_get_by_index_rcu(net, ifindex);
while (dev && !netif_is_l3_master(dev))
- dev = netdev_master_upper_dev_get(dev);
+ dev = netdev_master_upper_dev_get_rcu(dev);
return dev ? dev->ifindex : 0;
}
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 8010967..c6a7f1c 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -3357,9 +3357,6 @@
case NL80211_IFTYPE_MESH_POINT: {
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
- if (params->chandef.width != sdata->vif.bss_conf.chandef.width)
- return -EINVAL;
-
/* changes into another band are not supported */
if (sdata->vif.bss_conf.chandef.chan->band !=
params->chandef.chan->band)
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 8f48aff..5639a71 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -1652,12 +1652,9 @@
if (new_ctx->replace_state == IEEE80211_CHANCTX_REPLACE_NONE) {
if (old_ctx)
- err = ieee80211_vif_use_reserved_reassign(sdata);
- else
- err = ieee80211_vif_use_reserved_assign(sdata);
+ return ieee80211_vif_use_reserved_reassign(sdata);
- if (err)
- return err;
+ return ieee80211_vif_use_reserved_assign(sdata);
}
/*
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index a7ac53a..78ae58e 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -541,6 +541,10 @@
sdata_assert_lock(sdata);
+ /* When not connected/joined, sending CSA doesn't make sense. */
+ if (ifibss->state != IEEE80211_IBSS_MLME_JOINED)
+ return -ENOLINK;
+
/* update cfg80211 bss information with the new channel */
if (!is_zero_ether_addr(ifibss->bssid)) {
cbss = cfg80211_get_bss(sdata->local->hw.wiphy,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index fe8f586..63499db 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1103,6 +1103,9 @@
* a scan complete for an aborted scan.
* @SCAN_HW_CANCELLED: Set for our scan work function when the scan is being
* cancelled.
+ * @SCAN_BEACON_WAIT: Set whenever we're passive scanning because of radar/no-IR
+ * and could send a probe request after receiving a beacon.
+ * @SCAN_BEACON_DONE: Beacon received, we can now send a probe request
*/
enum {
SCAN_SW_SCANNING,
@@ -1111,6 +1114,8 @@
SCAN_COMPLETED,
SCAN_ABORTED,
SCAN_HW_CANCELLED,
+ SCAN_BEACON_WAIT,
+ SCAN_BEACON_DONE,
};
/**
@@ -1480,7 +1485,6 @@
const u8 *supp_rates;
const u8 *ds_params;
const struct ieee80211_tim_ie *tim;
- const u8 *challenge;
const u8 *rsn;
const u8 *rsnx;
const u8 *erp_info;
@@ -1533,7 +1537,6 @@
u8 ssid_len;
u8 supp_rates_len;
u8 tim_len;
- u8 challenge_len;
u8 rsn_len;
u8 rsnx_len;
u8 ext_supp_rates_len;
@@ -1548,6 +1551,8 @@
u8 country_elem_len;
u8 bssid_index_len;
+ void *nontx_profile;
+
/* whether a parse error occurred while retrieving these elements */
bool parse_error;
};
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 7389302..ae90ac3 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1349,8 +1349,10 @@
ieee80211_led_exit(local);
destroy_workqueue(local->workqueue);
fail_workqueue:
- if (local->wiphy_ciphers_allocated)
+ if (local->wiphy_ciphers_allocated) {
kfree(local->hw.wiphy->cipher_suites);
+ local->wiphy_ciphers_allocated = false;
+ }
kfree(local->int_scan_req);
return result;
}
@@ -1420,8 +1422,10 @@
mutex_destroy(&local->iflist_mtx);
mutex_destroy(&local->mtx);
- if (local->wiphy_ciphers_allocated)
+ if (local->wiphy_ciphers_allocated) {
kfree(local->hw.wiphy->cipher_suites);
+ local->wiphy_ciphers_allocated = false;
+ }
idr_for_each(&local->ack_status_frames,
ieee80211_free_ack_frame, NULL);
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 870c8ea..c2b051e 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -718,7 +718,7 @@
void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb)
{
- kfree_skb(skb);
+ ieee80211_free_txskb(&sdata->local->hw, skb);
sdata->u.mesh.mshstats.dropped_frames_no_route++;
}
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 0dba353..c52b8eb 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2899,14 +2899,14 @@
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_mgd_auth_data *auth_data = sdata->u.mgd.auth_data;
+ const struct element *challenge;
u8 *pos;
- struct ieee802_11_elems elems;
u32 tx_flags = 0;
pos = mgmt->u.auth.variable;
- ieee802_11_parse_elems(pos, len - (pos - (u8 *)mgmt), false, &elems,
- mgmt->bssid, auth_data->bss->bssid);
- if (!elems.challenge)
+ challenge = cfg80211_find_elem(WLAN_EID_CHALLENGE, pos,
+ len - (pos - (u8 *)mgmt));
+ if (!challenge)
return;
auth_data->expected_transaction = 4;
drv_mgd_prepare_tx(sdata->local, sdata, 0);
@@ -2914,7 +2914,8 @@
tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS |
IEEE80211_TX_INTFL_MLME_CONN_TX;
ieee80211_send_auth(sdata, 3, auth_data->algorithm, 0,
- elems.challenge - 2, elems.challenge_len + 2,
+ (void *)challenge,
+ challenge->datalen + sizeof(*challenge),
auth_data->bss->bssid, auth_data->bss->bssid,
auth_data->key, auth_data->key_len,
auth_data->key_idx, tx_flags);
@@ -3299,7 +3300,7 @@
}
capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info);
ieee802_11_parse_elems(pos, len - (pos - (u8 *)mgmt), false, elems,
- mgmt->bssid, assoc_data->bss->bssid);
+ mgmt->bssid, NULL);
if (elems->aid_resp)
aid = le16_to_cpu(elems->aid_resp->aid);
@@ -3393,6 +3394,7 @@
sdata_info(sdata,
"AP bug: VHT operation missing from AssocResp\n");
}
+ kfree(bss_elems.nontx_profile);
}
/*
@@ -3528,6 +3530,12 @@
cbss->transmitted_bss->bssid);
bss_conf->bssid_indicator = cbss->max_bssid_indicator;
bss_conf->bssid_index = cbss->bssid_index;
+ } else {
+ bss_conf->nontransmitted = false;
+ memset(bss_conf->transmitter_bssid, 0,
+ sizeof(bss_conf->transmitter_bssid));
+ bss_conf->bssid_indicator = 0;
+ bss_conf->bssid_index = 0;
}
/*
@@ -3701,7 +3709,7 @@
return;
ieee802_11_parse_elems(pos, len - (pos - (u8 *)mgmt), false, &elems,
- mgmt->bssid, assoc_data->bss->bssid);
+ mgmt->bssid, NULL);
if (status_code == WLAN_STATUS_ASSOC_REJECTED_TEMPORARILY &&
elems.timeout_int &&
@@ -4038,6 +4046,7 @@
ifmgd->assoc_data->timeout = jiffies;
ifmgd->assoc_data->timeout_started = true;
run_again(sdata, ifmgd->assoc_data->timeout);
+ kfree(elems.nontx_profile);
return;
}
@@ -4215,7 +4224,7 @@
ieee80211_report_disconnect(sdata, deauth_buf,
sizeof(deauth_buf), true,
WLAN_REASON_DEAUTH_LEAVING);
- return;
+ goto free;
}
if (sta && elems.opmode_notif)
@@ -4230,6 +4239,8 @@
elems.cisco_dtpc_elem);
ieee80211_bss_info_change_notify(sdata, changed);
+free:
+ kfree(elems.nontx_profile);
}
void ieee80211_sta_rx_queued_ext(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 1e7614a..97a63b9 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1387,8 +1387,7 @@
goto dont_reorder;
/* not part of a BA session */
- if (ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_BLOCKACK &&
- ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_NORMAL)
+ if (ack_policy == IEEE80211_QOS_CTL_ACK_POLICY_NOACK)
goto dont_reorder;
/* new, potentially un-ordered, ampdu frame - process it */
@@ -1976,10 +1975,11 @@
if (mmie_keyidx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS ||
mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS +
- NUM_DEFAULT_BEACON_KEYS) {
- cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev,
- skb->data,
- skb->len);
+ NUM_DEFAULT_BEACON_KEYS) {
+ if (rx->sdata->dev)
+ cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev,
+ skb->data,
+ skb->len);
return RX_DROP_MONITOR; /* unexpected BIP keyidx */
}
@@ -2127,7 +2127,8 @@
/* either the frame has been decrypted or will be dropped */
status->flag |= RX_FLAG_DECRYPTED;
- if (unlikely(ieee80211_is_beacon(fc) && result == RX_DROP_UNUSABLE))
+ if (unlikely(ieee80211_is_beacon(fc) && result == RX_DROP_UNUSABLE &&
+ rx->sdata->dev))
cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev,
skb->data, skb->len);
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 6b50cb5..b241ff8 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -227,6 +227,8 @@
rx_status, beacon);
}
+ kfree(elems.nontx_profile);
+
return bss;
}
@@ -277,6 +279,16 @@
if (likely(!sdata1 && !sdata2))
return;
+ if (test_and_clear_bit(SCAN_BEACON_WAIT, &local->scanning)) {
+ /*
+ * we were passive scanning because of radar/no-IR, but
+ * the beacon/proberesp rx gives us an opportunity to upgrade
+ * to active scan
+ */
+ set_bit(SCAN_BEACON_DONE, &local->scanning);
+ ieee80211_queue_delayed_work(&local->hw, &local->scan_work, 0);
+ }
+
if (ieee80211_is_probe_resp(mgmt->frame_control)) {
struct cfg80211_scan_request *scan_req;
struct cfg80211_sched_scan_request *sched_scan_req;
@@ -451,16 +463,19 @@
scan_req = rcu_dereference_protected(local->scan_req,
lockdep_is_held(&local->mtx));
- if (scan_req != local->int_scan_req) {
- local->scan_info.aborted = aborted;
- cfg80211_scan_done(scan_req, &local->scan_info);
- }
RCU_INIT_POINTER(local->scan_req, NULL);
RCU_INIT_POINTER(local->scan_sdata, NULL);
local->scanning = 0;
local->scan_chandef.chan = NULL;
+ synchronize_rcu();
+
+ if (scan_req != local->int_scan_req) {
+ local->scan_info.aborted = aborted;
+ cfg80211_scan_done(scan_req, &local->scan_info);
+ }
+
/* Set power back to normal operating levels. */
ieee80211_hw_config(local, 0);
@@ -783,6 +798,8 @@
IEEE80211_CHAN_RADAR)) ||
!req->n_ssids) {
next_delay = IEEE80211_PASSIVE_CHANNEL_TIME;
+ if (req->n_ssids)
+ set_bit(SCAN_BEACON_WAIT, &local->scanning);
} else {
ieee80211_scan_state_send_probe(local, &next_delay);
next_delay = IEEE80211_CHANNEL_TIME;
@@ -994,6 +1011,8 @@
!scan_req->n_ssids) {
*next_delay = IEEE80211_PASSIVE_CHANNEL_TIME;
local->next_scan_state = SCAN_DECISION;
+ if (scan_req->n_ssids)
+ set_bit(SCAN_BEACON_WAIT, &local->scanning);
return;
}
@@ -1086,6 +1105,8 @@
goto out;
}
+ clear_bit(SCAN_BEACON_WAIT, &local->scanning);
+
/*
* as long as no delay is required advance immediately
* without scheduling a new work
@@ -1096,6 +1117,10 @@
goto out_complete;
}
+ if (test_and_clear_bit(SCAN_BEACON_DONE, &local->scanning) &&
+ local->next_scan_state == SCAN_DECISION)
+ local->next_scan_state = SCAN_SEND_PROBE;
+
switch (local->next_scan_state) {
case SCAN_DECISION:
/* if no more bands/channels left, complete scan */
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index e18c385..cee39ae 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -645,13 +645,13 @@
/* check if STA exists already */
if (sta_info_get_bss(sdata, sta->sta.addr)) {
err = -EEXIST;
- goto out_err;
+ goto out_cleanup;
}
sinfo = kzalloc(sizeof(struct station_info), GFP_KERNEL);
if (!sinfo) {
err = -ENOMEM;
- goto out_err;
+ goto out_cleanup;
}
local->num_sta++;
@@ -707,8 +707,8 @@
out_drop_sta:
local->num_sta--;
synchronize_net();
+ out_cleanup:
cleanup_single_sta(sta);
- out_err:
mutex_unlock(&local->sta_mtx);
kfree(sinfo);
rcu_read_lock();
@@ -2175,9 +2175,9 @@
u64 value;
do {
- start = u64_stats_fetch_begin(&rxstats->syncp);
+ start = u64_stats_fetch_begin_irq(&rxstats->syncp);
value = rxstats->msdu[tid];
- } while (u64_stats_fetch_retry(&rxstats->syncp, start));
+ } while (u64_stats_fetch_retry_irq(&rxstats->syncp, start));
return value;
}
@@ -2241,9 +2241,9 @@
u64 value;
do {
- start = u64_stats_fetch_begin(&rxstats->syncp);
+ start = u64_stats_fetch_begin_irq(&rxstats->syncp);
value = rxstats->bytes;
- } while (u64_stats_fetch_retry(&rxstats->syncp, start));
+ } while (u64_stats_fetch_retry_irq(&rxstats->syncp, start));
return value;
}
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index a1f1292..7fa6efa 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1124,10 +1124,6 @@
} else
elem_parse_failed = true;
break;
- case WLAN_EID_CHALLENGE:
- elems->challenge = pos;
- elems->challenge_len = elen;
- break;
case WLAN_EID_VENDOR_SPECIFIC:
if (elen >= 4 && pos[0] == 0x00 && pos[1] == 0x50 &&
pos[2] == 0xf2) {
@@ -1409,6 +1405,8 @@
for_each_element_id(elem, WLAN_EID_MULTIPLE_BSSID, start, len) {
if (elem->datalen < 2)
continue;
+ if (elem->data[0] < 1 || elem->data[0] > 8)
+ continue;
for_each_element(sub, elem->data + 1, elem->datalen - 1) {
u8 new_bssid[ETH_ALEN];
@@ -1485,6 +1483,11 @@
cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
nontransmitted_profile,
nontransmitted_profile_len);
+ if (!nontransmitted_profile_len) {
+ nontransmitted_profile_len = 0;
+ kfree(nontransmitted_profile);
+ nontransmitted_profile = NULL;
+ }
}
crc = _ieee802_11_parse_elems_crc(start, len, action, elems, filter,
@@ -1514,7 +1517,7 @@
offsetofend(struct ieee80211_bssid_index, dtim_count))
elems->dtim_count = elems->bssid_index->dtim_count;
- kfree(nontransmitted_profile);
+ elems->nontx_profile = nontransmitted_profile;
return crc;
}
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 2fb9932..b9404b0 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -145,8 +145,8 @@
bool qos;
/* all mesh/ocb stations are required to support WME */
- if (sdata->vif.type == NL80211_IFTYPE_MESH_POINT ||
- sdata->vif.type == NL80211_IFTYPE_OCB)
+ if (sta && (sdata->vif.type == NL80211_IFTYPE_MESH_POINT ||
+ sdata->vif.type == NL80211_IFTYPE_OCB))
qos = true;
else if (sta)
qos = sta->sta.wme;
diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c
index b8ce846..726b47a 100644
--- a/net/mac802154/rx.c
+++ b/net/mac802154/rx.c
@@ -44,7 +44,7 @@
switch (mac_cb(skb)->dest.mode) {
case IEEE802154_ADDR_NONE:
- if (mac_cb(skb)->dest.mode != IEEE802154_ADDR_NONE)
+ if (hdr->source.mode != IEEE802154_ADDR_NONE)
/* FIXME: check if we are PAN coordinator */
skb->pkt_type = PACKET_OTHERHOST;
else
@@ -132,7 +132,7 @@
ieee802154_parse_frame_start(struct sk_buff *skb, struct ieee802154_hdr *hdr)
{
int hlen;
- struct ieee802154_mac_cb *cb = mac_cb_init(skb);
+ struct ieee802154_mac_cb *cb = mac_cb(skb);
skb_reset_mac_header(skb);
@@ -294,8 +294,9 @@
ieee802154_rx_irqsafe(struct ieee802154_hw *hw, struct sk_buff *skb, u8 lqi)
{
struct ieee802154_local *local = hw_to_local(hw);
+ struct ieee802154_mac_cb *cb = mac_cb_init(skb);
- mac_cb(skb)->lqi = lqi;
+ cb->lqi = lqi;
skb->pkt_type = IEEE802154_RX_MSG;
skb_queue_tail(&local->skb_queue, skb);
tasklet_schedule(&local->tasklet);
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 9c047c1..7239814 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1078,9 +1078,9 @@
p = per_cpu_ptr(mdev->stats, i);
do {
- start = u64_stats_fetch_begin(&p->syncp);
+ start = u64_stats_fetch_begin_irq(&p->syncp);
local = p->stats;
- } while (u64_stats_fetch_retry(&p->syncp, start));
+ } while (u64_stats_fetch_retry_irq(&p->syncp, start));
stats->rx_packets += local.rx_packets;
stats->rx_bytes += local.rx_bytes;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 8123c79..e61c858 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1421,7 +1421,7 @@
if (msk->rcvq_space.copied <= msk->rcvq_space.space)
goto new_measure;
- if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
int rcvmem, rcvbuf;
u64 rcvwin, grow;
@@ -1439,7 +1439,7 @@
do_div(rcvwin, advmss);
rcvbuf = min_t(u64, rcvwin * rcvmem,
- sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
if (rcvbuf > sk->sk_rcvbuf) {
u32 window_clamp;
@@ -1872,8 +1872,8 @@
return ret;
sk_sockets_allocated_inc(sk);
- sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
- sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1];
+ sk->sk_rcvbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);
+ sk->sk_sndbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]);
return 0;
}
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 6bafd38..8bf70ce 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -118,7 +118,6 @@
config NF_CONNTRACK_PROCFS
bool "Supply CT list in procfs (OBSOLETE)"
- default y
depends on PROC_FS
help
This option enables for the list of known conntrack entries
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 2b19189..c17a7dd 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -963,20 +963,9 @@
start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags,
enum ipset_cmd cmd)
{
- struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
-
- nlh = nlmsg_put(skb, portid, seq, nfnl_msg_type(NFNL_SUBSYS_IPSET, cmd),
- sizeof(*nfmsg), flags);
- if (!nlh)
- return NULL;
-
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = NFPROTO_IPV4;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
- return nlh;
+ return nfnl_msg_put(skb, portid, seq,
+ nfnl_msg_type(NFNL_SUBSYS_IPSET, cmd), flags,
+ NFPROTO_IPV4, NFNETLINK_V0, 0);
}
/* Create a set */
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index 5d6d68e..d7a81b2 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -131,8 +131,11 @@
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
if (ret)
return ret;
- if (ip > ip_to)
+ if (ip > ip_to) {
+ if (ip_to == 0)
+ return -IPSET_ERR_HASH_ELEM;
swap(ip, ip_to);
+ }
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
@@ -143,18 +146,20 @@
hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1);
- if (retried) {
+ /* 64bit division is not allowed on 32bit */
+ if (((u64)ip_to - ip + 1) >> (32 - h->netmask) > IPSET_MAX_RANGE)
+ return -ERANGE;
+
+ if (retried)
ip = ntohl(h->next.ip);
- e.ip = htonl(ip);
- }
for (; ip <= ip_to;) {
+ e.ip = htonl(ip);
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
ip += hosts;
- e.ip = htonl(ip);
- if (e.ip == 0)
+ if (ip == 0)
return 0;
ret = 0;
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index aba1df6..eefce34 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -120,6 +120,8 @@
e.mark = ntohl(nla_get_be32(tb[IPSET_ATTR_MARK]));
e.mark &= h->markmask;
+ if (e.mark == 0 && e.ip == 0)
+ return -IPSET_ERR_HASH_ELEM;
if (adt == IPSET_TEST ||
!(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) {
@@ -132,8 +134,11 @@
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
if (ret)
return ret;
- if (ip > ip_to)
+ if (ip > ip_to) {
+ if (e.mark == 0 && ip_to == 0)
+ return -IPSET_ERR_HASH_ELEM;
swap(ip, ip_to);
+ }
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
@@ -142,6 +147,9 @@
ip_set_mask_from_to(ip, ip_to, cidr);
}
+ if (((u64)ip_to - ip + 1) > IPSET_MAX_RANGE)
+ return -ERANGE;
+
if (retried)
ip = ntohl(h->next.ip);
for (; ip <= ip_to; ip++) {
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index 1ff2287..4a54e9e 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -172,6 +172,9 @@
swap(port, port_to);
}
+ if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
+ return -ERANGE;
+
if (retried)
ip = ntohl(h->next.ip);
for (; ip <= ip_to; ip++) {
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index fa88afd..09737de 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -179,6 +179,9 @@
swap(port, port_to);
}
+ if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
+ return -ERANGE;
+
if (retried)
ip = ntohl(h->next.ip);
for (; ip <= ip_to; ip++) {
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index eef6ecf..0268537 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -252,6 +252,9 @@
swap(port, port_to);
}
+ if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
+ return -ERANGE;
+
ip2_to = ip2_from;
if (tb[IPSET_ATTR_IP2_TO]) {
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to);
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 136cf07..9d1beaa 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -139,7 +139,7 @@
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_net4_elem e = { .cidr = HOST_MASK };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip = 0, ip_to = 0;
+ u32 ip = 0, ip_to = 0, ipn, n = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -187,6 +187,15 @@
if (ip + UINT_MAX == ip_to)
return -IPSET_ERR_HASH_RANGE;
}
+ ipn = ip;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr);
+ n++;
+ } while (ipn++ < ip_to);
+
+ if (n > IPSET_MAX_RANGE)
+ return -ERANGE;
+
if (retried)
ip = ntohl(h->next.ip);
do {
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index be5e95a..c3ada9c 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -201,7 +201,7 @@
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip = 0, ip_to = 0;
+ u32 ip = 0, ip_to = 0, ipn, n = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -255,6 +255,14 @@
} else {
ip_set_mask_from_to(ip, ip_to, e.cidr);
}
+ ipn = ip;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr);
+ n++;
+ } while (ipn++ < ip_to);
+
+ if (n > IPSET_MAX_RANGE)
+ return -ERANGE;
if (retried)
ip = ntohl(h->next.ip);
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index da4ef91..b1411bc 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -167,7 +167,8 @@
struct hash_netnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0;
- u32 ip2 = 0, ip2_from = 0, ip2_to = 0;
+ u32 ip2 = 0, ip2_from = 0, ip2_to = 0, ipn;
+ u64 n = 0, m = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -243,6 +244,19 @@
} else {
ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
}
+ ipn = ip;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]);
+ n++;
+ } while (ipn++ < ip_to);
+ ipn = ip2_from;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]);
+ m++;
+ } while (ipn++ < ip2_to);
+
+ if (n*m > IPSET_MAX_RANGE)
+ return -ERANGE;
if (retried) {
ip = ntohl(h->next.ip[0]);
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 34448df..d26d135 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -157,7 +157,8 @@
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 port, port_to, p = 0, ip = 0, ip_to = 0;
+ u32 port, port_to, p = 0, ip = 0, ip_to = 0, ipn;
+ u64 n = 0;
bool with_ports = false;
u8 cidr;
int ret;
@@ -234,6 +235,14 @@
} else {
ip_set_mask_from_to(ip, ip_to, e.cidr + 1);
}
+ ipn = ip;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip_to, &cidr);
+ n++;
+ } while (ipn++ < ip_to);
+
+ if (n*(port_to - port + 1) > IPSET_MAX_RANGE)
+ return -ERANGE;
if (retried) {
ip = ntohl(h->next.ip);
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index 934c171..6446f4f 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -181,7 +181,8 @@
struct hash_netportnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0, p = 0, port, port_to;
- u32 ip2_from = 0, ip2_to = 0, ip2;
+ u32 ip2_from = 0, ip2_to = 0, ip2, ipn;
+ u64 n = 0, m = 0;
bool with_ports = false;
int ret;
@@ -283,6 +284,19 @@
} else {
ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
}
+ ipn = ip;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]);
+ n++;
+ } while (ipn++ < ip_to);
+ ipn = ip2_from;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]);
+ m++;
+ } while (ipn++ < ip2_to);
+
+ if (n*m*(port_to - port + 1) > IPSET_MAX_RANGE)
+ return -ERANGE;
if (retried) {
ip = ntohl(h->next.ip[0]);
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index f9b16f2..fdacbc3 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -599,13 +599,19 @@
int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs)
{
INIT_LIST_HEAD(&ipvs->app_list);
- proc_create_net("ip_vs_app", 0, ipvs->net->proc_net, &ip_vs_app_seq_ops,
- sizeof(struct seq_net_private));
+#ifdef CONFIG_PROC_FS
+ if (!proc_create_net("ip_vs_app", 0, ipvs->net->proc_net,
+ &ip_vs_app_seq_ops,
+ sizeof(struct seq_net_private)))
+ return -ENOMEM;
+#endif
return 0;
}
void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs)
{
unregister_ip_vs_app(ipvs, NULL /* all */);
+#ifdef CONFIG_PROC_FS
remove_proc_entry("ip_vs_app", ipvs->net->proc_net);
+#endif
}
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 2c467c4..cb6d682 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1265,8 +1265,8 @@
* The drop rate array needs tuning for real environments.
* Called from timer bh only => no locking
*/
- static const char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
- static char todrop_counter[9] = {0};
+ static const signed char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
+ static signed char todrop_counter[9] = {0};
int i;
/* if the conn entry hasn't lasted for 60 seconds, don't drop it.
@@ -1447,20 +1447,36 @@
{
atomic_set(&ipvs->conn_count, 0);
- proc_create_net("ip_vs_conn", 0, ipvs->net->proc_net,
- &ip_vs_conn_seq_ops, sizeof(struct ip_vs_iter_state));
- proc_create_net("ip_vs_conn_sync", 0, ipvs->net->proc_net,
- &ip_vs_conn_sync_seq_ops,
- sizeof(struct ip_vs_iter_state));
+#ifdef CONFIG_PROC_FS
+ if (!proc_create_net("ip_vs_conn", 0, ipvs->net->proc_net,
+ &ip_vs_conn_seq_ops,
+ sizeof(struct ip_vs_iter_state)))
+ goto err_conn;
+
+ if (!proc_create_net("ip_vs_conn_sync", 0, ipvs->net->proc_net,
+ &ip_vs_conn_sync_seq_ops,
+ sizeof(struct ip_vs_iter_state)))
+ goto err_conn_sync;
+#endif
+
return 0;
+
+#ifdef CONFIG_PROC_FS
+err_conn_sync:
+ remove_proc_entry("ip_vs_conn", ipvs->net->proc_net);
+err_conn:
+ return -ENOMEM;
+#endif
}
void __net_exit ip_vs_conn_net_cleanup(struct netns_ipvs *ipvs)
{
/* flush all the connection entries first */
ip_vs_conn_flush(ipvs);
+#ifdef CONFIG_PROC_FS
remove_proc_entry("ip_vs_conn", ipvs->net->proc_net);
remove_proc_entry("ip_vs_conn_sync", ipvs->net->proc_net);
+#endif
}
int __init ip_vs_conn_init(void)
@@ -1495,7 +1511,7 @@
pr_info("Connection hash table configured "
"(size=%d, memory=%ldKbytes)\n",
ip_vs_conn_tab_size,
- (long)(ip_vs_conn_tab_size*sizeof(struct list_head))/1024);
+ (long)(ip_vs_conn_tab_size*sizeof(*ip_vs_conn_tab))/1024);
IP_VS_DBG(0, "Each connection entry needs %zd bytes at least\n",
sizeof(struct ip_vs_conn));
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 16b4806..daab857 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1280,12 +1280,12 @@
lock_sock(sk);
if (mode) {
val = clamp_t(int, val, (SOCK_MIN_SNDBUF + 1) / 2,
- sysctl_wmem_max);
+ READ_ONCE(sysctl_wmem_max));
sk->sk_sndbuf = val * 2;
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
} else {
val = clamp_t(int, val, (SOCK_MIN_RCVBUF + 1) / 2,
- sysctl_rmem_max);
+ READ_ONCE(sysctl_rmem_max));
sk->sk_rcvbuf = val * 2;
sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
}
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 8369af0..193a18b 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1598,7 +1598,7 @@
}
#ifdef CONFIG_NF_CONNTRACK_MARK
- ct->mark = exp->master->mark;
+ ct->mark = READ_ONCE(exp->master->mark);
#endif
#ifdef CONFIG_NF_CONNTRACK_SECMARK
ct->secmark = exp->master->secmark;
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index e40988a..65b5b05 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -148,15 +148,37 @@
data = ib_ptr;
data_limit = ib_ptr + skb->len - dataoff;
- /* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24
- * 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */
- while (data < data_limit - (19 + MINMATCHLEN)) {
- if (memcmp(data, "\1DCC ", 5)) {
+ /* Skip any whitespace */
+ while (data < data_limit - 10) {
+ if (*data == ' ' || *data == '\r' || *data == '\n')
+ data++;
+ else
+ break;
+ }
+
+ /* strlen("PRIVMSG x ")=10 */
+ if (data < data_limit - 10) {
+ if (strncasecmp("PRIVMSG ", data, 8))
+ goto out;
+ data += 8;
+ }
+
+ /* strlen(" :\1DCC SENT t AAAAAAAA P\1\n")=26
+ * 7+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=26
+ */
+ while (data < data_limit - (21 + MINMATCHLEN)) {
+ /* Find first " :", the start of message */
+ if (memcmp(data, " :", 2)) {
data++;
continue;
}
+ data += 2;
+
+ /* then check that place only for the DCC command */
+ if (memcmp(data, "\1DCC ", 5))
+ goto out;
data += 5;
- /* we have at least (19+MINMATCHLEN)-5 bytes valid data left */
+ /* we have at least (21+MINMATCHLEN)-(2+5) bytes valid data left */
iph = ip_hdr(skb);
pr_debug("DCC found in master %pI4:%u %pI4:%u\n",
@@ -172,7 +194,7 @@
pr_debug("DCC %s detected\n", dccprotos[i]);
/* we have at least
- * (19+MINMATCHLEN)-5-dccprotos[i].matchlen bytes valid
+ * (21+MINMATCHLEN)-7-dccprotos[i].matchlen bytes valid
* data left (== 14/13 bytes) */
if (parse_dcc(data, data_limit, &dcc_ip,
&dcc_port, &addr_beg_p, &addr_end_p)) {
@@ -185,8 +207,9 @@
/* dcc_ip can be the internal OR external (NAT'ed) IP */
tuple = &ct->tuplehash[dir].tuple;
- if (tuple->src.u3.ip != dcc_ip &&
- tuple->dst.u3.ip != dcc_ip) {
+ if ((tuple->src.u3.ip != dcc_ip &&
+ ct->tuplehash[!dir].tuple.dst.u3.ip != dcc_ip) ||
+ dcc_port == 0) {
net_warn_ratelimited("Forged DCC command from %pI4: %pI4:%u\n",
&tuple->src.u3.ip,
&dcc_ip, dcc_port);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index eeeaa34..c402283 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -317,9 +317,9 @@
}
#ifdef CONFIG_NF_CONNTRACK_MARK
-static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct)
+static int ctnetlink_dump_mark(struct sk_buff *skb, u32 mark)
{
- if (nla_put_be32(skb, CTA_MARK, htonl(ct->mark)))
+ if (nla_put_be32(skb, CTA_MARK, htonl(mark)))
goto nla_put_failure;
return 0;
@@ -532,7 +532,7 @@
static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct)
{
if (ctnetlink_dump_status(skb, ct) < 0 ||
- ctnetlink_dump_mark(skb, ct) < 0 ||
+ ctnetlink_dump_mark(skb, READ_ONCE(ct->mark)) < 0 ||
ctnetlink_dump_secctx(skb, ct) < 0 ||
ctnetlink_dump_id(skb, ct) < 0 ||
ctnetlink_dump_use(skb, ct) < 0 ||
@@ -553,22 +553,17 @@
{
const struct nf_conntrack_zone *zone;
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
struct nlattr *nest_parms;
unsigned int event;
if (portid)
flags |= NLM_F_MULTI;
event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_NEW);
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, nf_ct_l3num(ct),
+ NFNETLINK_V0, 0);
+ if (!nlh)
goto nlmsg_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = nf_ct_l3num(ct);
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
zone = nf_ct_zone(ct);
nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG);
@@ -711,12 +706,12 @@
const struct nf_conntrack_zone *zone;
struct net *net;
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
struct nlattr *nest_parms;
struct nf_conn *ct = item->ct;
struct sk_buff *skb;
unsigned int type;
unsigned int flags = 0, group;
+ u32 mark;
int err;
if (events & (1 << IPCT_DESTROY)) {
@@ -741,15 +736,11 @@
goto errout;
type = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, type);
- nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, item->portid, 0, type, flags, nf_ct_l3num(ct),
+ NFNETLINK_V0, 0);
+ if (!nlh)
goto nlmsg_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = nf_ct_l3num(ct);
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
zone = nf_ct_zone(ct);
nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG);
@@ -821,8 +812,9 @@
}
#ifdef CONFIG_NF_CONNTRACK_MARK
- if ((events & (1 << IPCT_MARK) || ct->mark)
- && ctnetlink_dump_mark(skb, ct) < 0)
+ mark = READ_ONCE(ct->mark);
+ if ((events & (1 << IPCT_MARK) || mark) &&
+ ctnetlink_dump_mark(skb, mark) < 0)
goto nla_put_failure;
#endif
nlmsg_end(skb, nlh);
@@ -1109,7 +1101,7 @@
}
#ifdef CONFIG_NF_CONNTRACK_MARK
- if ((ct->mark & filter->mark.mask) != filter->mark.val)
+ if ((READ_ONCE(ct->mark) & filter->mark.mask) != filter->mark.val)
goto ignore_entry;
#endif
@@ -1989,9 +1981,9 @@
mask = ~ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
mark = ntohl(nla_get_be32(cda[CTA_MARK]));
- newmark = (ct->mark & mask) ^ mark;
- if (newmark != ct->mark)
- ct->mark = newmark;
+ newmark = (READ_ONCE(ct->mark) & mask) ^ mark;
+ if (newmark != READ_ONCE(ct->mark))
+ WRITE_ONCE(ct->mark, newmark);
}
#endif
@@ -2483,20 +2475,15 @@
__u16 cpu, const struct ip_conntrack_stat *st)
{
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0, event;
event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK,
IPCTNL_MSG_CT_GET_STATS_CPU);
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC,
+ NFNETLINK_V0, htons(cpu));
+ if (!nlh)
goto nlmsg_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = AF_UNSPEC;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = htons(cpu);
-
if (nla_put_be32(skb, CTA_STATS_FOUND, htonl(st->found)) ||
nla_put_be32(skb, CTA_STATS_INVALID, htonl(st->invalid)) ||
nla_put_be32(skb, CTA_STATS_INSERT, htonl(st->insert)) ||
@@ -2568,20 +2555,15 @@
struct net *net)
{
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0, event;
unsigned int nr_conntracks = atomic_read(&net->ct.count);
event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_GET_STATS);
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC,
+ NFNETLINK_V0, 0);
+ if (!nlh)
goto nlmsg_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = AF_UNSPEC;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
if (nla_put_be32(skb, CTA_STATS_GLOBAL_ENTRIES, htonl(nr_conntracks)))
goto nla_put_failure;
@@ -2689,6 +2671,7 @@
{
const struct nf_conntrack_zone *zone;
struct nlattr *nest_parms;
+ u32 mark;
zone = nf_ct_zone(ct);
@@ -2746,7 +2729,8 @@
goto nla_put_failure;
#ifdef CONFIG_NF_CONNTRACK_MARK
- if (ct->mark && ctnetlink_dump_mark(skb, ct) < 0)
+ mark = READ_ONCE(ct->mark);
+ if (mark && ctnetlink_dump_mark(skb, mark) < 0)
goto nla_put_failure;
#endif
if (ctnetlink_dump_labels(skb, ct) < 0)
@@ -3085,19 +3069,14 @@
int event, const struct nf_conntrack_expect *exp)
{
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0;
event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_EXP, event);
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags,
+ exp->tuple.src.l3num, NFNETLINK_V0, 0);
+ if (!nlh)
goto nlmsg_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = exp->tuple.src.l3num;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
if (ctnetlink_exp_dump_expect(skb, exp) < 0)
goto nla_put_failure;
@@ -3117,7 +3096,6 @@
struct nf_conntrack_expect *exp = item->exp;
struct net *net = nf_ct_exp_net(exp);
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
struct sk_buff *skb;
unsigned int type, group;
int flags = 0;
@@ -3140,15 +3118,11 @@
goto errout;
type = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_EXP, type);
- nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, item->portid, 0, type, flags,
+ exp->tuple.src.l3num, NFNETLINK_V0, 0);
+ if (!nlh)
goto nlmsg_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = exp->tuple.src.l3num;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
if (ctnetlink_exp_dump_expect(skb, exp) < 0)
goto nla_put_failure;
@@ -3716,20 +3690,15 @@
const struct ip_conntrack_stat *st)
{
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0, event;
event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK,
IPCTNL_MSG_EXP_GET_STATS_CPU);
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC,
+ NFNETLINK_V0, htons(cpu));
+ if (!nlh)
goto nlmsg_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = AF_UNSPEC;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = htons(cpu);
-
if (nla_put_be32(skb, CTA_STATS_EXP_NEW, htonl(st->expect_new)) ||
nla_put_be32(skb, CTA_STATS_EXP_CREATE, htonl(st->expect_create)) ||
nla_put_be32(skb, CTA_STATS_EXP_DELETE, htonl(st->expect_delete)))
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index c8fb218..3f785bd 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -354,8 +354,8 @@
length, buff);
BUG_ON(ptr == NULL);
- state->td_scale =
- state->flags = 0;
+ state->td_scale = 0;
+ state->flags &= IP_CT_TCP_FLAG_BE_LIBERAL;
while (length > 0) {
int opcode=*ptr++;
@@ -840,6 +840,16 @@
test_bit(IPS_ASSURED_BIT, &ct->status);
}
+static void nf_ct_tcp_state_reset(struct ip_ct_tcp_state *state)
+{
+ state->td_end = 0;
+ state->td_maxend = 0;
+ state->td_maxwin = 0;
+ state->td_maxack = 0;
+ state->td_scale = 0;
+ state->flags &= IP_CT_TCP_FLAG_BE_LIBERAL;
+}
+
/* Returns verdict for packet, or -1 for invalid. */
int nf_conntrack_tcp_packet(struct nf_conn *ct,
struct sk_buff *skb,
@@ -946,8 +956,7 @@
ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
ct->proto.tcp.last_flags;
- memset(&ct->proto.tcp.seen[dir], 0,
- sizeof(struct ip_ct_tcp_state));
+ nf_ct_tcp_state_reset(&ct->proto.tcp.seen[dir]);
break;
}
ct->proto.tcp.last_index = index;
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index b83dc9b..78fd912 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -477,7 +477,7 @@
return ret;
if (ret == 0)
break;
- dataoff += *matchoff;
+ dataoff = *matchoff;
}
*in_header = 0;
}
@@ -489,7 +489,7 @@
break;
if (ret == 0)
return ret;
- dataoff += *matchoff;
+ dataoff = *matchoff;
}
if (in_header)
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 313d1c8..a7f88cd 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -360,7 +360,7 @@
goto release;
#if defined(CONFIG_NF_CONNTRACK_MARK)
- seq_printf(s, "mark=%u ", ct->mark);
+ seq_printf(s, "mark=%u ", READ_ONCE(ct->mark));
#endif
ct_show_secctx(s, ct);
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index d186278..28306cb 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -910,6 +910,7 @@
struct flow_block_cb *block_cb, *next;
int err = 0;
+ down_write(&flowtable->flow_block_lock);
switch (cmd) {
case FLOW_BLOCK_BIND:
list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
@@ -924,6 +925,7 @@
WARN_ON_ONCE(1);
err = -EOPNOTSUPP;
}
+ up_write(&flowtable->flow_block_lock);
return err;
}
@@ -980,7 +982,9 @@
nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
extack);
+ down_write(&flowtable->flow_block_lock);
err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
+ up_write(&flowtable->flow_block_lock);
if (err < 0)
return err;
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 2fc4ae9..3d6d494 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -854,7 +854,7 @@
fl6.fl6_sport = nth->source;
fl6.fl6_dport = nth->dest;
security_skb_classify_flow((struct sk_buff *)skb,
- flowi6_to_flowi(&fl6));
+ flowi6_to_flowi_common(&fl6));
err = nf_ip6_route(net, &dst, flowi6_to_flowi(&fl6), false);
if (err) {
goto free_nskb;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index fdd1da9..2143eda 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -66,6 +66,41 @@
.automatic_shrinking = true,
};
+struct nft_audit_data {
+ struct nft_table *table;
+ int entries;
+ int op;
+ struct list_head list;
+};
+
+static const u8 nft2audit_op[NFT_MSG_MAX] = { // enum nf_tables_msg_types
+ [NFT_MSG_NEWTABLE] = AUDIT_NFT_OP_TABLE_REGISTER,
+ [NFT_MSG_GETTABLE] = AUDIT_NFT_OP_INVALID,
+ [NFT_MSG_DELTABLE] = AUDIT_NFT_OP_TABLE_UNREGISTER,
+ [NFT_MSG_NEWCHAIN] = AUDIT_NFT_OP_CHAIN_REGISTER,
+ [NFT_MSG_GETCHAIN] = AUDIT_NFT_OP_INVALID,
+ [NFT_MSG_DELCHAIN] = AUDIT_NFT_OP_CHAIN_UNREGISTER,
+ [NFT_MSG_NEWRULE] = AUDIT_NFT_OP_RULE_REGISTER,
+ [NFT_MSG_GETRULE] = AUDIT_NFT_OP_INVALID,
+ [NFT_MSG_DELRULE] = AUDIT_NFT_OP_RULE_UNREGISTER,
+ [NFT_MSG_NEWSET] = AUDIT_NFT_OP_SET_REGISTER,
+ [NFT_MSG_GETSET] = AUDIT_NFT_OP_INVALID,
+ [NFT_MSG_DELSET] = AUDIT_NFT_OP_SET_UNREGISTER,
+ [NFT_MSG_NEWSETELEM] = AUDIT_NFT_OP_SETELEM_REGISTER,
+ [NFT_MSG_GETSETELEM] = AUDIT_NFT_OP_INVALID,
+ [NFT_MSG_DELSETELEM] = AUDIT_NFT_OP_SETELEM_UNREGISTER,
+ [NFT_MSG_NEWGEN] = AUDIT_NFT_OP_GEN_REGISTER,
+ [NFT_MSG_GETGEN] = AUDIT_NFT_OP_INVALID,
+ [NFT_MSG_TRACE] = AUDIT_NFT_OP_INVALID,
+ [NFT_MSG_NEWOBJ] = AUDIT_NFT_OP_OBJ_REGISTER,
+ [NFT_MSG_GETOBJ] = AUDIT_NFT_OP_INVALID,
+ [NFT_MSG_DELOBJ] = AUDIT_NFT_OP_OBJ_UNREGISTER,
+ [NFT_MSG_GETOBJ_RESET] = AUDIT_NFT_OP_OBJ_RESET,
+ [NFT_MSG_NEWFLOWTABLE] = AUDIT_NFT_OP_FLOWTABLE_REGISTER,
+ [NFT_MSG_GETFLOWTABLE] = AUDIT_NFT_OP_INVALID,
+ [NFT_MSG_DELFLOWTABLE] = AUDIT_NFT_OP_FLOWTABLE_UNREGISTER,
+};
+
static void nft_validate_state_update(struct net *net, u8 new_validate_state)
{
switch (net->nft.validate_state) {
@@ -114,6 +149,7 @@
if (trans == NULL)
return NULL;
+ INIT_LIST_HEAD(&trans->list);
trans->msg_type = msg_type;
trans->ctx = *ctx;
@@ -481,6 +517,7 @@
if (msg_type == NFT_MSG_NEWFLOWTABLE)
nft_activate_next(ctx->net, flowtable);
+ INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans));
nft_trans_flowtable(trans) = flowtable;
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
@@ -646,6 +683,11 @@
return ERR_PTR(-ENOENT);
}
+static __be16 nft_base_seq(const struct net *net)
+{
+ return htons(net->nft.base_seq & 0xffff);
+}
+
static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
[NFTA_TABLE_NAME] = { .type = NLA_STRING,
.len = NFT_TABLE_MAXNAMELEN - 1 },
@@ -660,18 +702,13 @@
int family, const struct nft_table *table)
{
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, family,
+ NFNETLINK_V0, nft_base_seq(net));
+ if (!nlh)
goto nla_put_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = family;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
-
if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) ||
nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) ||
nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)) ||
@@ -708,17 +745,6 @@
{
struct sk_buff *skb;
int err;
- char *buf = kasprintf(GFP_KERNEL, "%s:%llu;?:0",
- ctx->table->name, ctx->table->handle);
-
- audit_log_nfcfg(buf,
- ctx->family,
- ctx->table->use,
- event == NFT_MSG_NEWTABLE ?
- AUDIT_NFT_OP_TABLE_REGISTER :
- AUDIT_NFT_OP_TABLE_UNREGISTER,
- GFP_KERNEL);
- kfree(buf);
if (!ctx->report &&
!nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
@@ -1412,18 +1438,13 @@
const struct nft_chain *chain)
{
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, family,
+ NFNETLINK_V0, nft_base_seq(net));
+ if (!nlh)
goto nla_put_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = family;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
-
if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name))
goto nla_put_failure;
if (nla_put_be64(skb, NFTA_CHAIN_HANDLE, cpu_to_be64(chain->handle),
@@ -1475,18 +1496,6 @@
{
struct sk_buff *skb;
int err;
- char *buf = kasprintf(GFP_KERNEL, "%s:%llu;%s:%llu",
- ctx->table->name, ctx->table->handle,
- ctx->chain->name, ctx->chain->handle);
-
- audit_log_nfcfg(buf,
- ctx->family,
- ctx->chain->use,
- event == NFT_MSG_NEWCHAIN ?
- AUDIT_NFT_OP_CHAIN_REGISTER :
- AUDIT_NFT_OP_CHAIN_UNREGISTER,
- GFP_KERNEL);
- kfree(buf);
if (!ctx->report &&
!nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
@@ -1733,7 +1742,6 @@
goto err_hook_dev;
}
hook->ops.dev = dev;
- hook->inactive = false;
return hook;
@@ -1963,8 +1971,10 @@
chain->flags |= NFT_CHAIN_BASE | flags;
basechain->policy = NF_ACCEPT;
if (chain->flags & NFT_CHAIN_HW_OFFLOAD &&
- nft_chain_offload_priority(basechain) < 0)
+ !nft_chain_offload_support(basechain)) {
+ list_splice_init(&basechain->hook_list, &hook->list);
return -EOPNOTSUPP;
+ }
flow_block_init(&basechain->flow_block);
@@ -1993,7 +2003,6 @@
const struct nlattr * const *nla = ctx->nla;
struct nft_table *table = ctx->table;
struct nft_base_chain *basechain;
- struct nft_stats __percpu *stats;
struct net *net = ctx->net;
char name[NFT_NAME_MAXLEN];
struct nft_trans *trans;
@@ -2005,6 +2014,7 @@
return -EOVERFLOW;
if (nla[NFTA_CHAIN_HOOK]) {
+ struct nft_stats __percpu *stats = NULL;
struct nft_chain_hook hook;
if (flags & NFT_CHAIN_BINDING)
@@ -2029,15 +2039,17 @@
return PTR_ERR(stats);
}
rcu_assign_pointer(basechain->stats, stats);
- static_branch_inc(&nft_counters_enabled);
}
err = nft_basechain_init(basechain, family, &hook, flags);
if (err < 0) {
nft_chain_release_hook(&hook);
kfree(basechain);
+ free_percpu(stats);
return err;
}
+ if (stats)
+ static_branch_inc(&nft_counters_enabled);
} else {
if (flags & NFT_CHAIN_BASE)
return -EINVAL;
@@ -2265,6 +2277,7 @@
}
static struct nft_chain *nft_chain_lookup_byid(const struct net *net,
+ const struct nft_table *table,
const struct nlattr *nla)
{
u32 id = ntohl(nla_get_be32(nla));
@@ -2274,6 +2287,7 @@
struct nft_chain *chain = trans->ctx.chain;
if (trans->msg_type == NFT_MSG_NEWCHAIN &&
+ chain->table == table &&
id == nft_trans_chain_id(trans))
return chain;
}
@@ -2679,27 +2693,31 @@
err = nf_tables_expr_parse(ctx, nla, &info);
if (err < 0)
- goto err1;
+ goto err_expr_parse;
+
+ err = -EOPNOTSUPP;
+ if (!(info.ops->type->flags & NFT_EXPR_STATEFUL))
+ goto err_expr_stateful;
err = -ENOMEM;
expr = kzalloc(info.ops->size, GFP_KERNEL);
if (expr == NULL)
- goto err2;
+ goto err_expr_stateful;
err = nf_tables_newexpr(ctx, &info, expr);
if (err < 0)
- goto err3;
+ goto err_expr_new;
return expr;
-err3:
+err_expr_new:
kfree(expr);
-err2:
+err_expr_stateful:
owner = info.ops->type->owner;
if (info.ops->type->release_ops)
info.ops->type->release_ops(info.ops);
module_put(owner);
-err1:
+err_expr_parse:
return ERR_PTR(err);
}
@@ -2779,20 +2797,15 @@
const struct nft_rule *prule)
{
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
const struct nft_expr *expr, *next;
struct nlattr *list;
u16 type = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
- nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg), flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, type, flags, family, NFNETLINK_V0,
+ nft_base_seq(net));
+ if (!nlh)
goto nla_put_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = family;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
-
if (nla_put_string(skb, NFTA_RULE_TABLE, table->name))
goto nla_put_failure;
if (nla_put_string(skb, NFTA_RULE_CHAIN, chain->name))
@@ -2837,18 +2850,6 @@
{
struct sk_buff *skb;
int err;
- char *buf = kasprintf(GFP_KERNEL, "%s:%llu;%s:%llu",
- ctx->table->name, ctx->table->handle,
- ctx->chain->name, ctx->chain->handle);
-
- audit_log_nfcfg(buf,
- ctx->family,
- rule->handle,
- event == NFT_MSG_NEWRULE ?
- AUDIT_NFT_OP_RULE_REGISTER :
- AUDIT_NFT_OP_RULE_UNREGISTER,
- GFP_KERNEL);
- kfree(buf);
if (!ctx->report &&
!nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
@@ -3150,6 +3151,7 @@
}
static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
+ const struct nft_chain *chain,
const struct nlattr *nla);
#define NFT_RULE_MAXEXPRS 128
@@ -3195,7 +3197,7 @@
return -EOPNOTSUPP;
} else if (nla[NFTA_RULE_CHAIN_ID]) {
- chain = nft_chain_lookup_byid(net, nla[NFTA_RULE_CHAIN_ID]);
+ chain = nft_chain_lookup_byid(net, table, nla[NFTA_RULE_CHAIN_ID]);
if (IS_ERR(chain)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN_ID]);
return PTR_ERR(chain);
@@ -3237,7 +3239,7 @@
return PTR_ERR(old_rule);
}
} else if (nla[NFTA_RULE_POSITION_ID]) {
- old_rule = nft_rule_lookup_byid(net, nla[NFTA_RULE_POSITION_ID]);
+ old_rule = nft_rule_lookup_byid(net, chain, nla[NFTA_RULE_POSITION_ID]);
if (IS_ERR(old_rule)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION_ID]);
return PTR_ERR(old_rule);
@@ -3376,6 +3378,7 @@
}
static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
+ const struct nft_chain *chain,
const struct nlattr *nla)
{
u32 id = ntohl(nla_get_be32(nla));
@@ -3385,6 +3388,7 @@
struct nft_rule *rule = nft_trans_rule(trans);
if (trans->msg_type == NFT_MSG_NEWRULE &&
+ trans->ctx.chain == chain &&
id == nft_trans_rule_id(trans))
return rule;
}
@@ -3433,7 +3437,7 @@
err = nft_delrule(&ctx, rule);
} else if (nla[NFTA_RULE_ID]) {
- rule = nft_rule_lookup_byid(net, nla[NFTA_RULE_ID]);
+ rule = nft_rule_lookup_byid(net, chain, nla[NFTA_RULE_ID]);
if (IS_ERR(rule)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_ID]);
return PTR_ERR(rule);
@@ -3634,6 +3638,7 @@
}
static struct nft_set *nft_set_lookup_byid(const struct net *net,
+ const struct nft_table *table,
const struct nlattr *nla, u8 genmask)
{
struct nft_trans *trans;
@@ -3644,6 +3649,7 @@
struct nft_set *set = nft_trans_set(trans);
if (id == nft_trans_set_id(trans) &&
+ set->table == table &&
nft_active_genmask(set, genmask))
return set;
}
@@ -3664,7 +3670,7 @@
if (!nla_set_id)
return set;
- set = nft_set_lookup_byid(net, nla_set_id, genmask);
+ set = nft_set_lookup_byid(net, table, nla_set_id, genmask);
}
return set;
}
@@ -3690,7 +3696,7 @@
list_for_each_entry(i, &ctx->table->sets, list) {
int tmp;
- if (!nft_is_active_next(ctx->net, set))
+ if (!nft_is_active_next(ctx->net, i))
continue;
if (!sscanf(i->name, name, &tmp))
continue;
@@ -3774,23 +3780,17 @@
static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
const struct nft_set *set, u16 event, u16 flags)
{
- struct nfgenmsg *nfmsg;
struct nlmsghdr *nlh;
u32 portid = ctx->portid;
struct nlattr *nest;
u32 seq = ctx->seq;
event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
- flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, ctx->family,
+ NFNETLINK_V0, nft_base_seq(ctx->net));
+ if (!nlh)
goto nla_put_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = ctx->family;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = htons(ctx->net->nft.base_seq & 0xffff);
-
if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name))
goto nla_put_failure;
if (nla_put_string(skb, NFTA_SET_NAME, set->name))
@@ -3870,18 +3870,6 @@
struct sk_buff *skb;
u32 portid = ctx->portid;
int err;
- char *buf = kasprintf(gfp_flags, "%s:%llu;%s:%llu",
- ctx->table->name, ctx->table->handle,
- set->name, set->handle);
-
- audit_log_nfcfg(buf,
- ctx->family,
- set->field_count,
- event == NFT_MSG_NEWSET ?
- AUDIT_NFT_OP_SET_REGISTER :
- AUDIT_NFT_OP_SET_UNREGISTER,
- gfp_flags);
- kfree(buf);
if (!ctx->report &&
!nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
@@ -4047,6 +4035,9 @@
u32 len;
int err;
+ if (desc->field_count >= ARRAY_SIZE(desc->field_len))
+ return -E2BIG;
+
err = nla_parse_nested_deprecated(tb, NFTA_SET_FIELD_MAX, attr,
nft_concat_policy, NULL);
if (err < 0)
@@ -4056,9 +4047,8 @@
return -EINVAL;
len = ntohl(nla_get_be32(tb[NFTA_SET_FIELD_LEN]));
-
- if (len * BITS_PER_BYTE / 32 > NFT_REG32_COUNT)
- return -E2BIG;
+ if (!len || len > U8_MAX)
+ return -EINVAL;
desc->field_len[desc->field_count++] = len;
@@ -4069,7 +4059,8 @@
const struct nlattr *nla)
{
struct nlattr *attr;
- int rem, err;
+ u32 num_regs = 0;
+ int rem, err, i;
nla_for_each_nested(attr, nla, rem) {
if (nla_type(attr) != NFTA_LIST_ELEM)
@@ -4080,6 +4071,12 @@
return err;
}
+ for (i = 0; i < desc->field_count; i++)
+ num_regs += DIV_ROUND_UP(desc->field_len[i], sizeof(u32));
+
+ if (num_regs > NFT_REG32_COUNT)
+ return -E2BIG;
+
return 0;
}
@@ -4220,6 +4217,11 @@
err = nf_tables_set_desc_parse(&desc, nla[NFTA_SET_DESC]);
if (err < 0)
return err;
+
+ if (desc.field_count > 1 && !(flags & NFT_SET_CONCAT))
+ return -EINVAL;
+ } else if (flags & NFT_SET_CONCAT) {
+ return -EINVAL;
}
if (nla[NFTA_SET_EXPR])
@@ -4401,6 +4403,12 @@
return nft_delset(&ctx, set);
}
+static int nft_validate_register_store(const struct nft_ctx *ctx,
+ enum nft_registers reg,
+ const struct nft_data *data,
+ enum nft_data_types type,
+ unsigned int len);
+
static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
struct nft_set *set,
const struct nft_set_iter *iter,
@@ -4690,7 +4698,6 @@
struct nft_set *set;
struct nft_set_dump_args args;
bool set_found = false;
- struct nfgenmsg *nfmsg;
struct nlmsghdr *nlh;
struct nlattr *nest;
u32 portid, seq;
@@ -4723,16 +4730,11 @@
portid = NETLINK_CB(cb->skb).portid;
seq = cb->nlh->nlmsg_seq;
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
- NLM_F_MULTI);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, NLM_F_MULTI,
+ table->family, NFNETLINK_V0, nft_base_seq(net));
+ if (!nlh)
goto nla_put_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = table->family;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
-
if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, table->name))
goto nla_put_failure;
if (nla_put_string(skb, NFTA_SET_ELEM_LIST_SET, set->name))
@@ -4789,22 +4791,16 @@
const struct nft_set *set,
const struct nft_set_elem *elem)
{
- struct nfgenmsg *nfmsg;
struct nlmsghdr *nlh;
struct nlattr *nest;
int err;
event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
- flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, ctx->family,
+ NFNETLINK_V0, nft_base_seq(ctx->net));
+ if (!nlh)
goto nla_put_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = ctx->family;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = htons(ctx->net->nft.base_seq & 0xffff);
-
if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name))
goto nla_put_failure;
if (nla_put_string(skb, NFTA_SET_NAME, set->name))
@@ -4847,19 +4843,13 @@
static int nft_setelem_parse_key(struct nft_ctx *ctx, struct nft_set *set,
struct nft_data *key, struct nlattr *attr)
{
- struct nft_data_desc desc;
- int err;
+ struct nft_data_desc desc = {
+ .type = NFT_DATA_VALUE,
+ .size = NFT_DATA_VALUE_MAXLEN,
+ .len = set->klen,
+ };
- err = nft_data_init(ctx, key, NFT_DATA_VALUE_MAXLEN, &desc, attr);
- if (err < 0)
- return err;
-
- if (desc.type != NFT_DATA_VALUE || desc.len != set->klen) {
- nft_data_release(key, desc.type);
- return -EINVAL;
- }
-
- return 0;
+ return nft_data_init(ctx, key, &desc, attr);
}
static int nft_setelem_parse_data(struct nft_ctx *ctx, struct nft_set *set,
@@ -4867,18 +4857,19 @@
struct nft_data *data,
struct nlattr *attr)
{
- int err;
+ u32 dtype;
- err = nft_data_init(ctx, data, NFT_DATA_VALUE_MAXLEN, desc, attr);
- if (err < 0)
- return err;
+ if (set->dtype == NFT_DATA_VERDICT)
+ dtype = NFT_DATA_VERDICT;
+ else
+ dtype = NFT_DATA_VALUE;
- if (desc->type != NFT_DATA_VERDICT && desc->len != set->dlen) {
- nft_data_release(data, desc->type);
- return -EINVAL;
- }
+ desc->type = dtype;
+ desc->size = NFT_DATA_VALUE_MAXLEN;
+ desc->len = set->dlen;
+ desc->flags = NFT_DATA_DESC_SETELEM;
- return 0;
+ return nft_data_init(ctx, data, desc, attr);
}
static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set,
@@ -4996,18 +4987,6 @@
u32 portid = ctx->portid;
struct sk_buff *skb;
int err;
- char *buf = kasprintf(GFP_KERNEL, "%s:%llu;%s:%llu",
- ctx->table->name, ctx->table->handle,
- set->name, set->handle);
-
- audit_log_nfcfg(buf,
- ctx->family,
- set->handle,
- event == NFT_MSG_NEWSETELEM ?
- AUDIT_NFT_OP_SETELEM_REGISTER :
- AUDIT_NFT_OP_SETELEM_UNREGISTER,
- GFP_KERNEL);
- kfree(buf);
if (!ctx->report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
return;
@@ -5055,9 +5034,6 @@
return expr;
err = -EOPNOTSUPP;
- if (!(expr->ops->type->flags & NFT_EXPR_STATEFUL))
- goto err_set_elem_expr;
-
if (expr->ops->type->flags & NFT_EXPR_GC) {
if (set->flags & NFT_SET_TIMEOUT)
goto err_set_elem_expr;
@@ -5214,6 +5190,15 @@
return -EINVAL;
}
+ if (set->flags & NFT_SET_OBJECT) {
+ if (!nla[NFTA_SET_ELEM_OBJREF] &&
+ !(flags & NFT_SET_ELEM_INTERVAL_END))
+ return -EINVAL;
+ } else {
+ if (nla[NFTA_SET_ELEM_OBJREF])
+ return -EINVAL;
+ }
+
if ((flags & NFT_SET_ELEM_INTERVAL_END) &&
(nla[NFTA_SET_ELEM_DATA] ||
nla[NFTA_SET_ELEM_OBJREF] ||
@@ -5291,10 +5276,6 @@
expr->ops->size);
if (nla[NFTA_SET_ELEM_OBJREF] != NULL) {
- if (!(set->flags & NFT_SET_OBJECT)) {
- err = -EINVAL;
- goto err_parse_key_end;
- }
obj = nft_obj_lookup(ctx->net, ctx->table,
nla[NFTA_SET_ELEM_OBJREF],
set->objtype, genmask);
@@ -6064,19 +6045,14 @@
int family, const struct nft_table *table,
struct nft_object *obj, bool reset)
{
- struct nfgenmsg *nfmsg;
struct nlmsghdr *nlh;
event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, family,
+ NFNETLINK_V0, nft_base_seq(net));
+ if (!nlh)
goto nla_put_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = family;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
-
if (nla_put_string(skb, NFTA_OBJ_TABLE, table->name) ||
nla_put_string(skb, NFTA_OBJ_NAME, obj->key.name) ||
nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) ||
@@ -6139,12 +6115,11 @@
filter->type != NFT_OBJECT_UNSPEC &&
obj->ops->type->type != filter->type)
goto cont;
-
if (reset) {
char *buf = kasprintf(GFP_ATOMIC,
- "%s:%llu;?:0",
+ "%s:%u",
table->name,
- table->handle);
+ net->nft.base_seq);
audit_log_nfcfg(buf,
family,
@@ -6265,8 +6240,8 @@
reset = true;
if (reset) {
- char *buf = kasprintf(GFP_ATOMIC, "%s:%llu;?:0",
- table->name, table->handle);
+ char *buf = kasprintf(GFP_ATOMIC, "%s:%u",
+ table->name, net->nft.base_seq);
audit_log_nfcfg(buf,
family,
@@ -6353,15 +6328,15 @@
{
struct sk_buff *skb;
int err;
- char *buf = kasprintf(gfp, "%s:%llu;?:0",
- table->name, table->handle);
+ char *buf = kasprintf(gfp, "%s:%u",
+ table->name, net->nft.base_seq);
audit_log_nfcfg(buf,
family,
obj->handle,
event == NFT_MSG_NEWOBJ ?
- AUDIT_NFT_OP_OBJ_REGISTER :
- AUDIT_NFT_OP_OBJ_UNREGISTER,
+ AUDIT_NFT_OP_OBJ_REGISTER :
+ AUDIT_NFT_OP_OBJ_UNREGISTER,
gfp);
kfree(buf);
@@ -6684,11 +6659,15 @@
if (nla[NFTA_FLOWTABLE_FLAGS]) {
flags = ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS]));
- if (flags & ~NFT_FLOWTABLE_MASK)
- return -EOPNOTSUPP;
+ if (flags & ~NFT_FLOWTABLE_MASK) {
+ err = -EOPNOTSUPP;
+ goto err_flowtable_update_hook;
+ }
if ((flowtable->data.flags & NFT_FLOWTABLE_HW_OFFLOAD) ^
- (flags & NFT_FLOWTABLE_HW_OFFLOAD))
- return -EOPNOTSUPP;
+ (flags & NFT_FLOWTABLE_HW_OFFLOAD)) {
+ err = -EOPNOTSUPP;
+ goto err_flowtable_update_hook;
+ }
} else {
flags = flowtable->data.flags;
}
@@ -6870,6 +6849,7 @@
{
const struct nlattr * const *nla = ctx->nla;
struct nft_flowtable_hook flowtable_hook;
+ LIST_HEAD(flowtable_del_list);
struct nft_hook *this, *hook;
struct nft_trans *trans;
int err;
@@ -6885,7 +6865,7 @@
err = -ENOENT;
goto err_flowtable_del_hook;
}
- hook->inactive = true;
+ list_move(&hook->list, &flowtable_del_list);
}
trans = nft_trans_alloc(ctx, NFT_MSG_DELFLOWTABLE,
@@ -6898,6 +6878,7 @@
nft_trans_flowtable(trans) = flowtable;
nft_trans_flowtable_update(trans) = true;
INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans));
+ list_splice(&flowtable_del_list, &nft_trans_flowtable_hooks(trans));
nft_flowtable_hook_release(&flowtable_hook);
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
@@ -6905,13 +6886,7 @@
return 0;
err_flowtable_del_hook:
- list_for_each_entry(this, &flowtable_hook.list, list) {
- hook = nft_hook_list_find(&flowtable->hook_list, this);
- if (!hook)
- break;
-
- hook->inactive = false;
- }
+ list_splice(&flowtable_del_list, &flowtable->hook_list);
nft_flowtable_hook_release(&flowtable_hook);
return err;
@@ -6976,20 +6951,15 @@
struct list_head *hook_list)
{
struct nlattr *nest, *nest_devs;
- struct nfgenmsg *nfmsg;
struct nft_hook *hook;
struct nlmsghdr *nlh;
event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, family,
+ NFNETLINK_V0, nft_base_seq(net));
+ if (!nlh)
goto nla_put_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = family;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
-
if (nla_put_string(skb, NFTA_FLOWTABLE_TABLE, flowtable->table->name) ||
nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) ||
nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)) ||
@@ -7179,18 +7149,6 @@
{
struct sk_buff *skb;
int err;
- char *buf = kasprintf(GFP_KERNEL, "%s:%llu;%s:%llu",
- flowtable->table->name, flowtable->table->handle,
- flowtable->name, flowtable->handle);
-
- audit_log_nfcfg(buf,
- ctx->family,
- flowtable->hooknum,
- event == NFT_MSG_NEWFLOWTABLE ?
- AUDIT_NFT_OP_FLOWTABLE_REGISTER :
- AUDIT_NFT_OP_FLOWTABLE_UNREGISTER,
- GFP_KERNEL);
- kfree(buf);
if (!ctx->report &&
!nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
@@ -7234,19 +7192,14 @@
u32 portid, u32 seq)
{
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
char buf[TASK_COMM_LEN];
int event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWGEN);
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), 0);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, 0, AF_UNSPEC,
+ NFNETLINK_V0, nft_base_seq(net));
+ if (!nlh)
goto nla_put_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = AF_UNSPEC;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
-
if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq)) ||
nla_put_be32(skb, NFTA_GEN_PROC_PID, htonl(task_pid_nr(current))) ||
nla_put_string(skb, NFTA_GEN_PROC_NAME, get_task_comm(buf, current)))
@@ -7311,9 +7264,6 @@
struct sk_buff *skb2;
int err;
- audit_log_nfcfg("?:0;?:0", 0, net->nft.base_seq,
- AUDIT_NFT_OP_GEN_REGISTER, GFP_KERNEL);
-
if (!nlmsg_report(nlh) &&
!nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
return;
@@ -7761,17 +7711,6 @@
list_del_rcu(&chain->list);
}
-static void nft_flowtable_hooks_del(struct nft_flowtable *flowtable,
- struct list_head *hook_list)
-{
- struct nft_hook *hook, *next;
-
- list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
- if (hook->inactive)
- list_move(&hook->list, hook_list);
- }
-}
-
static void nf_tables_module_autoload_cleanup(struct net *net)
{
struct nft_module_request *req, *next;
@@ -7852,12 +7791,74 @@
WARN_ON_ONCE(!list_empty(&net->nft.notify_list));
}
+static int nf_tables_commit_audit_alloc(struct list_head *adl,
+ struct nft_table *table)
+{
+ struct nft_audit_data *adp;
+
+ list_for_each_entry(adp, adl, list) {
+ if (adp->table == table)
+ return 0;
+ }
+ adp = kzalloc(sizeof(*adp), GFP_KERNEL);
+ if (!adp)
+ return -ENOMEM;
+ adp->table = table;
+ list_add(&adp->list, adl);
+ return 0;
+}
+
+static void nf_tables_commit_audit_free(struct list_head *adl)
+{
+ struct nft_audit_data *adp, *adn;
+
+ list_for_each_entry_safe(adp, adn, adl, list) {
+ list_del(&adp->list);
+ kfree(adp);
+ }
+}
+
+static void nf_tables_commit_audit_collect(struct list_head *adl,
+ struct nft_table *table, u32 op)
+{
+ struct nft_audit_data *adp;
+
+ list_for_each_entry(adp, adl, list) {
+ if (adp->table == table)
+ goto found;
+ }
+ WARN_ONCE(1, "table=%s not expected in commit list", table->name);
+ return;
+found:
+ adp->entries++;
+ if (!adp->op || adp->op > op)
+ adp->op = op;
+}
+
+#define AUNFTABLENAMELEN (NFT_TABLE_MAXNAMELEN + 22)
+
+static void nf_tables_commit_audit_log(struct list_head *adl, u32 generation)
+{
+ struct nft_audit_data *adp, *adn;
+ char aubuf[AUNFTABLENAMELEN];
+
+ list_for_each_entry_safe(adp, adn, adl, list) {
+ snprintf(aubuf, AUNFTABLENAMELEN, "%s:%u", adp->table->name,
+ generation);
+ audit_log_nfcfg(aubuf, adp->table->family, adp->entries,
+ nft2audit_op[adp->op], GFP_KERNEL);
+ list_del(&adp->list);
+ kfree(adp);
+ }
+}
+
static int nf_tables_commit(struct net *net, struct sk_buff *skb)
{
struct nft_trans *trans, *next;
struct nft_trans_elem *te;
struct nft_chain *chain;
struct nft_table *table;
+ LIST_HEAD(adl);
int err;
if (list_empty(&net->nft.commit_list)) {
@@ -7877,6 +7878,12 @@
list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
int ret;
+ ret = nf_tables_commit_audit_alloc(&adl, trans->ctx.table);
+ if (ret) {
+ nf_tables_commit_chain_prepare_cancel(net);
+ nf_tables_commit_audit_free(&adl);
+ return ret;
+ }
if (trans->msg_type == NFT_MSG_NEWRULE ||
trans->msg_type == NFT_MSG_DELRULE) {
chain = trans->ctx.chain;
@@ -7884,6 +7891,7 @@
ret = nf_tables_commit_chain_prepare(net, chain);
if (ret < 0) {
nf_tables_commit_chain_prepare_cancel(net);
+ nf_tables_commit_audit_free(&adl);
return ret;
}
}
@@ -7905,6 +7913,8 @@
net->nft.gencursor = nft_gencursor_next(net);
list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+ nf_tables_commit_audit_collect(&adl, trans->ctx.table,
+ trans->msg_type);
switch (trans->msg_type) {
case NFT_MSG_NEWTABLE:
if (nft_trans_table_update(trans)) {
@@ -7947,6 +7957,9 @@
nf_tables_rule_notify(&trans->ctx,
nft_trans_rule(trans),
NFT_MSG_NEWRULE);
+ if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
+ nft_flow_rule_destroy(nft_trans_flow_rule(trans));
+
nft_trans_destroy(trans);
break;
case NFT_MSG_DELRULE:
@@ -7957,6 +7970,9 @@
nft_rule_expr_deactivate(&trans->ctx,
nft_trans_rule(trans),
NFT_TRANS_COMMIT);
+
+ if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
+ nft_flow_rule_destroy(nft_trans_flow_rule(trans));
break;
case NFT_MSG_NEWSET:
nft_clear(net, nft_trans_set(trans));
@@ -8035,8 +8051,6 @@
break;
case NFT_MSG_DELFLOWTABLE:
if (nft_trans_flowtable_update(trans)) {
- nft_flowtable_hooks_del(nft_trans_flowtable(trans),
- &nft_trans_flowtable_hooks(trans));
nf_tables_flowtable_notify(&trans->ctx,
nft_trans_flowtable(trans),
&nft_trans_flowtable_hooks(trans),
@@ -8058,6 +8072,7 @@
nft_commit_notify(net, NETLINK_CB(skb).portid);
nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
+ nf_tables_commit_audit_log(&adl, net->nft.base_seq);
nf_tables_commit_release(net);
return 0;
@@ -8114,7 +8129,6 @@
{
struct nft_trans *trans, *next;
struct nft_trans_elem *te;
- struct nft_hook *hook;
if (action == NFNL_ABORT_VALIDATE &&
nf_tables_validate(net) < 0)
@@ -8232,8 +8246,8 @@
break;
case NFT_MSG_DELFLOWTABLE:
if (nft_trans_flowtable_update(trans)) {
- list_for_each_entry(hook, &nft_trans_flowtable(trans)->hook_list, list)
- hook->inactive = false;
+ list_splice(&nft_trans_flowtable_hooks(trans),
+ &nft_trans_flowtable(trans)->hook_list);
} else {
trans->ctx.table->use++;
nft_clear(trans->ctx.net, nft_trans_flowtable(trans));
@@ -8512,7 +8526,7 @@
* Validate that the input register is one of the general purpose
* registers and that the length of the load is within the bounds.
*/
-int nft_validate_register_load(enum nft_registers reg, unsigned int len)
+static int nft_validate_register_load(enum nft_registers reg, unsigned int len)
{
if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE)
return -EINVAL;
@@ -8523,7 +8537,21 @@
return 0;
}
-EXPORT_SYMBOL_GPL(nft_validate_register_load);
+
+int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len)
+{
+ u32 reg;
+ int err;
+
+ reg = nft_parse_register(attr);
+ err = nft_validate_register_load(reg, len);
+ if (err < 0)
+ return err;
+
+ *sreg = reg;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_parse_register_load);
/**
* nft_validate_register_store - validate an expressions' register store
@@ -8539,10 +8567,11 @@
* A value of NULL for the data means that its runtime gathered
* data.
*/
-int nft_validate_register_store(const struct nft_ctx *ctx,
- enum nft_registers reg,
- const struct nft_data *data,
- enum nft_data_types type, unsigned int len)
+static int nft_validate_register_store(const struct nft_ctx *ctx,
+ enum nft_registers reg,
+ const struct nft_data *data,
+ enum nft_data_types type,
+ unsigned int len)
{
int err;
@@ -8574,7 +8603,24 @@
return 0;
}
}
-EXPORT_SYMBOL_GPL(nft_validate_register_store);
+
+int nft_parse_register_store(const struct nft_ctx *ctx,
+ const struct nlattr *attr, u8 *dreg,
+ const struct nft_data *data,
+ enum nft_data_types type, unsigned int len)
+{
+ int err;
+ u32 reg;
+
+ reg = nft_parse_register(attr);
+ err = nft_validate_register_store(ctx, reg, data, type, len);
+ if (err < 0)
+ return err;
+
+ *dreg = reg;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_parse_register_store);
static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = {
[NFTA_VERDICT_CODE] = { .type = NLA_U32 },
@@ -8622,7 +8668,7 @@
tb[NFTA_VERDICT_CHAIN],
genmask);
} else if (tb[NFTA_VERDICT_CHAIN_ID]) {
- chain = nft_chain_lookup_byid(ctx->net,
+ chain = nft_chain_lookup_byid(ctx->net, ctx->table,
tb[NFTA_VERDICT_CHAIN_ID]);
if (IS_ERR(chain))
return PTR_ERR(chain);
@@ -8634,6 +8680,11 @@
return PTR_ERR(chain);
if (nft_is_base_chain(chain))
return -EOPNOTSUPP;
+ if (nft_chain_is_bound(chain))
+ return -EINVAL;
+ if (desc->flags & NFT_DATA_DESC_SETELEM &&
+ chain->flags & NFT_CHAIN_BINDING)
+ return -EINVAL;
chain->use++;
data->verdict.chain = chain;
@@ -8641,7 +8692,7 @@
}
desc->len = sizeof(data->verdict);
- desc->type = NFT_DATA_VERDICT;
+
return 0;
}
@@ -8694,20 +8745,25 @@
}
static int nft_value_init(const struct nft_ctx *ctx,
- struct nft_data *data, unsigned int size,
- struct nft_data_desc *desc, const struct nlattr *nla)
+ struct nft_data *data, struct nft_data_desc *desc,
+ const struct nlattr *nla)
{
unsigned int len;
len = nla_len(nla);
if (len == 0)
return -EINVAL;
- if (len > size)
+ if (len > desc->size)
return -EOVERFLOW;
+ if (desc->len) {
+ if (len != desc->len)
+ return -EINVAL;
+ } else {
+ desc->len = len;
+ }
nla_memcpy(data->data, nla, len);
- desc->type = NFT_DATA_VALUE;
- desc->len = len;
+
return 0;
}
@@ -8727,7 +8783,6 @@
*
* @ctx: context of the expression using the data
* @data: destination struct nft_data
- * @size: maximum data length
* @desc: data description
* @nla: netlink attribute containing data
*
@@ -8737,24 +8792,35 @@
* The caller can indicate that it only wants to accept data of type
* NFT_DATA_VALUE by passing NULL for the ctx argument.
*/
-int nft_data_init(const struct nft_ctx *ctx,
- struct nft_data *data, unsigned int size,
+int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data,
struct nft_data_desc *desc, const struct nlattr *nla)
{
struct nlattr *tb[NFTA_DATA_MAX + 1];
int err;
+ if (WARN_ON_ONCE(!desc->size))
+ return -EINVAL;
+
err = nla_parse_nested_deprecated(tb, NFTA_DATA_MAX, nla,
nft_data_policy, NULL);
if (err < 0)
return err;
- if (tb[NFTA_DATA_VALUE])
- return nft_value_init(ctx, data, size, desc,
- tb[NFTA_DATA_VALUE]);
- if (tb[NFTA_DATA_VERDICT] && ctx != NULL)
- return nft_verdict_init(ctx, data, desc, tb[NFTA_DATA_VERDICT]);
- return -EINVAL;
+ if (tb[NFTA_DATA_VALUE]) {
+ if (desc->type != NFT_DATA_VALUE)
+ return -EINVAL;
+
+ err = nft_value_init(ctx, data, desc, tb[NFTA_DATA_VALUE]);
+ } else if (tb[NFTA_DATA_VERDICT] && ctx != NULL) {
+ if (desc->type != NFT_DATA_VERDICT)
+ return -EINVAL;
+
+ err = nft_verdict_init(ctx, data, desc, tb[NFTA_DATA_VERDICT]);
+ } else {
+ err = -EINVAL;
+ }
+
+ return err;
}
EXPORT_SYMBOL_GPL(nft_data_init);
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index a61b5bf..9dc1842 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -67,6 +67,50 @@
regs->verdict.code = NFT_BREAK;
}
+static void nft_cmp16_fast_eval(const struct nft_expr *expr,
+ struct nft_regs *regs)
+{
+ const struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr);
+ const u64 *reg_data = (const u64 *)®s->data[priv->sreg];
+ const u64 *mask = (const u64 *)&priv->mask;
+ const u64 *data = (const u64 *)&priv->data;
+
+ if (((reg_data[0] & mask[0]) == data[0] &&
+ ((reg_data[1] & mask[1]) == data[1])) ^ priv->inv)
+ return;
+ regs->verdict.code = NFT_BREAK;
+}
+
+static noinline void __nft_trace_verdict(struct nft_traceinfo *info,
+ const struct nft_chain *chain,
+ const struct nft_regs *regs)
+{
+ enum nft_trace_types type;
+
+ switch (regs->verdict.code) {
+ case NFT_CONTINUE:
+ case NFT_RETURN:
+ type = NFT_TRACETYPE_RETURN;
+ break;
+ default:
+ type = NFT_TRACETYPE_RULE;
+ break;
+ }
+
+ __nft_trace_packet(info, chain, type);
+}
+
+static inline void nft_trace_verdict(struct nft_traceinfo *info,
+ const struct nft_chain *chain,
+ const struct nft_rule *rule,
+ const struct nft_regs *regs)
+{
+ if (static_branch_unlikely(&nft_trace_enabled)) {
+ info->rule = rule;
+ __nft_trace_verdict(info, chain, regs);
+ }
+}
+
static bool nft_payload_fast_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -185,6 +229,8 @@
nft_rule_for_each_expr(expr, last, rule) {
if (expr->ops == &nft_cmp_fast_ops)
nft_cmp_fast_eval(expr, ®s);
+ else if (expr->ops == &nft_cmp16_fast_ops)
+ nft_cmp16_fast_eval(expr, ®s);
else if (expr->ops == &nft_bitwise_fast_ops)
nft_bitwise_fast_eval(expr, ®s);
else if (expr->ops != &nft_payload_fast_ops ||
@@ -207,13 +253,13 @@
break;
}
+ nft_trace_verdict(&info, chain, rule, ®s);
+
switch (regs.verdict.code & NF_VERDICT_MASK) {
case NF_ACCEPT:
case NF_DROP:
case NF_QUEUE:
case NF_STOLEN:
- nft_trace_packet(&info, chain, rule,
- NFT_TRACETYPE_RULE);
return regs.verdict.code;
}
@@ -226,15 +272,10 @@
stackptr++;
fallthrough;
case NFT_GOTO:
- nft_trace_packet(&info, chain, rule,
- NFT_TRACETYPE_RULE);
-
chain = regs.verdict.chain;
goto do_chain;
case NFT_CONTINUE:
case NFT_RETURN:
- nft_trace_packet(&info, chain, rule,
- NFT_TRACETYPE_RETURN);
break;
default:
WARN_ON(1);
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 839fd09..4e99b17 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -208,7 +208,7 @@
return 0;
}
-int nft_chain_offload_priority(struct nft_base_chain *basechain)
+static int nft_chain_offload_priority(const struct nft_base_chain *basechain)
{
if (basechain->ops.priority <= 0 ||
basechain->ops.priority > USHRT_MAX)
@@ -217,6 +217,27 @@
return 0;
}
+bool nft_chain_offload_support(const struct nft_base_chain *basechain)
+{
+ struct net_device *dev;
+ struct nft_hook *hook;
+
+ if (nft_chain_offload_priority(basechain) < 0)
+ return false;
+
+ list_for_each_entry(hook, &basechain->hook_list, list) {
+ if (hook->ops.pf != NFPROTO_NETDEV ||
+ hook->ops.hooknum != NF_NETDEV_INGRESS)
+ return false;
+
+ dev = hook->ops.dev;
+ if (!dev->netdev_ops->ndo_setup_tc && !flow_indr_dev_exists())
+ return false;
+ }
+
+ return true;
+}
+
static void nft_flow_cls_offload_setup(struct flow_cls_offload *cls_flow,
const struct nft_base_chain *basechain,
const struct nft_rule *rule,
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
index 87b36da..0cf3278 100644
--- a/net/netfilter/nf_tables_trace.c
+++ b/net/netfilter/nf_tables_trace.c
@@ -183,7 +183,6 @@
void nft_trace_notify(struct nft_traceinfo *info)
{
const struct nft_pktinfo *pkt = info->pkt;
- struct nfgenmsg *nfmsg;
struct nlmsghdr *nlh;
struct sk_buff *skb;
unsigned int size;
@@ -219,15 +218,11 @@
return;
event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_TRACE);
- nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct nfgenmsg), 0);
+ nlh = nfnl_msg_put(skb, 0, 0, event, 0, info->basechain->type->family,
+ NFNETLINK_V0, 0);
if (!nlh)
goto nla_put_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = info->basechain->type->family;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
if (nla_put_be32(skb, NFTA_TRACE_NFPROTO, htonl(nft_pf(pkt))))
goto nla_put_failure;
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index 5bfec82..ec3e378 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -132,21 +132,16 @@
int event, struct nf_acct *acct)
{
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0;
u64 pkts, bytes;
u32 old_flags;
event = nfnl_msg_type(NFNL_SUBSYS_ACCT, event);
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC,
+ NFNETLINK_V0, 0);
+ if (!nlh)
goto nlmsg_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = AF_UNSPEC;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
if (nla_put_string(skb, NFACCT_NAME, acct->name))
goto nla_put_failure;
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index 91afbf8..52d5f24 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -530,20 +530,15 @@
int event, struct nf_conntrack_helper *helper)
{
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0;
int status;
event = nfnl_msg_type(NFNL_SUBSYS_CTHELPER, event);
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC,
+ NFNETLINK_V0, 0);
+ if (!nlh)
goto nlmsg_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = AF_UNSPEC;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
if (nla_put_string(skb, NFCTH_NAME, helper->name))
goto nla_put_failure;
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 89a381f..de831a2 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -160,22 +160,17 @@
int event, struct ctnl_timeout *timeout)
{
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0;
const struct nf_conntrack_l4proto *l4proto = timeout->timeout.l4proto;
struct nlattr *nest_parms;
int ret;
event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event);
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC,
+ NFNETLINK_V0, 0);
+ if (!nlh)
goto nlmsg_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = AF_UNSPEC;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
if (nla_put_string(skb, CTA_TIMEOUT_NAME, timeout->name) ||
nla_put_be16(skb, CTA_TIMEOUT_L3PROTO,
htons(timeout->timeout.l3num)) ||
@@ -382,21 +377,16 @@
const unsigned int *timeouts)
{
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0;
struct nlattr *nest_parms;
int ret;
event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event);
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC,
+ NFNETLINK_V0, 0);
+ if (!nlh)
goto nlmsg_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = AF_UNSPEC;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
if (nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(l3num)) ||
nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, l4proto->l4proto))
goto nla_put_failure;
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 33c13ed..f087baa 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -452,20 +452,15 @@
{
struct nfulnl_msg_packet_hdr pmsg;
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
sk_buff_data_t old_tail = inst->skb->tail;
struct sock *sk;
const unsigned char *hwhdrp;
- nlh = nlmsg_put(inst->skb, 0, 0,
- nfnl_msg_type(NFNL_SUBSYS_ULOG, NFULNL_MSG_PACKET),
- sizeof(struct nfgenmsg), 0);
+ nlh = nfnl_msg_put(inst->skb, 0, 0,
+ nfnl_msg_type(NFNL_SUBSYS_ULOG, NFULNL_MSG_PACKET),
+ 0, pf, NFNETLINK_V0, htons(inst->group_num));
if (!nlh)
return -1;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = pf;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = htons(inst->group_num);
memset(&pmsg, 0, sizeof(pmsg));
pmsg.hw_protocol = skb->protocol;
diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c
index 79fbf37..51e3953 100644
--- a/net/netfilter/nfnetlink_osf.c
+++ b/net/netfilter/nfnetlink_osf.c
@@ -269,6 +269,7 @@
struct nf_osf_hdr_ctx ctx;
const struct tcphdr *tcp;
struct tcphdr _tcph;
+ bool found = false;
memset(&ctx, 0, sizeof(ctx));
@@ -283,10 +284,11 @@
data->genre = f->genre;
data->version = f->version;
+ found = true;
break;
}
- return true;
+ return found;
}
EXPORT_SYMBOL_GPL(nf_osf_find);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 1640da5..9d87606 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -383,7 +383,6 @@
struct nlattr *nla;
struct nfqnl_msg_packet_hdr *pmsg;
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
struct sk_buff *entskb = entry->skb;
struct net_device *indev;
struct net_device *outdev;
@@ -469,18 +468,15 @@
goto nlmsg_failure;
}
- nlh = nlmsg_put(skb, 0, 0,
- nfnl_msg_type(NFNL_SUBSYS_QUEUE, NFQNL_MSG_PACKET),
- sizeof(struct nfgenmsg), 0);
+ nlh = nfnl_msg_put(skb, 0, 0,
+ nfnl_msg_type(NFNL_SUBSYS_QUEUE, NFQNL_MSG_PACKET),
+ 0, entry->state.pf, NFNETLINK_V0,
+ htons(queue->queue_num));
if (!nlh) {
skb_tx_error(entskb);
kfree_skb(skb);
goto nlmsg_failure;
}
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = entry->state.pf;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = htons(queue->queue_num);
nla = __nla_reserve(skb, NFQA_PACKET_HDR, sizeof(*pmsg));
pmsg = nla_data(nla);
@@ -838,11 +834,16 @@
}
static int
-nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff)
+nfqnl_mangle(void *data, unsigned int data_len, struct nf_queue_entry *e, int diff)
{
struct sk_buff *nskb;
if (diff < 0) {
+ unsigned int min_len = skb_transport_offset(e->skb);
+
+ if (data_len < min_len)
+ return -EINVAL;
+
if (pskb_trim(e->skb, data_len))
return -ENOMEM;
} else if (diff > 0) {
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index bbd773d..d6ab7aa 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -16,8 +16,8 @@
#include <net/netfilter/nf_tables_offload.h>
struct nft_bitwise {
- enum nft_registers sreg:8;
- enum nft_registers dreg:8;
+ u8 sreg;
+ u8 dreg;
enum nft_bitwise_ops op:8;
u8 len;
struct nft_data mask;
@@ -93,7 +93,16 @@
static int nft_bitwise_init_bool(struct nft_bitwise *priv,
const struct nlattr *const tb[])
{
- struct nft_data_desc mask, xor;
+ struct nft_data_desc mask = {
+ .type = NFT_DATA_VALUE,
+ .size = sizeof(priv->mask),
+ .len = priv->len,
+ };
+ struct nft_data_desc xor = {
+ .type = NFT_DATA_VALUE,
+ .size = sizeof(priv->xor),
+ .len = priv->len,
+ };
int err;
if (tb[NFTA_BITWISE_DATA])
@@ -103,36 +112,30 @@
!tb[NFTA_BITWISE_XOR])
return -EINVAL;
- err = nft_data_init(NULL, &priv->mask, sizeof(priv->mask), &mask,
- tb[NFTA_BITWISE_MASK]);
+ err = nft_data_init(NULL, &priv->mask, &mask, tb[NFTA_BITWISE_MASK]);
if (err < 0)
return err;
- if (mask.type != NFT_DATA_VALUE || mask.len != priv->len) {
- err = -EINVAL;
- goto err1;
- }
- err = nft_data_init(NULL, &priv->xor, sizeof(priv->xor), &xor,
- tb[NFTA_BITWISE_XOR]);
+ err = nft_data_init(NULL, &priv->xor, &xor, tb[NFTA_BITWISE_XOR]);
if (err < 0)
- goto err1;
- if (xor.type != NFT_DATA_VALUE || xor.len != priv->len) {
- err = -EINVAL;
- goto err2;
- }
+ goto err_xor_err;
return 0;
-err2:
- nft_data_release(&priv->xor, xor.type);
-err1:
+
+err_xor_err:
nft_data_release(&priv->mask, mask.type);
+
return err;
}
static int nft_bitwise_init_shift(struct nft_bitwise *priv,
const struct nlattr *const tb[])
{
- struct nft_data_desc d;
+ struct nft_data_desc desc = {
+ .type = NFT_DATA_VALUE,
+ .size = sizeof(priv->data),
+ .len = sizeof(u32),
+ };
int err;
if (tb[NFTA_BITWISE_MASK] ||
@@ -142,13 +145,12 @@
if (!tb[NFTA_BITWISE_DATA])
return -EINVAL;
- err = nft_data_init(NULL, &priv->data, sizeof(priv->data), &d,
- tb[NFTA_BITWISE_DATA]);
+ err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_BITWISE_DATA]);
if (err < 0)
return err;
- if (d.type != NFT_DATA_VALUE || d.len != sizeof(u32) ||
- priv->data.data[0] >= BITS_PER_TYPE(u32)) {
- nft_data_release(&priv->data, d.type);
+
+ if (priv->data.data[0] >= BITS_PER_TYPE(u32)) {
+ nft_data_release(&priv->data, desc.type);
return -EINVAL;
}
@@ -169,14 +171,14 @@
priv->len = len;
- priv->sreg = nft_parse_register(tb[NFTA_BITWISE_SREG]);
- err = nft_validate_register_load(priv->sreg, priv->len);
+ err = nft_parse_register_load(tb[NFTA_BITWISE_SREG], &priv->sreg,
+ priv->len);
if (err < 0)
return err;
- priv->dreg = nft_parse_register(tb[NFTA_BITWISE_DREG]);
- err = nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, priv->len);
+ err = nft_parse_register_store(ctx, tb[NFTA_BITWISE_DREG],
+ &priv->dreg, NULL, NFT_DATA_VALUE,
+ priv->len);
if (err < 0)
return err;
@@ -290,22 +292,21 @@
static int
nft_bitwise_extract_u32_data(const struct nlattr * const tb, u32 *out)
{
- struct nft_data_desc desc;
struct nft_data data;
- int err = 0;
+ struct nft_data_desc desc = {
+ .type = NFT_DATA_VALUE,
+ .size = sizeof(data),
+ .len = sizeof(u32),
+ };
+ int err;
- err = nft_data_init(NULL, &data, sizeof(data), &desc, tb);
+ err = nft_data_init(NULL, &data, &desc, tb);
if (err < 0)
return err;
- if (desc.type != NFT_DATA_VALUE || desc.len != sizeof(u32)) {
- err = -EINVAL;
- goto err;
- }
*out = data.data[0];
-err:
- nft_data_release(&data, desc.type);
- return err;
+
+ return 0;
}
static int nft_bitwise_fast_init(const struct nft_ctx *ctx,
@@ -315,14 +316,13 @@
struct nft_bitwise_fast_expr *priv = nft_expr_priv(expr);
int err;
- priv->sreg = nft_parse_register(tb[NFTA_BITWISE_SREG]);
- err = nft_validate_register_load(priv->sreg, sizeof(u32));
+ err = nft_parse_register_load(tb[NFTA_BITWISE_SREG], &priv->sreg,
+ sizeof(u32));
if (err < 0)
return err;
- priv->dreg = nft_parse_register(tb[NFTA_BITWISE_DREG]);
- err = nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, sizeof(u32));
+ err = nft_parse_register_store(ctx, tb[NFTA_BITWISE_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, sizeof(u32));
if (err < 0)
return err;
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index 12bed3f..9d5947a 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -16,8 +16,8 @@
#include <net/netfilter/nf_tables.h>
struct nft_byteorder {
- enum nft_registers sreg:8;
- enum nft_registers dreg:8;
+ u8 sreg;
+ u8 dreg;
enum nft_byteorder_ops op:8;
u8 len;
u8 size;
@@ -131,20 +131,20 @@
return -EINVAL;
}
- priv->sreg = nft_parse_register(tb[NFTA_BYTEORDER_SREG]);
err = nft_parse_u32_check(tb[NFTA_BYTEORDER_LEN], U8_MAX, &len);
if (err < 0)
return err;
priv->len = len;
- err = nft_validate_register_load(priv->sreg, priv->len);
+ err = nft_parse_register_load(tb[NFTA_BYTEORDER_SREG], &priv->sreg,
+ priv->len);
if (err < 0)
return err;
- priv->dreg = nft_parse_register(tb[NFTA_BYTEORDER_DREG]);
- return nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, priv->len);
+ return nft_parse_register_store(ctx, tb[NFTA_BYTEORDER_DREG],
+ &priv->dreg, NULL, NFT_DATA_VALUE,
+ priv->len);
}
static int nft_byteorder_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index 1d42d06..461763a 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -18,7 +18,7 @@
struct nft_cmp_expr {
struct nft_data data;
- enum nft_registers sreg:8;
+ u8 sreg;
u8 len;
enum nft_cmp_ops op:8;
};
@@ -73,22 +73,17 @@
const struct nlattr * const tb[])
{
struct nft_cmp_expr *priv = nft_expr_priv(expr);
- struct nft_data_desc desc;
+ struct nft_data_desc desc = {
+ .type = NFT_DATA_VALUE,
+ .size = sizeof(priv->data),
+ };
int err;
- err = nft_data_init(NULL, &priv->data, sizeof(priv->data), &desc,
- tb[NFTA_CMP_DATA]);
+ err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]);
if (err < 0)
return err;
- if (desc.type != NFT_DATA_VALUE) {
- err = -EINVAL;
- nft_data_release(&priv->data, desc.type);
- return err;
- }
-
- priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]);
- err = nft_validate_register_load(priv->sreg, desc.len);
+ err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
if (err < 0)
return err;
@@ -202,17 +197,18 @@
const struct nlattr * const tb[])
{
struct nft_cmp_fast_expr *priv = nft_expr_priv(expr);
- struct nft_data_desc desc;
struct nft_data data;
+ struct nft_data_desc desc = {
+ .type = NFT_DATA_VALUE,
+ .size = sizeof(data),
+ };
int err;
- err = nft_data_init(NULL, &data, sizeof(data), &desc,
- tb[NFTA_CMP_DATA]);
+ err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]);
if (err < 0)
return err;
- priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]);
- err = nft_validate_register_load(priv->sreg, desc.len);
+ err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
if (err < 0)
return err;
@@ -274,12 +270,108 @@
.offload = nft_cmp_fast_offload,
};
+static u32 nft_cmp_mask(u32 bitlen)
+{
+ return (__force u32)cpu_to_le32(~0U >> (sizeof(u32) * BITS_PER_BYTE - bitlen));
+}
+
+static void nft_cmp16_fast_mask(struct nft_data *data, unsigned int bitlen)
+{
+ int len = bitlen / BITS_PER_BYTE;
+ int i, words = len / sizeof(u32);
+
+ for (i = 0; i < words; i++) {
+ data->data[i] = 0xffffffff;
+ bitlen -= sizeof(u32) * BITS_PER_BYTE;
+ }
+
+ if (len % sizeof(u32))
+ data->data[i++] = nft_cmp_mask(bitlen);
+
+ for (; i < 4; i++)
+ data->data[i] = 0;
+}
+
+static int nft_cmp16_fast_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr);
+ struct nft_data_desc desc = {
+ .type = NFT_DATA_VALUE,
+ .size = sizeof(priv->data),
+ };
+ int err;
+
+ err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]);
+ if (err < 0)
+ return err;
+
+ err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
+ if (err < 0)
+ return err;
+
+ nft_cmp16_fast_mask(&priv->mask, desc.len * BITS_PER_BYTE);
+ priv->inv = ntohl(nla_get_be32(tb[NFTA_CMP_OP])) != NFT_CMP_EQ;
+ priv->len = desc.len;
+
+ return 0;
+}
+
+static int nft_cmp16_fast_offload(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow,
+ const struct nft_expr *expr)
+{
+ const struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr);
+ struct nft_cmp_expr cmp = {
+ .data = priv->data,
+ .sreg = priv->sreg,
+ .len = priv->len,
+ .op = priv->inv ? NFT_CMP_NEQ : NFT_CMP_EQ,
+ };
+
+ return __nft_cmp_offload(ctx, flow, &cmp);
+}
+
+static int nft_cmp16_fast_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr);
+ enum nft_cmp_ops op = priv->inv ? NFT_CMP_NEQ : NFT_CMP_EQ;
+
+ if (nft_dump_register(skb, NFTA_CMP_SREG, priv->sreg))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_CMP_OP, htonl(op)))
+ goto nla_put_failure;
+
+ if (nft_data_dump(skb, NFTA_CMP_DATA, &priv->data,
+ NFT_DATA_VALUE, priv->len) < 0)
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+
+const struct nft_expr_ops nft_cmp16_fast_ops = {
+ .type = &nft_cmp_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp16_fast_expr)),
+ .eval = NULL, /* inlined */
+ .init = nft_cmp16_fast_init,
+ .dump = nft_cmp16_fast_dump,
+ .offload = nft_cmp16_fast_offload,
+};
+
static const struct nft_expr_ops *
nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
{
- struct nft_data_desc desc;
struct nft_data data;
+ struct nft_data_desc desc = {
+ .type = NFT_DATA_VALUE,
+ .size = sizeof(data),
+ };
enum nft_cmp_ops op;
+ u8 sreg;
int err;
if (tb[NFTA_CMP_SREG] == NULL ||
@@ -300,23 +392,21 @@
return ERR_PTR(-EINVAL);
}
- err = nft_data_init(NULL, &data, sizeof(data), &desc,
- tb[NFTA_CMP_DATA]);
+ err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]);
if (err < 0)
return ERR_PTR(err);
- if (desc.type != NFT_DATA_VALUE) {
- err = -EINVAL;
- goto err1;
+ sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG]));
+
+ if (op == NFT_CMP_EQ || op == NFT_CMP_NEQ) {
+ if (desc.len <= sizeof(u32))
+ return &nft_cmp_fast_ops;
+ else if (desc.len <= sizeof(data) &&
+ ((sreg >= NFT_REG_1 && sreg <= NFT_REG_4) ||
+ (sreg >= NFT_REG32_00 && sreg <= NFT_REG32_12 && sreg % 2 == 0)))
+ return &nft_cmp16_fast_ops;
}
-
- if (desc.len <= sizeof(u32) && (op == NFT_CMP_EQ || op == NFT_CMP_NEQ))
- return &nft_cmp_fast_ops;
-
return &nft_cmp_ops;
-err1:
- nft_data_release(&data, desc.type);
- return ERR_PTR(-EINVAL);
}
struct nft_expr_type nft_cmp_type __read_mostly = {
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 8e56f35..b8dbd20 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -591,19 +591,14 @@
int rev, int target)
{
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0;
event = nfnl_msg_type(NFNL_SUBSYS_NFT_COMPAT, event);
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, family,
+ NFNETLINK_V0, 0);
+ if (!nlh)
goto nlmsg_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = family;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
if (nla_put_string(skb, NFTA_COMPAT_NAME, name) ||
nla_put_be32(skb, NFTA_COMPAT_REV, htonl(rev)) ||
nla_put_be32(skb, NFTA_COMPAT_TYPE, htonl(target)))
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 7fcb73a..14093d8 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -27,8 +27,8 @@
enum nft_ct_keys key:8;
enum ip_conntrack_dir dir:8;
union {
- enum nft_registers dreg:8;
- enum nft_registers sreg:8;
+ u8 dreg;
+ u8 sreg;
};
};
@@ -97,7 +97,7 @@
return;
#ifdef CONFIG_NF_CONNTRACK_MARK
case NFT_CT_MARK:
- *dest = ct->mark;
+ *dest = READ_ONCE(ct->mark);
return;
#endif
#ifdef CONFIG_NF_CONNTRACK_SECMARK
@@ -294,8 +294,8 @@
switch (priv->key) {
#ifdef CONFIG_NF_CONNTRACK_MARK
case NFT_CT_MARK:
- if (ct->mark != value) {
- ct->mark = value;
+ if (READ_ONCE(ct->mark) != value) {
+ WRITE_ONCE(ct->mark, value);
nf_conntrack_event_cache(IPCT_MARK, ct);
}
break;
@@ -499,9 +499,8 @@
}
}
- priv->dreg = nft_parse_register(tb[NFTA_CT_DREG]);
- err = nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, len);
+ err = nft_parse_register_store(ctx, tb[NFTA_CT_DREG], &priv->dreg, NULL,
+ NFT_DATA_VALUE, len);
if (err < 0)
return err;
@@ -608,8 +607,7 @@
}
}
- priv->sreg = nft_parse_register(tb[NFTA_CT_SREG]);
- err = nft_validate_register_load(priv->sreg, len);
+ err = nft_parse_register_load(tb[NFTA_CT_SREG], &priv->sreg, len);
if (err < 0)
goto err1;
diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c
index 70c4574..5b5c607 100644
--- a/net/netfilter/nft_dup_netdev.c
+++ b/net/netfilter/nft_dup_netdev.c
@@ -14,7 +14,7 @@
#include <net/netfilter/nf_dup_netdev.h>
struct nft_dup_netdev {
- enum nft_registers sreg_dev:8;
+ u8 sreg_dev;
};
static void nft_dup_netdev_eval(const struct nft_expr *expr,
@@ -40,8 +40,8 @@
if (tb[NFTA_DUP_SREG_DEV] == NULL)
return -EINVAL;
- priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]);
- return nft_validate_register_load(priv->sreg_dev, sizeof(int));
+ return nft_parse_register_load(tb[NFTA_DUP_SREG_DEV], &priv->sreg_dev,
+ sizeof(int));
}
static int nft_dup_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 58904be..8c45e01 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -16,8 +16,8 @@
struct nft_set *set;
struct nft_set_ext_tmpl tmpl;
enum nft_dynset_ops op:8;
- enum nft_registers sreg_key:8;
- enum nft_registers sreg_data:8;
+ u8 sreg_key;
+ u8 sreg_data;
bool invert;
u64 timeout;
struct nft_expr *expr;
@@ -154,8 +154,8 @@
return err;
}
- priv->sreg_key = nft_parse_register(tb[NFTA_DYNSET_SREG_KEY]);
- err = nft_validate_register_load(priv->sreg_key, set->klen);
+ err = nft_parse_register_load(tb[NFTA_DYNSET_SREG_KEY], &priv->sreg_key,
+ set->klen);
if (err < 0)
return err;
@@ -165,8 +165,8 @@
if (set->dtype == NFT_DATA_VERDICT)
return -EOPNOTSUPP;
- priv->sreg_data = nft_parse_register(tb[NFTA_DYNSET_SREG_DATA]);
- err = nft_validate_register_load(priv->sreg_data, set->dlen);
+ err = nft_parse_register_load(tb[NFTA_DYNSET_SREG_DATA],
+ &priv->sreg_data, set->dlen);
if (err < 0)
return err;
} else if (set->flags & NFT_SET_MAP)
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index faa0844..670dd14 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -19,8 +19,8 @@
u8 offset;
u8 len;
u8 op;
- enum nft_registers dreg:8;
- enum nft_registers sreg:8;
+ u8 dreg;
+ u8 sreg;
u8 flags;
};
@@ -353,12 +353,12 @@
priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
priv->offset = offset;
priv->len = len;
- priv->dreg = nft_parse_register(tb[NFTA_EXTHDR_DREG]);
priv->flags = flags;
priv->op = op;
- return nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, priv->len);
+ return nft_parse_register_store(ctx, tb[NFTA_EXTHDR_DREG],
+ &priv->dreg, NULL, NFT_DATA_VALUE,
+ priv->len);
}
static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx,
@@ -403,11 +403,11 @@
priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
priv->offset = offset;
priv->len = len;
- priv->sreg = nft_parse_register(tb[NFTA_EXTHDR_SREG]);
priv->flags = flags;
priv->op = op;
- return nft_validate_register_load(priv->sreg, priv->len);
+ return nft_parse_register_load(tb[NFTA_EXTHDR_SREG], &priv->sreg,
+ priv->len);
}
static int nft_exthdr_ipv4_init(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
index 4dfdaea..b10ce73 100644
--- a/net/netfilter/nft_fib.c
+++ b/net/netfilter/nft_fib.c
@@ -86,7 +86,6 @@
return -EINVAL;
priv->result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT]));
- priv->dreg = nft_parse_register(tb[NFTA_FIB_DREG]);
switch (priv->result) {
case NFT_FIB_RESULT_OIF:
@@ -106,8 +105,8 @@
return -EINVAL;
}
- err = nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, len);
+ err = nft_parse_register_store(ctx, tb[NFTA_FIB_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, len);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c
index 3b0dcd1..7730409 100644
--- a/net/netfilter/nft_fwd_netdev.c
+++ b/net/netfilter/nft_fwd_netdev.c
@@ -18,7 +18,7 @@
#include <net/ip.h>
struct nft_fwd_netdev {
- enum nft_registers sreg_dev:8;
+ u8 sreg_dev;
};
static void nft_fwd_netdev_eval(const struct nft_expr *expr,
@@ -50,8 +50,8 @@
if (tb[NFTA_FWD_SREG_DEV] == NULL)
return -EINVAL;
- priv->sreg_dev = nft_parse_register(tb[NFTA_FWD_SREG_DEV]);
- return nft_validate_register_load(priv->sreg_dev, sizeof(int));
+ return nft_parse_register_load(tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev,
+ sizeof(int));
}
static int nft_fwd_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -83,8 +83,8 @@
}
struct nft_fwd_neigh {
- enum nft_registers sreg_dev:8;
- enum nft_registers sreg_addr:8;
+ u8 sreg_dev;
+ u8 sreg_addr;
u8 nfproto;
};
@@ -162,8 +162,6 @@
!tb[NFTA_FWD_NFPROTO])
return -EINVAL;
- priv->sreg_dev = nft_parse_register(tb[NFTA_FWD_SREG_DEV]);
- priv->sreg_addr = nft_parse_register(tb[NFTA_FWD_SREG_ADDR]);
priv->nfproto = ntohl(nla_get_be32(tb[NFTA_FWD_NFPROTO]));
switch (priv->nfproto) {
@@ -177,11 +175,13 @@
return -EOPNOTSUPP;
}
- err = nft_validate_register_load(priv->sreg_dev, sizeof(int));
+ err = nft_parse_register_load(tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev,
+ sizeof(int));
if (err < 0)
return err;
- return nft_validate_register_load(priv->sreg_addr, addr_len);
+ return nft_parse_register_load(tb[NFTA_FWD_SREG_ADDR], &priv->sreg_addr,
+ addr_len);
}
static int nft_fwd_neigh_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 96371d8..f829f52 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -14,8 +14,8 @@
#include <linux/jhash.h>
struct nft_jhash {
- enum nft_registers sreg:8;
- enum nft_registers dreg:8;
+ u8 sreg;
+ u8 dreg;
u8 len;
bool autogen_seed:1;
u32 modulus;
@@ -38,7 +38,7 @@
}
struct nft_symhash {
- enum nft_registers dreg:8;
+ u8 dreg;
u32 modulus;
u32 offset;
};
@@ -83,9 +83,6 @@
if (tb[NFTA_HASH_OFFSET])
priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET]));
- priv->sreg = nft_parse_register(tb[NFTA_HASH_SREG]);
- priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]);
-
err = nft_parse_u32_check(tb[NFTA_HASH_LEN], U8_MAX, &len);
if (err < 0)
return err;
@@ -94,6 +91,10 @@
priv->len = len;
+ err = nft_parse_register_load(tb[NFTA_HASH_SREG], &priv->sreg, len);
+ if (err < 0)
+ return err;
+
priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS]));
if (priv->modulus < 1)
return -ERANGE;
@@ -108,9 +109,8 @@
get_random_bytes(&priv->seed, sizeof(priv->seed));
}
- return nft_validate_register_load(priv->sreg, len) &&
- nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, sizeof(u32));
+ return nft_parse_register_store(ctx, tb[NFTA_HASH_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, sizeof(u32));
}
static int nft_symhash_init(const struct nft_ctx *ctx,
@@ -126,8 +126,6 @@
if (tb[NFTA_HASH_OFFSET])
priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET]));
- priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]);
-
priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS]));
if (priv->modulus < 1)
return -ERANGE;
@@ -135,8 +133,9 @@
if (priv->offset + priv->modulus - 1 < priv->offset)
return -EOVERFLOW;
- return nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, sizeof(u32));
+ return nft_parse_register_store(ctx, tb[NFTA_HASH_DREG],
+ &priv->dreg, NULL, NFT_DATA_VALUE,
+ sizeof(u32));
}
static int nft_jhash_dump(struct sk_buff *skb,
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index 5c9d885..fcdbc5e 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -29,28 +29,44 @@
[NFTA_IMMEDIATE_DATA] = { .type = NLA_NESTED },
};
+static enum nft_data_types nft_reg_to_type(const struct nlattr *nla)
+{
+ enum nft_data_types type;
+ u8 reg;
+
+ reg = ntohl(nla_get_be32(nla));
+ if (reg == NFT_REG_VERDICT)
+ type = NFT_DATA_VERDICT;
+ else
+ type = NFT_DATA_VALUE;
+
+ return type;
+}
+
static int nft_immediate_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_immediate_expr *priv = nft_expr_priv(expr);
- struct nft_data_desc desc;
+ struct nft_data_desc desc = {
+ .size = sizeof(priv->data),
+ };
int err;
if (tb[NFTA_IMMEDIATE_DREG] == NULL ||
tb[NFTA_IMMEDIATE_DATA] == NULL)
return -EINVAL;
- err = nft_data_init(ctx, &priv->data, sizeof(priv->data), &desc,
- tb[NFTA_IMMEDIATE_DATA]);
+ desc.type = nft_reg_to_type(tb[NFTA_IMMEDIATE_DREG]);
+ err = nft_data_init(ctx, &priv->data, &desc, tb[NFTA_IMMEDIATE_DATA]);
if (err < 0)
return err;
priv->dlen = desc.len;
- priv->dreg = nft_parse_register(tb[NFTA_IMMEDIATE_DREG]);
- err = nft_validate_register_store(ctx, priv->dreg, &priv->data,
- desc.type, desc.len);
+ err = nft_parse_register_store(ctx, tb[NFTA_IMMEDIATE_DREG],
+ &priv->dreg, &priv->data, desc.type,
+ desc.len);
if (err < 0)
goto err1;
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index f1363b8..b0f558b 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -17,8 +17,8 @@
struct nft_lookup {
struct nft_set *set;
- enum nft_registers sreg:8;
- enum nft_registers dreg:8;
+ u8 sreg;
+ u8 dreg;
bool invert;
struct nft_set_binding binding;
};
@@ -76,8 +76,8 @@
if (IS_ERR(set))
return PTR_ERR(set);
- priv->sreg = nft_parse_register(tb[NFTA_LOOKUP_SREG]);
- err = nft_validate_register_load(priv->sreg, set->klen);
+ err = nft_parse_register_load(tb[NFTA_LOOKUP_SREG], &priv->sreg,
+ set->klen);
if (err < 0)
return err;
@@ -100,9 +100,9 @@
if (!(set->flags & NFT_SET_MAP))
return -EINVAL;
- priv->dreg = nft_parse_register(tb[NFTA_LOOKUP_DREG]);
- err = nft_validate_register_store(ctx, priv->dreg, NULL,
- set->dtype, set->dlen);
+ err = nft_parse_register_store(ctx, tb[NFTA_LOOKUP_DREG],
+ &priv->dreg, NULL, set->dtype,
+ set->dlen);
if (err < 0)
return err;
} else if (set->flags & NFT_SET_MAP)
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index 71390b7..9953e80 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -15,8 +15,8 @@
struct nft_masq {
u32 flags;
- enum nft_registers sreg_proto_min:8;
- enum nft_registers sreg_proto_max:8;
+ u8 sreg_proto_min;
+ u8 sreg_proto_max;
};
static const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = {
@@ -54,19 +54,15 @@
}
if (tb[NFTA_MASQ_REG_PROTO_MIN]) {
- priv->sreg_proto_min =
- nft_parse_register(tb[NFTA_MASQ_REG_PROTO_MIN]);
-
- err = nft_validate_register_load(priv->sreg_proto_min, plen);
+ err = nft_parse_register_load(tb[NFTA_MASQ_REG_PROTO_MIN],
+ &priv->sreg_proto_min, plen);
if (err < 0)
return err;
if (tb[NFTA_MASQ_REG_PROTO_MAX]) {
- priv->sreg_proto_max =
- nft_parse_register(tb[NFTA_MASQ_REG_PROTO_MAX]);
-
- err = nft_validate_register_load(priv->sreg_proto_max,
- plen);
+ err = nft_parse_register_load(tb[NFTA_MASQ_REG_PROTO_MAX],
+ &priv->sreg_proto_max,
+ plen);
if (err < 0)
return err;
} else {
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index bf4b3ad..44d9b38 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -14,6 +14,7 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <linux/random.h>
#include <linux/smp.h>
#include <linux/static_key.h>
#include <net/dst.h>
@@ -32,8 +33,6 @@
#define NFT_META_SECS_PER_DAY 86400
#define NFT_META_DAYS_PER_WEEK 7
-static DEFINE_PER_CPU(struct rnd_state, nft_prandom_state);
-
static u8 nft_meta_weekday(void)
{
time64_t secs = ktime_get_real_seconds();
@@ -267,13 +266,6 @@
return true;
}
-static noinline u32 nft_prandom_u32(void)
-{
- struct rnd_state *state = this_cpu_ptr(&nft_prandom_state);
-
- return prandom_u32_state(state);
-}
-
#ifdef CONFIG_IP_ROUTE_CLASSID
static noinline bool
nft_meta_get_eval_rtclassid(const struct sk_buff *skb, u32 *dest)
@@ -385,7 +377,7 @@
break;
#endif
case NFT_META_PRANDOM:
- *dest = nft_prandom_u32();
+ *dest = get_random_u32();
break;
#ifdef CONFIG_XFRM
case NFT_META_SECPATH:
@@ -514,7 +506,6 @@
len = IFNAMSIZ;
break;
case NFT_META_PRANDOM:
- prandom_init_once(&nft_prandom_state);
len = sizeof(u32);
break;
#ifdef CONFIG_XFRM
@@ -535,9 +526,8 @@
return -EOPNOTSUPP;
}
- priv->dreg = nft_parse_register(tb[NFTA_META_DREG]);
- return nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, len);
+ return nft_parse_register_store(ctx, tb[NFTA_META_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, len);
}
EXPORT_SYMBOL_GPL(nft_meta_get_init);
@@ -661,8 +651,7 @@
return -EOPNOTSUPP;
}
- priv->sreg = nft_parse_register(tb[NFTA_META_SREG]);
- err = nft_validate_register_load(priv->sreg, len);
+ err = nft_parse_register_load(tb[NFTA_META_SREG], &priv->sreg, len);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index ea53fd9..db8f911 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -21,10 +21,10 @@
#include <net/ip.h>
struct nft_nat {
- enum nft_registers sreg_addr_min:8;
- enum nft_registers sreg_addr_max:8;
- enum nft_registers sreg_proto_min:8;
- enum nft_registers sreg_proto_max:8;
+ u8 sreg_addr_min;
+ u8 sreg_addr_max;
+ u8 sreg_proto_min;
+ u8 sreg_proto_max;
enum nf_nat_manip_type type:8;
u8 family;
u16 flags;
@@ -208,18 +208,15 @@
priv->family = family;
if (tb[NFTA_NAT_REG_ADDR_MIN]) {
- priv->sreg_addr_min =
- nft_parse_register(tb[NFTA_NAT_REG_ADDR_MIN]);
- err = nft_validate_register_load(priv->sreg_addr_min, alen);
+ err = nft_parse_register_load(tb[NFTA_NAT_REG_ADDR_MIN],
+ &priv->sreg_addr_min, alen);
if (err < 0)
return err;
if (tb[NFTA_NAT_REG_ADDR_MAX]) {
- priv->sreg_addr_max =
- nft_parse_register(tb[NFTA_NAT_REG_ADDR_MAX]);
-
- err = nft_validate_register_load(priv->sreg_addr_max,
- alen);
+ err = nft_parse_register_load(tb[NFTA_NAT_REG_ADDR_MAX],
+ &priv->sreg_addr_max,
+ alen);
if (err < 0)
return err;
} else {
@@ -231,19 +228,15 @@
plen = sizeof_field(struct nf_nat_range, min_addr.all);
if (tb[NFTA_NAT_REG_PROTO_MIN]) {
- priv->sreg_proto_min =
- nft_parse_register(tb[NFTA_NAT_REG_PROTO_MIN]);
-
- err = nft_validate_register_load(priv->sreg_proto_min, plen);
+ err = nft_parse_register_load(tb[NFTA_NAT_REG_PROTO_MIN],
+ &priv->sreg_proto_min, plen);
if (err < 0)
return err;
if (tb[NFTA_NAT_REG_PROTO_MAX]) {
- priv->sreg_proto_max =
- nft_parse_register(tb[NFTA_NAT_REG_PROTO_MAX]);
-
- err = nft_validate_register_load(priv->sreg_proto_max,
- plen);
+ err = nft_parse_register_load(tb[NFTA_NAT_REG_PROTO_MAX],
+ &priv->sreg_proto_max,
+ plen);
if (err < 0)
return err;
} else {
@@ -341,7 +334,8 @@
{
const struct nft_nat *priv = nft_expr_priv(expr);
- if (priv->family == nft_pf(pkt))
+ if (priv->family == nft_pf(pkt) ||
+ priv->family == NFPROTO_INET)
nft_nat_eval(expr, regs, pkt);
}
diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c
index f1fc824..4e43214 100644
--- a/net/netfilter/nft_numgen.c
+++ b/net/netfilter/nft_numgen.c
@@ -9,14 +9,13 @@
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
+#include <linux/random.h>
#include <linux/static_key.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
-static DEFINE_PER_CPU(struct rnd_state, nft_numgen_prandom_state);
-
struct nft_ng_inc {
- enum nft_registers dreg:8;
+ u8 dreg;
u32 modulus;
atomic_t counter;
u32 offset;
@@ -66,11 +65,10 @@
if (priv->offset + priv->modulus - 1 < priv->offset)
return -EOVERFLOW;
- priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]);
atomic_set(&priv->counter, priv->modulus - 1);
- return nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, sizeof(u32));
+ return nft_parse_register_store(ctx, tb[NFTA_NG_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, sizeof(u32));
}
static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg,
@@ -100,17 +98,14 @@
}
struct nft_ng_random {
- enum nft_registers dreg:8;
+ u8 dreg;
u32 modulus;
u32 offset;
};
-static u32 nft_ng_random_gen(struct nft_ng_random *priv)
+static u32 nft_ng_random_gen(const struct nft_ng_random *priv)
{
- struct rnd_state *state = this_cpu_ptr(&nft_numgen_prandom_state);
-
- return reciprocal_scale(prandom_u32_state(state), priv->modulus) +
- priv->offset;
+ return reciprocal_scale(get_random_u32(), priv->modulus) + priv->offset;
}
static void nft_ng_random_eval(const struct nft_expr *expr,
@@ -138,12 +133,8 @@
if (priv->offset + priv->modulus - 1 < priv->offset)
return -EOVERFLOW;
- prandom_init_once(&nft_numgen_prandom_state);
-
- priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]);
-
- return nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, sizeof(u32));
+ return nft_parse_register_store(ctx, tb[NFTA_NG_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, sizeof(u32));
}
static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index 5f9207a..bc104d3 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -95,7 +95,7 @@
struct nft_objref_map {
struct nft_set *set;
- enum nft_registers sreg:8;
+ u8 sreg;
struct nft_set_binding binding;
};
@@ -137,8 +137,8 @@
if (!(set->flags & NFT_SET_OBJECT))
return -EINVAL;
- priv->sreg = nft_parse_register(tb[NFTA_OBJREF_SET_SREG]);
- err = nft_validate_register_load(priv->sreg, set->klen);
+ err = nft_parse_register_load(tb[NFTA_OBJREF_SET_SREG], &priv->sreg,
+ set->klen);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
index 2c95762..720dc9f 100644
--- a/net/netfilter/nft_osf.c
+++ b/net/netfilter/nft_osf.c
@@ -6,7 +6,7 @@
#include <linux/netfilter/nfnetlink_osf.h>
struct nft_osf {
- enum nft_registers dreg:8;
+ u8 dreg;
u8 ttl;
u32 flags;
};
@@ -83,9 +83,9 @@
priv->flags = flags;
}
- priv->dreg = nft_parse_register(tb[NFTA_OSF_DREG]);
- err = nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, NFT_OSF_MAXGENRELEN);
+ err = nft_parse_register_store(ctx, tb[NFTA_OSF_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE,
+ NFT_OSF_MAXGENRELEN);
if (err < 0)
return err;
@@ -115,9 +115,21 @@
const struct nft_expr *expr,
const struct nft_data **data)
{
- return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_LOCAL_IN) |
- (1 << NF_INET_PRE_ROUTING) |
- (1 << NF_INET_FORWARD));
+ unsigned int hooks;
+
+ switch (ctx->family) {
+ case NFPROTO_IPV4:
+ case NFPROTO_IPV6:
+ case NFPROTO_INET:
+ hooks = (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_FORWARD);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return nft_chain_validate_hooks(ctx->chain, hooks);
}
static struct nft_expr_type nft_osf_type;
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 6a8495b..551e0d6 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -144,10 +144,10 @@
priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
- priv->dreg = nft_parse_register(tb[NFTA_PAYLOAD_DREG]);
- return nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, priv->len);
+ return nft_parse_register_store(ctx, tb[NFTA_PAYLOAD_DREG],
+ &priv->dreg, NULL, NFT_DATA_VALUE,
+ priv->len);
}
static int nft_payload_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -660,18 +660,23 @@
const struct nlattr * const tb[])
{
struct nft_payload_set *priv = nft_expr_priv(expr);
+ u32 csum_offset, csum_type = NFT_PAYLOAD_CSUM_NONE;
+ int err;
priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
- priv->sreg = nft_parse_register(tb[NFTA_PAYLOAD_SREG]);
if (tb[NFTA_PAYLOAD_CSUM_TYPE])
- priv->csum_type =
- ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE]));
- if (tb[NFTA_PAYLOAD_CSUM_OFFSET])
- priv->csum_offset =
- ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_OFFSET]));
+ csum_type = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE]));
+ if (tb[NFTA_PAYLOAD_CSUM_OFFSET]) {
+ err = nft_parse_u32_check(tb[NFTA_PAYLOAD_CSUM_OFFSET], U8_MAX,
+ &csum_offset);
+ if (err < 0)
+ return err;
+
+ priv->csum_offset = csum_offset;
+ }
if (tb[NFTA_PAYLOAD_CSUM_FLAGS]) {
u32 flags;
@@ -682,7 +687,7 @@
priv->csum_flags = flags;
}
- switch (priv->csum_type) {
+ switch (csum_type) {
case NFT_PAYLOAD_CSUM_NONE:
case NFT_PAYLOAD_CSUM_INET:
break;
@@ -696,8 +701,10 @@
default:
return -EOPNOTSUPP;
}
+ priv->csum_type = csum_type;
- return nft_validate_register_load(priv->sreg, priv->len);
+ return nft_parse_register_load(tb[NFTA_PAYLOAD_SREG], &priv->sreg,
+ priv->len);
}
static int nft_payload_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -733,6 +740,7 @@
{
enum nft_payload_bases base;
unsigned int offset, len;
+ int err;
if (tb[NFTA_PAYLOAD_BASE] == NULL ||
tb[NFTA_PAYLOAD_OFFSET] == NULL ||
@@ -758,8 +766,13 @@
if (tb[NFTA_PAYLOAD_DREG] == NULL)
return ERR_PTR(-EINVAL);
- offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
- len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
+ err = nft_parse_u32_check(tb[NFTA_PAYLOAD_OFFSET], U8_MAX, &offset);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ err = nft_parse_u32_check(tb[NFTA_PAYLOAD_LEN], U8_MAX, &len);
+ if (err < 0)
+ return ERR_PTR(err);
if (len <= 4 && is_power_of_2(len) && IS_ALIGNED(offset, len) &&
base != NFT_PAYLOAD_LL_HEADER)
diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
index 23265d7..9ba1de5 100644
--- a/net/netfilter/nft_queue.c
+++ b/net/netfilter/nft_queue.c
@@ -19,10 +19,10 @@
static u32 jhash_initval __read_mostly;
struct nft_queue {
- enum nft_registers sreg_qnum:8;
- u16 queuenum;
- u16 queues_total;
- u16 flags;
+ u8 sreg_qnum;
+ u16 queuenum;
+ u16 queues_total;
+ u16 flags;
};
static void nft_queue_eval(const struct nft_expr *expr,
@@ -111,8 +111,8 @@
struct nft_queue *priv = nft_expr_priv(expr);
int err;
- priv->sreg_qnum = nft_parse_register(tb[NFTA_QUEUE_SREG_QNUM]);
- err = nft_validate_register_load(priv->sreg_qnum, sizeof(u32));
+ err = nft_parse_register_load(tb[NFTA_QUEUE_SREG_QNUM],
+ &priv->sreg_qnum, sizeof(u32));
if (err < 0)
return err;
diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c
index 89efcc5..e6bbe32 100644
--- a/net/netfilter/nft_range.c
+++ b/net/netfilter/nft_range.c
@@ -15,7 +15,7 @@
struct nft_range_expr {
struct nft_data data_from;
struct nft_data data_to;
- enum nft_registers sreg:8;
+ u8 sreg;
u8 len;
enum nft_range_ops op:8;
};
@@ -51,7 +51,14 @@
const struct nlattr * const tb[])
{
struct nft_range_expr *priv = nft_expr_priv(expr);
- struct nft_data_desc desc_from, desc_to;
+ struct nft_data_desc desc_from = {
+ .type = NFT_DATA_VALUE,
+ .size = sizeof(priv->data_from),
+ };
+ struct nft_data_desc desc_to = {
+ .type = NFT_DATA_VALUE,
+ .size = sizeof(priv->data_to),
+ };
int err;
u32 op;
@@ -61,33 +68,23 @@
!tb[NFTA_RANGE_TO_DATA])
return -EINVAL;
- err = nft_data_init(NULL, &priv->data_from, sizeof(priv->data_from),
- &desc_from, tb[NFTA_RANGE_FROM_DATA]);
+ err = nft_data_init(NULL, &priv->data_from, &desc_from,
+ tb[NFTA_RANGE_FROM_DATA]);
if (err < 0)
return err;
- if (desc_from.type != NFT_DATA_VALUE) {
- err = -EINVAL;
- goto err1;
- }
-
- err = nft_data_init(NULL, &priv->data_to, sizeof(priv->data_to),
- &desc_to, tb[NFTA_RANGE_TO_DATA]);
+ err = nft_data_init(NULL, &priv->data_to, &desc_to,
+ tb[NFTA_RANGE_TO_DATA]);
if (err < 0)
goto err1;
- if (desc_to.type != NFT_DATA_VALUE) {
- err = -EINVAL;
- goto err2;
- }
-
if (desc_from.len != desc_to.len) {
err = -EINVAL;
goto err2;
}
- priv->sreg = nft_parse_register(tb[NFTA_RANGE_SREG]);
- err = nft_validate_register_load(priv->sreg, desc_from.len);
+ err = nft_parse_register_load(tb[NFTA_RANGE_SREG], &priv->sreg,
+ desc_from.len);
if (err < 0)
goto err2;
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index 2056051..ba09890 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -14,8 +14,8 @@
#include <net/netfilter/nf_tables.h>
struct nft_redir {
- enum nft_registers sreg_proto_min:8;
- enum nft_registers sreg_proto_max:8;
+ u8 sreg_proto_min;
+ u8 sreg_proto_max;
u16 flags;
};
@@ -50,19 +50,15 @@
plen = sizeof_field(struct nf_nat_range, min_addr.all);
if (tb[NFTA_REDIR_REG_PROTO_MIN]) {
- priv->sreg_proto_min =
- nft_parse_register(tb[NFTA_REDIR_REG_PROTO_MIN]);
-
- err = nft_validate_register_load(priv->sreg_proto_min, plen);
+ err = nft_parse_register_load(tb[NFTA_REDIR_REG_PROTO_MIN],
+ &priv->sreg_proto_min, plen);
if (err < 0)
return err;
if (tb[NFTA_REDIR_REG_PROTO_MAX]) {
- priv->sreg_proto_max =
- nft_parse_register(tb[NFTA_REDIR_REG_PROTO_MAX]);
-
- err = nft_validate_register_load(priv->sreg_proto_max,
- plen);
+ err = nft_parse_register_load(tb[NFTA_REDIR_REG_PROTO_MAX],
+ &priv->sreg_proto_max,
+ plen);
if (err < 0)
return err;
} else {
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index 7cfcb0e..bcd01a6 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -15,7 +15,7 @@
struct nft_rt {
enum nft_rt_keys key:8;
- enum nft_registers dreg:8;
+ u8 dreg;
};
static u16 get_tcpmss(const struct nft_pktinfo *pkt, const struct dst_entry *skbdst)
@@ -141,9 +141,8 @@
return -EOPNOTSUPP;
}
- priv->dreg = nft_parse_register(tb[NFTA_RT_DREG]);
- return nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, len);
+ return nft_parse_register_store(ctx, tb[NFTA_RT_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, len);
}
static int nft_rt_get_dump(struct sk_buff *skb,
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 858c8d4..a5cfb32 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -142,6 +142,7 @@
/* Another cpu may race to insert the element with the same key */
if (prev) {
nft_set_elem_destroy(set, he, true);
+ atomic_dec(&set->nelems);
he = prev;
}
@@ -151,6 +152,7 @@
err2:
nft_set_elem_destroy(set, he, true);
+ atomic_dec(&set->nelems);
err1:
return false;
}
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index f67c443..949da87 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -2121,6 +2121,32 @@
}
/**
+ * nft_set_pipapo_match_destroy() - Destroy elements from key mapping array
+ * @set: nftables API set representation
+ * @m: matching data pointing to key mapping array
+ */
+static void nft_set_pipapo_match_destroy(const struct nft_set *set,
+ struct nft_pipapo_match *m)
+{
+ struct nft_pipapo_field *f;
+ int i, r;
+
+ for (i = 0, f = m->f; i < m->field_count - 1; i++, f++)
+ ;
+
+ for (r = 0; r < f->rules; r++) {
+ struct nft_pipapo_elem *e;
+
+ if (r < f->rules - 1 && f->mt[r + 1].e == f->mt[r].e)
+ continue;
+
+ e = f->mt[r].e;
+
+ nft_set_elem_destroy(set, e, true);
+ }
+}
+
+/**
* nft_pipapo_destroy() - Free private data for set and all committed elements
* @set: nftables API set representation
*/
@@ -2128,26 +2154,13 @@
{
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *m;
- struct nft_pipapo_field *f;
- int i, r, cpu;
+ int cpu;
m = rcu_dereference_protected(priv->match, true);
if (m) {
rcu_barrier();
- for (i = 0, f = m->f; i < m->field_count - 1; i++, f++)
- ;
-
- for (r = 0; r < f->rules; r++) {
- struct nft_pipapo_elem *e;
-
- if (r < f->rules - 1 && f->mt[r + 1].e == f->mt[r].e)
- continue;
-
- e = f->mt[r].e;
-
- nft_set_elem_destroy(set, e, true);
- }
+ nft_set_pipapo_match_destroy(set, m);
#ifdef NFT_PIPAPO_ALIGN
free_percpu(m->scratch_aligned);
@@ -2161,6 +2174,11 @@
}
if (priv->clone) {
+ m = priv->clone;
+
+ if (priv->dirty)
+ nft_set_pipapo_match_destroy(set, m);
+
#ifdef NFT_PIPAPO_ALIGN
free_percpu(priv->clone->scratch_aligned);
#endif
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 217ab36..94a5446 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -348,7 +348,11 @@
*ext = &rbe->ext;
return -EEXIST;
} else {
- p = &parent->rb_left;
+ overlap = false;
+ if (nft_rbtree_interval_end(rbe))
+ p = &parent->rb_left;
+ else
+ p = &parent->rb_right;
}
}
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index a28aca5..f6d5171 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -10,7 +10,7 @@
struct nft_socket {
enum nft_socket_keys key:8;
union {
- enum nft_registers dreg:8;
+ u8 dreg;
};
};
@@ -33,6 +33,32 @@
}
}
+static struct sock *nft_socket_do_lookup(const struct nft_pktinfo *pkt)
+{
+ const struct net_device *indev = nft_in(pkt);
+ const struct sk_buff *skb = pkt->skb;
+ struct sock *sk = NULL;
+
+ if (!indev)
+ return NULL;
+
+ switch (nft_pf(pkt)) {
+ case NFPROTO_IPV4:
+ sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, indev);
+ break;
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+ case NFPROTO_IPV6:
+ sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, indev);
+ break;
+#endif
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ return sk;
+}
+
static void nft_socket_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -46,20 +72,7 @@
sk = NULL;
if (!sk)
- switch(nft_pf(pkt)) {
- case NFPROTO_IPV4:
- sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, nft_in(pkt));
- break;
-#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
- case NFPROTO_IPV6:
- sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, nft_in(pkt));
- break;
-#endif
- default:
- WARN_ON_ONCE(1);
- regs->verdict.code = NFT_BREAK;
- return;
- }
+ sk = nft_socket_do_lookup(pkt);
if (!sk) {
regs->verdict.code = NFT_BREAK;
@@ -133,9 +146,8 @@
return -EOPNOTSUPP;
}
- priv->dreg = nft_parse_register(tb[NFTA_SOCKET_DREG]);
- return nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, len);
+ return nft_parse_register_store(ctx, tb[NFTA_SOCKET_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, len);
}
static int nft_socket_dump(struct sk_buff *skb,
@@ -150,6 +162,16 @@
return 0;
}
+static int nft_socket_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ return nft_chain_validate_hooks(ctx->chain,
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_LOCAL_OUT));
+}
+
static struct nft_expr_type nft_socket_type;
static const struct nft_expr_ops nft_socket_ops = {
.type = &nft_socket_type,
@@ -157,6 +179,7 @@
.eval = nft_socket_eval,
.init = nft_socket_init,
.dump = nft_socket_dump,
+ .validate = nft_socket_validate,
};
static struct nft_expr_type nft_socket_type __read_mostly = {
diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c
index 242222d..37c728b 100644
--- a/net/netfilter/nft_tproxy.c
+++ b/net/netfilter/nft_tproxy.c
@@ -13,9 +13,9 @@
#endif
struct nft_tproxy {
- enum nft_registers sreg_addr:8;
- enum nft_registers sreg_port:8;
- u8 family;
+ u8 sreg_addr;
+ u8 sreg_port;
+ u8 family;
};
static void nft_tproxy_eval_v4(const struct nft_expr *expr,
@@ -254,15 +254,15 @@
}
if (tb[NFTA_TPROXY_REG_ADDR]) {
- priv->sreg_addr = nft_parse_register(tb[NFTA_TPROXY_REG_ADDR]);
- err = nft_validate_register_load(priv->sreg_addr, alen);
+ err = nft_parse_register_load(tb[NFTA_TPROXY_REG_ADDR],
+ &priv->sreg_addr, alen);
if (err < 0)
return err;
}
if (tb[NFTA_TPROXY_REG_PORT]) {
- priv->sreg_port = nft_parse_register(tb[NFTA_TPROXY_REG_PORT]);
- err = nft_validate_register_load(priv->sreg_port, sizeof(u16));
+ err = nft_parse_register_load(tb[NFTA_TPROXY_REG_PORT],
+ &priv->sreg_port, sizeof(u16));
if (err < 0)
return err;
}
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index d3eb953..2ee5099 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -15,7 +15,7 @@
struct nft_tunnel {
enum nft_tunnel_keys key:8;
- enum nft_registers dreg:8;
+ u8 dreg;
enum nft_tunnel_mode mode:8;
};
@@ -93,8 +93,6 @@
return -EOPNOTSUPP;
}
- priv->dreg = nft_parse_register(tb[NFTA_TUNNEL_DREG]);
-
if (tb[NFTA_TUNNEL_MODE]) {
priv->mode = ntohl(nla_get_be32(tb[NFTA_TUNNEL_MODE]));
if (priv->mode > NFT_TUNNEL_MODE_MAX)
@@ -103,8 +101,8 @@
priv->mode = NFT_TUNNEL_MODE_NONE;
}
- return nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, len);
+ return nft_parse_register_store(ctx, tb[NFTA_TUNNEL_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, len);
}
static int nft_tunnel_get_dump(struct sk_buff *skb,
@@ -135,6 +133,7 @@
static struct nft_expr_type nft_tunnel_type __read_mostly = {
.name = "tunnel",
+ .family = NFPROTO_NETDEV,
.ops = &nft_tunnel_get_ops,
.policy = nft_tunnel_policy,
.maxattr = NFTA_TUNNEL_MAX,
diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c
index 06d5cab..cbbbc4e 100644
--- a/net/netfilter/nft_xfrm.c
+++ b/net/netfilter/nft_xfrm.c
@@ -24,7 +24,7 @@
struct nft_xfrm {
enum nft_xfrm_keys key:8;
- enum nft_registers dreg:8;
+ u8 dreg;
u8 dir;
u8 spnum;
};
@@ -86,9 +86,8 @@
priv->spnum = spnum;
- priv->dreg = nft_parse_register(tb[NFTA_XFRM_DREG]);
- return nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, len);
+ return nft_parse_register_store(ctx, tb[NFTA_XFRM_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, len);
}
/* Return true if key asks for daddr/saddr and current
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index e5ebc08..ad3c033 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -30,6 +30,7 @@
u_int32_t new_targetmark;
struct nf_conn *ct;
u_int32_t newmark;
+ u_int32_t oldmark;
ct = nf_ct_get(skb, &ctinfo);
if (ct == NULL)
@@ -37,14 +38,15 @@
switch (info->mode) {
case XT_CONNMARK_SET:
- newmark = (ct->mark & ~info->ctmask) ^ info->ctmark;
+ oldmark = READ_ONCE(ct->mark);
+ newmark = (oldmark & ~info->ctmask) ^ info->ctmark;
if (info->shift_dir == D_SHIFT_RIGHT)
newmark >>= info->shift_bits;
else
newmark <<= info->shift_bits;
- if (ct->mark != newmark) {
- ct->mark = newmark;
+ if (READ_ONCE(ct->mark) != newmark) {
+ WRITE_ONCE(ct->mark, newmark);
nf_conntrack_event_cache(IPCT_MARK, ct);
}
break;
@@ -55,15 +57,15 @@
else
new_targetmark <<= info->shift_bits;
- newmark = (ct->mark & ~info->ctmask) ^
+ newmark = (READ_ONCE(ct->mark) & ~info->ctmask) ^
new_targetmark;
- if (ct->mark != newmark) {
- ct->mark = newmark;
+ if (READ_ONCE(ct->mark) != newmark) {
+ WRITE_ONCE(ct->mark, newmark);
nf_conntrack_event_cache(IPCT_MARK, ct);
}
break;
case XT_CONNMARK_RESTORE:
- new_targetmark = (ct->mark & info->ctmask);
+ new_targetmark = (READ_ONCE(ct->mark) & info->ctmask);
if (info->shift_dir == D_SHIFT_RIGHT)
new_targetmark >>= info->shift_bits;
else
@@ -126,7 +128,7 @@
if (ct == NULL)
return false;
- return ((ct->mark & info->mask) == info->mark) ^ info->invert;
+ return ((READ_ONCE(ct->mark) & info->mask) == info->mark) ^ info->invert;
}
static int connmark_mt_check(const struct xt_mtchk_param *par)
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 5e1239c..91b35b7 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -885,6 +885,8 @@
unsigned char bitmask;
unsigned char byte;
+ if (offset >= bitmap_len)
+ return -1;
byte_offset = offset / 8;
byte = bitmap[byte_offset];
bit_spot = offset;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index e55af5c..d96a610 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -149,6 +149,8 @@
static inline u32 netlink_group_mask(u32 group)
{
+ if (group > 32)
+ return 0;
return group ? 1 << (group - 1) : 0;
}
@@ -1986,7 +1988,6 @@
copied = len;
}
- skb_reset_transport_header(data_skb);
err = skb_copy_datagram_msg(data_skb, 0, msg, copied);
if (msg->msg_name) {
@@ -2274,6 +2275,13 @@
* single netdev. The outcome is MSG_TRUNC error.
*/
skb_reserve(skb, skb_tailroom(skb) - alloc_size);
+
+ /* Make sure malicious BPF programs can not read unitialized memory
+ * from skb->head -> skb->data
+ */
+ skb_reset_network_header(skb);
+ skb_reset_mac_header(skb);
+
netlink_skb_set_owner_r(skb, sk);
if (nlk->dump_done_errno > 0) {
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index c992424..9fd7ba0 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -1182,13 +1182,17 @@
op.policy,
op.maxattr);
if (err)
- return err;
+ goto err_free_state;
}
}
if (!ctx->state)
return -ENODATA;
return 0;
+
+err_free_state:
+ netlink_policy_dump_free(ctx->state);
+ return err;
}
static void *ctrl_dumppolicy_prep(struct sk_buff *skb,
diff --git a/net/netlink/policy.c b/net/netlink/policy.c
index 8d7c900..87e3de0 100644
--- a/net/netlink/policy.c
+++ b/net/netlink/policy.c
@@ -144,7 +144,7 @@
err = add_policy(&state, policy, maxtype);
if (err)
- return err;
+ goto err_try_undo;
for (policy_idx = 0;
policy_idx < state->n_alloc && state->policies[policy_idx].policy;
@@ -164,7 +164,7 @@
policy[type].nested_policy,
policy[type].len);
if (err)
- return err;
+ goto err_try_undo;
break;
default:
break;
@@ -174,6 +174,16 @@
*pstate = state;
return 0;
+
+err_try_undo:
+ /* Try to preserve reasonable unwind semantics - if we're starting from
+ * scratch clean up fully, otherwise record what we got and caller will.
+ */
+ if (!*pstate)
+ netlink_policy_dump_free(state);
+ else
+ *pstate = state;
+ return err;
}
static bool
diff --git a/net/nfc/core.c b/net/nfc/core.c
index 6800470..2ef5636 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -38,7 +38,7 @@
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -94,7 +94,7 @@
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -142,7 +142,7 @@
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -206,7 +206,7 @@
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -245,7 +245,7 @@
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -290,7 +290,7 @@
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -334,7 +334,7 @@
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -400,7 +400,7 @@
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -446,7 +446,7 @@
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -493,7 +493,7 @@
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
kfree_skb(skb);
goto error;
@@ -550,7 +550,7 @@
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -599,7 +599,7 @@
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -1126,6 +1126,7 @@
dev->rfkill = NULL;
}
}
+ dev->shutting_down = false;
device_unlock(&dev->dev);
rc = nfc_genl_device_added(dev);
@@ -1157,13 +1158,12 @@
if (dev->rfkill) {
rfkill_unregister(dev->rfkill);
rfkill_destroy(dev->rfkill);
+ dev->rfkill = NULL;
}
+ dev->shutting_down = true;
device_unlock(&dev->dev);
if (dev->ops->check_presence) {
- device_lock(&dev->dev);
- dev->shutting_down = true;
- device_unlock(&dev->dev);
del_timer_sync(&dev->check_pres_timer);
cancel_work_sync(&dev->check_pres_work);
}
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index e38719e..ed9019d 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -530,7 +530,7 @@
skb_queue_purge(&ndev->tx_q);
ndev->ops->close(ndev);
- ndev->flags = 0;
+ ndev->flags &= BIT(NCI_UNREG);
}
done:
@@ -548,6 +548,10 @@
mutex_lock(&ndev->req_lock);
if (!test_and_clear_bit(NCI_UP, &ndev->flags)) {
+ /* Need to flush the cmd wq in case
+ * there is a queued/running cmd_work
+ */
+ flush_workqueue(ndev->cmd_wq);
del_timer_sync(&ndev->cmd_timer);
del_timer_sync(&ndev->data_timer);
mutex_unlock(&ndev->req_lock);
diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c
index ce3382b..b4548d8 100644
--- a/net/nfc/nci/data.c
+++ b/net/nfc/nci/data.c
@@ -118,7 +118,7 @@
skb_frag = nci_skb_alloc(ndev,
(NCI_DATA_HDR_SIZE + frag_len),
- GFP_KERNEL);
+ GFP_ATOMIC);
if (skb_frag == NULL) {
rc = -ENOMEM;
goto free_exit;
@@ -279,8 +279,10 @@
nci_plen(skb->data));
conn_info = nci_get_conn_info_by_conn_id(ndev, nci_conn_id(skb->data));
- if (!conn_info)
+ if (!conn_info) {
+ kfree_skb(skb);
return;
+ }
/* strip the nci data header */
skb_pull(skb, NCI_DATA_HDR_SIZE);
diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c
index 04e55cc..4fe336f 100644
--- a/net/nfc/nci/hci.c
+++ b/net/nfc/nci/hci.c
@@ -153,7 +153,7 @@
i = 0;
skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len +
- NCI_DATA_HDR_SIZE, GFP_KERNEL);
+ NCI_DATA_HDR_SIZE, GFP_ATOMIC);
if (!skb)
return -ENOMEM;
@@ -186,7 +186,7 @@
if (i < data_len) {
skb = nci_skb_alloc(ndev,
conn_info->max_pkt_payload_len +
- NCI_DATA_HDR_SIZE, GFP_KERNEL);
+ NCI_DATA_HDR_SIZE, GFP_ATOMIC);
if (!skb)
return -ENOMEM;
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 78acc4e..b8939eb 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -1244,7 +1244,7 @@
struct sk_buff *msg;
void *hdr;
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
if (!msg)
return -ENOMEM;
@@ -1260,7 +1260,7 @@
genlmsg_end(msg, hdr);
- genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL);
+ genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_ATOMIC);
return 0;
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 525c154..80fee9d 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -372,6 +372,7 @@
update_ip_l4_checksum(skb, nh, *addr, new_addr);
csum_replace4(&nh->check, *addr, new_addr);
skb_clear_hash(skb);
+ ovs_ct_clear(skb, NULL);
*addr = new_addr;
}
@@ -419,6 +420,7 @@
update_ipv6_checksum(skb, l4_proto, addr, new_addr);
skb_clear_hash(skb);
+ ovs_ct_clear(skb, NULL);
memcpy(addr, new_addr, sizeof(__be32[4]));
}
@@ -659,6 +661,7 @@
static void set_tp_port(struct sk_buff *skb, __be16 *port,
__be16 new_port, __sum16 *check)
{
+ ovs_ct_clear(skb, NULL);
inet_proto_csum_replace2(check, skb, *port, new_port, false);
*port = new_port;
}
@@ -698,6 +701,7 @@
uh->dest = dst;
flow_key->tp.src = src;
flow_key->tp.dst = dst;
+ ovs_ct_clear(skb, NULL);
}
skb_clear_hash(skb);
@@ -760,6 +764,8 @@
sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
skb_clear_hash(skb);
+ ovs_ct_clear(skb, NULL);
+
flow_key->tp.src = sh->source;
flow_key->tp.dst = sh->dest;
@@ -1044,7 +1050,7 @@
int rem = nla_len(attr);
bool dont_clone_flow_key;
- /* The first action is always 'OVS_CLONE_ATTR_ARG'. */
+ /* The first action is always 'OVS_CLONE_ATTR_EXEC'. */
clone_arg = nla_data(attr);
dont_clone_flow_key = nla_get_u32(clone_arg);
actions = nla_next(clone_arg, &rem);
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index a11b558..0f0f380 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -150,7 +150,7 @@
static u32 ovs_ct_get_mark(const struct nf_conn *ct)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
- return ct ? ct->mark : 0;
+ return ct ? READ_ONCE(ct->mark) : 0;
#else
return 0;
#endif
@@ -336,9 +336,9 @@
#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
u32 new_mark;
- new_mark = ct_mark | (ct->mark & ~(mask));
- if (ct->mark != new_mark) {
- ct->mark = new_mark;
+ new_mark = ct_mark | (READ_ONCE(ct->mark) & ~(mask));
+ if (READ_ONCE(ct->mark) != new_mark) {
+ WRITE_ONCE(ct->mark, new_mark);
if (nf_ct_is_confirmed(ct))
nf_conntrack_event_cache(IPCT_MARK, ct);
key->ct.mark = new_mark;
@@ -730,6 +730,57 @@
}
#if IS_ENABLED(CONFIG_NF_NAT)
+static void ovs_nat_update_key(struct sw_flow_key *key,
+ const struct sk_buff *skb,
+ enum nf_nat_manip_type maniptype)
+{
+ if (maniptype == NF_NAT_MANIP_SRC) {
+ __be16 src;
+
+ key->ct_state |= OVS_CS_F_SRC_NAT;
+ if (key->eth.type == htons(ETH_P_IP))
+ key->ipv4.addr.src = ip_hdr(skb)->saddr;
+ else if (key->eth.type == htons(ETH_P_IPV6))
+ memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr,
+ sizeof(key->ipv6.addr.src));
+ else
+ return;
+
+ if (key->ip.proto == IPPROTO_UDP)
+ src = udp_hdr(skb)->source;
+ else if (key->ip.proto == IPPROTO_TCP)
+ src = tcp_hdr(skb)->source;
+ else if (key->ip.proto == IPPROTO_SCTP)
+ src = sctp_hdr(skb)->source;
+ else
+ return;
+
+ key->tp.src = src;
+ } else {
+ __be16 dst;
+
+ key->ct_state |= OVS_CS_F_DST_NAT;
+ if (key->eth.type == htons(ETH_P_IP))
+ key->ipv4.addr.dst = ip_hdr(skb)->daddr;
+ else if (key->eth.type == htons(ETH_P_IPV6))
+ memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr,
+ sizeof(key->ipv6.addr.dst));
+ else
+ return;
+
+ if (key->ip.proto == IPPROTO_UDP)
+ dst = udp_hdr(skb)->dest;
+ else if (key->ip.proto == IPPROTO_TCP)
+ dst = tcp_hdr(skb)->dest;
+ else if (key->ip.proto == IPPROTO_SCTP)
+ dst = sctp_hdr(skb)->dest;
+ else
+ return;
+
+ key->tp.dst = dst;
+ }
+}
+
/* Modelled after nf_nat_ipv[46]_fn().
* range is only used for new, uninitialized NAT state.
* Returns either NF_ACCEPT or NF_DROP.
@@ -737,7 +788,7 @@
static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
const struct nf_nat_range2 *range,
- enum nf_nat_manip_type maniptype)
+ enum nf_nat_manip_type maniptype, struct sw_flow_key *key)
{
int hooknum, nh_off, err = NF_ACCEPT;
@@ -810,60 +861,13 @@
skb_push(skb, nh_off);
skb_postpush_rcsum(skb, skb->data, nh_off);
+ /* Update the flow key if NAT successful. */
+ if (err == NF_ACCEPT)
+ ovs_nat_update_key(key, skb, maniptype);
+
return err;
}
-static void ovs_nat_update_key(struct sw_flow_key *key,
- const struct sk_buff *skb,
- enum nf_nat_manip_type maniptype)
-{
- if (maniptype == NF_NAT_MANIP_SRC) {
- __be16 src;
-
- key->ct_state |= OVS_CS_F_SRC_NAT;
- if (key->eth.type == htons(ETH_P_IP))
- key->ipv4.addr.src = ip_hdr(skb)->saddr;
- else if (key->eth.type == htons(ETH_P_IPV6))
- memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr,
- sizeof(key->ipv6.addr.src));
- else
- return;
-
- if (key->ip.proto == IPPROTO_UDP)
- src = udp_hdr(skb)->source;
- else if (key->ip.proto == IPPROTO_TCP)
- src = tcp_hdr(skb)->source;
- else if (key->ip.proto == IPPROTO_SCTP)
- src = sctp_hdr(skb)->source;
- else
- return;
-
- key->tp.src = src;
- } else {
- __be16 dst;
-
- key->ct_state |= OVS_CS_F_DST_NAT;
- if (key->eth.type == htons(ETH_P_IP))
- key->ipv4.addr.dst = ip_hdr(skb)->daddr;
- else if (key->eth.type == htons(ETH_P_IPV6))
- memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr,
- sizeof(key->ipv6.addr.dst));
- else
- return;
-
- if (key->ip.proto == IPPROTO_UDP)
- dst = udp_hdr(skb)->dest;
- else if (key->ip.proto == IPPROTO_TCP)
- dst = tcp_hdr(skb)->dest;
- else if (key->ip.proto == IPPROTO_SCTP)
- dst = sctp_hdr(skb)->dest;
- else
- return;
-
- key->tp.dst = dst;
- }
-}
-
/* Returns NF_DROP if the packet should be dropped, NF_ACCEPT otherwise. */
static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
const struct ovs_conntrack_info *info,
@@ -903,7 +907,7 @@
} else {
return NF_ACCEPT; /* Connection is not NATed. */
}
- err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype);
+ err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype, key);
if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) {
if (ct->status & IPS_SRC_NAT) {
@@ -913,17 +917,13 @@
maniptype = NF_NAT_MANIP_SRC;
err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range,
- maniptype);
+ maniptype, key);
} else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
err = ovs_ct_nat_execute(skb, ct, ctinfo, NULL,
- NF_NAT_MANIP_SRC);
+ NF_NAT_MANIP_SRC, key);
}
}
- /* Mark NAT done if successful and update the flow key. */
- if (err == NF_ACCEPT)
- ovs_nat_update_key(key, skb, maniptype);
-
return err;
}
#else /* !CONFIG_NF_NAT */
@@ -1324,7 +1324,8 @@
if (skb_nfct(skb)) {
nf_conntrack_put(skb_nfct(skb));
nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
- ovs_ct_fill_key(skb, key);
+ if (key)
+ ovs_ct_fill_key(skb, key);
}
return 0;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 9d6ef6c..7ed97dc 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -241,10 +241,17 @@
upcall.portid = ovs_vport_find_upcall_portid(p, skb);
upcall.mru = OVS_CB(skb)->mru;
error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
- if (unlikely(error))
- kfree_skb(skb);
- else
+ switch (error) {
+ case 0:
+ case -EAGAIN:
+ case -ERESTARTSYS:
+ case -EINTR:
consume_skb(skb);
+ break;
+ default:
+ kfree_skb(skb);
+ break;
+ }
stats_counter = &stats->n_missed;
goto out;
}
@@ -537,8 +544,9 @@
out:
if (err)
skb_tx_error(skb);
- kfree_skb(user_skb);
- kfree_skb(nskb);
+ consume_skb(user_skb);
+ consume_skb(nskb);
+
return err;
}
@@ -1584,7 +1592,8 @@
if (IS_ERR(dp))
return;
- WARN(dp->user_features, "Dropping previously announced user features\n");
+ pr_warn("%s: Dropping previously announced user features\n",
+ ovs_dp_name(dp));
dp->user_features = 0;
}
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index b03d142..c9ba614 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -265,7 +265,7 @@
if (flags & IP6_FH_F_FRAG) {
if (frag_off) {
key->ip.frag = OVS_FRAG_TYPE_LATER;
- key->ip.proto = nexthdr;
+ key->ip.proto = NEXTHDR_FRAGMENT;
return 0;
}
key->ip.frag = OVS_FRAG_TYPE_FIRST;
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 4c5c233..293a798 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2201,8 +2201,8 @@
icmpv6_key->icmpv6_type = ntohs(output->tp.src);
icmpv6_key->icmpv6_code = ntohs(output->tp.dst);
- if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
- icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
+ if (swkey->tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) ||
+ swkey->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
struct ovs_key_nd *nd_key;
nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
@@ -2288,6 +2288,62 @@
return sfa;
}
+static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len);
+
+static void ovs_nla_free_check_pkt_len_action(const struct nlattr *action)
+{
+ const struct nlattr *a;
+ int rem;
+
+ nla_for_each_nested(a, action, rem) {
+ switch (nla_type(a)) {
+ case OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL:
+ case OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER:
+ ovs_nla_free_nested_actions(nla_data(a), nla_len(a));
+ break;
+ }
+ }
+}
+
+static void ovs_nla_free_clone_action(const struct nlattr *action)
+{
+ const struct nlattr *a = nla_data(action);
+ int rem = nla_len(action);
+
+ switch (nla_type(a)) {
+ case OVS_CLONE_ATTR_EXEC:
+ /* The real list of actions follows this attribute. */
+ a = nla_next(a, &rem);
+ ovs_nla_free_nested_actions(a, rem);
+ break;
+ }
+}
+
+static void ovs_nla_free_dec_ttl_action(const struct nlattr *action)
+{
+ const struct nlattr *a = nla_data(action);
+
+ switch (nla_type(a)) {
+ case OVS_DEC_TTL_ATTR_ACTION:
+ ovs_nla_free_nested_actions(nla_data(a), nla_len(a));
+ break;
+ }
+}
+
+static void ovs_nla_free_sample_action(const struct nlattr *action)
+{
+ const struct nlattr *a = nla_data(action);
+ int rem = nla_len(action);
+
+ switch (nla_type(a)) {
+ case OVS_SAMPLE_ATTR_ARG:
+ /* The real list of actions follows this attribute. */
+ a = nla_next(a, &rem);
+ ovs_nla_free_nested_actions(a, rem);
+ break;
+ }
+}
+
static void ovs_nla_free_set_action(const struct nlattr *a)
{
const struct nlattr *ovs_key = nla_data(a);
@@ -2301,25 +2357,54 @@
}
}
-void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
+static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len)
{
const struct nlattr *a;
int rem;
- if (!sf_acts)
+ /* Whenever new actions are added, the need to update this
+ * function should be considered.
+ */
+ BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 23);
+
+ if (!actions)
return;
- nla_for_each_attr(a, sf_acts->actions, sf_acts->actions_len, rem) {
+ nla_for_each_attr(a, actions, len, rem) {
switch (nla_type(a)) {
- case OVS_ACTION_ATTR_SET:
- ovs_nla_free_set_action(a);
+ case OVS_ACTION_ATTR_CHECK_PKT_LEN:
+ ovs_nla_free_check_pkt_len_action(a);
break;
+
+ case OVS_ACTION_ATTR_CLONE:
+ ovs_nla_free_clone_action(a);
+ break;
+
case OVS_ACTION_ATTR_CT:
ovs_ct_free_action(a);
break;
+
+ case OVS_ACTION_ATTR_DEC_TTL:
+ ovs_nla_free_dec_ttl_action(a);
+ break;
+
+ case OVS_ACTION_ATTR_SAMPLE:
+ ovs_nla_free_sample_action(a);
+ break;
+
+ case OVS_ACTION_ATTR_SET:
+ ovs_nla_free_set_action(a);
+ break;
}
}
+}
+void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
+{
+ if (!sf_acts)
+ return;
+
+ ovs_nla_free_nested_actions(sf_acts->actions, sf_acts->actions_len);
kfree(sf_acts);
}
@@ -2351,7 +2436,7 @@
new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2);
if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
- if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) {
+ if ((next_offset + req_size) > MAX_ACTIONS_BUFSIZE) {
OVS_NLERR(log, "Flow action size exceeds max %u",
MAX_ACTIONS_BUFSIZE);
return ERR_PTR(-EMSGSIZE);
@@ -3419,7 +3504,9 @@
if (!start)
return -EMSGSIZE;
- err = ovs_nla_put_actions(nla_data(attr), rem, skb);
+ /* Skipping the OVS_CLONE_ATTR_EXEC that is always the first attribute. */
+ attr = nla_next(nla_data(attr), &rem);
+ err = ovs_nla_put_actions(attr, rem, skb);
if (err)
nla_nest_cancel(skb, start);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index d0c95d7..b70b06e 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2817,8 +2817,9 @@
status = TP_STATUS_SEND_REQUEST;
err = po->xmit(skb);
- if (unlikely(err > 0)) {
- err = net_xmit_errno(err);
+ if (unlikely(err != 0)) {
+ if (err > 0)
+ err = net_xmit_errno(err);
if (err && __packet_get_status(po, ph) ==
TP_STATUS_AVAILABLE) {
/* skb was destructed already */
@@ -2985,8 +2986,8 @@
if (err)
goto out_free;
- if (sock->type == SOCK_RAW &&
- !dev_validate_header(dev, skb->data, len)) {
+ if ((sock->type == SOCK_RAW &&
+ !dev_validate_header(dev, skb->data, len)) || !skb->len) {
err = -EINVAL;
goto out_free;
}
@@ -3019,8 +3020,12 @@
skb->no_fcs = 1;
err = po->xmit(skb);
- if (err > 0 && (err = net_xmit_errno(err)) != 0)
- goto out_unlock;
+ if (unlikely(err != 0)) {
+ if (err > 0)
+ err = net_xmit_errno(err);
+ if (err)
+ goto out_unlock;
+ }
dev_put(dev);
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 56cffbf..13448ca 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -20,6 +20,8 @@
/* auto-bind range */
#define QRTR_MIN_EPH_SOCKET 0x4000
#define QRTR_MAX_EPH_SOCKET 0x7fff
+#define QRTR_EPH_PORT_RANGE \
+ XA_LIMIT(QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET)
/**
* struct qrtr_hdr_v1 - (I|R)PCrouter packet header version 1
@@ -106,8 +108,7 @@
static DEFINE_MUTEX(qrtr_node_lock);
/* local port allocation management */
-static DEFINE_IDR(qrtr_ports);
-static DEFINE_MUTEX(qrtr_port_lock);
+static DEFINE_XARRAY_ALLOC(qrtr_ports);
/**
* struct qrtr_node - endpoint node
@@ -635,7 +636,7 @@
port = 0;
rcu_read_lock();
- ipc = idr_find(&qrtr_ports, port);
+ ipc = xa_load(&qrtr_ports, port);
if (ipc)
sock_hold(&ipc->sk);
rcu_read_unlock();
@@ -677,9 +678,7 @@
__sock_put(&ipc->sk);
- mutex_lock(&qrtr_port_lock);
- idr_remove(&qrtr_ports, port);
- mutex_unlock(&qrtr_port_lock);
+ xa_erase(&qrtr_ports, port);
/* Ensure that if qrtr_port_lookup() did enter the RCU read section we
* wait for it to up increment the refcount */
@@ -698,29 +697,20 @@
*/
static int qrtr_port_assign(struct qrtr_sock *ipc, int *port)
{
- u32 min_port;
int rc;
- mutex_lock(&qrtr_port_lock);
if (!*port) {
- min_port = QRTR_MIN_EPH_SOCKET;
- rc = idr_alloc_u32(&qrtr_ports, ipc, &min_port, QRTR_MAX_EPH_SOCKET, GFP_ATOMIC);
- if (!rc)
- *port = min_port;
+ rc = xa_alloc(&qrtr_ports, port, ipc, QRTR_EPH_PORT_RANGE,
+ GFP_KERNEL);
} else if (*port < QRTR_MIN_EPH_SOCKET && !capable(CAP_NET_ADMIN)) {
rc = -EACCES;
} else if (*port == QRTR_PORT_CTRL) {
- min_port = 0;
- rc = idr_alloc_u32(&qrtr_ports, ipc, &min_port, 0, GFP_ATOMIC);
+ rc = xa_insert(&qrtr_ports, 0, ipc, GFP_KERNEL);
} else {
- min_port = *port;
- rc = idr_alloc_u32(&qrtr_ports, ipc, &min_port, *port, GFP_ATOMIC);
- if (!rc)
- *port = min_port;
+ rc = xa_insert(&qrtr_ports, *port, ipc, GFP_KERNEL);
}
- mutex_unlock(&qrtr_port_lock);
- if (rc == -ENOSPC)
+ if (rc == -EBUSY)
return -EADDRINUSE;
else if (rc < 0)
return rc;
@@ -734,20 +724,16 @@
static void qrtr_reset_ports(void)
{
struct qrtr_sock *ipc;
- int id;
+ unsigned long index;
- mutex_lock(&qrtr_port_lock);
- idr_for_each_entry(&qrtr_ports, ipc, id) {
- /* Don't reset control port */
- if (id == 0)
- continue;
-
+ rcu_read_lock();
+ xa_for_each_start(&qrtr_ports, index, ipc, 1) {
sock_hold(&ipc->sk);
ipc->sk.sk_err = ENETRESET;
ipc->sk.sk_error_report(&ipc->sk);
sock_put(&ipc->sk);
}
- mutex_unlock(&qrtr_port_lock);
+ rcu_read_unlock();
}
/* Bind socket to address.
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 6fdedd9..cfbf0e1 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -363,6 +363,7 @@
static void release_refill(struct rds_connection *conn)
{
clear_bit(RDS_RECV_REFILL, &conn->c_flags);
+ smp_mb__after_atomic();
/* We don't use wait_on_bit()/wake_up_bit() because our waking is in a
* hot path and finding waiters is very rare. We don't want to walk
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 5327d13..b560d06 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -166,10 +166,10 @@
*/
atomic_set(&cp->cp_state, RDS_CONN_RESETTING);
wait_event(cp->cp_waitq, !test_bit(RDS_IN_XMIT, &cp->cp_flags));
- lock_sock(osock->sk);
/* reset receive side state for rds_tcp_data_recv() for osock */
cancel_delayed_work_sync(&cp->cp_send_w);
cancel_delayed_work_sync(&cp->cp_recv_w);
+ lock_sock(osock->sk);
if (tc->t_tinc) {
rds_inc_put(&tc->t_tinc->ti_inc);
tc->t_tinc = NULL;
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index cf7d974..29a208e 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -191,6 +191,7 @@
rose_disconnect(s, ENETUNREACH, ROSE_OUT_OF_ORDER, 0);
if (rose->neighbour)
rose->neighbour->use--;
+ dev_put(rose->device);
rose->device = NULL;
}
}
@@ -591,6 +592,8 @@
rose->idle = orose->idle;
rose->defer = orose->defer;
rose->device = orose->device;
+ if (rose->device)
+ dev_hold(rose->device);
rose->qbitincl = orose->qbitincl;
return sk;
@@ -644,6 +647,7 @@
break;
}
+ dev_put(rose->device);
sock->sk = NULL;
release_sock(sk);
sock_put(sk);
@@ -720,7 +724,6 @@
struct rose_sock *rose = rose_sk(sk);
struct sockaddr_rose *addr = (struct sockaddr_rose *)uaddr;
unsigned char cause, diagnostic;
- struct net_device *dev;
ax25_uid_assoc *user;
int n, err = 0;
@@ -777,9 +780,12 @@
}
if (sock_flag(sk, SOCK_ZAPPED)) { /* Must bind first - autobinding in this may or may not work */
+ struct net_device *dev;
+
sock_reset_flag(sk, SOCK_ZAPPED);
- if ((dev = rose_dev_first()) == NULL) {
+ dev = rose_dev_first();
+ if (!dev) {
err = -ENETUNREACH;
goto out_release;
}
@@ -787,6 +793,7 @@
user = ax25_findbyuid(current_euid());
if (!user) {
err = -EINVAL;
+ dev_put(dev);
goto out_release;
}
diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c
index f6102e6..730d220 100644
--- a/net/rose/rose_link.c
+++ b/net/rose/rose_link.c
@@ -236,6 +236,9 @@
unsigned char *dptr;
int len;
+ if (!neigh->dev)
+ return;
+
len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN + 3;
if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL)
diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c
index 11c45c8..036d92c 100644
--- a/net/rose/rose_loopback.c
+++ b/net/rose/rose_loopback.c
@@ -96,7 +96,8 @@
}
if (frametype == ROSE_CALL_REQUEST) {
- if (!rose_loopback_neigh->dev) {
+ if (!rose_loopback_neigh->dev &&
+ !rose_loopback_neigh->loopback) {
kfree_skb(skb);
continue;
}
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 6e35703..981bdef 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -227,8 +227,8 @@
{
struct rose_neigh *s;
- rose_stop_ftimer(rose_neigh);
- rose_stop_t0timer(rose_neigh);
+ del_timer_sync(&rose_neigh->ftimer);
+ del_timer_sync(&rose_neigh->t0timer);
skb_queue_purge(&rose_neigh->queue);
@@ -613,6 +613,8 @@
if (first == NULL || strncmp(dev->name, first->name, 3) < 0)
first = dev;
}
+ if (first)
+ dev_hold(first);
rcu_read_unlock();
return first;
diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c
index b3138fc..f06ddbe 100644
--- a/net/rose/rose_timer.c
+++ b/net/rose/rose_timer.c
@@ -31,89 +31,89 @@
void rose_start_heartbeat(struct sock *sk)
{
- del_timer(&sk->sk_timer);
+ sk_stop_timer(sk, &sk->sk_timer);
sk->sk_timer.function = rose_heartbeat_expiry;
sk->sk_timer.expires = jiffies + 5 * HZ;
- add_timer(&sk->sk_timer);
+ sk_reset_timer(sk, &sk->sk_timer, sk->sk_timer.expires);
}
void rose_start_t1timer(struct sock *sk)
{
struct rose_sock *rose = rose_sk(sk);
- del_timer(&rose->timer);
+ sk_stop_timer(sk, &rose->timer);
rose->timer.function = rose_timer_expiry;
rose->timer.expires = jiffies + rose->t1;
- add_timer(&rose->timer);
+ sk_reset_timer(sk, &rose->timer, rose->timer.expires);
}
void rose_start_t2timer(struct sock *sk)
{
struct rose_sock *rose = rose_sk(sk);
- del_timer(&rose->timer);
+ sk_stop_timer(sk, &rose->timer);
rose->timer.function = rose_timer_expiry;
rose->timer.expires = jiffies + rose->t2;
- add_timer(&rose->timer);
+ sk_reset_timer(sk, &rose->timer, rose->timer.expires);
}
void rose_start_t3timer(struct sock *sk)
{
struct rose_sock *rose = rose_sk(sk);
- del_timer(&rose->timer);
+ sk_stop_timer(sk, &rose->timer);
rose->timer.function = rose_timer_expiry;
rose->timer.expires = jiffies + rose->t3;
- add_timer(&rose->timer);
+ sk_reset_timer(sk, &rose->timer, rose->timer.expires);
}
void rose_start_hbtimer(struct sock *sk)
{
struct rose_sock *rose = rose_sk(sk);
- del_timer(&rose->timer);
+ sk_stop_timer(sk, &rose->timer);
rose->timer.function = rose_timer_expiry;
rose->timer.expires = jiffies + rose->hb;
- add_timer(&rose->timer);
+ sk_reset_timer(sk, &rose->timer, rose->timer.expires);
}
void rose_start_idletimer(struct sock *sk)
{
struct rose_sock *rose = rose_sk(sk);
- del_timer(&rose->idletimer);
+ sk_stop_timer(sk, &rose->idletimer);
if (rose->idle > 0) {
rose->idletimer.function = rose_idletimer_expiry;
rose->idletimer.expires = jiffies + rose->idle;
- add_timer(&rose->idletimer);
+ sk_reset_timer(sk, &rose->idletimer, rose->idletimer.expires);
}
}
void rose_stop_heartbeat(struct sock *sk)
{
- del_timer(&sk->sk_timer);
+ sk_stop_timer(sk, &sk->sk_timer);
}
void rose_stop_timer(struct sock *sk)
{
- del_timer(&rose_sk(sk)->timer);
+ sk_stop_timer(sk, &rose_sk(sk)->timer);
}
void rose_stop_idletimer(struct sock *sk)
{
- del_timer(&rose_sk(sk)->idletimer);
+ sk_stop_timer(sk, &rose_sk(sk)->idletimer);
}
static void rose_heartbeat_expiry(struct timer_list *t)
@@ -130,6 +130,7 @@
(sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) {
bh_unlock_sock(sk);
rose_destroy_socket(sk);
+ sock_put(sk);
return;
}
break;
@@ -152,6 +153,7 @@
rose_start_heartbeat(sk);
bh_unlock_sock(sk);
+ sock_put(sk);
}
static void rose_timer_expiry(struct timer_list *t)
@@ -181,6 +183,7 @@
break;
}
bh_unlock_sock(sk);
+ sock_put(sk);
}
static void rose_idletimer_expiry(struct timer_list *t)
@@ -205,4 +208,5 @@
sock_set_flag(sk, SOCK_DEAD);
}
bh_unlock_sock(sk);
+ sock_put(sk);
}
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 41671af..0354f90 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -351,7 +351,7 @@
*/
void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call)
{
- _enter("%d{%d}", call->debug_id, atomic_read(&call->usage));
+ _enter("%d{%d}", call->debug_id, refcount_read(&call->ref));
mutex_lock(&call->user_mutex);
rxrpc_release_call(rxrpc_sk(sock->sk), call);
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index dce4816..d86894a 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -14,14 +14,6 @@
#include <net/af_rxrpc.h>
#include "protocol.h"
-#if 0
-#define CHECK_SLAB_OKAY(X) \
- BUG_ON(atomic_read((X)) >> (sizeof(atomic_t) - 2) == \
- (POISON_FREE << 8 | POISON_FREE))
-#else
-#define CHECK_SLAB_OKAY(X) do {} while (0)
-#endif
-
#define FCRYPT_BSIZE 8
struct rxrpc_crypt {
union {
@@ -86,7 +78,7 @@
struct work_struct client_conn_reaper;
struct timer_list client_conn_reap_timer;
- struct list_head local_endpoints;
+ struct hlist_head local_endpoints;
struct mutex local_mutex; /* Lock for ->local_endpoints */
DECLARE_HASHTABLE (peer_hash, 10);
@@ -264,9 +256,9 @@
struct rxrpc_local {
struct rcu_head rcu;
atomic_t active_users; /* Number of users of the local endpoint */
- atomic_t usage; /* Number of references to the structure */
+ refcount_t ref; /* Number of references to the structure */
struct rxrpc_net *rxnet; /* The network ns in which this resides */
- struct list_head link;
+ struct hlist_node link;
struct socket *socket; /* my UDP socket */
struct work_struct processor;
struct rxrpc_sock __rcu *service; /* Service(s) listening on this endpoint */
@@ -289,7 +281,7 @@
*/
struct rxrpc_peer {
struct rcu_head rcu; /* This must be first */
- atomic_t usage;
+ refcount_t ref;
unsigned long hash_key;
struct hlist_node hash_link;
struct rxrpc_local *local;
@@ -391,7 +383,8 @@
*/
struct rxrpc_bundle {
struct rxrpc_conn_parameters params;
- atomic_t usage;
+ refcount_t ref;
+ atomic_t active; /* Number of active users */
unsigned int debug_id;
bool try_upgrade; /* True if the bundle is attempting upgrade */
bool alloc_conn; /* True if someone's getting a conn */
@@ -412,7 +405,7 @@
struct rxrpc_conn_proto proto;
struct rxrpc_conn_parameters params;
- atomic_t usage;
+ refcount_t ref;
struct rcu_head rcu;
struct list_head cache_link;
@@ -592,7 +585,7 @@
int error; /* Local error incurred */
enum rxrpc_call_state state; /* current state of call */
enum rxrpc_call_completion completion; /* Call completion condition */
- atomic_t usage;
+ refcount_t ref;
u16 service_id; /* service ID */
u8 security_ix; /* Security type */
enum rxrpc_interruptibility interruptibility; /* At what point call may be interrupted */
@@ -659,13 +652,12 @@
spinlock_t input_lock; /* Lock for packet input to this call */
- /* receive-phase ACK management */
+ /* Receive-phase ACK management (ACKs we send). */
u8 ackr_reason; /* reason to ACK */
rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */
- rxrpc_serial_t ackr_first_seq; /* first sequence number received */
- rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */
- rxrpc_seq_t ackr_consumed; /* Highest packet shown consumed */
- rxrpc_seq_t ackr_seen; /* Highest packet shown seen */
+ rxrpc_seq_t ackr_highest_seq; /* Higest sequence number received */
+ atomic_t ackr_nr_unacked; /* Number of unacked packets */
+ atomic_t ackr_nr_consumed; /* Number of packets needing hard ACK */
/* RTT management */
rxrpc_serial_t rtt_serial[4]; /* Serial number of DATA or PING sent */
@@ -675,8 +667,10 @@
#define RXRPC_CALL_RTT_AVAIL_MASK 0xf
#define RXRPC_CALL_RTT_PEND_SHIFT 8
- /* transmission-phase ACK management */
+ /* Transmission-phase ACK management (ACKs we've received). */
ktime_t acks_latest_ts; /* Timestamp of latest ACK received */
+ rxrpc_seq_t acks_first_seq; /* first sequence number received */
+ rxrpc_seq_t acks_prev_seq; /* Highest previousPacket received */
rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */
rxrpc_seq_t acks_lost_top; /* tx_top at the time lost-ack ping sent */
rxrpc_serial_t acks_lost_ping; /* Serial number of probe ACK */
@@ -760,14 +754,12 @@
enum rxrpc_propose_ack_trace);
void rxrpc_process_call(struct work_struct *);
-static inline void rxrpc_reduce_call_timer(struct rxrpc_call *call,
- unsigned long expire_at,
- unsigned long now,
- enum rxrpc_timer_trace why)
-{
- trace_rxrpc_timer(call, why, now);
- timer_reduce(&call->timer, expire_at);
-}
+void rxrpc_reduce_call_timer(struct rxrpc_call *call,
+ unsigned long expire_at,
+ unsigned long now,
+ enum rxrpc_timer_trace why);
+
+void rxrpc_delete_call_timer(struct rxrpc_call *call);
/*
* call_object.c
@@ -791,6 +783,7 @@
bool __rxrpc_queue_call(struct rxrpc_call *);
bool rxrpc_queue_call(struct rxrpc_call *);
void rxrpc_see_call(struct rxrpc_call *);
+bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op);
void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace);
void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace);
void rxrpc_cleanup_call(struct rxrpc_call *);
@@ -1001,6 +994,7 @@
extern const struct seq_operations rxrpc_call_seq_ops;
extern const struct seq_operations rxrpc_connection_seq_ops;
extern const struct seq_operations rxrpc_peer_seq_ops;
+extern const struct seq_operations rxrpc_local_seq_ops;
/*
* recvmsg.c
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index a0b0339..2a14d69 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -91,7 +91,7 @@
(head + 1) & (size - 1));
trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service,
- atomic_read(&conn->usage), here);
+ refcount_read(&conn->ref), here);
}
/* Now it gets complicated, because calls get registered with the
@@ -104,7 +104,7 @@
call->state = RXRPC_CALL_SERVER_PREALLOC;
trace_rxrpc_call(call->debug_id, rxrpc_call_new_service,
- atomic_read(&call->usage),
+ refcount_read(&call->ref),
here, (const void *)user_call_ID);
write_lock(&rx->call_lock);
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index df864e6..2a93e7b 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -166,7 +166,7 @@
_enter("{%d,%d}", call->tx_hard_ack, call->tx_top);
now = ktime_get_real();
- max_age = ktime_sub(now, jiffies_to_usecs(call->peer->rto_j));
+ max_age = ktime_sub_us(now, jiffies_to_usecs(call->peer->rto_j));
spin_lock_bh(&call->lock);
@@ -310,7 +310,7 @@
}
if (call->state == RXRPC_CALL_COMPLETE) {
- del_timer_sync(&call->timer);
+ rxrpc_delete_call_timer(call);
goto out_put;
}
@@ -377,9 +377,9 @@
if (test_bit(RXRPC_CALL_RX_HEARD, &call->flags) &&
(int)call->conn->hi_serial - (int)call->rx_serial > 0) {
trace_rxrpc_call_reset(call);
- rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ECONNRESET);
+ rxrpc_abort_call("EXP", call, 0, RX_CALL_DEAD, -ECONNRESET);
} else {
- rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ETIME);
+ rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME);
}
set_bit(RXRPC_CALL_EV_ABORT, &call->events);
goto recheck_state;
@@ -406,7 +406,8 @@
goto recheck_state;
}
- if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events)) {
+ if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events) &&
+ call->state != RXRPC_CALL_CLIENT_RECV_REPLY) {
rxrpc_resend(call, now);
goto recheck_state;
}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 4eb91d9..10dad28 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -53,10 +53,30 @@
if (call->state < RXRPC_CALL_COMPLETE) {
trace_rxrpc_timer(call, rxrpc_timer_expired, jiffies);
- rxrpc_queue_call(call);
+ __rxrpc_queue_call(call);
+ } else {
+ rxrpc_put_call(call, rxrpc_call_put);
}
}
+void rxrpc_reduce_call_timer(struct rxrpc_call *call,
+ unsigned long expire_at,
+ unsigned long now,
+ enum rxrpc_timer_trace why)
+{
+ if (rxrpc_try_get_call(call, rxrpc_call_got_timer)) {
+ trace_rxrpc_timer(call, why, now);
+ if (timer_reduce(&call->timer, expire_at))
+ rxrpc_put_call(call, rxrpc_call_put_notimer);
+ }
+}
+
+void rxrpc_delete_call_timer(struct rxrpc_call *call)
+{
+ if (del_timer_sync(&call->timer))
+ rxrpc_put_call(call, rxrpc_call_put_timer);
+}
+
static struct lock_class_key rxrpc_call_user_mutex_lock_class_key;
/*
@@ -92,7 +112,7 @@
found_extant_call:
rxrpc_get_call(call, rxrpc_call_got);
read_unlock(&rx->call_lock);
- _leave(" = %p [%d]", call, atomic_read(&call->usage));
+ _leave(" = %p [%d]", call, refcount_read(&call->ref));
return call;
}
@@ -140,7 +160,7 @@
spin_lock_init(&call->notify_lock);
spin_lock_init(&call->input_lock);
rwlock_init(&call->state_lock);
- atomic_set(&call->usage, 1);
+ refcount_set(&call->ref, 1);
call->debug_id = debug_id;
call->tx_total_len = -1;
call->next_rx_timo = 20 * HZ;
@@ -265,8 +285,10 @@
_enter("%p,%lx", rx, p->user_call_ID);
limiter = rxrpc_get_call_slot(p, gfp);
- if (!limiter)
+ if (!limiter) {
+ release_sock(&rx->sk);
return ERR_PTR(-ERESTARTSYS);
+ }
call = rxrpc_alloc_client_call(rx, srx, gfp, debug_id);
if (IS_ERR(call)) {
@@ -279,7 +301,7 @@
call->interruptibility = p->interruptibility;
call->tx_total_len = p->tx_total_len;
trace_rxrpc_call(call->debug_id, rxrpc_call_new_client,
- atomic_read(&call->usage),
+ refcount_read(&call->ref),
here, (const void *)p->user_call_ID);
if (p->kernel)
__set_bit(RXRPC_CALL_KERNEL, &call->flags);
@@ -332,7 +354,7 @@
goto error_attached_to_socket;
trace_rxrpc_call(call->debug_id, rxrpc_call_connected,
- atomic_read(&call->usage), here, NULL);
+ refcount_read(&call->ref), here, NULL);
rxrpc_start_call_timer(call);
@@ -352,7 +374,7 @@
__rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
RX_CALL_DEAD, -EEXIST);
trace_rxrpc_call(call->debug_id, rxrpc_call_error,
- atomic_read(&call->usage), here, ERR_PTR(-EEXIST));
+ refcount_read(&call->ref), here, ERR_PTR(-EEXIST));
rxrpc_release_call(rx, call);
mutex_unlock(&call->user_mutex);
rxrpc_put_call(call, rxrpc_call_put);
@@ -366,7 +388,7 @@
*/
error_attached_to_socket:
trace_rxrpc_call(call->debug_id, rxrpc_call_error,
- atomic_read(&call->usage), here, ERR_PTR(ret));
+ refcount_read(&call->ref), here, ERR_PTR(ret));
set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
__rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
RX_CALL_DEAD, ret);
@@ -422,8 +444,9 @@
bool rxrpc_queue_call(struct rxrpc_call *call)
{
const void *here = __builtin_return_address(0);
- int n = atomic_fetch_add_unless(&call->usage, 1, 0);
- if (n == 0)
+ int n;
+
+ if (!__refcount_inc_not_zero(&call->ref, &n))
return false;
if (rxrpc_queue_work(&call->processor))
trace_rxrpc_call(call->debug_id, rxrpc_call_queued, n + 1,
@@ -439,7 +462,7 @@
bool __rxrpc_queue_call(struct rxrpc_call *call)
{
const void *here = __builtin_return_address(0);
- int n = atomic_read(&call->usage);
+ int n = refcount_read(&call->ref);
ASSERTCMP(n, >=, 1);
if (rxrpc_queue_work(&call->processor))
trace_rxrpc_call(call->debug_id, rxrpc_call_queued_ref, n,
@@ -456,22 +479,34 @@
{
const void *here = __builtin_return_address(0);
if (call) {
- int n = atomic_read(&call->usage);
+ int n = refcount_read(&call->ref);
trace_rxrpc_call(call->debug_id, rxrpc_call_seen, n,
here, NULL);
}
}
+bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
+{
+ const void *here = __builtin_return_address(0);
+ int n;
+
+ if (!__refcount_inc_not_zero(&call->ref, &n))
+ return false;
+ trace_rxrpc_call(call->debug_id, op, n + 1, here, NULL);
+ return true;
+}
+
/*
* Note the addition of a ref on a call.
*/
void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
{
const void *here = __builtin_return_address(0);
- int n = atomic_inc_return(&call->usage);
+ int n;
- trace_rxrpc_call(call->debug_id, op, n, here, NULL);
+ __refcount_inc(&call->ref, &n);
+ trace_rxrpc_call(call->debug_id, op, n + 1, here, NULL);
}
/*
@@ -496,10 +531,10 @@
struct rxrpc_connection *conn = call->conn;
bool put = false;
- _enter("{%d,%d}", call->debug_id, atomic_read(&call->usage));
+ _enter("{%d,%d}", call->debug_id, refcount_read(&call->ref));
trace_rxrpc_call(call->debug_id, rxrpc_call_release,
- atomic_read(&call->usage),
+ refcount_read(&call->ref),
here, (const void *)call->flags);
ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
@@ -510,8 +545,7 @@
spin_unlock_bh(&call->lock);
rxrpc_put_call_slot(call);
-
- del_timer_sync(&call->timer);
+ rxrpc_delete_call_timer(call);
/* Make sure we don't get any more notifications */
write_lock_bh(&rx->recvmsg_lock);
@@ -589,14 +623,14 @@
struct rxrpc_net *rxnet = call->rxnet;
const void *here = __builtin_return_address(0);
unsigned int debug_id = call->debug_id;
+ bool dead;
int n;
ASSERT(call != NULL);
- n = atomic_dec_return(&call->usage);
+ dead = __refcount_dec_and_test(&call->ref, &n);
trace_rxrpc_call(debug_id, op, n, here, NULL);
- ASSERTCMP(n, >=, 0);
- if (n == 0) {
+ if (dead) {
_debug("call %d dead", call->debug_id);
ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
@@ -618,6 +652,8 @@
struct rxrpc_call *call = container_of(work, struct rxrpc_call, processor);
struct rxrpc_net *rxnet = call->rxnet;
+ rxrpc_delete_call_timer(call);
+
rxrpc_put_connection(call->conn);
rxrpc_put_peer(call->peer);
kfree(call->rxtx_buffer);
@@ -652,8 +688,6 @@
memset(&call->sock_node, 0xcd, sizeof(call->sock_node));
- del_timer_sync(&call->timer);
-
ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags));
@@ -686,7 +720,7 @@
list_del_init(&call->link);
pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n",
- call, atomic_read(&call->usage),
+ call, refcount_read(&call->ref),
rxrpc_call_states[call->state],
call->flags, call->events);
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index f5fb223..f5fa5f3 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -40,6 +40,8 @@
DEFINE_IDR(rxrpc_client_conn_ids);
static DEFINE_SPINLOCK(rxrpc_conn_id_lock);
+static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle);
+
/*
* Get a connection ID and epoch for a client connection from the global pool.
* The connection struct pointer is then recorded in the idr radix tree. The
@@ -102,7 +104,7 @@
if (!idr_is_empty(&rxrpc_client_conn_ids)) {
idr_for_each_entry(&rxrpc_client_conn_ids, conn, id) {
pr_err("AF_RXRPC: Leaked client conn %p {%d}\n",
- conn, atomic_read(&conn->usage));
+ conn, refcount_read(&conn->ref));
}
BUG();
}
@@ -122,7 +124,8 @@
if (bundle) {
bundle->params = *cp;
rxrpc_get_peer(bundle->params.peer);
- atomic_set(&bundle->usage, 1);
+ refcount_set(&bundle->ref, 1);
+ atomic_set(&bundle->active, 1);
spin_lock_init(&bundle->channel_lock);
INIT_LIST_HEAD(&bundle->waiting_calls);
}
@@ -131,7 +134,7 @@
struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *bundle)
{
- atomic_inc(&bundle->usage);
+ refcount_inc(&bundle->ref);
return bundle;
}
@@ -144,10 +147,13 @@
void rxrpc_put_bundle(struct rxrpc_bundle *bundle)
{
unsigned int d = bundle->debug_id;
- unsigned int u = atomic_dec_return(&bundle->usage);
+ bool dead;
+ int r;
- _debug("PUT B=%x %u", d, u);
- if (u == 0)
+ dead = __refcount_dec_and_test(&bundle->ref, &r);
+
+ _debug("PUT B=%x %d", d, r - 1);
+ if (dead)
rxrpc_free_bundle(bundle);
}
@@ -169,7 +175,7 @@
return ERR_PTR(-ENOMEM);
}
- atomic_set(&conn->usage, 1);
+ refcount_set(&conn->ref, 1);
conn->bundle = bundle;
conn->params = bundle->params;
conn->out_clientflag = RXRPC_CLIENT_INITIATED;
@@ -199,7 +205,7 @@
key_get(conn->params.key);
trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_client,
- atomic_read(&conn->usage),
+ refcount_read(&conn->ref),
__builtin_return_address(0));
atomic_inc(&rxnet->nr_client_conns);
@@ -341,6 +347,7 @@
rxrpc_free_bundle(candidate);
found_bundle:
rxrpc_get_bundle(bundle);
+ atomic_inc(&bundle->active);
spin_unlock(&local->client_bundles_lock);
_leave(" = %u [found]", bundle->debug_id);
return bundle;
@@ -438,6 +445,7 @@
if (old)
trace_rxrpc_client(old, -1, rxrpc_client_replace);
candidate->bundle_shift = shift;
+ atomic_inc(&bundle->active);
bundle->conns[i] = candidate;
for (j = 0; j < RXRPC_MAXCALLS; j++)
set_bit(shift + j, &bundle->avail_chans);
@@ -728,6 +736,7 @@
smp_rmb();
out_put_bundle:
+ rxrpc_deactivate_bundle(bundle);
rxrpc_put_bundle(bundle);
out:
_leave(" = %d", ret);
@@ -903,9 +912,8 @@
static void rxrpc_unbundle_conn(struct rxrpc_connection *conn)
{
struct rxrpc_bundle *bundle = conn->bundle;
- struct rxrpc_local *local = bundle->params.local;
unsigned int bindex;
- bool need_drop = false, need_put = false;
+ bool need_drop = false;
int i;
_enter("C=%x", conn->debug_id);
@@ -924,15 +932,22 @@
}
spin_unlock(&bundle->channel_lock);
- /* If there are no more connections, remove the bundle */
- if (!bundle->avail_chans) {
- _debug("maybe unbundle");
- spin_lock(&local->client_bundles_lock);
+ if (need_drop) {
+ rxrpc_deactivate_bundle(bundle);
+ rxrpc_put_connection(conn);
+ }
+}
- for (i = 0; i < ARRAY_SIZE(bundle->conns); i++)
- if (bundle->conns[i])
- break;
- if (i == ARRAY_SIZE(bundle->conns) && !bundle->params.exclusive) {
+/*
+ * Drop the active count on a bundle.
+ */
+static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle)
+{
+ struct rxrpc_local *local = bundle->params.local;
+ bool need_put = false;
+
+ if (atomic_dec_and_lock(&bundle->active, &local->client_bundles_lock)) {
+ if (!bundle->params.exclusive) {
_debug("erase bundle");
rb_erase(&bundle->local_node, &local->client_bundles);
need_put = true;
@@ -942,10 +957,6 @@
if (need_put)
rxrpc_put_bundle(bundle);
}
-
- if (need_drop)
- rxrpc_put_connection(conn);
- _leave("");
}
/*
@@ -972,14 +983,13 @@
{
const void *here = __builtin_return_address(0);
unsigned int debug_id = conn->debug_id;
- int n;
+ bool dead;
+ int r;
- n = atomic_dec_return(&conn->usage);
- trace_rxrpc_conn(debug_id, rxrpc_conn_put_client, n, here);
- if (n <= 0) {
- ASSERTCMP(n, >=, 0);
+ dead = __refcount_dec_and_test(&conn->ref, &r);
+ trace_rxrpc_conn(debug_id, rxrpc_conn_put_client, r - 1, here);
+ if (dead)
rxrpc_kill_client_conn(conn);
- }
}
/*
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 3bcbe06..d829b97 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -105,7 +105,7 @@
goto not_found;
*_peer = peer;
conn = rxrpc_find_service_conn_rcu(peer, skb);
- if (!conn || atomic_read(&conn->usage) == 0)
+ if (!conn || refcount_read(&conn->ref) == 0)
goto not_found;
_leave(" = %p", conn);
return conn;
@@ -115,7 +115,7 @@
*/
conn = idr_find(&rxrpc_client_conn_ids,
sp->hdr.cid >> RXRPC_CIDSHIFT);
- if (!conn || atomic_read(&conn->usage) == 0) {
+ if (!conn || refcount_read(&conn->ref) == 0) {
_debug("no conn");
goto not_found;
}
@@ -184,7 +184,7 @@
chan->last_type = RXRPC_PACKET_TYPE_ABORT;
break;
default:
- chan->last_abort = RX_USER_ABORT;
+ chan->last_abort = RX_CALL_DEAD;
chan->last_type = RXRPC_PACKET_TYPE_ABORT;
break;
}
@@ -264,11 +264,12 @@
bool rxrpc_queue_conn(struct rxrpc_connection *conn)
{
const void *here = __builtin_return_address(0);
- int n = atomic_fetch_add_unless(&conn->usage, 1, 0);
- if (n == 0)
+ int r;
+
+ if (!__refcount_inc_not_zero(&conn->ref, &r))
return false;
if (rxrpc_queue_work(&conn->processor))
- trace_rxrpc_conn(conn->debug_id, rxrpc_conn_queued, n + 1, here);
+ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_queued, r + 1, here);
else
rxrpc_put_connection(conn);
return true;
@@ -281,7 +282,7 @@
{
const void *here = __builtin_return_address(0);
if (conn) {
- int n = atomic_read(&conn->usage);
+ int n = refcount_read(&conn->ref);
trace_rxrpc_conn(conn->debug_id, rxrpc_conn_seen, n, here);
}
@@ -293,9 +294,10 @@
struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *conn)
{
const void *here = __builtin_return_address(0);
- int n = atomic_inc_return(&conn->usage);
+ int r;
- trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, n, here);
+ __refcount_inc(&conn->ref, &r);
+ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, r, here);
return conn;
}
@@ -306,11 +308,11 @@
rxrpc_get_connection_maybe(struct rxrpc_connection *conn)
{
const void *here = __builtin_return_address(0);
+ int r;
if (conn) {
- int n = atomic_fetch_add_unless(&conn->usage, 1, 0);
- if (n > 0)
- trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, n + 1, here);
+ if (__refcount_inc_not_zero(&conn->ref, &r))
+ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, r + 1, here);
else
conn = NULL;
}
@@ -334,12 +336,11 @@
{
const void *here = __builtin_return_address(0);
unsigned int debug_id = conn->debug_id;
- int n;
+ int r;
- n = atomic_dec_return(&conn->usage);
- trace_rxrpc_conn(debug_id, rxrpc_conn_put_service, n, here);
- ASSERTCMP(n, >=, 0);
- if (n == 1)
+ __refcount_dec(&conn->ref, &r);
+ trace_rxrpc_conn(debug_id, rxrpc_conn_put_service, r - 1, here);
+ if (r - 1 == 1)
rxrpc_set_service_reap_timer(conn->params.local->rxnet,
jiffies + rxrpc_connection_expiry);
}
@@ -352,9 +353,9 @@
struct rxrpc_connection *conn =
container_of(rcu, struct rxrpc_connection, rcu);
- _enter("{%d,u=%d}", conn->debug_id, atomic_read(&conn->usage));
+ _enter("{%d,u=%d}", conn->debug_id, refcount_read(&conn->ref));
- ASSERTCMP(atomic_read(&conn->usage), ==, 0);
+ ASSERTCMP(refcount_read(&conn->ref), ==, 0);
_net("DESTROY CONN %d", conn->debug_id);
@@ -394,8 +395,8 @@
write_lock(&rxnet->conn_lock);
list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) {
- ASSERTCMP(atomic_read(&conn->usage), >, 0);
- if (likely(atomic_read(&conn->usage) > 1))
+ ASSERTCMP(refcount_read(&conn->ref), >, 0);
+ if (likely(refcount_read(&conn->ref) > 1))
continue;
if (conn->state == RXRPC_CONN_SERVICE_PREALLOC)
continue;
@@ -407,7 +408,7 @@
expire_at = idle_timestamp + rxrpc_closed_conn_expiry * HZ;
_debug("reap CONN %d { u=%d,t=%ld }",
- conn->debug_id, atomic_read(&conn->usage),
+ conn->debug_id, refcount_read(&conn->ref),
(long)expire_at - (long)now);
if (time_before(now, expire_at)) {
@@ -420,7 +421,7 @@
/* The usage count sits at 1 whilst the object is unused on the
* list; we reduce that to 0 to make the object unavailable.
*/
- if (atomic_cmpxchg(&conn->usage, 1, 0) != 1)
+ if (!refcount_dec_if_one(&conn->ref))
continue;
trace_rxrpc_conn(conn->debug_id, rxrpc_conn_reap_service, 0, NULL);
@@ -444,7 +445,7 @@
link);
list_del_init(&conn->link);
- ASSERTCMP(atomic_read(&conn->usage), ==, 0);
+ ASSERTCMP(refcount_read(&conn->ref), ==, 0);
rxrpc_kill_connection(conn);
}
@@ -472,7 +473,7 @@
write_lock(&rxnet->conn_lock);
list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) {
pr_err("AF_RXRPC: Leaked conn %p {%d}\n",
- conn, atomic_read(&conn->usage));
+ conn, refcount_read(&conn->ref));
leak = true;
}
write_unlock(&rxnet->conn_lock);
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
index 6c84772..6850816 100644
--- a/net/rxrpc/conn_service.c
+++ b/net/rxrpc/conn_service.c
@@ -9,7 +9,7 @@
#include "ar-internal.h"
static struct rxrpc_bundle rxrpc_service_dummy_bundle = {
- .usage = ATOMIC_INIT(1),
+ .ref = REFCOUNT_INIT(1),
.debug_id = UINT_MAX,
.channel_lock = __SPIN_LOCK_UNLOCKED(&rxrpc_service_dummy_bundle.channel_lock),
};
@@ -99,7 +99,7 @@
return;
found_extant_conn:
- if (atomic_read(&cursor->usage) == 0)
+ if (refcount_read(&cursor->ref) == 0)
goto replace_old_connection;
write_sequnlock_bh(&peer->service_conn_lock);
/* We should not be able to get here. rxrpc_incoming_connection() is
@@ -132,7 +132,7 @@
* the rxrpc_connections list.
*/
conn->state = RXRPC_CONN_SERVICE_PREALLOC;
- atomic_set(&conn->usage, 2);
+ refcount_set(&conn->ref, 2);
conn->bundle = rxrpc_get_bundle(&rxrpc_service_dummy_bundle);
atomic_inc(&rxnet->nr_conns);
@@ -142,7 +142,7 @@
write_unlock(&rxnet->conn_lock);
trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service,
- atomic_read(&conn->usage),
+ refcount_read(&conn->ref),
__builtin_return_address(0));
}
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index dc20136..e917811 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -412,8 +412,8 @@
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
enum rxrpc_call_state state;
- unsigned int j, nr_subpackets;
- rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0;
+ unsigned int j, nr_subpackets, nr_unacked = 0;
+ rxrpc_serial_t serial = sp->hdr.serial, ack_serial = serial;
rxrpc_seq_t seq0 = sp->hdr.seq, hard_ack;
bool immediate_ack = false, jumbo_bad = false;
u8 ack = 0;
@@ -453,7 +453,6 @@
!rxrpc_receiving_reply(call))
goto unlock;
- call->ackr_prev_seq = seq0;
hard_ack = READ_ONCE(call->rx_hard_ack);
nr_subpackets = sp->nr_subpackets;
@@ -534,6 +533,9 @@
ack_serial = serial;
}
+ if (after(seq0, call->ackr_highest_seq))
+ call->ackr_highest_seq = seq0;
+
/* Queue the packet. We use a couple of memory barriers here as need
* to make sure that rx_top is perceived to be set after the buffer
* pointer and that the buffer pointer is set after the annotation and
@@ -567,6 +569,8 @@
sp = NULL;
}
+ nr_unacked++;
+
if (last) {
set_bit(RXRPC_CALL_RX_LAST, &call->flags);
if (!ack) {
@@ -586,9 +590,14 @@
}
call->rx_expect_next = seq + 1;
}
+ if (!ack)
+ ack_serial = serial;
}
ack:
+ if (atomic_add_return(nr_unacked, &call->ackr_nr_unacked) > 2 && !ack)
+ ack = RXRPC_ACK_IDLE;
+
if (ack)
rxrpc_propose_ACK(call, ack, ack_serial,
immediate_ack, true,
@@ -812,7 +821,7 @@
static bool rxrpc_is_ack_valid(struct rxrpc_call *call,
rxrpc_seq_t first_pkt, rxrpc_seq_t prev_pkt)
{
- rxrpc_seq_t base = READ_ONCE(call->ackr_first_seq);
+ rxrpc_seq_t base = READ_ONCE(call->acks_first_seq);
if (after(first_pkt, base))
return true; /* The window advanced */
@@ -820,7 +829,7 @@
if (before(first_pkt, base))
return false; /* firstPacket regressed */
- if (after_eq(prev_pkt, call->ackr_prev_seq))
+ if (after_eq(prev_pkt, call->acks_prev_seq))
return true; /* previousPacket hasn't regressed. */
/* Some rx implementations put a serial number in previousPacket. */
@@ -906,8 +915,8 @@
/* Discard any out-of-order or duplicate ACKs (outside lock). */
if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) {
trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial,
- first_soft_ack, call->ackr_first_seq,
- prev_pkt, call->ackr_prev_seq);
+ first_soft_ack, call->acks_first_seq,
+ prev_pkt, call->acks_prev_seq);
return;
}
@@ -922,14 +931,14 @@
/* Discard any out-of-order or duplicate ACKs (inside lock). */
if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) {
trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial,
- first_soft_ack, call->ackr_first_seq,
- prev_pkt, call->ackr_prev_seq);
+ first_soft_ack, call->acks_first_seq,
+ prev_pkt, call->acks_prev_seq);
goto out;
}
call->acks_latest_ts = skb->tstamp;
- call->ackr_first_seq = first_soft_ack;
- call->ackr_prev_seq = prev_pkt;
+ call->acks_first_seq = first_soft_ack;
+ call->acks_prev_seq = prev_pkt;
/* Parse rwind and mtu sizes if provided. */
if (buf.info.rxMTU)
@@ -1154,8 +1163,6 @@
*/
static void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
{
- CHECK_SLAB_OKAY(&local->usage);
-
if (rxrpc_get_local_maybe(local)) {
skb_queue_tail(&local->reject_queue, skb);
rxrpc_queue_local(local);
@@ -1413,7 +1420,7 @@
}
}
- if (!call || atomic_read(&call->usage) == 0) {
+ if (!call || refcount_read(&call->ref) == 0) {
if (rxrpc_to_client(sp) ||
sp->hdr.type != RXRPC_PACKET_TYPE_DATA)
goto bad_message;
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 8c28810..2c66ee9 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -78,10 +78,10 @@
local = kzalloc(sizeof(struct rxrpc_local), GFP_KERNEL);
if (local) {
- atomic_set(&local->usage, 1);
+ refcount_set(&local->ref, 1);
atomic_set(&local->active_users, 1);
local->rxnet = rxnet;
- INIT_LIST_HEAD(&local->link);
+ INIT_HLIST_NODE(&local->link);
INIT_WORK(&local->processor, rxrpc_local_processor);
init_rwsem(&local->defrag_sem);
skb_queue_head_init(&local->reject_queue);
@@ -199,7 +199,7 @@
{
struct rxrpc_local *local;
struct rxrpc_net *rxnet = rxrpc_net(net);
- struct list_head *cursor;
+ struct hlist_node *cursor;
const char *age;
long diff;
int ret;
@@ -209,16 +209,12 @@
mutex_lock(&rxnet->local_mutex);
- for (cursor = rxnet->local_endpoints.next;
- cursor != &rxnet->local_endpoints;
- cursor = cursor->next) {
- local = list_entry(cursor, struct rxrpc_local, link);
+ hlist_for_each(cursor, &rxnet->local_endpoints) {
+ local = hlist_entry(cursor, struct rxrpc_local, link);
diff = rxrpc_local_cmp_key(local, srx);
- if (diff < 0)
+ if (diff != 0)
continue;
- if (diff > 0)
- break;
/* Services aren't allowed to share transport sockets, so
* reject that here. It is possible that the object is dying -
@@ -230,9 +226,10 @@
goto addr_in_use;
}
- /* Found a match. We replace a dying object. Attempting to
- * bind the transport socket may still fail if we're attempting
- * to use a local address that the dying object is still using.
+ /* Found a match. We want to replace a dying object.
+ * Attempting to bind the transport socket may still fail if
+ * we're attempting to use a local address that the dying
+ * object is still using.
*/
if (!rxrpc_use_local(local))
break;
@@ -249,10 +246,12 @@
if (ret < 0)
goto sock_error;
- if (cursor != &rxnet->local_endpoints)
- list_replace_init(cursor, &local->link);
- else
- list_add_tail(&local->link, cursor);
+ if (cursor) {
+ hlist_replace_rcu(cursor, &local->link);
+ cursor->pprev = NULL;
+ } else {
+ hlist_add_head_rcu(&local->link, &rxnet->local_endpoints);
+ }
age = "new";
found:
@@ -285,10 +284,10 @@
struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local)
{
const void *here = __builtin_return_address(0);
- int n;
+ int r;
- n = atomic_inc_return(&local->usage);
- trace_rxrpc_local(local->debug_id, rxrpc_local_got, n, here);
+ __refcount_inc(&local->ref, &r);
+ trace_rxrpc_local(local->debug_id, rxrpc_local_got, r + 1, here);
return local;
}
@@ -298,12 +297,12 @@
struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local)
{
const void *here = __builtin_return_address(0);
+ int r;
if (local) {
- int n = atomic_fetch_add_unless(&local->usage, 1, 0);
- if (n > 0)
+ if (__refcount_inc_not_zero(&local->ref, &r))
trace_rxrpc_local(local->debug_id, rxrpc_local_got,
- n + 1, here);
+ r + 1, here);
else
local = NULL;
}
@@ -317,10 +316,10 @@
{
const void *here = __builtin_return_address(0);
unsigned int debug_id = local->debug_id;
- int n = atomic_read(&local->usage);
+ int r = refcount_read(&local->ref);
if (rxrpc_queue_work(&local->processor))
- trace_rxrpc_local(debug_id, rxrpc_local_queued, n, here);
+ trace_rxrpc_local(debug_id, rxrpc_local_queued, r + 1, here);
else
rxrpc_put_local(local);
}
@@ -332,15 +331,16 @@
{
const void *here = __builtin_return_address(0);
unsigned int debug_id;
- int n;
+ bool dead;
+ int r;
if (local) {
debug_id = local->debug_id;
- n = atomic_dec_return(&local->usage);
- trace_rxrpc_local(debug_id, rxrpc_local_put, n, here);
+ dead = __refcount_dec_and_test(&local->ref, &r);
+ trace_rxrpc_local(debug_id, rxrpc_local_put, r, here);
- if (n == 0)
+ if (dead)
call_rcu(&local->rcu, rxrpc_local_rcu);
}
}
@@ -393,7 +393,7 @@
local->dead = true;
mutex_lock(&rxnet->local_mutex);
- list_del_init(&local->link);
+ hlist_del_init_rcu(&local->link);
mutex_unlock(&rxnet->local_mutex);
rxrpc_clean_up_local_conns(local);
@@ -424,8 +424,11 @@
container_of(work, struct rxrpc_local, processor);
bool again;
+ if (local->dead)
+ return;
+
trace_rxrpc_local(local->debug_id, rxrpc_local_processing,
- atomic_read(&local->usage), NULL);
+ refcount_read(&local->ref), NULL);
do {
again = false;
@@ -477,11 +480,11 @@
flush_workqueue(rxrpc_workqueue);
- if (!list_empty(&rxnet->local_endpoints)) {
+ if (!hlist_empty(&rxnet->local_endpoints)) {
mutex_lock(&rxnet->local_mutex);
- list_for_each_entry(local, &rxnet->local_endpoints, link) {
+ hlist_for_each_entry(local, &rxnet->local_endpoints, link) {
pr_err("AF_RXRPC: Leaked local %p {%d}\n",
- local, atomic_read(&local->usage));
+ local, refcount_read(&local->ref));
}
mutex_unlock(&rxnet->local_mutex);
BUG();
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
index 25bbc4c..34f3899 100644
--- a/net/rxrpc/net_ns.c
+++ b/net/rxrpc/net_ns.c
@@ -72,7 +72,7 @@
timer_setup(&rxnet->client_conn_reap_timer,
rxrpc_client_conn_reap_timeout, 0);
- INIT_LIST_HEAD(&rxnet->local_endpoints);
+ INIT_HLIST_HEAD(&rxnet->local_endpoints);
mutex_init(&rxnet->local_mutex);
hash_init(rxnet->peer_hash);
@@ -98,6 +98,9 @@
proc_create_net("peers", 0444, rxnet->proc_net,
&rxrpc_peer_seq_ops,
sizeof(struct seq_net_private));
+ proc_create_net("locals", 0444, rxnet->proc_net,
+ &rxrpc_local_seq_ops,
+ sizeof(struct seq_net_private));
return 0;
err_proc:
@@ -115,6 +118,8 @@
rxnet->live = false;
del_timer_sync(&rxnet->peer_keepalive_timer);
cancel_work_sync(&rxnet->peer_keepalive_work);
+ /* Remove the timer again as the worker may have restarted it. */
+ del_timer_sync(&rxnet->peer_keepalive_timer);
rxrpc_destroy_all_calls(rxnet);
rxrpc_destroy_all_connections(rxnet);
rxrpc_destroy_all_peers(rxnet);
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index a45c83f..9683617 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -74,11 +74,18 @@
u8 reason)
{
rxrpc_serial_t serial;
+ unsigned int tmp;
rxrpc_seq_t hard_ack, top, seq;
int ix;
u32 mtu, jmax;
u8 *ackp = pkt->acks;
+ tmp = atomic_xchg(&call->ackr_nr_unacked, 0);
+ tmp |= atomic_xchg(&call->ackr_nr_consumed, 0);
+ if (!tmp && (reason == RXRPC_ACK_DELAY ||
+ reason == RXRPC_ACK_IDLE))
+ return 0;
+
/* Barrier against rxrpc_input_data(). */
serial = call->ackr_serial;
hard_ack = READ_ONCE(call->rx_hard_ack);
@@ -89,7 +96,7 @@
pkt->ack.bufferSpace = htons(8);
pkt->ack.maxSkew = htons(0);
pkt->ack.firstPacket = htonl(hard_ack + 1);
- pkt->ack.previousPacket = htonl(call->ackr_prev_seq);
+ pkt->ack.previousPacket = htonl(call->ackr_highest_seq);
pkt->ack.serial = htonl(serial);
pkt->ack.reason = reason;
pkt->ack.nAcks = top - hard_ack;
@@ -223,6 +230,10 @@
n = rxrpc_fill_out_ack(conn, call, pkt, &hard_ack, &top, reason);
spin_unlock_bh(&call->lock);
+ if (n == 0) {
+ kfree(pkt);
+ return 0;
+ }
iov[0].iov_base = pkt;
iov[0].iov_len = sizeof(pkt->whdr) + sizeof(pkt->ack) + n;
@@ -259,13 +270,6 @@
ntohl(pkt->ack.serial),
false, true,
rxrpc_propose_ack_retry_tx);
- } else {
- spin_lock_bh(&call->lock);
- if (after(hard_ack, call->ackr_consumed))
- call->ackr_consumed = hard_ack;
- if (after(top, call->ackr_seen))
- call->ackr_seen = top;
- spin_unlock_bh(&call->lock);
}
rxrpc_set_keepalive(call);
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 0298fe2..26d2ae9 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -121,7 +121,7 @@
hash_for_each_possible_rcu(rxnet->peer_hash, peer, hash_link, hash_key) {
if (rxrpc_peer_cmp_key(peer, local, srx, hash_key) == 0 &&
- atomic_read(&peer->usage) > 0)
+ refcount_read(&peer->ref) > 0)
return peer;
}
@@ -140,7 +140,7 @@
peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key);
if (peer) {
_net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport);
- _leave(" = %p {u=%d}", peer, atomic_read(&peer->usage));
+ _leave(" = %p {u=%d}", peer, refcount_read(&peer->ref));
}
return peer;
}
@@ -216,7 +216,7 @@
peer = kzalloc(sizeof(struct rxrpc_peer), gfp);
if (peer) {
- atomic_set(&peer->usage, 1);
+ refcount_set(&peer->ref, 1);
peer->local = rxrpc_get_local(local);
INIT_HLIST_HEAD(&peer->error_targets);
peer->service_conns = RB_ROOT;
@@ -378,7 +378,7 @@
_net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport);
- _leave(" = %p {u=%d}", peer, atomic_read(&peer->usage));
+ _leave(" = %p {u=%d}", peer, refcount_read(&peer->ref));
return peer;
}
@@ -388,10 +388,10 @@
struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer)
{
const void *here = __builtin_return_address(0);
- int n;
+ int r;
- n = atomic_inc_return(&peer->usage);
- trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, n, here);
+ __refcount_inc(&peer->ref, &r);
+ trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, r + 1, here);
return peer;
}
@@ -401,11 +401,11 @@
struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer)
{
const void *here = __builtin_return_address(0);
+ int r;
if (peer) {
- int n = atomic_fetch_add_unless(&peer->usage, 1, 0);
- if (n > 0)
- trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, n + 1, here);
+ if (__refcount_inc_not_zero(&peer->ref, &r))
+ trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, r + 1, here);
else
peer = NULL;
}
@@ -436,13 +436,14 @@
{
const void *here = __builtin_return_address(0);
unsigned int debug_id;
- int n;
+ bool dead;
+ int r;
if (peer) {
debug_id = peer->debug_id;
- n = atomic_dec_return(&peer->usage);
- trace_rxrpc_peer(debug_id, rxrpc_peer_put, n, here);
- if (n == 0)
+ dead = __refcount_dec_and_test(&peer->ref, &r);
+ trace_rxrpc_peer(debug_id, rxrpc_peer_put, r - 1, here);
+ if (dead)
__rxrpc_put_peer(peer);
}
}
@@ -455,11 +456,12 @@
{
const void *here = __builtin_return_address(0);
unsigned int debug_id = peer->debug_id;
- int n;
+ bool dead;
+ int r;
- n = atomic_dec_return(&peer->usage);
- trace_rxrpc_peer(debug_id, rxrpc_peer_put, n, here);
- if (n == 0) {
+ dead = __refcount_dec_and_test(&peer->ref, &r);
+ trace_rxrpc_peer(debug_id, rxrpc_peer_put, r - 1, here);
+ if (dead) {
hash_del_rcu(&peer->hash_link);
list_del_init(&peer->keepalive_link);
rxrpc_free_peer(peer);
@@ -481,7 +483,7 @@
hlist_for_each_entry(peer, &rxnet->peer_hash[i], hash_link) {
pr_err("Leaked peer %u {%u} %pISp\n",
peer->debug_id,
- atomic_read(&peer->usage),
+ refcount_read(&peer->ref),
&peer->srx.transport);
}
}
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index e2f9907..8967201 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -107,7 +107,7 @@
call->cid,
call->call_id,
rxrpc_is_service_call(call) ? "Svc" : "Clt",
- atomic_read(&call->usage),
+ refcount_read(&call->ref),
rxrpc_call_states[call->state],
call->abort_code,
call->debug_id,
@@ -189,7 +189,7 @@
conn->service_id,
conn->proto.cid,
rxrpc_conn_is_service(conn) ? "Svc" : "Clt",
- atomic_read(&conn->usage),
+ refcount_read(&conn->ref),
rxrpc_conn_states[conn->state],
key_serial(conn->params.key),
atomic_read(&conn->serial),
@@ -239,7 +239,7 @@
" %3u %5u %6llus %8u %8u\n",
lbuff,
rbuff,
- atomic_read(&peer->usage),
+ refcount_read(&peer->ref),
peer->cong_cwnd,
peer->mtu,
now - peer->last_tx_at,
@@ -334,3 +334,72 @@
.stop = rxrpc_peer_seq_stop,
.show = rxrpc_peer_seq_show,
};
+
+/*
+ * Generate a list of extant virtual local endpoints in /proc/net/rxrpc/locals
+ */
+static int rxrpc_local_seq_show(struct seq_file *seq, void *v)
+{
+ struct rxrpc_local *local;
+ char lbuff[50];
+
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(seq,
+ "Proto Local "
+ " Use Act\n");
+ return 0;
+ }
+
+ local = hlist_entry(v, struct rxrpc_local, link);
+
+ sprintf(lbuff, "%pISpc", &local->srx.transport);
+
+ seq_printf(seq,
+ "UDP %-47.47s %3u %3u\n",
+ lbuff,
+ refcount_read(&local->ref),
+ atomic_read(&local->active_users));
+
+ return 0;
+}
+
+static void *rxrpc_local_seq_start(struct seq_file *seq, loff_t *_pos)
+ __acquires(rcu)
+{
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+ unsigned int n;
+
+ rcu_read_lock();
+
+ if (*_pos >= UINT_MAX)
+ return NULL;
+
+ n = *_pos;
+ if (n == 0)
+ return SEQ_START_TOKEN;
+
+ return seq_hlist_start_rcu(&rxnet->local_endpoints, n - 1);
+}
+
+static void *rxrpc_local_seq_next(struct seq_file *seq, void *v, loff_t *_pos)
+{
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+
+ if (*_pos >= UINT_MAX)
+ return NULL;
+
+ return seq_hlist_next_rcu(v, &rxnet->local_endpoints, _pos);
+}
+
+static void rxrpc_local_seq_stop(struct seq_file *seq, void *v)
+ __releases(rcu)
+{
+ rcu_read_unlock();
+}
+
+const struct seq_operations rxrpc_local_seq_ops = {
+ .start = rxrpc_local_seq_start,
+ .next = rxrpc_local_seq_next,
+ .stop = rxrpc_local_seq_stop,
+ .show = rxrpc_local_seq_show,
+};
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 2c84285..7878267 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -260,11 +260,9 @@
rxrpc_end_rx_phase(call, serial);
} else {
/* Check to see if there's an ACK that needs sending. */
- if (after_eq(hard_ack, call->ackr_consumed + 2) ||
- after_eq(top, call->ackr_seen + 2) ||
- (hard_ack == top && after(hard_ack, call->ackr_consumed)))
- rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial,
- true, true,
+ if (atomic_inc_return(&call->ackr_nr_consumed) > 2)
+ rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, serial,
+ true, false,
rxrpc_propose_ack_rotate_rx);
if (call->ackr_reason && call->ackr_reason != RXRPC_ACK_DELAY)
rxrpc_send_ack_packet(call, false, NULL);
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index f114dc2..5345e8e 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -451,7 +451,7 @@
* directly into the target buffer.
*/
sg = _sg;
- nsg = skb_shinfo(skb)->nr_frags;
+ nsg = skb_shinfo(skb)->nr_frags + 1;
if (nsg <= 4) {
nsg = 4;
} else {
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index d27140c..eef3c14 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -51,10 +51,7 @@
return sock_intr_errno(*timeo);
trace_rxrpc_transmit(call, rxrpc_transmit_wait);
- mutex_unlock(&call->user_mutex);
*timeo = schedule_timeout(*timeo);
- if (mutex_lock_interruptible(&call->user_mutex) < 0)
- return sock_intr_errno(*timeo);
}
}
@@ -290,37 +287,48 @@
static int rxrpc_send_data(struct rxrpc_sock *rx,
struct rxrpc_call *call,
struct msghdr *msg, size_t len,
- rxrpc_notify_end_tx_t notify_end_tx)
+ rxrpc_notify_end_tx_t notify_end_tx,
+ bool *_dropped_lock)
{
struct rxrpc_skb_priv *sp;
struct sk_buff *skb;
struct sock *sk = &rx->sk;
+ enum rxrpc_call_state state;
long timeo;
- bool more;
- int ret, copied;
+ bool more = msg->msg_flags & MSG_MORE;
+ int ret, copied = 0;
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
/* this should be in poll */
sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+reload:
+ ret = -EPIPE;
if (sk->sk_shutdown & SEND_SHUTDOWN)
- return -EPIPE;
+ goto maybe_error;
+ state = READ_ONCE(call->state);
+ ret = -ESHUTDOWN;
+ if (state >= RXRPC_CALL_COMPLETE)
+ goto maybe_error;
+ ret = -EPROTO;
+ if (state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
+ state != RXRPC_CALL_SERVER_ACK_REQUEST &&
+ state != RXRPC_CALL_SERVER_SEND_REPLY)
+ goto maybe_error;
- more = msg->msg_flags & MSG_MORE;
-
+ ret = -EMSGSIZE;
if (call->tx_total_len != -1) {
- if (len > call->tx_total_len)
- return -EMSGSIZE;
- if (!more && len != call->tx_total_len)
- return -EMSGSIZE;
+ if (len - copied > call->tx_total_len)
+ goto maybe_error;
+ if (!more && len - copied != call->tx_total_len)
+ goto maybe_error;
}
skb = call->tx_pending;
call->tx_pending = NULL;
rxrpc_see_skb(skb, rxrpc_skb_seen);
- copied = 0;
do {
/* Check to see if there's a ping ACK to reply to. */
if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE)
@@ -331,16 +339,8 @@
_debug("alloc");
- if (!rxrpc_check_tx_space(call, NULL)) {
- ret = -EAGAIN;
- if (msg->msg_flags & MSG_DONTWAIT)
- goto maybe_error;
- ret = rxrpc_wait_for_tx_window(rx, call,
- &timeo,
- msg->msg_flags & MSG_WAITALL);
- if (ret < 0)
- goto maybe_error;
- }
+ if (!rxrpc_check_tx_space(call, NULL))
+ goto wait_for_space;
max = RXRPC_JUMBO_DATALEN;
max -= call->conn->security_size;
@@ -461,6 +461,12 @@
success:
ret = copied;
+ if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE) {
+ read_lock_bh(&call->state_lock);
+ if (call->error < 0)
+ ret = call->error;
+ read_unlock_bh(&call->state_lock);
+ }
out:
call->tx_pending = skb;
_leave(" = %d", ret);
@@ -479,6 +485,27 @@
efault:
ret = -EFAULT;
goto out;
+
+wait_for_space:
+ ret = -EAGAIN;
+ if (msg->msg_flags & MSG_DONTWAIT)
+ goto maybe_error;
+ mutex_unlock(&call->user_mutex);
+ *_dropped_lock = true;
+ ret = rxrpc_wait_for_tx_window(rx, call, &timeo,
+ msg->msg_flags & MSG_WAITALL);
+ if (ret < 0)
+ goto maybe_error;
+ if (call->interruptibility == RXRPC_INTERRUPTIBLE) {
+ if (mutex_lock_interruptible(&call->user_mutex) < 0) {
+ ret = sock_intr_errno(timeo);
+ goto maybe_error;
+ }
+ } else {
+ mutex_lock(&call->user_mutex);
+ }
+ *_dropped_lock = false;
+ goto reload;
}
/*
@@ -640,6 +667,7 @@
enum rxrpc_call_state state;
struct rxrpc_call *call;
unsigned long now, j;
+ bool dropped_lock = false;
int ret;
struct rxrpc_send_params p = {
@@ -748,21 +776,13 @@
ret = rxrpc_send_abort_packet(call);
} else if (p.command != RXRPC_CMD_SEND_DATA) {
ret = -EINVAL;
- } else if (rxrpc_is_client_call(call) &&
- state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
- /* request phase complete for this client call */
- ret = -EPROTO;
- } else if (rxrpc_is_service_call(call) &&
- state != RXRPC_CALL_SERVER_ACK_REQUEST &&
- state != RXRPC_CALL_SERVER_SEND_REPLY) {
- /* Reply phase not begun or not complete for service call. */
- ret = -EPROTO;
} else {
- ret = rxrpc_send_data(rx, call, msg, len, NULL);
+ ret = rxrpc_send_data(rx, call, msg, len, NULL, &dropped_lock);
}
out_put_unlock:
- mutex_unlock(&call->user_mutex);
+ if (!dropped_lock)
+ mutex_unlock(&call->user_mutex);
error_put:
rxrpc_put_call(call, rxrpc_call_put);
_leave(" = %d", ret);
@@ -790,6 +810,7 @@
struct msghdr *msg, size_t len,
rxrpc_notify_end_tx_t notify_end_tx)
{
+ bool dropped_lock = false;
int ret;
_enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]);
@@ -807,7 +828,7 @@
case RXRPC_CALL_SERVER_ACK_REQUEST:
case RXRPC_CALL_SERVER_SEND_REPLY:
ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len,
- notify_end_tx);
+ notify_end_tx, &dropped_lock);
break;
case RXRPC_CALL_COMPLETE:
read_lock_bh(&call->state_lock);
@@ -821,7 +842,8 @@
break;
}
- mutex_unlock(&call->user_mutex);
+ if (!dropped_lock)
+ mutex_unlock(&call->user_mutex);
_leave(" = %d", ret);
return ret;
}
diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c
index 0348d2b..580a5ac 100644
--- a/net/rxrpc/skbuff.c
+++ b/net/rxrpc/skbuff.c
@@ -71,7 +71,6 @@
const void *here = __builtin_return_address(0);
if (skb) {
int n;
- CHECK_SLAB_OKAY(&skb->users);
n = atomic_dec_return(select_skb_count(skb));
trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n,
rxrpc_skb(skb)->rx_flags, here);
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index 540351d..555e091 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -12,7 +12,7 @@
static struct ctl_table_header *rxrpc_sysctl_reg_table;
static const unsigned int four = 4;
-static const unsigned int thirtytwo = 32;
+static const unsigned int max_backlog = RXRPC_BACKLOG_MAX - 1;
static const unsigned int n_65535 = 65535;
static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1;
static const unsigned long one_jiffy = 1;
@@ -89,7 +89,7 @@
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = (void *)&four,
- .extra2 = (void *)&thirtytwo,
+ .extra2 = (void *)&max_backlog,
},
{
.procname = "rx_window_size",
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index d762e89..bc4e5da 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -976,7 +976,7 @@
config NET_ACT_CT
tristate "connection tracking tc action"
- depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT && NF_FLOW_TABLE
+ depends on NET_CLS_ACT && NF_CONNTRACK && (!NF_NAT || NF_NAT) && NF_FLOW_TABLE
help
Say Y here to allow sending the packets to conntrack module.
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 7b29aa1..4ab9c2a 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -302,7 +302,8 @@
}
static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct nlattr *nest;
int n_i = 0;
@@ -318,20 +319,25 @@
if (nla_put_string(skb, TCA_KIND, ops->kind))
goto nla_put_failure;
+ ret = 0;
mutex_lock(&idrinfo->lock);
idr_for_each_entry_ul(idr, p, tmp, id) {
if (IS_ERR(p))
continue;
ret = tcf_idr_release_unsafe(p);
- if (ret == ACT_P_DELETED) {
+ if (ret == ACT_P_DELETED)
module_put(ops->owner);
- n_i++;
- } else if (ret < 0) {
- mutex_unlock(&idrinfo->lock);
- goto nla_put_failure;
- }
+ else if (ret < 0)
+ break;
+ n_i++;
}
mutex_unlock(&idrinfo->lock);
+ if (ret < 0) {
+ if (n_i)
+ NL_SET_ERR_MSG(extack, "Unable to flush all TC actions");
+ else
+ goto nla_put_failure;
+ }
ret = nla_put_u32(skb, TCA_FCNT, n_i);
if (ret)
@@ -352,7 +358,7 @@
struct tcf_idrinfo *idrinfo = tn->idrinfo;
if (type == RTM_DELACTION) {
- return tcf_del_walker(idrinfo, skb, ops);
+ return tcf_del_walker(idrinfo, skb, ops, extack);
} else if (type == RTM_GETACTION) {
return tcf_dump_walker(idrinfo, skb, cb);
} else {
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index e19885d..31d268e 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -62,7 +62,7 @@
c = nf_ct_get(skb, &ctinfo);
if (c) {
- skb->mark = c->mark;
+ skb->mark = READ_ONCE(c->mark);
/* using overlimits stats to count how many packets marked */
ca->tcf_qstats.overlimits++;
goto out;
@@ -82,7 +82,7 @@
c = nf_ct_tuplehash_to_ctrack(thash);
/* using overlimits stats to count how many packets marked */
ca->tcf_qstats.overlimits++;
- skb->mark = c->mark;
+ skb->mark = READ_ONCE(c->mark);
nf_ct_put(c);
out:
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 825b3e9..2d41d86 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -177,7 +177,7 @@
entry = tcf_ct_flow_table_flow_action_get_next(action);
entry->id = FLOW_ACTION_CT_METADATA;
#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
- entry->ct_metadata.mark = ct->mark;
+ entry->ct_metadata.mark = READ_ONCE(ct->mark);
#endif
ctinfo = dir == IP_CT_DIR_ORIGINAL ? IP_CT_ESTABLISHED :
IP_CT_ESTABLISHED_REPLY;
@@ -843,9 +843,9 @@
if (!mask)
return;
- new_mark = mark | (ct->mark & ~(mask));
- if (ct->mark != new_mark) {
- ct->mark = new_mark;
+ new_mark = mark | (READ_ONCE(ct->mark) & ~(mask));
+ if (READ_ONCE(ct->mark) != new_mark) {
+ WRITE_ONCE(ct->mark, new_mark);
if (nf_ct_is_confirmed(ct))
nf_conntrack_event_cache(IPCT_MARK, ct);
}
@@ -1293,7 +1293,7 @@
err = tcf_ct_flow_table_get(params);
if (err)
- goto cleanup;
+ goto cleanup_params;
spin_lock_bh(&c->tcf_lock);
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
@@ -1308,6 +1308,9 @@
return res;
+cleanup_params:
+ if (params->tmpl)
+ nf_ct_put(params->tmpl);
cleanup:
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
index b20c8ce..06c74f2 100644
--- a/net/sched/act_ctinfo.c
+++ b/net/sched/act_ctinfo.c
@@ -33,7 +33,7 @@
{
u8 dscp, newdscp;
- newdscp = (((ct->mark & cp->dscpmask) >> cp->dscpmaskshift) << 2) &
+ newdscp = (((READ_ONCE(ct->mark) & cp->dscpmask) >> cp->dscpmaskshift) << 2) &
~INET_ECN_MASK;
switch (proto) {
@@ -73,7 +73,7 @@
struct sk_buff *skb)
{
ca->stats_cpmark_set++;
- skb->mark = ct->mark & cp->cpmarkmask;
+ skb->mark = READ_ONCE(ct->mark) & cp->cpmarkmask;
}
static int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a,
@@ -131,7 +131,7 @@
}
if (cp->mode & CTINFO_MODE_DSCP)
- if (!cp->dscpstatemask || (ct->mark & cp->dscpstatemask))
+ if (!cp->dscpstatemask || (READ_ONCE(ct->mark) & cp->dscpstatemask))
tcf_ctinfo_dscp_set(ct, ca, cp, skb, wlen, proto);
if (cp->mode & CTINFO_MODE_CPMARK)
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index b453044..0d5463d 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -149,7 +149,7 @@
struct nlattr *pattr;
struct tcf_pedit *p;
int ret = 0, err;
- int ksize;
+ int i, ksize;
u32 index;
if (!nla) {
@@ -228,6 +228,22 @@
p->tcfp_nkeys = parm->nkeys;
}
memcpy(p->tcfp_keys, parm->keys, ksize);
+ p->tcfp_off_max_hint = 0;
+ for (i = 0; i < p->tcfp_nkeys; ++i) {
+ u32 cur = p->tcfp_keys[i].off;
+
+ /* sanitize the shift value for any later use */
+ p->tcfp_keys[i].shift = min_t(size_t, BITS_PER_TYPE(int) - 1,
+ p->tcfp_keys[i].shift);
+
+ /* The AT option can read a single byte, we can bound the actual
+ * value with uchar max.
+ */
+ cur += (0xff & p->tcfp_keys[i].offmask) >> p->tcfp_keys[i].shift;
+
+ /* Each key touches 4 bytes starting from the computed offset */
+ p->tcfp_off_max_hint = max(p->tcfp_off_max_hint, cur + 4);
+ }
p->tcfp_flags = parm->flags;
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
@@ -308,13 +324,18 @@
struct tcf_result *res)
{
struct tcf_pedit *p = to_pedit(a);
+ u32 max_offset;
int i;
- if (skb_unclone(skb, GFP_ATOMIC))
- return p->tcf_action;
-
spin_lock(&p->tcf_lock);
+ max_offset = (skb_transport_header_was_set(skb) ?
+ skb_transport_offset(skb) :
+ skb_network_offset(skb)) +
+ p->tcfp_off_max_hint;
+ if (skb_ensure_writable(skb, min(skb->len, max_offset)))
+ goto unlock;
+
tcf_lastuse_update(&p->tcf_tm);
if (p->tcfp_nkeys > 0) {
@@ -403,6 +424,7 @@
p->tcf_qstats.overlimits++;
done:
bstats_update(&p->tcf_bstats, skb);
+unlock:
spin_unlock(&p->tcf_lock);
return p->tcf_action;
}
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 8d8452b..3807335 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -213,6 +213,20 @@
return err;
}
+static bool tcf_police_mtu_check(struct sk_buff *skb, u32 limit)
+{
+ u32 len;
+
+ if (skb_is_gso(skb))
+ return skb_gso_validate_mac_len(skb, limit);
+
+ len = qdisc_pkt_len(skb);
+ if (skb_at_tc_ingress(skb))
+ len += skb->mac_len;
+
+ return len <= limit;
+}
+
static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
@@ -235,7 +249,7 @@
goto inc_overlimits;
}
- if (qdisc_pkt_len(skb) <= p->tcfp_mtu) {
+ if (tcf_police_mtu_check(skb, p->tcfp_mtu)) {
if (!p->rate_present) {
ret = p->tcfp_result;
goto end;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 9a789a0..c410a73 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1656,10 +1656,10 @@
if (chain->flushing)
return -EAGAIN;
+ RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info));
if (*chain_info->pprev == chain->filter_chain)
tcf_chain0_head_change(chain, tp);
tcf_proto_get(tp);
- RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info));
rcu_assign_pointer(*chain_info->pprev, tp);
return 0;
@@ -2124,6 +2124,7 @@
}
if (chain->tmplt_ops && chain->tmplt_ops != tp->ops) {
+ tfilter_put(tp, fh);
NL_SET_ERR_MSG(extack, "Chain template is set to a different filter kind");
err = -EINVAL;
goto errout;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 8ff6945..35ee6d8 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -998,6 +998,7 @@
static void fl_set_key_vlan(struct nlattr **tb,
__be16 ethertype,
int vlan_id_key, int vlan_prio_key,
+ int vlan_next_eth_type_key,
struct flow_dissector_key_vlan *key_val,
struct flow_dissector_key_vlan *key_mask)
{
@@ -1016,6 +1017,11 @@
}
key_val->vlan_tpid = ethertype;
key_mask->vlan_tpid = cpu_to_be16(~0);
+ if (tb[vlan_next_eth_type_key]) {
+ key_val->vlan_eth_type =
+ nla_get_be16(tb[vlan_next_eth_type_key]);
+ key_mask->vlan_eth_type = cpu_to_be16(~0);
+ }
}
static void fl_set_key_flag(u32 flower_key, u32 flower_mask,
@@ -1497,8 +1503,9 @@
if (eth_type_vlan(ethertype)) {
fl_set_key_vlan(tb, ethertype, TCA_FLOWER_KEY_VLAN_ID,
- TCA_FLOWER_KEY_VLAN_PRIO, &key->vlan,
- &mask->vlan);
+ TCA_FLOWER_KEY_VLAN_PRIO,
+ TCA_FLOWER_KEY_VLAN_ETH_TYPE,
+ &key->vlan, &mask->vlan);
if (tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]) {
ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]);
@@ -1506,6 +1513,7 @@
fl_set_key_vlan(tb, ethertype,
TCA_FLOWER_KEY_CVLAN_ID,
TCA_FLOWER_KEY_CVLAN_PRIO,
+ TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
&key->cvlan, &mask->cvlan);
fl_set_key_val(tb, &key->basic.n_proto,
TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
@@ -2861,13 +2869,13 @@
goto nla_put_failure;
if (mask->basic.n_proto) {
- if (mask->cvlan.vlan_tpid) {
+ if (mask->cvlan.vlan_eth_type) {
if (nla_put_be16(skb, TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
key->basic.n_proto))
goto nla_put_failure;
- } else if (mask->vlan.vlan_tpid) {
+ } else if (mask->vlan.vlan_eth_type) {
if (nla_put_be16(skb, TCA_FLOWER_KEY_VLAN_ETH_TYPE,
- key->basic.n_proto))
+ key->vlan.vlan_eth_type))
goto nla_put_failure;
}
}
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 5efa3e7..b775e68 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -424,6 +424,11 @@
return -EINVAL;
}
+ if (!nhandle) {
+ NL_SET_ERR_MSG(extack, "Replacing with handle of 0 is invalid");
+ return -EINVAL;
+ }
+
h1 = to_hash(nhandle);
b = rtnl_dereference(head->table[h1]);
if (!b) {
@@ -477,6 +482,11 @@
int err;
bool new = true;
+ if (!handle) {
+ NL_SET_ERR_MSG(extack, "Creating with handle of 0 is invalid");
+ return -EINVAL;
+ }
+
if (opt == NULL)
return handle ? -EINVAL : 0;
@@ -526,7 +536,7 @@
rcu_assign_pointer(f->next, f1);
rcu_assign_pointer(*fp, f);
- if (fold && fold->handle && f->handle != fold->handle) {
+ if (fold) {
th = to_hash(fold->handle);
h = from_hash(fold->handle >> 16);
b = rtnl_dereference(head->table[th]);
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 54209a1..da042bc 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -386,14 +386,19 @@
return 0;
}
-static int u32_destroy_key(struct tc_u_knode *n, bool free_pf)
+static void __u32_destroy_key(struct tc_u_knode *n)
{
struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
tcf_exts_destroy(&n->exts);
- tcf_exts_put_net(&n->exts);
if (ht && --ht->refcnt == 0)
kfree(ht);
+ kfree(n);
+}
+
+static void u32_destroy_key(struct tc_u_knode *n, bool free_pf)
+{
+ tcf_exts_put_net(&n->exts);
#ifdef CONFIG_CLS_U32_PERF
if (free_pf)
free_percpu(n->pf);
@@ -402,8 +407,7 @@
if (free_pf)
free_percpu(n->pcpu_success);
#endif
- kfree(n);
- return 0;
+ __u32_destroy_key(n);
}
/* u32_delete_key_rcu should be called when free'ing a copied
@@ -810,10 +814,6 @@
new->flags = n->flags;
RCU_INIT_POINTER(new->ht_down, ht);
- /* bump reference count as long as we hold pointer to structure */
- if (ht)
- ht->refcnt++;
-
#ifdef CONFIG_CLS_U32_PERF
/* Statistics may be incremented by readers during update
* so we must keep them in tact. When the node is later destroyed
@@ -835,6 +835,10 @@
return NULL;
}
+ /* bump reference count as long as we hold pointer to structure */
+ if (ht)
+ ht->refcnt++;
+
return new;
}
@@ -898,13 +902,13 @@
tca[TCA_RATE], ovr, extack);
if (err) {
- u32_destroy_key(new, false);
+ __u32_destroy_key(new);
return err;
}
err = u32_replace_hw_knode(tp, new, flags, extack);
if (err) {
- u32_destroy_key(new, false);
+ __u32_destroy_key(new);
return err;
}
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 6e18aa4..d8ffe41 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1081,12 +1081,13 @@
skip:
if (!ingress) {
- notify_and_destroy(net, skb, n, classid,
- rtnl_dereference(dev->qdisc), new);
+ old = rtnl_dereference(dev->qdisc);
if (new && !new->ops->attach)
qdisc_refcount_inc(new);
rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc);
+ notify_and_destroy(net, skb, n, classid, old, new);
+
if (new && new->ops->attach)
new->ops->attach(new);
} else {
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 1c281cc..794c737 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -575,7 +575,6 @@
pr_debug("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p);
list_for_each_entry(flow, &p->flows, list)
qdisc_reset(flow->q);
- sch->q.qlen = 0;
}
static void atm_tc_destroy(struct Qdisc *sch)
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index c580139..5dc7a3c 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -2224,8 +2224,12 @@
static void cake_reset(struct Qdisc *sch)
{
+ struct cake_sched_data *q = qdisc_priv(sch);
u32 c;
+ if (!q->tins)
+ return;
+
for (c = 0; c < CAKE_MAX_TINS; c++)
cake_clear_tin(sch, c);
}
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 4a78fcf..9a3dff0 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1053,7 +1053,6 @@
cl->cpriority = cl->priority;
}
}
- sch->q.qlen = 0;
}
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 2adbd94..25d2daa 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -315,8 +315,6 @@
rtnl_qdisc_drop(skb, sch);
}
- sch->q.qlen = 0;
- sch->qstats.backlog = 0;
if (q->tab)
memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
q->head = q->tail = 0;
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index dde5646..08424aa 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -443,8 +443,6 @@
qdisc_reset(cl->qdisc);
}
}
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
}
static void drr_destroy_qdisc(struct Qdisc *sch)
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 76ed1a0..a75bc7f 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -408,8 +408,6 @@
pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
if (p->q)
qdisc_reset(p->q);
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
}
static void dsmark_destroy(struct Qdisc *sch)
diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
index c48f910..d96103b 100644
--- a/net/sched/sch_etf.c
+++ b/net/sched/sch_etf.c
@@ -445,9 +445,6 @@
timesortedlist_clear(sch);
__qdisc_reset_queue(&sch->q);
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
-
q->last = 0;
}
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index 9c22487..05817c5 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -722,8 +722,6 @@
}
for (band = 0; band < q->nbands; band++)
qdisc_reset(q->classes[band].qdisc);
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
}
static void ets_qdisc_destroy(struct Qdisc *sch)
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 99e8db2..01d6eea 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -347,8 +347,6 @@
codel_vars_init(&flow->cvars);
}
memset(q->backlogs, 0, q->flows_cnt * sizeof(u32));
- sch->q.qlen = 0;
- sch->qstats.backlog = 0;
q->memory_usage = 0;
}
diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
index c708027..cf04f70 100644
--- a/net/sched/sch_fq_pie.c
+++ b/net/sched/sch_fq_pie.c
@@ -521,9 +521,6 @@
INIT_LIST_HEAD(&flow->flowchain);
pie_vars_init(&flow->vars);
}
-
- sch->q.qlen = 0;
- sch->qstats.backlog = 0;
}
static void fq_pie_destroy(struct Qdisc *sch)
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 5d5391a..ecdd9e8 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -403,7 +403,7 @@
void __qdisc_run(struct Qdisc *q)
{
- int quota = dev_tx_weight;
+ int quota = READ_ONCE(dev_tx_weight);
int packets;
while (qdisc_restart(q, &packets)) {
@@ -1057,6 +1057,21 @@
}
EXPORT_SYMBOL(dev_graft_qdisc);
+static void shutdown_scheduler_queue(struct net_device *dev,
+ struct netdev_queue *dev_queue,
+ void *_qdisc_default)
+{
+ struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
+ struct Qdisc *qdisc_default = _qdisc_default;
+
+ if (qdisc) {
+ rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
+ dev_queue->qdisc_sleeping = qdisc_default;
+
+ qdisc_put(qdisc);
+ }
+}
+
static void attach_one_default_qdisc(struct net_device *dev,
struct netdev_queue *dev_queue,
void *_unused)
@@ -1104,6 +1119,7 @@
if (qdisc == &noop_qdisc) {
netdev_warn(dev, "default qdisc (%s) fail, fallback to %s\n",
default_qdisc_ops->id, noqueue_qdisc_ops.id);
+ netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
dev->priv_flags |= IFF_NO_QUEUE;
netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
qdisc = txq->qdisc_sleeping;
@@ -1357,21 +1373,6 @@
timer_setup(&dev->watchdog_timer, dev_watchdog, 0);
}
-static void shutdown_scheduler_queue(struct net_device *dev,
- struct netdev_queue *dev_queue,
- void *_qdisc_default)
-{
- struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
- struct Qdisc *qdisc_default = _qdisc_default;
-
- if (qdisc) {
- rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
- dev_queue->qdisc_sleeping = qdisc_default;
-
- qdisc_put(qdisc);
- }
-}
-
void dev_shutdown(struct net_device *dev)
{
netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index d1902fc..cdc43a0 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1484,8 +1484,6 @@
}
q->eligible = RB_ROOT;
qdisc_watchdog_cancel(&q->watchdog);
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
}
static void
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index cd70dbc..c3ba018 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -966,8 +966,6 @@
}
qdisc_watchdog_cancel(&q->watchdog);
__qdisc_reset_queue(&q->direct_queue);
- sch->q.qlen = 0;
- sch->qstats.backlog = 0;
memset(q->hlevel, 0, sizeof(q->hlevel));
memset(q->row_mask, 0, sizeof(q->row_mask));
}
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 5c27b42..1c6dbcf 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -152,7 +152,6 @@
for (band = 0; band < q->bands; band++)
qdisc_reset(q->queues[band]);
- sch->q.qlen = 0;
q->curband = 0;
}
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 0c345e4..adc5407 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -1146,9 +1146,9 @@
struct tc_netem_rate rate;
struct tc_netem_slot slot;
- qopt.latency = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->latency),
+ qopt.latency = min_t(psched_time_t, PSCHED_NS2TICKS(q->latency),
UINT_MAX);
- qopt.jitter = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->jitter),
+ qopt.jitter = min_t(psched_time_t, PSCHED_NS2TICKS(q->jitter),
UINT_MAX);
qopt.limit = q->limit;
qopt.loss = q->loss;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 3eabb87..1c805fe 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -135,8 +135,6 @@
for (prio = 0; prio < q->bands; prio++)
qdisc_reset(q->queues[prio]);
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
}
static int prio_offload(struct Qdisc *sch, struct tc_prio_qopt *qopt)
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index af8c63a..1d1d81a 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -1458,8 +1458,6 @@
qdisc_reset(cl->qdisc);
}
}
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
}
static void qfq_destroy_qdisc(struct Qdisc *sch)
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 40adf1f..935d908 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -72,6 +72,7 @@
{
struct red_sched_data *q = qdisc_priv(sch);
struct Qdisc *child = q->qdisc;
+ unsigned int len;
int ret;
q->vars.qavg = red_calc_qavg(&q->parms,
@@ -126,9 +127,10 @@
break;
}
+ len = qdisc_pkt_len(skb);
ret = qdisc_enqueue(skb, child, to_free);
if (likely(ret == NET_XMIT_SUCCESS)) {
- qdisc_qstats_backlog_inc(sch, skb);
+ sch->qstats.backlog += len;
sch->q.qlen++;
} else if (net_xmit_drop_count(ret)) {
q->stats.pdrop++;
@@ -176,8 +178,6 @@
struct red_sched_data *q = qdisc_priv(sch);
qdisc_reset(q->qdisc);
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
red_restart(&q->vars);
}
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index da047a3..9ded562 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -135,15 +135,15 @@
}
}
-static void increment_qlen(const struct sk_buff *skb, struct sfb_sched_data *q)
+static void increment_qlen(const struct sfb_skb_cb *cb, struct sfb_sched_data *q)
{
u32 sfbhash;
- sfbhash = sfb_hash(skb, 0);
+ sfbhash = cb->hashes[0];
if (sfbhash)
increment_one_qlen(sfbhash, 0, q);
- sfbhash = sfb_hash(skb, 1);
+ sfbhash = cb->hashes[1];
if (sfbhash)
increment_one_qlen(sfbhash, 1, q);
}
@@ -281,8 +281,10 @@
{
struct sfb_sched_data *q = qdisc_priv(sch);
+ unsigned int len = qdisc_pkt_len(skb);
struct Qdisc *child = q->qdisc;
struct tcf_proto *fl;
+ struct sfb_skb_cb cb;
int i;
u32 p_min = ~0;
u32 minqlen = ~0;
@@ -399,11 +401,12 @@
}
enqueue:
+ memcpy(&cb, sfb_skb_cb(skb), sizeof(cb));
ret = qdisc_enqueue(skb, child, to_free);
if (likely(ret == NET_XMIT_SUCCESS)) {
- qdisc_qstats_backlog_inc(sch, skb);
+ sch->qstats.backlog += len;
sch->q.qlen++;
- increment_qlen(skb, q);
+ increment_qlen(&cb, q);
} else if (net_xmit_drop_count(ret)) {
q->stats.childdrop++;
qdisc_qstats_drop(sch);
@@ -452,9 +455,8 @@
{
struct sfb_sched_data *q = qdisc_priv(sch);
- qdisc_reset(q->qdisc);
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
+ if (likely(q->qdisc))
+ qdisc_reset(q->qdisc);
q->slot = 0;
q->double_buffering = false;
sfb_zero_all_buckets(q);
diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c
index 7a5e4c4..df72fb8 100644
--- a/net/sched/sch_skbprio.c
+++ b/net/sched/sch_skbprio.c
@@ -213,9 +213,6 @@
struct skbprio_sched_data *q = qdisc_priv(sch);
int prio;
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
-
for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++)
__skb_queue_purge(&q->qdiscs[prio]);
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 806babd..7f33b31 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -65,6 +65,7 @@
u32 flags;
enum tk_offsets tk_offset;
int clockid;
+ bool offloaded;
atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+
* speeds it's sub-nanoseconds per byte
*/
@@ -427,7 +428,8 @@
if (unlikely(!child))
return qdisc_drop(skb, sch, to_free);
- if (skb->sk && sock_flag(skb->sk, SOCK_TXTIME)) {
+ /* sk_flags are only safe to use on full sockets. */
+ if (skb->sk && sk_fullsock(skb->sk) && sock_flag(skb->sk, SOCK_TXTIME)) {
if (!is_valid_interval(skb, sch))
return qdisc_drop(skb, sch, to_free);
} else if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
@@ -1266,6 +1268,8 @@
goto done;
}
+ q->offloaded = true;
+
done:
taprio_offload_free(offload);
@@ -1280,12 +1284,9 @@
struct tc_taprio_qopt_offload *offload;
int err;
- if (!FULL_OFFLOAD_IS_ENABLED(q->flags))
+ if (!q->offloaded)
return 0;
- if (!ops->ndo_setup_tc)
- return -EOPNOTSUPP;
-
offload = taprio_offload_alloc(0);
if (!offload) {
NL_SET_ERR_MSG(extack,
@@ -1301,6 +1302,8 @@
goto out;
}
+ q->offloaded = false;
+
out:
taprio_offload_free(offload);
@@ -1623,8 +1626,6 @@
if (q->qdiscs[i])
qdisc_reset(q->qdiscs[i]);
}
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
}
static void taprio_destroy(struct Qdisc *sch)
@@ -1903,12 +1904,14 @@
static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl)
{
- struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ unsigned int ntx = cl - 1;
- if (!dev_queue)
+ if (ntx >= dev->num_tx_queues)
return NULL;
- return dev_queue->qdisc_sleeping;
+ return q->qdiscs[ntx];
}
static unsigned long taprio_find(struct Qdisc *sch, u32 classid)
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 78e7902..7461e5c 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -316,8 +316,6 @@
struct tbf_sched_data *q = qdisc_priv(sch);
qdisc_reset(q->qdisc);
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
q->t_c = ktime_get_ns();
q->tokens = q->buffer;
q->ptokens = q->mtu;
@@ -342,6 +340,7 @@
struct nlattr *tb[TCA_TBF_MAX + 1];
struct tc_tbf_qopt *qopt;
struct Qdisc *child = NULL;
+ struct Qdisc *old = NULL;
struct psched_ratecfg rate;
struct psched_ratecfg peak;
u64 max_size;
@@ -433,7 +432,7 @@
sch_tree_lock(sch);
if (child) {
qdisc_tree_flush_backlog(q->qdisc);
- qdisc_put(q->qdisc);
+ old = q->qdisc;
q->qdisc = child;
}
q->limit = qopt->limit;
@@ -453,6 +452,7 @@
memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg));
sch_tree_unlock(sch);
+ qdisc_put(old);
err = 0;
tbf_offload_change(sch);
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 6af6b95..79aaab5 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -124,7 +124,6 @@
struct teql_sched_data *dat = qdisc_priv(sch);
skb_queue_purge(&dat->q);
- sch->q.qlen = 0;
}
static void
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index fdb69d4..2d4ec61 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -226,9 +226,8 @@
if (!sctp_ulpq_init(&asoc->ulpq, asoc))
goto fail_init;
- if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams,
- 0, gfp))
- goto fail_init;
+ if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, 0, gfp))
+ goto stream_free;
/* Initialize default path MTU. */
asoc->pathmtu = sp->pathmtu;
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index db6b737..3496414 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -863,12 +863,17 @@
}
list_del_init(&shkey->key_list);
- sctp_auth_shkey_release(shkey);
list_add(&cur_key->key_list, sh_keys);
- if (asoc && asoc->active_key_id == auth_key->sca_keynumber)
- sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL);
+ if (asoc && asoc->active_key_id == auth_key->sca_keynumber &&
+ sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL)) {
+ list_del_init(&cur_key->key_list);
+ sctp_auth_shkey_release(cur_key);
+ list_add(&shkey->key_list, sh_keys);
+ return -ENOMEM;
+ }
+ sctp_auth_shkey_release(shkey);
return 0;
}
@@ -902,8 +907,13 @@
return -EINVAL;
if (asoc) {
+ __u16 active_key_id = asoc->active_key_id;
+
asoc->active_key_id = key_id;
- sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL);
+ if (sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL)) {
+ asoc->active_key_id = active_key_id;
+ return -ENOMEM;
+ }
} else
ep->active_key_id = key_id;
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 34494a0..8f3aab6 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -92,6 +92,7 @@
struct sctp_chunk *chunk;
union sctp_addr src;
union sctp_addr dest;
+ int bound_dev_if;
int family;
struct sctp_af *af;
struct net *net = dev_net(skb->dev);
@@ -169,7 +170,8 @@
* If a frame arrives on an interface and the receiving socket is
* bound to another interface, via SO_BINDTODEVICE, treat it as OOTB
*/
- if (sk->sk_bound_dev_if && (sk->sk_bound_dev_if != af->skb_iif(skb))) {
+ bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
+ if (bound_dev_if && (bound_dev_if != af->skb_iif(skb))) {
if (transport) {
sctp_transport_put(transport);
asoc = NULL;
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 3fd06a2..83a89dc 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -384,6 +384,7 @@
{
struct sctp_outq *q = &asoc->outqueue;
struct sctp_chunk *chk, *temp;
+ struct sctp_stream_out *sout;
q->sched->unsched_all(&asoc->stream);
@@ -398,12 +399,14 @@
sctp_sched_dequeue_common(q, chk);
asoc->sent_cnt_removable--;
asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
- if (chk->sinfo.sinfo_stream < asoc->stream.outcnt) {
- struct sctp_stream_out *streamout =
- SCTP_SO(&asoc->stream, chk->sinfo.sinfo_stream);
- streamout->ext->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
- }
+ sout = SCTP_SO(&asoc->stream, chk->sinfo.sinfo_stream);
+ sout->ext->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
+
+ /* clear out_curr if all frag chunks are pruned */
+ if (asoc->stream.out_curr == sout &&
+ list_is_last(&chk->frag_list, &chk->msg->chunks))
+ asoc->stream.out_curr = NULL;
msg_len -= chk->skb->truesize + sizeof(struct sctp_chunk);
sctp_chunk_free(chk);
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 940f1e2..6e4ca83 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -358,7 +358,7 @@
if (addr->v4.sin_addr.s_addr != htonl(INADDR_ANY) &&
ret != RTN_LOCAL &&
!sp->inet.freebind &&
- !net->ipv4.sysctl_ip_nonlocal_bind)
+ !READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind))
return 0;
if (ipv6_only_sock(sctp_opt2sk(sp)))
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 0948f14..d4e5969 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -458,6 +458,10 @@
goto out_unlock;
}
+ /* This happens when the response arrives after the timer is triggered. */
+ if (!asoc->strreset_chunk)
+ goto out_unlock;
+
error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_RECONF),
asoc->state, asoc->ep, asoc,
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 0a9e2c7..e9b4ea3 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -5518,7 +5518,7 @@
* Set the daddr and initialize id to something more random and also
* copy over any ip options.
*/
- sp->pf->to_sk_daddr(&asoc->peer.primary_addr, sk);
+ sp->pf->to_sk_daddr(&asoc->peer.primary_addr, sock->sk);
sp->pf->copy_ip_options(sk, sock->sk);
/* Populate the fields of the newsk from the oldsk and migrate the
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index 6dc95dc..ef9fcea 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -137,7 +137,7 @@
ret = sctp_stream_alloc_out(stream, outcnt, gfp);
if (ret)
- goto out_err;
+ return ret;
for (i = 0; i < stream->outcnt; i++)
SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
@@ -145,22 +145,9 @@
handle_in:
sctp_stream_interleave_init(stream);
if (!incnt)
- goto out;
+ return 0;
- ret = sctp_stream_alloc_in(stream, incnt, gfp);
- if (ret)
- goto in_err;
-
- goto out;
-
-in_err:
- sched->free(stream);
- genradix_free(&stream->in);
-out_err:
- genradix_free(&stream->out);
- stream->outcnt = 0;
-out:
- return ret;
+ return sctp_stream_alloc_in(stream, incnt, gfp);
}
int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid)
diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
index 99e5f69..a2e1d34 100644
--- a/net/sctp/stream_sched.c
+++ b/net/sctp/stream_sched.c
@@ -163,7 +163,7 @@
if (!SCTP_SO(&asoc->stream, i)->ext)
continue;
- ret = n->init_sid(&asoc->stream, i, GFP_KERNEL);
+ ret = n->init_sid(&asoc->stream, i, GFP_ATOMIC);
if (ret)
goto err;
}
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 4f16d40..41cbc7c 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1057,6 +1057,8 @@
smc->sk.sk_state = SMC_CLOSED;
if (rc == -EPIPE || rc == -EAGAIN)
smc->sk.sk_err = EPIPE;
+ else if (rc == -ECONNREFUSED)
+ smc->sk.sk_err = ECONNREFUSED;
else if (signal_pending(current))
smc->sk.sk_err = -sock_intr_errno(timeo);
sock_put(&smc->sk); /* passive closing */
@@ -1116,9 +1118,9 @@
if (rc && rc != -EINPROGRESS)
goto out;
- sock_hold(&smc->sk); /* sock put in passive closing */
if (smc->use_fallback)
goto out;
+ sock_hold(&smc->sk); /* sock put in passive closing */
if (flags & O_NONBLOCK) {
if (queue_work(smc_hs_wq, &smc->connect_work))
smc->connect_nonblock = 1;
@@ -1323,7 +1325,6 @@
{
struct sock *newsmcsk = &new_smc->sk;
- sk_refcnt_debug_inc(newsmcsk);
if (newsmcsk->sk_state == SMC_INIT)
newsmcsk->sk_state = SMC_ACTIVE;
@@ -2144,8 +2145,10 @@
if (smc->use_fallback) {
rc = kernel_sock_shutdown(smc->clcsock, how);
sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
- if (sk->sk_shutdown == SHUTDOWN_MASK)
+ if (sk->sk_shutdown == SHUTDOWN_MASK) {
sk->sk_state = SMC_CLOSED;
+ sock_put(sk);
+ }
goto out;
}
switch (how) {
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 0c490cd..94503f3 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -72,7 +72,7 @@
/* abnormal termination */
if (!rc)
smc_wr_tx_put_slot(link,
- (struct smc_wr_tx_pend_priv *)pend);
+ (struct smc_wr_tx_pend_priv *)(*pend));
rc = -EPIPE;
}
return rc;
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index d69aac6..bf485a2 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1426,7 +1426,7 @@
*/
static inline int smc_rmb_wnd_update_limit(int rmbe_size)
{
- return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
+ return max_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}
/* map an rmb buf to a link */
@@ -1584,7 +1584,7 @@
static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
struct smc_buf_desc *buf_desc, bool is_rmb)
{
- int i, rc = 0;
+ int i, rc = 0, cnt = 0;
/* protect against parallel link reconfiguration */
mutex_lock(&lgr->llc_conf_mutex);
@@ -1597,9 +1597,12 @@
rc = -ENOMEM;
goto out;
}
+ cnt++;
}
out:
mutex_unlock(&lgr->llc_conf_mutex);
+ if (!rc && !cnt)
+ rc = -EINVAL;
return rc;
}
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index ee1f0fd..0ef15f8 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -1787,7 +1787,7 @@
init_waitqueue_head(&lgr->llc_flow_waiter);
init_waitqueue_head(&lgr->llc_msg_waiter);
mutex_init(&lgr->llc_conf_mutex);
- lgr->llc_testlink_time = net->ipv4.sysctl_tcp_keepalive_time;
+ lgr->llc_testlink_time = READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time);
}
/* called after lgr was removed from lgr_list */
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 9007c7e..30bae60 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -310,8 +310,9 @@
list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
if (!strncmp(ibdev->ibdev->name, ib_name,
sizeof(ibdev->ibdev->name)) ||
- !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name,
- IB_DEVICE_NAME_MAX - 1)) {
+ (ibdev->ibdev->dev.parent &&
+ !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name,
+ IB_DEVICE_NAME_MAX - 1))) {
goto out;
}
}
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index fcfac59..7f7e983 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -346,12 +346,12 @@
}
break;
}
+ if (!timeo)
+ return -EAGAIN;
if (signal_pending(current)) {
read_done = sock_intr_errno(timeo);
break;
}
- if (!timeo)
- return -EAGAIN;
}
if (!smc_rx_data_available(conn)) {
diff --git a/net/socket.c b/net/socket.c
index d52c265..bcf68b1 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1670,7 +1670,7 @@
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (sock) {
- somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
+ somaxconn = READ_ONCE(sock_net(sock->sk)->core.sysctl_somaxconn);
if ((unsigned int)backlog > somaxconn)
backlog = somaxconn;
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index a9f0d17..1bae32c 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -445,7 +445,7 @@
* Enforce a 60 second garbage collection moratorium
* Note that the cred_unused list must be time-ordered.
*/
- if (!time_in_range(cred->cr_expire, expired, jiffies))
+ if (time_in_range(cred->cr_expire, expired, jiffies))
continue;
if (!rpcauth_unhash_cred(cred))
continue;
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 22a2c23..77e347a 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -64,6 +64,17 @@
kfree(req);
}
+static void xprt_bc_reinit_xdr_buf(struct xdr_buf *buf)
+{
+ buf->head[0].iov_len = PAGE_SIZE;
+ buf->tail[0].iov_len = 0;
+ buf->pages = NULL;
+ buf->page_len = 0;
+ buf->flags = 0;
+ buf->len = 0;
+ buf->buflen = PAGE_SIZE;
+}
+
static int xprt_alloc_xdr_buf(struct xdr_buf *buf, gfp_t gfp_flags)
{
struct page *page;
@@ -292,6 +303,9 @@
*/
spin_lock_bh(&xprt->bc_pa_lock);
if (xprt_need_to_requeue(xprt)) {
+ xprt_bc_reinit_xdr_buf(&req->rq_snd_buf);
+ xprt_bc_reinit_xdr_buf(&req->rq_rcv_buf);
+ req->rq_rcv_buf.len = PAGE_SIZE;
list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list);
xprt->bc_alloc_count++;
atomic_inc(&xprt->bc_slot_count);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 84c8a53..78c6648 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1867,7 +1867,7 @@
break;
case -EKEYEXPIRED:
if (!task->tk_cred_retry) {
- rpc_exit(task, task->tk_status);
+ rpc_call_rpcerror(task, task->tk_status);
} else {
task->tk_action = call_refresh;
task->tk_cred_retry--;
@@ -2175,6 +2175,7 @@
* socket just returned a connection error,
* then hold onto the transport lock.
*/
+ case -ENOMEM:
case -ENOBUFS:
rpc_delay(task, HZ>>2);
fallthrough;
@@ -2258,6 +2259,7 @@
case -ENOTCONN:
case -EPIPE:
break;
+ case -ENOMEM:
case -ENOBUFS:
rpc_delay(task, HZ>>2);
fallthrough;
@@ -2340,6 +2342,11 @@
case -EPIPE:
case -EAGAIN:
break;
+ case -ENFILE:
+ case -ENOBUFS:
+ case -ENOMEM:
+ rpc_delay(task, HZ>>2);
+ break;
case -EIO:
/* shutdown or soft timeout */
goto out_exit;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 5f854ff..bb13620 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -478,6 +478,7 @@
inode->i_fop = &simple_dir_operations;
inode->i_op = &simple_dir_inode_operations;
inc_nlink(inode);
+ break;
default:
break;
}
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index c045f63..f0f55fb 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -186,11 +186,6 @@
/*
* Add new request to wait queue.
- *
- * Swapper tasks always get inserted at the head of the queue.
- * This should avoid many nasty memory deadlocks and hopefully
- * improve overall performance.
- * Everyone else gets appended to the queue to ensure proper FIFO behavior.
*/
static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
struct rpc_task *task,
@@ -199,8 +194,6 @@
INIT_LIST_HEAD(&task->u.tk_wait.timer_list);
if (RPC_IS_PRIORITY(queue))
__rpc_add_wait_queue_priority(queue, task, queue_priority);
- else if (RPC_IS_SWAPPER(task))
- list_add(&task->u.tk_wait.list, &queue->tasks[0]);
else
list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]);
task->tk_waitqueue = queue;
@@ -1012,8 +1005,10 @@
struct rpc_buffer *buf;
gfp_t gfp = GFP_NOFS;
+ if (RPC_IS_ASYNC(task))
+ gfp = GFP_NOWAIT | __GFP_NOWARN;
if (RPC_IS_SWAPPER(task))
- gfp = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
+ gfp |= __GFP_MEMALLOC;
size += sizeof(struct rpc_buffer);
if (size <= RPC_BUFFER_MAXSIZE)
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index eba1714..6d5bb8b 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1091,7 +1091,9 @@
int flags, ret;
*sentp = 0;
- xdr_alloc_bvec(xdr, GFP_KERNEL);
+ ret = xdr_alloc_bvec(xdr, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
msg->msg_flags = MSG_MORE;
ret = kernel_sendmsg(sock, msg, &rm, 1, rm.iov_len);
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 71e03b9..d84bb50 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -752,7 +752,11 @@
*/
xdr->p = (void *)p + frag2bytes;
space_left = xdr->buf->buflen - xdr->buf->len;
- xdr->end = (void *)p + min_t(int, space_left, PAGE_SIZE);
+ if (space_left - frag1bytes >= PAGE_SIZE)
+ xdr->end = (void *)p + PAGE_SIZE;
+ else
+ xdr->end = (void *)p + space_left - frag1bytes;
+
xdr->buf->page_len += frag2bytes;
xdr->buf->len += nbytes;
return p;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 04aaca4..13d5323 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -732,6 +732,21 @@
EXPORT_SYMBOL_GPL(xprt_disconnect_done);
/**
+ * xprt_schedule_autoclose_locked - Try to schedule an autoclose RPC call
+ * @xprt: transport to disconnect
+ */
+static void xprt_schedule_autoclose_locked(struct rpc_xprt *xprt)
+{
+ if (test_and_set_bit(XPRT_CLOSE_WAIT, &xprt->state))
+ return;
+ if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
+ queue_work(xprtiod_workqueue, &xprt->task_cleanup);
+ else if (xprt->snd_task && !test_bit(XPRT_SND_IS_COOKIE, &xprt->state))
+ rpc_wake_up_queued_task_set_status(&xprt->pending,
+ xprt->snd_task, -ENOTCONN);
+}
+
+/**
* xprt_force_disconnect - force a transport to disconnect
* @xprt: transport to disconnect
*
@@ -742,13 +757,7 @@
/* Don't race with the test_bit() in xprt_clear_locked() */
spin_lock(&xprt->transport_lock);
- set_bit(XPRT_CLOSE_WAIT, &xprt->state);
- /* Try to schedule an autoclose RPC call */
- if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
- queue_work(xprtiod_workqueue, &xprt->task_cleanup);
- else if (xprt->snd_task && !test_bit(XPRT_SND_IS_COOKIE, &xprt->state))
- rpc_wake_up_queued_task_set_status(&xprt->pending,
- xprt->snd_task, -ENOTCONN);
+ xprt_schedule_autoclose_locked(xprt);
spin_unlock(&xprt->transport_lock);
}
EXPORT_SYMBOL_GPL(xprt_force_disconnect);
@@ -788,11 +797,7 @@
goto out;
if (test_bit(XPRT_CLOSING, &xprt->state))
goto out;
- set_bit(XPRT_CLOSE_WAIT, &xprt->state);
- /* Try to schedule an autoclose RPC call */
- if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
- queue_work(xprtiod_workqueue, &xprt->task_cleanup);
- xprt_wake_pending_tasks(xprt, -EAGAIN);
+ xprt_schedule_autoclose_locked(xprt);
out:
spin_unlock(&xprt->transport_lock);
}
@@ -881,12 +886,7 @@
if (!xprt_lock_write(xprt, task))
return;
- if (test_and_clear_bit(XPRT_CLOSE_WAIT, &xprt->state)) {
- trace_xprt_disconnect_cleanup(xprt);
- xprt->ops->close(xprt);
- }
-
- if (!xprt_connected(xprt)) {
+ if (!xprt_connected(xprt) && !test_bit(XPRT_CLOSE_WAIT, &xprt->state)) {
task->tk_rqstp->rq_connect_cookie = xprt->connect_cookie;
rpc_sleep_on_timeout(&xprt->pending, task, NULL,
xprt_request_timeout(task->tk_rqstp));
@@ -1306,17 +1306,6 @@
INIT_LIST_HEAD(&req->rq_xmit2);
goto out;
}
- } else if (RPC_IS_SWAPPER(task)) {
- list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) {
- if (pos->rq_cong || pos->rq_bytes_sent)
- continue;
- if (RPC_IS_SWAPPER(pos->rq_task))
- continue;
- /* Note: req is added _before_ pos */
- list_add_tail(&req->rq_xmit, &pos->rq_xmit);
- INIT_LIST_HEAD(&req->rq_xmit2);
- goto out;
- }
} else if (!req->rq_seqno) {
list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) {
if (pos->rq_task->tk_owner != task->tk_owner)
@@ -1635,12 +1624,15 @@
static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt)
{
struct rpc_rqst *req = ERR_PTR(-EAGAIN);
+ gfp_t gfp_mask = GFP_KERNEL;
if (xprt->num_reqs >= xprt->max_reqs)
goto out;
++xprt->num_reqs;
spin_unlock(&xprt->reserve_lock);
- req = kzalloc(sizeof(struct rpc_rqst), GFP_NOFS);
+ if (current->flags & PF_WQ_WORKER)
+ gfp_mask |= __GFP_NORETRY | __GFP_NOWARN;
+ req = kzalloc(sizeof(*req), gfp_mask);
spin_lock(&xprt->reserve_lock);
if (req != NULL)
goto out;
@@ -2037,7 +2029,14 @@
*/
wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_UNINTERRUPTIBLE);
+ /*
+ * xprt_schedule_autodisconnect() can run after XPRT_LOCKED
+ * is cleared. We use ->transport_lock to ensure the mod_timer()
+ * can only run *before* del_time_sync(), never after.
+ */
+ spin_lock(&xprt->transport_lock);
del_timer_sync(&xprt->timer);
+ spin_unlock(&xprt->transport_lock);
/*
* Destroy sockets etc from the system workqueue so they can
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index ca267a8..b8174c7 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -1137,6 +1137,7 @@
rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
{
+ struct rpc_xprt *xprt = &r_xprt->rx_xprt;
struct xdr_stream *xdr = &rep->rr_stream;
__be32 *p;
@@ -1160,6 +1161,10 @@
if (*p != cpu_to_be32(RPC_CALL))
return false;
+ /* No bc service. */
+ if (xprt->bc_serv == NULL)
+ return false;
+
/* Now that we are sure this is a backchannel call,
* advance to the RPC header.
*/
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 8e2368a..9cf10cf 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -519,7 +519,7 @@
return;
out_sleep:
- task->tk_status = -EAGAIN;
+ task->tk_status = -ENOMEM;
xprt_add_backlog(xprt, task);
}
@@ -572,8 +572,10 @@
gfp_t flags;
flags = RPCRDMA_DEF_GFP;
+ if (RPC_IS_ASYNC(task))
+ flags = GFP_NOWAIT | __GFP_NOWARN;
if (RPC_IS_SWAPPER(task))
- flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
+ flags |= __GFP_MEMALLOC;
if (!rpcrdma_check_regbuf(r_xprt, req->rl_sendbuf, rqst->rq_callsize,
flags))
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 16c7758..ae5b538 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -754,12 +754,12 @@
/**
* xs_nospace - handle transmit was incomplete
* @req: pointer to RPC request
+ * @transport: pointer to struct sock_xprt
*
*/
-static int xs_nospace(struct rpc_rqst *req)
+static int xs_nospace(struct rpc_rqst *req, struct sock_xprt *transport)
{
- struct rpc_xprt *xprt = req->rq_xprt;
- struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+ struct rpc_xprt *xprt = &transport->xprt;
struct sock *sk = transport->inet;
int ret = -EAGAIN;
@@ -770,16 +770,6 @@
/* Don't race with disconnect */
if (xprt_connected(xprt)) {
- /* wait for more buffer space */
- sk->sk_write_pending++;
- xprt_wait_for_buffer_space(xprt);
- } else
- ret = -ENOTCONN;
-
- spin_unlock(&xprt->transport_lock);
-
- /* Race breaker in case memory is freed before above code is called */
- if (ret == -EAGAIN) {
struct socket_wq *wq;
rcu_read_lock();
@@ -787,8 +777,42 @@
set_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags);
rcu_read_unlock();
- sk->sk_write_space(sk);
- }
+ /* wait for more buffer space */
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ sk->sk_write_pending++;
+ xprt_wait_for_buffer_space(xprt);
+ } else
+ ret = -ENOTCONN;
+
+ spin_unlock(&xprt->transport_lock);
+ return ret;
+}
+
+static int xs_sock_nospace(struct rpc_rqst *req)
+{
+ struct sock_xprt *transport =
+ container_of(req->rq_xprt, struct sock_xprt, xprt);
+ struct sock *sk = transport->inet;
+ int ret = -EAGAIN;
+
+ lock_sock(sk);
+ if (!sock_writeable(sk))
+ ret = xs_nospace(req, transport);
+ release_sock(sk);
+ return ret;
+}
+
+static int xs_stream_nospace(struct rpc_rqst *req)
+{
+ struct sock_xprt *transport =
+ container_of(req->rq_xprt, struct sock_xprt, xprt);
+ struct sock *sk = transport->inet;
+ int ret = -EAGAIN;
+
+ lock_sock(sk);
+ if (!sk_stream_memory_free(sk))
+ ret = xs_nospace(req, transport);
+ release_sock(sk);
return ret;
}
@@ -847,7 +871,7 @@
/* Close the stream if the previous transmission was incomplete */
if (xs_send_request_was_aborted(transport, req)) {
- xs_close(xprt);
+ xprt_force_disconnect(xprt);
return -ENOTCONN;
}
@@ -878,14 +902,14 @@
case -ENOBUFS:
break;
case -EAGAIN:
- status = xs_nospace(req);
+ status = xs_stream_nospace(req);
break;
default:
dprintk("RPC: sendmsg returned unrecognized error %d\n",
-status);
fallthrough;
case -EPIPE:
- xs_close(xprt);
+ xprt_force_disconnect(xprt);
status = -ENOTCONN;
}
@@ -954,7 +978,7 @@
/* Should we call xs_close() here? */
break;
case -EAGAIN:
- status = xs_nospace(req);
+ status = xs_sock_nospace(req);
break;
case -ENETUNREACH:
case -ENOBUFS:
@@ -1069,7 +1093,7 @@
/* Should we call xs_close() here? */
break;
case -EAGAIN:
- status = xs_nospace(req);
+ status = xs_stream_nospace(req);
break;
case -ECONNRESET:
case -ECONNREFUSED:
@@ -1167,6 +1191,16 @@
if (sk == NULL)
return;
+ /*
+ * Make sure we're calling this in a context from which it is safe
+ * to call __fput_sync(). In practice that means rpciod and the
+ * system workqueue.
+ */
+ if (!(current->flags & PF_WQ_WORKER)) {
+ WARN_ON_ONCE(1);
+ set_bit(XPRT_CLOSE_WAIT, &xprt->state);
+ return;
+ }
if (atomic_read(&transport->xprt.swapper))
sk_clear_memalloc(sk);
@@ -1190,7 +1224,7 @@
mutex_unlock(&transport->recv_mutex);
trace_rpc_socket_close(xprt, sock);
- fput(filp);
+ __fput_sync(filp);
xprt_disconnect_done(xprt);
}
@@ -1883,6 +1917,7 @@
xprt->stat.connect_time += (long)jiffies -
xprt->stat.connect_start;
xprt_set_connected(xprt);
+ break;
case -ENOBUFS:
break;
case -ENOENT:
@@ -2236,10 +2271,14 @@
struct rpc_xprt *xprt = &transport->xprt;
int status = -EIO;
- if (!sock) {
- sock = xs_create_sock(xprt, transport,
- xs_addr(xprt)->sa_family, SOCK_STREAM,
- IPPROTO_TCP, true);
+ if (xprt_connected(xprt))
+ goto out;
+ if (test_and_clear_bit(XPRT_SOCK_CONNECT_SENT,
+ &transport->sock_state) ||
+ !sock) {
+ xs_reset_transport(transport);
+ sock = xs_create_sock(xprt, transport, xs_addr(xprt)->sa_family,
+ SOCK_STREAM, IPPROTO_TCP, true);
if (IS_ERR(sock)) {
status = PTR_ERR(sock);
goto out;
@@ -2270,6 +2309,8 @@
break;
case 0:
case -EINPROGRESS:
+ set_bit(XPRT_SOCK_CONNECT_SENT, &transport->sock_state);
+ fallthrough;
case -EALREADY:
xprt_unlock_connect(xprt, transport);
return;
@@ -2323,11 +2364,7 @@
if (transport->sock != NULL) {
dprintk("RPC: xs_connect delayed xprt %p for %lu "
- "seconds\n",
- xprt, xprt->reestablish_timeout / HZ);
-
- /* Start by resetting any existing state */
- xs_reset_transport(transport);
+ "seconds\n", xprt, xprt->reestablish_timeout / HZ);
delay = xprt_reconnect_delay(xprt);
xprt_reconnect_backoff(xprt, XS_TCP_INIT_REEST_TO);
@@ -2802,9 +2839,6 @@
}
xprt_set_bound(xprt);
xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL);
- ret = ERR_PTR(xs_local_setup_socket(transport));
- if (ret)
- goto out_err;
break;
default:
ret = ERR_PTR(-EAFNOSUPPORT);
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 6911f1c..72c31ef 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -249,9 +249,8 @@
u32 i;
if (!bearer_name_validate(name, &b_names)) {
- errstr = "illegal name";
NL_SET_ERR_MSG(extack, "Illegal name");
- goto rejected;
+ return res;
}
if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) {
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 40c0308..7724499 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -60,7 +60,7 @@
tn->trial_addr = 0;
tn->addr_trial_end = 0;
tn->capabilities = TIPC_NODE_CAPABILITIES;
- INIT_WORK(&tn->final_work.work, tipc_net_finalize_work);
+ INIT_WORK(&tn->work, tipc_net_finalize_work);
memset(tn->node_id, 0, sizeof(tn->node_id));
memset(tn->node_id_string, 0, sizeof(tn->node_id_string));
tn->mon_threshold = TIPC_DEF_MON_THRESHOLD;
@@ -111,10 +111,9 @@
struct tipc_net *tn = tipc_net(net);
tipc_detach_loopback(net);
- /* Make sure the tipc_net_finalize_work() finished */
- cancel_work_sync(&tn->final_work.work);
tipc_net_stop(net);
-
+ /* Make sure the tipc_net_finalize_work() finished */
+ cancel_work_sync(&tn->work);
tipc_bcast_stop(net);
tipc_nametbl_stop(net);
tipc_sk_rht_destroy(net);
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 992924a..73a26b0 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -90,12 +90,6 @@
extern int sysctl_tipc_rmem[3] __read_mostly;
extern int sysctl_tipc_named_timeout __read_mostly;
-struct tipc_net_work {
- struct work_struct work;
- struct net *net;
- u32 addr;
-};
-
struct tipc_net {
u8 node_id[NODE_ID_LEN];
u32 node_addr;
@@ -150,7 +144,7 @@
struct tipc_crypto *crypto_tx;
#endif
/* Work item for net finalize */
- struct tipc_net_work final_work;
+ struct work_struct work;
/* The numbers of work queues in schedule */
atomic_t wq_count;
};
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index d4ecacd..2730310 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -147,8 +147,8 @@
{
struct net *net = d->net;
struct tipc_net *tn = tipc_net(net);
- bool trial = time_before(jiffies, tn->addr_trial_end);
u32 self = tipc_own_addr(net);
+ bool trial = time_before(jiffies, tn->addr_trial_end) && !self;
if (mtyp == DSC_TRIAL_FAIL_MSG) {
if (!trial)
@@ -167,7 +167,7 @@
/* Apply trial address if we just left trial period */
if (!trial && !self) {
- tipc_sched_net_finalize(net, tn->trial_addr);
+ schedule_work(&tn->work);
msg_set_prevnode(buf_msg(d->skb), tn->trial_addr);
msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
}
@@ -210,7 +210,10 @@
u32 self;
int err;
- skb_linearize(skb);
+ if (skb_linearize(skb)) {
+ kfree_skb(skb);
+ return;
+ }
hdr = buf_msg(skb);
if (caps & TIPC_NODE_ID128)
@@ -307,7 +310,7 @@
if (!time_before(jiffies, tn->addr_trial_end) && !tipc_own_addr(net)) {
mod_timer(&d->timer, jiffies + TIPC_DISC_INIT);
spin_unlock_bh(&d->lock);
- tipc_sched_net_finalize(net, tn->trial_addr);
+ schedule_work(&tn->work);
return;
}
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 7a353ff..064fdb8 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -344,6 +344,11 @@
return l->net_plane;
}
+struct net *tipc_link_net(struct tipc_link *l)
+{
+ return l->net;
+}
+
void tipc_link_update_caps(struct tipc_link *l, u16 capabilities)
{
l->peer_caps = capabilities;
diff --git a/net/tipc/link.h b/net/tipc/link.h
index fc07232..a16f401 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -156,4 +156,5 @@
int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
struct sk_buff_head *xmitq);
bool tipc_link_too_silent(struct tipc_link *l);
+struct net *tipc_link_net(struct tipc_link *l);
#endif
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index a37190d..1d90f39 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -130,7 +130,7 @@
static int map_get(u64 up_map, int i)
{
- return (up_map & (1 << i)) >> i;
+ return (up_map & (1ULL << i)) >> i;
}
static struct tipc_peer *peer_prev(struct tipc_peer *peer)
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 0bb2323..671cb4f 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -41,6 +41,7 @@
#include "socket.h"
#include "node.h"
#include "bcast.h"
+#include "link.h"
#include "netlink.h"
#include "monitor.h"
@@ -138,19 +139,9 @@
void tipc_net_finalize_work(struct work_struct *work)
{
- struct tipc_net_work *fwork;
+ struct tipc_net *tn = container_of(work, struct tipc_net, work);
- fwork = container_of(work, struct tipc_net_work, work);
- tipc_net_finalize(fwork->net, fwork->addr);
-}
-
-void tipc_sched_net_finalize(struct net *net, u32 addr)
-{
- struct tipc_net *tn = tipc_net(net);
-
- tn->final_work.net = net;
- tn->final_work.addr = addr;
- schedule_work(&tn->final_work.work);
+ tipc_net_finalize(tipc_link_net(tn->bcl), tn->trial_addr);
}
void tipc_net_stop(struct net *net)
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 49e8933..2d62932 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -877,7 +877,7 @@
};
ntq = (struct tipc_name_table_query *)TLV_DATA(msg->req);
- if (TLV_GET_DATA_LEN(msg->req) < sizeof(struct tipc_name_table_query))
+ if (TLV_GET_DATA_LEN(msg->req) < (int)sizeof(struct tipc_name_table_query))
return -EINVAL;
depth = ntohl(ntq->depth);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index e4452d5..6005982 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -456,8 +456,8 @@
bool preliminary)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct tipc_link *l, *snd_l = tipc_bc_sndlink(net);
struct tipc_node *n, *temp_node;
- struct tipc_link *l;
unsigned long intv;
int bearer_id;
int i;
@@ -472,6 +472,16 @@
goto exit;
/* A preliminary node becomes "real" now, refresh its data */
tipc_node_write_lock(n);
+ if (!tipc_link_bc_create(net, tipc_own_addr(net), addr, peer_id, U16_MAX,
+ tipc_link_min_win(snd_l), tipc_link_max_win(snd_l),
+ n->capabilities, &n->bc_entry.inputq1,
+ &n->bc_entry.namedq, snd_l, &n->bc_entry.link)) {
+ pr_warn("Broadcast rcv link refresh failed, no memory\n");
+ tipc_node_write_unlock_fast(n);
+ tipc_node_put(n);
+ n = NULL;
+ goto exit;
+ }
n->preliminary = false;
n->addr = addr;
hlist_del_rcu(&n->hash);
@@ -551,7 +561,16 @@
n->signature = INVALID_NODE_SIG;
n->active_links[0] = INVALID_BEARER_ID;
n->active_links[1] = INVALID_BEARER_ID;
- n->bc_entry.link = NULL;
+ if (!preliminary &&
+ !tipc_link_bc_create(net, tipc_own_addr(net), addr, peer_id, U16_MAX,
+ tipc_link_min_win(snd_l), tipc_link_max_win(snd_l),
+ n->capabilities, &n->bc_entry.inputq1,
+ &n->bc_entry.namedq, snd_l, &n->bc_entry.link)) {
+ pr_warn("Broadcast rcv link creation failed, no memory\n");
+ kfree(n);
+ n = NULL;
+ goto exit;
+ }
tipc_node_get(n);
timer_setup(&n->timer, tipc_node_timeout, 0);
/* Start a slow timer anyway, crypto needs it */
@@ -1128,7 +1147,7 @@
bool *respond, bool *dupl_addr)
{
struct tipc_node *n;
- struct tipc_link *l, *snd_l;
+ struct tipc_link *l;
struct tipc_link_entry *le;
bool addr_match = false;
bool sign_match = false;
@@ -1148,22 +1167,6 @@
return;
tipc_node_write_lock(n);
- if (unlikely(!n->bc_entry.link)) {
- snd_l = tipc_bc_sndlink(net);
- if (!tipc_link_bc_create(net, tipc_own_addr(net),
- addr, peer_id, U16_MAX,
- tipc_link_min_win(snd_l),
- tipc_link_max_win(snd_l),
- n->capabilities,
- &n->bc_entry.inputq1,
- &n->bc_entry.namedq, snd_l,
- &n->bc_entry.link)) {
- pr_warn("Broadcast rcv link creation failed, no mem\n");
- tipc_node_write_unlock_fast(n);
- tipc_node_put(n);
- return;
- }
- }
le = &n->links[b->identity];
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 8d2c985..8f3c9fb 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -489,6 +489,7 @@
sock_init_data(sock, sk);
tipc_set_sk_state(sk, TIPC_OPEN);
if (tipc_sk_insert(tsk)) {
+ sk_free(sk);
pr_warn("Socket create failed; port number exhausted\n");
return -EINVAL;
}
@@ -503,7 +504,7 @@
timer_setup(&sk->sk_timer, tipc_sk_timeout, 0);
sk->sk_shutdown = 0;
sk->sk_backlog_rcv = tipc_sk_backlog_rcv;
- sk->sk_rcvbuf = sysctl_tipc_rmem[1];
+ sk->sk_rcvbuf = READ_ONCE(sysctl_tipc_rmem[1]);
sk->sk_data_ready = tipc_data_ready;
sk->sk_write_space = tipc_write_space;
sk->sk_destruct = tipc_sock_destruct;
@@ -2846,7 +2847,8 @@
/* Try again later if dest link is congested */
if (tsk->cong_link_cnt) {
- sk_reset_timer(sk, &sk->sk_timer, msecs_to_jiffies(100));
+ sk_reset_timer(sk, &sk->sk_timer,
+ jiffies + msecs_to_jiffies(100));
return;
}
/* Prepare SYN for retransmit */
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index 13f3143..89d8a2b 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -176,7 +176,7 @@
conn_put(con);
}
-static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s)
+static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s, struct socket *sock)
{
struct tipc_conn *con;
int ret;
@@ -202,10 +202,12 @@
}
con->conid = ret;
s->idr_in_use++;
- spin_unlock_bh(&s->idr_lock);
set_bit(CF_CONNECTED, &con->flags);
con->server = s;
+ con->sock = sock;
+ conn_get(con);
+ spin_unlock_bh(&s->idr_lock);
return con;
}
@@ -450,17 +452,24 @@
static void tipc_topsrv_accept(struct work_struct *work)
{
struct tipc_topsrv *srv = container_of(work, struct tipc_topsrv, awork);
- struct socket *lsock = srv->listener;
- struct socket *newsock;
+ struct socket *newsock, *lsock;
struct tipc_conn *con;
struct sock *newsk;
int ret;
+ spin_lock_bh(&srv->idr_lock);
+ if (!srv->listener) {
+ spin_unlock_bh(&srv->idr_lock);
+ return;
+ }
+ lsock = srv->listener;
+ spin_unlock_bh(&srv->idr_lock);
+
while (1) {
ret = kernel_accept(lsock, &newsock, O_NONBLOCK);
if (ret < 0)
return;
- con = tipc_conn_alloc(srv);
+ con = tipc_conn_alloc(srv, newsock);
if (IS_ERR(con)) {
ret = PTR_ERR(con);
sock_release(newsock);
@@ -472,11 +481,11 @@
newsk->sk_data_ready = tipc_conn_data_ready;
newsk->sk_write_space = tipc_conn_write_space;
newsk->sk_user_data = con;
- con->sock = newsock;
write_unlock_bh(&newsk->sk_callback_lock);
/* Wake up receive process in case of 'SYN+' message */
newsk->sk_data_ready(newsk);
+ conn_put(con);
}
}
@@ -489,7 +498,7 @@
read_lock_bh(&sk->sk_callback_lock);
srv = sk->sk_user_data;
- if (srv->listener)
+ if (srv)
queue_work(srv->rcv_wq, &srv->awork);
read_unlock_bh(&sk->sk_callback_lock);
}
@@ -568,19 +577,19 @@
sub.seq.upper = upper;
sub.timeout = TIPC_WAIT_FOREVER;
sub.filter = filter;
- *(u32 *)&sub.usr_handle = port;
+ *(u64 *)&sub.usr_handle = (u64)port;
- con = tipc_conn_alloc(tipc_topsrv(net));
+ con = tipc_conn_alloc(tipc_topsrv(net), NULL);
if (IS_ERR(con))
return false;
*conid = con->conid;
- con->sock = NULL;
rc = tipc_conn_rcv_sub(tipc_topsrv(net), con, &sub);
- if (rc >= 0)
- return true;
+ if (rc)
+ conn_put(con);
+
conn_put(con);
- return false;
+ return !rc;
}
void tipc_topsrv_kern_unsubscr(struct net *net, int conid)
@@ -699,8 +708,9 @@
__module_get(lsock->sk->sk_prot_creator->owner);
srv->listener = NULL;
spin_unlock_bh(&srv->idr_lock);
- sock_release(lsock);
+
tipc_topsrv_work_stop(srv);
+ sock_release(lsock);
idr_destroy(&srv->conn_idr);
kfree(srv);
}
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index f718c73..5cb6846 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -97,13 +97,16 @@
unsigned long flags;
spin_lock_irqsave(&tls_device_lock, flags);
+ if (unlikely(!refcount_dec_and_test(&ctx->refcount)))
+ goto unlock;
+
list_move_tail(&ctx->list, &tls_device_gc_list);
/* schedule_work inside the spinlock
* to make sure tls_device_down waits for that work.
*/
schedule_work(&tls_device_gc_work);
-
+unlock:
spin_unlock_irqrestore(&tls_device_lock, flags);
}
@@ -194,8 +197,7 @@
clean_acked_data_disable(inet_csk(sk));
}
- if (refcount_dec_and_test(&tls_ctx->refcount))
- tls_device_queue_ctx_destruction(tls_ctx);
+ tls_device_queue_ctx_destruction(tls_ctx);
}
EXPORT_SYMBOL_GPL(tls_device_sk_destruct);
@@ -483,11 +485,13 @@
copy = min_t(size_t, size, (pfrag->size - pfrag->offset));
copy = min_t(size_t, copy, (max_open_record_len - record->len));
- rc = tls_device_copy_data(page_address(pfrag->page) +
- pfrag->offset, copy, msg_iter);
- if (rc)
- goto handle_error;
- tls_append_frag(record, pfrag, copy);
+ if (copy) {
+ rc = tls_device_copy_data(page_address(pfrag->page) +
+ pfrag->offset, copy, msg_iter);
+ if (rc)
+ goto handle_error;
+ tls_append_frag(record, pfrag, copy);
+ }
size -= copy;
if (!size) {
@@ -1343,7 +1347,15 @@
/* Device contexts for RX and TX will be freed in on sk_destruct
* by tls_device_free_ctx. rx_conf and tx_conf stay in TLS_HW.
+ * Now release the ref taken above.
*/
+ if (refcount_dec_and_test(&ctx->refcount)) {
+ /* sk_destruct ran after tls_device_down took a ref, and
+ * it returned early. Complete the destruction here.
+ */
+ list_del(&ctx->list);
+ tls_device_free_ctx(ctx);
+ }
}
up_write(&device_offload_lock);
@@ -1385,9 +1397,9 @@
.notifier_call = tls_dev_event,
};
-void __init tls_device_init(void)
+int __init tls_device_init(void)
{
- register_netdevice_notifier(&tls_dev_notifier);
+ return register_netdevice_notifier(&tls_dev_notifier);
}
void __exit tls_device_cleanup(void)
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 58d22d6..e537085 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -905,7 +905,12 @@
if (err)
return err;
- tls_device_init();
+ err = tls_device_init();
+ if (err) {
+ unregister_pernet_subsys(&tls_proc_ops);
+ return err;
+ }
+
tcp_register_ulp(&tcp_tls_ulp_ops);
return 0;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 8cd011e..21f20c3 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1483,7 +1483,7 @@
}
if (prot->version == TLS_1_3_VERSION)
memcpy(iv + iv_offset, tls_ctx->rx.iv,
- crypto_aead_ivsize(ctx->aead_recv));
+ prot->iv_size + prot->salt_size);
else
memcpy(iv + iv_offset, tls_ctx->rx.iv, prot->salt_size);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index b7edca8..28721e9 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -438,7 +438,7 @@
* -ECONNREFUSED. Otherwise, if we haven't queued any skbs
* to other and its full, we will hang waiting for POLLOUT.
*/
- if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
+ if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
return 1;
if (connected)
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index d45d536..dc27635 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -204,6 +204,7 @@
/* The external entry point: unix_gc() */
void unix_gc(void)
{
+ struct sk_buff *next_skb, *skb;
struct unix_sock *u;
struct unix_sock *next;
struct sk_buff_head hitlist;
@@ -297,11 +298,30 @@
spin_unlock(&unix_gc_lock);
+ /* We need io_uring to clean its registered files, ignore all io_uring
+ * originated skbs. It's fine as io_uring doesn't keep references to
+ * other io_uring instances and so killing all other files in the cycle
+ * will put all io_uring references forcing it to go through normal
+ * release.path eventually putting registered files.
+ */
+ skb_queue_walk_safe(&hitlist, skb, next_skb) {
+ if (skb->scm_io_uring) {
+ __skb_unlink(skb, &hitlist);
+ skb_queue_tail(&skb->sk->sk_receive_queue, skb);
+ }
+ }
+
/* Here we are. Hitlist is filled. Die. */
__skb_queue_purge(&hitlist);
spin_lock(&unix_gc_lock);
+ /* There could be io_uring registered files, just push them back to
+ * the inflight list
+ */
+ list_for_each_entry_safe(u, next, &gc_candidates, link)
+ list_move_tail(&u->link, &gc_inflight_list);
+
/* All candidates should have been detached by now. */
BUG_ON(!list_empty(&gc_candidates));
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index c598062..7829a50 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1242,6 +1242,7 @@
if (sk->sk_state == TCP_SYN_SENT &&
(sk->sk_shutdown != SHUTDOWN_MASK)) {
sk->sk_state = TCP_CLOSE;
+ sk->sk_socket->state = SS_UNCONNECTED;
sk->sk_err = ETIMEDOUT;
sk->sk_error_report(sk);
vsock_transport_cancel_pkt(vsk);
@@ -1347,7 +1348,14 @@
* timeout fires.
*/
sock_hold(sk);
- schedule_delayed_work(&vsk->connect_work, timeout);
+
+ /* If the timeout function is already scheduled,
+ * reschedule it, then ungrab the socket refcount to
+ * keep it balanced.
+ */
+ if (mod_delayed_work(system_wq, &vsk->connect_work,
+ timeout))
+ sock_put(sk);
/* Skip ahead to preserve error code set above. */
goto out_wait;
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index d6d3a05..c9ee925 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -1196,7 +1196,7 @@
void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt)
{
- kfree(pkt->buf);
+ kvfree(pkt->buf);
kfree(pkt);
}
EXPORT_SYMBOL_GPL(virtio_transport_free_pkt);
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 3f45547..3b25b78 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -5,7 +5,7 @@
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -918,9 +918,6 @@
return res;
}
- /* set up regulatory info */
- wiphy_regulatory_register(wiphy);
-
list_add_rcu(&rdev->list, &cfg80211_rdev_list);
cfg80211_rdev_list_generation++;
@@ -931,6 +928,9 @@
cfg80211_debugfs_rdev_add(rdev);
nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY);
+ /* set up regulatory info */
+ wiphy_regulatory_register(wiphy);
+
if (wiphy->regulatory_flags & REGULATORY_CUSTOM_REG) {
struct regulatory_request request;
diff --git a/net/wireless/debugfs.c b/net/wireless/debugfs.c
index 76b845f..d80b06d 100644
--- a/net/wireless/debugfs.c
+++ b/net/wireless/debugfs.c
@@ -65,9 +65,10 @@
{
struct wiphy *wiphy = file->private_data;
char *buf;
- unsigned int offset = 0, buf_size = PAGE_SIZE, i, r;
+ unsigned int offset = 0, buf_size = PAGE_SIZE, i;
enum nl80211_band band;
struct ieee80211_supported_band *sband;
+ ssize_t r;
buf = kzalloc(buf_size, GFP_KERNEL);
if (!buf)
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 0df8b9a..8a7f0c8 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -475,7 +475,8 @@
.len = IEEE80211_MAX_MESH_ID_LEN },
[NL80211_ATTR_MPATH_NEXT_HOP] = NLA_POLICY_ETH_ADDR_COMPAT,
- [NL80211_ATTR_REG_ALPHA2] = { .type = NLA_STRING, .len = 2 },
+ /* allow 3 for NUL-termination, we used to declare this NLA_STRING */
+ [NL80211_ATTR_REG_ALPHA2] = NLA_POLICY_RANGE(NLA_BINARY, 2, 3),
[NL80211_ATTR_REG_RULES] = { .type = NLA_NESTED },
[NL80211_ATTR_BSS_CTS_PROT] = { .type = NLA_U8 },
@@ -2954,6 +2955,15 @@
} else if (attrs[NL80211_ATTR_CHANNEL_WIDTH]) {
chandef->width =
nla_get_u32(attrs[NL80211_ATTR_CHANNEL_WIDTH]);
+ if (chandef->chan->band == NL80211_BAND_S1GHZ) {
+ /* User input error for channel width doesn't match channel */
+ if (chandef->width != ieee80211_s1g_channel_width(chandef->chan)) {
+ NL_SET_ERR_MSG_ATTR(extack,
+ attrs[NL80211_ATTR_CHANNEL_WIDTH],
+ "bad channel width");
+ return -EINVAL;
+ }
+ }
if (attrs[NL80211_ATTR_CENTER_FREQ1]) {
chandef->center_freq1 =
nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ1]);
@@ -3475,6 +3485,7 @@
wdev_lock(wdev);
switch (wdev->iftype) {
case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_P2P_GO:
if (wdev->ssid_len &&
nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid))
goto nla_put_failure_locked;
@@ -11085,18 +11096,23 @@
struct cfg80211_bitrate_mask mask;
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
int err;
if (!rdev->ops->set_bitrate_mask)
return -EOPNOTSUPP;
+ wdev_lock(wdev);
err = nl80211_parse_tx_bitrate_mask(info, info->attrs,
NL80211_ATTR_TX_RATES, &mask,
dev);
if (err)
- return err;
+ goto out;
- return rdev_set_bitrate_mask(rdev, dev, NULL, &mask);
+ err = rdev_set_bitrate_mask(rdev, dev, NULL, &mask);
+out:
+ wdev_unlock(wdev);
+ return err;
}
static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info)
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index a04fdfb..a1e64d9 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -787,6 +787,8 @@
return 0;
}
+MODULE_FIRMWARE("regulatory.db.p7s");
+
static bool regdb_has_valid_signature(const u8 *data, unsigned int size)
{
const struct firmware *sig;
@@ -1058,8 +1060,12 @@
release_firmware(fw);
}
+MODULE_FIRMWARE("regulatory.db");
+
static int query_regdb_file(const char *alpha2)
{
+ int err;
+
ASSERT_RTNL();
if (regdb)
@@ -1069,9 +1075,13 @@
if (!alpha2)
return -ENOMEM;
- return request_firmware_nowait(THIS_MODULE, true, "regulatory.db",
- ®_pdev->dev, GFP_KERNEL,
- (void *)alpha2, regdb_fw_cb);
+ err = request_firmware_nowait(THIS_MODULE, true, "regulatory.db",
+ ®_pdev->dev, GFP_KERNEL,
+ (void *)alpha2, regdb_fw_cb);
+ if (err)
+ kfree(alpha2);
+
+ return err;
}
int reg_reload_regdb(void)
@@ -4001,6 +4011,7 @@
wiphy_update_regulatory(wiphy, lr->initiator);
wiphy_all_share_dfs_chan_state(wiphy);
+ reg_process_self_managed_hints();
}
void wiphy_regulatory_deregister(struct wiphy *wiphy)
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index fd614a5..15119c4 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -143,18 +143,12 @@
lockdep_assert_held(&rdev->bss_lock);
bss->refcount++;
- if (bss->pub.hidden_beacon_bss) {
- bss = container_of(bss->pub.hidden_beacon_bss,
- struct cfg80211_internal_bss,
- pub);
- bss->refcount++;
- }
- if (bss->pub.transmitted_bss) {
- bss = container_of(bss->pub.transmitted_bss,
- struct cfg80211_internal_bss,
- pub);
- bss->refcount++;
- }
+
+ if (bss->pub.hidden_beacon_bss)
+ bss_from_pub(bss->pub.hidden_beacon_bss)->refcount++;
+
+ if (bss->pub.transmitted_bss)
+ bss_from_pub(bss->pub.transmitted_bss)->refcount++;
}
static inline void bss_ref_put(struct cfg80211_registered_device *rdev,
@@ -304,7 +298,8 @@
tmp_old = cfg80211_find_ie(WLAN_EID_SSID, ie, ielen);
tmp_old = (tmp_old) ? tmp_old + tmp_old[1] + 2 : ie;
- while (tmp_old + tmp_old[1] + 2 - ie <= ielen) {
+ while (tmp_old + 2 - ie <= ielen &&
+ tmp_old + tmp_old[1] + 2 - ie <= ielen) {
if (tmp_old[0] == 0) {
tmp_old++;
continue;
@@ -364,7 +359,8 @@
* copied to new ie, skip ssid, capability, bssid-index ie
*/
tmp_new = sub_copy;
- while (tmp_new + tmp_new[1] + 2 - sub_copy <= subie_len) {
+ while (tmp_new + 2 - sub_copy <= subie_len &&
+ tmp_new + tmp_new[1] + 2 - sub_copy <= subie_len) {
if (!(tmp_new[0] == WLAN_EID_NON_TX_BSSID_CAP ||
tmp_new[0] == WLAN_EID_SSID)) {
memcpy(pos, tmp_new, tmp_new[1] + 2);
@@ -429,6 +425,15 @@
rcu_read_unlock();
+ /*
+ * This is a bit weird - it's not on the list, but already on another
+ * one! The only way that could happen is if there's some BSSID/SSID
+ * shared by multiple APs in their multi-BSSID profiles, potentially
+ * with hidden SSID mixed in ... ignore it.
+ */
+ if (!list_empty(&nontrans_bss->nontrans_list))
+ return -EINVAL;
+
/* add to the list */
list_add_tail(&nontrans_bss->nontrans_list, &trans_bss->nontrans_list);
return 0;
@@ -702,8 +707,12 @@
for (i = 0; i < request->n_ssids; i++) {
/* wildcard ssid in the scan request */
- if (!request->ssids[i].ssid_len)
+ if (!request->ssids[i].ssid_len) {
+ if (ap->multi_bss && !ap->transmitted_bssid)
+ continue;
+
return true;
+ }
if (ap->ssid_len &&
ap->ssid_len == request->ssids[i].ssid_len) {
@@ -830,6 +839,9 @@
!cfg80211_find_ssid_match(ap, request))
continue;
+ if (!request->n_ssids && ap->multi_bss && !ap->transmitted_bssid)
+ continue;
+
cfg80211_scan_req_add_chan(request, chan, true);
memcpy(scan_6ghz_params->bssid, ap->bssid, ETH_ALEN);
scan_6ghz_params->short_ssid = ap->short_ssid;
@@ -1590,6 +1602,23 @@
u8 bssid_index;
};
+static void cfg80211_update_hidden_bsses(struct cfg80211_internal_bss *known,
+ const struct cfg80211_bss_ies *new_ies,
+ const struct cfg80211_bss_ies *old_ies)
+{
+ struct cfg80211_internal_bss *bss;
+
+ /* Assign beacon IEs to all sub entries */
+ list_for_each_entry(bss, &known->hidden_list, hidden_list) {
+ const struct cfg80211_bss_ies *ies;
+
+ ies = rcu_access_pointer(bss->pub.beacon_ies);
+ WARN_ON(ies != old_ies);
+
+ rcu_assign_pointer(bss->pub.beacon_ies, new_ies);
+ }
+}
+
static bool
cfg80211_update_known_bss(struct cfg80211_registered_device *rdev,
struct cfg80211_internal_bss *known,
@@ -1613,7 +1642,6 @@
kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head);
} else if (rcu_access_pointer(new->pub.beacon_ies)) {
const struct cfg80211_bss_ies *old;
- struct cfg80211_internal_bss *bss;
if (known->pub.hidden_beacon_bss &&
!list_empty(&known->hidden_list)) {
@@ -1641,16 +1669,9 @@
if (old == rcu_access_pointer(known->pub.ies))
rcu_assign_pointer(known->pub.ies, new->pub.beacon_ies);
- /* Assign beacon IEs to all sub entries */
- list_for_each_entry(bss, &known->hidden_list, hidden_list) {
- const struct cfg80211_bss_ies *ies;
-
- ies = rcu_access_pointer(bss->pub.beacon_ies);
- WARN_ON(ies != old);
-
- rcu_assign_pointer(bss->pub.beacon_ies,
- new->pub.beacon_ies);
- }
+ cfg80211_update_hidden_bsses(known,
+ rcu_access_pointer(new->pub.beacon_ies),
+ old);
if (old)
kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head);
@@ -1727,6 +1748,8 @@
new->refcount = 1;
INIT_LIST_HEAD(&new->hidden_list);
INIT_LIST_HEAD(&new->pub.nontrans_list);
+ /* we'll set this later if it was non-NULL */
+ new->pub.transmitted_bss = NULL;
if (rcu_access_pointer(tmp->pub.proberesp_ies)) {
hidden = rb_find_bss(rdev, tmp, BSS_CMP_HIDE_ZLEN);
@@ -1961,11 +1984,18 @@
/* this is a nontransmitting bss, we need to add it to
* transmitting bss' list if it is not there
*/
+ spin_lock_bh(&rdev->bss_lock);
if (cfg80211_add_nontrans_list(non_tx_data->tx_bss,
&res->pub)) {
- if (__cfg80211_unlink_bss(rdev, res))
+ if (__cfg80211_unlink_bss(rdev, res)) {
rdev->bss_generation++;
+ res = NULL;
+ }
}
+ spin_unlock_bh(&rdev->bss_lock);
+
+ if (!res)
+ return NULL;
}
trace_cfg80211_return_bss(&res->pub);
@@ -2084,6 +2114,8 @@
for_each_element_id(elem, WLAN_EID_MULTIPLE_BSSID, ie, ielen) {
if (elem->datalen < 4)
continue;
+ if (elem->data[0] < 1 || (int)elem->data[0] > 8)
+ continue;
for_each_element(sub, elem->data + 1, elem->datalen - 1) {
u8 profile_len;
@@ -2219,7 +2251,7 @@
size_t new_ie_len;
struct cfg80211_bss_ies *new_ies;
const struct cfg80211_bss_ies *old;
- u8 cpy_len;
+ size_t cpy_len;
lockdep_assert_held(&wiphy_to_rdev(wiphy)->bss_lock);
@@ -2286,6 +2318,8 @@
} else {
old = rcu_access_pointer(nontrans_bss->beacon_ies);
rcu_assign_pointer(nontrans_bss->beacon_ies, new_ies);
+ cfg80211_update_hidden_bsses(bss_from_pub(nontrans_bss),
+ new_ies, old);
rcu_assign_pointer(nontrans_bss->ies, new_ies);
if (old)
kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head);
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 03ed170..d231d46 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1775,10 +1775,15 @@
write_lock_bh(&x25_list_lock);
- sk_for_each(s, &x25_list)
- if (x25_sk(s)->neighbour == nb)
+ sk_for_each(s, &x25_list) {
+ if (x25_sk(s)->neighbour == nb) {
+ write_unlock_bh(&x25_list_lock);
+ lock_sock(s);
x25_disconnect(s, ENETUNREACH, 0, 0);
-
+ release_sock(s);
+ write_lock_bh(&x25_list_lock);
+ }
+ }
write_unlock_bh(&x25_list_lock);
/* Remove any related forwards */
diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c
index 25bf72e..226397a 100644
--- a/net/x25/x25_dev.c
+++ b/net/x25/x25_dev.c
@@ -117,7 +117,7 @@
if (!pskb_may_pull(skb, 1)) {
x25_neigh_put(nb);
- return 0;
+ goto drop;
}
switch (skb->data[0]) {
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index ca4716b..691841d 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -742,8 +742,8 @@
goto out_unlock;
}
- err = xp_assign_dev_shared(xs->pool, umem_xs->umem,
- dev, qid);
+ err = xp_assign_dev_shared(xs->pool, umem_xs, dev,
+ qid);
if (err) {
xp_destroy(xs->pool);
xs->pool = NULL;
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 2ef6f92..c347e52 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -198,17 +198,18 @@
return __xp_assign_dev(pool, dev, queue_id, flags);
}
-int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem,
+int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_sock *umem_xs,
struct net_device *dev, u16 queue_id)
{
u16 flags;
+ struct xdp_umem *umem = umem_xs->umem;
/* One fill and completion ring required for each queue id. */
if (!pool->fq || !pool->cq)
return -EINVAL;
flags = umem->zc ? XDP_ZEROCOPY : XDP_COPY;
- if (pool->uses_need_wakeup)
+ if (umem_xs->pool->uses_need_wakeup)
flags |= XDP_USE_NEED_WAKEUP;
return __xp_assign_dev(pool, dev, queue_id, flags);
@@ -318,6 +319,7 @@
for (i = 0; i < dma_map->dma_pages_cnt; i++) {
dma = &dma_map->dma_pages[i];
if (*dma) {
+ *dma &= ~XSK_NEXT_PG_CONTIG_MASK;
dma_unmap_page_attrs(dma_map->dev, *dma, PAGE_SIZE,
DMA_BIDIRECTIONAL, attrs);
*dma = 0;
diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c
index 1f08ebf..24ca49e 100644
--- a/net/xfrm/espintcp.c
+++ b/net/xfrm/espintcp.c
@@ -170,7 +170,7 @@
{
struct espintcp_ctx *ctx = espintcp_getctx(sk);
- if (skb_queue_len(&ctx->out_queue) >= netdev_max_backlog)
+ if (skb_queue_len(&ctx->out_queue) >= READ_ONCE(netdev_max_backlog))
return -ENOBUFS;
__skb_queue_tail(&ctx->out_queue, skb);
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index c255aac..8b8e957 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -97,6 +97,18 @@
}
}
+static inline bool xmit_xfrm_check_overflow(struct sk_buff *skb)
+{
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ __u32 seq = xo->seq.low;
+
+ seq += skb_shinfo(skb)->gso_segs;
+ if (unlikely(seq < xo->seq.low))
+ return true;
+
+ return false;
+}
+
struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again)
{
int err;
@@ -134,7 +146,8 @@
return skb;
}
- if (skb_is_gso(skb) && unlikely(x->xso.dev != dev)) {
+ if (skb_is_gso(skb) && (unlikely(x->xso.dev != dev) ||
+ unlikely(xmit_xfrm_check_overflow(skb)))) {
struct sk_buff *segs;
/* Packet got rerouted, fixup features and segment it. */
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 61e6220..77e8203 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -782,7 +782,7 @@
trans = this_cpu_ptr(&xfrm_trans_tasklet);
- if (skb_queue_len(&trans->queue) >= netdev_max_backlog)
+ if (skb_queue_len(&trans->queue) >= READ_ONCE(netdev_max_backlog))
return -ENOBUFS;
BUILD_BUG_ON(sizeof(struct xfrm_trans_cb) > sizeof(skb->cb));
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index 4420c8f..da518b4 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -303,7 +303,10 @@
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
- icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ if (skb->len > 1280)
+ icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ else
+ goto xmit;
} else {
if (!(ip_hdr(skb)->frag_off & htons(IP_DF)))
goto xmit;
diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
index 0814320..24ac680 100644
--- a/net/xfrm/xfrm_ipcomp.c
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -212,6 +212,7 @@
vfree(*per_cpu_ptr(scratches, i));
free_percpu(scratches);
+ ipcomp_scratches = NULL;
}
static void * __percpu *ipcomp_alloc_scratches(void)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 3d0ffd9..0d12bdf 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2680,8 +2680,10 @@
*num_xfrms = 0;
return 0;
}
- if (IS_ERR(pols[0]))
+ if (IS_ERR(pols[0])) {
+ *num_pols = 0;
return PTR_ERR(pols[0]);
+ }
*num_xfrms = pols[0]->xfrm_nr;
@@ -2696,6 +2698,7 @@
if (pols[1]) {
if (IS_ERR(pols[1])) {
xfrm_pols_put(pols, *num_pols);
+ *num_pols = 0;
return PTR_ERR(pols[1]);
}
(*num_pols)++;
@@ -3161,6 +3164,11 @@
return dst;
nopol:
+ if ((!dst_orig->dev || !(dst_orig->dev->flags & IFF_LOOPBACK)) &&
+ net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) {
+ err = -EPERM;
+ goto error;
+ }
if (!(flags & XFRM_LOOKUP_ICMP)) {
dst = dst_orig;
goto ok;
@@ -3608,6 +3616,11 @@
}
if (!pol) {
+ if (net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
+ return 0;
+ }
+
if (sp && secpath_has_nontransport(sp, 0, &xerr_idx)) {
xfrm_secpath_reject(xerr_idx, skb, &fl);
XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
@@ -3628,6 +3641,7 @@
if (pols[1]) {
if (IS_ERR(pols[1])) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
+ xfrm_pol_put(pols[0]);
return 0;
}
pols[1]->curlft.use_time = ktime_get_real_seconds();
@@ -3662,6 +3676,13 @@
tpp[ti++] = &pols[pi]->xfrm_vec[i];
}
xfrm_nr = ti;
+
+ if (net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK &&
+ !xfrm_nr) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
+ goto reject;
+ }
+
if (npols > 1) {
xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
tpp = stp;
@@ -4146,6 +4167,9 @@
spin_lock_init(&net->xfrm.xfrm_policy_lock);
seqcount_spinlock_init(&net->xfrm.xfrm_policy_hash_generation, &net->xfrm.xfrm_policy_lock);
mutex_init(&net->xfrm.xfrm_cfg_mutex);
+ net->xfrm.policy_default[XFRM_POLICY_IN] = XFRM_USERPOLICY_ACCEPT;
+ net->xfrm.policy_default[XFRM_POLICY_FWD] = XFRM_USERPOLICY_ACCEPT;
+ net->xfrm.policy_default[XFRM_POLICY_OUT] = XFRM_USERPOLICY_ACCEPT;
rv = xfrm_statistics_init(net);
if (rv < 0)
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index c6a4338..65d009e 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -657,7 +657,7 @@
oseq += skb_shinfo(skb)->gso_segs;
}
- if (unlikely(oseq < replay_esn->oseq)) {
+ if (unlikely(xo->seq.low < replay_esn->oseq)) {
XFRM_SKB_CB(skb)->seq.output.hi = ++oseq_hi;
xo->seq.hi = oseq_hi;
replay_esn->oseq_hi = oseq_hi;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 1befc6d..fdbd56e 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1020,7 +1020,8 @@
if ((x->sel.family &&
(x->sel.family != family ||
!xfrm_selector_match(&x->sel, fl, family))) ||
- !security_xfrm_state_pol_flow_match(x, pol, fl))
+ !security_xfrm_state_pol_flow_match(x, pol,
+ &fl->u.__fl_common))
return;
if (!*best ||
@@ -1035,7 +1036,8 @@
if ((!x->sel.family ||
(x->sel.family == family &&
xfrm_selector_match(&x->sel, fl, family))) &&
- security_xfrm_state_pol_flow_match(x, pol, fl))
+ security_xfrm_state_pol_flow_match(x, pol,
+ &fl->u.__fl_common))
*error = -ESRCH;
}
}
@@ -1555,6 +1557,7 @@
x->replay = orig->replay;
x->preplay = orig->preplay;
x->mapping_maxage = orig->mapping_maxage;
+ x->lastused = orig->lastused;
x->new_mapping = 0;
x->new_mapping_sport = 0;
@@ -2585,7 +2588,7 @@
int err;
if (family == AF_INET &&
- xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc)
+ READ_ONCE(xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc))
x->props.flags |= XFRM_STATE_NOPMTUDISC;
err = -EPROTONOSUPPORT;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 1ece01c..d9841f4 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1914,6 +1914,90 @@
return skb;
}
+static int xfrm_notify_userpolicy(struct net *net)
+{
+ struct xfrm_userpolicy_default *up;
+ int len = NLMSG_ALIGN(sizeof(*up));
+ struct nlmsghdr *nlh;
+ struct sk_buff *skb;
+
+ skb = nlmsg_new(len, GFP_ATOMIC);
+ if (skb == NULL)
+ return -ENOMEM;
+
+ nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_GETDEFAULT, sizeof(*up), 0);
+ if (nlh == NULL) {
+ kfree_skb(skb);
+ return -EMSGSIZE;
+ }
+
+ up = nlmsg_data(nlh);
+ up->in = net->xfrm.policy_default[XFRM_POLICY_IN];
+ up->fwd = net->xfrm.policy_default[XFRM_POLICY_FWD];
+ up->out = net->xfrm.policy_default[XFRM_POLICY_OUT];
+
+ nlmsg_end(skb, nlh);
+
+ return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_POLICY);
+}
+
+static bool xfrm_userpolicy_is_valid(__u8 policy)
+{
+ return policy == XFRM_USERPOLICY_BLOCK ||
+ policy == XFRM_USERPOLICY_ACCEPT;
+}
+
+static int xfrm_set_default(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct nlattr **attrs)
+{
+ struct net *net = sock_net(skb->sk);
+ struct xfrm_userpolicy_default *up = nlmsg_data(nlh);
+
+ if (xfrm_userpolicy_is_valid(up->in))
+ net->xfrm.policy_default[XFRM_POLICY_IN] = up->in;
+
+ if (xfrm_userpolicy_is_valid(up->fwd))
+ net->xfrm.policy_default[XFRM_POLICY_FWD] = up->fwd;
+
+ if (xfrm_userpolicy_is_valid(up->out))
+ net->xfrm.policy_default[XFRM_POLICY_OUT] = up->out;
+
+ rt_genid_bump_all(net);
+
+ xfrm_notify_userpolicy(net);
+ return 0;
+}
+
+static int xfrm_get_default(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct nlattr **attrs)
+{
+ struct sk_buff *r_skb;
+ struct nlmsghdr *r_nlh;
+ struct net *net = sock_net(skb->sk);
+ struct xfrm_userpolicy_default *r_up;
+ int len = NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_default));
+ u32 portid = NETLINK_CB(skb).portid;
+ u32 seq = nlh->nlmsg_seq;
+
+ r_skb = nlmsg_new(len, GFP_ATOMIC);
+ if (!r_skb)
+ return -ENOMEM;
+
+ r_nlh = nlmsg_put(r_skb, portid, seq, XFRM_MSG_GETDEFAULT, sizeof(*r_up), 0);
+ if (!r_nlh) {
+ kfree_skb(r_skb);
+ return -EMSGSIZE;
+ }
+
+ r_up = nlmsg_data(r_nlh);
+ r_up->in = net->xfrm.policy_default[XFRM_POLICY_IN];
+ r_up->fwd = net->xfrm.policy_default[XFRM_POLICY_FWD];
+ r_up->out = net->xfrm.policy_default[XFRM_POLICY_OUT];
+ nlmsg_end(r_skb, r_nlh);
+
+ return nlmsg_unicast(net->xfrm.nlsk, r_skb, portid);
+}
+
static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
struct nlattr **attrs)
{
@@ -2621,6 +2705,8 @@
[XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = sizeof(u32),
[XFRM_MSG_NEWSPDINFO - XFRM_MSG_BASE] = sizeof(u32),
[XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = sizeof(u32),
+ [XFRM_MSG_SETDEFAULT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_default),
+ [XFRM_MSG_GETDEFAULT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_default),
};
EXPORT_SYMBOL_GPL(xfrm_msg_min);
@@ -2700,6 +2786,8 @@
.nla_pol = xfrma_spd_policy,
.nla_max = XFRMA_SPD_MAX },
[XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = { .doit = xfrm_get_spdinfo },
+ [XFRM_MSG_SETDEFAULT - XFRM_MSG_BASE] = { .doit = xfrm_set_default },
+ [XFRM_MSG_GETDEFAULT - XFRM_MSG_BASE] = { .doit = xfrm_get_default },
};
static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,