Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index b83e16a..be1c400 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -6,7 +6,8 @@
menuconfig TIPC
tristate "The TIPC Protocol"
depends on INET
- ---help---
+ depends on IPV6 || IPV6=n
+ help
The Transparent Inter Process Communication (TIPC) protocol is
specially designed for intra cluster communication. This protocol
originates from Ericsson where it has been used in carrier grade
@@ -35,11 +36,26 @@
Saying Y here will enable support for running TIPC over IP/UDP
bool
default y
+config TIPC_CRYPTO
+ bool "TIPC encryption support"
+ depends on TIPC
+ select CRYPTO
+ select CRYPTO_AES
+ select CRYPTO_GCM
+ help
+ Saying Y here will enable support for TIPC encryption.
+ All TIPC messages will be encrypted/decrypted by using the currently most
+ advanced algorithm: AEAD AES-GCM (like IPSec or TLS) before leaving/
+ entering the TIPC stack.
+ Key setting from user-space is performed via netlink by a user program
+ (e.g. the iproute2 'tipc' tool).
+ bool
+ default y
config TIPC_DIAG
tristate "TIPC: socket monitoring interface"
depends on TIPC
default y
- ---help---
+ help
Support for TIPC socket monitoring interface used by ss tool.
If unsure, say Y.
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index c86aba0..ee49a9f 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -9,15 +9,14 @@
core.o link.o discover.o msg.o \
name_distr.o subscr.o monitor.o name_table.o net.o \
netlink.o netlink_compat.o node.o socket.o eth_media.o \
- topsrv.o socket.o group.o trace.o
+ topsrv.o group.o trace.o
CFLAGS_trace.o += -I$(src)
tipc-$(CONFIG_TIPC_MEDIA_UDP) += udp_media.o
tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o
tipc-$(CONFIG_SYSCTL) += sysctl.o
+tipc-$(CONFIG_TIPC_CRYPTO) += crypto.o
obj-$(CONFIG_TIPC_DIAG) += diag.o
-
-tipc_diag-y := diag.o
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 885ecf6..593846d 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -46,6 +46,7 @@
#define BCLINK_WIN_MIN 32 /* bcast minimum link window size */
const char tipc_bclink_name[] = "broadcast-link";
+unsigned long sysctl_tipc_bc_retruni __read_mostly;
/**
* struct tipc_bc_base - base structure for keeping broadcast send state
@@ -84,12 +85,12 @@
*/
int tipc_bcast_get_mtu(struct net *net)
{
- return tipc_link_mtu(tipc_bc_sndlink(net)) - INT_H_SIZE;
+ return tipc_link_mss(tipc_bc_sndlink(net));
}
-void tipc_bcast_disable_rcast(struct net *net)
+void tipc_bcast_toggle_rcast(struct net *net, bool supp)
{
- tipc_bc_base(net)->rcast_support = false;
+ tipc_bc_base(net)->rcast_support = supp;
}
static void tipc_bcbase_calc_bc_threshold(struct net *net)
@@ -107,6 +108,8 @@
{
struct tipc_bc_base *bb = tipc_bc_base(net);
int all_dests = tipc_link_bc_peers(bb->link);
+ int max_win = tipc_link_max_win(bb->link);
+ int min_win = tipc_link_min_win(bb->link);
int i, mtu, prim;
bb->primary_bearer = INVALID_BEARER_ID;
@@ -120,8 +123,12 @@
continue;
mtu = tipc_bearer_mtu(net, i);
- if (mtu < tipc_link_mtu(bb->link))
+ if (mtu < tipc_link_mtu(bb->link)) {
tipc_link_set_mtu(bb->link, mtu);
+ tipc_link_set_queue_limits(bb->link,
+ min_win,
+ max_win);
+ }
bb->bcast_support &= tipc_bearer_bcast_support(net, i);
if (bb->dests[i] < all_dests)
continue;
@@ -249,8 +256,8 @@
* Consumes the buffer chain.
* Returns 0 if success, otherwise errno: -EHOSTUNREACH,-EMSGSIZE
*/
-static int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts,
- u16 *cong_link_cnt)
+int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts,
+ u16 *cong_link_cnt)
{
struct tipc_link *l = tipc_bc_sndlink(net);
struct sk_buff_head xmitq;
@@ -474,7 +481,7 @@
__skb_queue_head_init(&xmitq);
tipc_bcast_lock(net);
- tipc_link_bc_ack_rcv(l, acked, &xmitq);
+ tipc_link_bc_ack_rcv(l, acked, 0, NULL, &xmitq, NULL);
tipc_bcast_unlock(net);
tipc_bcbase_xmit(net, &xmitq);
@@ -489,9 +496,11 @@
* RCU is locked, no other locks set
*/
int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
- struct tipc_msg *hdr)
+ struct tipc_msg *hdr,
+ struct sk_buff_head *retrq)
{
struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq;
+ struct tipc_gap_ack_blks *ga;
struct sk_buff_head xmitq;
int rc = 0;
@@ -501,8 +510,13 @@
if (msg_type(hdr) != STATE_MSG) {
tipc_link_bc_init_rcv(l, hdr);
} else if (!msg_bc_ack_invalid(hdr)) {
- tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr), &xmitq);
- rc = tipc_link_bc_sync_rcv(l, hdr, &xmitq);
+ tipc_get_gap_ack_blks(&ga, l, hdr, false);
+ if (!sysctl_tipc_bc_retruni)
+ retrq = &xmitq;
+ rc = tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr),
+ msg_bc_gap(hdr), ga, &xmitq,
+ retrq);
+ rc |= tipc_link_bc_sync_rcv(l, hdr, &xmitq);
}
tipc_bcast_unlock(net);
@@ -555,10 +569,8 @@
tipc_sk_rcv(net, inputq);
}
-int tipc_bclink_reset_stats(struct net *net)
+int tipc_bclink_reset_stats(struct net *net, struct tipc_link *l)
{
- struct tipc_link *l = tipc_bc_sndlink(net);
-
if (!l)
return -ENOPROTOOPT;
@@ -568,18 +580,18 @@
return 0;
}
-static int tipc_bc_link_set_queue_limits(struct net *net, u32 limit)
+static int tipc_bc_link_set_queue_limits(struct net *net, u32 max_win)
{
struct tipc_link *l = tipc_bc_sndlink(net);
if (!l)
return -ENOPROTOOPT;
- if (limit < BCLINK_WIN_MIN)
- limit = BCLINK_WIN_MIN;
- if (limit > TIPC_MAX_LINK_WIN)
+ if (max_win < BCLINK_WIN_MIN)
+ max_win = BCLINK_WIN_MIN;
+ if (max_win > TIPC_MAX_LINK_WIN)
return -EINVAL;
tipc_bcast_lock(net);
- tipc_link_set_queue_limits(l, limit);
+ tipc_link_set_queue_limits(l, tipc_link_min_win(l), max_win);
tipc_bcast_unlock(net);
return 0;
}
@@ -686,8 +698,9 @@
tn->bcbase = bb;
spin_lock_init(&tipc_net(net)->bclock);
- if (!tipc_link_bc_create(net, 0, 0,
- FB_MTU,
+ if (!tipc_link_bc_create(net, 0, 0, NULL,
+ one_page_mtu,
+ BCLINK_WIN_DEFAULT,
BCLINK_WIN_DEFAULT,
0,
&bb->inputq,
@@ -745,7 +758,7 @@
nl->local = false;
}
-u32 tipc_bcast_get_broadcast_mode(struct net *net)
+u32 tipc_bcast_get_mode(struct net *net)
{
struct tipc_bc_base *bb = tipc_bc_base(net);
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index dadad95..2d9352d 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -45,6 +45,7 @@
struct tipc_nlist;
struct tipc_nitem;
extern const char tipc_bclink_name[];
+extern unsigned long sysctl_tipc_bc_retruni;
#define TIPC_METHOD_EXPIRE msecs_to_jiffies(5000)
@@ -85,20 +86,24 @@
void tipc_bcast_inc_bearer_dst_cnt(struct net *net, int bearer_id);
void tipc_bcast_dec_bearer_dst_cnt(struct net *net, int bearer_id);
int tipc_bcast_get_mtu(struct net *net);
-void tipc_bcast_disable_rcast(struct net *net);
+void tipc_bcast_toggle_rcast(struct net *net, bool supp);
int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts,
struct tipc_mc_method *method, struct tipc_nlist *dests,
u16 *cong_link_cnt);
+int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts,
+ u16 *cong_link_cnt);
int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb);
void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l,
struct tipc_msg *hdr);
int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
- struct tipc_msg *hdr);
-int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg);
+ struct tipc_msg *hdr,
+ struct sk_buff_head *retrq);
+int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg,
+ struct tipc_link *bcl);
int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]);
-int tipc_bclink_reset_stats(struct net *net);
+int tipc_bclink_reset_stats(struct net *net, struct tipc_link *l);
-u32 tipc_bcast_get_broadcast_mode(struct net *net);
+u32 tipc_bcast_get_mode(struct net *net);
u32 tipc_bcast_get_broadcast_ratio(struct net *net);
void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq,
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 8bd2454..6911f1c 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -44,6 +44,7 @@
#include "netlink.h"
#include "udp_media.h"
#include "trace.h"
+#include "crypto.h"
#define MAX_ADDR_STR 60
@@ -326,11 +327,12 @@
b->identity = bearer_id;
b->tolerance = m->tolerance;
- b->window = m->window;
+ b->min_win = m->min_win;
+ b->max_win = m->max_win;
b->domain = disc_domain;
b->net_plane = bearer_id + 'A';
b->priority = prio;
- test_and_set_bit_lock(0, &b->up);
+ refcount_set(&b->refcnt, 1);
res = tipc_disc_create(net, b, &b->bcast_addr, &skb);
if (res) {
@@ -340,15 +342,18 @@
goto rejected;
}
+ /* Create monitoring data before accepting activate messages */
+ if (tipc_mon_create(net, bearer_id)) {
+ bearer_disable(net, b);
+ kfree_skb(skb);
+ return -ENOMEM;
+ }
+
+ test_and_set_bit_lock(0, &b->up);
rcu_assign_pointer(tn->bearer_list[bearer_id], b);
if (skb)
tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr);
- if (tipc_mon_create(net, bearer_id)) {
- bearer_disable(net, b);
- return -ENOMEM;
- }
-
pr_info("Enabled bearer <%s>, priority %u\n", name, prio);
return res;
@@ -368,6 +373,17 @@
return 0;
}
+bool tipc_bearer_hold(struct tipc_bearer *b)
+{
+ return (b && refcount_inc_not_zero(&b->refcnt));
+}
+
+void tipc_bearer_put(struct tipc_bearer *b)
+{
+ if (b && refcount_dec_and_test(&b->refcnt))
+ kfree_rcu(b, rcu);
+}
+
/**
* bearer_disable
*
@@ -386,7 +402,7 @@
if (b->disc)
tipc_disc_delete(b->disc);
RCU_INIT_POINTER(tn->bearer_list[bearer_id], NULL);
- kfree_rcu(b, rcu);
+ tipc_bearer_put(b);
tipc_mon_delete(net, bearer_id);
}
@@ -521,10 +537,15 @@
rcu_read_lock();
b = bearer_get(net, bearer_id);
- if (likely(b && (test_bit(0, &b->up) || msg_is_reset(hdr))))
- b->media->send_msg(net, skb, b, dest);
- else
+ if (likely(b && (test_bit(0, &b->up) || msg_is_reset(hdr)))) {
+#ifdef CONFIG_TIPC_CRYPTO
+ tipc_crypto_xmit(net, &skb, b, dest, NULL);
+ if (skb)
+#endif
+ b->media->send_msg(net, skb, b, dest);
+ } else {
kfree_skb(skb);
+ }
rcu_read_unlock();
}
@@ -532,7 +553,8 @@
*/
void tipc_bearer_xmit(struct net *net, u32 bearer_id,
struct sk_buff_head *xmitq,
- struct tipc_media_addr *dst)
+ struct tipc_media_addr *dst,
+ struct tipc_node *__dnode)
{
struct tipc_bearer *b;
struct sk_buff *skb, *tmp;
@@ -546,10 +568,15 @@
__skb_queue_purge(xmitq);
skb_queue_walk_safe(xmitq, skb, tmp) {
__skb_dequeue(xmitq);
- if (likely(test_bit(0, &b->up) || msg_is_reset(buf_msg(skb))))
- b->media->send_msg(net, skb, b, dst);
- else
+ if (likely(test_bit(0, &b->up) || msg_is_reset(buf_msg(skb)))) {
+#ifdef CONFIG_TIPC_CRYPTO
+ tipc_crypto_xmit(net, &skb, b, dst, __dnode);
+ if (skb)
+#endif
+ b->media->send_msg(net, skb, b, dst);
+ } else {
kfree_skb(skb);
+ }
}
rcu_read_unlock();
}
@@ -560,6 +587,7 @@
struct sk_buff_head *xmitq)
{
struct tipc_net *tn = tipc_net(net);
+ struct tipc_media_addr *dst;
int net_id = tn->net_id;
struct tipc_bearer *b;
struct sk_buff *skb, *tmp;
@@ -574,14 +602,19 @@
msg_set_non_seq(hdr, 1);
msg_set_mc_netid(hdr, net_id);
__skb_dequeue(xmitq);
- b->media->send_msg(net, skb, b, &b->bcast_addr);
+ dst = &b->bcast_addr;
+#ifdef CONFIG_TIPC_CRYPTO
+ tipc_crypto_xmit(net, &skb, b, dst, NULL);
+ if (skb)
+#endif
+ b->media->send_msg(net, skb, b, dst);
}
rcu_read_unlock();
}
/**
* tipc_l2_rcv_msg - handle incoming TIPC message from an interface
- * @buf: the received packet
+ * @skb: the received message
* @dev: the net device that the packet was received on
* @pt: the packet_type structure which was used to register this handler
* @orig_dev: the original receive net device in case the device is a bond
@@ -601,6 +634,7 @@
if (likely(b && test_bit(0, &b->up) &&
(skb->pkt_type <= PACKET_MULTICAST))) {
skb_mark_not_on_list(skb);
+ TIPC_SKB_CB(skb)->flags = 0;
tipc_rcv(dev_net(b->pt.dev), skb, b);
rcu_read_unlock();
return NET_RX_SUCCESS;
@@ -637,7 +671,7 @@
test_and_set_bit_lock(0, &b->up);
break;
}
- /* fall through */
+ fallthrough;
case NETDEV_GOING_DOWN:
clear_bit_unlock(0, &b->up);
tipc_reset_bearer(net, b);
@@ -782,7 +816,7 @@
goto prop_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, bearer->tolerance))
goto prop_msg_full;
- if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bearer->window))
+ if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bearer->max_win))
goto prop_msg_full;
if (bearer->media->type_id == TIPC_MEDIA_TYPE_UDP)
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_MTU, bearer->mtu))
@@ -1081,7 +1115,7 @@
if (props[TIPC_NLA_PROP_PRIO])
b->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
if (props[TIPC_NLA_PROP_WIN])
- b->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
+ b->max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
if (props[TIPC_NLA_PROP_MTU]) {
if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) {
NL_SET_ERR_MSG(info->extack,
@@ -1141,7 +1175,7 @@
goto prop_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, media->tolerance))
goto prop_msg_full;
- if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, media->window))
+ if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, media->max_win))
goto prop_msg_full;
if (media->type_id == TIPC_MEDIA_TYPE_UDP)
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_MTU, media->mtu))
@@ -1276,7 +1310,7 @@
if (props[TIPC_NLA_PROP_PRIO])
m->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
if (props[TIPC_NLA_PROP_WIN])
- m->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
+ m->max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
if (props[TIPC_NLA_PROP_MTU]) {
if (m->type_id != TIPC_MEDIA_TYPE_UDP) {
NL_SET_ERR_MSG(info->extack,
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index ea0f3c4..bc00231 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -119,7 +119,8 @@
char *raw);
u32 priority;
u32 tolerance;
- u32 window;
+ u32 min_win;
+ u32 max_win;
u32 mtu;
u32 type_id;
u32 hwaddr_len;
@@ -158,13 +159,15 @@
struct packet_type pt;
struct rcu_head rcu;
u32 priority;
- u32 window;
+ u32 min_win;
+ u32 max_win;
u32 tolerance;
u32 domain;
u32 identity;
struct tipc_discoverer *disc;
char net_plane;
unsigned long up;
+ refcount_t refcnt;
};
struct tipc_bearer_names {
@@ -210,6 +213,8 @@
int tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a);
int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
struct nlattr *attrs[]);
+bool tipc_bearer_hold(struct tipc_bearer *b);
+void tipc_bearer_put(struct tipc_bearer *b);
void tipc_disable_l2_media(struct tipc_bearer *b);
int tipc_l2_send_msg(struct net *net, struct sk_buff *buf,
struct tipc_bearer *b, struct tipc_media_addr *dest);
@@ -229,7 +234,8 @@
struct tipc_media_addr *dest);
void tipc_bearer_xmit(struct net *net, u32 bearer_id,
struct sk_buff_head *xmitq,
- struct tipc_media_addr *dst);
+ struct tipc_media_addr *dst,
+ struct tipc_node *__dnode);
void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id,
struct sk_buff_head *xmitq);
void tipc_clone_to_loopback(struct net *net, struct sk_buff_head *pkts);
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 90cf7e0..40c0308 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -42,6 +42,7 @@
#include "socket.h"
#include "bcast.h"
#include "node.h"
+#include "crypto.h"
#include <linux/module.h>
@@ -67,6 +68,11 @@
INIT_LIST_HEAD(&tn->node_list);
spin_lock_init(&tn->node_list_lock);
+#ifdef CONFIG_TIPC_CRYPTO
+ err = tipc_crypto_start(&tn->crypto_tx, net, NULL);
+ if (err)
+ goto out_crypto;
+#endif
err = tipc_sk_rht_init(net);
if (err)
goto out_sk_rht;
@@ -92,6 +98,11 @@
out_nametbl:
tipc_sk_rht_destroy(net);
out_sk_rht:
+
+#ifdef CONFIG_TIPC_CRYPTO
+ tipc_crypto_stop(&tn->crypto_tx);
+out_crypto:
+#endif
return err;
}
@@ -107,11 +118,22 @@
tipc_bcast_stop(net);
tipc_nametbl_stop(net);
tipc_sk_rht_destroy(net);
-
+#ifdef CONFIG_TIPC_CRYPTO
+ tipc_crypto_stop(&tipc_net(net)->crypto_tx);
+#endif
while (atomic_read(&tn->wq_count))
cond_resched();
}
+static void __net_exit tipc_pernet_pre_exit(struct net *net)
+{
+ tipc_node_pre_cleanup_net(net);
+}
+
+static struct pernet_operations tipc_pernet_pre_exit_ops = {
+ .pre_exit = tipc_pernet_pre_exit,
+};
+
static struct pernet_operations tipc_net_ops = {
.init = tipc_init_net,
.exit = tipc_exit_net,
@@ -150,6 +172,10 @@
if (err)
goto out_pernet_topsrv;
+ err = register_pernet_subsys(&tipc_pernet_pre_exit_ops);
+ if (err)
+ goto out_register_pernet_subsys;
+
err = tipc_bearer_setup();
if (err)
goto out_bearer;
@@ -170,6 +196,8 @@
out_netlink:
tipc_bearer_cleanup();
out_bearer:
+ unregister_pernet_subsys(&tipc_pernet_pre_exit_ops);
+out_register_pernet_subsys:
unregister_pernet_device(&tipc_topsrv_net_ops);
out_pernet_topsrv:
tipc_socket_stop();
@@ -187,6 +215,7 @@
tipc_netlink_compat_stop();
tipc_netlink_stop();
tipc_bearer_cleanup();
+ unregister_pernet_subsys(&tipc_pernet_pre_exit_ops);
unregister_pernet_device(&tipc_topsrv_net_ops);
tipc_socket_stop();
unregister_pernet_device(&tipc_net_ops);
diff --git a/net/tipc/core.h b/net/tipc/core.h
index c6bda91..992924a 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -59,6 +59,7 @@
#include <net/netns/generic.h>
#include <linux/rhashtable.h>
#include <net/genetlink.h>
+#include <net/netns/hash.h>
#ifdef pr_fmt
#undef pr_fmt
@@ -73,6 +74,9 @@
struct tipc_name_table;
struct tipc_topsrv;
struct tipc_monitor;
+#ifdef CONFIG_TIPC_CRYPTO
+struct tipc_crypto;
+#endif
#define TIPC_MOD_VER "2.0.0"
@@ -141,6 +145,10 @@
/* Tracing of node internal messages */
struct packet_type loopback_pt;
+#ifdef CONFIG_TIPC_CRYPTO
+ /* TX crypto handler */
+ struct tipc_crypto *crypto_tx;
+#endif
/* Work item for net finalize */
struct tipc_net_work final_work;
/* The numbers of work queues in schedule */
@@ -202,6 +210,11 @@
return !less(val, min) && !more(val, max);
}
+static inline u32 tipc_net_hash_mixes(struct net *net, int tn_rand)
+{
+ return net_hash_mix(&init_net) ^ net_hash_mix(net) ^ tn_rand;
+}
+
#ifdef CONFIG_SYSCTL
int tipc_register_sysctl(void);
void tipc_unregister_sysctl(void);
diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
new file mode 100644
index 0000000..6f91b9a
--- /dev/null
+++ b/net/tipc/crypto.c
@@ -0,0 +1,2475 @@
+// SPDX-License-Identifier: GPL-2.0
+/**
+ * net/tipc/crypto.c: TIPC crypto for key handling & packet en/decryption
+ *
+ * Copyright (c) 2019, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <crypto/aead.h>
+#include <crypto/aes.h>
+#include <crypto/rng.h>
+#include "crypto.h"
+#include "msg.h"
+#include "bcast.h"
+
+#define TIPC_TX_GRACE_PERIOD msecs_to_jiffies(5000) /* 5s */
+#define TIPC_TX_LASTING_TIME msecs_to_jiffies(10000) /* 10s */
+#define TIPC_RX_ACTIVE_LIM msecs_to_jiffies(3000) /* 3s */
+#define TIPC_RX_PASSIVE_LIM msecs_to_jiffies(15000) /* 15s */
+
+#define TIPC_MAX_TFMS_DEF 10
+#define TIPC_MAX_TFMS_LIM 1000
+
+#define TIPC_REKEYING_INTV_DEF (60 * 24) /* default: 1 day */
+
+/**
+ * TIPC Key ids
+ */
+enum {
+ KEY_MASTER = 0,
+ KEY_MIN = KEY_MASTER,
+ KEY_1 = 1,
+ KEY_2,
+ KEY_3,
+ KEY_MAX = KEY_3,
+};
+
+/**
+ * TIPC Crypto statistics
+ */
+enum {
+ STAT_OK,
+ STAT_NOK,
+ STAT_ASYNC,
+ STAT_ASYNC_OK,
+ STAT_ASYNC_NOK,
+ STAT_BADKEYS, /* tx only */
+ STAT_BADMSGS = STAT_BADKEYS, /* rx only */
+ STAT_NOKEYS,
+ STAT_SWITCHES,
+
+ MAX_STATS,
+};
+
+/* TIPC crypto statistics' header */
+static const char *hstats[MAX_STATS] = {"ok", "nok", "async", "async_ok",
+ "async_nok", "badmsgs", "nokeys",
+ "switches"};
+
+/* Max TFMs number per key */
+int sysctl_tipc_max_tfms __read_mostly = TIPC_MAX_TFMS_DEF;
+/* Key exchange switch, default: on */
+int sysctl_tipc_key_exchange_enabled __read_mostly = 1;
+
+/**
+ * struct tipc_key - TIPC keys' status indicator
+ *
+ * 7 6 5 4 3 2 1 0
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * key: | (reserved)|passive idx| active idx|pending idx|
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ */
+struct tipc_key {
+#define KEY_BITS (2)
+#define KEY_MASK ((1 << KEY_BITS) - 1)
+ union {
+ struct {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ u8 pending:2,
+ active:2,
+ passive:2, /* rx only */
+ reserved:2;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ u8 reserved:2,
+ passive:2, /* rx only */
+ active:2,
+ pending:2;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+ } __packed;
+ u8 keys;
+ };
+};
+
+/**
+ * struct tipc_tfm - TIPC TFM structure to form a list of TFMs
+ */
+struct tipc_tfm {
+ struct crypto_aead *tfm;
+ struct list_head list;
+};
+
+/**
+ * struct tipc_aead - TIPC AEAD key structure
+ * @tfm_entry: per-cpu pointer to one entry in TFM list
+ * @crypto: TIPC crypto owns this key
+ * @cloned: reference to the source key in case cloning
+ * @users: the number of the key users (TX/RX)
+ * @salt: the key's SALT value
+ * @authsize: authentication tag size (max = 16)
+ * @mode: crypto mode is applied to the key
+ * @hint[]: a hint for user key
+ * @rcu: struct rcu_head
+ * @key: the aead key
+ * @gen: the key's generation
+ * @seqno: the key seqno (cluster scope)
+ * @refcnt: the key reference counter
+ */
+struct tipc_aead {
+#define TIPC_AEAD_HINT_LEN (5)
+ struct tipc_tfm * __percpu *tfm_entry;
+ struct tipc_crypto *crypto;
+ struct tipc_aead *cloned;
+ atomic_t users;
+ u32 salt;
+ u8 authsize;
+ u8 mode;
+ char hint[2 * TIPC_AEAD_HINT_LEN + 1];
+ struct rcu_head rcu;
+ struct tipc_aead_key *key;
+ u16 gen;
+
+ atomic64_t seqno ____cacheline_aligned;
+ refcount_t refcnt ____cacheline_aligned;
+
+} ____cacheline_aligned;
+
+/**
+ * struct tipc_crypto_stats - TIPC Crypto statistics
+ */
+struct tipc_crypto_stats {
+ unsigned int stat[MAX_STATS];
+};
+
+/**
+ * struct tipc_crypto - TIPC TX/RX crypto structure
+ * @net: struct net
+ * @node: TIPC node (RX)
+ * @aead: array of pointers to AEAD keys for encryption/decryption
+ * @peer_rx_active: replicated peer RX active key index
+ * @key_gen: TX/RX key generation
+ * @key: the key states
+ * @skey_mode: session key's mode
+ * @skey: received session key
+ * @wq: common workqueue on TX crypto
+ * @work: delayed work sched for TX/RX
+ * @key_distr: key distributing state
+ * @rekeying_intv: rekeying interval (in minutes)
+ * @stats: the crypto statistics
+ * @name: the crypto name
+ * @sndnxt: the per-peer sndnxt (TX)
+ * @timer1: general timer 1 (jiffies)
+ * @timer2: general timer 2 (jiffies)
+ * @working: the crypto is working or not
+ * @key_master: flag indicates if master key exists
+ * @legacy_user: flag indicates if a peer joins w/o master key (for bwd comp.)
+ * @nokey: no key indication
+ * @lock: tipc_key lock
+ */
+struct tipc_crypto {
+ struct net *net;
+ struct tipc_node *node;
+ struct tipc_aead __rcu *aead[KEY_MAX + 1];
+ atomic_t peer_rx_active;
+ u16 key_gen;
+ struct tipc_key key;
+ u8 skey_mode;
+ struct tipc_aead_key *skey;
+ struct workqueue_struct *wq;
+ struct delayed_work work;
+#define KEY_DISTR_SCHED 1
+#define KEY_DISTR_COMPL 2
+ atomic_t key_distr;
+ u32 rekeying_intv;
+
+ struct tipc_crypto_stats __percpu *stats;
+ char name[48];
+
+ atomic64_t sndnxt ____cacheline_aligned;
+ unsigned long timer1;
+ unsigned long timer2;
+ union {
+ struct {
+ u8 working:1;
+ u8 key_master:1;
+ u8 legacy_user:1;
+ u8 nokey: 1;
+ };
+ u8 flags;
+ };
+ spinlock_t lock; /* crypto lock */
+
+} ____cacheline_aligned;
+
+/* struct tipc_crypto_tx_ctx - TX context for callbacks */
+struct tipc_crypto_tx_ctx {
+ struct tipc_aead *aead;
+ struct tipc_bearer *bearer;
+ struct tipc_media_addr dst;
+};
+
+/* struct tipc_crypto_rx_ctx - RX context for callbacks */
+struct tipc_crypto_rx_ctx {
+ struct tipc_aead *aead;
+ struct tipc_bearer *bearer;
+};
+
+static struct tipc_aead *tipc_aead_get(struct tipc_aead __rcu *aead);
+static inline void tipc_aead_put(struct tipc_aead *aead);
+static void tipc_aead_free(struct rcu_head *rp);
+static int tipc_aead_users(struct tipc_aead __rcu *aead);
+static void tipc_aead_users_inc(struct tipc_aead __rcu *aead, int lim);
+static void tipc_aead_users_dec(struct tipc_aead __rcu *aead, int lim);
+static void tipc_aead_users_set(struct tipc_aead __rcu *aead, int val);
+static struct crypto_aead *tipc_aead_tfm_next(struct tipc_aead *aead);
+static int tipc_aead_init(struct tipc_aead **aead, struct tipc_aead_key *ukey,
+ u8 mode);
+static int tipc_aead_clone(struct tipc_aead **dst, struct tipc_aead *src);
+static void *tipc_aead_mem_alloc(struct crypto_aead *tfm,
+ unsigned int crypto_ctx_size,
+ u8 **iv, struct aead_request **req,
+ struct scatterlist **sg, int nsg);
+static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb,
+ struct tipc_bearer *b,
+ struct tipc_media_addr *dst,
+ struct tipc_node *__dnode);
+static void tipc_aead_encrypt_done(struct crypto_async_request *base, int err);
+static int tipc_aead_decrypt(struct net *net, struct tipc_aead *aead,
+ struct sk_buff *skb, struct tipc_bearer *b);
+static void tipc_aead_decrypt_done(struct crypto_async_request *base, int err);
+static inline int tipc_ehdr_size(struct tipc_ehdr *ehdr);
+static int tipc_ehdr_build(struct net *net, struct tipc_aead *aead,
+ u8 tx_key, struct sk_buff *skb,
+ struct tipc_crypto *__rx);
+static inline void tipc_crypto_key_set_state(struct tipc_crypto *c,
+ u8 new_passive,
+ u8 new_active,
+ u8 new_pending);
+static int tipc_crypto_key_attach(struct tipc_crypto *c,
+ struct tipc_aead *aead, u8 pos,
+ bool master_key);
+static bool tipc_crypto_key_try_align(struct tipc_crypto *rx, u8 new_pending);
+static struct tipc_aead *tipc_crypto_key_pick_tx(struct tipc_crypto *tx,
+ struct tipc_crypto *rx,
+ struct sk_buff *skb,
+ u8 tx_key);
+static void tipc_crypto_key_synch(struct tipc_crypto *rx, struct sk_buff *skb);
+static int tipc_crypto_key_revoke(struct net *net, u8 tx_key);
+static inline void tipc_crypto_clone_msg(struct net *net, struct sk_buff *_skb,
+ struct tipc_bearer *b,
+ struct tipc_media_addr *dst,
+ struct tipc_node *__dnode, u8 type);
+static void tipc_crypto_rcv_complete(struct net *net, struct tipc_aead *aead,
+ struct tipc_bearer *b,
+ struct sk_buff **skb, int err);
+static void tipc_crypto_do_cmd(struct net *net, int cmd);
+static char *tipc_crypto_key_dump(struct tipc_crypto *c, char *buf);
+static char *tipc_key_change_dump(struct tipc_key old, struct tipc_key new,
+ char *buf);
+static int tipc_crypto_key_xmit(struct net *net, struct tipc_aead_key *skey,
+ u16 gen, u8 mode, u32 dnode);
+static bool tipc_crypto_key_rcv(struct tipc_crypto *rx, struct tipc_msg *hdr);
+static void tipc_crypto_work_tx(struct work_struct *work);
+static void tipc_crypto_work_rx(struct work_struct *work);
+static int tipc_aead_key_generate(struct tipc_aead_key *skey);
+
+#define is_tx(crypto) (!(crypto)->node)
+#define is_rx(crypto) (!is_tx(crypto))
+
+#define key_next(cur) ((cur) % KEY_MAX + 1)
+
+#define tipc_aead_rcu_ptr(rcu_ptr, lock) \
+ rcu_dereference_protected((rcu_ptr), lockdep_is_held(lock))
+
+#define tipc_aead_rcu_replace(rcu_ptr, ptr, lock) \
+do { \
+ typeof(rcu_ptr) __tmp = rcu_dereference_protected((rcu_ptr), \
+ lockdep_is_held(lock)); \
+ rcu_assign_pointer((rcu_ptr), (ptr)); \
+ tipc_aead_put(__tmp); \
+} while (0)
+
+#define tipc_crypto_key_detach(rcu_ptr, lock) \
+ tipc_aead_rcu_replace((rcu_ptr), NULL, lock)
+
+/**
+ * tipc_aead_key_validate - Validate a AEAD user key
+ */
+int tipc_aead_key_validate(struct tipc_aead_key *ukey, struct genl_info *info)
+{
+ int keylen;
+
+ /* Check if algorithm exists */
+ if (unlikely(!crypto_has_alg(ukey->alg_name, 0, 0))) {
+ GENL_SET_ERR_MSG(info, "unable to load the algorithm (module existed?)");
+ return -ENODEV;
+ }
+
+ /* Currently, we only support the "gcm(aes)" cipher algorithm */
+ if (strcmp(ukey->alg_name, "gcm(aes)")) {
+ GENL_SET_ERR_MSG(info, "not supported yet the algorithm");
+ return -ENOTSUPP;
+ }
+
+ /* Check if key size is correct */
+ keylen = ukey->keylen - TIPC_AES_GCM_SALT_SIZE;
+ if (unlikely(keylen != TIPC_AES_GCM_KEY_SIZE_128 &&
+ keylen != TIPC_AES_GCM_KEY_SIZE_192 &&
+ keylen != TIPC_AES_GCM_KEY_SIZE_256)) {
+ GENL_SET_ERR_MSG(info, "incorrect key length (20, 28 or 36 octets?)");
+ return -EKEYREJECTED;
+ }
+
+ return 0;
+}
+
+/**
+ * tipc_aead_key_generate - Generate new session key
+ * @skey: input/output key with new content
+ *
+ * Return: 0 in case of success, otherwise < 0
+ */
+static int tipc_aead_key_generate(struct tipc_aead_key *skey)
+{
+ int rc = 0;
+
+ /* Fill the key's content with a random value via RNG cipher */
+ rc = crypto_get_default_rng();
+ if (likely(!rc)) {
+ rc = crypto_rng_get_bytes(crypto_default_rng, skey->key,
+ skey->keylen);
+ crypto_put_default_rng();
+ }
+
+ return rc;
+}
+
+static struct tipc_aead *tipc_aead_get(struct tipc_aead __rcu *aead)
+{
+ struct tipc_aead *tmp;
+
+ rcu_read_lock();
+ tmp = rcu_dereference(aead);
+ if (unlikely(!tmp || !refcount_inc_not_zero(&tmp->refcnt)))
+ tmp = NULL;
+ rcu_read_unlock();
+
+ return tmp;
+}
+
+static inline void tipc_aead_put(struct tipc_aead *aead)
+{
+ if (aead && refcount_dec_and_test(&aead->refcnt))
+ call_rcu(&aead->rcu, tipc_aead_free);
+}
+
+/**
+ * tipc_aead_free - Release AEAD key incl. all the TFMs in the list
+ * @rp: rcu head pointer
+ */
+static void tipc_aead_free(struct rcu_head *rp)
+{
+ struct tipc_aead *aead = container_of(rp, struct tipc_aead, rcu);
+ struct tipc_tfm *tfm_entry, *head, *tmp;
+
+ if (aead->cloned) {
+ tipc_aead_put(aead->cloned);
+ } else {
+ head = *get_cpu_ptr(aead->tfm_entry);
+ put_cpu_ptr(aead->tfm_entry);
+ list_for_each_entry_safe(tfm_entry, tmp, &head->list, list) {
+ crypto_free_aead(tfm_entry->tfm);
+ list_del(&tfm_entry->list);
+ kfree(tfm_entry);
+ }
+ /* Free the head */
+ crypto_free_aead(head->tfm);
+ list_del(&head->list);
+ kfree(head);
+ }
+ free_percpu(aead->tfm_entry);
+ kfree_sensitive(aead->key);
+ kfree(aead);
+}
+
+static int tipc_aead_users(struct tipc_aead __rcu *aead)
+{
+ struct tipc_aead *tmp;
+ int users = 0;
+
+ rcu_read_lock();
+ tmp = rcu_dereference(aead);
+ if (tmp)
+ users = atomic_read(&tmp->users);
+ rcu_read_unlock();
+
+ return users;
+}
+
+static void tipc_aead_users_inc(struct tipc_aead __rcu *aead, int lim)
+{
+ struct tipc_aead *tmp;
+
+ rcu_read_lock();
+ tmp = rcu_dereference(aead);
+ if (tmp)
+ atomic_add_unless(&tmp->users, 1, lim);
+ rcu_read_unlock();
+}
+
+static void tipc_aead_users_dec(struct tipc_aead __rcu *aead, int lim)
+{
+ struct tipc_aead *tmp;
+
+ rcu_read_lock();
+ tmp = rcu_dereference(aead);
+ if (tmp)
+ atomic_add_unless(&rcu_dereference(aead)->users, -1, lim);
+ rcu_read_unlock();
+}
+
+static void tipc_aead_users_set(struct tipc_aead __rcu *aead, int val)
+{
+ struct tipc_aead *tmp;
+ int cur;
+
+ rcu_read_lock();
+ tmp = rcu_dereference(aead);
+ if (tmp) {
+ do {
+ cur = atomic_read(&tmp->users);
+ if (cur == val)
+ break;
+ } while (atomic_cmpxchg(&tmp->users, cur, val) != cur);
+ }
+ rcu_read_unlock();
+}
+
+/**
+ * tipc_aead_tfm_next - Move TFM entry to the next one in list and return it
+ */
+static struct crypto_aead *tipc_aead_tfm_next(struct tipc_aead *aead)
+{
+ struct tipc_tfm **tfm_entry;
+ struct crypto_aead *tfm;
+
+ tfm_entry = get_cpu_ptr(aead->tfm_entry);
+ *tfm_entry = list_next_entry(*tfm_entry, list);
+ tfm = (*tfm_entry)->tfm;
+ put_cpu_ptr(tfm_entry);
+
+ return tfm;
+}
+
+/**
+ * tipc_aead_init - Initiate TIPC AEAD
+ * @aead: returned new TIPC AEAD key handle pointer
+ * @ukey: pointer to user key data
+ * @mode: the key mode
+ *
+ * Allocate a (list of) new cipher transformation (TFM) with the specific user
+ * key data if valid. The number of the allocated TFMs can be set via the sysfs
+ * "net/tipc/max_tfms" first.
+ * Also, all the other AEAD data are also initialized.
+ *
+ * Return: 0 if the initiation is successful, otherwise: < 0
+ */
+static int tipc_aead_init(struct tipc_aead **aead, struct tipc_aead_key *ukey,
+ u8 mode)
+{
+ struct tipc_tfm *tfm_entry, *head;
+ struct crypto_aead *tfm;
+ struct tipc_aead *tmp;
+ int keylen, err, cpu;
+ int tfm_cnt = 0;
+
+ if (unlikely(*aead))
+ return -EEXIST;
+
+ /* Allocate a new AEAD */
+ tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
+ if (unlikely(!tmp))
+ return -ENOMEM;
+
+ /* The key consists of two parts: [AES-KEY][SALT] */
+ keylen = ukey->keylen - TIPC_AES_GCM_SALT_SIZE;
+
+ /* Allocate per-cpu TFM entry pointer */
+ tmp->tfm_entry = alloc_percpu(struct tipc_tfm *);
+ if (!tmp->tfm_entry) {
+ kfree_sensitive(tmp);
+ return -ENOMEM;
+ }
+
+ /* Make a list of TFMs with the user key data */
+ do {
+ tfm = crypto_alloc_aead(ukey->alg_name, 0, 0);
+ if (IS_ERR(tfm)) {
+ err = PTR_ERR(tfm);
+ break;
+ }
+
+ if (unlikely(!tfm_cnt &&
+ crypto_aead_ivsize(tfm) != TIPC_AES_GCM_IV_SIZE)) {
+ crypto_free_aead(tfm);
+ err = -ENOTSUPP;
+ break;
+ }
+
+ err = crypto_aead_setauthsize(tfm, TIPC_AES_GCM_TAG_SIZE);
+ err |= crypto_aead_setkey(tfm, ukey->key, keylen);
+ if (unlikely(err)) {
+ crypto_free_aead(tfm);
+ break;
+ }
+
+ tfm_entry = kmalloc(sizeof(*tfm_entry), GFP_KERNEL);
+ if (unlikely(!tfm_entry)) {
+ crypto_free_aead(tfm);
+ err = -ENOMEM;
+ break;
+ }
+ INIT_LIST_HEAD(&tfm_entry->list);
+ tfm_entry->tfm = tfm;
+
+ /* First entry? */
+ if (!tfm_cnt) {
+ head = tfm_entry;
+ for_each_possible_cpu(cpu) {
+ *per_cpu_ptr(tmp->tfm_entry, cpu) = head;
+ }
+ } else {
+ list_add_tail(&tfm_entry->list, &head->list);
+ }
+
+ } while (++tfm_cnt < sysctl_tipc_max_tfms);
+
+ /* Not any TFM is allocated? */
+ if (!tfm_cnt) {
+ free_percpu(tmp->tfm_entry);
+ kfree_sensitive(tmp);
+ return err;
+ }
+
+ /* Form a hex string of some last bytes as the key's hint */
+ bin2hex(tmp->hint, ukey->key + keylen - TIPC_AEAD_HINT_LEN,
+ TIPC_AEAD_HINT_LEN);
+
+ /* Initialize the other data */
+ tmp->mode = mode;
+ tmp->cloned = NULL;
+ tmp->authsize = TIPC_AES_GCM_TAG_SIZE;
+ tmp->key = kmemdup(ukey, tipc_aead_key_size(ukey), GFP_KERNEL);
+ if (!tmp->key) {
+ tipc_aead_free(&tmp->rcu);
+ return -ENOMEM;
+ }
+ memcpy(&tmp->salt, ukey->key + keylen, TIPC_AES_GCM_SALT_SIZE);
+ atomic_set(&tmp->users, 0);
+ atomic64_set(&tmp->seqno, 0);
+ refcount_set(&tmp->refcnt, 1);
+
+ *aead = tmp;
+ return 0;
+}
+
+/**
+ * tipc_aead_clone - Clone a TIPC AEAD key
+ * @dst: dest key for the cloning
+ * @src: source key to clone from
+ *
+ * Make a "copy" of the source AEAD key data to the dest, the TFMs list is
+ * common for the keys.
+ * A reference to the source is hold in the "cloned" pointer for the later
+ * freeing purposes.
+ *
+ * Note: this must be done in cluster-key mode only!
+ * Return: 0 in case of success, otherwise < 0
+ */
+static int tipc_aead_clone(struct tipc_aead **dst, struct tipc_aead *src)
+{
+ struct tipc_aead *aead;
+ int cpu;
+
+ if (!src)
+ return -ENOKEY;
+
+ if (src->mode != CLUSTER_KEY)
+ return -EINVAL;
+
+ if (unlikely(*dst))
+ return -EEXIST;
+
+ aead = kzalloc(sizeof(*aead), GFP_ATOMIC);
+ if (unlikely(!aead))
+ return -ENOMEM;
+
+ aead->tfm_entry = alloc_percpu_gfp(struct tipc_tfm *, GFP_ATOMIC);
+ if (unlikely(!aead->tfm_entry)) {
+ kfree_sensitive(aead);
+ return -ENOMEM;
+ }
+
+ for_each_possible_cpu(cpu) {
+ *per_cpu_ptr(aead->tfm_entry, cpu) =
+ *per_cpu_ptr(src->tfm_entry, cpu);
+ }
+
+ memcpy(aead->hint, src->hint, sizeof(src->hint));
+ aead->mode = src->mode;
+ aead->salt = src->salt;
+ aead->authsize = src->authsize;
+ atomic_set(&aead->users, 0);
+ atomic64_set(&aead->seqno, 0);
+ refcount_set(&aead->refcnt, 1);
+
+ WARN_ON(!refcount_inc_not_zero(&src->refcnt));
+ aead->cloned = src;
+
+ *dst = aead;
+ return 0;
+}
+
+/**
+ * tipc_aead_mem_alloc - Allocate memory for AEAD request operations
+ * @tfm: cipher handle to be registered with the request
+ * @crypto_ctx_size: size of crypto context for callback
+ * @iv: returned pointer to IV data
+ * @req: returned pointer to AEAD request data
+ * @sg: returned pointer to SG lists
+ * @nsg: number of SG lists to be allocated
+ *
+ * Allocate memory to store the crypto context data, AEAD request, IV and SG
+ * lists, the memory layout is as follows:
+ * crypto_ctx || iv || aead_req || sg[]
+ *
+ * Return: the pointer to the memory areas in case of success, otherwise NULL
+ */
+static void *tipc_aead_mem_alloc(struct crypto_aead *tfm,
+ unsigned int crypto_ctx_size,
+ u8 **iv, struct aead_request **req,
+ struct scatterlist **sg, int nsg)
+{
+ unsigned int iv_size, req_size;
+ unsigned int len;
+ u8 *mem;
+
+ iv_size = crypto_aead_ivsize(tfm);
+ req_size = sizeof(**req) + crypto_aead_reqsize(tfm);
+
+ len = crypto_ctx_size;
+ len += iv_size;
+ len += crypto_aead_alignmask(tfm) & ~(crypto_tfm_ctx_alignment() - 1);
+ len = ALIGN(len, crypto_tfm_ctx_alignment());
+ len += req_size;
+ len = ALIGN(len, __alignof__(struct scatterlist));
+ len += nsg * sizeof(**sg);
+
+ mem = kmalloc(len, GFP_ATOMIC);
+ if (!mem)
+ return NULL;
+
+ *iv = (u8 *)PTR_ALIGN(mem + crypto_ctx_size,
+ crypto_aead_alignmask(tfm) + 1);
+ *req = (struct aead_request *)PTR_ALIGN(*iv + iv_size,
+ crypto_tfm_ctx_alignment());
+ *sg = (struct scatterlist *)PTR_ALIGN((u8 *)*req + req_size,
+ __alignof__(struct scatterlist));
+
+ return (void *)mem;
+}
+
+/**
+ * tipc_aead_encrypt - Encrypt a message
+ * @aead: TIPC AEAD key for the message encryption
+ * @skb: the input/output skb
+ * @b: TIPC bearer where the message will be delivered after the encryption
+ * @dst: the destination media address
+ * @__dnode: TIPC dest node if "known"
+ *
+ * Return:
+ * 0 : if the encryption has completed
+ * -EINPROGRESS/-EBUSY : if a callback will be performed
+ * < 0 : the encryption has failed
+ */
+static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb,
+ struct tipc_bearer *b,
+ struct tipc_media_addr *dst,
+ struct tipc_node *__dnode)
+{
+ struct crypto_aead *tfm = tipc_aead_tfm_next(aead);
+ struct tipc_crypto_tx_ctx *tx_ctx;
+ struct aead_request *req;
+ struct sk_buff *trailer;
+ struct scatterlist *sg;
+ struct tipc_ehdr *ehdr;
+ int ehsz, len, tailen, nsg, rc;
+ void *ctx;
+ u32 salt;
+ u8 *iv;
+
+ /* Make sure message len at least 4-byte aligned */
+ len = ALIGN(skb->len, 4);
+ tailen = len - skb->len + aead->authsize;
+
+ /* Expand skb tail for authentication tag:
+ * As for simplicity, we'd have made sure skb having enough tailroom
+ * for authentication tag @skb allocation. Even when skb is nonlinear
+ * but there is no frag_list, it should be still fine!
+ * Otherwise, we must cow it to be a writable buffer with the tailroom.
+ */
+ SKB_LINEAR_ASSERT(skb);
+ if (tailen > skb_tailroom(skb)) {
+ pr_debug("TX(): skb tailroom is not enough: %d, requires: %d\n",
+ skb_tailroom(skb), tailen);
+ }
+
+ if (unlikely(!skb_cloned(skb) && tailen <= skb_tailroom(skb))) {
+ nsg = 1;
+ trailer = skb;
+ } else {
+ /* TODO: We could avoid skb_cow_data() if skb has no frag_list
+ * e.g. by skb_fill_page_desc() to add another page to the skb
+ * with the wanted tailen... However, page skbs look not often,
+ * so take it easy now!
+ * Cloned skbs e.g. from link_xmit() seems no choice though :(
+ */
+ nsg = skb_cow_data(skb, tailen, &trailer);
+ if (unlikely(nsg < 0)) {
+ pr_err("TX: skb_cow_data() returned %d\n", nsg);
+ return nsg;
+ }
+ }
+
+ pskb_put(skb, trailer, tailen);
+
+ /* Allocate memory for the AEAD operation */
+ ctx = tipc_aead_mem_alloc(tfm, sizeof(*tx_ctx), &iv, &req, &sg, nsg);
+ if (unlikely(!ctx))
+ return -ENOMEM;
+ TIPC_SKB_CB(skb)->crypto_ctx = ctx;
+
+ /* Map skb to the sg lists */
+ sg_init_table(sg, nsg);
+ rc = skb_to_sgvec(skb, sg, 0, skb->len);
+ if (unlikely(rc < 0)) {
+ pr_err("TX: skb_to_sgvec() returned %d, nsg %d!\n", rc, nsg);
+ goto exit;
+ }
+
+ /* Prepare IV: [SALT (4 octets)][SEQNO (8 octets)]
+ * In case we're in cluster-key mode, SALT is varied by xor-ing with
+ * the source address (or w0 of id), otherwise with the dest address
+ * if dest is known.
+ */
+ ehdr = (struct tipc_ehdr *)skb->data;
+ salt = aead->salt;
+ if (aead->mode == CLUSTER_KEY)
+ salt ^= ehdr->addr; /* __be32 */
+ else if (__dnode)
+ salt ^= tipc_node_get_addr(__dnode);
+ memcpy(iv, &salt, 4);
+ memcpy(iv + 4, (u8 *)&ehdr->seqno, 8);
+
+ /* Prepare request */
+ ehsz = tipc_ehdr_size(ehdr);
+ aead_request_set_tfm(req, tfm);
+ aead_request_set_ad(req, ehsz);
+ aead_request_set_crypt(req, sg, sg, len - ehsz, iv);
+
+ /* Set callback function & data */
+ aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ tipc_aead_encrypt_done, skb);
+ tx_ctx = (struct tipc_crypto_tx_ctx *)ctx;
+ tx_ctx->aead = aead;
+ tx_ctx->bearer = b;
+ memcpy(&tx_ctx->dst, dst, sizeof(*dst));
+
+ /* Hold bearer */
+ if (unlikely(!tipc_bearer_hold(b))) {
+ rc = -ENODEV;
+ goto exit;
+ }
+
+ /* Now, do encrypt */
+ rc = crypto_aead_encrypt(req);
+ if (rc == -EINPROGRESS || rc == -EBUSY)
+ return rc;
+
+ tipc_bearer_put(b);
+
+exit:
+ kfree(ctx);
+ TIPC_SKB_CB(skb)->crypto_ctx = NULL;
+ return rc;
+}
+
+static void tipc_aead_encrypt_done(struct crypto_async_request *base, int err)
+{
+ struct sk_buff *skb = base->data;
+ struct tipc_crypto_tx_ctx *tx_ctx = TIPC_SKB_CB(skb)->crypto_ctx;
+ struct tipc_bearer *b = tx_ctx->bearer;
+ struct tipc_aead *aead = tx_ctx->aead;
+ struct tipc_crypto *tx = aead->crypto;
+ struct net *net = tx->net;
+
+ switch (err) {
+ case 0:
+ this_cpu_inc(tx->stats->stat[STAT_ASYNC_OK]);
+ rcu_read_lock();
+ if (likely(test_bit(0, &b->up)))
+ b->media->send_msg(net, skb, b, &tx_ctx->dst);
+ else
+ kfree_skb(skb);
+ rcu_read_unlock();
+ break;
+ case -EINPROGRESS:
+ return;
+ default:
+ this_cpu_inc(tx->stats->stat[STAT_ASYNC_NOK]);
+ kfree_skb(skb);
+ break;
+ }
+
+ kfree(tx_ctx);
+ tipc_bearer_put(b);
+ tipc_aead_put(aead);
+}
+
+/**
+ * tipc_aead_decrypt - Decrypt an encrypted message
+ * @net: struct net
+ * @aead: TIPC AEAD for the message decryption
+ * @skb: the input/output skb
+ * @b: TIPC bearer where the message has been received
+ *
+ * Return:
+ * 0 : if the decryption has completed
+ * -EINPROGRESS/-EBUSY : if a callback will be performed
+ * < 0 : the decryption has failed
+ */
+static int tipc_aead_decrypt(struct net *net, struct tipc_aead *aead,
+ struct sk_buff *skb, struct tipc_bearer *b)
+{
+ struct tipc_crypto_rx_ctx *rx_ctx;
+ struct aead_request *req;
+ struct crypto_aead *tfm;
+ struct sk_buff *unused;
+ struct scatterlist *sg;
+ struct tipc_ehdr *ehdr;
+ int ehsz, nsg, rc;
+ void *ctx;
+ u32 salt;
+ u8 *iv;
+
+ if (unlikely(!aead))
+ return -ENOKEY;
+
+ nsg = skb_cow_data(skb, 0, &unused);
+ if (unlikely(nsg < 0)) {
+ pr_err("RX: skb_cow_data() returned %d\n", nsg);
+ return nsg;
+ }
+
+ /* Allocate memory for the AEAD operation */
+ tfm = tipc_aead_tfm_next(aead);
+ ctx = tipc_aead_mem_alloc(tfm, sizeof(*rx_ctx), &iv, &req, &sg, nsg);
+ if (unlikely(!ctx))
+ return -ENOMEM;
+ TIPC_SKB_CB(skb)->crypto_ctx = ctx;
+
+ /* Map skb to the sg lists */
+ sg_init_table(sg, nsg);
+ rc = skb_to_sgvec(skb, sg, 0, skb->len);
+ if (unlikely(rc < 0)) {
+ pr_err("RX: skb_to_sgvec() returned %d, nsg %d\n", rc, nsg);
+ goto exit;
+ }
+
+ /* Reconstruct IV: */
+ ehdr = (struct tipc_ehdr *)skb->data;
+ salt = aead->salt;
+ if (aead->mode == CLUSTER_KEY)
+ salt ^= ehdr->addr; /* __be32 */
+ else if (ehdr->destined)
+ salt ^= tipc_own_addr(net);
+ memcpy(iv, &salt, 4);
+ memcpy(iv + 4, (u8 *)&ehdr->seqno, 8);
+
+ /* Prepare request */
+ ehsz = tipc_ehdr_size(ehdr);
+ aead_request_set_tfm(req, tfm);
+ aead_request_set_ad(req, ehsz);
+ aead_request_set_crypt(req, sg, sg, skb->len - ehsz, iv);
+
+ /* Set callback function & data */
+ aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ tipc_aead_decrypt_done, skb);
+ rx_ctx = (struct tipc_crypto_rx_ctx *)ctx;
+ rx_ctx->aead = aead;
+ rx_ctx->bearer = b;
+
+ /* Hold bearer */
+ if (unlikely(!tipc_bearer_hold(b))) {
+ rc = -ENODEV;
+ goto exit;
+ }
+
+ /* Now, do decrypt */
+ rc = crypto_aead_decrypt(req);
+ if (rc == -EINPROGRESS || rc == -EBUSY)
+ return rc;
+
+ tipc_bearer_put(b);
+
+exit:
+ kfree(ctx);
+ TIPC_SKB_CB(skb)->crypto_ctx = NULL;
+ return rc;
+}
+
+static void tipc_aead_decrypt_done(struct crypto_async_request *base, int err)
+{
+ struct sk_buff *skb = base->data;
+ struct tipc_crypto_rx_ctx *rx_ctx = TIPC_SKB_CB(skb)->crypto_ctx;
+ struct tipc_bearer *b = rx_ctx->bearer;
+ struct tipc_aead *aead = rx_ctx->aead;
+ struct tipc_crypto_stats __percpu *stats = aead->crypto->stats;
+ struct net *net = aead->crypto->net;
+
+ switch (err) {
+ case 0:
+ this_cpu_inc(stats->stat[STAT_ASYNC_OK]);
+ break;
+ case -EINPROGRESS:
+ return;
+ default:
+ this_cpu_inc(stats->stat[STAT_ASYNC_NOK]);
+ break;
+ }
+
+ kfree(rx_ctx);
+ tipc_crypto_rcv_complete(net, aead, b, &skb, err);
+ if (likely(skb)) {
+ if (likely(test_bit(0, &b->up)))
+ tipc_rcv(net, skb, b);
+ else
+ kfree_skb(skb);
+ }
+
+ tipc_bearer_put(b);
+}
+
+static inline int tipc_ehdr_size(struct tipc_ehdr *ehdr)
+{
+ return (ehdr->user != LINK_CONFIG) ? EHDR_SIZE : EHDR_CFG_SIZE;
+}
+
+/**
+ * tipc_ehdr_validate - Validate an encryption message
+ * @skb: the message buffer
+ *
+ * Returns "true" if this is a valid encryption message, otherwise "false"
+ */
+bool tipc_ehdr_validate(struct sk_buff *skb)
+{
+ struct tipc_ehdr *ehdr;
+ int ehsz;
+
+ if (unlikely(!pskb_may_pull(skb, EHDR_MIN_SIZE)))
+ return false;
+
+ ehdr = (struct tipc_ehdr *)skb->data;
+ if (unlikely(ehdr->version != TIPC_EVERSION))
+ return false;
+ ehsz = tipc_ehdr_size(ehdr);
+ if (unlikely(!pskb_may_pull(skb, ehsz)))
+ return false;
+ if (unlikely(skb->len <= ehsz + TIPC_AES_GCM_TAG_SIZE))
+ return false;
+
+ return true;
+}
+
+/**
+ * tipc_ehdr_build - Build TIPC encryption message header
+ * @net: struct net
+ * @aead: TX AEAD key to be used for the message encryption
+ * @tx_key: key id used for the message encryption
+ * @skb: input/output message skb
+ * @__rx: RX crypto handle if dest is "known"
+ *
+ * Return: the header size if the building is successful, otherwise < 0
+ */
+static int tipc_ehdr_build(struct net *net, struct tipc_aead *aead,
+ u8 tx_key, struct sk_buff *skb,
+ struct tipc_crypto *__rx)
+{
+ struct tipc_msg *hdr = buf_msg(skb);
+ struct tipc_ehdr *ehdr;
+ u32 user = msg_user(hdr);
+ u64 seqno;
+ int ehsz;
+
+ /* Make room for encryption header */
+ ehsz = (user != LINK_CONFIG) ? EHDR_SIZE : EHDR_CFG_SIZE;
+ WARN_ON(skb_headroom(skb) < ehsz);
+ ehdr = (struct tipc_ehdr *)skb_push(skb, ehsz);
+
+ /* Obtain a seqno first:
+ * Use the key seqno (= cluster wise) if dest is unknown or we're in
+ * cluster key mode, otherwise it's better for a per-peer seqno!
+ */
+ if (!__rx || aead->mode == CLUSTER_KEY)
+ seqno = atomic64_inc_return(&aead->seqno);
+ else
+ seqno = atomic64_inc_return(&__rx->sndnxt);
+
+ /* Revoke the key if seqno is wrapped around */
+ if (unlikely(!seqno))
+ return tipc_crypto_key_revoke(net, tx_key);
+
+ /* Word 1-2 */
+ ehdr->seqno = cpu_to_be64(seqno);
+
+ /* Words 0, 3- */
+ ehdr->version = TIPC_EVERSION;
+ ehdr->user = 0;
+ ehdr->keepalive = 0;
+ ehdr->tx_key = tx_key;
+ ehdr->destined = (__rx) ? 1 : 0;
+ ehdr->rx_key_active = (__rx) ? __rx->key.active : 0;
+ ehdr->rx_nokey = (__rx) ? __rx->nokey : 0;
+ ehdr->master_key = aead->crypto->key_master;
+ ehdr->reserved_1 = 0;
+ ehdr->reserved_2 = 0;
+
+ switch (user) {
+ case LINK_CONFIG:
+ ehdr->user = LINK_CONFIG;
+ memcpy(ehdr->id, tipc_own_id(net), NODE_ID_LEN);
+ break;
+ default:
+ if (user == LINK_PROTOCOL && msg_type(hdr) == STATE_MSG) {
+ ehdr->user = LINK_PROTOCOL;
+ ehdr->keepalive = msg_is_keepalive(hdr);
+ }
+ ehdr->addr = hdr->hdr[3];
+ break;
+ }
+
+ return ehsz;
+}
+
+static inline void tipc_crypto_key_set_state(struct tipc_crypto *c,
+ u8 new_passive,
+ u8 new_active,
+ u8 new_pending)
+{
+ struct tipc_key old = c->key;
+ char buf[32];
+
+ c->key.keys = ((new_passive & KEY_MASK) << (KEY_BITS * 2)) |
+ ((new_active & KEY_MASK) << (KEY_BITS)) |
+ ((new_pending & KEY_MASK));
+
+ pr_debug("%s: key changing %s ::%pS\n", c->name,
+ tipc_key_change_dump(old, c->key, buf),
+ __builtin_return_address(0));
+}
+
+/**
+ * tipc_crypto_key_init - Initiate a new user / AEAD key
+ * @c: TIPC crypto to which new key is attached
+ * @ukey: the user key
+ * @mode: the key mode (CLUSTER_KEY or PER_NODE_KEY)
+ * @master_key: specify this is a cluster master key
+ *
+ * A new TIPC AEAD key will be allocated and initiated with the specified user
+ * key, then attached to the TIPC crypto.
+ *
+ * Return: new key id in case of success, otherwise: < 0
+ */
+int tipc_crypto_key_init(struct tipc_crypto *c, struct tipc_aead_key *ukey,
+ u8 mode, bool master_key)
+{
+ struct tipc_aead *aead = NULL;
+ int rc = 0;
+
+ /* Initiate with the new user key */
+ rc = tipc_aead_init(&aead, ukey, mode);
+
+ /* Attach it to the crypto */
+ if (likely(!rc)) {
+ rc = tipc_crypto_key_attach(c, aead, 0, master_key);
+ if (rc < 0)
+ tipc_aead_free(&aead->rcu);
+ }
+
+ return rc;
+}
+
+/**
+ * tipc_crypto_key_attach - Attach a new AEAD key to TIPC crypto
+ * @c: TIPC crypto to which the new AEAD key is attached
+ * @aead: the new AEAD key pointer
+ * @pos: desired slot in the crypto key array, = 0 if any!
+ * @master_key: specify this is a cluster master key
+ *
+ * Return: new key id in case of success, otherwise: -EBUSY
+ */
+static int tipc_crypto_key_attach(struct tipc_crypto *c,
+ struct tipc_aead *aead, u8 pos,
+ bool master_key)
+{
+ struct tipc_key key;
+ int rc = -EBUSY;
+ u8 new_key;
+
+ spin_lock_bh(&c->lock);
+ key = c->key;
+ if (master_key) {
+ new_key = KEY_MASTER;
+ goto attach;
+ }
+ if (key.active && key.passive)
+ goto exit;
+ if (key.pending) {
+ if (tipc_aead_users(c->aead[key.pending]) > 0)
+ goto exit;
+ /* if (pos): ok with replacing, will be aligned when needed */
+ /* Replace it */
+ new_key = key.pending;
+ } else {
+ if (pos) {
+ if (key.active && pos != key_next(key.active)) {
+ key.passive = pos;
+ new_key = pos;
+ goto attach;
+ } else if (!key.active && !key.passive) {
+ key.pending = pos;
+ new_key = pos;
+ goto attach;
+ }
+ }
+ key.pending = key_next(key.active ?: key.passive);
+ new_key = key.pending;
+ }
+
+attach:
+ aead->crypto = c;
+ aead->gen = (is_tx(c)) ? ++c->key_gen : c->key_gen;
+ tipc_aead_rcu_replace(c->aead[new_key], aead, &c->lock);
+ if (likely(c->key.keys != key.keys))
+ tipc_crypto_key_set_state(c, key.passive, key.active,
+ key.pending);
+ c->working = 1;
+ c->nokey = 0;
+ c->key_master |= master_key;
+ rc = new_key;
+
+exit:
+ spin_unlock_bh(&c->lock);
+ return rc;
+}
+
+void tipc_crypto_key_flush(struct tipc_crypto *c)
+{
+ struct tipc_crypto *tx, *rx;
+ int k;
+
+ spin_lock_bh(&c->lock);
+ if (is_rx(c)) {
+ /* Try to cancel pending work */
+ rx = c;
+ tx = tipc_net(rx->net)->crypto_tx;
+ if (cancel_delayed_work(&rx->work)) {
+ kfree(rx->skey);
+ rx->skey = NULL;
+ atomic_xchg(&rx->key_distr, 0);
+ tipc_node_put(rx->node);
+ }
+ /* RX stopping => decrease TX key users if any */
+ k = atomic_xchg(&rx->peer_rx_active, 0);
+ if (k) {
+ tipc_aead_users_dec(tx->aead[k], 0);
+ /* Mark the point TX key users changed */
+ tx->timer1 = jiffies;
+ }
+ }
+
+ c->flags = 0;
+ tipc_crypto_key_set_state(c, 0, 0, 0);
+ for (k = KEY_MIN; k <= KEY_MAX; k++)
+ tipc_crypto_key_detach(c->aead[k], &c->lock);
+ atomic64_set(&c->sndnxt, 0);
+ spin_unlock_bh(&c->lock);
+}
+
+/**
+ * tipc_crypto_key_try_align - Align RX keys if possible
+ * @rx: RX crypto handle
+ * @new_pending: new pending slot if aligned (= TX key from peer)
+ *
+ * Peer has used an unknown key slot, this only happens when peer has left and
+ * rejoned, or we are newcomer.
+ * That means, there must be no active key but a pending key at unaligned slot.
+ * If so, we try to move the pending key to the new slot.
+ * Note: A potential passive key can exist, it will be shifted correspondingly!
+ *
+ * Return: "true" if key is successfully aligned, otherwise "false"
+ */
+static bool tipc_crypto_key_try_align(struct tipc_crypto *rx, u8 new_pending)
+{
+ struct tipc_aead *tmp1, *tmp2 = NULL;
+ struct tipc_key key;
+ bool aligned = false;
+ u8 new_passive = 0;
+ int x;
+
+ spin_lock(&rx->lock);
+ key = rx->key;
+ if (key.pending == new_pending) {
+ aligned = true;
+ goto exit;
+ }
+ if (key.active)
+ goto exit;
+ if (!key.pending)
+ goto exit;
+ if (tipc_aead_users(rx->aead[key.pending]) > 0)
+ goto exit;
+
+ /* Try to "isolate" this pending key first */
+ tmp1 = tipc_aead_rcu_ptr(rx->aead[key.pending], &rx->lock);
+ if (!refcount_dec_if_one(&tmp1->refcnt))
+ goto exit;
+ rcu_assign_pointer(rx->aead[key.pending], NULL);
+
+ /* Move passive key if any */
+ if (key.passive) {
+ tmp2 = rcu_replace_pointer(rx->aead[key.passive], tmp2, lockdep_is_held(&rx->lock));
+ x = (key.passive - key.pending + new_pending) % KEY_MAX;
+ new_passive = (x <= 0) ? x + KEY_MAX : x;
+ }
+
+ /* Re-allocate the key(s) */
+ tipc_crypto_key_set_state(rx, new_passive, 0, new_pending);
+ rcu_assign_pointer(rx->aead[new_pending], tmp1);
+ if (new_passive)
+ rcu_assign_pointer(rx->aead[new_passive], tmp2);
+ refcount_set(&tmp1->refcnt, 1);
+ aligned = true;
+ pr_info_ratelimited("%s: key[%d] -> key[%d]\n", rx->name, key.pending,
+ new_pending);
+
+exit:
+ spin_unlock(&rx->lock);
+ return aligned;
+}
+
+/**
+ * tipc_crypto_key_pick_tx - Pick one TX key for message decryption
+ * @tx: TX crypto handle
+ * @rx: RX crypto handle (can be NULL)
+ * @skb: the message skb which will be decrypted later
+ * @tx_key: peer TX key id
+ *
+ * This function looks up the existing TX keys and pick one which is suitable
+ * for the message decryption, that must be a cluster key and not used before
+ * on the same message (i.e. recursive).
+ *
+ * Return: the TX AEAD key handle in case of success, otherwise NULL
+ */
+static struct tipc_aead *tipc_crypto_key_pick_tx(struct tipc_crypto *tx,
+ struct tipc_crypto *rx,
+ struct sk_buff *skb,
+ u8 tx_key)
+{
+ struct tipc_skb_cb *skb_cb = TIPC_SKB_CB(skb);
+ struct tipc_aead *aead = NULL;
+ struct tipc_key key = tx->key;
+ u8 k, i = 0;
+
+ /* Initialize data if not yet */
+ if (!skb_cb->tx_clone_deferred) {
+ skb_cb->tx_clone_deferred = 1;
+ memset(&skb_cb->tx_clone_ctx, 0, sizeof(skb_cb->tx_clone_ctx));
+ }
+
+ skb_cb->tx_clone_ctx.rx = rx;
+ if (++skb_cb->tx_clone_ctx.recurs > 2)
+ return NULL;
+
+ /* Pick one TX key */
+ spin_lock(&tx->lock);
+ if (tx_key == KEY_MASTER) {
+ aead = tipc_aead_rcu_ptr(tx->aead[KEY_MASTER], &tx->lock);
+ goto done;
+ }
+ do {
+ k = (i == 0) ? key.pending :
+ ((i == 1) ? key.active : key.passive);
+ if (!k)
+ continue;
+ aead = tipc_aead_rcu_ptr(tx->aead[k], &tx->lock);
+ if (!aead)
+ continue;
+ if (aead->mode != CLUSTER_KEY ||
+ aead == skb_cb->tx_clone_ctx.last) {
+ aead = NULL;
+ continue;
+ }
+ /* Ok, found one cluster key */
+ skb_cb->tx_clone_ctx.last = aead;
+ WARN_ON(skb->next);
+ skb->next = skb_clone(skb, GFP_ATOMIC);
+ if (unlikely(!skb->next))
+ pr_warn("Failed to clone skb for next round if any\n");
+ break;
+ } while (++i < 3);
+
+done:
+ if (likely(aead))
+ WARN_ON(!refcount_inc_not_zero(&aead->refcnt));
+ spin_unlock(&tx->lock);
+
+ return aead;
+}
+
+/**
+ * tipc_crypto_key_synch: Synch own key data according to peer key status
+ * @rx: RX crypto handle
+ * @skb: TIPCv2 message buffer (incl. the ehdr from peer)
+ *
+ * This function updates the peer node related data as the peer RX active key
+ * has changed, so the number of TX keys' users on this node are increased and
+ * decreased correspondingly.
+ *
+ * It also considers if peer has no key, then we need to make own master key
+ * (if any) taking over i.e. starting grace period and also trigger key
+ * distributing process.
+ *
+ * The "per-peer" sndnxt is also reset when the peer key has switched.
+ */
+static void tipc_crypto_key_synch(struct tipc_crypto *rx, struct sk_buff *skb)
+{
+ struct tipc_ehdr *ehdr = (struct tipc_ehdr *)skb_network_header(skb);
+ struct tipc_crypto *tx = tipc_net(rx->net)->crypto_tx;
+ struct tipc_msg *hdr = buf_msg(skb);
+ u32 self = tipc_own_addr(rx->net);
+ u8 cur, new;
+ unsigned long delay;
+
+ /* Update RX 'key_master' flag according to peer, also mark "legacy" if
+ * a peer has no master key.
+ */
+ rx->key_master = ehdr->master_key;
+ if (!rx->key_master)
+ tx->legacy_user = 1;
+
+ /* For later cases, apply only if message is destined to this node */
+ if (!ehdr->destined || msg_short(hdr) || msg_destnode(hdr) != self)
+ return;
+
+ /* Case 1: Peer has no keys, let's make master key take over */
+ if (ehdr->rx_nokey) {
+ /* Set or extend grace period */
+ tx->timer2 = jiffies;
+ /* Schedule key distributing for the peer if not yet */
+ if (tx->key.keys &&
+ !atomic_cmpxchg(&rx->key_distr, 0, KEY_DISTR_SCHED)) {
+ get_random_bytes(&delay, 2);
+ delay %= 5;
+ delay = msecs_to_jiffies(500 * ++delay);
+ if (queue_delayed_work(tx->wq, &rx->work, delay))
+ tipc_node_get(rx->node);
+ }
+ } else {
+ /* Cancel a pending key distributing if any */
+ atomic_xchg(&rx->key_distr, 0);
+ }
+
+ /* Case 2: Peer RX active key has changed, let's update own TX users */
+ cur = atomic_read(&rx->peer_rx_active);
+ new = ehdr->rx_key_active;
+ if (tx->key.keys &&
+ cur != new &&
+ atomic_cmpxchg(&rx->peer_rx_active, cur, new) == cur) {
+ if (new)
+ tipc_aead_users_inc(tx->aead[new], INT_MAX);
+ if (cur)
+ tipc_aead_users_dec(tx->aead[cur], 0);
+
+ atomic64_set(&rx->sndnxt, 0);
+ /* Mark the point TX key users changed */
+ tx->timer1 = jiffies;
+
+ pr_debug("%s: key users changed %d-- %d++, peer %s\n",
+ tx->name, cur, new, rx->name);
+ }
+}
+
+static int tipc_crypto_key_revoke(struct net *net, u8 tx_key)
+{
+ struct tipc_crypto *tx = tipc_net(net)->crypto_tx;
+ struct tipc_key key;
+
+ spin_lock(&tx->lock);
+ key = tx->key;
+ WARN_ON(!key.active || tx_key != key.active);
+
+ /* Free the active key */
+ tipc_crypto_key_set_state(tx, key.passive, 0, key.pending);
+ tipc_crypto_key_detach(tx->aead[key.active], &tx->lock);
+ spin_unlock(&tx->lock);
+
+ pr_warn("%s: key is revoked\n", tx->name);
+ return -EKEYREVOKED;
+}
+
+int tipc_crypto_start(struct tipc_crypto **crypto, struct net *net,
+ struct tipc_node *node)
+{
+ struct tipc_crypto *c;
+
+ if (*crypto)
+ return -EEXIST;
+
+ /* Allocate crypto */
+ c = kzalloc(sizeof(*c), GFP_ATOMIC);
+ if (!c)
+ return -ENOMEM;
+
+ /* Allocate workqueue on TX */
+ if (!node) {
+ c->wq = alloc_ordered_workqueue("tipc_crypto", 0);
+ if (!c->wq) {
+ kfree(c);
+ return -ENOMEM;
+ }
+ }
+
+ /* Allocate statistic structure */
+ c->stats = alloc_percpu_gfp(struct tipc_crypto_stats, GFP_ATOMIC);
+ if (!c->stats) {
+ if (c->wq)
+ destroy_workqueue(c->wq);
+ kfree_sensitive(c);
+ return -ENOMEM;
+ }
+
+ c->flags = 0;
+ c->net = net;
+ c->node = node;
+ get_random_bytes(&c->key_gen, 2);
+ tipc_crypto_key_set_state(c, 0, 0, 0);
+ atomic_set(&c->key_distr, 0);
+ atomic_set(&c->peer_rx_active, 0);
+ atomic64_set(&c->sndnxt, 0);
+ c->timer1 = jiffies;
+ c->timer2 = jiffies;
+ c->rekeying_intv = TIPC_REKEYING_INTV_DEF;
+ spin_lock_init(&c->lock);
+ scnprintf(c->name, 48, "%s(%s)", (is_rx(c)) ? "RX" : "TX",
+ (is_rx(c)) ? tipc_node_get_id_str(c->node) :
+ tipc_own_id_string(c->net));
+
+ if (is_rx(c))
+ INIT_DELAYED_WORK(&c->work, tipc_crypto_work_rx);
+ else
+ INIT_DELAYED_WORK(&c->work, tipc_crypto_work_tx);
+
+ *crypto = c;
+ return 0;
+}
+
+void tipc_crypto_stop(struct tipc_crypto **crypto)
+{
+ struct tipc_crypto *c = *crypto;
+ u8 k;
+
+ if (!c)
+ return;
+
+ /* Flush any queued works & destroy wq */
+ if (is_tx(c)) {
+ c->rekeying_intv = 0;
+ cancel_delayed_work_sync(&c->work);
+ destroy_workqueue(c->wq);
+ }
+
+ /* Release AEAD keys */
+ rcu_read_lock();
+ for (k = KEY_MIN; k <= KEY_MAX; k++)
+ tipc_aead_put(rcu_dereference(c->aead[k]));
+ rcu_read_unlock();
+ pr_debug("%s: has been stopped\n", c->name);
+
+ /* Free this crypto statistics */
+ free_percpu(c->stats);
+
+ *crypto = NULL;
+ kfree_sensitive(c);
+}
+
+void tipc_crypto_timeout(struct tipc_crypto *rx)
+{
+ struct tipc_net *tn = tipc_net(rx->net);
+ struct tipc_crypto *tx = tn->crypto_tx;
+ struct tipc_key key;
+ int cmd;
+
+ /* TX pending: taking all users & stable -> active */
+ spin_lock(&tx->lock);
+ key = tx->key;
+ if (key.active && tipc_aead_users(tx->aead[key.active]) > 0)
+ goto s1;
+ if (!key.pending || tipc_aead_users(tx->aead[key.pending]) <= 0)
+ goto s1;
+ if (time_before(jiffies, tx->timer1 + TIPC_TX_LASTING_TIME))
+ goto s1;
+
+ tipc_crypto_key_set_state(tx, key.passive, key.pending, 0);
+ if (key.active)
+ tipc_crypto_key_detach(tx->aead[key.active], &tx->lock);
+ this_cpu_inc(tx->stats->stat[STAT_SWITCHES]);
+ pr_info("%s: key[%d] is activated\n", tx->name, key.pending);
+
+s1:
+ spin_unlock(&tx->lock);
+
+ /* RX pending: having user -> active */
+ spin_lock(&rx->lock);
+ key = rx->key;
+ if (!key.pending || tipc_aead_users(rx->aead[key.pending]) <= 0)
+ goto s2;
+
+ if (key.active)
+ key.passive = key.active;
+ key.active = key.pending;
+ rx->timer2 = jiffies;
+ tipc_crypto_key_set_state(rx, key.passive, key.active, 0);
+ this_cpu_inc(rx->stats->stat[STAT_SWITCHES]);
+ pr_info("%s: key[%d] is activated\n", rx->name, key.pending);
+ goto s5;
+
+s2:
+ /* RX pending: not working -> remove */
+ if (!key.pending || tipc_aead_users(rx->aead[key.pending]) > -10)
+ goto s3;
+
+ tipc_crypto_key_set_state(rx, key.passive, key.active, 0);
+ tipc_crypto_key_detach(rx->aead[key.pending], &rx->lock);
+ pr_debug("%s: key[%d] is removed\n", rx->name, key.pending);
+ goto s5;
+
+s3:
+ /* RX active: timed out or no user -> pending */
+ if (!key.active)
+ goto s4;
+ if (time_before(jiffies, rx->timer1 + TIPC_RX_ACTIVE_LIM) &&
+ tipc_aead_users(rx->aead[key.active]) > 0)
+ goto s4;
+
+ if (key.pending)
+ key.passive = key.active;
+ else
+ key.pending = key.active;
+ rx->timer2 = jiffies;
+ tipc_crypto_key_set_state(rx, key.passive, 0, key.pending);
+ tipc_aead_users_set(rx->aead[key.pending], 0);
+ pr_debug("%s: key[%d] is deactivated\n", rx->name, key.active);
+ goto s5;
+
+s4:
+ /* RX passive: outdated or not working -> free */
+ if (!key.passive)
+ goto s5;
+ if (time_before(jiffies, rx->timer2 + TIPC_RX_PASSIVE_LIM) &&
+ tipc_aead_users(rx->aead[key.passive]) > -10)
+ goto s5;
+
+ tipc_crypto_key_set_state(rx, 0, key.active, key.pending);
+ tipc_crypto_key_detach(rx->aead[key.passive], &rx->lock);
+ pr_debug("%s: key[%d] is freed\n", rx->name, key.passive);
+
+s5:
+ spin_unlock(&rx->lock);
+
+ /* Relax it here, the flag will be set again if it really is, but only
+ * when we are not in grace period for safety!
+ */
+ if (time_after(jiffies, tx->timer2 + TIPC_TX_GRACE_PERIOD))
+ tx->legacy_user = 0;
+
+ /* Limit max_tfms & do debug commands if needed */
+ if (likely(sysctl_tipc_max_tfms <= TIPC_MAX_TFMS_LIM))
+ return;
+
+ cmd = sysctl_tipc_max_tfms;
+ sysctl_tipc_max_tfms = TIPC_MAX_TFMS_DEF;
+ tipc_crypto_do_cmd(rx->net, cmd);
+}
+
+static inline void tipc_crypto_clone_msg(struct net *net, struct sk_buff *_skb,
+ struct tipc_bearer *b,
+ struct tipc_media_addr *dst,
+ struct tipc_node *__dnode, u8 type)
+{
+ struct sk_buff *skb;
+
+ skb = skb_clone(_skb, GFP_ATOMIC);
+ if (skb) {
+ TIPC_SKB_CB(skb)->xmit_type = type;
+ tipc_crypto_xmit(net, &skb, b, dst, __dnode);
+ if (skb)
+ b->media->send_msg(net, skb, b, dst);
+ }
+}
+
+/**
+ * tipc_crypto_xmit - Build & encrypt TIPC message for xmit
+ * @net: struct net
+ * @skb: input/output message skb pointer
+ * @b: bearer used for xmit later
+ * @dst: destination media address
+ * @__dnode: destination node for reference if any
+ *
+ * First, build an encryption message header on the top of the message, then
+ * encrypt the original TIPC message by using the pending, master or active
+ * key with this preference order.
+ * If the encryption is successful, the encrypted skb is returned directly or
+ * via the callback.
+ * Otherwise, the skb is freed!
+ *
+ * Return:
+ * 0 : the encryption has succeeded (or no encryption)
+ * -EINPROGRESS/-EBUSY : the encryption is ongoing, a callback will be made
+ * -ENOKEK : the encryption has failed due to no key
+ * -EKEYREVOKED : the encryption has failed due to key revoked
+ * -ENOMEM : the encryption has failed due to no memory
+ * < 0 : the encryption has failed due to other reasons
+ */
+int tipc_crypto_xmit(struct net *net, struct sk_buff **skb,
+ struct tipc_bearer *b, struct tipc_media_addr *dst,
+ struct tipc_node *__dnode)
+{
+ struct tipc_crypto *__rx = tipc_node_crypto_rx(__dnode);
+ struct tipc_crypto *tx = tipc_net(net)->crypto_tx;
+ struct tipc_crypto_stats __percpu *stats = tx->stats;
+ struct tipc_msg *hdr = buf_msg(*skb);
+ struct tipc_key key = tx->key;
+ struct tipc_aead *aead = NULL;
+ u32 user = msg_user(hdr);
+ u32 type = msg_type(hdr);
+ int rc = -ENOKEY;
+ u8 tx_key = 0;
+
+ /* No encryption? */
+ if (!tx->working)
+ return 0;
+
+ /* Pending key if peer has active on it or probing time */
+ if (unlikely(key.pending)) {
+ tx_key = key.pending;
+ if (!tx->key_master && !key.active)
+ goto encrypt;
+ if (__rx && atomic_read(&__rx->peer_rx_active) == tx_key)
+ goto encrypt;
+ if (TIPC_SKB_CB(*skb)->xmit_type == SKB_PROBING) {
+ pr_debug("%s: probing for key[%d]\n", tx->name,
+ key.pending);
+ goto encrypt;
+ }
+ if (user == LINK_CONFIG || user == LINK_PROTOCOL)
+ tipc_crypto_clone_msg(net, *skb, b, dst, __dnode,
+ SKB_PROBING);
+ }
+
+ /* Master key if this is a *vital* message or in grace period */
+ if (tx->key_master) {
+ tx_key = KEY_MASTER;
+ if (!key.active)
+ goto encrypt;
+ if (TIPC_SKB_CB(*skb)->xmit_type == SKB_GRACING) {
+ pr_debug("%s: gracing for msg (%d %d)\n", tx->name,
+ user, type);
+ goto encrypt;
+ }
+ if (user == LINK_CONFIG ||
+ (user == LINK_PROTOCOL && type == RESET_MSG) ||
+ (user == MSG_CRYPTO && type == KEY_DISTR_MSG) ||
+ time_before(jiffies, tx->timer2 + TIPC_TX_GRACE_PERIOD)) {
+ if (__rx && __rx->key_master &&
+ !atomic_read(&__rx->peer_rx_active))
+ goto encrypt;
+ if (!__rx) {
+ if (likely(!tx->legacy_user))
+ goto encrypt;
+ tipc_crypto_clone_msg(net, *skb, b, dst,
+ __dnode, SKB_GRACING);
+ }
+ }
+ }
+
+ /* Else, use the active key if any */
+ if (likely(key.active)) {
+ tx_key = key.active;
+ goto encrypt;
+ }
+
+ goto exit;
+
+encrypt:
+ aead = tipc_aead_get(tx->aead[tx_key]);
+ if (unlikely(!aead))
+ goto exit;
+ rc = tipc_ehdr_build(net, aead, tx_key, *skb, __rx);
+ if (likely(rc > 0))
+ rc = tipc_aead_encrypt(aead, *skb, b, dst, __dnode);
+
+exit:
+ switch (rc) {
+ case 0:
+ this_cpu_inc(stats->stat[STAT_OK]);
+ break;
+ case -EINPROGRESS:
+ case -EBUSY:
+ this_cpu_inc(stats->stat[STAT_ASYNC]);
+ *skb = NULL;
+ return rc;
+ default:
+ this_cpu_inc(stats->stat[STAT_NOK]);
+ if (rc == -ENOKEY)
+ this_cpu_inc(stats->stat[STAT_NOKEYS]);
+ else if (rc == -EKEYREVOKED)
+ this_cpu_inc(stats->stat[STAT_BADKEYS]);
+ kfree_skb(*skb);
+ *skb = NULL;
+ break;
+ }
+
+ tipc_aead_put(aead);
+ return rc;
+}
+
+/**
+ * tipc_crypto_rcv - Decrypt an encrypted TIPC message from peer
+ * @net: struct net
+ * @rx: RX crypto handle
+ * @skb: input/output message skb pointer
+ * @b: bearer where the message has been received
+ *
+ * If the decryption is successful, the decrypted skb is returned directly or
+ * as the callback, the encryption header and auth tag will be trimed out
+ * before forwarding to tipc_rcv() via the tipc_crypto_rcv_complete().
+ * Otherwise, the skb will be freed!
+ * Note: RX key(s) can be re-aligned, or in case of no key suitable, TX
+ * cluster key(s) can be taken for decryption (- recursive).
+ *
+ * Return:
+ * 0 : the decryption has successfully completed
+ * -EINPROGRESS/-EBUSY : the decryption is ongoing, a callback will be made
+ * -ENOKEY : the decryption has failed due to no key
+ * -EBADMSG : the decryption has failed due to bad message
+ * -ENOMEM : the decryption has failed due to no memory
+ * < 0 : the decryption has failed due to other reasons
+ */
+int tipc_crypto_rcv(struct net *net, struct tipc_crypto *rx,
+ struct sk_buff **skb, struct tipc_bearer *b)
+{
+ struct tipc_crypto *tx = tipc_net(net)->crypto_tx;
+ struct tipc_crypto_stats __percpu *stats;
+ struct tipc_aead *aead = NULL;
+ struct tipc_key key;
+ int rc = -ENOKEY;
+ u8 tx_key, n;
+
+ tx_key = ((struct tipc_ehdr *)(*skb)->data)->tx_key;
+
+ /* New peer?
+ * Let's try with TX key (i.e. cluster mode) & verify the skb first!
+ */
+ if (unlikely(!rx || tx_key == KEY_MASTER))
+ goto pick_tx;
+
+ /* Pick RX key according to TX key if any */
+ key = rx->key;
+ if (tx_key == key.active || tx_key == key.pending ||
+ tx_key == key.passive)
+ goto decrypt;
+
+ /* Unknown key, let's try to align RX key(s) */
+ if (tipc_crypto_key_try_align(rx, tx_key))
+ goto decrypt;
+
+pick_tx:
+ /* No key suitable? Try to pick one from TX... */
+ aead = tipc_crypto_key_pick_tx(tx, rx, *skb, tx_key);
+ if (aead)
+ goto decrypt;
+ goto exit;
+
+decrypt:
+ rcu_read_lock();
+ if (!aead)
+ aead = tipc_aead_get(rx->aead[tx_key]);
+ rc = tipc_aead_decrypt(net, aead, *skb, b);
+ rcu_read_unlock();
+
+exit:
+ stats = ((rx) ?: tx)->stats;
+ switch (rc) {
+ case 0:
+ this_cpu_inc(stats->stat[STAT_OK]);
+ break;
+ case -EINPROGRESS:
+ case -EBUSY:
+ this_cpu_inc(stats->stat[STAT_ASYNC]);
+ *skb = NULL;
+ return rc;
+ default:
+ this_cpu_inc(stats->stat[STAT_NOK]);
+ if (rc == -ENOKEY) {
+ kfree_skb(*skb);
+ *skb = NULL;
+ if (rx) {
+ /* Mark rx->nokey only if we dont have a
+ * pending received session key, nor a newer
+ * one i.e. in the next slot.
+ */
+ n = key_next(tx_key);
+ rx->nokey = !(rx->skey ||
+ rcu_access_pointer(rx->aead[n]));
+ pr_debug_ratelimited("%s: nokey %d, key %d/%x\n",
+ rx->name, rx->nokey,
+ tx_key, rx->key.keys);
+ tipc_node_put(rx->node);
+ }
+ this_cpu_inc(stats->stat[STAT_NOKEYS]);
+ return rc;
+ } else if (rc == -EBADMSG) {
+ this_cpu_inc(stats->stat[STAT_BADMSGS]);
+ }
+ break;
+ }
+
+ tipc_crypto_rcv_complete(net, aead, b, skb, rc);
+ return rc;
+}
+
+static void tipc_crypto_rcv_complete(struct net *net, struct tipc_aead *aead,
+ struct tipc_bearer *b,
+ struct sk_buff **skb, int err)
+{
+ struct tipc_skb_cb *skb_cb = TIPC_SKB_CB(*skb);
+ struct tipc_crypto *rx = aead->crypto;
+ struct tipc_aead *tmp = NULL;
+ struct tipc_ehdr *ehdr;
+ struct tipc_node *n;
+
+ /* Is this completed by TX? */
+ if (unlikely(is_tx(aead->crypto))) {
+ rx = skb_cb->tx_clone_ctx.rx;
+ pr_debug("TX->RX(%s): err %d, aead %p, skb->next %p, flags %x\n",
+ (rx) ? tipc_node_get_id_str(rx->node) : "-", err, aead,
+ (*skb)->next, skb_cb->flags);
+ pr_debug("skb_cb [recurs %d, last %p], tx->aead [%p %p %p]\n",
+ skb_cb->tx_clone_ctx.recurs, skb_cb->tx_clone_ctx.last,
+ aead->crypto->aead[1], aead->crypto->aead[2],
+ aead->crypto->aead[3]);
+ if (unlikely(err)) {
+ if (err == -EBADMSG && (*skb)->next)
+ tipc_rcv(net, (*skb)->next, b);
+ goto free_skb;
+ }
+
+ if (likely((*skb)->next)) {
+ kfree_skb((*skb)->next);
+ (*skb)->next = NULL;
+ }
+ ehdr = (struct tipc_ehdr *)(*skb)->data;
+ if (!rx) {
+ WARN_ON(ehdr->user != LINK_CONFIG);
+ n = tipc_node_create(net, 0, ehdr->id, 0xffffu, 0,
+ true);
+ rx = tipc_node_crypto_rx(n);
+ if (unlikely(!rx))
+ goto free_skb;
+ }
+
+ /* Ignore cloning if it was TX master key */
+ if (ehdr->tx_key == KEY_MASTER)
+ goto rcv;
+ if (tipc_aead_clone(&tmp, aead) < 0)
+ goto rcv;
+ WARN_ON(!refcount_inc_not_zero(&tmp->refcnt));
+ if (tipc_crypto_key_attach(rx, tmp, ehdr->tx_key, false) < 0) {
+ tipc_aead_free(&tmp->rcu);
+ goto rcv;
+ }
+ tipc_aead_put(aead);
+ aead = tmp;
+ }
+
+ if (unlikely(err)) {
+ tipc_aead_users_dec(aead, INT_MIN);
+ goto free_skb;
+ }
+
+ /* Set the RX key's user */
+ tipc_aead_users_set(aead, 1);
+
+ /* Mark this point, RX works */
+ rx->timer1 = jiffies;
+
+rcv:
+ /* Remove ehdr & auth. tag prior to tipc_rcv() */
+ ehdr = (struct tipc_ehdr *)(*skb)->data;
+
+ /* Mark this point, RX passive still works */
+ if (rx->key.passive && ehdr->tx_key == rx->key.passive)
+ rx->timer2 = jiffies;
+
+ skb_reset_network_header(*skb);
+ skb_pull(*skb, tipc_ehdr_size(ehdr));
+ pskb_trim(*skb, (*skb)->len - aead->authsize);
+
+ /* Validate TIPCv2 message */
+ if (unlikely(!tipc_msg_validate(skb))) {
+ pr_err_ratelimited("Packet dropped after decryption!\n");
+ goto free_skb;
+ }
+
+ /* Ok, everything's fine, try to synch own keys according to peers' */
+ tipc_crypto_key_synch(rx, *skb);
+
+ /* Mark skb decrypted */
+ skb_cb->decrypted = 1;
+
+ /* Clear clone cxt if any */
+ if (likely(!skb_cb->tx_clone_deferred))
+ goto exit;
+ skb_cb->tx_clone_deferred = 0;
+ memset(&skb_cb->tx_clone_ctx, 0, sizeof(skb_cb->tx_clone_ctx));
+ goto exit;
+
+free_skb:
+ kfree_skb(*skb);
+ *skb = NULL;
+
+exit:
+ tipc_aead_put(aead);
+ if (rx)
+ tipc_node_put(rx->node);
+}
+
+static void tipc_crypto_do_cmd(struct net *net, int cmd)
+{
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_crypto *tx = tn->crypto_tx, *rx;
+ struct list_head *p;
+ unsigned int stat;
+ int i, j, cpu;
+ char buf[200];
+
+ /* Currently only one command is supported */
+ switch (cmd) {
+ case 0xfff1:
+ goto print_stats;
+ default:
+ return;
+ }
+
+print_stats:
+ /* Print a header */
+ pr_info("\n=============== TIPC Crypto Statistics ===============\n\n");
+
+ /* Print key status */
+ pr_info("Key status:\n");
+ pr_info("TX(%7.7s)\n%s", tipc_own_id_string(net),
+ tipc_crypto_key_dump(tx, buf));
+
+ rcu_read_lock();
+ for (p = tn->node_list.next; p != &tn->node_list; p = p->next) {
+ rx = tipc_node_crypto_rx_by_list(p);
+ pr_info("RX(%7.7s)\n%s", tipc_node_get_id_str(rx->node),
+ tipc_crypto_key_dump(rx, buf));
+ }
+ rcu_read_unlock();
+
+ /* Print crypto statistics */
+ for (i = 0, j = 0; i < MAX_STATS; i++)
+ j += scnprintf(buf + j, 200 - j, "|%11s ", hstats[i]);
+ pr_info("Counter %s", buf);
+
+ memset(buf, '-', 115);
+ buf[115] = '\0';
+ pr_info("%s\n", buf);
+
+ j = scnprintf(buf, 200, "TX(%7.7s) ", tipc_own_id_string(net));
+ for_each_possible_cpu(cpu) {
+ for (i = 0; i < MAX_STATS; i++) {
+ stat = per_cpu_ptr(tx->stats, cpu)->stat[i];
+ j += scnprintf(buf + j, 200 - j, "|%11d ", stat);
+ }
+ pr_info("%s", buf);
+ j = scnprintf(buf, 200, "%12s", " ");
+ }
+
+ rcu_read_lock();
+ for (p = tn->node_list.next; p != &tn->node_list; p = p->next) {
+ rx = tipc_node_crypto_rx_by_list(p);
+ j = scnprintf(buf, 200, "RX(%7.7s) ",
+ tipc_node_get_id_str(rx->node));
+ for_each_possible_cpu(cpu) {
+ for (i = 0; i < MAX_STATS; i++) {
+ stat = per_cpu_ptr(rx->stats, cpu)->stat[i];
+ j += scnprintf(buf + j, 200 - j, "|%11d ",
+ stat);
+ }
+ pr_info("%s", buf);
+ j = scnprintf(buf, 200, "%12s", " ");
+ }
+ }
+ rcu_read_unlock();
+
+ pr_info("\n======================== Done ========================\n");
+}
+
+static char *tipc_crypto_key_dump(struct tipc_crypto *c, char *buf)
+{
+ struct tipc_key key = c->key;
+ struct tipc_aead *aead;
+ int k, i = 0;
+ char *s;
+
+ for (k = KEY_MIN; k <= KEY_MAX; k++) {
+ if (k == KEY_MASTER) {
+ if (is_rx(c))
+ continue;
+ if (time_before(jiffies,
+ c->timer2 + TIPC_TX_GRACE_PERIOD))
+ s = "ACT";
+ else
+ s = "PAS";
+ } else {
+ if (k == key.passive)
+ s = "PAS";
+ else if (k == key.active)
+ s = "ACT";
+ else if (k == key.pending)
+ s = "PEN";
+ else
+ s = "-";
+ }
+ i += scnprintf(buf + i, 200 - i, "\tKey%d: %s", k, s);
+
+ rcu_read_lock();
+ aead = rcu_dereference(c->aead[k]);
+ if (aead)
+ i += scnprintf(buf + i, 200 - i,
+ "{\"0x...%s\", \"%s\"}/%d:%d",
+ aead->hint,
+ (aead->mode == CLUSTER_KEY) ? "c" : "p",
+ atomic_read(&aead->users),
+ refcount_read(&aead->refcnt));
+ rcu_read_unlock();
+ i += scnprintf(buf + i, 200 - i, "\n");
+ }
+
+ if (is_rx(c))
+ i += scnprintf(buf + i, 200 - i, "\tPeer RX active: %d\n",
+ atomic_read(&c->peer_rx_active));
+
+ return buf;
+}
+
+static char *tipc_key_change_dump(struct tipc_key old, struct tipc_key new,
+ char *buf)
+{
+ struct tipc_key *key = &old;
+ int k, i = 0;
+ char *s;
+
+ /* Output format: "[%s %s %s] -> [%s %s %s]", max len = 32 */
+again:
+ i += scnprintf(buf + i, 32 - i, "[");
+ for (k = KEY_1; k <= KEY_3; k++) {
+ if (k == key->passive)
+ s = "pas";
+ else if (k == key->active)
+ s = "act";
+ else if (k == key->pending)
+ s = "pen";
+ else
+ s = "-";
+ i += scnprintf(buf + i, 32 - i,
+ (k != KEY_3) ? "%s " : "%s", s);
+ }
+ if (key != &new) {
+ i += scnprintf(buf + i, 32 - i, "] -> ");
+ key = &new;
+ goto again;
+ }
+ i += scnprintf(buf + i, 32 - i, "]");
+ return buf;
+}
+
+/**
+ * tipc_crypto_msg_rcv - Common 'MSG_CRYPTO' processing point
+ * @net: the struct net
+ * @skb: the receiving message buffer
+ */
+void tipc_crypto_msg_rcv(struct net *net, struct sk_buff *skb)
+{
+ struct tipc_crypto *rx;
+ struct tipc_msg *hdr;
+
+ if (unlikely(skb_linearize(skb)))
+ goto exit;
+
+ hdr = buf_msg(skb);
+ rx = tipc_node_crypto_rx_by_addr(net, msg_prevnode(hdr));
+ if (unlikely(!rx))
+ goto exit;
+
+ switch (msg_type(hdr)) {
+ case KEY_DISTR_MSG:
+ if (tipc_crypto_key_rcv(rx, hdr))
+ goto exit;
+ break;
+ default:
+ break;
+ }
+
+ tipc_node_put(rx->node);
+
+exit:
+ kfree_skb(skb);
+}
+
+/**
+ * tipc_crypto_key_distr - Distribute a TX key
+ * @tx: the TX crypto
+ * @key: the key's index
+ * @dest: the destination tipc node, = NULL if distributing to all nodes
+ *
+ * Return: 0 in case of success, otherwise < 0
+ */
+int tipc_crypto_key_distr(struct tipc_crypto *tx, u8 key,
+ struct tipc_node *dest)
+{
+ struct tipc_aead *aead;
+ u32 dnode = tipc_node_get_addr(dest);
+ int rc = -ENOKEY;
+
+ if (!sysctl_tipc_key_exchange_enabled)
+ return 0;
+
+ if (key) {
+ rcu_read_lock();
+ aead = tipc_aead_get(tx->aead[key]);
+ if (likely(aead)) {
+ rc = tipc_crypto_key_xmit(tx->net, aead->key,
+ aead->gen, aead->mode,
+ dnode);
+ tipc_aead_put(aead);
+ }
+ rcu_read_unlock();
+ }
+
+ return rc;
+}
+
+/**
+ * tipc_crypto_key_xmit - Send a session key
+ * @net: the struct net
+ * @skey: the session key to be sent
+ * @gen: the key's generation
+ * @mode: the key's mode
+ * @dnode: the destination node address, = 0 if broadcasting to all nodes
+ *
+ * The session key 'skey' is packed in a TIPC v2 'MSG_CRYPTO/KEY_DISTR_MSG'
+ * as its data section, then xmit-ed through the uc/bc link.
+ *
+ * Return: 0 in case of success, otherwise < 0
+ */
+static int tipc_crypto_key_xmit(struct net *net, struct tipc_aead_key *skey,
+ u16 gen, u8 mode, u32 dnode)
+{
+ struct sk_buff_head pkts;
+ struct tipc_msg *hdr;
+ struct sk_buff *skb;
+ u16 size, cong_link_cnt;
+ u8 *data;
+ int rc;
+
+ size = tipc_aead_key_size(skey);
+ skb = tipc_buf_acquire(INT_H_SIZE + size, GFP_ATOMIC);
+ if (!skb)
+ return -ENOMEM;
+
+ hdr = buf_msg(skb);
+ tipc_msg_init(tipc_own_addr(net), hdr, MSG_CRYPTO, KEY_DISTR_MSG,
+ INT_H_SIZE, dnode);
+ msg_set_size(hdr, INT_H_SIZE + size);
+ msg_set_key_gen(hdr, gen);
+ msg_set_key_mode(hdr, mode);
+
+ data = msg_data(hdr);
+ *((__be32 *)(data + TIPC_AEAD_ALG_NAME)) = htonl(skey->keylen);
+ memcpy(data, skey->alg_name, TIPC_AEAD_ALG_NAME);
+ memcpy(data + TIPC_AEAD_ALG_NAME + sizeof(__be32), skey->key,
+ skey->keylen);
+
+ __skb_queue_head_init(&pkts);
+ __skb_queue_tail(&pkts, skb);
+ if (dnode)
+ rc = tipc_node_xmit(net, &pkts, dnode, 0);
+ else
+ rc = tipc_bcast_xmit(net, &pkts, &cong_link_cnt);
+
+ return rc;
+}
+
+/**
+ * tipc_crypto_key_rcv - Receive a session key
+ * @rx: the RX crypto
+ * @hdr: the TIPC v2 message incl. the receiving session key in its data
+ *
+ * This function retrieves the session key in the message from peer, then
+ * schedules a RX work to attach the key to the corresponding RX crypto.
+ *
+ * Return: "true" if the key has been scheduled for attaching, otherwise
+ * "false".
+ */
+static bool tipc_crypto_key_rcv(struct tipc_crypto *rx, struct tipc_msg *hdr)
+{
+ struct tipc_crypto *tx = tipc_net(rx->net)->crypto_tx;
+ struct tipc_aead_key *skey = NULL;
+ u16 key_gen = msg_key_gen(hdr);
+ u32 size = msg_data_sz(hdr);
+ u8 *data = msg_data(hdr);
+ unsigned int keylen;
+
+ /* Verify whether the size can exist in the packet */
+ if (unlikely(size < sizeof(struct tipc_aead_key) + TIPC_AEAD_KEYLEN_MIN)) {
+ pr_debug("%s: message data size is too small\n", rx->name);
+ goto exit;
+ }
+
+ keylen = ntohl(*((__be32 *)(data + TIPC_AEAD_ALG_NAME)));
+
+ /* Verify the supplied size values */
+ if (unlikely(size != keylen + sizeof(struct tipc_aead_key) ||
+ keylen > TIPC_AEAD_KEY_SIZE_MAX)) {
+ pr_debug("%s: invalid MSG_CRYPTO key size\n", rx->name);
+ goto exit;
+ }
+
+ spin_lock(&rx->lock);
+ if (unlikely(rx->skey || (key_gen == rx->key_gen && rx->key.keys))) {
+ pr_err("%s: key existed <%p>, gen %d vs %d\n", rx->name,
+ rx->skey, key_gen, rx->key_gen);
+ goto exit_unlock;
+ }
+
+ /* Allocate memory for the key */
+ skey = kmalloc(size, GFP_ATOMIC);
+ if (unlikely(!skey)) {
+ pr_err("%s: unable to allocate memory for skey\n", rx->name);
+ goto exit_unlock;
+ }
+
+ /* Copy key from msg data */
+ skey->keylen = keylen;
+ memcpy(skey->alg_name, data, TIPC_AEAD_ALG_NAME);
+ memcpy(skey->key, data + TIPC_AEAD_ALG_NAME + sizeof(__be32),
+ skey->keylen);
+
+ rx->key_gen = key_gen;
+ rx->skey_mode = msg_key_mode(hdr);
+ rx->skey = skey;
+ rx->nokey = 0;
+ mb(); /* for nokey flag */
+
+exit_unlock:
+ spin_unlock(&rx->lock);
+
+exit:
+ /* Schedule the key attaching on this crypto */
+ if (likely(skey && queue_delayed_work(tx->wq, &rx->work, 0)))
+ return true;
+
+ return false;
+}
+
+/**
+ * tipc_crypto_work_rx - Scheduled RX works handler
+ * @work: the struct RX work
+ *
+ * The function processes the previous scheduled works i.e. distributing TX key
+ * or attaching a received session key on RX crypto.
+ */
+static void tipc_crypto_work_rx(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct tipc_crypto *rx = container_of(dwork, struct tipc_crypto, work);
+ struct tipc_crypto *tx = tipc_net(rx->net)->crypto_tx;
+ unsigned long delay = msecs_to_jiffies(5000);
+ bool resched = false;
+ u8 key;
+ int rc;
+
+ /* Case 1: Distribute TX key to peer if scheduled */
+ if (atomic_cmpxchg(&rx->key_distr,
+ KEY_DISTR_SCHED,
+ KEY_DISTR_COMPL) == KEY_DISTR_SCHED) {
+ /* Always pick the newest one for distributing */
+ key = tx->key.pending ?: tx->key.active;
+ rc = tipc_crypto_key_distr(tx, key, rx->node);
+ if (unlikely(rc))
+ pr_warn("%s: unable to distr key[%d] to %s, err %d\n",
+ tx->name, key, tipc_node_get_id_str(rx->node),
+ rc);
+
+ /* Sched for key_distr releasing */
+ resched = true;
+ } else {
+ atomic_cmpxchg(&rx->key_distr, KEY_DISTR_COMPL, 0);
+ }
+
+ /* Case 2: Attach a pending received session key from peer if any */
+ if (rx->skey) {
+ rc = tipc_crypto_key_init(rx, rx->skey, rx->skey_mode, false);
+ if (unlikely(rc < 0))
+ pr_warn("%s: unable to attach received skey, err %d\n",
+ rx->name, rc);
+ switch (rc) {
+ case -EBUSY:
+ case -ENOMEM:
+ /* Resched the key attaching */
+ resched = true;
+ break;
+ default:
+ synchronize_rcu();
+ kfree(rx->skey);
+ rx->skey = NULL;
+ break;
+ }
+ }
+
+ if (resched && queue_delayed_work(tx->wq, &rx->work, delay))
+ return;
+
+ tipc_node_put(rx->node);
+}
+
+/**
+ * tipc_crypto_rekeying_sched - (Re)schedule rekeying w/o new interval
+ * @tx: TX crypto
+ * @changed: if the rekeying needs to be rescheduled with new interval
+ * @new_intv: new rekeying interval (when "changed" = true)
+ */
+void tipc_crypto_rekeying_sched(struct tipc_crypto *tx, bool changed,
+ u32 new_intv)
+{
+ unsigned long delay;
+ bool now = false;
+
+ if (changed) {
+ if (new_intv == TIPC_REKEYING_NOW)
+ now = true;
+ else
+ tx->rekeying_intv = new_intv;
+ cancel_delayed_work_sync(&tx->work);
+ }
+
+ if (tx->rekeying_intv || now) {
+ delay = (now) ? 0 : tx->rekeying_intv * 60 * 1000;
+ queue_delayed_work(tx->wq, &tx->work, msecs_to_jiffies(delay));
+ }
+}
+
+/**
+ * tipc_crypto_work_tx - Scheduled TX works handler
+ * @work: the struct TX work
+ *
+ * The function processes the previous scheduled work, i.e. key rekeying, by
+ * generating a new session key based on current one, then attaching it to the
+ * TX crypto and finally distributing it to peers. It also re-schedules the
+ * rekeying if needed.
+ */
+static void tipc_crypto_work_tx(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct tipc_crypto *tx = container_of(dwork, struct tipc_crypto, work);
+ struct tipc_aead_key *skey = NULL;
+ struct tipc_key key = tx->key;
+ struct tipc_aead *aead;
+ int rc = -ENOMEM;
+
+ if (unlikely(key.pending))
+ goto resched;
+
+ /* Take current key as a template */
+ rcu_read_lock();
+ aead = rcu_dereference(tx->aead[key.active ?: KEY_MASTER]);
+ if (unlikely(!aead)) {
+ rcu_read_unlock();
+ /* At least one key should exist for securing */
+ return;
+ }
+
+ /* Lets duplicate it first */
+ skey = kmemdup(aead->key, tipc_aead_key_size(aead->key), GFP_ATOMIC);
+ rcu_read_unlock();
+
+ /* Now, generate new key, initiate & distribute it */
+ if (likely(skey)) {
+ rc = tipc_aead_key_generate(skey) ?:
+ tipc_crypto_key_init(tx, skey, PER_NODE_KEY, false);
+ if (likely(rc > 0))
+ rc = tipc_crypto_key_distr(tx, rc, NULL);
+ kfree_sensitive(skey);
+ }
+
+ if (unlikely(rc))
+ pr_warn_ratelimited("%s: rekeying returns %d\n", tx->name, rc);
+
+resched:
+ /* Re-schedule rekeying if any */
+ tipc_crypto_rekeying_sched(tx, false, 0);
+}
diff --git a/net/tipc/crypto.h b/net/tipc/crypto.h
new file mode 100644
index 0000000..e71193b
--- /dev/null
+++ b/net/tipc/crypto.h
@@ -0,0 +1,200 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/**
+ * net/tipc/crypto.h: Include file for TIPC crypto
+ *
+ * Copyright (c) 2019, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifdef CONFIG_TIPC_CRYPTO
+#ifndef _TIPC_CRYPTO_H
+#define _TIPC_CRYPTO_H
+
+#include "core.h"
+#include "node.h"
+#include "msg.h"
+#include "bearer.h"
+
+#define TIPC_EVERSION 7
+
+/* AEAD aes(gcm) */
+#define TIPC_AES_GCM_KEY_SIZE_128 16
+#define TIPC_AES_GCM_KEY_SIZE_192 24
+#define TIPC_AES_GCM_KEY_SIZE_256 32
+
+#define TIPC_AES_GCM_SALT_SIZE 4
+#define TIPC_AES_GCM_IV_SIZE 12
+#define TIPC_AES_GCM_TAG_SIZE 16
+
+/**
+ * TIPC crypto modes:
+ * - CLUSTER_KEY:
+ * One single key is used for both TX & RX in all nodes in the cluster.
+ * - PER_NODE_KEY:
+ * Each nodes in the cluster has one TX key, for RX a node needs to know
+ * its peers' TX key for the decryption of messages from those nodes.
+ */
+enum {
+ CLUSTER_KEY = 1,
+ PER_NODE_KEY = (1 << 1),
+};
+
+extern int sysctl_tipc_max_tfms __read_mostly;
+extern int sysctl_tipc_key_exchange_enabled __read_mostly;
+
+/**
+ * TIPC encryption message format:
+ *
+ * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0
+ * 1 0 9 8 7 6 5 4|3 2 1 0 9 8 7 6|5 4 3 2 1 0 9 8|7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * w0:|Ver=7| User |D|TX |RX |K|M|N| Rsvd |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * w1:| Seqno |
+ * w2:| (8 octets) |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * w3:\ Prevnode \
+ * / (4 or 16 octets) /
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * \ \
+ * / Encrypted complete TIPC V2 header and user data /
+ * \ \
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | |
+ * | AuthTag |
+ * | (16 octets) |
+ * | |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Word0:
+ * Ver : = 7 i.e. TIPC encryption message version
+ * User : = 7 (for LINK_PROTOCOL); = 13 (for LINK_CONFIG) or = 0
+ * D : The destined bit i.e. the message's destination node is
+ * "known" or not at the message encryption
+ * TX : TX key used for the message encryption
+ * RX : Currently RX active key corresponding to the destination
+ * node's TX key (when the "D" bit is set)
+ * K : Keep-alive bit (for RPS, LINK_PROTOCOL/STATE_MSG only)
+ * M : Bit indicates if sender has master key
+ * N : Bit indicates if sender has no RX keys corresponding to the
+ * receiver's TX (when the "D" bit is set)
+ * Rsvd : Reserved bit, field
+ * Word1-2:
+ * Seqno : The 64-bit sequence number of the encrypted message, also
+ * part of the nonce used for the message encryption/decryption
+ * Word3-:
+ * Prevnode: The source node address, or ID in case LINK_CONFIG only
+ * AuthTag : The authentication tag for the message integrity checking
+ * generated by the message encryption
+ */
+struct tipc_ehdr {
+ union {
+ struct {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u8 destined:1,
+ user:4,
+ version:3;
+ __u8 reserved_1:1,
+ rx_nokey:1,
+ master_key:1,
+ keepalive:1,
+ rx_key_active:2,
+ tx_key:2;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ __u8 version:3,
+ user:4,
+ destined:1;
+ __u8 tx_key:2,
+ rx_key_active:2,
+ keepalive:1,
+ master_key:1,
+ rx_nokey:1,
+ reserved_1:1;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+ __be16 reserved_2;
+ } __packed;
+ __be32 w0;
+ };
+ __be64 seqno;
+ union {
+ __be32 addr;
+ __u8 id[NODE_ID_LEN]; /* For a LINK_CONFIG message only! */
+ };
+#define EHDR_SIZE (offsetof(struct tipc_ehdr, addr) + sizeof(__be32))
+#define EHDR_CFG_SIZE (sizeof(struct tipc_ehdr))
+#define EHDR_MIN_SIZE (EHDR_SIZE)
+#define EHDR_MAX_SIZE (EHDR_CFG_SIZE)
+#define EMSG_OVERHEAD (EHDR_SIZE + TIPC_AES_GCM_TAG_SIZE)
+} __packed;
+
+int tipc_crypto_start(struct tipc_crypto **crypto, struct net *net,
+ struct tipc_node *node);
+void tipc_crypto_stop(struct tipc_crypto **crypto);
+void tipc_crypto_timeout(struct tipc_crypto *rx);
+int tipc_crypto_xmit(struct net *net, struct sk_buff **skb,
+ struct tipc_bearer *b, struct tipc_media_addr *dst,
+ struct tipc_node *__dnode);
+int tipc_crypto_rcv(struct net *net, struct tipc_crypto *rx,
+ struct sk_buff **skb, struct tipc_bearer *b);
+int tipc_crypto_key_init(struct tipc_crypto *c, struct tipc_aead_key *ukey,
+ u8 mode, bool master_key);
+void tipc_crypto_key_flush(struct tipc_crypto *c);
+int tipc_crypto_key_distr(struct tipc_crypto *tx, u8 key,
+ struct tipc_node *dest);
+void tipc_crypto_msg_rcv(struct net *net, struct sk_buff *skb);
+void tipc_crypto_rekeying_sched(struct tipc_crypto *tx, bool changed,
+ u32 new_intv);
+int tipc_aead_key_validate(struct tipc_aead_key *ukey, struct genl_info *info);
+bool tipc_ehdr_validate(struct sk_buff *skb);
+
+static inline u32 msg_key_gen(struct tipc_msg *m)
+{
+ return msg_bits(m, 4, 16, 0xffff);
+}
+
+static inline void msg_set_key_gen(struct tipc_msg *m, u32 gen)
+{
+ msg_set_bits(m, 4, 16, 0xffff, gen);
+}
+
+static inline u32 msg_key_mode(struct tipc_msg *m)
+{
+ return msg_bits(m, 4, 0, 0xf);
+}
+
+static inline void msg_set_key_mode(struct tipc_msg *m, u32 mode)
+{
+ msg_set_bits(m, 4, 0, 0xf, mode);
+}
+
+#endif /* _TIPC_CRYPTO_H */
+#endif
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index c138d68..d4ecacd 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -74,7 +74,7 @@
/**
* tipc_disc_init_msg - initialize a link setup message
* @net: the applicable net namespace
- * @type: message type (request or response)
+ * @mtyp: message type (request or response)
* @b: ptr to bearer issuing message
*/
static void tipc_disc_init_msg(struct net *net, struct sk_buff *skb,
@@ -94,6 +94,7 @@
msg_set_dest_domain(hdr, dest_domain);
msg_set_bc_netid(hdr, tn->net_id);
b->media->addr2msg(msg_media_addr(hdr), &b->addr);
+ msg_set_peer_net_hash(hdr, tipc_net_hash_mixes(net, tn->random));
msg_set_node_id(hdr, tipc_own_id(net));
}
@@ -193,6 +194,7 @@
{
struct tipc_net *tn = tipc_net(net);
struct tipc_msg *hdr = buf_msg(skb);
+ u32 pnet_hash = msg_peer_net_hash(hdr);
u16 caps = msg_node_capabilities(hdr);
bool legacy = tn->legacy_addr_format;
u32 sugg = msg_sugg_node_addr(hdr);
@@ -241,7 +243,7 @@
return;
if (!tipc_in_scope(legacy, b->domain, src))
return;
- tipc_node_check_dest(net, src, peer_id, b, caps, signature,
+ tipc_node_check_dest(net, src, peer_id, b, caps, signature, pnet_hash,
&maddr, &respond, &dupl_addr);
if (dupl_addr)
disc_dupl_alert(b, src, &maddr);
@@ -337,7 +339,7 @@
* @net: the applicable net namespace
* @b: ptr to bearer issuing requests
* @dest: destination address for request messages
- * @dest_domain: network domain to which links can be established
+ * @skb: pointer to created frame
*
* Returns 0 if successful, otherwise -errno.
*/
@@ -391,7 +393,6 @@
* tipc_disc_reset - reset object to send periodic link setup requests
* @net: the applicable net namespace
* @b: ptr to bearer issuing requests
- * @dest_domain: network domain to which links can be established
*/
void tipc_disc_reset(struct net *net, struct tipc_bearer *b)
{
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index f69a2fd..c680196 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -62,12 +62,10 @@
struct tipc_media_addr *addr,
char *msg)
{
- char bcast_mac[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
-
memset(addr, 0, sizeof(*addr));
ether_addr_copy(addr->value, msg);
addr->media_id = TIPC_MEDIA_TYPE_ETH;
- addr->broadcast = !memcmp(addr->value, bcast_mac, ETH_ALEN);
+ addr->broadcast = is_broadcast_ether_addr(addr->value);
return 0;
}
@@ -92,7 +90,8 @@
.raw2addr = tipc_eth_raw2addr,
.priority = TIPC_DEF_LINK_PRI,
.tolerance = TIPC_DEF_LINK_TOL,
- .window = TIPC_DEF_LINK_WIN,
+ .min_win = TIPC_DEF_LINK_WIN,
+ .max_win = TIPC_MAX_LINK_WIN,
.type_id = TIPC_MEDIA_TYPE_ETH,
.hwaddr_len = ETH_ALEN,
.name = "eth"
diff --git a/net/tipc/group.c b/net/tipc/group.c
index f53871b..b1fcd2a 100644
--- a/net/tipc/group.c
+++ b/net/tipc/group.c
@@ -542,7 +542,7 @@
update = true;
deliver = false;
}
- /* Fall thru */
+ fallthrough;
case TIPC_GRP_BCAST_MSG:
m->bc_rcv_nxt++;
ack = msg_grp_bc_ack_req(hdr);
diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c
index e8c1671..7aa9ff8 100644
--- a/net/tipc/ib_media.c
+++ b/net/tipc/ib_media.c
@@ -42,6 +42,8 @@
#include "core.h"
#include "bearer.h"
+#define TIPC_MAX_IB_LINK_WIN 500
+
/* convert InfiniBand address (media address format) media address to string */
static int tipc_ib_addr2str(struct tipc_media_addr *a, char *str_buf,
int str_size)
@@ -94,7 +96,8 @@
.raw2addr = tipc_ib_raw2addr,
.priority = TIPC_DEF_LINK_PRI,
.tolerance = TIPC_DEF_LINK_TOL,
- .window = TIPC_DEF_LINK_WIN,
+ .min_win = TIPC_DEF_LINK_WIN,
+ .max_win = TIPC_MAX_IB_LINK_WIN,
.type_id = TIPC_MEDIA_TYPE_IB,
.hwaddr_len = INFINIBAND_ALEN,
.name = "ib"
diff --git a/net/tipc/link.c b/net/tipc/link.c
index f250102..7a353ff 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -44,6 +44,7 @@
#include "netlink.h"
#include "monitor.h"
#include "trace.h"
+#include "crypto.h"
#include <linux/pkt_sched.h>
@@ -163,7 +164,6 @@
struct sk_buff *target_bskb;
} backlog[5];
u16 snd_nxt;
- u16 window;
/* Reception */
u16 rcv_nxt;
@@ -174,6 +174,12 @@
/* Congestion handling */
struct sk_buff_head wakeupq;
+ u16 window;
+ u16 min_win;
+ u16 ssthresh;
+ u16 max_win;
+ u16 cong_acks;
+ u16 checkpoint;
/* Fragmentation/reassembly */
struct sk_buff *reasm_buf;
@@ -182,6 +188,8 @@
/* Broadcast */
u16 ackers;
u16 acked;
+ u16 last_gap;
+ struct tipc_gap_ack_blks *last_ga;
struct tipc_link *bc_rcvlink;
struct tipc_link *bc_sndlink;
u8 nack_state;
@@ -208,11 +216,6 @@
#define TIPC_BC_RETR_LIM (jiffies + msecs_to_jiffies(10))
#define TIPC_UC_RETR_TIME (jiffies + msecs_to_jiffies(1))
-/*
- * Interval between NACKs when packets arrive out of order
- */
-#define TIPC_NACK_INTV (TIPC_MIN_LINK_WIN * 2)
-
/* Link FSM states:
*/
enum {
@@ -243,12 +246,16 @@
struct sk_buff_head *xmitq);
static void tipc_link_build_bc_init_msg(struct tipc_link *l,
struct sk_buff_head *xmitq);
-static bool tipc_link_release_pkts(struct tipc_link *l, u16 to);
-static u16 tipc_build_gap_ack_blks(struct tipc_link *l, void *data);
-static int tipc_link_advance_transmq(struct tipc_link *l, u16 acked, u16 gap,
+static u8 __tipc_build_gap_ack_blks(struct tipc_gap_ack_blks *ga,
+ struct tipc_link *l, u8 start_index);
+static u16 tipc_build_gap_ack_blks(struct tipc_link *l, struct tipc_msg *hdr);
+static int tipc_link_advance_transmq(struct tipc_link *l, struct tipc_link *r,
+ u16 acked, u16 gap,
struct tipc_gap_ack_blks *ga,
- struct sk_buff_head *xmitq);
-
+ struct sk_buff_head *xmitq,
+ bool *retransmitted, int *rc);
+static void tipc_link_update_cwin(struct tipc_link *l, int released,
+ bool retransmitted);
/*
* Simple non-static link routines (i.e. referenced outside this file)
*/
@@ -307,9 +314,14 @@
return l->peer_bearer_id << 16 | l->bearer_id;
}
-int tipc_link_window(struct tipc_link *l)
+int tipc_link_min_win(struct tipc_link *l)
{
- return l->window;
+ return l->min_win;
+}
+
+int tipc_link_max_win(struct tipc_link *l)
+{
+ return l->max_win;
}
int tipc_link_prio(struct tipc_link *l)
@@ -358,7 +370,7 @@
snd_l->ackers--;
rcv_l->bc_peer_is_up = true;
rcv_l->state = LINK_ESTABLISHED;
- tipc_link_bc_ack_rcv(rcv_l, ack, xmitq);
+ tipc_link_bc_ack_rcv(rcv_l, ack, 0, NULL, xmitq, NULL);
trace_tipc_link_reset(rcv_l, TIPC_DUMP_ALL, "bclink removed!");
tipc_link_reset(rcv_l);
rcv_l->state = LINK_RESET;
@@ -397,6 +409,15 @@
return l->mtu;
}
+int tipc_link_mss(struct tipc_link *l)
+{
+#ifdef CONFIG_TIPC_CRYPTO
+ return l->mtu - INT_H_SIZE - EMSG_OVERHEAD;
+#else
+ return l->mtu - INT_H_SIZE;
+#endif
+}
+
u16 tipc_link_rcv_nxt(struct tipc_link *l)
{
return l->rcv_nxt;
@@ -419,14 +440,15 @@
/**
* tipc_link_create - create a new link
- * @n: pointer to associated node
+ * @net: pointer to associated network namespace
* @if_name: associated interface name
* @bearer_id: id (index) of associated bearer
* @tolerance: link tolerance to be used by link
* @net_plane: network plane (A,B,c..) this link belongs to
* @mtu: mtu to be advertised by link
* @priority: priority to be used by link
- * @window: send window to be used by link
+ * @min_win: minimal send window to be used by link
+ * @max_win: maximal send window to be used by link
* @session: session to be used by link
* @ownnode: identity of own node
* @peer: node id of peer node
@@ -441,7 +463,7 @@
*/
bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
int tolerance, char net_plane, u32 mtu, int priority,
- int window, u32 session, u32 self,
+ u32 min_win, u32 max_win, u32 session, u32 self,
u32 peer, u8 *peer_id, u16 peer_caps,
struct tipc_link *bc_sndlink,
struct tipc_link *bc_rcvlink,
@@ -485,7 +507,7 @@
l->advertised_mtu = mtu;
l->mtu = mtu;
l->priority = priority;
- tipc_link_set_queue_limits(l, window);
+ tipc_link_set_queue_limits(l, min_win, max_win);
l->ackers = 1;
l->bc_sndlink = bc_sndlink;
l->bc_rcvlink = bc_rcvlink;
@@ -503,17 +525,18 @@
/**
* tipc_link_bc_create - create new link to be used for broadcast
- * @n: pointer to associated node
+ * @net: pointer to associated network namespace
* @mtu: mtu to be used initially if no peers
- * @window: send window to be used
+ * @min_win: minimal send window to be used by link
+ * @max_win: maximal send window to be used by link
* @inputq: queue to put messages ready for delivery
* @namedq: queue to put binding table update messages ready for delivery
* @link: return value, pointer to put the created link
*
* Returns true if link was created, otherwise false
*/
-bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
- int mtu, int window, u16 peer_caps,
+bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, u8 *peer_id,
+ int mtu, u32 min_win, u32 max_win, u16 peer_caps,
struct sk_buff_head *inputq,
struct sk_buff_head *namedq,
struct tipc_link *bc_sndlink,
@@ -521,13 +544,24 @@
{
struct tipc_link *l;
- if (!tipc_link_create(net, "", MAX_BEARERS, 0, 'Z', mtu, 0, window,
- 0, ownnode, peer, NULL, peer_caps, bc_sndlink,
- NULL, inputq, namedq, link))
+ if (!tipc_link_create(net, "", MAX_BEARERS, 0, 'Z', mtu, 0, min_win,
+ max_win, 0, ownnode, peer, NULL, peer_caps,
+ bc_sndlink, NULL, inputq, namedq, link))
return false;
l = *link;
- strcpy(l->name, tipc_bclink_name);
+ if (peer_id) {
+ char peer_str[NODE_ID_STR_LEN] = {0,};
+
+ tipc_nodeid2string(peer_str, peer_id);
+ if (strlen(peer_str) > 16)
+ sprintf(peer_str, "%x", peer);
+ /* Broadcast receiver link name: "broadcast-link:<peer>" */
+ snprintf(l->name, sizeof(l->name), "%s:%s", tipc_bclink_name,
+ peer_str);
+ } else {
+ strcpy(l->name, tipc_bclink_name);
+ }
trace_tipc_link_reset(l, TIPC_DUMP_ALL, "bclink created!");
tipc_link_reset(l);
l->state = LINK_RESET;
@@ -540,7 +574,7 @@
/* Disable replicast if even a single peer doesn't support it */
if (link_is_bc_rcvlink(l) && !(peer_caps & TIPC_BCAST_RCAST))
- tipc_bcast_disable_rcast(net);
+ tipc_bcast_toggle_rcast(net, false);
return true;
}
@@ -789,11 +823,16 @@
state |= l->bc_rcvlink->rcv_unacked;
state |= l->rcv_unacked;
state |= !skb_queue_empty(&l->transmq);
- state |= !skb_queue_empty(&l->deferdq);
probe = mstate->probing;
probe |= l->silent_intv_cnt;
if (probe || mstate->monitoring)
l->silent_intv_cnt++;
+ probe |= !skb_queue_empty(&l->deferdq);
+ if (l->snd_nxt == l->checkpoint) {
+ tipc_link_update_cwin(l, 0, 0);
+ probe = true;
+ }
+ l->checkpoint = l->snd_nxt;
break;
case LINK_RESET:
setup = l->rst_cnt++ <= 4;
@@ -878,6 +917,21 @@
}
+/**
+ * tipc_link_set_skb_retransmit_time - set the time at which retransmission of
+ * the given skb should be next attempted
+ * @skb: skb to set a future retransmission time for
+ * @l: link the skb will be transmitted on
+ */
+static void tipc_link_set_skb_retransmit_time(struct sk_buff *skb,
+ struct tipc_link *l)
+{
+ if (link_is_bc_sndlink(l))
+ TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM;
+ else
+ TIPC_SKB_CB(skb)->nxt_retr = TIPC_UC_RETR_TIME;
+}
+
void tipc_link_reset(struct tipc_link *l)
{
struct sk_buff_head list;
@@ -919,6 +973,9 @@
l->snd_nxt_state = 1;
l->rcv_nxt_state = 1;
l->acked = 0;
+ l->last_gap = 0;
+ kfree(l->last_ga);
+ l->last_ga = NULL;
l->silent_intv_cnt = 0;
l->rst_cnt = 0;
l->bc_peer_is_up = false;
@@ -928,7 +985,7 @@
/**
* tipc_link_xmit(): enqueue buffer list according to queue situation
- * @link: link to use
+ * @l: link to use
* @list: chain of buffers containing message
* @xmitq: returned list of packets to be sent by caller
*
@@ -939,16 +996,18 @@
int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
struct sk_buff_head *xmitq)
{
- unsigned int maxwin = l->window;
- unsigned int mtu = l->mtu;
+ struct sk_buff_head *backlogq = &l->backlogq;
+ struct sk_buff_head *transmq = &l->transmq;
+ struct sk_buff *skb, *_skb;
+ u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
u16 ack = l->rcv_nxt - 1;
u16 seqno = l->snd_nxt;
- u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
- struct sk_buff_head *transmq = &l->transmq;
- struct sk_buff_head *backlogq = &l->backlogq;
- struct sk_buff *skb, *_skb, **tskb;
int pkt_cnt = skb_queue_len(list);
+ unsigned int mss = tipc_link_mss(l);
+ unsigned int cwin = l->window;
+ unsigned int mtu = l->mtu;
struct tipc_msg *hdr;
+ bool new_bundle;
int rc = 0;
int imp;
@@ -980,24 +1039,20 @@
}
/* Prepare each packet for sending, and add to relevant queue: */
- while (skb_queue_len(list)) {
- skb = skb_peek(list);
- hdr = buf_msg(skb);
- msg_set_seqno(hdr, seqno);
- msg_set_ack(hdr, ack);
- msg_set_bcast_ack(hdr, bc_ack);
-
- if (likely(skb_queue_len(transmq) < maxwin)) {
+ while ((skb = __skb_dequeue(list))) {
+ if (likely(skb_queue_len(transmq) < cwin)) {
+ hdr = buf_msg(skb);
+ msg_set_seqno(hdr, seqno);
+ msg_set_ack(hdr, ack);
+ msg_set_bcast_ack(hdr, bc_ack);
_skb = skb_clone(skb, GFP_ATOMIC);
if (!_skb) {
+ kfree_skb(skb);
__skb_queue_purge(list);
return -ENOBUFS;
}
- __skb_dequeue(list);
__skb_queue_tail(transmq, skb);
- /* next retransmit attempt */
- if (link_is_bc_sndlink(l))
- TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM;
+ tipc_link_set_skb_retransmit_time(skb, l);
__skb_queue_tail(xmitq, _skb);
TIPC_SKB_CB(skb)->ackers = l->ackers;
l->rcv_unacked = 0;
@@ -1005,39 +1060,86 @@
seqno++;
continue;
}
- tskb = &l->backlog[imp].target_bskb;
- if (tipc_msg_bundle(*tskb, hdr, mtu)) {
- kfree_skb(__skb_dequeue(list));
- l->stats.sent_bundled++;
- continue;
- }
- if (tipc_msg_make_bundle(tskb, hdr, mtu, l->addr)) {
- kfree_skb(__skb_dequeue(list));
- __skb_queue_tail(backlogq, *tskb);
- l->backlog[imp].len++;
- l->stats.sent_bundled++;
- l->stats.sent_bundles++;
+ if (tipc_msg_try_bundle(l->backlog[imp].target_bskb, &skb,
+ mss, l->addr, &new_bundle)) {
+ if (skb) {
+ /* Keep a ref. to the skb for next try */
+ l->backlog[imp].target_bskb = skb;
+ l->backlog[imp].len++;
+ __skb_queue_tail(backlogq, skb);
+ } else {
+ if (new_bundle) {
+ l->stats.sent_bundles++;
+ l->stats.sent_bundled++;
+ }
+ l->stats.sent_bundled++;
+ }
continue;
}
l->backlog[imp].target_bskb = NULL;
- l->backlog[imp].len += skb_queue_len(list);
+ l->backlog[imp].len += (1 + skb_queue_len(list));
+ __skb_queue_tail(backlogq, skb);
skb_queue_splice_tail_init(list, backlogq);
}
l->snd_nxt = seqno;
return rc;
}
+static void tipc_link_update_cwin(struct tipc_link *l, int released,
+ bool retransmitted)
+{
+ int bklog_len = skb_queue_len(&l->backlogq);
+ struct sk_buff_head *txq = &l->transmq;
+ int txq_len = skb_queue_len(txq);
+ u16 cwin = l->window;
+
+ /* Enter fast recovery */
+ if (unlikely(retransmitted)) {
+ l->ssthresh = max_t(u16, l->window / 2, 300);
+ l->window = min_t(u16, l->ssthresh, l->window);
+ return;
+ }
+ /* Enter slow start */
+ if (unlikely(!released)) {
+ l->ssthresh = max_t(u16, l->window / 2, 300);
+ l->window = l->min_win;
+ return;
+ }
+ /* Don't increase window if no pressure on the transmit queue */
+ if (txq_len + bklog_len < cwin)
+ return;
+
+ /* Don't increase window if there are holes the transmit queue */
+ if (txq_len && l->snd_nxt - buf_seqno(skb_peek(txq)) != txq_len)
+ return;
+
+ l->cong_acks += released;
+
+ /* Slow start */
+ if (cwin <= l->ssthresh) {
+ l->window = min_t(u16, cwin + released, l->max_win);
+ return;
+ }
+ /* Congestion avoidance */
+ if (l->cong_acks < cwin)
+ return;
+ l->window = min_t(u16, ++cwin, l->max_win);
+ l->cong_acks = 0;
+}
+
static void tipc_link_advance_backlog(struct tipc_link *l,
struct sk_buff_head *xmitq)
{
- struct sk_buff *skb, *_skb;
- struct tipc_msg *hdr;
- u16 seqno = l->snd_nxt;
- u16 ack = l->rcv_nxt - 1;
u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
+ struct sk_buff_head *txq = &l->transmq;
+ struct sk_buff *skb, *_skb;
+ u16 ack = l->rcv_nxt - 1;
+ u16 seqno = l->snd_nxt;
+ struct tipc_msg *hdr;
+ u16 cwin = l->window;
u32 imp;
- while (skb_queue_len(&l->transmq) < l->window) {
+ while (skb_queue_len(txq) < cwin) {
skb = skb_peek(&l->backlogq);
if (!skb)
break;
@@ -1051,9 +1153,7 @@
if (unlikely(skb == l->backlog[imp].target_bskb))
l->backlog[imp].target_bskb = NULL;
__skb_queue_tail(&l->transmq, skb);
- /* next retransmit attempt */
- if (link_is_bc_sndlink(l))
- TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM;
+ tipc_link_set_skb_retransmit_time(skb, l);
__skb_queue_tail(xmitq, _skb);
TIPC_SKB_CB(skb)->ackers = l->ackers;
@@ -1112,67 +1212,14 @@
if (link_is_bc_sndlink(l)) {
r->state = LINK_RESET;
- *rc = TIPC_LINK_DOWN_EVT;
+ *rc |= TIPC_LINK_DOWN_EVT;
} else {
- *rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
+ *rc |= tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
}
return true;
}
-/* tipc_link_bc_retrans() - retransmit zero or more packets
- * @l: the link to transmit on
- * @r: the receiving link ordering the retransmit. Same as l if unicast
- * @from: retransmit from (inclusive) this sequence number
- * @to: retransmit to (inclusive) this sequence number
- * xmitq: queue for accumulating the retransmitted packets
- */
-static int tipc_link_bc_retrans(struct tipc_link *l, struct tipc_link *r,
- u16 from, u16 to, struct sk_buff_head *xmitq)
-{
- struct sk_buff *_skb, *skb = skb_peek(&l->transmq);
- u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
- u16 ack = l->rcv_nxt - 1;
- struct tipc_msg *hdr;
- int rc = 0;
-
- if (!skb)
- return 0;
- if (less(to, from))
- return 0;
-
- trace_tipc_link_retrans(r, from, to, &l->transmq);
-
- if (link_retransmit_failure(l, r, &rc))
- return rc;
-
- skb_queue_walk(&l->transmq, skb) {
- hdr = buf_msg(skb);
- if (less(msg_seqno(hdr), from))
- continue;
- if (more(msg_seqno(hdr), to))
- break;
-
- if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
- continue;
- TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM;
- _skb = __pskb_copy(skb, LL_MAX_HEADER + MIN_H_SIZE, GFP_ATOMIC);
- if (!_skb)
- return 0;
- hdr = buf_msg(_skb);
- msg_set_ack(hdr, ack);
- msg_set_bcast_ack(hdr, bc_ack);
- _skb->priority = TC_PRIO_CONTROL;
- __skb_queue_tail(xmitq, _skb);
- l->stats.retransmitted++;
-
- /* Increase actual retrans counter & mark first time */
- if (!TIPC_SKB_CB(skb)->retr_cnt++)
- TIPC_SKB_CB(skb)->retr_stamp = jiffies;
- }
- return 0;
-}
-
/* tipc_data_input - deliver data and name distr msgs to upper layer
*
* Consumes buffer if message is of right type
@@ -1193,7 +1240,7 @@
skb_queue_tail(mc_inputq, skb);
return true;
}
- /* fall through */
+ fallthrough;
case CONN_MANAGER:
skb_queue_tail(inputq, skb);
return true;
@@ -1209,6 +1256,14 @@
case MSG_FRAGMENTER:
case BCAST_PROTOCOL:
return false;
+#ifdef CONFIG_TIPC_CRYPTO
+ case MSG_CRYPTO:
+ if (TIPC_SKB_CB(skb)->decrypted) {
+ tipc_crypto_msg_rcv(l->net, skb);
+ return true;
+ }
+ fallthrough;
+#endif
default:
pr_warn("Dropping received illegal msg type\n");
kfree_skb(skb);
@@ -1330,46 +1385,68 @@
return rc;
}
-static bool tipc_link_release_pkts(struct tipc_link *l, u16 acked)
+/**
+ * tipc_get_gap_ack_blks - get Gap ACK blocks from PROTOCOL/STATE_MSG
+ * @ga: returned pointer to the Gap ACK blocks if any
+ * @l: the tipc link
+ * @hdr: the PROTOCOL/STATE_MSG header
+ * @uc: desired Gap ACK blocks type, i.e. unicast (= 1) or broadcast (= 0)
+ *
+ * Return: the total Gap ACK blocks size
+ */
+u16 tipc_get_gap_ack_blks(struct tipc_gap_ack_blks **ga, struct tipc_link *l,
+ struct tipc_msg *hdr, bool uc)
{
- bool released = false;
- struct sk_buff *skb, *tmp;
+ struct tipc_gap_ack_blks *p;
+ u16 sz = 0;
- skb_queue_walk_safe(&l->transmq, skb, tmp) {
- if (more(buf_seqno(skb), acked))
- break;
- __skb_unlink(skb, &l->transmq);
- kfree_skb(skb);
- released = true;
+ /* Does peer support the Gap ACK blocks feature? */
+ if (l->peer_caps & TIPC_GAP_ACK_BLOCK) {
+ p = (struct tipc_gap_ack_blks *)msg_data(hdr);
+ sz = ntohs(p->len);
+ /* Sanity check */
+ if (sz == struct_size(p, gacks, p->ugack_cnt + p->bgack_cnt)) {
+ /* Good, check if the desired type exists */
+ if ((uc && p->ugack_cnt) || (!uc && p->bgack_cnt))
+ goto ok;
+ /* Backward compatible: peer might not support bc, but uc? */
+ } else if (uc && sz == struct_size(p, gacks, p->ugack_cnt)) {
+ if (p->ugack_cnt) {
+ p->bgack_cnt = 0;
+ goto ok;
+ }
+ }
}
- return released;
+ /* Other cases: ignore! */
+ p = NULL;
+
+ok:
+ *ga = p;
+ return sz;
}
-/* tipc_build_gap_ack_blks - build Gap ACK blocks
- * @l: tipc link that data have come with gaps in sequence if any
- * @data: data buffer to store the Gap ACK blocks after built
- *
- * returns the actual allocated memory size
- */
-static u16 tipc_build_gap_ack_blks(struct tipc_link *l, void *data)
+static u8 __tipc_build_gap_ack_blks(struct tipc_gap_ack_blks *ga,
+ struct tipc_link *l, u8 start_index)
{
+ struct tipc_gap_ack *gacks = &ga->gacks[start_index];
struct sk_buff *skb = skb_peek(&l->deferdq);
- struct tipc_gap_ack_blks *ga = data;
- u16 len, expect, seqno = 0;
+ u16 expect, seqno = 0;
u8 n = 0;
if (!skb)
- goto exit;
+ return 0;
expect = buf_seqno(skb);
skb_queue_walk(&l->deferdq, skb) {
seqno = buf_seqno(skb);
if (unlikely(more(seqno, expect))) {
- ga->gacks[n].ack = htons(expect - 1);
- ga->gacks[n].gap = htons(seqno - expect);
- if (++n >= MAX_GAP_ACK_BLKS) {
- pr_info_ratelimited("Too few Gap ACK blocks!\n");
- goto exit;
+ gacks[n].ack = htons(expect - 1);
+ gacks[n].gap = htons(seqno - expect);
+ if (++n >= MAX_GAP_ACK_BLKS / 2) {
+ pr_info_ratelimited("Gacks on %s: %d, ql: %d!\n",
+ l->name, n,
+ skb_queue_len(&l->deferdq));
+ return n;
}
} else if (unlikely(less(seqno, expect))) {
pr_warn("Unexpected skb in deferdq!\n");
@@ -1379,14 +1456,44 @@
}
/* last block */
- ga->gacks[n].ack = htons(seqno);
- ga->gacks[n].gap = 0;
+ gacks[n].ack = htons(seqno);
+ gacks[n].gap = 0;
n++;
+ return n;
+}
-exit:
- len = tipc_gap_ack_blks_sz(n);
+/* tipc_build_gap_ack_blks - build Gap ACK blocks
+ * @l: tipc unicast link
+ * @hdr: the tipc message buffer to store the Gap ACK blocks after built
+ *
+ * The function builds Gap ACK blocks for both the unicast & broadcast receiver
+ * links of a certain peer, the buffer after built has the network data format
+ * as found at the struct tipc_gap_ack_blks definition.
+ *
+ * returns the actual allocated memory size
+ */
+static u16 tipc_build_gap_ack_blks(struct tipc_link *l, struct tipc_msg *hdr)
+{
+ struct tipc_link *bcl = l->bc_rcvlink;
+ struct tipc_gap_ack_blks *ga;
+ u16 len;
+
+ ga = (struct tipc_gap_ack_blks *)msg_data(hdr);
+
+ /* Start with broadcast link first */
+ tipc_bcast_lock(bcl->net);
+ msg_set_bcast_ack(hdr, bcl->rcv_nxt - 1);
+ msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl));
+ ga->bgack_cnt = __tipc_build_gap_ack_blks(ga, bcl, 0);
+ tipc_bcast_unlock(bcl->net);
+
+ /* Now for unicast link, but an explicit NACK only (???) */
+ ga->ugack_cnt = (msg_seq_gap(hdr)) ?
+ __tipc_build_gap_ack_blks(ga, l, ga->bgack_cnt) : 0;
+
+ /* Total len */
+ len = struct_size(ga, gacks, ga->bgack_cnt + ga->ugack_cnt);
ga->len = htons(len);
- ga->gack_cnt = n;
return len;
}
@@ -1394,46 +1501,111 @@
* acked packets, also doing retransmissions if
* gaps found
* @l: tipc link with transmq queue to be advanced
+ * @r: tipc link "receiver" i.e. in case of broadcast (= "l" if unicast)
* @acked: seqno of last packet acked by peer without any gaps before
* @gap: # of gap packets
* @ga: buffer pointer to Gap ACK blocks from peer
* @xmitq: queue for accumulating the retransmitted packets if any
+ * @retransmitted: returned boolean value if a retransmission is really issued
+ * @rc: returned code e.g. TIPC_LINK_DOWN_EVT if a repeated retransmit failures
+ * happens (- unlikely case)
*
- * In case of a repeated retransmit failures, the call will return shortly
- * with a returned code (e.g. TIPC_LINK_DOWN_EVT)
+ * Return: the number of packets released from the link transmq
*/
-static int tipc_link_advance_transmq(struct tipc_link *l, u16 acked, u16 gap,
+static int tipc_link_advance_transmq(struct tipc_link *l, struct tipc_link *r,
+ u16 acked, u16 gap,
struct tipc_gap_ack_blks *ga,
- struct sk_buff_head *xmitq)
+ struct sk_buff_head *xmitq,
+ bool *retransmitted, int *rc)
{
+ struct tipc_gap_ack_blks *last_ga = r->last_ga, *this_ga = NULL;
+ struct tipc_gap_ack *gacks = NULL;
struct sk_buff *skb, *_skb, *tmp;
struct tipc_msg *hdr;
+ u32 qlen = skb_queue_len(&l->transmq);
+ u16 nacked = acked, ngap = gap, gack_cnt = 0;
u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
u16 ack = l->rcv_nxt - 1;
- bool passed = false;
u16 seqno, n = 0;
- int rc = 0;
+ u16 end = r->acked, start = end, offset = r->last_gap;
+ u16 si = (last_ga) ? last_ga->start_index : 0;
+ bool is_uc = !link_is_bc_sndlink(l);
+ bool bc_has_acked = false;
+ trace_tipc_link_retrans(r, acked + 1, acked + gap, &l->transmq);
+
+ /* Determine Gap ACK blocks if any for the particular link */
+ if (ga && is_uc) {
+ /* Get the Gap ACKs, uc part */
+ gack_cnt = ga->ugack_cnt;
+ gacks = &ga->gacks[ga->bgack_cnt];
+ } else if (ga) {
+ /* Copy the Gap ACKs, bc part, for later renewal if needed */
+ this_ga = kmemdup(ga, struct_size(ga, gacks, ga->bgack_cnt),
+ GFP_ATOMIC);
+ if (likely(this_ga)) {
+ this_ga->start_index = 0;
+ /* Start with the bc Gap ACKs */
+ gack_cnt = this_ga->bgack_cnt;
+ gacks = &this_ga->gacks[0];
+ } else {
+ /* Hmm, we can get in trouble..., simply ignore it */
+ pr_warn_ratelimited("Ignoring bc Gap ACKs, no memory\n");
+ }
+ }
+
+ /* Advance the link transmq */
skb_queue_walk_safe(&l->transmq, skb, tmp) {
seqno = buf_seqno(skb);
next_gap_ack:
- if (less_eq(seqno, acked)) {
+ if (less_eq(seqno, nacked)) {
+ if (is_uc)
+ goto release;
+ /* Skip packets peer has already acked */
+ if (!more(seqno, r->acked))
+ continue;
+ /* Get the next of last Gap ACK blocks */
+ while (more(seqno, end)) {
+ if (!last_ga || si >= last_ga->bgack_cnt)
+ break;
+ start = end + offset + 1;
+ end = ntohs(last_ga->gacks[si].ack);
+ offset = ntohs(last_ga->gacks[si].gap);
+ si++;
+ WARN_ONCE(more(start, end) ||
+ (!offset &&
+ si < last_ga->bgack_cnt) ||
+ si > MAX_GAP_ACK_BLKS,
+ "Corrupted Gap ACK: %d %d %d %d %d\n",
+ start, end, offset, si,
+ last_ga->bgack_cnt);
+ }
+ /* Check against the last Gap ACK block */
+ if (in_range(seqno, start, end))
+ continue;
+ /* Update/release the packet peer is acking */
+ bc_has_acked = true;
+ if (--TIPC_SKB_CB(skb)->ackers)
+ continue;
+release:
/* release skb */
__skb_unlink(skb, &l->transmq);
kfree_skb(skb);
- } else if (less_eq(seqno, acked + gap)) {
- /* First, check if repeated retrans failures occurs? */
- if (!passed && link_retransmit_failure(l, l, &rc))
- return rc;
- passed = true;
-
+ } else if (less_eq(seqno, nacked + ngap)) {
+ /* First gap: check if repeated retrans failures? */
+ if (unlikely(seqno == acked + 1 &&
+ link_retransmit_failure(l, r, rc))) {
+ /* Ignore this bc Gap ACKs if any */
+ kfree(this_ga);
+ this_ga = NULL;
+ break;
+ }
/* retransmit skb if unrestricted*/
if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
continue;
- TIPC_SKB_CB(skb)->nxt_retr = TIPC_UC_RETR_TIME;
- _skb = __pskb_copy(skb, LL_MAX_HEADER + MIN_H_SIZE,
- GFP_ATOMIC);
+ tipc_link_set_skb_retransmit_time(skb, l);
+ _skb = pskb_copy(skb, GFP_ATOMIC);
if (!_skb)
continue;
hdr = buf_msg(_skb);
@@ -1442,22 +1614,53 @@
_skb->priority = TC_PRIO_CONTROL;
__skb_queue_tail(xmitq, _skb);
l->stats.retransmitted++;
-
+ if (!is_uc)
+ r->stats.retransmitted++;
+ *retransmitted = true;
/* Increase actual retrans counter & mark first time */
if (!TIPC_SKB_CB(skb)->retr_cnt++)
TIPC_SKB_CB(skb)->retr_stamp = jiffies;
} else {
/* retry with Gap ACK blocks if any */
- if (!ga || n >= ga->gack_cnt)
+ if (n >= gack_cnt)
break;
- acked = ntohs(ga->gacks[n].ack);
- gap = ntohs(ga->gacks[n].gap);
+ nacked = ntohs(gacks[n].ack);
+ ngap = ntohs(gacks[n].gap);
n++;
goto next_gap_ack;
}
}
- return 0;
+ /* Renew last Gap ACK blocks for bc if needed */
+ if (bc_has_acked) {
+ if (this_ga) {
+ kfree(last_ga);
+ r->last_ga = this_ga;
+ r->last_gap = gap;
+ } else if (last_ga) {
+ if (less(acked, start)) {
+ si--;
+ offset = start - acked - 1;
+ } else if (less(acked, end)) {
+ acked = end;
+ }
+ if (si < last_ga->bgack_cnt) {
+ last_ga->start_index = si;
+ r->last_gap = offset;
+ } else {
+ kfree(last_ga);
+ r->last_ga = NULL;
+ r->last_gap = 0;
+ }
+ } else {
+ r->last_gap = 0;
+ }
+ r->acked = acked;
+ } else {
+ kfree(this_ga);
+ }
+
+ return qlen - skb_queue_len(&l->transmq);
}
/* tipc_link_build_state_msg: prepare link state message for transmission
@@ -1480,7 +1683,6 @@
l->snd_nxt = l->rcv_nxt;
return TIPC_LINK_SND_STATE;
}
-
/* Unicast ACK */
l->rcv_unacked = 0;
l->stats.sent_acks++;
@@ -1514,7 +1716,8 @@
struct sk_buff_head *xmitq)
{
u32 def_cnt = ++l->stats.deferred_recv;
- u32 defq_len = skb_queue_len(&l->deferdq);
+ struct sk_buff_head *dfq = &l->deferdq;
+ u32 defq_len = skb_queue_len(dfq);
int match1, match2;
if (link_is_bc_rcvlink(l)) {
@@ -1525,8 +1728,12 @@
return 0;
}
- if (defq_len >= 3 && !((defq_len - 3) % 16))
- tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq);
+ if (defq_len >= 3 && !((defq_len - 3) % 16)) {
+ u16 rcvgap = buf_seqno(skb_peek(dfq)) - l->rcv_nxt;
+
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, 0,
+ rcvgap, 0, 0, xmitq);
+ }
return 0;
}
@@ -1541,6 +1748,7 @@
struct sk_buff_head *defq = &l->deferdq;
struct tipc_msg *hdr = buf_msg(skb);
u16 seqno, rcv_nxt, win_lim;
+ int released = 0;
int rc = 0;
/* Verify and update link state */
@@ -1559,25 +1767,23 @@
if (unlikely(!link_is_up(l))) {
if (l->state == LINK_ESTABLISHING)
rc = TIPC_LINK_UP_EVT;
- goto drop;
+ kfree_skb(skb);
+ break;
}
/* Drop if outside receive window */
if (unlikely(less(seqno, rcv_nxt) || more(seqno, win_lim))) {
l->stats.duplicates++;
- goto drop;
+ kfree_skb(skb);
+ break;
}
-
- /* Forward queues and wake up waiting users */
- if (likely(tipc_link_release_pkts(l, msg_ack(hdr)))) {
- tipc_link_advance_backlog(l, xmitq);
- if (unlikely(!skb_queue_empty(&l->wakeupq)))
- link_prepare_wakeup(l);
- }
+ released += tipc_link_advance_transmq(l, l, msg_ack(hdr), 0,
+ NULL, NULL, NULL, NULL);
/* Defer delivery if sequence gap */
if (unlikely(seqno != rcv_nxt)) {
- __tipc_skb_queue_sorted(defq, seqno, skb);
+ if (!__tipc_skb_queue_sorted(defq, seqno, skb))
+ l->stats.duplicates++;
rc |= tipc_link_build_nack_msg(l, xmitq);
break;
}
@@ -1596,9 +1802,13 @@
break;
} while ((skb = __tipc_skb_dequeue(defq, l->rcv_nxt)));
- return rc;
-drop:
- kfree_skb(skb);
+ /* Forward queues and wake up waiting users */
+ if (released) {
+ tipc_link_update_cwin(l, released, 0);
+ tipc_link_advance_backlog(l, xmitq);
+ if (unlikely(!skb_queue_empty(&l->wakeupq)))
+ link_prepare_wakeup(l);
+ }
return rc;
}
@@ -1607,15 +1817,15 @@
int tolerance, int priority,
struct sk_buff_head *xmitq)
{
- struct tipc_link *bcl = l->bc_rcvlink;
- struct sk_buff *skb;
- struct tipc_msg *hdr;
- struct sk_buff_head *dfq = &l->deferdq;
- bool node_up = link_is_up(bcl);
struct tipc_mon_state *mstate = &l->mon_state;
+ struct sk_buff_head *dfq = &l->deferdq;
+ struct tipc_link *bcl = l->bc_rcvlink;
+ struct tipc_msg *hdr;
+ struct sk_buff *skb;
+ bool node_up = link_is_up(bcl);
+ u16 glen = 0, bc_rcvgap = 0;
int dlen = 0;
void *data;
- u16 glen = 0;
/* Don't send protocol message during reset or link failover */
if (tipc_link_is_blocked(l))
@@ -1624,7 +1834,7 @@
if (!tipc_link_is_up(l) && (mtyp == STATE_MSG))
return;
- if (!skb_queue_empty(dfq))
+ if ((probe || probe_reply) && !skb_queue_empty(dfq))
rcvgap = buf_seqno(skb_peek(dfq)) - l->rcv_nxt;
skb = tipc_msg_create(LINK_PROTOCOL, mtyp, INT_H_SIZE,
@@ -1653,11 +1863,12 @@
if (l->peer_caps & TIPC_LINK_PROTO_SEQNO)
msg_set_seqno(hdr, l->snd_nxt_state++);
msg_set_seq_gap(hdr, rcvgap);
- msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl));
+ bc_rcvgap = link_bc_rcv_gap(bcl);
+ msg_set_bc_gap(hdr, bc_rcvgap);
msg_set_probe(hdr, probe);
msg_set_is_keepalive(hdr, probe || probe_reply);
if (l->peer_caps & TIPC_GAP_ACK_BLOCK)
- glen = tipc_build_gap_ack_blks(l, data);
+ glen = tipc_build_gap_ack_blks(l, hdr);
tipc_mon_prep(l->net, data + glen, &dlen, mstate, l->bearer_id);
msg_set_size(hdr, INT_H_SIZE + glen + dlen);
skb_trim(skb, INT_H_SIZE + glen + dlen);
@@ -1678,6 +1889,8 @@
l->stats.sent_probes++;
if (rcvgap)
l->stats.sent_nacks++;
+ if (bc_rcvgap)
+ bcl->stats.sent_nacks++;
skb->priority = TC_PRIO_CONTROL;
__skb_queue_tail(xmitq, skb);
trace_tipc_proto_build(skb, false, l->name);
@@ -1733,21 +1946,6 @@
return;
__skb_queue_head_init(&tnlq);
- __skb_queue_head_init(&tmpxq);
- __skb_queue_head_init(&frags);
-
- /* At least one packet required for safe algorithm => add dummy */
- skb = tipc_msg_create(TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG,
- BASIC_H_SIZE, 0, l->addr, tipc_own_addr(l->net),
- 0, 0, TIPC_ERR_NO_PORT);
- if (!skb) {
- pr_warn("%sunable to create tunnel packet\n", link_co_err);
- return;
- }
- __skb_queue_tail(&tnlq, skb);
- tipc_link_xmit(l, &tnlq, &tmpxq);
- __skb_queue_purge(&tmpxq);
-
/* Link Synching:
* From now on, send only one single ("dummy") SYNCH message
* to peer. The SYNCH message does not contain any data, just
@@ -1773,6 +1971,20 @@
return;
}
+ __skb_queue_head_init(&tmpxq);
+ __skb_queue_head_init(&frags);
+ /* At least one packet required for safe algorithm => add dummy */
+ skb = tipc_msg_create(TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG,
+ BASIC_H_SIZE, 0, l->addr, tipc_own_addr(l->net),
+ 0, 0, TIPC_ERR_NO_PORT);
+ if (!skb) {
+ pr_warn("%sunable to create tunnel packet\n", link_co_err);
+ return;
+ }
+ __skb_queue_tail(&tnlq, skb);
+ tipc_link_xmit(l, &tnlq, &tmpxq);
+ __skb_queue_purge(&tmpxq);
+
/* Initialize reusable tunnel packet header */
tipc_msg_init(tipc_own_addr(l->net), &tnlhdr, TUNNEL_PROTOCOL,
mtyp, INT_H_SIZE, l->addr);
@@ -1878,7 +2090,7 @@
tipc_link_create_dummy_tnl_msg(tnl, xmitq);
- /* This failover link enpoint was never established before,
+ /* This failover link endpoint was never established before,
* so it has not received anything from peer.
* Otherwise, it must be a normal failover situation or the
* node has entered SELF_DOWN_PEER_LEAVING and both peer nodes
@@ -1946,22 +2158,25 @@
{
struct tipc_msg *hdr = buf_msg(skb);
struct tipc_gap_ack_blks *ga = NULL;
- u16 rcvgap = 0;
- u16 ack = msg_ack(hdr);
- u16 gap = msg_seq_gap(hdr);
+ bool reply = msg_probe(hdr), retransmitted = false;
+ u32 dlen = msg_data_sz(hdr), glen = 0;
u16 peers_snd_nxt = msg_next_sent(hdr);
u16 peers_tol = msg_link_tolerance(hdr);
u16 peers_prio = msg_linkprio(hdr);
+ u16 gap = msg_seq_gap(hdr);
+ u16 ack = msg_ack(hdr);
u16 rcv_nxt = l->rcv_nxt;
- u16 dlen = msg_data_sz(hdr);
+ u16 rcvgap = 0;
int mtyp = msg_type(hdr);
- bool reply = msg_probe(hdr);
- u16 glen = 0;
- void *data;
+ int rc = 0, released;
char *if_name;
- int rc = 0;
+ void *data;
trace_tipc_proto_rcv(skb, false, l->name);
+
+ if (dlen > U16_MAX)
+ goto exit;
+
if (tipc_link_is_blocked(l) || !xmitq)
goto exit;
@@ -2030,6 +2245,11 @@
break;
case STATE_MSG:
+ /* Validate Gap ACK blocks, drop if invalid */
+ glen = tipc_get_gap_ack_blks(&ga, l, hdr, true);
+ if (glen > dlen)
+ break;
+
l->rcv_nxt_state = msg_seqno(hdr) + 1;
/* Update own tolerance if peer indicates a non-zero value */
@@ -2055,32 +2275,27 @@
break;
}
- /* Receive Gap ACK blocks from peer if any */
- if (l->peer_caps & TIPC_GAP_ACK_BLOCK) {
- ga = (struct tipc_gap_ack_blks *)data;
- glen = ntohs(ga->len);
- /* sanity check: if failed, ignore Gap ACK blocks */
- if (glen != tipc_gap_ack_blks_sz(ga->gack_cnt))
- ga = NULL;
- }
-
tipc_mon_rcv(l->net, data + glen, dlen - glen, l->addr,
&l->mon_state, l->bearer_id);
/* Send NACK if peer has sent pkts we haven't received yet */
- if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l))
+ if ((reply || msg_is_keepalive(hdr)) &&
+ more(peers_snd_nxt, rcv_nxt) &&
+ !tipc_link_is_synching(l) &&
+ skb_queue_empty(&l->deferdq))
rcvgap = peers_snd_nxt - l->rcv_nxt;
if (rcvgap || reply)
tipc_link_build_proto_msg(l, STATE_MSG, 0, reply,
rcvgap, 0, 0, xmitq);
- rc |= tipc_link_advance_transmq(l, ack, gap, ga, xmitq);
-
- /* If NACK, retransmit will now start at right position */
+ released = tipc_link_advance_transmq(l, l, ack, gap, ga, xmitq,
+ &retransmitted, &rc);
if (gap)
l->stats.recv_nacks++;
-
- tipc_link_advance_backlog(l, xmitq);
+ if (released || retransmitted)
+ tipc_link_update_cwin(l, released, retransmitted);
+ if (released)
+ tipc_link_advance_backlog(l, xmitq);
if (unlikely(!skb_queue_empty(&l->wakeupq)))
link_prepare_wakeup(l);
}
@@ -2166,10 +2381,7 @@
int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
struct sk_buff_head *xmitq)
{
- struct tipc_link *snd_l = l->bc_sndlink;
u16 peers_snd_nxt = msg_bc_snd_nxt(hdr);
- u16 from = msg_bcast_ack(hdr) + 1;
- u16 to = from + msg_bc_gap(hdr) - 1;
int rc = 0;
if (!link_is_up(l))
@@ -2185,14 +2397,10 @@
if (!l->bc_peer_is_up)
return rc;
- l->stats.recv_nacks++;
-
/* Ignore if peers_snd_nxt goes beyond receive window */
if (more(peers_snd_nxt, l->rcv_nxt + l->window))
return rc;
- rc = tipc_link_bc_retrans(snd_l, l, from, to, xmitq);
-
l->snd_nxt = peers_snd_nxt;
if (link_bc_rcv_gap(l))
rc |= TIPC_LINK_SND_STATE;
@@ -2227,38 +2435,34 @@
return 0;
}
-void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked,
- struct sk_buff_head *xmitq)
+int tipc_link_bc_ack_rcv(struct tipc_link *r, u16 acked, u16 gap,
+ struct tipc_gap_ack_blks *ga,
+ struct sk_buff_head *xmitq,
+ struct sk_buff_head *retrq)
{
- struct sk_buff *skb, *tmp;
- struct tipc_link *snd_l = l->bc_sndlink;
+ struct tipc_link *l = r->bc_sndlink;
+ bool unused = false;
+ int rc = 0;
- if (!link_is_up(l) || !l->bc_peer_is_up)
- return;
+ if (!link_is_up(r) || !r->bc_peer_is_up)
+ return 0;
- if (!more(acked, l->acked))
- return;
-
- trace_tipc_link_bc_ack(l, l->acked, acked, &snd_l->transmq);
- /* Skip over packets peer has already acked */
- skb_queue_walk(&snd_l->transmq, skb) {
- if (more(buf_seqno(skb), l->acked))
- break;
+ if (gap) {
+ l->stats.recv_nacks++;
+ r->stats.recv_nacks++;
}
- /* Update/release the packets peer is acking now */
- skb_queue_walk_from_safe(&snd_l->transmq, skb, tmp) {
- if (more(buf_seqno(skb), acked))
- break;
- if (!--TIPC_SKB_CB(skb)->ackers) {
- __skb_unlink(skb, &snd_l->transmq);
- kfree_skb(skb);
- }
- }
- l->acked = acked;
- tipc_link_advance_backlog(snd_l, xmitq);
- if (unlikely(!skb_queue_empty(&snd_l->wakeupq)))
- link_prepare_wakeup(snd_l);
+ if (less(acked, r->acked) || (acked == r->acked && !gap && !ga))
+ return 0;
+
+ trace_tipc_link_bc_ack(r, acked, gap, &l->transmq);
+ tipc_link_advance_transmq(l, r, acked, gap, ga, retrq, &unused, &rc);
+
+ tipc_link_advance_backlog(l, xmitq);
+ if (unlikely(!skb_queue_empty(&l->wakeupq)))
+ link_prepare_wakeup(l);
+
+ return rc;
}
/* tipc_link_bc_nack_rcv(): receive broadcast nack message
@@ -2286,8 +2490,8 @@
return 0;
if (dnode == tipc_own_addr(l->net)) {
- tipc_link_bc_ack_rcv(l, acked, xmitq);
- rc = tipc_link_bc_retrans(l->bc_sndlink, l, from, to, xmitq);
+ rc = tipc_link_bc_ack_rcv(l, acked, to - acked, NULL, xmitq,
+ xmitq);
l->stats.recv_nacks++;
return rc;
}
@@ -2299,15 +2503,18 @@
return 0;
}
-void tipc_link_set_queue_limits(struct tipc_link *l, u32 win)
+void tipc_link_set_queue_limits(struct tipc_link *l, u32 min_win, u32 max_win)
{
int max_bulk = TIPC_MAX_PUBL / (l->mtu / ITEM_SIZE);
- l->window = win;
- l->backlog[TIPC_LOW_IMPORTANCE].limit = max_t(u16, 50, win);
- l->backlog[TIPC_MEDIUM_IMPORTANCE].limit = max_t(u16, 100, win * 2);
- l->backlog[TIPC_HIGH_IMPORTANCE].limit = max_t(u16, 150, win * 3);
- l->backlog[TIPC_CRITICAL_IMPORTANCE].limit = max_t(u16, 200, win * 4);
+ l->min_win = min_win;
+ l->ssthresh = max_win;
+ l->max_win = max_win;
+ l->window = min_win;
+ l->backlog[TIPC_LOW_IMPORTANCE].limit = min_win * 2;
+ l->backlog[TIPC_MEDIUM_IMPORTANCE].limit = min_win * 4;
+ l->backlog[TIPC_HIGH_IMPORTANCE].limit = min_win * 6;
+ l->backlog[TIPC_CRITICAL_IMPORTANCE].limit = min_win * 8;
l->backlog[TIPC_SYSTEM_IMPORTANCE].limit = max_bulk;
}
@@ -2360,10 +2567,10 @@
}
if (props[TIPC_NLA_PROP_WIN]) {
- u32 win;
+ u32 max_win;
- win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
- if ((win < TIPC_MIN_LINK_WIN) || (win > TIPC_MAX_LINK_WIN))
+ max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
+ if (max_win < TIPC_DEF_LINK_WIN || max_win > TIPC_MAX_LINK_WIN)
return -EINVAL;
}
@@ -2556,16 +2763,15 @@
return -EMSGSIZE;
}
-int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg)
+int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg,
+ struct tipc_link *bcl)
{
int err;
void *hdr;
struct nlattr *attrs;
struct nlattr *prop;
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- u32 bc_mode = tipc_bcast_get_broadcast_mode(net);
+ u32 bc_mode = tipc_bcast_get_mode(net);
u32 bc_ratio = tipc_bcast_get_broadcast_ratio(net);
- struct tipc_link *bcl = tn->bcl;
if (!bcl)
return 0;
@@ -2599,7 +2805,7 @@
prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK_PROP);
if (!prop)
goto attr_msg_full;
- if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->window))
+ if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->max_win))
goto prop_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_BROADCAST, bc_mode))
goto prop_msg_full;
@@ -2652,21 +2858,6 @@
l->abort_limit = limit;
}
-char *tipc_link_name_ext(struct tipc_link *l, char *buf)
-{
- if (!l)
- scnprintf(buf, TIPC_MAX_LINK_NAME, "null");
- else if (link_is_bc_sndlink(l))
- scnprintf(buf, TIPC_MAX_LINK_NAME, "broadcast-sender");
- else if (link_is_bc_rcvlink(l))
- scnprintf(buf, TIPC_MAX_LINK_NAME,
- "broadcast-receiver, peer %x", l->addr);
- else
- memcpy(buf, l->name, TIPC_MAX_LINK_NAME);
-
- return buf;
-}
-
/**
* tipc_link_dump - dump TIPC link data
* @l: tipc link to be dumped
diff --git a/net/tipc/link.h b/net/tipc/link.h
index adcad65..fc07232 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -73,15 +73,15 @@
bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
int tolerance, char net_plane, u32 mtu, int priority,
- int window, u32 session, u32 ownnode,
+ u32 min_win, u32 max_win, u32 session, u32 ownnode,
u32 peer, u8 *peer_id, u16 peer_caps,
struct tipc_link *bc_sndlink,
struct tipc_link *bc_rcvlink,
struct sk_buff_head *inputq,
struct sk_buff_head *namedq,
struct tipc_link **link);
-bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
- int mtu, int window, u16 peer_caps,
+bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, u8 *peer_id,
+ int mtu, u32 min_win, u32 max_win, u16 peer_caps,
struct sk_buff_head *inputq,
struct sk_buff_head *namedq,
struct tipc_link *bc_sndlink,
@@ -111,11 +111,11 @@
u16 tipc_link_acked(struct tipc_link *l);
u32 tipc_link_id(struct tipc_link *l);
char *tipc_link_name(struct tipc_link *l);
-char *tipc_link_name_ext(struct tipc_link *l, char *buf);
u32 tipc_link_state(struct tipc_link *l);
char tipc_link_plane(struct tipc_link *l);
int tipc_link_prio(struct tipc_link *l);
-int tipc_link_window(struct tipc_link *l);
+int tipc_link_min_win(struct tipc_link *l);
+int tipc_link_max_win(struct tipc_link *l);
void tipc_link_update_caps(struct tipc_link *l, u16 capabilities);
bool tipc_link_validate_msg(struct tipc_link *l, struct tipc_msg *hdr);
unsigned long tipc_link_tolerance(struct tipc_link *l);
@@ -124,7 +124,7 @@
void tipc_link_set_prio(struct tipc_link *l, u32 prio,
struct sk_buff_head *xmitq);
void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit);
-void tipc_link_set_queue_limits(struct tipc_link *l, u32 window);
+void tipc_link_set_queue_limits(struct tipc_link *l, u32 min_win, u32 max_win);
int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg,
struct tipc_link *link, int nlflags);
int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]);
@@ -141,8 +141,13 @@
int tipc_link_bc_peers(struct tipc_link *l);
void tipc_link_set_mtu(struct tipc_link *l, int mtu);
int tipc_link_mtu(struct tipc_link *l);
-void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked,
- struct sk_buff_head *xmitq);
+int tipc_link_mss(struct tipc_link *l);
+u16 tipc_get_gap_ack_blks(struct tipc_gap_ack_blks **ga, struct tipc_link *l,
+ struct tipc_msg *hdr, bool uc);
+int tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, u16 gap,
+ struct tipc_gap_ack_blks *ga,
+ struct sk_buff_head *xmitq,
+ struct sk_buff_head *retrq);
void tipc_link_build_bc_sync_msg(struct tipc_link *l,
struct sk_buff_head *xmitq);
void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr);
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index 58708b4..a37190d 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -322,9 +322,13 @@
void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id)
{
struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
- struct tipc_peer *self = get_self(net, bearer_id);
+ struct tipc_peer *self;
struct tipc_peer *peer, *prev, *head;
+ if (!mon)
+ return;
+
+ self = get_self(net, bearer_id);
write_lock_bh(&mon->lock);
peer = get_peer(mon, addr);
if (!peer)
@@ -407,11 +411,15 @@
void tipc_mon_peer_down(struct net *net, u32 addr, int bearer_id)
{
struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
- struct tipc_peer *self = get_self(net, bearer_id);
+ struct tipc_peer *self;
struct tipc_peer *peer, *head;
struct tipc_mon_domain *dom;
int applied;
+ if (!mon)
+ return;
+
+ self = get_self(net, bearer_id);
write_lock_bh(&mon->lock);
peer = get_peer(mon, addr);
if (!peer) {
@@ -457,6 +465,8 @@
state->probing = false;
/* Sanity check received domain record */
+ if (new_member_cnt > MAX_MON_DOMAIN)
+ return;
if (dlen < dom_rec_len(arrv_dom, 0))
return;
if (dlen != dom_rec_len(arrv_dom, new_member_cnt))
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index e4ea942..91dcf64 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -39,10 +39,19 @@
#include "msg.h"
#include "addr.h"
#include "name_table.h"
+#include "crypto.h"
#define MAX_FORWARD_SIZE 1024
+#ifdef CONFIG_TIPC_CRYPTO
+#define BUF_HEADROOM ALIGN(((LL_MAX_HEADER + 48) + EHDR_MAX_SIZE), 16)
+#define BUF_OVERHEAD (BUF_HEADROOM + TIPC_AES_GCM_TAG_SIZE)
+#else
#define BUF_HEADROOM (LL_MAX_HEADER + 48)
-#define BUF_TAILROOM 16
+#define BUF_OVERHEAD BUF_HEADROOM
+#endif
+
+const int one_page_mtu = PAGE_SIZE - SKB_DATA_ALIGN(BUF_OVERHEAD) -
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
static unsigned int align(unsigned int i)
{
@@ -61,9 +70,8 @@
struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp)
{
struct sk_buff *skb;
- unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u;
- skb = alloc_skb_fclone(buf_size, gfp);
+ skb = alloc_skb_fclone(BUF_OVERHEAD + size, gfp);
if (skb) {
skb_reserve(skb, BUF_HEADROOM);
skb_put(skb, size);
@@ -169,7 +177,7 @@
}
if (fragid == LAST_FRAGMENT) {
- TIPC_SKB_CB(head)->validated = false;
+ TIPC_SKB_CB(head)->validated = 0;
if (unlikely(!tipc_msg_validate(&head)))
goto err;
*buf = head;
@@ -186,6 +194,55 @@
return 0;
}
+/**
+ * tipc_msg_append(): Append data to tail of an existing buffer queue
+ * @_hdr: header to be used
+ * @m: the data to be appended
+ * @mss: max allowable size of buffer
+ * @dlen: size of data to be appended
+ * @txq: queue to appand to
+ * Returns the number og 1k blocks appended or errno value
+ */
+int tipc_msg_append(struct tipc_msg *_hdr, struct msghdr *m, int dlen,
+ int mss, struct sk_buff_head *txq)
+{
+ struct sk_buff *skb;
+ int accounted, total, curr;
+ int mlen, cpy, rem = dlen;
+ struct tipc_msg *hdr;
+
+ skb = skb_peek_tail(txq);
+ accounted = skb ? msg_blocks(buf_msg(skb)) : 0;
+ total = accounted;
+
+ do {
+ if (!skb || skb->len >= mss) {
+ skb = tipc_buf_acquire(mss, GFP_KERNEL);
+ if (unlikely(!skb))
+ return -ENOMEM;
+ skb_orphan(skb);
+ skb_trim(skb, MIN_H_SIZE);
+ hdr = buf_msg(skb);
+ skb_copy_to_linear_data(skb, _hdr, MIN_H_SIZE);
+ msg_set_hdr_sz(hdr, MIN_H_SIZE);
+ msg_set_size(hdr, MIN_H_SIZE);
+ __skb_queue_tail(txq, skb);
+ total += 1;
+ }
+ hdr = buf_msg(skb);
+ curr = msg_blocks(hdr);
+ mlen = msg_size(hdr);
+ cpy = min_t(size_t, rem, mss - mlen);
+ if (cpy != copy_from_iter(skb->data + mlen, cpy, &m->msg_iter))
+ return -EFAULT;
+ msg_set_size(hdr, mlen + cpy);
+ skb_put(skb, cpy);
+ rem -= cpy;
+ total += msg_blocks(hdr) - curr;
+ } while (rem > 0);
+ return total - accounted;
+}
+
/* tipc_msg_validate - validate basic format of received message
*
* This routine ensures a TIPC message has an acceptable header, and at least
@@ -214,6 +271,7 @@
if (unlikely(TIPC_SKB_CB(skb)->validated))
return true;
+
if (unlikely(!pskb_may_pull(skb, MIN_H_SIZE)))
return false;
@@ -235,7 +293,7 @@
if (unlikely(skb->len < msz))
return false;
- TIPC_SKB_CB(skb)->validated = true;
+ TIPC_SKB_CB(skb)->validated = 1;
return true;
}
@@ -335,7 +393,8 @@
if (unlikely(!skb)) {
if (pktmax != MAX_MSG_SIZE)
return -ENOMEM;
- rc = tipc_msg_build(mhdr, m, offset, dsz, FB_MTU, list);
+ rc = tipc_msg_build(mhdr, m, offset, dsz,
+ one_page_mtu, list);
if (rc != dsz)
return rc;
if (tipc_msg_assemble(list))
@@ -415,59 +474,109 @@
}
/**
- * tipc_msg_bundle(): Append contents of a buffer to tail of an existing one
- * @skb: the buffer to append to ("bundle")
- * @msg: message to be appended
- * @mtu: max allowable size for the bundle buffer
- * Consumes buffer if successful
- * Returns true if bundling could be performed, otherwise false
+ * tipc_msg_bundle - Append contents of a buffer to tail of an existing one
+ * @bskb: the bundle buffer to append to
+ * @msg: message to be appended
+ * @max: max allowable size for the bundle buffer
+ *
+ * Returns "true" if bundling has been performed, otherwise "false"
*/
-bool tipc_msg_bundle(struct sk_buff *skb, struct tipc_msg *msg, u32 mtu)
+static bool tipc_msg_bundle(struct sk_buff *bskb, struct tipc_msg *msg,
+ u32 max)
{
- struct tipc_msg *bmsg;
- unsigned int bsz;
- unsigned int msz = msg_size(msg);
- u32 start, pad;
- u32 max = mtu - INT_H_SIZE;
+ struct tipc_msg *bmsg = buf_msg(bskb);
+ u32 msz, bsz, offset, pad;
- if (likely(msg_user(msg) == MSG_FRAGMENTER))
- return false;
- if (!skb)
- return false;
- bmsg = buf_msg(skb);
+ msz = msg_size(msg);
bsz = msg_size(bmsg);
- start = align(bsz);
- pad = start - bsz;
+ offset = align(bsz);
+ pad = offset - bsz;
- if (unlikely(msg_user(msg) == TUNNEL_PROTOCOL))
+ if (unlikely(skb_tailroom(bskb) < (pad + msz)))
return false;
- if (unlikely(msg_user(msg) == BCAST_PROTOCOL))
- return false;
- if (unlikely(msg_user(bmsg) != MSG_BUNDLER))
- return false;
- if (unlikely(skb_tailroom(skb) < (pad + msz)))
- return false;
- if (unlikely(max < (start + msz)))
- return false;
- if ((msg_importance(msg) < TIPC_SYSTEM_IMPORTANCE) &&
- (msg_importance(bmsg) == TIPC_SYSTEM_IMPORTANCE))
+ if (unlikely(max < (offset + msz)))
return false;
- skb_put(skb, pad + msz);
- skb_copy_to_linear_data_offset(skb, start, msg, msz);
- msg_set_size(bmsg, start + msz);
+ skb_put(bskb, pad + msz);
+ skb_copy_to_linear_data_offset(bskb, offset, msg, msz);
+ msg_set_size(bmsg, offset + msz);
msg_set_msgcnt(bmsg, msg_msgcnt(bmsg) + 1);
return true;
}
/**
+ * tipc_msg_try_bundle - Try to bundle a new message to the last one
+ * @tskb: the last/target message to which the new one will be appended
+ * @skb: the new message skb pointer
+ * @mss: max message size (header inclusive)
+ * @dnode: destination node for the message
+ * @new_bundle: if this call made a new bundle or not
+ *
+ * Return: "true" if the new message skb is potential for bundling this time or
+ * later, in the case a bundling has been done this time, the skb is consumed
+ * (the skb pointer = NULL).
+ * Otherwise, "false" if the skb cannot be bundled at all.
+ */
+bool tipc_msg_try_bundle(struct sk_buff *tskb, struct sk_buff **skb, u32 mss,
+ u32 dnode, bool *new_bundle)
+{
+ struct tipc_msg *msg, *inner, *outer;
+ u32 tsz;
+
+ /* First, check if the new buffer is suitable for bundling */
+ msg = buf_msg(*skb);
+ if (msg_user(msg) == MSG_FRAGMENTER)
+ return false;
+ if (msg_user(msg) == TUNNEL_PROTOCOL)
+ return false;
+ if (msg_user(msg) == BCAST_PROTOCOL)
+ return false;
+ if (mss <= INT_H_SIZE + msg_size(msg))
+ return false;
+
+ /* Ok, but the last/target buffer can be empty? */
+ if (unlikely(!tskb))
+ return true;
+
+ /* Is it a bundle already? Try to bundle the new message to it */
+ if (msg_user(buf_msg(tskb)) == MSG_BUNDLER) {
+ *new_bundle = false;
+ goto bundle;
+ }
+
+ /* Make a new bundle of the two messages if possible */
+ tsz = msg_size(buf_msg(tskb));
+ if (unlikely(mss < align(INT_H_SIZE + tsz) + msg_size(msg)))
+ return true;
+ if (unlikely(pskb_expand_head(tskb, INT_H_SIZE, mss - tsz - INT_H_SIZE,
+ GFP_ATOMIC)))
+ return true;
+ inner = buf_msg(tskb);
+ skb_push(tskb, INT_H_SIZE);
+ outer = buf_msg(tskb);
+ tipc_msg_init(msg_prevnode(inner), outer, MSG_BUNDLER, 0, INT_H_SIZE,
+ dnode);
+ msg_set_importance(outer, msg_importance(inner));
+ msg_set_size(outer, INT_H_SIZE + tsz);
+ msg_set_msgcnt(outer, 1);
+ *new_bundle = true;
+
+bundle:
+ if (likely(tipc_msg_bundle(tskb, msg, mss))) {
+ consume_skb(*skb);
+ *skb = NULL;
+ }
+ return true;
+}
+
+/**
* tipc_msg_extract(): extract bundled inner packet from buffer
* @skb: buffer to be extracted from.
* @iskb: extracted inner buffer, to be returned
* @pos: position in outer message of msg to be extracted.
* Returns position of next msg
* Consumes outer buffer when last packet extracted
- * Returns true when when there is an extracted buffer, otherwise false
+ * Returns true when there is an extracted buffer, otherwise false
*/
bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos)
{
@@ -506,49 +615,6 @@
}
/**
- * tipc_msg_make_bundle(): Create bundle buf and append message to its tail
- * @list: the buffer chain, where head is the buffer to replace/append
- * @skb: buffer to be created, appended to and returned in case of success
- * @msg: message to be appended
- * @mtu: max allowable size for the bundle buffer, inclusive header
- * @dnode: destination node for message. (Not always present in header)
- * Returns true if success, otherwise false
- */
-bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg,
- u32 mtu, u32 dnode)
-{
- struct sk_buff *_skb;
- struct tipc_msg *bmsg;
- u32 msz = msg_size(msg);
- u32 max = mtu - INT_H_SIZE;
-
- if (msg_user(msg) == MSG_FRAGMENTER)
- return false;
- if (msg_user(msg) == TUNNEL_PROTOCOL)
- return false;
- if (msg_user(msg) == BCAST_PROTOCOL)
- return false;
- if (msz > (max / 2))
- return false;
-
- _skb = tipc_buf_acquire(max, GFP_ATOMIC);
- if (!_skb)
- return false;
-
- skb_trim(_skb, INT_H_SIZE);
- bmsg = buf_msg(_skb);
- tipc_msg_init(msg_prevnode(msg), bmsg, MSG_BUNDLER, 0,
- INT_H_SIZE, dnode);
- msg_set_importance(bmsg, msg_importance(msg));
- msg_set_seqno(bmsg, msg_seqno(msg));
- msg_set_ack(bmsg, msg_ack(msg));
- msg_set_bcast_ack(bmsg, msg_bcast_ack(msg));
- tipc_msg_bundle(_skb, msg, mtu);
- *skb = _skb;
- return true;
-}
-
-/**
* tipc_msg_reverse(): swap source and destination addresses and add error code
* @own_node: originating node id for reversed message
* @skb: buffer containing message to be reversed; will be consumed
@@ -661,9 +727,6 @@
msg_set_destport(msg, dport);
*err = TIPC_OK;
- if (!skb_cloned(skb))
- return true;
-
return true;
}
@@ -753,19 +816,19 @@
* @seqno: sequence number of buffer to add
* @skb: buffer to add
*/
-void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
+bool __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
struct sk_buff *skb)
{
struct sk_buff *_skb, *tmp;
if (skb_queue_empty(list) || less(seqno, buf_seqno(skb_peek(list)))) {
__skb_queue_head(list, skb);
- return;
+ return true;
}
if (more(seqno, buf_seqno(skb_peek_tail(list)))) {
__skb_queue_tail(list, skb);
- return;
+ return true;
}
skb_queue_walk_safe(list, _skb, tmp) {
@@ -774,9 +837,10 @@
if (seqno == buf_seqno(_skb))
break;
__skb_queue_before(list, _skb, skb);
- return;
+ return true;
}
kfree_skb(skb);
+ return false;
}
void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb,
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 0daa6f0..64ae4c4 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -82,6 +82,7 @@
#define NAME_DISTRIBUTOR 11
#define MSG_FRAGMENTER 12
#define LINK_CONFIG 13
+#define MSG_CRYPTO 14
#define SOCK_WAKEUP 14 /* pseudo user */
#define TOP_SRV 15 /* pseudo user */
@@ -98,20 +99,49 @@
#define MAX_H_SIZE 60 /* Largest possible TIPC header size */
#define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE)
-#define FB_MTU 3744
#define TIPC_MEDIA_INFO_OFFSET 5
+extern const int one_page_mtu;
+
struct tipc_skb_cb {
- struct sk_buff *tail;
- unsigned long nxt_retr;
- unsigned long retr_stamp;
- u32 bytes_read;
- u32 orig_member;
- u16 chain_imp;
- u16 ackers;
- u16 retr_cnt;
- bool validated;
-};
+ union {
+ struct {
+ struct sk_buff *tail;
+ unsigned long nxt_retr;
+ unsigned long retr_stamp;
+ u32 bytes_read;
+ u32 orig_member;
+ u16 chain_imp;
+ u16 ackers;
+ u16 retr_cnt;
+ } __packed;
+#ifdef CONFIG_TIPC_CRYPTO
+ struct {
+ struct tipc_crypto *rx;
+ struct tipc_aead *last;
+ u8 recurs;
+ } tx_clone_ctx __packed;
+#endif
+ } __packed;
+ union {
+ struct {
+ u8 validated:1;
+#ifdef CONFIG_TIPC_CRYPTO
+ u8 encrypted:1;
+ u8 decrypted:1;
+#define SKB_PROBING 1
+#define SKB_GRACING 2
+ u8 xmit_type:2;
+ u8 tx_clone_deferred:1;
+#endif
+ };
+ u8 flags;
+ };
+ u8 reserved;
+#ifdef CONFIG_TIPC_CRYPTO
+ void *crypto_ctx;
+#endif
+} __packed;
#define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0]))
@@ -134,21 +164,38 @@
/* struct tipc_gap_ack_blks
* @len: actual length of the record
- * @gack_cnt: number of Gap ACK blocks in the record
+ * @ugack_cnt: number of Gap ACK blocks for unicast (following the broadcast
+ * ones)
+ * @start_index: starting index for "valid" broadcast Gap ACK blocks
+ * @bgack_cnt: number of Gap ACK blocks for broadcast in the record
* @gacks: array of Gap ACK blocks
+ *
+ * 31 16 15 0
+ * +-------------+-------------+-------------+-------------+
+ * | bgack_cnt | ugack_cnt | len |
+ * +-------------+-------------+-------------+-------------+ -
+ * | gap | ack | |
+ * +-------------+-------------+-------------+-------------+ > bc gacks
+ * : : : |
+ * +-------------+-------------+-------------+-------------+ -
+ * | gap | ack | |
+ * +-------------+-------------+-------------+-------------+ > uc gacks
+ * : : : |
+ * +-------------+-------------+-------------+-------------+ -
*/
struct tipc_gap_ack_blks {
__be16 len;
- u8 gack_cnt;
- u8 reserved;
+ union {
+ u8 ugack_cnt;
+ u8 start_index;
+ };
+ u8 bgack_cnt;
struct tipc_gap_ack gacks[];
};
-#define tipc_gap_ack_blks_sz(n) (sizeof(struct tipc_gap_ack_blks) + \
- sizeof(struct tipc_gap_ack) * (n))
-
-#define MAX_GAP_ACK_BLKS 32
-#define MAX_GAP_ACK_BLKS_SZ tipc_gap_ack_blks_sz(MAX_GAP_ACK_BLKS)
+#define MAX_GAP_ACK_BLKS 128
+#define MAX_GAP_ACK_BLKS_SZ (sizeof(struct tipc_gap_ack_blks) + \
+ sizeof(struct tipc_gap_ack) * MAX_GAP_ACK_BLKS)
static inline struct tipc_msg *buf_msg(struct sk_buff *skb)
{
@@ -290,6 +337,26 @@
msg_set_bits(m, 0, 18, 1, d);
}
+static inline int msg_ack_required(struct tipc_msg *m)
+{
+ return msg_bits(m, 0, 18, 1);
+}
+
+static inline void msg_set_ack_required(struct tipc_msg *m)
+{
+ msg_set_bits(m, 0, 18, 1, 1);
+}
+
+static inline int msg_nagle_ack(struct tipc_msg *m)
+{
+ return msg_bits(m, 0, 18, 1);
+}
+
+static inline void msg_set_nagle_ack(struct tipc_msg *m)
+{
+ msg_set_bits(m, 0, 18, 1, 1);
+}
+
static inline bool msg_is_rcast(struct tipc_msg *m)
{
return msg_bits(m, 0, 18, 0x1);
@@ -358,6 +425,11 @@
return msg_type(m) == TIPC_CONN_MSG;
}
+static inline u32 msg_direct(struct tipc_msg *m)
+{
+ return msg_type(m) == TIPC_DIRECT_MSG;
+}
+
static inline u32 msg_errcode(struct tipc_msg *m)
{
return msg_bits(m, 1, 25, 0xf);
@@ -368,6 +440,36 @@
msg_set_bits(m, 1, 25, 0xf, err);
}
+static inline void msg_set_bulk(struct tipc_msg *m)
+{
+ msg_set_bits(m, 1, 28, 0x1, 1);
+}
+
+static inline u32 msg_is_bulk(struct tipc_msg *m)
+{
+ return msg_bits(m, 1, 28, 0x1);
+}
+
+static inline void msg_set_last_bulk(struct tipc_msg *m)
+{
+ msg_set_bits(m, 1, 27, 0x1, 1);
+}
+
+static inline u32 msg_is_last_bulk(struct tipc_msg *m)
+{
+ return msg_bits(m, 1, 27, 0x1);
+}
+
+static inline void msg_set_non_legacy(struct tipc_msg *m)
+{
+ msg_set_bits(m, 1, 26, 0x1, 1);
+}
+
+static inline u32 msg_is_legacy(struct tipc_msg *m)
+{
+ return !msg_bits(m, 1, 26, 0x1);
+}
+
static inline u32 msg_reroute_cnt(struct tipc_msg *m)
{
return msg_bits(m, 1, 21, 0xf);
@@ -497,6 +599,16 @@
msg_set_word(m, 4, p);
}
+static inline u16 msg_named_seqno(struct tipc_msg *m)
+{
+ return msg_bits(m, 4, 0, 0xffff);
+}
+
+static inline void msg_set_named_seqno(struct tipc_msg *m, u16 n)
+{
+ msg_set_bits(m, 4, 0, 0xffff, n);
+}
+
static inline u32 msg_destport(struct tipc_msg *m)
{
return msg_word(m, 5);
@@ -639,6 +751,9 @@
#define GRP_RECLAIM_MSG 4
#define GRP_REMIT_MSG 5
+/* Crypto message types */
+#define KEY_DISTR_MSG 0
+
/*
* Word 1
*/
@@ -1026,6 +1141,20 @@
return (msg_user(hdr) == LINK_PROTOCOL) && (msg_type(hdr) == RESET_MSG);
}
+/* Word 13
+ */
+static inline void msg_set_peer_net_hash(struct tipc_msg *m, u32 n)
+{
+ msg_set_word(m, 13, n);
+}
+
+static inline u32 msg_peer_net_hash(struct tipc_msg *m)
+{
+ return msg_word(m, 13);
+}
+
+/* Word 14
+ */
static inline u32 msg_sugg_node_addr(struct tipc_msg *m)
{
return msg_word(m, 14);
@@ -1057,20 +1186,21 @@
uint data_sz, u32 dnode, u32 onode,
u32 dport, u32 oport, int errcode);
int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf);
-bool tipc_msg_bundle(struct sk_buff *skb, struct tipc_msg *msg, u32 mtu);
-bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg,
- u32 mtu, u32 dnode);
+bool tipc_msg_try_bundle(struct sk_buff *tskb, struct sk_buff **skb, u32 mss,
+ u32 dnode, bool *new_bundle);
bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos);
int tipc_msg_fragment(struct sk_buff *skb, const struct tipc_msg *hdr,
int pktmax, struct sk_buff_head *frags);
int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
int offset, int dsz, int mtu, struct sk_buff_head *list);
+int tipc_msg_append(struct tipc_msg *hdr, struct msghdr *m, int dlen,
+ int mss, struct sk_buff_head *txq);
bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err);
bool tipc_msg_assemble(struct sk_buff_head *list);
bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq);
bool tipc_msg_pskb_copy(u32 dst, struct sk_buff_head *msg,
struct sk_buff_head *cpy);
-void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
+bool __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
struct sk_buff *skb);
bool tipc_msg_skb_clone(struct sk_buff_head *msg, struct sk_buff_head *cpy);
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 836e629..a757fe2 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -102,7 +102,8 @@
pr_warn("Publication distribution failure\n");
return NULL;
}
-
+ msg_set_named_seqno(buf_msg(skb), nt->snd_nxt++);
+ msg_set_non_legacy(buf_msg(skb));
item = (struct distr_item *)msg_data(buf_msg(skb));
publ_to_item(item, publ);
return skb;
@@ -114,8 +115,8 @@
struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ)
{
struct name_table *nt = tipc_name_table(net);
- struct sk_buff *buf;
struct distr_item *item;
+ struct sk_buff *skb;
write_lock_bh(&nt->cluster_scope_lock);
list_del(&publ->binding_node);
@@ -123,15 +124,16 @@
if (publ->scope == TIPC_NODE_SCOPE)
return NULL;
- buf = named_prepare_buf(net, WITHDRAWAL, ITEM_SIZE, 0);
- if (!buf) {
+ skb = named_prepare_buf(net, WITHDRAWAL, ITEM_SIZE, 0);
+ if (!skb) {
pr_warn("Withdrawal distribution failure\n");
return NULL;
}
-
- item = (struct distr_item *)msg_data(buf_msg(buf));
+ msg_set_named_seqno(buf_msg(skb), nt->snd_nxt++);
+ msg_set_non_legacy(buf_msg(skb));
+ item = (struct distr_item *)msg_data(buf_msg(skb));
publ_to_item(item, publ);
- return buf;
+ return skb;
}
/**
@@ -141,14 +143,15 @@
* @pls: linked list of publication items to be packed into buffer chain
*/
static void named_distribute(struct net *net, struct sk_buff_head *list,
- u32 dnode, struct list_head *pls)
+ u32 dnode, struct list_head *pls, u16 seqno)
{
struct publication *publ;
struct sk_buff *skb = NULL;
struct distr_item *item = NULL;
- u32 msg_dsz = ((tipc_node_get_mtu(net, dnode, 0) - INT_H_SIZE) /
+ u32 msg_dsz = ((tipc_node_get_mtu(net, dnode, 0, false) - INT_H_SIZE) /
ITEM_SIZE) * ITEM_SIZE;
u32 msg_rem = msg_dsz;
+ struct tipc_msg *hdr;
list_for_each_entry(publ, pls, binding_node) {
/* Prepare next buffer: */
@@ -159,8 +162,11 @@
pr_warn("Bulk publication failure\n");
return;
}
- msg_set_bc_ack_invalid(buf_msg(skb), true);
- item = (struct distr_item *)msg_data(buf_msg(skb));
+ hdr = buf_msg(skb);
+ msg_set_bc_ack_invalid(hdr, true);
+ msg_set_bulk(hdr);
+ msg_set_non_legacy(hdr);
+ item = (struct distr_item *)msg_data(hdr);
}
/* Pack publication into message: */
@@ -176,24 +182,35 @@
}
}
if (skb) {
- msg_set_size(buf_msg(skb), INT_H_SIZE + (msg_dsz - msg_rem));
+ hdr = buf_msg(skb);
+ msg_set_size(hdr, INT_H_SIZE + (msg_dsz - msg_rem));
skb_trim(skb, INT_H_SIZE + (msg_dsz - msg_rem));
__skb_queue_tail(list, skb);
}
+ hdr = buf_msg(skb_peek_tail(list));
+ msg_set_last_bulk(hdr);
+ msg_set_named_seqno(hdr, seqno);
}
/**
* tipc_named_node_up - tell specified node about all publications by this node
*/
-void tipc_named_node_up(struct net *net, u32 dnode)
+void tipc_named_node_up(struct net *net, u32 dnode, u16 capabilities)
{
struct name_table *nt = tipc_name_table(net);
+ struct tipc_net *tn = tipc_net(net);
struct sk_buff_head head;
+ u16 seqno;
__skb_queue_head_init(&head);
+ spin_lock_bh(&tn->nametbl_lock);
+ if (!(capabilities & TIPC_NAMED_BCAST))
+ nt->rc_dests++;
+ seqno = nt->snd_nxt;
+ spin_unlock_bh(&tn->nametbl_lock);
read_lock_bh(&nt->cluster_scope_lock);
- named_distribute(net, &head, dnode, &nt->cluster_scope);
+ named_distribute(net, &head, dnode, &nt->cluster_scope, seqno);
tipc_node_xmit(net, &head, dnode, 0);
read_unlock_bh(&nt->cluster_scope_lock);
}
@@ -245,13 +262,21 @@
spin_unlock_bh(&tn->nametbl_lock);
}
-void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr)
+void tipc_publ_notify(struct net *net, struct list_head *nsub_list,
+ u32 addr, u16 capabilities)
{
+ struct name_table *nt = tipc_name_table(net);
+ struct tipc_net *tn = tipc_net(net);
+
struct publication *publ, *tmp;
list_for_each_entry_safe(publ, tmp, nsub_list, binding_node)
tipc_publ_purge(net, publ, addr);
tipc_dist_queue_purge(net, addr);
+ spin_lock_bh(&tn->nametbl_lock);
+ if (!(capabilities & TIPC_NAMED_BCAST))
+ nt->rc_dests--;
+ spin_unlock_bh(&tn->nametbl_lock);
}
/**
@@ -290,34 +315,75 @@
pr_warn_ratelimited("Failed to remove binding %u,%u from %x\n",
type, lower, node);
} else {
- pr_warn("Unrecognized name table message received\n");
+ pr_warn_ratelimited("Unknown name table message received\n");
}
return false;
}
+static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq,
+ u16 *rcv_nxt, bool *open)
+{
+ struct sk_buff *skb, *tmp;
+ struct tipc_msg *hdr;
+ u16 seqno;
+
+ spin_lock_bh(&namedq->lock);
+ skb_queue_walk_safe(namedq, skb, tmp) {
+ if (unlikely(skb_linearize(skb))) {
+ __skb_unlink(skb, namedq);
+ kfree_skb(skb);
+ continue;
+ }
+ hdr = buf_msg(skb);
+ seqno = msg_named_seqno(hdr);
+ if (msg_is_last_bulk(hdr)) {
+ *rcv_nxt = seqno;
+ *open = true;
+ }
+
+ if (msg_is_bulk(hdr) || msg_is_legacy(hdr)) {
+ __skb_unlink(skb, namedq);
+ spin_unlock_bh(&namedq->lock);
+ return skb;
+ }
+
+ if (*open && (*rcv_nxt == seqno)) {
+ (*rcv_nxt)++;
+ __skb_unlink(skb, namedq);
+ spin_unlock_bh(&namedq->lock);
+ return skb;
+ }
+
+ if (less(seqno, *rcv_nxt)) {
+ __skb_unlink(skb, namedq);
+ kfree_skb(skb);
+ continue;
+ }
+ }
+ spin_unlock_bh(&namedq->lock);
+ return NULL;
+}
+
/**
* tipc_named_rcv - process name table update messages sent by another node
*/
-void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq)
+void tipc_named_rcv(struct net *net, struct sk_buff_head *namedq,
+ u16 *rcv_nxt, bool *open)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_msg *msg;
+ struct tipc_net *tn = tipc_net(net);
struct distr_item *item;
- uint count;
- u32 node;
+ struct tipc_msg *hdr;
struct sk_buff *skb;
- int mtype;
+ u32 count, node;
spin_lock_bh(&tn->nametbl_lock);
- for (skb = skb_dequeue(inputq); skb; skb = skb_dequeue(inputq)) {
- skb_linearize(skb);
- msg = buf_msg(skb);
- mtype = msg_type(msg);
- item = (struct distr_item *)msg_data(msg);
- count = msg_data_sz(msg) / ITEM_SIZE;
- node = msg_orignode(msg);
+ while ((skb = tipc_named_dequeue(namedq, rcv_nxt, open))) {
+ hdr = buf_msg(skb);
+ node = msg_orignode(hdr);
+ item = (struct distr_item *)msg_data(hdr);
+ count = msg_data_sz(hdr) / ITEM_SIZE;
while (count--) {
- tipc_update_nametbl(net, item, node, mtype);
+ tipc_update_nametbl(net, item, node, msg_type(hdr));
item++;
}
kfree_skb(skb);
@@ -345,6 +411,6 @@
publ->node = self;
list_for_each_entry_rcu(publ, &nt->cluster_scope, binding_node)
publ->node = self;
-
+ nt->rc_dests = 0;
spin_unlock_bh(&tn->nametbl_lock);
}
diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h
index 63fc73e..0923231 100644
--- a/net/tipc/name_distr.h
+++ b/net/tipc/name_distr.h
@@ -67,11 +67,14 @@
__be32 key;
};
+void tipc_named_bcast(struct net *net, struct sk_buff *skb);
struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ);
struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ);
-void tipc_named_node_up(struct net *net, u32 dnode);
-void tipc_named_rcv(struct net *net, struct sk_buff_head *msg_queue);
+void tipc_named_node_up(struct net *net, u32 dnode, u16 capabilities);
+void tipc_named_rcv(struct net *net, struct sk_buff_head *namedq,
+ u16 *rcv_nxt, bool *open);
void tipc_named_reinit(struct net *net);
-void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr);
+void tipc_publ_notify(struct net *net, struct list_head *nsub_list,
+ u32 addr, u16 capabilities);
#endif
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 66a65c2..54c5328 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -35,6 +35,8 @@
*/
#include <net/sock.h>
+#include <linux/list_sort.h>
+#include <linux/rbtree_augmented.h>
#include "core.h"
#include "netlink.h"
#include "name_table.h"
@@ -50,6 +52,7 @@
* @lower: service range lower bound
* @upper: service range upper bound
* @tree_node: member of service range RB tree
+ * @max: largest 'upper' in this node subtree
* @local_publ: list of identical publications made from this node
* Used by closest_first lookup and multicast lookup algorithm
* @all_publ: all publications identical to this one, whatever node and scope
@@ -59,6 +62,7 @@
u32 lower;
u32 upper;
struct rb_node tree_node;
+ u32 max;
struct list_head local_publ;
struct list_head all_publ;
};
@@ -66,6 +70,7 @@
/**
* struct tipc_service - container for all published instances of a service type
* @type: 32 bit 'type' value for service
+ * @publ_cnt: increasing counter for publications in this service
* @ranges: rb tree containing all service ranges for this service
* @service_list: links to adjacent name ranges in hash chain
* @subscriptions: list of subscriptions for this service type
@@ -74,6 +79,7 @@
*/
struct tipc_service {
u32 type;
+ u32 publ_cnt;
struct rb_root ranges;
struct hlist_node service_list;
struct list_head subscriptions;
@@ -81,6 +87,130 @@
struct rcu_head rcu;
};
+#define service_range_upper(sr) ((sr)->upper)
+RB_DECLARE_CALLBACKS_MAX(static, sr_callbacks,
+ struct service_range, tree_node, u32, max,
+ service_range_upper)
+
+#define service_range_entry(rbtree_node) \
+ (container_of(rbtree_node, struct service_range, tree_node))
+
+#define service_range_overlap(sr, start, end) \
+ ((sr)->lower <= (end) && (sr)->upper >= (start))
+
+/**
+ * service_range_foreach_match - iterate over tipc service rbtree for each
+ * range match
+ * @sr: the service range pointer as a loop cursor
+ * @sc: the pointer to tipc service which holds the service range rbtree
+ * @start, end: the range (end >= start) for matching
+ */
+#define service_range_foreach_match(sr, sc, start, end) \
+ for (sr = service_range_match_first((sc)->ranges.rb_node, \
+ start, \
+ end); \
+ sr; \
+ sr = service_range_match_next(&(sr)->tree_node, \
+ start, \
+ end))
+
+/**
+ * service_range_match_first - find first service range matching a range
+ * @n: the root node of service range rbtree for searching
+ * @start, end: the range (end >= start) for matching
+ *
+ * Return: the leftmost service range node in the rbtree that overlaps the
+ * specific range if any. Otherwise, returns NULL.
+ */
+static struct service_range *service_range_match_first(struct rb_node *n,
+ u32 start, u32 end)
+{
+ struct service_range *sr;
+ struct rb_node *l, *r;
+
+ /* Non overlaps in tree at all? */
+ if (!n || service_range_entry(n)->max < start)
+ return NULL;
+
+ while (n) {
+ l = n->rb_left;
+ if (l && service_range_entry(l)->max >= start) {
+ /* A leftmost overlap range node must be one in the left
+ * subtree. If not, it has lower > end, then nodes on
+ * the right side cannot satisfy the condition either.
+ */
+ n = l;
+ continue;
+ }
+
+ /* No one in the left subtree can match, return if this node is
+ * an overlap i.e. leftmost.
+ */
+ sr = service_range_entry(n);
+ if (service_range_overlap(sr, start, end))
+ return sr;
+
+ /* Ok, try to lookup on the right side */
+ r = n->rb_right;
+ if (sr->lower <= end &&
+ r && service_range_entry(r)->max >= start) {
+ n = r;
+ continue;
+ }
+ break;
+ }
+
+ return NULL;
+}
+
+/**
+ * service_range_match_next - find next service range matching a range
+ * @n: a node in service range rbtree from which the searching starts
+ * @start, end: the range (end >= start) for matching
+ *
+ * Return: the next service range node to the given node in the rbtree that
+ * overlaps the specific range if any. Otherwise, returns NULL.
+ */
+static struct service_range *service_range_match_next(struct rb_node *n,
+ u32 start, u32 end)
+{
+ struct service_range *sr;
+ struct rb_node *p, *r;
+
+ while (n) {
+ r = n->rb_right;
+ if (r && service_range_entry(r)->max >= start)
+ /* A next overlap range node must be one in the right
+ * subtree. If not, it has lower > end, then any next
+ * successor (- an ancestor) of this node cannot
+ * satisfy the condition either.
+ */
+ return service_range_match_first(r, start, end);
+
+ /* No one in the right subtree can match, go up to find an
+ * ancestor of this node which is parent of a left-hand child.
+ */
+ while ((p = rb_parent(n)) && n == p->rb_right)
+ n = p;
+ if (!p)
+ break;
+
+ /* Return if this ancestor is an overlap */
+ sr = service_range_entry(p);
+ if (service_range_overlap(sr, start, end))
+ return sr;
+
+ /* Ok, try to lookup more from this ancestor */
+ if (sr->lower <= end) {
+ n = p;
+ continue;
+ }
+ break;
+ }
+
+ return NULL;
+}
+
static int hash(int x)
{
return x & (TIPC_NAMETBL_SIZE - 1);
@@ -109,6 +239,7 @@
INIT_LIST_HEAD(&publ->binding_node);
INIT_LIST_HEAD(&publ->local_publ);
INIT_LIST_HEAD(&publ->all_publ);
+ INIT_LIST_HEAD(&publ->list);
return publ;
}
@@ -135,84 +266,51 @@
return service;
}
-/**
- * tipc_service_first_range - find first service range in tree matching instance
- *
- * Very time-critical, so binary search through range rb tree
- */
-static struct service_range *tipc_service_first_range(struct tipc_service *sc,
- u32 instance)
-{
- struct rb_node *n = sc->ranges.rb_node;
- struct service_range *sr;
-
- while (n) {
- sr = container_of(n, struct service_range, tree_node);
- if (sr->lower > instance)
- n = n->rb_left;
- else if (sr->upper < instance)
- n = n->rb_right;
- else
- return sr;
- }
- return NULL;
-}
-
/* tipc_service_find_range - find service range matching publication parameters
*/
static struct service_range *tipc_service_find_range(struct tipc_service *sc,
u32 lower, u32 upper)
{
- struct rb_node *n = sc->ranges.rb_node;
struct service_range *sr;
- sr = tipc_service_first_range(sc, lower);
- if (!sr)
- return NULL;
-
- /* Look for exact match */
- for (n = &sr->tree_node; n; n = rb_next(n)) {
- sr = container_of(n, struct service_range, tree_node);
- if (sr->upper == upper)
- break;
+ service_range_foreach_match(sr, sc, lower, upper) {
+ /* Look for exact match */
+ if (sr->lower == lower && sr->upper == upper)
+ return sr;
}
- if (!n || sr->lower != lower || sr->upper != upper)
- return NULL;
- return sr;
+ return NULL;
}
static struct service_range *tipc_service_create_range(struct tipc_service *sc,
u32 lower, u32 upper)
{
struct rb_node **n, *parent = NULL;
- struct service_range *sr, *tmp;
+ struct service_range *sr;
n = &sc->ranges.rb_node;
while (*n) {
- tmp = container_of(*n, struct service_range, tree_node);
parent = *n;
- tmp = container_of(parent, struct service_range, tree_node);
- if (lower < tmp->lower)
- n = &(*n)->rb_left;
- else if (lower > tmp->lower)
- n = &(*n)->rb_right;
- else if (upper < tmp->upper)
- n = &(*n)->rb_left;
- else if (upper > tmp->upper)
- n = &(*n)->rb_right;
+ sr = service_range_entry(parent);
+ if (lower == sr->lower && upper == sr->upper)
+ return sr;
+ if (sr->max < upper)
+ sr->max = upper;
+ if (lower <= sr->lower)
+ n = &parent->rb_left;
else
- return tmp;
+ n = &parent->rb_right;
}
sr = kzalloc(sizeof(*sr), GFP_ATOMIC);
if (!sr)
return NULL;
sr->lower = lower;
sr->upper = upper;
+ sr->max = upper;
INIT_LIST_HEAD(&sr->local_publ);
INIT_LIST_HEAD(&sr->all_publ);
rb_link_node(&sr->tree_node, parent, n);
- rb_insert_color(&sr->tree_node, &sc->ranges);
+ rb_insert_augmented(&sr->tree_node, &sc->ranges, &sr_callbacks);
return sr;
}
@@ -244,6 +342,8 @@
p = tipc_publ_create(type, lower, upper, scope, node, port, key);
if (!p)
goto err;
+ /* Suppose there shouldn't be a huge gap btw publs i.e. >INT_MAX */
+ p->id = sc->publ_cnt++;
if (in_own_node(net, node))
list_add(&p->local_publ, &sr->local_publ);
list_add(&p->all_publ, &sr->all_publ);
@@ -278,6 +378,20 @@
}
/**
+ * Code reused: time_after32() for the same purpose
+ */
+#define publication_after(pa, pb) time_after32((pa)->id, (pb)->id)
+static int tipc_publ_sort(void *priv, const struct list_head *a,
+ const struct list_head *b)
+{
+ struct publication *pa, *pb;
+
+ pa = container_of(a, struct publication, list);
+ pb = container_of(b, struct publication, list);
+ return publication_after(pa, pb);
+}
+
+/**
* tipc_service_subscribe - attach a subscription, and optionally
* issue the prescribed number of events if there is any service
* range overlapping with the requested range
@@ -286,36 +400,44 @@
struct tipc_subscription *sub)
{
struct tipc_subscr *sb = &sub->evt.s;
+ struct publication *p, *first, *tmp;
+ struct list_head publ_list;
struct service_range *sr;
struct tipc_name_seq ns;
- struct publication *p;
- struct rb_node *n;
- bool first;
+ u32 filter;
ns.type = tipc_sub_read(sb, seq.type);
ns.lower = tipc_sub_read(sb, seq.lower);
ns.upper = tipc_sub_read(sb, seq.upper);
+ filter = tipc_sub_read(sb, filter);
tipc_sub_get(sub);
list_add(&sub->service_list, &service->subscriptions);
- if (tipc_sub_read(sb, filter) & TIPC_SUB_NO_STATUS)
+ if (filter & TIPC_SUB_NO_STATUS)
return;
- for (n = rb_first(&service->ranges); n; n = rb_next(n)) {
- sr = container_of(n, struct service_range, tree_node);
- if (sr->lower > ns.upper)
- break;
- if (!tipc_sub_check_overlap(&ns, sr->lower, sr->upper))
- continue;
- first = true;
-
+ INIT_LIST_HEAD(&publ_list);
+ service_range_foreach_match(sr, service, ns.lower, ns.upper) {
+ first = NULL;
list_for_each_entry(p, &sr->all_publ, all_publ) {
- tipc_sub_report_overlap(sub, sr->lower, sr->upper,
- TIPC_PUBLISHED, p->port,
- p->node, p->scope, first);
- first = false;
+ if (filter & TIPC_SUB_PORTS)
+ list_add_tail(&p->list, &publ_list);
+ else if (!first || publication_after(first, p))
+ /* Pick this range's *first* publication */
+ first = p;
}
+ if (first)
+ list_add_tail(&first->list, &publ_list);
+ }
+
+ /* Sort the publications before reporting */
+ list_sort(NULL, &publ_list, tipc_publ_sort);
+ list_for_each_entry_safe(p, tmp, &publ_list, list) {
+ tipc_sub_report_overlap(sub, p->lower, p->upper,
+ TIPC_PUBLISHED, p->port, p->node,
+ p->scope, true);
+ list_del_init(&p->list);
}
}
@@ -390,7 +512,7 @@
/* Remove service range item if this was its last publication */
if (list_empty(&sr->all_publ)) {
- rb_erase(&sr->tree_node, &sc->ranges);
+ rb_erase_augmented(&sr->tree_node, &sc->ranges, &sr_callbacks);
kfree(sr);
}
@@ -438,34 +560,39 @@
rcu_read_lock();
sc = tipc_service_find(net, type);
if (unlikely(!sc))
- goto not_found;
+ goto exit;
spin_lock_bh(&sc->lock);
- sr = tipc_service_first_range(sc, instance);
- if (unlikely(!sr))
- goto no_match;
-
- /* Select lookup algorithm: local, closest-first or round-robin */
- if (*dnode == self) {
- list = &sr->local_publ;
- if (list_empty(list))
- goto no_match;
- p = list_first_entry(list, struct publication, local_publ);
- list_move_tail(&p->local_publ, &sr->local_publ);
- } else if (legacy && !*dnode && !list_empty(&sr->local_publ)) {
- list = &sr->local_publ;
- p = list_first_entry(list, struct publication, local_publ);
- list_move_tail(&p->local_publ, &sr->local_publ);
- } else {
- list = &sr->all_publ;
- p = list_first_entry(list, struct publication, all_publ);
- list_move_tail(&p->all_publ, &sr->all_publ);
+ service_range_foreach_match(sr, sc, instance, instance) {
+ /* Select lookup algo: local, closest-first or round-robin */
+ if (*dnode == self) {
+ list = &sr->local_publ;
+ if (list_empty(list))
+ continue;
+ p = list_first_entry(list, struct publication,
+ local_publ);
+ list_move_tail(&p->local_publ, &sr->local_publ);
+ } else if (legacy && !*dnode && !list_empty(&sr->local_publ)) {
+ list = &sr->local_publ;
+ p = list_first_entry(list, struct publication,
+ local_publ);
+ list_move_tail(&p->local_publ, &sr->local_publ);
+ } else {
+ list = &sr->all_publ;
+ p = list_first_entry(list, struct publication,
+ all_publ);
+ list_move_tail(&p->all_publ, &sr->all_publ);
+ }
+ port = p->port;
+ node = p->node;
+ /* Todo: as for legacy, pick the first matching range only, a
+ * "true" round-robin will be performed as needed.
+ */
+ break;
}
- port = p->port;
- node = p->node;
-no_match:
spin_unlock_bh(&sc->lock);
-not_found:
+
+exit:
rcu_read_unlock();
*dnode = node;
return port;
@@ -488,7 +615,8 @@
spin_lock_bh(&sc->lock);
- sr = tipc_service_first_range(sc, instance);
+ /* Todo: a full search i.e. service_range_foreach_match() instead? */
+ sr = service_range_match_first(sc->ranges.rb_node, instance, instance);
if (!sr)
goto no_match;
@@ -517,7 +645,6 @@
struct service_range *sr;
struct tipc_service *sc;
struct publication *p;
- struct rb_node *n;
rcu_read_lock();
sc = tipc_service_find(net, type);
@@ -525,13 +652,7 @@
goto exit;
spin_lock_bh(&sc->lock);
-
- for (n = rb_first(&sc->ranges); n; n = rb_next(n)) {
- sr = container_of(n, struct service_range, tree_node);
- if (sr->upper < lower)
- continue;
- if (sr->lower > upper)
- break;
+ service_range_foreach_match(sr, sc, lower, upper) {
list_for_each_entry(p, &sr->local_publ, local_publ) {
if (p->scope == scope || (!exact && p->scope < scope))
tipc_dest_push(dports, 0, p->port);
@@ -552,7 +673,6 @@
struct service_range *sr;
struct tipc_service *sc;
struct publication *p;
- struct rb_node *n;
rcu_read_lock();
sc = tipc_service_find(net, type);
@@ -560,13 +680,7 @@
goto exit;
spin_lock_bh(&sc->lock);
-
- for (n = rb_first(&sc->ranges); n; n = rb_next(n)) {
- sr = container_of(n, struct service_range, tree_node);
- if (sr->upper < lower)
- continue;
- if (sr->lower > upper)
- break;
+ service_range_foreach_match(sr, sc, lower, upper) {
list_for_each_entry(p, &sr->all_publ, all_publ) {
tipc_nlist_add(nodes, p->node);
}
@@ -615,6 +729,7 @@
struct tipc_net *tn = tipc_net(net);
struct publication *p = NULL;
struct sk_buff *skb = NULL;
+ u32 rc_dests;
spin_lock_bh(&tn->nametbl_lock);
@@ -629,12 +744,14 @@
nt->local_publ_count++;
skb = tipc_named_publish(net, p);
}
+ rc_dests = nt->rc_dests;
exit:
spin_unlock_bh(&tn->nametbl_lock);
if (skb)
- tipc_node_broadcast(net, skb);
+ tipc_node_broadcast(net, skb, rc_dests);
return p;
+
}
/**
@@ -648,6 +765,7 @@
u32 self = tipc_own_addr(net);
struct sk_buff *skb = NULL;
struct publication *p;
+ u32 rc_dests;
spin_lock_bh(&tn->nametbl_lock);
@@ -661,10 +779,11 @@
pr_err("Failed to remove local publication {%u,%u,%u}/%u\n",
type, lower, upper, key);
}
+ rc_dests = nt->rc_dests;
spin_unlock_bh(&tn->nametbl_lock);
if (skb) {
- tipc_node_broadcast(net, skb);
+ tipc_node_broadcast(net, skb, rc_dests);
return 1;
}
return 0;
@@ -764,7 +883,7 @@
tipc_service_remove_publ(sr, p->node, p->key);
kfree_rcu(p, rcu);
}
- rb_erase(&sr->tree_node, &sc->ranges);
+ rb_erase_augmented(&sr->tree_node, &sc->ranges, &sr_callbacks);
kfree(sr);
}
hlist_del_init_rcu(&sc->service_list);
@@ -812,7 +931,7 @@
list_for_each_entry(p, &sr->all_publ, all_publ)
if (p->key == *last_key)
break;
- if (p->key != *last_key)
+ if (list_entry_is_head(p, &sr->all_publ, all_publ))
return -EPIPE;
} else {
p = list_first_entry(&sr->all_publ,
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index f790663..8064e19 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -58,6 +58,7 @@
* @node: network address of publishing socket's node
* @port: publishing port
* @key: publication key, unique across the cluster
+ * @id: publication id
* @binding_node: all publications from the same node which bound this one
* - Remote publications: in node->publ_list
* Used by node/name distr to withdraw publications when node is lost
@@ -69,6 +70,7 @@
* Used by closest_first and multicast receive lookup algorithms
* @all_publ: all publications identical to this one, whatever node and scope
* Used by round-robin lookup algorithm
+ * @list: to form a list of publications in temporal order
* @rcu: RCU callback head used for deferred freeing
*/
struct publication {
@@ -79,10 +81,12 @@
u32 node;
u32 port;
u32 key;
+ u32 id;
struct list_head binding_node;
struct list_head binding_sock;
struct list_head local_publ;
struct list_head all_publ;
+ struct list_head list;
struct rcu_head rcu;
};
@@ -102,6 +106,8 @@
struct list_head cluster_scope;
rwlock_t cluster_scope_lock;
u32 local_publ_count;
+ u32 rc_dests;
+ u32 snd_nxt;
};
int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb);
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 2498ce8..0bb2323 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -292,3 +292,59 @@
return err;
}
+
+static int __tipc_nl_addr_legacy_get(struct net *net, struct tipc_nl_msg *msg)
+{
+ struct tipc_net *tn = tipc_net(net);
+ struct nlattr *attrs;
+ void *hdr;
+
+ hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
+ 0, TIPC_NL_ADDR_LEGACY_GET);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ attrs = nla_nest_start(msg->skb, TIPC_NLA_NET);
+ if (!attrs)
+ goto msg_full;
+
+ if (tn->legacy_addr_format)
+ if (nla_put_flag(msg->skb, TIPC_NLA_NET_ADDR_LEGACY))
+ goto attr_msg_full;
+
+ nla_nest_end(msg->skb, attrs);
+ genlmsg_end(msg->skb, hdr);
+
+ return 0;
+
+attr_msg_full:
+ nla_nest_cancel(msg->skb, attrs);
+msg_full:
+ genlmsg_cancel(msg->skb, hdr);
+
+ return -EMSGSIZE;
+}
+
+int tipc_nl_net_addr_legacy_get(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net *net = sock_net(skb->sk);
+ struct tipc_nl_msg msg;
+ struct sk_buff *rep;
+ int err;
+
+ rep = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!rep)
+ return -ENOMEM;
+
+ msg.skb = rep;
+ msg.portid = info->snd_portid;
+ msg.seq = info->snd_seq;
+
+ err = __tipc_nl_addr_legacy_get(net, &msg);
+ if (err) {
+ nlmsg_free(msg.skb);
+ return err;
+ }
+
+ return genlmsg_reply(msg.skb, info);
+}
diff --git a/net/tipc/net.h b/net/tipc/net.h
index a6a4dba..d0c91d2 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -48,5 +48,6 @@
int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb);
int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info);
int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info);
+int tipc_nl_net_addr_legacy_get(struct sk_buff *skb, struct genl_info *info);
#endif
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index e9bbf4a..c447cb5 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -83,6 +83,7 @@
[TIPC_NLA_NET_ADDR] = { .type = NLA_U32 },
[TIPC_NLA_NET_NODEID] = { .type = NLA_U64 },
[TIPC_NLA_NET_NODEID_W1] = { .type = NLA_U64 },
+ [TIPC_NLA_NET_ADDR_LEGACY] = { .type = NLA_FLAG }
};
const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = {
@@ -102,7 +103,13 @@
const struct nla_policy tipc_nl_node_policy[TIPC_NLA_NODE_MAX + 1] = {
[TIPC_NLA_NODE_UNSPEC] = { .type = NLA_UNSPEC },
[TIPC_NLA_NODE_ADDR] = { .type = NLA_U32 },
- [TIPC_NLA_NODE_UP] = { .type = NLA_FLAG }
+ [TIPC_NLA_NODE_UP] = { .type = NLA_FLAG },
+ [TIPC_NLA_NODE_ID] = { .type = NLA_BINARY,
+ .len = TIPC_NODEID_LEN},
+ [TIPC_NLA_NODE_KEY] = { .type = NLA_BINARY,
+ .len = TIPC_AEAD_KEY_SIZE_MAX},
+ [TIPC_NLA_NODE_KEY_MASTER] = { .type = NLA_FLAG },
+ [TIPC_NLA_NODE_REKEYING] = { .type = NLA_U32 },
};
/* Properties valid for media, bearer and link */
@@ -177,12 +184,13 @@
},
{
.cmd = TIPC_NL_PUBL_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .validate = GENL_DONT_VALIDATE_STRICT |
+ GENL_DONT_VALIDATE_DUMP_STRICT,
.dumpit = tipc_nl_publ_dump,
},
{
.cmd = TIPC_NL_LINK_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .validate = GENL_DONT_VALIDATE_STRICT,
.doit = tipc_nl_node_get_link,
.dumpit = tipc_nl_node_dump_link,
},
@@ -240,7 +248,8 @@
},
{
.cmd = TIPC_NL_MON_PEER_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .validate = GENL_DONT_VALIDATE_STRICT |
+ GENL_DONT_VALIDATE_DUMP_STRICT,
.dumpit = tipc_nl_node_dump_monitor_peer,
},
{
@@ -251,10 +260,28 @@
#ifdef CONFIG_TIPC_MEDIA_UDP
{
.cmd = TIPC_NL_UDP_GET_REMOTEIP,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .validate = GENL_DONT_VALIDATE_STRICT |
+ GENL_DONT_VALIDATE_DUMP_STRICT,
.dumpit = tipc_udp_nl_dump_remoteip,
},
#endif
+#ifdef CONFIG_TIPC_CRYPTO
+ {
+ .cmd = TIPC_NL_KEY_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = tipc_nl_node_set_key,
+ },
+ {
+ .cmd = TIPC_NL_KEY_FLUSH,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = tipc_nl_node_flush_key,
+ },
+#endif
+ {
+ .cmd = TIPC_NL_ADDR_LEGACY_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = tipc_nl_net_addr_legacy_get,
+ },
};
struct genl_family tipc_genl_family __ro_after_init = {
@@ -269,18 +296,6 @@
.n_ops = ARRAY_SIZE(tipc_genl_v2_ops),
};
-int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr)
-{
- u32 maxattr = tipc_genl_family.maxattr;
-
- *attr = genl_family_attrbuf(&tipc_genl_family);
- if (!*attr)
- return -EOPNOTSUPP;
-
- return nlmsg_parse_deprecated(nlh, GENL_HDRLEN, *attr, maxattr,
- tipc_nl_policy, NULL);
-}
-
int __init tipc_netlink_start(void)
{
int res;
diff --git a/net/tipc/netlink.h b/net/tipc/netlink.h
index 4ba0ad4..7cf7777 100644
--- a/net/tipc/netlink.h
+++ b/net/tipc/netlink.h
@@ -38,7 +38,6 @@
#include <net/netlink.h>
extern struct genl_family tipc_genl_family;
-int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***buf);
struct tipc_nl_msg {
struct sk_buff *skb;
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 561ea83..49e8933 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -181,15 +181,18 @@
struct tipc_nl_compat_msg *msg,
struct sk_buff *arg)
{
+ struct genl_dumpit_info info;
int len = 0;
int err;
struct sk_buff *buf;
struct nlmsghdr *nlmsg;
struct netlink_callback cb;
+ struct nlattr **attrbuf;
memset(&cb, 0, sizeof(cb));
cb.nlh = (struct nlmsghdr *)arg->data;
cb.skb = arg;
+ cb.data = &info;
buf = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
if (!buf)
@@ -201,19 +204,35 @@
return -ENOMEM;
}
+ attrbuf = kcalloc(tipc_genl_family.maxattr + 1,
+ sizeof(struct nlattr *), GFP_KERNEL);
+ if (!attrbuf) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ info.attrs = attrbuf;
+ err = nlmsg_parse_deprecated(cb.nlh, GENL_HDRLEN, attrbuf,
+ tipc_genl_family.maxattr,
+ tipc_genl_family.policy, NULL);
+ if (err)
+ goto err_out;
+
do {
int rem;
len = (*cmd->dumpit)(buf, &cb);
nlmsg_for_each_msg(nlmsg, nlmsg_hdr(buf), len, rem) {
- struct nlattr **attrs;
-
- err = tipc_nlmsg_parse(nlmsg, &attrs);
+ err = nlmsg_parse_deprecated(nlmsg, GENL_HDRLEN,
+ attrbuf,
+ tipc_genl_family.maxattr,
+ tipc_genl_family.policy,
+ NULL);
if (err)
goto err_out;
- err = (*cmd->format)(msg, attrs);
+ err = (*cmd->format)(msg, attrbuf);
if (err)
goto err_out;
@@ -231,6 +250,7 @@
err = 0;
err_out:
+ kfree(attrbuf);
tipc_dump_done(&cb);
kfree_skb(buf);
@@ -1317,7 +1337,7 @@
return err;
}
-static const struct genl_ops tipc_genl_compat_ops[] = {
+static const struct genl_small_ops tipc_genl_compat_ops[] = {
{
.cmd = TIPC_GENL_CMD,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
@@ -1332,8 +1352,8 @@
.maxattr = 0,
.netnsok = true,
.module = THIS_MODULE,
- .ops = tipc_genl_compat_ops,
- .n_ops = ARRAY_SIZE(tipc_genl_compat_ops),
+ .small_ops = tipc_genl_compat_ops,
+ .n_small_ops = ARRAY_SIZE(tipc_genl_compat_ops),
};
int __init tipc_netlink_compat_start(void)
diff --git a/net/tipc/node.c b/net/tipc/node.c
index c8f6177..e4452d5 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -44,6 +44,7 @@
#include "discover.h"
#include "netlink.h"
#include "trace.h"
+#include "crypto.h"
#define INVALID_NODE_SIG 0x10000
#define NODE_CLEANUP_AFTER 300000
@@ -74,6 +75,8 @@
struct sk_buff_head arrvq;
struct sk_buff_head inputq2;
struct sk_buff_head namedq;
+ u16 named_rcv_nxt;
+ bool named_open;
};
/**
@@ -89,6 +92,7 @@
* @links: array containing references to all links to node
* @action_flags: bit mask of different types of node actions
* @state: connectivity state vs peer node
+ * @preliminary: a preliminary node or not
* @sync_point: sequence number where synch/failover is finished
* @list: links to adjacent nodes in sorted list of cluster's nodes
* @working_links: number of working links to node (both active and standby)
@@ -99,6 +103,7 @@
* @publ_list: list of publications
* @rcu: rcu struct for tipc_node
* @delete_at: indicates the time for deleting a down node
+ * @crypto_rx: RX crypto handler
*/
struct tipc_node {
u32 addr;
@@ -112,6 +117,7 @@
int action_flags;
struct list_head list;
int state;
+ bool preliminary;
bool failover_sent;
u16 sync_point;
int link_cnt;
@@ -120,12 +126,18 @@
u32 signature;
u32 link_id;
u8 peer_id[16];
+ char peer_id_string[NODE_ID_STR_LEN];
struct list_head publ_list;
struct list_head conn_sks;
unsigned long keepalive_intv;
struct timer_list timer;
struct rcu_head rcu;
unsigned long delete_at;
+ struct net *peer_net;
+ u32 peer_hash_mix;
+#ifdef CONFIG_TIPC_CRYPTO
+ struct tipc_crypto *crypto_rx;
+#endif
};
/* Node FSM states and events:
@@ -163,7 +175,6 @@
static void tipc_node_fsm_evt(struct tipc_node *n, int evt);
static struct tipc_node *tipc_node_find(struct net *net, u32 addr);
static struct tipc_node *tipc_node_find_by_id(struct net *net, u8 *id);
-static void tipc_node_put(struct tipc_node *node);
static bool node_is_up(struct tipc_node *n);
static void tipc_node_delete_from_list(struct tipc_node *node);
@@ -184,7 +195,7 @@
return n->links[bearer_id].link;
}
-int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel)
+int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel, bool connected)
{
struct tipc_node *n;
int bearer_id;
@@ -194,6 +205,14 @@
if (unlikely(!n))
return mtu;
+ /* Allow MAX_MSG_SIZE when building connection oriented message
+ * if they are in the same core network
+ */
+ if (n->peer_net && connected) {
+ tipc_node_put(n);
+ return mtu;
+ }
+
bearer_id = n->active_links[sel & 1];
if (likely(bearer_id != INVALID_BEARER_ID))
mtu = n->links[bearer_id].mtu;
@@ -235,20 +254,64 @@
return caps;
}
+u32 tipc_node_get_addr(struct tipc_node *node)
+{
+ return (node) ? node->addr : 0;
+}
+
+char *tipc_node_get_id_str(struct tipc_node *node)
+{
+ return node->peer_id_string;
+}
+
+#ifdef CONFIG_TIPC_CRYPTO
+/**
+ * tipc_node_crypto_rx - Retrieve crypto RX handle from node
+ * Note: node ref counter must be held first!
+ */
+struct tipc_crypto *tipc_node_crypto_rx(struct tipc_node *__n)
+{
+ return (__n) ? __n->crypto_rx : NULL;
+}
+
+struct tipc_crypto *tipc_node_crypto_rx_by_list(struct list_head *pos)
+{
+ return container_of(pos, struct tipc_node, list)->crypto_rx;
+}
+
+struct tipc_crypto *tipc_node_crypto_rx_by_addr(struct net *net, u32 addr)
+{
+ struct tipc_node *n;
+
+ n = tipc_node_find(net, addr);
+ return (n) ? n->crypto_rx : NULL;
+}
+#endif
+
+static void tipc_node_free(struct rcu_head *rp)
+{
+ struct tipc_node *n = container_of(rp, struct tipc_node, rcu);
+
+#ifdef CONFIG_TIPC_CRYPTO
+ tipc_crypto_stop(&n->crypto_rx);
+#endif
+ kfree(n);
+}
+
static void tipc_node_kref_release(struct kref *kref)
{
struct tipc_node *n = container_of(kref, struct tipc_node, kref);
kfree(n->bc_entry.link);
- kfree_rcu(n, rcu);
+ call_rcu(&n->rcu, tipc_node_free);
}
-static void tipc_node_put(struct tipc_node *node)
+void tipc_node_put(struct tipc_node *node)
{
kref_put(&node->kref, tipc_node_kref_release);
}
-static void tipc_node_get(struct tipc_node *node)
+void tipc_node_get(struct tipc_node *node)
{
kref_get(&node->kref);
}
@@ -264,7 +327,7 @@
rcu_read_lock();
hlist_for_each_entry_rcu(node, &tn->node_htable[thash], hash) {
- if (node->addr != addr)
+ if (node->addr != addr || node->preliminary)
continue;
if (!kref_get_unless_zero(&node->kref))
node = NULL;
@@ -343,10 +406,10 @@
write_unlock_bh(&n->lock);
if (flags & TIPC_NOTIFY_NODE_DOWN)
- tipc_publ_notify(net, publ_list, addr);
+ tipc_publ_notify(net, publ_list, addr, n->capabilities);
if (flags & TIPC_NOTIFY_NODE_UP)
- tipc_named_node_up(net, addr);
+ tipc_named_node_up(net, addr, n->capabilities);
if (flags & TIPC_NOTIFY_LINK_UP) {
tipc_mon_peer_up(net, addr, bearer_id);
@@ -360,18 +423,71 @@
}
}
-static struct tipc_node *tipc_node_create(struct net *net, u32 addr,
- u8 *peer_id, u16 capabilities)
+static void tipc_node_assign_peer_net(struct tipc_node *n, u32 hash_mixes)
+{
+ int net_id = tipc_netid(n->net);
+ struct tipc_net *tn_peer;
+ struct net *tmp;
+ u32 hash_chk;
+
+ if (n->peer_net)
+ return;
+
+ for_each_net_rcu(tmp) {
+ tn_peer = tipc_net(tmp);
+ if (!tn_peer)
+ continue;
+ /* Integrity checking whether node exists in namespace or not */
+ if (tn_peer->net_id != net_id)
+ continue;
+ if (memcmp(n->peer_id, tn_peer->node_id, NODE_ID_LEN))
+ continue;
+ hash_chk = tipc_net_hash_mixes(tmp, tn_peer->random);
+ if (hash_mixes ^ hash_chk)
+ continue;
+ n->peer_net = tmp;
+ n->peer_hash_mix = hash_mixes;
+ break;
+ }
+}
+
+struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id,
+ u16 capabilities, u32 hash_mixes,
+ bool preliminary)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_node *n, *temp_node;
struct tipc_link *l;
+ unsigned long intv;
int bearer_id;
int i;
spin_lock_bh(&tn->node_list_lock);
- n = tipc_node_find(net, addr);
+ n = tipc_node_find(net, addr) ?:
+ tipc_node_find_by_id(net, peer_id);
if (n) {
+ if (!n->preliminary)
+ goto update;
+ if (preliminary)
+ goto exit;
+ /* A preliminary node becomes "real" now, refresh its data */
+ tipc_node_write_lock(n);
+ n->preliminary = false;
+ n->addr = addr;
+ hlist_del_rcu(&n->hash);
+ hlist_add_head_rcu(&n->hash,
+ &tn->node_htable[tipc_hashfn(addr)]);
+ list_del_rcu(&n->list);
+ list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
+ if (n->addr < temp_node->addr)
+ break;
+ }
+ list_add_tail_rcu(&n->list, &temp_node->list);
+ tipc_node_write_unlock_fast(n);
+
+update:
+ if (n->peer_hash_mix ^ hash_mixes)
+ tipc_node_assign_peer_net(n, hash_mixes);
if (n->capabilities == capabilities)
goto exit;
/* Same node may come back with new capabilities */
@@ -389,6 +505,10 @@
list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
tn->capabilities &= temp_node->capabilities;
}
+
+ tipc_bcast_toggle_rcast(net,
+ (tn->capabilities & TIPC_BCAST_RCAST));
+
goto exit;
}
n = kzalloc(sizeof(*n), GFP_ATOMIC);
@@ -396,9 +516,23 @@
pr_warn("Node creation failed, no memory\n");
goto exit;
}
+ tipc_nodeid2string(n->peer_id_string, peer_id);
+#ifdef CONFIG_TIPC_CRYPTO
+ if (unlikely(tipc_crypto_start(&n->crypto_rx, net, n))) {
+ pr_warn("Failed to start crypto RX(%s)!\n", n->peer_id_string);
+ kfree(n);
+ n = NULL;
+ goto exit;
+ }
+#endif
n->addr = addr;
+ n->preliminary = preliminary;
memcpy(&n->peer_id, peer_id, 16);
n->net = net;
+ n->peer_net = NULL;
+ n->peer_hash_mix = 0;
+ /* Assign kernel local namespace if exists */
+ tipc_node_assign_peer_net(n, hash_mixes);
n->capabilities = capabilities;
kref_init(&n->kref);
rwlock_init(&n->lock);
@@ -417,22 +551,14 @@
n->signature = INVALID_NODE_SIG;
n->active_links[0] = INVALID_BEARER_ID;
n->active_links[1] = INVALID_BEARER_ID;
- if (!tipc_link_bc_create(net, tipc_own_addr(net),
- addr, U16_MAX,
- tipc_link_window(tipc_bc_sndlink(net)),
- n->capabilities,
- &n->bc_entry.inputq1,
- &n->bc_entry.namedq,
- tipc_bc_sndlink(net),
- &n->bc_entry.link)) {
- pr_warn("Broadcast rcv link creation failed, no memory\n");
- kfree(n);
- n = NULL;
- goto exit;
- }
+ n->bc_entry.link = NULL;
tipc_node_get(n);
timer_setup(&n->timer, tipc_node_timeout, 0);
- n->keepalive_intv = U32_MAX;
+ /* Start a slow timer anyway, crypto needs it */
+ n->keepalive_intv = 10000;
+ intv = jiffies + msecs_to_jiffies(n->keepalive_intv);
+ if (!mod_timer(&n->timer, intv))
+ tipc_node_get(n);
hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]);
list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
if (n->addr < temp_node->addr)
@@ -444,6 +570,7 @@
list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
tn->capabilities &= temp_node->capabilities;
}
+ tipc_bcast_toggle_rcast(net, (tn->capabilities & TIPC_BCAST_RCAST));
trace_tipc_node_create(n, true, " ");
exit:
spin_unlock_bh(&tn->node_list_lock);
@@ -465,6 +592,9 @@
static void tipc_node_delete_from_list(struct tipc_node *node)
{
+#ifdef CONFIG_TIPC_CRYPTO
+ tipc_crypto_key_flush(node->crypto_rx);
+#endif
list_del_rcu(&node->list);
hlist_del_rcu(&node->hash);
tipc_node_put(node);
@@ -617,12 +747,18 @@
}
tipc_node_write_unlock(peer);
+ if (!deleted) {
+ spin_unlock_bh(&tn->node_list_lock);
+ return deleted;
+ }
+
/* Calculate cluster capabilities */
tn->capabilities = TIPC_NODE_CAPABILITIES;
list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
tn->capabilities &= temp_node->capabilities;
}
-
+ tipc_bcast_toggle_rcast(peer->net,
+ (tn->capabilities & TIPC_BCAST_RCAST));
spin_unlock_bh(&tn->node_list_lock);
return deleted;
}
@@ -645,6 +781,10 @@
return;
}
+#ifdef CONFIG_TIPC_CRYPTO
+ /* Take any crypto key related actions first */
+ tipc_crypto_timeout(n->crypto_rx);
+#endif
__skb_queue_head_init(&xmitq);
/* Initial node interval to value larger (10 seconds), then it will be
@@ -665,7 +805,7 @@
remains--;
}
tipc_node_read_unlock(n);
- tipc_bearer_xmit(n->net, bearer_id, &xmitq, &le->maddr);
+ tipc_bearer_xmit(n->net, bearer_id, &xmitq, &le->maddr, n);
if (rc & TIPC_LINK_DOWN_EVT)
tipc_node_link_down(n, bearer_id, false);
}
@@ -697,7 +837,7 @@
n->link_id = tipc_link_id(nl);
/* Leave room for tunnel header when returning 'mtu' to users: */
- n->links[bearer_id].mtu = tipc_link_mtu(nl) - INT_H_SIZE;
+ n->links[bearer_id].mtu = tipc_link_mss(nl);
tipc_bearer_add_dest(n->net, bearer_id, n->addr);
tipc_bcast_inc_bearer_dst_cnt(n->net, bearer_id);
@@ -751,7 +891,7 @@
tipc_node_write_lock(n);
__tipc_node_link_up(n, bearer_id, xmitq);
maddr = &n->links[bearer_id].maddr;
- tipc_bearer_xmit(n->net, bearer_id, xmitq, maddr);
+ tipc_bearer_xmit(n->net, bearer_id, xmitq, maddr, n);
tipc_node_write_unlock(n);
}
@@ -906,7 +1046,7 @@
if (delete)
tipc_mon_remove_peer(n->net, n->addr, old_bearer_id);
if (!skb_queue_empty(&xmitq))
- tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr);
+ tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr, n);
tipc_sk_rcv(n->net, &le->inputq);
}
@@ -950,6 +1090,8 @@
{
struct tipc_net *tn = tipc_net(net);
struct tipc_node *n;
+ bool preliminary;
+ u32 sugg_addr;
/* Suggest new address if some other peer is using this one */
n = tipc_node_find(net, addr);
@@ -965,9 +1107,11 @@
/* Suggest previously used address if peer is known */
n = tipc_node_find_by_id(net, id);
if (n) {
- addr = n->addr;
+ sugg_addr = n->addr;
+ preliminary = n->preliminary;
tipc_node_put(n);
- return addr;
+ if (!preliminary)
+ return sugg_addr;
}
/* Even this node may be in conflict */
@@ -979,12 +1123,12 @@
void tipc_node_check_dest(struct net *net, u32 addr,
u8 *peer_id, struct tipc_bearer *b,
- u16 capabilities, u32 signature,
+ u16 capabilities, u32 signature, u32 hash_mixes,
struct tipc_media_addr *maddr,
bool *respond, bool *dupl_addr)
{
struct tipc_node *n;
- struct tipc_link *l;
+ struct tipc_link *l, *snd_l;
struct tipc_link_entry *le;
bool addr_match = false;
bool sign_match = false;
@@ -998,11 +1142,28 @@
*dupl_addr = false;
*respond = false;
- n = tipc_node_create(net, addr, peer_id, capabilities);
+ n = tipc_node_create(net, addr, peer_id, capabilities, hash_mixes,
+ false);
if (!n)
return;
tipc_node_write_lock(n);
+ if (unlikely(!n->bc_entry.link)) {
+ snd_l = tipc_bc_sndlink(net);
+ if (!tipc_link_bc_create(net, tipc_own_addr(net),
+ addr, peer_id, U16_MAX,
+ tipc_link_min_win(snd_l),
+ tipc_link_max_win(snd_l),
+ n->capabilities,
+ &n->bc_entry.inputq1,
+ &n->bc_entry.namedq, snd_l,
+ &n->bc_entry.link)) {
+ pr_warn("Broadcast rcv link creation failed, no mem\n");
+ tipc_node_write_unlock_fast(n);
+ tipc_node_put(n);
+ return;
+ }
+ }
le = &n->links[b->identity];
@@ -1017,6 +1178,9 @@
if (sign_match && addr_match && link_up) {
/* All is fine. Do nothing. */
reset = false;
+ /* Peer node is not a container/local namespace */
+ if (!n->peer_hash_mix)
+ n->peer_hash_mix = hash_mixes;
} else if (sign_match && addr_match && !link_up) {
/* Respond. The link will come up in due time */
*respond = true;
@@ -1083,7 +1247,7 @@
get_random_bytes(&session, sizeof(u16));
if (!tipc_link_create(net, if_name, b->identity, b->tolerance,
b->net_plane, b->mtu, b->priority,
- b->window, session,
+ b->min_win, b->max_win, session,
tipc_own_addr(net), addr, peer_id,
n->capabilities,
tipc_bc_sndlink(n->net), n->bc_entry.link,
@@ -1332,6 +1496,7 @@
/* Clean up broadcast state */
tipc_bcast_remove_peer(n->net, n->bc_entry.link);
+ skb_queue_purge(&n->bc_entry.namedq);
/* Abort any ongoing link failover */
for (i = 0; i < MAX_BEARERS; i++) {
@@ -1342,7 +1507,8 @@
/* Notify publications from this node */
n->action_flags |= TIPC_NOTIFY_NODE_DOWN;
-
+ n->peer_net = NULL;
+ n->peer_hash_mix = 0;
/* Notify sockets connected to node */
list_for_each_entry_safe(conn, safe, conns, list) {
skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG,
@@ -1360,7 +1526,7 @@
* tipc_node_get_linkname - get the name of a link
*
* @bearer_id: id of the bearer
- * @node: peer node address
+ * @addr: peer node address
* @linkname: link name output buffer
*
* Returns 0 on success
@@ -1424,6 +1590,57 @@
return -EMSGSIZE;
}
+static void tipc_lxc_xmit(struct net *peer_net, struct sk_buff_head *list)
+{
+ struct tipc_msg *hdr = buf_msg(skb_peek(list));
+ struct sk_buff_head inputq;
+
+ switch (msg_user(hdr)) {
+ case TIPC_LOW_IMPORTANCE:
+ case TIPC_MEDIUM_IMPORTANCE:
+ case TIPC_HIGH_IMPORTANCE:
+ case TIPC_CRITICAL_IMPORTANCE:
+ if (msg_connected(hdr) || msg_named(hdr) ||
+ msg_direct(hdr)) {
+ tipc_loopback_trace(peer_net, list);
+ spin_lock_init(&list->lock);
+ tipc_sk_rcv(peer_net, list);
+ return;
+ }
+ if (msg_mcast(hdr)) {
+ tipc_loopback_trace(peer_net, list);
+ skb_queue_head_init(&inputq);
+ tipc_sk_mcast_rcv(peer_net, list, &inputq);
+ __skb_queue_purge(list);
+ skb_queue_purge(&inputq);
+ return;
+ }
+ return;
+ case MSG_FRAGMENTER:
+ if (tipc_msg_assemble(list)) {
+ tipc_loopback_trace(peer_net, list);
+ skb_queue_head_init(&inputq);
+ tipc_sk_mcast_rcv(peer_net, list, &inputq);
+ __skb_queue_purge(list);
+ skb_queue_purge(&inputq);
+ }
+ return;
+ case GROUP_PROTOCOL:
+ case CONN_MANAGER:
+ tipc_loopback_trace(peer_net, list);
+ spin_lock_init(&list->lock);
+ tipc_sk_rcv(peer_net, list);
+ return;
+ case LINK_PROTOCOL:
+ case NAME_DISTRIBUTOR:
+ case TUNNEL_PROTOCOL:
+ case BCAST_PROTOCOL:
+ return;
+ default:
+ return;
+ };
+}
+
/**
* tipc_node_xmit() is the general link level function for message sending
* @net: the applicable net namespace
@@ -1439,6 +1656,7 @@
struct tipc_link_entry *le = NULL;
struct tipc_node *n;
struct sk_buff_head xmitq;
+ bool node_up = false;
int bearer_id;
int rc;
@@ -1456,6 +1674,17 @@
}
tipc_node_read_lock(n);
+ node_up = node_is_up(n);
+ if (node_up && n->peer_net && check_net(n->peer_net)) {
+ /* xmit inner linux container */
+ tipc_lxc_xmit(n->peer_net, list);
+ if (likely(skb_queue_empty(list))) {
+ tipc_node_read_unlock(n);
+ tipc_node_put(n);
+ return 0;
+ }
+ }
+
bearer_id = n->active_links[selector & 1];
if (unlikely(bearer_id == INVALID_BEARER_ID)) {
tipc_node_read_unlock(n);
@@ -1474,7 +1703,7 @@
if (unlikely(rc == -ENOBUFS))
tipc_node_link_down(n, bearer_id, false);
else
- tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr);
+ tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr, n);
tipc_node_put(n);
@@ -1514,12 +1743,23 @@
return 0;
}
-void tipc_node_broadcast(struct net *net, struct sk_buff *skb)
+void tipc_node_broadcast(struct net *net, struct sk_buff *skb, int rc_dests)
{
+ struct sk_buff_head xmitq;
struct sk_buff *txskb;
struct tipc_node *n;
+ u16 dummy;
u32 dst;
+ /* Use broadcast if all nodes support it */
+ if (!rc_dests && tipc_bcast_get_mode(net) != BCLINK_MODE_RCAST) {
+ __skb_queue_head_init(&xmitq);
+ __skb_queue_tail(&xmitq, skb);
+ tipc_bcast_xmit(net, &xmitq, &dummy);
+ return;
+ }
+
+ /* Otherwise use legacy replicast method */
rcu_read_lock();
list_for_each_entry_rcu(n, tipc_nodes(net), list) {
dst = n->addr;
@@ -1534,7 +1774,6 @@
tipc_node_xmit_skb(net, txskb, dst, 0);
}
rcu_read_unlock();
-
kfree_skb(skb);
}
@@ -1557,7 +1796,7 @@
struct tipc_link *ucl;
int rc;
- rc = tipc_bcast_sync_rcv(n->net, n->bc_entry.link, hdr);
+ rc = tipc_bcast_sync_rcv(n->net, n->bc_entry.link, hdr, xmitq);
if (rc & TIPC_LINK_DOWN_EVT) {
tipc_node_reset_links(n);
@@ -1622,14 +1861,16 @@
}
if (!skb_queue_empty(&xmitq))
- tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr);
+ tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr, n);
if (!skb_queue_empty(&be->inputq1))
tipc_node_mcast_rcv(n);
/* Handle NAME_DISTRIBUTOR messages sent from 1.7 nodes */
if (!skb_queue_empty(&n->bc_entry.namedq))
- tipc_named_rcv(net, &n->bc_entry.namedq);
+ tipc_named_rcv(net, &n->bc_entry.namedq,
+ &n->bc_entry.named_rcv_nxt,
+ &n->bc_entry.named_open);
/* If reassembly or retransmission failure => reset all links to peer */
if (rc & TIPC_LINK_DOWN_EVT)
@@ -1792,7 +2033,7 @@
* tipc_rcv - process TIPC packets/messages arriving from off-node
* @net: the applicable net namespace
* @skb: TIPC packet
- * @bearer: pointer to bearer message arrived on
+ * @b: pointer to bearer message arrived on
*
* Invoked with no locks held. Bearer pointer must point to a valid bearer
* structure (i.e. cannot be NULL), but bearer can be inactive.
@@ -1800,20 +2041,38 @@
void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
{
struct sk_buff_head xmitq;
- struct tipc_node *n;
- struct tipc_msg *hdr;
- int bearer_id = b->identity;
struct tipc_link_entry *le;
+ struct tipc_msg *hdr;
+ struct tipc_node *n;
+ int bearer_id = b->identity;
u32 self = tipc_own_addr(net);
int usr, rc = 0;
u16 bc_ack;
+#ifdef CONFIG_TIPC_CRYPTO
+ struct tipc_ehdr *ehdr;
- __skb_queue_head_init(&xmitq);
+ /* Check if message must be decrypted first */
+ if (TIPC_SKB_CB(skb)->decrypted || !tipc_ehdr_validate(skb))
+ goto rcv;
+ ehdr = (struct tipc_ehdr *)skb->data;
+ if (likely(ehdr->user != LINK_CONFIG)) {
+ n = tipc_node_find(net, ntohl(ehdr->addr));
+ if (unlikely(!n))
+ goto discard;
+ } else {
+ n = tipc_node_find_by_id(net, ehdr->id);
+ }
+ tipc_crypto_rcv(net, (n) ? n->crypto_rx : NULL, &skb, b);
+ if (!skb)
+ return;
+
+rcv:
+#endif
/* Ensure message is well-formed before touching the header */
- TIPC_SKB_CB(skb)->validated = false;
if (unlikely(!tipc_msg_validate(&skb)))
goto discard;
+ __skb_queue_head_init(&xmitq);
hdr = buf_msg(skb);
usr = msg_user(hdr);
bc_ack = msg_bcast_ack(hdr);
@@ -1837,10 +2096,16 @@
le = &n->links[bearer_id];
/* Ensure broadcast reception is in synch with peer's send state */
- if (unlikely(usr == LINK_PROTOCOL))
+ if (unlikely(usr == LINK_PROTOCOL)) {
+ if (unlikely(skb_linearize(skb))) {
+ tipc_node_put(n);
+ goto discard;
+ }
+ hdr = buf_msg(skb);
tipc_node_bc_sync_rcv(n, hdr, bearer_id, &xmitq);
- else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack))
+ } else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack)) {
tipc_bcast_ack_rcv(net, n->bc_entry.link, hdr);
+ }
/* Receive packet directly if conditions permit */
tipc_node_read_lock(n);
@@ -1857,7 +2122,7 @@
/* Check/update node state before receiving */
if (unlikely(skb)) {
if (unlikely(skb_linearize(skb)))
- goto discard;
+ goto out_node_put;
tipc_node_write_lock(n);
if (tipc_node_check_state(n, skb, bearer_id, &xmitq)) {
if (le->link) {
@@ -1875,7 +2140,9 @@
tipc_node_link_down(n, bearer_id, false);
if (unlikely(!skb_queue_empty(&n->bc_entry.namedq)))
- tipc_named_rcv(net, &n->bc_entry.namedq);
+ tipc_named_rcv(net, &n->bc_entry.namedq,
+ &n->bc_entry.named_rcv_nxt,
+ &n->bc_entry.named_open);
if (unlikely(!skb_queue_empty(&n->bc_entry.inputq1)))
tipc_node_mcast_rcv(n);
@@ -1884,8 +2151,9 @@
tipc_sk_rcv(net, &le->inputq);
if (!skb_queue_empty(&xmitq))
- tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr);
+ tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr, n);
+out_node_put:
tipc_node_put(n);
discard:
kfree_skb(skb);
@@ -1913,9 +2181,13 @@
&xmitq);
else if (prop == TIPC_NLA_PROP_MTU)
tipc_link_set_mtu(e->link, b->mtu);
+
+ /* Update MTU for node link entry */
+ e->mtu = tipc_link_mss(e->link);
}
+
tipc_node_write_unlock(n);
- tipc_bearer_xmit(net, bearer_id, &xmitq, &e->maddr);
+ tipc_bearer_xmit(net, bearer_id, &xmitq, &e->maddr, NULL);
}
rcu_read_unlock();
@@ -1926,7 +2198,7 @@
struct net *net = sock_net(skb->sk);
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct nlattr *attrs[TIPC_NLA_NET_MAX + 1];
- struct tipc_node *peer;
+ struct tipc_node *peer, *temp_node;
u32 addr;
int err;
@@ -1967,6 +2239,12 @@
tipc_node_write_unlock(peer);
tipc_node_delete(peer);
+ /* Calculate cluster capabilities */
+ tn->capabilities = TIPC_NODE_CAPABILITIES;
+ list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
+ tn->capabilities &= temp_node->capabilities;
+ }
+ tipc_bcast_toggle_rcast(net, (tn->capabilities & TIPC_BCAST_RCAST));
err = 0;
err_out:
tipc_node_put(peer);
@@ -2011,6 +2289,8 @@
}
list_for_each_entry_rcu(node, &tn->node_list, list) {
+ if (node->preliminary)
+ continue;
if (last_addr) {
if (node->addr == last_addr)
last_addr = 0;
@@ -2121,8 +2401,7 @@
if (attrs[TIPC_NLA_LINK_PROP]) {
struct nlattr *props[TIPC_NLA_PROP_MAX + 1];
- err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_LINK_PROP],
- props);
+ err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_LINK_PROP], props);
if (err) {
res = err;
goto out;
@@ -2141,16 +2420,19 @@
tipc_link_set_prio(link, prio, &xmitq);
}
if (props[TIPC_NLA_PROP_WIN]) {
- u32 win;
+ u32 max_win;
- win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
- tipc_link_set_queue_limits(link, win);
+ max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
+ tipc_link_set_queue_limits(link,
+ tipc_link_min_win(link),
+ max_win);
}
}
out:
tipc_node_read_unlock(node);
- tipc_bearer_xmit(net, bearer_id, &xmitq, &node->links[bearer_id].maddr);
+ tipc_bearer_xmit(net, bearer_id, &xmitq, &node->links[bearer_id].maddr,
+ NULL);
return res;
}
@@ -2184,7 +2466,7 @@
return -ENOMEM;
if (strcmp(name, tipc_bclink_name) == 0) {
- err = tipc_nl_add_bc_link(net, &msg);
+ err = tipc_nl_add_bc_link(net, &msg, tipc_net(net)->bcl);
if (err)
goto err_free;
} else {
@@ -2228,6 +2510,7 @@
struct tipc_node *node;
struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1];
struct net *net = sock_net(skb->sk);
+ struct tipc_net *tn = tipc_net(net);
struct tipc_link_entry *le;
if (!info->attrs[TIPC_NLA_LINK])
@@ -2244,11 +2527,26 @@
link_name = nla_data(attrs[TIPC_NLA_LINK_NAME]);
- if (strcmp(link_name, tipc_bclink_name) == 0) {
- err = tipc_bclink_reset_stats(net);
+ err = -EINVAL;
+ if (!strcmp(link_name, tipc_bclink_name)) {
+ err = tipc_bclink_reset_stats(net, tipc_bc_sndlink(net));
if (err)
return err;
return 0;
+ } else if (strstr(link_name, tipc_bclink_name)) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(node, &tn->node_list, list) {
+ tipc_node_read_lock(node);
+ link = node->bc_entry.link;
+ if (link && !strcmp(link_name, tipc_link_name(link))) {
+ err = tipc_bclink_reset_stats(net, link);
+ tipc_node_read_unlock(node);
+ break;
+ }
+ tipc_node_read_unlock(node);
+ }
+ rcu_read_unlock();
+ return err;
}
node = tipc_node_find_by_name(net, link_name, &bearer_id);
@@ -2272,7 +2570,8 @@
/* Caller should hold node lock */
static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg,
- struct tipc_node *node, u32 *prev_link)
+ struct tipc_node *node, u32 *prev_link,
+ bool bc_link)
{
u32 i;
int err;
@@ -2288,6 +2587,14 @@
if (err)
return err;
}
+
+ if (bc_link) {
+ *prev_link = i;
+ err = tipc_nl_add_bc_link(net, msg, node->bc_entry.link);
+ if (err)
+ return err;
+ }
+
*prev_link = 0;
return 0;
@@ -2296,17 +2603,36 @@
int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
+ struct nlattr **attrs = genl_dumpit_info(cb)->attrs;
+ struct nlattr *link[TIPC_NLA_LINK_MAX + 1];
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_node *node;
struct tipc_nl_msg msg;
u32 prev_node = cb->args[0];
u32 prev_link = cb->args[1];
int done = cb->args[2];
+ bool bc_link = cb->args[3];
int err;
if (done)
return 0;
+ if (!prev_node) {
+ /* Check if broadcast-receiver links dumping is needed */
+ if (attrs && attrs[TIPC_NLA_LINK]) {
+ err = nla_parse_nested_deprecated(link,
+ TIPC_NLA_LINK_MAX,
+ attrs[TIPC_NLA_LINK],
+ tipc_nl_link_policy,
+ NULL);
+ if (unlikely(err))
+ return err;
+ if (unlikely(!link[TIPC_NLA_LINK_BROADCAST]))
+ return -EINVAL;
+ bc_link = true;
+ }
+ }
+
msg.skb = skb;
msg.portid = NETLINK_CB(cb->skb).portid;
msg.seq = cb->nlh->nlmsg_seq;
@@ -2330,7 +2656,7 @@
list) {
tipc_node_read_lock(node);
err = __tipc_nl_add_node_links(net, &msg, node,
- &prev_link);
+ &prev_link, bc_link);
tipc_node_read_unlock(node);
if (err)
goto out;
@@ -2338,14 +2664,14 @@
prev_node = node->addr;
}
} else {
- err = tipc_nl_add_bc_link(net, &msg);
+ err = tipc_nl_add_bc_link(net, &msg, tn->bcl);
if (err)
goto out;
list_for_each_entry_rcu(node, &tn->node_list, list) {
tipc_node_read_lock(node);
err = __tipc_nl_add_node_links(net, &msg, node,
- &prev_link);
+ &prev_link, bc_link);
tipc_node_read_unlock(node);
if (err)
goto out;
@@ -2360,6 +2686,7 @@
cb->args[0] = prev_node;
cb->args[1] = prev_link;
cb->args[2] = done;
+ cb->args[3] = bc_link;
return skb->len;
}
@@ -2484,13 +2811,9 @@
int err;
if (!prev_node) {
- struct nlattr **attrs;
+ struct nlattr **attrs = genl_dumpit_info(cb)->attrs;
struct nlattr *mon[TIPC_NLA_MON_MAX + 1];
- err = tipc_nlmsg_parse(cb->nlh, &attrs);
- if (err)
- return err;
-
if (!attrs[TIPC_NLA_MON])
return -EINVAL;
@@ -2530,11 +2853,172 @@
return skb->len;
}
-u32 tipc_node_get_addr(struct tipc_node *node)
+#ifdef CONFIG_TIPC_CRYPTO
+static int tipc_nl_retrieve_key(struct nlattr **attrs,
+ struct tipc_aead_key **pkey)
{
- return (node) ? node->addr : 0;
+ struct nlattr *attr = attrs[TIPC_NLA_NODE_KEY];
+ struct tipc_aead_key *key;
+
+ if (!attr)
+ return -ENODATA;
+
+ if (nla_len(attr) < sizeof(*key))
+ return -EINVAL;
+ key = (struct tipc_aead_key *)nla_data(attr);
+ if (key->keylen > TIPC_AEAD_KEYLEN_MAX ||
+ nla_len(attr) < tipc_aead_key_size(key))
+ return -EINVAL;
+
+ *pkey = key;
+ return 0;
}
+static int tipc_nl_retrieve_nodeid(struct nlattr **attrs, u8 **node_id)
+{
+ struct nlattr *attr = attrs[TIPC_NLA_NODE_ID];
+
+ if (!attr)
+ return -ENODATA;
+
+ if (nla_len(attr) < TIPC_NODEID_LEN)
+ return -EINVAL;
+
+ *node_id = (u8 *)nla_data(attr);
+ return 0;
+}
+
+static int tipc_nl_retrieve_rekeying(struct nlattr **attrs, u32 *intv)
+{
+ struct nlattr *attr = attrs[TIPC_NLA_NODE_REKEYING];
+
+ if (!attr)
+ return -ENODATA;
+
+ *intv = nla_get_u32(attr);
+ return 0;
+}
+
+static int __tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *attrs[TIPC_NLA_NODE_MAX + 1];
+ struct net *net = sock_net(skb->sk);
+ struct tipc_crypto *tx = tipc_net(net)->crypto_tx, *c = tx;
+ struct tipc_node *n = NULL;
+ struct tipc_aead_key *ukey;
+ bool rekeying = true, master_key = false;
+ u8 *id, *own_id, mode;
+ u32 intv = 0;
+ int rc = 0;
+
+ if (!info->attrs[TIPC_NLA_NODE])
+ return -EINVAL;
+
+ rc = nla_parse_nested(attrs, TIPC_NLA_NODE_MAX,
+ info->attrs[TIPC_NLA_NODE],
+ tipc_nl_node_policy, info->extack);
+ if (rc)
+ return rc;
+
+ own_id = tipc_own_id(net);
+ if (!own_id) {
+ GENL_SET_ERR_MSG(info, "not found own node identity (set id?)");
+ return -EPERM;
+ }
+
+ rc = tipc_nl_retrieve_rekeying(attrs, &intv);
+ if (rc == -ENODATA)
+ rekeying = false;
+
+ rc = tipc_nl_retrieve_key(attrs, &ukey);
+ if (rc == -ENODATA && rekeying)
+ goto rekeying;
+ else if (rc)
+ return rc;
+
+ rc = tipc_aead_key_validate(ukey, info);
+ if (rc)
+ return rc;
+
+ rc = tipc_nl_retrieve_nodeid(attrs, &id);
+ switch (rc) {
+ case -ENODATA:
+ mode = CLUSTER_KEY;
+ master_key = !!(attrs[TIPC_NLA_NODE_KEY_MASTER]);
+ break;
+ case 0:
+ mode = PER_NODE_KEY;
+ if (memcmp(id, own_id, NODE_ID_LEN)) {
+ n = tipc_node_find_by_id(net, id) ?:
+ tipc_node_create(net, 0, id, 0xffffu, 0, true);
+ if (unlikely(!n))
+ return -ENOMEM;
+ c = n->crypto_rx;
+ }
+ break;
+ default:
+ return rc;
+ }
+
+ /* Initiate the TX/RX key */
+ rc = tipc_crypto_key_init(c, ukey, mode, master_key);
+ if (n)
+ tipc_node_put(n);
+
+ if (unlikely(rc < 0)) {
+ GENL_SET_ERR_MSG(info, "unable to initiate or attach new key");
+ return rc;
+ } else if (c == tx) {
+ /* Distribute TX key but not master one */
+ if (!master_key && tipc_crypto_key_distr(tx, rc, NULL))
+ GENL_SET_ERR_MSG(info, "failed to replicate new key");
+rekeying:
+ /* Schedule TX rekeying if needed */
+ tipc_crypto_rekeying_sched(tx, rekeying, intv);
+ }
+
+ return 0;
+}
+
+int tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info)
+{
+ int err;
+
+ rtnl_lock();
+ err = __tipc_nl_node_set_key(skb, info);
+ rtnl_unlock();
+
+ return err;
+}
+
+static int __tipc_nl_node_flush_key(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct net *net = sock_net(skb->sk);
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_node *n;
+
+ tipc_crypto_key_flush(tn->crypto_tx);
+ rcu_read_lock();
+ list_for_each_entry_rcu(n, &tn->node_list, list)
+ tipc_crypto_key_flush(n->crypto_rx);
+ rcu_read_unlock();
+
+ return 0;
+}
+
+int tipc_nl_node_flush_key(struct sk_buff *skb, struct genl_info *info)
+{
+ int err;
+
+ rtnl_lock();
+ err = __tipc_nl_node_flush_key(skb, info);
+ rtnl_unlock();
+
+ return err;
+}
+#endif
+
/**
* tipc_node_dump - dump TIPC node data
* @n: tipc node to be dumped
@@ -2591,3 +3075,33 @@
return i;
}
+
+void tipc_node_pre_cleanup_net(struct net *exit_net)
+{
+ struct tipc_node *n;
+ struct tipc_net *tn;
+ struct net *tmp;
+
+ rcu_read_lock();
+ for_each_net_rcu(tmp) {
+ if (tmp == exit_net)
+ continue;
+ tn = tipc_net(tmp);
+ if (!tn)
+ continue;
+ spin_lock_bh(&tn->node_list_lock);
+ list_for_each_entry_rcu(n, &tn->node_list, list) {
+ if (!n->peer_net)
+ continue;
+ if (n->peer_net != exit_net)
+ continue;
+ tipc_node_write_lock(n);
+ n->peer_net = NULL;
+ n->peer_hash_mix = 0;
+ tipc_node_write_unlock_fast(n);
+ break;
+ }
+ spin_unlock_bh(&tn->node_list_lock);
+ }
+ rcu_read_unlock();
+}
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 291d0ec..154a5bb 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -54,7 +54,9 @@
TIPC_LINK_PROTO_SEQNO = (1 << 6),
TIPC_MCAST_RBCTL = (1 << 7),
TIPC_GAP_ACK_BLOCK = (1 << 8),
- TIPC_TUNNEL_ENHANCED = (1 << 9)
+ TIPC_TUNNEL_ENHANCED = (1 << 9),
+ TIPC_NAGLE = (1 << 10),
+ TIPC_NAMED_BCAST = (1 << 11)
};
#define TIPC_NODE_CAPABILITIES (TIPC_SYN_BIT | \
@@ -66,16 +68,30 @@
TIPC_LINK_PROTO_SEQNO | \
TIPC_MCAST_RBCTL | \
TIPC_GAP_ACK_BLOCK | \
- TIPC_TUNNEL_ENHANCED)
+ TIPC_TUNNEL_ENHANCED | \
+ TIPC_NAGLE | \
+ TIPC_NAMED_BCAST)
+
#define INVALID_BEARER_ID -1
void tipc_node_stop(struct net *net);
bool tipc_node_get_id(struct net *net, u32 addr, u8 *id);
u32 tipc_node_get_addr(struct tipc_node *node);
+char *tipc_node_get_id_str(struct tipc_node *node);
+void tipc_node_put(struct tipc_node *node);
+void tipc_node_get(struct tipc_node *node);
+struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id,
+ u16 capabilities, u32 hash_mixes,
+ bool preliminary);
+#ifdef CONFIG_TIPC_CRYPTO
+struct tipc_crypto *tipc_node_crypto_rx(struct tipc_node *__n);
+struct tipc_crypto *tipc_node_crypto_rx_by_list(struct list_head *pos);
+struct tipc_crypto *tipc_node_crypto_rx_by_addr(struct net *net, u32 addr);
+#endif
u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr);
void tipc_node_check_dest(struct net *net, u32 onode, u8 *peer_id128,
struct tipc_bearer *bearer,
- u16 capabilities, u32 signature,
+ u16 capabilities, u32 signature, u32 hash_mixes,
struct tipc_media_addr *maddr,
bool *respond, bool *dupl_addr);
void tipc_node_delete_links(struct net *net, int bearer_id);
@@ -89,10 +105,10 @@
u32 selector);
void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr);
void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr);
-void tipc_node_broadcast(struct net *net, struct sk_buff *skb);
+void tipc_node_broadcast(struct net *net, struct sk_buff *skb, int rc_dests);
int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port);
void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port);
-int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel);
+int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel, bool connected);
bool tipc_node_is_up(struct net *net, u32 addr);
u16 tipc_node_get_capabilities(struct net *net, u32 addr);
int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb);
@@ -107,4 +123,9 @@
int tipc_nl_node_dump_monitor(struct sk_buff *skb, struct netlink_callback *cb);
int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb,
struct netlink_callback *cb);
+#ifdef CONFIG_TIPC_CRYPTO
+int tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info);
+int tipc_nl_node_flush_key(struct sk_buff *skb, struct genl_info *info);
+#endif
+void tipc_node_pre_cleanup_net(struct net *exit_net);
#endif
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index fbbac9b..8d2c985 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -48,12 +48,13 @@
#include "group.h"
#include "trace.h"
+#define NAGLE_START_INIT 4
+#define NAGLE_START_MAX 1024
#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */
#define CONN_PROBING_INTV msecs_to_jiffies(3600000) /* [ms] => 1 h */
-#define TIPC_FWD_MSG 1
#define TIPC_MAX_PORT 0xffffffff
#define TIPC_MIN_PORT 1
-#define TIPC_ACK_RATE 4 /* ACK at 1/4 of of rcv window size */
+#define TIPC_ACK_RATE 4 /* ACK at 1/4 of rcv window size */
enum {
TIPC_LISTEN = TCP_LISTEN,
@@ -75,6 +76,7 @@
* @conn_instance: TIPC instance used when connection was established
* @published: non-zero if port has one or more associated names
* @max_pkt: maximum packet size "hint" used when building messages sent by port
+ * @maxnagle: maximum size of msg which can be subject to nagle
* @portid: unique port identity in TIPC socket hash table
* @phdr: preformatted message header used when sending messages
* #cong_links: list of congested links
@@ -97,6 +99,7 @@
u32 conn_instance;
int published;
u32 max_pkt;
+ u32 maxnagle;
u32 portid;
struct tipc_msg phdr;
struct list_head cong_links;
@@ -116,6 +119,13 @@
struct tipc_mc_method mc_method;
struct rcu_head rcu;
struct tipc_group *group;
+ u32 oneway;
+ u32 nagle_start;
+ u16 snd_backlog;
+ u16 msg_acc;
+ u16 pkt_cnt;
+ bool expect_ack;
+ bool nodelay;
bool group_is_open;
};
@@ -137,6 +147,8 @@
static void tipc_sk_remove(struct tipc_sock *tsk);
static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz);
static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz);
+static void tipc_sk_push_backlog(struct tipc_sock *tsk, bool nagle_ack);
+static int tipc_wait_for_connect(struct socket *sock, long *timeo_p);
static const struct proto_ops packet_ops;
static const struct proto_ops stream_ops;
@@ -184,19 +196,19 @@
return msg_importance(&tsk->phdr);
}
-static int tsk_set_importance(struct tipc_sock *tsk, int imp)
-{
- if (imp > TIPC_CRITICAL_IMPORTANCE)
- return -EINVAL;
- msg_set_importance(&tsk->phdr, (u32)imp);
- return 0;
-}
-
static struct tipc_sock *tipc_sk(const struct sock *sk)
{
return container_of(sk, struct tipc_sock, sk);
}
+int tsk_set_importance(struct sock *sk, int imp)
+{
+ if (imp > TIPC_CRITICAL_IMPORTANCE)
+ return -EINVAL;
+ msg_set_importance(&tipc_sk(sk)->phdr, (u32)imp);
+ return 0;
+}
+
static bool tsk_conn_cong(struct tipc_sock *tsk)
{
return tsk->snt_unacked > tsk->snd_win;
@@ -227,6 +239,26 @@
return 1;
}
+/* tsk_set_nagle - enable/disable nagle property by manipulating maxnagle
+ */
+static void tsk_set_nagle(struct tipc_sock *tsk)
+{
+ struct sock *sk = &tsk->sk;
+
+ tsk->maxnagle = 0;
+ if (sk->sk_type != SOCK_STREAM)
+ return;
+ if (tsk->nodelay)
+ return;
+ if (!(tsk->peer_caps & TIPC_NAGLE))
+ return;
+ /* Limit node local buffer size to avoid receive queue overflow */
+ if (tsk->max_pkt == MAX_MSG_SIZE)
+ tsk->maxnagle = 1500;
+ else
+ tsk->maxnagle = tsk->max_pkt;
+}
+
/**
* tsk_advance_rx_queue - discard first buffer in socket receive queue
*
@@ -446,6 +478,8 @@
tsk = tipc_sk(sk);
tsk->max_pkt = MAX_PKT_DEFAULT;
+ tsk->maxnagle = 0;
+ tsk->nagle_start = NAGLE_START_INIT;
INIT_LIST_HEAD(&tsk->publications);
INIT_LIST_HEAD(&tsk->cong_links);
msg = &tsk->phdr;
@@ -512,7 +546,9 @@
tipc_wait_for_cond(sock, &timeout, (!tsk->cong_link_cnt &&
!tsk_conn_cong(tsk)));
- /* Remove any pending SYN message */
+ /* Push out delayed messages if in Nagle mode */
+ tipc_sk_push_backlog(tsk, false);
+ /* Remove pending SYN */
__skb_queue_purge(&sk->sk_write_queue);
/* Remove partially received buffer if any */
@@ -675,7 +711,6 @@
* tipc_getname - get port ID of socket or peer socket
* @sock: socket structure
* @uaddr: area for returned socket address
- * @uaddr_len: area for returned length of socket address
* @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID
*
* Returns 0 on success, errno otherwise
@@ -748,7 +783,7 @@
case TIPC_ESTABLISHED:
if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
revents |= EPOLLOUT;
- /* fall through */
+ fallthrough;
case TIPC_LISTEN:
case TIPC_CONNECTING:
if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
@@ -865,7 +900,7 @@
/* Build message as chain of buffers */
__skb_queue_head_init(&pkts);
- mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
+ mtu = tipc_node_get_mtu(net, dnode, tsk->portid, false);
rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
if (unlikely(rc != dlen))
return rc;
@@ -1017,7 +1052,7 @@
/**
* tipc_send_group_bcast - send message to all members in communication group
- * @sk: socket structure
+ * @sock: socket structure
* @m: message to send
* @dlen: total length of message data
* @timeout: timeout to wait for wakeup
@@ -1222,6 +1257,56 @@
tipc_sk_rcv(net, inputq);
}
+/* tipc_sk_push_backlog(): send accumulated buffers in socket write queue
+ * when socket is in Nagle mode
+ */
+static void tipc_sk_push_backlog(struct tipc_sock *tsk, bool nagle_ack)
+{
+ struct sk_buff_head *txq = &tsk->sk.sk_write_queue;
+ struct sk_buff *skb = skb_peek_tail(txq);
+ struct net *net = sock_net(&tsk->sk);
+ u32 dnode = tsk_peer_node(tsk);
+ int rc;
+
+ if (nagle_ack) {
+ tsk->pkt_cnt += skb_queue_len(txq);
+ if (!tsk->pkt_cnt || tsk->msg_acc / tsk->pkt_cnt < 2) {
+ tsk->oneway = 0;
+ if (tsk->nagle_start < NAGLE_START_MAX)
+ tsk->nagle_start *= 2;
+ tsk->expect_ack = false;
+ pr_debug("tsk %10u: bad nagle %u -> %u, next start %u!\n",
+ tsk->portid, tsk->msg_acc, tsk->pkt_cnt,
+ tsk->nagle_start);
+ } else {
+ tsk->nagle_start = NAGLE_START_INIT;
+ if (skb) {
+ msg_set_ack_required(buf_msg(skb));
+ tsk->expect_ack = true;
+ } else {
+ tsk->expect_ack = false;
+ }
+ }
+ tsk->msg_acc = 0;
+ tsk->pkt_cnt = 0;
+ }
+
+ if (!skb || tsk->cong_link_cnt)
+ return;
+
+ /* Do not send SYN again after congestion */
+ if (msg_is_syn(buf_msg(skb)))
+ return;
+
+ if (tsk->msg_acc)
+ tsk->pkt_cnt += skb_queue_len(txq);
+ tsk->snt_unacked += tsk->snd_backlog;
+ tsk->snd_backlog = 0;
+ rc = tipc_node_xmit(net, txq, dnode, tsk->portid);
+ if (rc == -ELINKCONG)
+ tsk->cong_link_cnt = 1;
+}
+
/**
* tipc_sk_conn_proto_rcv - receive a connection mng protocol message
* @tsk: receiving socket
@@ -1235,7 +1320,7 @@
u32 onode = tsk_own_node(tsk);
struct sock *sk = &tsk->sk;
int mtyp = msg_type(hdr);
- bool conn_cong;
+ bool was_cong;
/* Ignore if connection cannot be validated: */
if (!tsk_peer_msg(tsk, hdr)) {
@@ -1268,11 +1353,12 @@
__skb_queue_tail(xmitq, skb);
return;
} else if (mtyp == CONN_ACK) {
- conn_cong = tsk_conn_cong(tsk);
+ was_cong = tsk_conn_cong(tsk);
+ tipc_sk_push_backlog(tsk, msg_nagle_ack(hdr));
tsk->snt_unacked -= msg_conn_ack(hdr);
if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
tsk->snd_win = msg_adv_win(hdr);
- if (conn_cong)
+ if (was_cong && !tsk_conn_cong(tsk))
sk->sk_write_space(sk);
} else if (mtyp != CONN_PROBE_REPLY) {
pr_warn("Received unknown CONN_PROTO msg\n");
@@ -1406,7 +1492,7 @@
}
__skb_queue_head_init(&pkts);
- mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
+ mtu = tipc_node_get_mtu(net, dnode, tsk->portid, true);
rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
if (unlikely(rc != dlen))
return rc;
@@ -1423,8 +1509,13 @@
rc = 0;
}
- if (unlikely(syn && !rc))
+ if (unlikely(syn && !rc)) {
tipc_set_sk_state(sk, TIPC_CONNECTING);
+ if (dlen && timeout) {
+ timeout = msecs_to_jiffies(timeout);
+ tipc_wait_for_connect(sock, &timeout);
+ }
+ }
return rc ? rc : dlen;
}
@@ -1457,21 +1548,22 @@
struct sock *sk = sock->sk;
DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
+ struct sk_buff_head *txq = &sk->sk_write_queue;
struct tipc_sock *tsk = tipc_sk(sk);
struct tipc_msg *hdr = &tsk->phdr;
struct net *net = sock_net(sk);
- struct sk_buff_head pkts;
+ struct sk_buff *skb;
u32 dnode = tsk_peer_node(tsk);
+ int maxnagle = tsk->maxnagle;
+ int maxpkt = tsk->max_pkt;
int send, sent = 0;
- int rc = 0;
-
- __skb_queue_head_init(&pkts);
+ int blocks, rc = 0;
if (unlikely(dlen > INT_MAX))
return -EMSGSIZE;
/* Handle implicit connection setup */
- if (unlikely(dest)) {
+ if (unlikely(dest && sk->sk_state == TIPC_OPEN)) {
rc = __tipc_sendmsg(sock, m, dlen);
if (dlen && dlen == rc) {
tsk->peer_caps = tipc_node_get_capabilities(net, dnode);
@@ -1487,21 +1579,48 @@
tipc_sk_connected(sk)));
if (unlikely(rc))
break;
-
send = min_t(size_t, dlen - sent, TIPC_MAX_USER_MSG_SIZE);
- rc = tipc_msg_build(hdr, m, sent, send, tsk->max_pkt, &pkts);
- if (unlikely(rc != send))
- break;
-
- trace_tipc_sk_sendstream(sk, skb_peek(&pkts),
+ blocks = tsk->snd_backlog;
+ if (tsk->oneway++ >= tsk->nagle_start && maxnagle &&
+ send <= maxnagle) {
+ rc = tipc_msg_append(hdr, m, send, maxnagle, txq);
+ if (unlikely(rc < 0))
+ break;
+ blocks += rc;
+ tsk->msg_acc++;
+ if (blocks <= 64 && tsk->expect_ack) {
+ tsk->snd_backlog = blocks;
+ sent += send;
+ break;
+ } else if (blocks > 64) {
+ tsk->pkt_cnt += skb_queue_len(txq);
+ } else {
+ skb = skb_peek_tail(txq);
+ if (skb) {
+ msg_set_ack_required(buf_msg(skb));
+ tsk->expect_ack = true;
+ } else {
+ tsk->expect_ack = false;
+ }
+ tsk->msg_acc = 0;
+ tsk->pkt_cnt = 0;
+ }
+ } else {
+ rc = tipc_msg_build(hdr, m, sent, send, maxpkt, txq);
+ if (unlikely(rc != send))
+ break;
+ blocks += tsk_inc(tsk, send + MIN_H_SIZE);
+ }
+ trace_tipc_sk_sendstream(sk, skb_peek(txq),
TIPC_DUMP_SK_SNDQ, " ");
- rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
+ rc = tipc_node_xmit(net, txq, dnode, tsk->portid);
if (unlikely(rc == -ELINKCONG)) {
tsk->cong_link_cnt = 1;
rc = 0;
}
if (likely(!rc)) {
- tsk->snt_unacked += tsk_inc(tsk, send + MIN_H_SIZE);
+ tsk->snt_unacked += blocks;
+ tsk->snd_backlog = 0;
sent += send;
}
} while (sent < dlen && !rc);
@@ -1546,8 +1665,9 @@
sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV);
tipc_set_sk_state(sk, TIPC_ESTABLISHED);
tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
- tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid);
+ tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid, true);
tsk->peer_caps = tipc_node_get_capabilities(net, peer_node);
+ tsk_set_nagle(tsk);
__skb_queue_purge(&sk->sk_write_queue);
if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
return;
@@ -1560,7 +1680,7 @@
/**
* tipc_sk_set_orig_addr - capture sender's address for received message
* @m: descriptor for message info
- * @hdr: received message header
+ * @skb: received message
*
* Note: Address is not captured if not requested by receiver.
*/
@@ -1669,22 +1789,21 @@
return 0;
}
-static void tipc_sk_send_ack(struct tipc_sock *tsk)
+static struct sk_buff *tipc_sk_build_ack(struct tipc_sock *tsk)
{
struct sock *sk = &tsk->sk;
- struct net *net = sock_net(sk);
struct sk_buff *skb = NULL;
struct tipc_msg *msg;
u32 peer_port = tsk_peer_port(tsk);
u32 dnode = tsk_peer_node(tsk);
if (!tipc_sk_connected(sk))
- return;
+ return NULL;
skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0,
dnode, tsk_own_node(tsk), peer_port,
tsk->portid, TIPC_OK);
if (!skb)
- return;
+ return NULL;
msg = buf_msg(skb);
msg_set_conn_ack(msg, tsk->rcv_unacked);
tsk->rcv_unacked = 0;
@@ -1694,7 +1813,19 @@
tsk->rcv_win = tsk_adv_blocks(tsk->sk.sk_rcvbuf);
msg_set_adv_win(msg, tsk->rcv_win);
}
- tipc_node_xmit_skb(net, skb, dnode, msg_link_selector(msg));
+ return skb;
+}
+
+static void tipc_sk_send_ack(struct tipc_sock *tsk)
+{
+ struct sk_buff *skb;
+
+ skb = tipc_sk_build_ack(tsk);
+ if (!skb)
+ return;
+
+ tipc_node_xmit_skb(sock_net(&tsk->sk), skb, tsk_peer_node(tsk),
+ msg_link_selector(buf_msg(skb)));
}
static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
@@ -1959,7 +2090,7 @@
/* Send connection flow control advertisement when applicable */
tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen);
- if (unlikely(tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE))
+ if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE)
tipc_sk_send_ack(tsk);
/* Exit if all requested data or FIN/error received */
@@ -1991,7 +2122,6 @@
/**
* tipc_data_ready - wake up threads to indicate messages have been received
* @sk: socket
- * @len: the length of messages
*/
static void tipc_data_ready(struct sock *sk)
{
@@ -2030,6 +2160,7 @@
smp_wmb();
tsk->cong_link_cnt--;
wakeup = true;
+ tipc_sk_push_backlog(tsk, false);
break;
case GROUP_PROTOCOL:
tipc_group_proto_rcv(grp, &wakeup, hdr, inputq, xmitq);
@@ -2052,9 +2183,11 @@
* tipc_sk_filter_connect - check incoming message for a connection-based socket
* @tsk: TIPC socket
* @skb: pointer to message buffer.
+ * @xmitq: for Nagle ACK if any
* Returns true if message should be added to receive queue, false otherwise
*/
-static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
+static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb,
+ struct sk_buff_head *xmitq)
{
struct sock *sk = &tsk->sk;
struct net *net = sock_net(sk);
@@ -2069,6 +2202,7 @@
if (unlikely(msg_mcast(hdr)))
return false;
+ tsk->oneway = 0;
switch (sk->sk_state) {
case TIPC_CONNECTING:
@@ -2114,9 +2248,22 @@
return true;
return false;
case TIPC_ESTABLISHED:
+ if (!skb_queue_empty(&sk->sk_write_queue))
+ tipc_sk_push_backlog(tsk, false);
/* Accept only connection-based messages sent by peer */
- if (likely(con_msg && !err && pport == oport && pnode == onode))
+ if (likely(con_msg && !err && pport == oport &&
+ pnode == onode)) {
+ if (msg_ack_required(hdr)) {
+ struct sk_buff *skb;
+
+ skb = tipc_sk_build_ack(tsk);
+ if (skb) {
+ msg_set_nagle_ack(buf_msg(skb));
+ __skb_queue_tail(xmitq, skb);
+ }
+ }
return true;
+ }
if (!tsk_peer_msg(tsk, hdr))
return false;
if (!err)
@@ -2211,7 +2358,7 @@
while ((skb = __skb_dequeue(&inputq))) {
hdr = buf_msg(skb);
limit = rcvbuf_limit(sk, skb);
- if ((sk_conn && !tipc_sk_filter_connect(tsk, skb)) ||
+ if ((sk_conn && !tipc_sk_filter_connect(tsk, skb, xmitq)) ||
(!sk_conn && msg_connected(hdr)) ||
(!grp && msg_in_group(hdr)))
err = TIPC_ERR_NO_PORT;
@@ -2385,10 +2532,12 @@
return -ETIMEDOUT;
if (signal_pending(current))
return sock_intr_errno(*timeo_p);
+ if (sk->sk_state == TIPC_DISCONNECTING)
+ break;
add_wait_queue(sk_sleep(sk), &wait);
- done = sk_wait_event(sk, timeo_p,
- sk->sk_state != TIPC_CONNECTING, &wait);
+ done = sk_wait_event(sk, timeo_p, tipc_sk_connected(sk),
+ &wait);
remove_wait_queue(sk_sleep(sk), &wait);
} while (!done);
return 0;
@@ -2476,7 +2625,7 @@
* case is EINPROGRESS, rather than EALREADY.
*/
res = -EINPROGRESS;
- /* fall through */
+ fallthrough;
case TIPC_CONNECTING:
if (!timeout) {
if (previous == TIPC_CONNECTING)
@@ -2553,7 +2702,7 @@
/**
* tipc_accept - wait for connection request
* @sock: listening socket
- * @newsock: new socket that is to be connected
+ * @new_sock: new socket that is to be connected
* @flags: file-related flags associated with socket
*
* Returns 0 on success, errno otherwise
@@ -2562,9 +2711,10 @@
bool kern)
{
struct sock *new_sk, *sk = sock->sk;
- struct sk_buff *buf;
struct tipc_sock *new_tsock;
+ struct msghdr m = {NULL,};
struct tipc_msg *msg;
+ struct sk_buff *buf;
long timeo;
int res;
@@ -2602,26 +2752,24 @@
/* Connect new socket to it's peer */
tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg));
- tsk_set_importance(new_tsock, msg_importance(msg));
+ tsk_set_importance(new_sk, msg_importance(msg));
if (msg_named(msg)) {
new_tsock->conn_type = msg_nametype(msg);
new_tsock->conn_instance = msg_nameinst(msg);
}
/*
- * Respond to 'SYN-' by discarding it & returning 'ACK'-.
- * Respond to 'SYN+' by queuing it on new socket.
+ * Respond to 'SYN-' by discarding it & returning 'ACK'.
+ * Respond to 'SYN+' by queuing it on new socket & returning 'ACK'.
*/
if (!msg_data_sz(msg)) {
- struct msghdr m = {NULL,};
-
tsk_advance_rx_queue(sk);
- __tipc_sendstream(new_sock, &m, 0);
} else {
__skb_dequeue(&sk->sk_receive_queue);
__skb_queue_head(&new_sk->sk_receive_queue, buf);
skb_set_owner_r(buf, new_sk);
}
+ __tipc_sendstream(new_sock, &m, 0);
release_sock(new_sk);
exit:
release_sock(sk);
@@ -2844,7 +2992,7 @@
struct tipc_sock *tsk;
rcu_read_lock();
- tsk = rhashtable_lookup_fast(&tn->sk_rht, &portid, tsk_rht_params);
+ tsk = rhashtable_lookup(&tn->sk_rht, &portid, tsk_rht_params);
if (tsk)
sock_hold(&tsk->sk);
rcu_read_unlock();
@@ -2981,7 +3129,7 @@
* Returns 0 on success, errno otherwise
*/
static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
- char __user *ov, unsigned int ol)
+ sockptr_t ov, unsigned int ol)
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
@@ -2999,19 +3147,20 @@
case TIPC_SRC_DROPPABLE:
case TIPC_DEST_DROPPABLE:
case TIPC_CONN_TIMEOUT:
+ case TIPC_NODELAY:
if (ol < sizeof(value))
return -EINVAL;
- if (get_user(value, (u32 __user *)ov))
+ if (copy_from_sockptr(&value, ov, sizeof(u32)))
return -EFAULT;
break;
case TIPC_GROUP_JOIN:
if (ol < sizeof(mreq))
return -EINVAL;
- if (copy_from_user(&mreq, ov, sizeof(mreq)))
+ if (copy_from_sockptr(&mreq, ov, sizeof(mreq)))
return -EFAULT;
break;
default:
- if (ov || ol)
+ if (!sockptr_is_null(ov) || ol)
return -EINVAL;
}
@@ -3019,7 +3168,7 @@
switch (opt) {
case TIPC_IMPORTANCE:
- res = tsk_set_importance(tsk, value);
+ res = tsk_set_importance(sk, value);
break;
case TIPC_SRC_DROPPABLE:
if (sock->type != SOCK_STREAM)
@@ -3047,6 +3196,10 @@
case TIPC_GROUP_LEAVE:
res = tipc_sk_leave(tsk);
break;
+ case TIPC_NODELAY:
+ tsk->nodelay = !!value;
+ tsk_set_nagle(tsk);
+ break;
default:
res = -EINVAL;
}
@@ -3590,7 +3743,7 @@
if (p->key == *last_publ)
break;
}
- if (p->key != *last_publ) {
+ if (list_entry_is_head(p, &tsk->publications, binding_sock)) {
/* We never set seq or call nl_dump_check_consistent()
* this means that setting prev_seq here will cause the
* consistence check to fail in the netlink callback
@@ -3628,13 +3781,9 @@
struct tipc_sock *tsk;
if (!tsk_portid) {
- struct nlattr **attrs;
+ struct nlattr **attrs = genl_dumpit_info(cb)->attrs;
struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1];
- err = tipc_nlmsg_parse(cb->nlh, &attrs);
- if (err)
- return err;
-
if (!attrs[TIPC_NLA_SOCK])
return -EINVAL;
diff --git a/net/tipc/socket.h b/net/tipc/socket.h
index 235b967..b11575a 100644
--- a/net/tipc/socket.h
+++ b/net/tipc/socket.h
@@ -75,4 +75,6 @@
bool tipc_sk_overlimit1(struct sock *sk, struct sk_buff *skb);
bool tipc_sk_overlimit2(struct sock *sk, struct sk_buff *skb);
+int tsk_set_importance(struct sock *sk, int imp);
+
#endif
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index aa015c2..6ebbec1 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -96,6 +96,16 @@
(swap_ ? swab32(val__) : val__); \
})
+/* tipc_sub_write - write val_ to field_ of struct sub_ in user endian format
+ */
+#define tipc_sub_write(sub_, field_, val_) \
+ ({ \
+ struct tipc_subscr *sub__ = sub_; \
+ u32 val__ = val_; \
+ int swap_ = !((sub__)->filter & TIPC_FILTER_MASK); \
+ (sub__)->field_ = swap_ ? swab32(val__) : val__; \
+ })
+
/* tipc_evt_write - write val_ to field_ of struct evt_ in user endian format
*/
#define tipc_evt_write(evt_, field_, val_) \
diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c
index 6159d32..9fb65c9 100644
--- a/net/tipc/sysctl.c
+++ b/net/tipc/sysctl.c
@@ -35,7 +35,8 @@
#include "core.h"
#include "trace.h"
-
+#include "crypto.h"
+#include "bcast.h"
#include <linux/sysctl.h>
static struct ctl_table_header *tipc_ctl_hdr;
@@ -64,6 +65,32 @@
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
},
+#ifdef CONFIG_TIPC_CRYPTO
+ {
+ .procname = "max_tfms",
+ .data = &sysctl_tipc_max_tfms,
+ .maxlen = sizeof(sysctl_tipc_max_tfms),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ONE,
+ },
+ {
+ .procname = "key_exchange_enabled",
+ .data = &sysctl_tipc_key_exchange_enabled,
+ .maxlen = sizeof(sysctl_tipc_key_exchange_enabled),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+#endif
+ {
+ .procname = "bc_retruni",
+ .data = &sysctl_tipc_bc_retruni,
+ .maxlen = sizeof(sysctl_tipc_bc_retruni),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
{}
};
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index 444e179..13f3143 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -48,7 +48,6 @@
#define MAX_SEND_MSG_COUNT 25
#define MAX_RECV_MSG_COUNT 25
#define CF_CONNECTED 1
-#define CF_SERVER 2
#define TIPC_SERVER_NAME_LEN 32
@@ -237,8 +236,8 @@
if (!s || !memcmp(s, &sub->evt.s, sizeof(*s))) {
tipc_sub_unsubscribe(sub);
atomic_dec(&tn->subscription_count);
- } else if (s) {
- break;
+ if (s)
+ break;
}
}
spin_unlock_bh(&con->sub_lock);
@@ -362,9 +361,10 @@
{
struct tipc_net *tn = tipc_net(srv->net);
struct tipc_subscription *sub;
+ u32 s_filter = tipc_sub_read(s, filter);
- if (tipc_sub_read(s, filter) & TIPC_SUB_CANCEL) {
- s->filter &= __constant_ntohl(~TIPC_SUB_CANCEL);
+ if (s_filter & TIPC_SUB_CANCEL) {
+ tipc_sub_write(s, filter, s_filter & ~TIPC_SUB_CANCEL);
tipc_conn_delete_sub(con, s);
return 0;
}
@@ -496,7 +496,6 @@
static int tipc_topsrv_create_listener(struct tipc_topsrv *srv)
{
- int imp = TIPC_CRITICAL_IMPORTANCE;
struct socket *lsock = NULL;
struct sockaddr_tipc saddr;
struct sock *sk;
@@ -513,8 +512,9 @@
sk->sk_user_data = srv;
write_unlock_bh(&sk->sk_callback_lock);
- rc = kernel_setsockopt(lsock, SOL_TIPC, TIPC_IMPORTANCE,
- (char *)&imp, sizeof(imp));
+ lock_sock(sk);
+ rc = tsk_set_importance(sk, TIPC_CRITICAL_IMPORTANCE);
+ release_sock(sk);
if (rc < 0)
goto err;
diff --git a/net/tipc/trace.h b/net/tipc/trace.h
index 4d8e004..04af83f 100644
--- a/net/tipc/trace.h
+++ b/net/tipc/trace.h
@@ -255,7 +255,7 @@
TP_fast_assign(
__assign_str(header, header);
- tipc_link_name_ext(l, __entry->name);
+ memcpy(__entry->name, tipc_link_name(l), TIPC_MAX_LINK_NAME);
tipc_link_dump(l, dqueues, __get_str(buf));
),
@@ -295,12 +295,14 @@
),
TP_fast_assign(
- tipc_link_name_ext(r, __entry->name);
+ memcpy(__entry->name, tipc_link_name(r), TIPC_MAX_LINK_NAME);
__entry->from = f;
__entry->to = t;
__entry->len = skb_queue_len(tq);
- __entry->fseqno = msg_seqno(buf_msg(skb_peek(tq)));
- __entry->lseqno = msg_seqno(buf_msg(skb_peek_tail(tq)));
+ __entry->fseqno = __entry->len ?
+ msg_seqno(buf_msg(skb_peek(tq))) : 0;
+ __entry->lseqno = __entry->len ?
+ msg_seqno(buf_msg(skb_peek_tail(tq))) : 0;
),
TP_printk("<%s> retrans req: [%u-%u] transmq: %u [%u-%u]\n",
@@ -308,15 +310,16 @@
__entry->len, __entry->fseqno, __entry->lseqno)
);
-DEFINE_EVENT(tipc_link_transmq_class, tipc_link_retrans,
+DEFINE_EVENT_CONDITION(tipc_link_transmq_class, tipc_link_retrans,
TP_PROTO(struct tipc_link *r, u16 f, u16 t, struct sk_buff_head *tq),
- TP_ARGS(r, f, t, tq)
+ TP_ARGS(r, f, t, tq),
+ TP_CONDITION(less_eq(f, t))
);
DEFINE_EVENT_PRINT(tipc_link_transmq_class, tipc_link_bc_ack,
TP_PROTO(struct tipc_link *r, u16 f, u16 t, struct sk_buff_head *tq),
TP_ARGS(r, f, t, tq),
- TP_printk("<%s> acked: [%u-%u] transmq: %u [%u-%u]\n",
+ TP_printk("<%s> acked: %u gap: %u transmq: %u [%u-%u]\n",
__entry->name, __entry->from, __entry->to,
__entry->len, __entry->fseqno, __entry->lseqno)
);
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 1fb0535..a236281 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -52,6 +52,7 @@
#include "bearer.h"
#include "netlink.h"
#include "msg.h"
+#include "udp_media.h"
/* IANA assigned UDP port */
#define UDP_PORT_DEFAULT 6118
@@ -379,6 +380,7 @@
goto out;
if (b && test_bit(0, &b->up)) {
+ TIPC_SKB_CB(skb)->flags = 0;
tipc_rcv(sock_net(sk), skb, b);
return 0;
}
@@ -455,15 +457,11 @@
int i;
if (!bid && !skip_cnt) {
+ struct nlattr **attrs = genl_dumpit_info(cb)->attrs;
struct net *net = sock_net(skb->sk);
struct nlattr *battrs[TIPC_NLA_BEARER_MAX + 1];
- struct nlattr **attrs;
char *bname;
- err = tipc_nlmsg_parse(cb->nlh, &attrs);
- if (err)
- return err;
-
if (!attrs[TIPC_NLA_BEARER])
return -EINVAL;
@@ -568,7 +566,7 @@
/**
* tipc_parse_udp_addr - build udp media address from netlink data
- * @nlattr: netlink attribute containing sockaddr storage aligned address
+ * @nla: netlink attribute containing sockaddr storage aligned address
* @addr: tipc media address to fill with address, port and protocol type
* @scope_id: IPv6 scope id pointer, not NULL indicates it's required
*/
@@ -663,6 +661,7 @@
struct udp_tunnel_sock_cfg tuncfg = {NULL};
struct nlattr *opts[TIPC_NLA_UDP_MAX + 1];
u8 node_id[NODE_ID_LEN] = {0,};
+ struct net_device *dev;
int rmcast = 0;
ub = kzalloc(sizeof(*ub), GFP_ATOMIC);
@@ -717,8 +716,6 @@
rcu_assign_pointer(ub->bearer, b);
tipc_udp_media_addr_set(&b->addr, &local);
if (local.proto == htons(ETH_P_IP)) {
- struct net_device *dev;
-
dev = __ip_dev_find(net, local.ipv4.s_addr, false);
if (!dev) {
err = -ENODEV;
@@ -741,6 +738,12 @@
b->mtu = b->media->mtu;
#if IS_ENABLED(CONFIG_IPV6)
} else if (local.proto == htons(ETH_P_IPV6)) {
+ dev = ub->ifindex ? __dev_get_by_index(net, ub->ifindex) : NULL;
+ dev = ipv6_dev_find(net, &local.ipv6, dev);
+ if (!dev) {
+ err = -ENODEV;
+ goto err;
+ }
udp_conf.family = AF_INET6;
udp_conf.use_udp6_tx_checksums = true;
udp_conf.use_udp6_rx_checksums = true;
@@ -748,6 +751,7 @@
udp_conf.local_ip6 = in6addr_any;
else
udp_conf.local_ip6 = local.ipv6;
+ ub->ifindex = dev->ifindex;
b->mtu = 1280;
#endif
} else {
@@ -837,7 +841,8 @@
.msg2addr = tipc_udp_msg2addr,
.priority = TIPC_DEF_LINK_PRI,
.tolerance = TIPC_DEF_LINK_TOL,
- .window = TIPC_DEF_LINK_WIN,
+ .min_win = TIPC_DEF_LINK_WIN,
+ .max_win = TIPC_DEF_LINK_WIN,
.mtu = TIPC_DEF_LINK_UDP_MTU,
.type_id = TIPC_MEDIA_TYPE_UDP,
.hwaddr_len = 0,