Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 366ce0b..4b92b19 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -46,8 +46,9 @@
#include "bcast.h"
#include "netlink.h"
#include "group.h"
+#include "trace.h"
-#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */
+#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */
#define CONN_PROBING_INTV msecs_to_jiffies(3600000) /* [ms] => 1 h */
#define TIPC_FWD_MSG 1
#define TIPC_MAX_PORT 0xffffffff
@@ -80,7 +81,6 @@
* @publications: list of publications for port
* @blocking_link: address of the congested link we are currently sleeping on
* @pub_count: total # of publications port has made during its lifetime
- * @probing_state:
* @conn_timeout: the time we can wait for an unresponded setup request
* @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
* @cong_link_cnt: number of congested links
@@ -102,8 +102,8 @@
struct list_head cong_links;
struct list_head publications;
u32 pub_count;
- uint conn_timeout;
atomic_t dupl_rcvcnt;
+ u16 conn_timeout;
bool probe_unacked;
u16 cong_link_cnt;
u16 snt_unacked;
@@ -234,6 +234,7 @@
*/
static void tsk_advance_rx_queue(struct sock *sk)
{
+ trace_tipc_sk_advance_rx(sk, NULL, TIPC_DUMP_SK_RCVQ, " ");
kfree_skb(__skb_dequeue(&sk->sk_receive_queue));
}
@@ -248,6 +249,7 @@
if (!tipc_msg_reverse(onode, &skb, err))
return;
+ trace_tipc_sk_rej_msg(sk, skb, TIPC_DUMP_NONE, "@sk_respond!");
dnode = msg_destnode(buf_msg(skb));
selector = msg_origport(buf_msg(skb));
tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector);
@@ -377,16 +379,18 @@
#define tipc_wait_for_cond(sock_, timeo_, condition_) \
({ \
+ DEFINE_WAIT_FUNC(wait_, woken_wake_function); \
struct sock *sk_; \
int rc_; \
\
while ((rc_ = !(condition_))) { \
- DEFINE_WAIT_FUNC(wait_, woken_wake_function); \
+ /* coupled with smp_wmb() in tipc_sk_proto_rcv() */ \
+ smp_rmb(); \
sk_ = (sock_)->sk; \
rc_ = tipc_sk_sock_err((sock_), timeo_); \
if (rc_) \
break; \
- prepare_to_wait(sk_sleep(sk_), &wait_, TASK_INTERRUPTIBLE); \
+ add_wait_queue(sk_sleep(sk_), &wait_); \
release_sock(sk_); \
*(timeo_) = wait_woken(&wait_, TASK_INTERRUPTIBLE, *(timeo_)); \
sched_annotate_sleep(); \
@@ -482,7 +486,8 @@
if (sock->type == SOCK_DGRAM)
tsk_set_unreliable(tsk, true);
}
-
+ __skb_queue_head_init(&tsk->mc_method.deferredq);
+ trace_tipc_sk_create(sk, NULL, TIPC_DUMP_NONE, " ");
return 0;
}
@@ -507,6 +512,9 @@
tipc_wait_for_cond(sock, &timeout, (!tsk->cong_link_cnt &&
!tsk_conn_cong(tsk)));
+ /* Remove any pending SYN message */
+ __skb_queue_purge(&sk->sk_write_queue);
+
/* Reject all unreceived messages, except on an active connection
* (which disconnects locally & sends a 'FIN+' to peer).
*/
@@ -569,10 +577,12 @@
tsk = tipc_sk(sk);
lock_sock(sk);
+ trace_tipc_sk_release(sk, NULL, TIPC_DUMP_ALL, " ");
__tipc_shutdown(sock, TIPC_ERR_NO_PORT);
sk->sk_shutdown = SHUTDOWN_MASK;
tipc_sk_leave(tsk);
tipc_sk_withdraw(tsk, 0, NULL);
+ __skb_queue_purge(&tsk->mc_method.deferredq);
sk_stop_timer(sk, &sk->sk_timer);
tipc_sk_remove(tsk);
@@ -716,6 +726,7 @@
__poll_t revents = 0;
sock_poll_wait(file, sock, wait);
+ trace_tipc_sk_poll(sk, NULL, TIPC_DUMP_ALL, " ");
if (sk->sk_shutdown & RCV_SHUTDOWN)
revents |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
@@ -724,12 +735,12 @@
switch (sk->sk_state) {
case TIPC_ESTABLISHED:
- case TIPC_CONNECTING:
if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
revents |= EPOLLOUT;
- /* fall thru' */
+ /* fall through */
case TIPC_LISTEN:
- if (!skb_queue_empty(&sk->sk_receive_queue))
+ case TIPC_CONNECTING:
+ if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
revents |= EPOLLIN | EPOLLRDNORM;
break;
case TIPC_OPEN:
@@ -737,7 +748,7 @@
revents |= EPOLLOUT;
if (!tipc_sk_type_connectionless(sk))
break;
- if (skb_queue_empty(&sk->sk_receive_queue))
+ if (skb_queue_empty_lockless(&sk->sk_receive_queue))
break;
revents |= EPOLLIN | EPOLLRDNORM;
break;
@@ -798,13 +809,16 @@
msg_set_nameupper(hdr, seq->upper);
/* Build message as chain of buffers */
- skb_queue_head_init(&pkts);
+ __skb_queue_head_init(&pkts);
rc = tipc_msg_build(hdr, msg, 0, dlen, mtu, &pkts);
/* Send message if build was successful */
- if (unlikely(rc == dlen))
+ if (unlikely(rc == dlen)) {
+ trace_tipc_sk_sendmcast(sk, skb_peek(&pkts),
+ TIPC_DUMP_SK_SNDQ, " ");
rc = tipc_mcast_xmit(net, &pkts, method, &dsts,
&tsk->cong_link_cnt);
+ }
tipc_nlist_purge(&dsts);
@@ -839,7 +853,7 @@
msg_set_grp_bc_seqno(hdr, bc_snd_nxt);
/* Build message as chain of buffers */
- skb_queue_head_init(&pkts);
+ __skb_queue_head_init(&pkts);
mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
if (unlikely(rc != dlen))
@@ -878,7 +892,6 @@
DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
int blks = tsk_blocks(GROUP_H_SIZE + dlen);
struct tipc_sock *tsk = tipc_sk(sk);
- struct tipc_group *grp = tsk->group;
struct net *net = sock_net(sk);
struct tipc_member *mb = NULL;
u32 node, port;
@@ -892,7 +905,9 @@
/* Block or return if destination link or member is congested */
rc = tipc_wait_for_cond(sock, &timeout,
!tipc_dest_find(&tsk->cong_links, node, 0) &&
- !tipc_group_cong(grp, node, port, blks, &mb));
+ tsk->group &&
+ !tipc_group_cong(tsk->group, node, port, blks,
+ &mb));
if (unlikely(rc))
return rc;
@@ -922,7 +937,6 @@
struct tipc_sock *tsk = tipc_sk(sk);
struct list_head *cong_links = &tsk->cong_links;
int blks = tsk_blocks(GROUP_H_SIZE + dlen);
- struct tipc_group *grp = tsk->group;
struct tipc_msg *hdr = &tsk->phdr;
struct tipc_member *first = NULL;
struct tipc_member *mbr = NULL;
@@ -939,9 +953,10 @@
type = msg_nametype(hdr);
inst = dest->addr.name.name.instance;
scope = msg_lookup_scope(hdr);
- exclude = tipc_group_exclude(grp);
while (++lookups < 4) {
+ exclude = tipc_group_exclude(tsk->group);
+
first = NULL;
/* Look for a non-congested destination member, if any */
@@ -950,7 +965,8 @@
&dstcnt, exclude, false))
return -EHOSTUNREACH;
tipc_dest_pop(&dsts, &node, &port);
- cong = tipc_group_cong(grp, node, port, blks, &mbr);
+ cong = tipc_group_cong(tsk->group, node, port, blks,
+ &mbr);
if (!cong)
break;
if (mbr == first)
@@ -969,7 +985,8 @@
/* Block or return if destination link or member is congested */
rc = tipc_wait_for_cond(sock, &timeout,
!tipc_dest_find(cong_links, node, 0) &&
- !tipc_group_cong(grp, node, port,
+ tsk->group &&
+ !tipc_group_cong(tsk->group, node, port,
blks, &mbr));
if (unlikely(rc))
return rc;
@@ -1004,8 +1021,7 @@
struct sock *sk = sock->sk;
struct net *net = sock_net(sk);
struct tipc_sock *tsk = tipc_sk(sk);
- struct tipc_group *grp = tsk->group;
- struct tipc_nlist *dsts = tipc_group_dests(grp);
+ struct tipc_nlist *dsts;
struct tipc_mc_method *method = &tsk->mc_method;
bool ack = method->mandatory && method->rcast;
int blks = tsk_blocks(MCAST_H_SIZE + dlen);
@@ -1014,15 +1030,17 @@
struct sk_buff_head pkts;
int rc = -EHOSTUNREACH;
- if (!dsts->local && !dsts->remote)
- return -EHOSTUNREACH;
-
/* Block or return if any destination link or member is congested */
- rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt &&
- !tipc_group_bc_cong(grp, blks));
+ rc = tipc_wait_for_cond(sock, &timeout,
+ !tsk->cong_link_cnt && tsk->group &&
+ !tipc_group_bc_cong(tsk->group, blks));
if (unlikely(rc))
return rc;
+ dsts = tipc_group_dests(tsk->group);
+ if (!dsts->local && !dsts->remote)
+ return -EHOSTUNREACH;
+
/* Complete message header */
if (dest) {
msg_set_type(hdr, TIPC_GRP_MCAST_MSG);
@@ -1034,13 +1052,13 @@
msg_set_hdr_sz(hdr, GROUP_H_SIZE);
msg_set_destport(hdr, 0);
msg_set_destnode(hdr, 0);
- msg_set_grp_bc_seqno(hdr, tipc_group_bc_snd_nxt(grp));
+ msg_set_grp_bc_seqno(hdr, tipc_group_bc_snd_nxt(tsk->group));
/* Avoid getting stuck with repeated forced replicasts */
msg_set_grp_bc_ack_req(hdr, ack);
/* Build message as chain of buffers */
- skb_queue_head_init(&pkts);
+ __skb_queue_head_init(&pkts);
rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
if (unlikely(rc != dlen))
return rc;
@@ -1206,8 +1224,10 @@
bool conn_cong;
/* Ignore if connection cannot be validated: */
- if (!tsk_peer_msg(tsk, hdr))
+ if (!tsk_peer_msg(tsk, hdr)) {
+ trace_tipc_sk_drop_msg(sk, skb, TIPC_DUMP_NONE, "@proto_rcv!");
goto exit;
+ }
if (unlikely(msg_errcode(hdr))) {
tipc_set_sk_state(sk, TIPC_DISCONNECTING);
@@ -1314,7 +1334,7 @@
if (unlikely(!dest)) {
dest = &tsk->peer;
- if (!syn || dest->family != AF_TIPC)
+ if (!syn && dest->family != AF_TIPC)
return -EDESTADDRREQ;
}
@@ -1329,6 +1349,7 @@
tsk->conn_type = dest->addr.name.name.type;
tsk->conn_instance = dest->addr.name.name.instance;
}
+ msg_set_syn(hdr, 1);
}
seq = &dest->addr.nameseq;
@@ -1366,12 +1387,15 @@
if (unlikely(rc))
return rc;
- skb_queue_head_init(&pkts);
+ __skb_queue_head_init(&pkts);
mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
if (unlikely(rc != dlen))
return rc;
+ if (unlikely(syn && !tipc_msg_skb_clone(&pkts, &sk->sk_write_queue)))
+ return -ENOMEM;
+ trace_tipc_sk_sendmsg(sk, skb_peek(&pkts), TIPC_DUMP_SK_SNDQ, " ");
rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
if (unlikely(rc == -ELINKCONG)) {
tipc_dest_push(clinks, dnode, 0);
@@ -1421,7 +1445,7 @@
int send, sent = 0;
int rc = 0;
- skb_queue_head_init(&pkts);
+ __skb_queue_head_init(&pkts);
if (unlikely(dlen > INT_MAX))
return -EMSGSIZE;
@@ -1449,6 +1473,8 @@
if (unlikely(rc != send))
break;
+ trace_tipc_sk_sendstream(sk, skb_peek(&pkts),
+ TIPC_DUMP_SK_SNDQ, " ");
rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
if (unlikely(rc == -ELINKCONG)) {
tsk->cong_link_cnt = 1;
@@ -1490,6 +1516,7 @@
struct net *net = sock_net(sk);
struct tipc_msg *msg = &tsk->phdr;
+ msg_set_syn(msg, 0);
msg_set_destnode(msg, peer_node);
msg_set_destport(msg, peer_port);
msg_set_type(msg, TIPC_CONN_MSG);
@@ -1501,6 +1528,7 @@
tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid);
tsk->peer_caps = tipc_node_get_capabilities(net, peer_node);
+ __skb_queue_purge(&sk->sk_write_queue);
if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
return;
@@ -1652,7 +1680,7 @@
static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
{
struct sock *sk = sock->sk;
- DEFINE_WAIT(wait);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
long timeo = *timeop;
int err = sock_error(sk);
@@ -1660,15 +1688,17 @@
return err;
for (;;) {
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
if (sk->sk_shutdown & RCV_SHUTDOWN) {
err = -ENOTCONN;
break;
}
+ add_wait_queue(sk_sleep(sk), &wait);
release_sock(sk);
- timeo = schedule_timeout(timeo);
+ timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
+ sched_annotate_sleep();
lock_sock(sk);
+ remove_wait_queue(sk_sleep(sk), &wait);
}
err = 0;
if (!skb_queue_empty(&sk->sk_receive_queue))
@@ -1684,7 +1714,6 @@
if (err)
break;
}
- finish_wait(sk_sleep(sk), &wait);
*timeop = timeo;
return err;
}
@@ -1776,7 +1805,7 @@
/* Send group flow control advertisement when applicable */
if (tsk->group && msg_in_group(hdr) && !grp_evt) {
- skb_queue_head_init(&xmitq);
+ __skb_queue_head_init(&xmitq);
tipc_group_update_rcv_win(tsk->group, tsk_blocks(hlen + dlen),
msg_orignode(hdr), msg_origport(hdr),
&xmitq);
@@ -1957,6 +1986,8 @@
return;
case SOCK_WAKEUP:
tipc_dest_del(&tsk->cong_links, msg_orignode(hdr), 0);
+ /* coupled with smp_rmb() in tipc_wait_for_cond() */
+ smp_wmb();
tsk->cong_link_cnt--;
wakeup = true;
break;
@@ -1978,91 +2009,90 @@
}
/**
- * tipc_filter_connect - Handle incoming message for a connection-based socket
+ * tipc_sk_filter_connect - check incoming message for a connection-based socket
* @tsk: TIPC socket
- * @skb: pointer to message buffer. Set to NULL if buffer is consumed
- *
- * Returns true if everything ok, false otherwise
+ * @skb: pointer to message buffer.
+ * Returns true if message should be added to receive queue, false otherwise
*/
static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
{
struct sock *sk = &tsk->sk;
struct net *net = sock_net(sk);
struct tipc_msg *hdr = buf_msg(skb);
- u32 pport = msg_origport(hdr);
- u32 pnode = msg_orignode(hdr);
+ bool con_msg = msg_connected(hdr);
+ u32 pport = tsk_peer_port(tsk);
+ u32 pnode = tsk_peer_node(tsk);
+ u32 oport = msg_origport(hdr);
+ u32 onode = msg_orignode(hdr);
+ int err = msg_errcode(hdr);
+ unsigned long delay;
if (unlikely(msg_mcast(hdr)))
return false;
switch (sk->sk_state) {
case TIPC_CONNECTING:
- /* Accept only ACK or NACK message */
- if (unlikely(!msg_connected(hdr))) {
- if (pport != tsk_peer_port(tsk) ||
- pnode != tsk_peer_node(tsk))
- return false;
-
- tipc_set_sk_state(sk, TIPC_DISCONNECTING);
- sk->sk_err = ECONNREFUSED;
+ /* Setup ACK */
+ if (likely(con_msg)) {
+ if (err)
+ break;
+ tipc_sk_finish_conn(tsk, oport, onode);
+ msg_set_importance(&tsk->phdr, msg_importance(hdr));
+ /* ACK+ message with data is added to receive queue */
+ if (msg_data_sz(hdr))
+ return true;
+ /* Empty ACK-, - wake up sleeping connect() and drop */
sk->sk_state_change(sk);
- return true;
+ msg_set_dest_droppable(hdr, 1);
+ return false;
}
-
- if (unlikely(msg_errcode(hdr))) {
- tipc_set_sk_state(sk, TIPC_DISCONNECTING);
- sk->sk_err = ECONNREFUSED;
- sk->sk_state_change(sk);
- return true;
- }
-
- if (unlikely(!msg_isdata(hdr))) {
- tipc_set_sk_state(sk, TIPC_DISCONNECTING);
- sk->sk_err = EINVAL;
- sk->sk_state_change(sk);
- return true;
- }
-
- tipc_sk_finish_conn(tsk, msg_origport(hdr), msg_orignode(hdr));
- msg_set_importance(&tsk->phdr, msg_importance(hdr));
-
- /* If 'ACK+' message, add to socket receive queue */
- if (msg_data_sz(hdr))
- return true;
-
- /* If empty 'ACK-' message, wake up sleeping connect() */
- sk->sk_data_ready(sk);
-
- /* 'ACK-' message is neither accepted nor rejected: */
- msg_set_dest_droppable(hdr, 1);
- return false;
-
- case TIPC_OPEN:
- case TIPC_DISCONNECTING:
- break;
- case TIPC_LISTEN:
- /* Accept only SYN message */
- if (!msg_connected(hdr) && !(msg_errcode(hdr)))
- return true;
- break;
- case TIPC_ESTABLISHED:
- /* Accept only connection-based messages sent by peer */
- if (unlikely(!tsk_peer_msg(tsk, hdr)))
+ /* Ignore connectionless message if not from listening socket */
+ if (oport != pport || onode != pnode)
return false;
- if (unlikely(msg_errcode(hdr))) {
- tipc_set_sk_state(sk, TIPC_DISCONNECTING);
- /* Let timer expire on it's own */
- tipc_node_remove_conn(net, tsk_peer_node(tsk),
- tsk->portid);
- sk->sk_state_change(sk);
- }
+ /* Rejected SYN */
+ if (err != TIPC_ERR_OVERLOAD)
+ break;
+
+ /* Prepare for new setup attempt if we have a SYN clone */
+ if (skb_queue_empty(&sk->sk_write_queue))
+ break;
+ get_random_bytes(&delay, 2);
+ delay %= (tsk->conn_timeout / 4);
+ delay = msecs_to_jiffies(delay + 100);
+ sk_reset_timer(sk, &sk->sk_timer, jiffies + delay);
+ return false;
+ case TIPC_OPEN:
+ case TIPC_DISCONNECTING:
+ return false;
+ case TIPC_LISTEN:
+ /* Accept only SYN message */
+ if (!msg_is_syn(hdr) &&
+ tipc_node_get_capabilities(net, onode) & TIPC_SYN_BIT)
+ return false;
+ if (!con_msg && !err)
+ return true;
+ return false;
+ case TIPC_ESTABLISHED:
+ /* Accept only connection-based messages sent by peer */
+ if (likely(con_msg && !err && pport == oport && pnode == onode))
+ return true;
+ if (!tsk_peer_msg(tsk, hdr))
+ return false;
+ if (!err)
+ return true;
+ tipc_set_sk_state(sk, TIPC_DISCONNECTING);
+ tipc_node_remove_conn(net, pnode, tsk->portid);
+ sk->sk_state_change(sk);
return true;
default:
pr_err("Unknown sk_state %u\n", sk->sk_state);
}
-
- return false;
+ /* Abort connection setup attempt */
+ tipc_set_sk_state(sk, TIPC_DISCONNECTING);
+ sk->sk_err = ECONNREFUSED;
+ sk->sk_state_change(sk);
+ return true;
}
/**
@@ -2089,13 +2119,13 @@
struct tipc_msg *hdr = buf_msg(skb);
if (unlikely(msg_in_group(hdr)))
- return sk->sk_rcvbuf;
+ return READ_ONCE(sk->sk_rcvbuf);
if (unlikely(!msg_connected(hdr)))
- return sk->sk_rcvbuf << msg_importance(hdr);
+ return READ_ONCE(sk->sk_rcvbuf) << msg_importance(hdr);
if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
- return sk->sk_rcvbuf;
+ return READ_ONCE(sk->sk_rcvbuf);
return FLOWCTL_MSG_LIM;
}
@@ -2120,8 +2150,10 @@
struct tipc_msg *hdr = buf_msg(skb);
struct net *net = sock_net(sk);
struct sk_buff_head inputq;
+ int mtyp = msg_type(hdr);
int limit, err = TIPC_OK;
+ trace_tipc_sk_filter_rcv(sk, skb, TIPC_DUMP_ALL, " ");
TIPC_SKB_CB(skb)->bytes_read = 0;
__skb_queue_head_init(&inputq);
__skb_queue_tail(&inputq, skb);
@@ -2132,6 +2164,9 @@
if (unlikely(grp))
tipc_group_filter_msg(grp, &inputq, xmitq);
+ if (unlikely(!grp) && mtyp == TIPC_MCAST_MSG)
+ tipc_mcast_filter_msg(net, &tsk->mc_method.deferredq, &inputq);
+
/* Validate and add to receive buffer if there is space */
while ((skb = __skb_dequeue(&inputq))) {
hdr = buf_msg(skb);
@@ -2141,17 +2176,25 @@
(!grp && msg_in_group(hdr)))
err = TIPC_ERR_NO_PORT;
else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit) {
+ trace_tipc_sk_dump(sk, skb, TIPC_DUMP_ALL,
+ "err_overload2!");
atomic_inc(&sk->sk_drops);
err = TIPC_ERR_OVERLOAD;
}
if (unlikely(err)) {
- tipc_skb_reject(net, err, skb, xmitq);
+ if (tipc_msg_reverse(tipc_own_addr(net), &skb, err)) {
+ trace_tipc_sk_rej_msg(sk, skb, TIPC_DUMP_NONE,
+ "@filter_rcv!");
+ __skb_queue_tail(xmitq, skb);
+ }
err = TIPC_OK;
continue;
}
__skb_queue_tail(&sk->sk_receive_queue, skb);
skb_set_owner_r(skb, sk);
+ trace_tipc_sk_overlimit2(sk, skb, TIPC_DUMP_ALL,
+ "rcvq >90% allocated!");
sk->sk_data_ready(sk);
}
}
@@ -2217,14 +2260,21 @@
if (!sk->sk_backlog.len)
atomic_set(dcnt, 0);
lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt);
- if (likely(!sk_add_backlog(sk, skb, lim)))
+ if (likely(!sk_add_backlog(sk, skb, lim))) {
+ trace_tipc_sk_overlimit1(sk, skb, TIPC_DUMP_ALL,
+ "bklg & rcvq >90% allocated!");
continue;
+ }
+ trace_tipc_sk_dump(sk, skb, TIPC_DUMP_ALL, "err_overload!");
/* Overload => reject message back to sender */
onode = tipc_own_addr(sock_net(sk));
atomic_inc(&sk->sk_drops);
- if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD))
+ if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD)) {
+ trace_tipc_sk_rej_msg(sk, skb, TIPC_DUMP_ALL,
+ "@sk_enqueue!");
__skb_queue_tail(xmitq, skb);
+ }
break;
}
}
@@ -2273,6 +2323,8 @@
/* Prepare for message rejection */
if (!tipc_msg_reverse(tipc_own_addr(net), &skb, err))
continue;
+
+ trace_tipc_sk_rej_msg(NULL, skb, TIPC_DUMP_NONE, "@sk_rcv!");
xmit:
dnode = msg_destnode(buf_msg(skb));
tipc_node_xmit_skb(net, skb, dnode, dport);
@@ -2302,6 +2354,16 @@
return 0;
}
+static bool tipc_sockaddr_is_sane(struct sockaddr_tipc *addr)
+{
+ if (addr->family != AF_TIPC)
+ return false;
+ if (addr->addrtype == TIPC_SERVICE_RANGE)
+ return (addr->addr.nameseq.lower <= addr->addr.nameseq.upper);
+ return (addr->addrtype == TIPC_SERVICE_ADDR ||
+ addr->addrtype == TIPC_SOCKET_ADDR);
+}
+
/**
* tipc_connect - establish a connection to another TIPC port
* @sock: socket structure
@@ -2337,18 +2399,18 @@
if (!tipc_sk_type_connectionless(sk))
res = -EINVAL;
goto exit;
- } else if (dst->family != AF_TIPC) {
- res = -EINVAL;
}
- if (dst->addrtype != TIPC_ADDR_ID && dst->addrtype != TIPC_ADDR_NAME)
+ if (!tipc_sockaddr_is_sane(dst)) {
res = -EINVAL;
- if (res)
goto exit;
-
+ }
/* DGRAM/RDM connect(), just save the destaddr */
if (tipc_sk_type_connectionless(sk)) {
memcpy(&tsk->peer, dest, destlen);
goto exit;
+ } else if (dst->addrtype == TIPC_SERVICE_RANGE) {
+ res = -EINVAL;
+ goto exit;
}
previous = sk->sk_state;
@@ -2374,7 +2436,7 @@
* case is EINPROGRESS, rather than EALREADY.
*/
res = -EINPROGRESS;
- /* fall thru' */
+ /* fall through */
case TIPC_CONNECTING:
if (!timeout) {
if (previous == TIPC_CONNECTING)
@@ -2546,6 +2608,7 @@
lock_sock(sk);
+ trace_tipc_sk_shutdown(sk, NULL, TIPC_DUMP_ALL, " ");
__tipc_shutdown(sock, TIPC_CONN_SHUTDOWN);
sk->sk_shutdown = SEND_SHUTDOWN;
@@ -2564,43 +2627,78 @@
return res;
}
+static void tipc_sk_check_probing_state(struct sock *sk,
+ struct sk_buff_head *list)
+{
+ struct tipc_sock *tsk = tipc_sk(sk);
+ u32 pnode = tsk_peer_node(tsk);
+ u32 pport = tsk_peer_port(tsk);
+ u32 self = tsk_own_node(tsk);
+ u32 oport = tsk->portid;
+ struct sk_buff *skb;
+
+ if (tsk->probe_unacked) {
+ tipc_set_sk_state(sk, TIPC_DISCONNECTING);
+ sk->sk_err = ECONNABORTED;
+ tipc_node_remove_conn(sock_net(sk), pnode, pport);
+ sk->sk_state_change(sk);
+ return;
+ }
+ /* Prepare new probe */
+ skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0,
+ pnode, self, pport, oport, TIPC_OK);
+ if (skb)
+ __skb_queue_tail(list, skb);
+ tsk->probe_unacked = true;
+ sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV);
+}
+
+static void tipc_sk_retry_connect(struct sock *sk, struct sk_buff_head *list)
+{
+ struct tipc_sock *tsk = tipc_sk(sk);
+
+ /* Try again later if dest link is congested */
+ if (tsk->cong_link_cnt) {
+ sk_reset_timer(sk, &sk->sk_timer, msecs_to_jiffies(100));
+ return;
+ }
+ /* Prepare SYN for retransmit */
+ tipc_msg_skb_clone(&sk->sk_write_queue, list);
+}
+
static void tipc_sk_timeout(struct timer_list *t)
{
struct sock *sk = from_timer(sk, t, sk_timer);
struct tipc_sock *tsk = tipc_sk(sk);
- u32 peer_port = tsk_peer_port(tsk);
- u32 peer_node = tsk_peer_node(tsk);
- u32 own_node = tsk_own_node(tsk);
- u32 own_port = tsk->portid;
- struct net *net = sock_net(sk);
- struct sk_buff *skb = NULL;
+ u32 pnode = tsk_peer_node(tsk);
+ struct sk_buff_head list;
+ int rc = 0;
+ __skb_queue_head_init(&list);
bh_lock_sock(sk);
- if (!tipc_sk_connected(sk))
- goto exit;
/* Try again later if socket is busy */
if (sock_owned_by_user(sk)) {
sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 20);
- goto exit;
+ bh_unlock_sock(sk);
+ return;
}
- if (tsk->probe_unacked) {
- tipc_set_sk_state(sk, TIPC_DISCONNECTING);
- tipc_node_remove_conn(net, peer_node, peer_port);
- sk->sk_state_change(sk);
- goto exit;
- }
- /* Send new probe */
- skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0,
- peer_node, own_node, peer_port, own_port,
- TIPC_OK);
- tsk->probe_unacked = true;
- sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV);
-exit:
+ if (sk->sk_state == TIPC_ESTABLISHED)
+ tipc_sk_check_probing_state(sk, &list);
+ else if (sk->sk_state == TIPC_CONNECTING)
+ tipc_sk_retry_connect(sk, &list);
+
bh_unlock_sock(sk);
- if (skb)
- tipc_node_xmit_skb(net, skb, peer_node, own_port);
+
+ if (!skb_queue_empty(&list))
+ rc = tipc_node_xmit(sock_net(sk), &list, pnode, tsk->portid);
+
+ /* SYN messages may cause link congestion */
+ if (rc == -ELINKCONG) {
+ tipc_dest_push(&tsk->cong_links, pnode, 0);
+ tsk->cong_link_cnt = 1;
+ }
sock_put(sk);
}
@@ -2683,11 +2781,15 @@
rhashtable_walk_start(&iter);
while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) {
- spin_lock_bh(&tsk->sk.sk_lock.slock);
+ sock_hold(&tsk->sk);
+ rhashtable_walk_stop(&iter);
+ lock_sock(&tsk->sk);
msg = &tsk->phdr;
msg_set_prevnode(msg, tipc_own_addr(net));
msg_set_orignode(msg, tipc_own_addr(net));
- spin_unlock_bh(&tsk->sk.sk_lock.slock);
+ release_sock(&tsk->sk);
+ rhashtable_walk_start(&iter);
+ sock_put(&tsk->sk);
}
rhashtable_walk_stop(&iter);
@@ -2967,6 +3069,9 @@
case TIPC_SOCK_RECVQ_DEPTH:
value = skb_queue_len(&sk->sk_receive_queue);
break;
+ case TIPC_SOCK_RECVQ_USED:
+ value = sk_rmem_alloc_get(sk);
+ break;
case TIPC_GROUP_JOIN:
seq.type = 0;
if (tsk->group)
@@ -3167,7 +3272,9 @@
peer_node = tsk_peer_node(tsk);
peer_port = tsk_peer_port(tsk);
- nest = nla_nest_start(skb, TIPC_NLA_SOCK_CON);
+ nest = nla_nest_start_noflag(skb, TIPC_NLA_SOCK_CON);
+ if (!nest)
+ return -EMSGSIZE;
if (nla_put_u32(skb, TIPC_NLA_CON_NODE, peer_node))
goto msg_full;
@@ -3224,7 +3331,7 @@
if (!hdr)
goto msg_cancel;
- attrs = nla_nest_start(skb, TIPC_NLA_SOCK);
+ attrs = nla_nest_start_noflag(skb, TIPC_NLA_SOCK);
if (!attrs)
goto genlmsg_cancel;
@@ -3329,7 +3436,7 @@
if (!(sk_filter_state & (1 << sk->sk_state)))
return 0;
- attrs = nla_nest_start(skb, TIPC_NLA_SOCK);
+ attrs = nla_nest_start_noflag(skb, TIPC_NLA_SOCK);
if (!attrs)
goto msg_cancel;
@@ -3347,7 +3454,7 @@
TIPC_NLA_SOCK_PAD))
goto attr_msg_cancel;
- stat = nla_nest_start(skb, TIPC_NLA_SOCK_STAT);
+ stat = nla_nest_start_noflag(skb, TIPC_NLA_SOCK_STAT);
if (!stat)
goto attr_msg_cancel;
@@ -3404,7 +3511,7 @@
if (!hdr)
goto msg_cancel;
- attrs = nla_nest_start(skb, TIPC_NLA_PUBL);
+ attrs = nla_nest_start_noflag(skb, TIPC_NLA_PUBL);
if (!attrs)
goto genlmsg_cancel;
@@ -3491,9 +3598,9 @@
if (!attrs[TIPC_NLA_SOCK])
return -EINVAL;
- err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX,
- attrs[TIPC_NLA_SOCK],
- tipc_nl_sock_policy, NULL);
+ err = nla_parse_nested_deprecated(sock, TIPC_NLA_SOCK_MAX,
+ attrs[TIPC_NLA_SOCK],
+ tipc_nl_sock_policy, NULL);
if (err)
return err;
@@ -3523,3 +3630,187 @@
return skb->len;
}
+
+/**
+ * tipc_sk_filtering - check if a socket should be traced
+ * @sk: the socket to be examined
+ * @sysctl_tipc_sk_filter[]: the socket tuple for filtering,
+ * (portid, sock type, name type, name lower, name upper)
+ *
+ * Returns true if the socket meets the socket tuple data
+ * (value 0 = 'any') or when there is no tuple set (all = 0),
+ * otherwise false
+ */
+bool tipc_sk_filtering(struct sock *sk)
+{
+ struct tipc_sock *tsk;
+ struct publication *p;
+ u32 _port, _sktype, _type, _lower, _upper;
+ u32 type = 0, lower = 0, upper = 0;
+
+ if (!sk)
+ return true;
+
+ tsk = tipc_sk(sk);
+
+ _port = sysctl_tipc_sk_filter[0];
+ _sktype = sysctl_tipc_sk_filter[1];
+ _type = sysctl_tipc_sk_filter[2];
+ _lower = sysctl_tipc_sk_filter[3];
+ _upper = sysctl_tipc_sk_filter[4];
+
+ if (!_port && !_sktype && !_type && !_lower && !_upper)
+ return true;
+
+ if (_port)
+ return (_port == tsk->portid);
+
+ if (_sktype && _sktype != sk->sk_type)
+ return false;
+
+ if (tsk->published) {
+ p = list_first_entry_or_null(&tsk->publications,
+ struct publication, binding_sock);
+ if (p) {
+ type = p->type;
+ lower = p->lower;
+ upper = p->upper;
+ }
+ }
+
+ if (!tipc_sk_type_connectionless(sk)) {
+ type = tsk->conn_type;
+ lower = tsk->conn_instance;
+ upper = tsk->conn_instance;
+ }
+
+ if ((_type && _type != type) || (_lower && _lower != lower) ||
+ (_upper && _upper != upper))
+ return false;
+
+ return true;
+}
+
+u32 tipc_sock_get_portid(struct sock *sk)
+{
+ return (sk) ? (tipc_sk(sk))->portid : 0;
+}
+
+/**
+ * tipc_sk_overlimit1 - check if socket rx queue is about to be overloaded,
+ * both the rcv and backlog queues are considered
+ * @sk: tipc sk to be checked
+ * @skb: tipc msg to be checked
+ *
+ * Returns true if the socket rx queue allocation is > 90%, otherwise false
+ */
+
+bool tipc_sk_overlimit1(struct sock *sk, struct sk_buff *skb)
+{
+ atomic_t *dcnt = &tipc_sk(sk)->dupl_rcvcnt;
+ unsigned int lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt);
+ unsigned int qsize = sk->sk_backlog.len + sk_rmem_alloc_get(sk);
+
+ return (qsize > lim * 90 / 100);
+}
+
+/**
+ * tipc_sk_overlimit2 - check if socket rx queue is about to be overloaded,
+ * only the rcv queue is considered
+ * @sk: tipc sk to be checked
+ * @skb: tipc msg to be checked
+ *
+ * Returns true if the socket rx queue allocation is > 90%, otherwise false
+ */
+
+bool tipc_sk_overlimit2(struct sock *sk, struct sk_buff *skb)
+{
+ unsigned int lim = rcvbuf_limit(sk, skb);
+ unsigned int qsize = sk_rmem_alloc_get(sk);
+
+ return (qsize > lim * 90 / 100);
+}
+
+/**
+ * tipc_sk_dump - dump TIPC socket
+ * @sk: tipc sk to be dumped
+ * @dqueues: bitmask to decide if any socket queue to be dumped?
+ * - TIPC_DUMP_NONE: don't dump socket queues
+ * - TIPC_DUMP_SK_SNDQ: dump socket send queue
+ * - TIPC_DUMP_SK_RCVQ: dump socket rcv queue
+ * - TIPC_DUMP_SK_BKLGQ: dump socket backlog queue
+ * - TIPC_DUMP_ALL: dump all the socket queues above
+ * @buf: returned buffer of dump data in format
+ */
+int tipc_sk_dump(struct sock *sk, u16 dqueues, char *buf)
+{
+ int i = 0;
+ size_t sz = (dqueues) ? SK_LMAX : SK_LMIN;
+ struct tipc_sock *tsk;
+ struct publication *p;
+ bool tsk_connected;
+
+ if (!sk) {
+ i += scnprintf(buf, sz, "sk data: (null)\n");
+ return i;
+ }
+
+ tsk = tipc_sk(sk);
+ tsk_connected = !tipc_sk_type_connectionless(sk);
+
+ i += scnprintf(buf, sz, "sk data: %u", sk->sk_type);
+ i += scnprintf(buf + i, sz - i, " %d", sk->sk_state);
+ i += scnprintf(buf + i, sz - i, " %x", tsk_own_node(tsk));
+ i += scnprintf(buf + i, sz - i, " %u", tsk->portid);
+ i += scnprintf(buf + i, sz - i, " | %u", tsk_connected);
+ if (tsk_connected) {
+ i += scnprintf(buf + i, sz - i, " %x", tsk_peer_node(tsk));
+ i += scnprintf(buf + i, sz - i, " %u", tsk_peer_port(tsk));
+ i += scnprintf(buf + i, sz - i, " %u", tsk->conn_type);
+ i += scnprintf(buf + i, sz - i, " %u", tsk->conn_instance);
+ }
+ i += scnprintf(buf + i, sz - i, " | %u", tsk->published);
+ if (tsk->published) {
+ p = list_first_entry_or_null(&tsk->publications,
+ struct publication, binding_sock);
+ i += scnprintf(buf + i, sz - i, " %u", (p) ? p->type : 0);
+ i += scnprintf(buf + i, sz - i, " %u", (p) ? p->lower : 0);
+ i += scnprintf(buf + i, sz - i, " %u", (p) ? p->upper : 0);
+ }
+ i += scnprintf(buf + i, sz - i, " | %u", tsk->snd_win);
+ i += scnprintf(buf + i, sz - i, " %u", tsk->rcv_win);
+ i += scnprintf(buf + i, sz - i, " %u", tsk->max_pkt);
+ i += scnprintf(buf + i, sz - i, " %x", tsk->peer_caps);
+ i += scnprintf(buf + i, sz - i, " %u", tsk->cong_link_cnt);
+ i += scnprintf(buf + i, sz - i, " %u", tsk->snt_unacked);
+ i += scnprintf(buf + i, sz - i, " %u", tsk->rcv_unacked);
+ i += scnprintf(buf + i, sz - i, " %u", atomic_read(&tsk->dupl_rcvcnt));
+ i += scnprintf(buf + i, sz - i, " %u", sk->sk_shutdown);
+ i += scnprintf(buf + i, sz - i, " | %d", sk_wmem_alloc_get(sk));
+ i += scnprintf(buf + i, sz - i, " %d", sk->sk_sndbuf);
+ i += scnprintf(buf + i, sz - i, " | %d", sk_rmem_alloc_get(sk));
+ i += scnprintf(buf + i, sz - i, " %d", sk->sk_rcvbuf);
+ i += scnprintf(buf + i, sz - i, " | %d\n", READ_ONCE(sk->sk_backlog.len));
+
+ if (dqueues & TIPC_DUMP_SK_SNDQ) {
+ i += scnprintf(buf + i, sz - i, "sk_write_queue: ");
+ i += tipc_list_dump(&sk->sk_write_queue, false, buf + i);
+ }
+
+ if (dqueues & TIPC_DUMP_SK_RCVQ) {
+ i += scnprintf(buf + i, sz - i, "sk_receive_queue: ");
+ i += tipc_list_dump(&sk->sk_receive_queue, false, buf + i);
+ }
+
+ if (dqueues & TIPC_DUMP_SK_BKLGQ) {
+ i += scnprintf(buf + i, sz - i, "sk_backlog:\n head ");
+ i += tipc_skb_dump(sk->sk_backlog.head, false, buf + i);
+ if (sk->sk_backlog.tail != sk->sk_backlog.head) {
+ i += scnprintf(buf + i, sz - i, " tail ");
+ i += tipc_skb_dump(sk->sk_backlog.tail, false,
+ buf + i);
+ }
+ }
+
+ return i;
+}