Update Linux to v5.4.148
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.4.148.tar.gz
Change-Id: Ib3d26c5ba9b022e2e03533005c4fed4d7c30b61b
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a2e52ad..c0fcfa2 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -260,7 +260,8 @@
* cwnd may be very low (even just 1 packet), so we should ACK
* immediately.
*/
- inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
+ if (TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq)
+ inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
}
}
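
A pure ACK occupies no sequence space, so its seq equals end_seq; the new
guard therefore forces an immediate ACK only when the CWR-marked segment
actually carries data. A minimal userspace sketch of the predicate, with
struct seg as a hypothetical stand-in for the fields TCP_SKB_CB() exposes:

    #include <stdbool.h>
    #include <stdint.h>

    /* Hypothetical stand-in for the two TCP_SKB_CB() fields used above. */
    struct seg {
            uint32_t seq;     /* first sequence number covered by the skb */
            uint32_t end_seq; /* seq + payload length (+1 for SYN/FIN)    */
    };

    /* A pure ACK advances no sequence space (seq == end_seq), so only
     * data-bearing segments should schedule an immediate ACK.
     */
    static bool carries_data(const struct seg *s)
    {
            return s->seq != s->end_seq;
    }
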
@@ -445,7 +446,6 @@
if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
tcp_sndbuf_expand(sk);
- tp->rcvq_space.space = min_t(u32, tp->rcv_wnd, TCP_INIT_CWND * tp->advmss);
tcp_mstamp_refresh(tp);
tp->rcvq_space.time = tp->tcp_mstamp;
tp->rcvq_space.seq = tp->copied_seq;
@@ -469,6 +469,8 @@
tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
tp->snd_cwnd_stamp = tcp_jiffies32;
+ tp->rcvq_space.space = min3(tp->rcv_ssthresh, tp->rcv_wnd,
+ (u32)TCP_INIT_CWND * tp->advmss);
}
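
Initializing rcvq_space.space after the rcv_ssthresh clamp lets the initial
autotuning budget honor all three bounds at once: the clamped slow-start
threshold, the advertised window, and an initial congestion window's worth
of MSS-sized segments. The bound in isolation (min3_u32 is a local helper
standing in for the kernel's min3 macro):

    #include <stdint.h>

    #define TCP_INIT_CWND 10 /* initial window in segments, per RFC 6928 */

    static uint32_t min3_u32(uint32_t a, uint32_t b, uint32_t c)
    {
            uint32_t m = a < b ? a : b;

            return m < c ? m : c;
    }

    static uint32_t initial_rcvq_space(uint32_t rcv_ssthresh, uint32_t rcv_wnd,
                                       uint32_t advmss)
    {
            return min3_u32(rcv_ssthresh, rcv_wnd,
                            (uint32_t)TCP_INIT_CWND * advmss);
    }
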
/* 4. Recalculate window clamp after socket hit its memory bounds. */
@@ -915,9 +917,10 @@
/* This must be called before lost_out is incremented */
static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
{
- if (!tp->retransmit_skb_hint ||
- before(TCP_SKB_CB(skb)->seq,
- TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
+ if ((!tp->retransmit_skb_hint && tp->retrans_out >= tp->lost_out) ||
+ (tp->retransmit_skb_hint &&
+ before(TCP_SKB_CB(skb)->seq,
+ TCP_SKB_CB(tp->retransmit_skb_hint)->seq)))
tp->retransmit_skb_hint = skb;
}
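
Two conditions now install a new retransmit hint: no hint exists and every
packet marked lost has already been retransmitted (retrans_out >= lost_out),
or the skb sits earlier in sequence space than the current hint. Sequence
positions throughout this file are compared with wraparound-safe helpers; a
self-contained sketch of what the kernel's before()/after() macros compute:

    #include <stdbool.h>
    #include <stdint.h>

    /* "a before b" iff the signed 32-bit difference is negative, so the
     * comparison stays correct when sequence numbers wrap past 2^32.
     */
    static bool seq_before(uint32_t a, uint32_t b)
    {
            return (int32_t)(a - b) < 0;
    }

    /* Example: seq_before(0xfffffff0u, 0x00000005u) is true, because
     * 0x05 lies 21 bytes ahead of 0xfffffff0 across the wrap.
     */
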
@@ -1206,7 +1209,7 @@
if (dup_sack && (sacked & TCPCB_RETRANS)) {
if (tp->undo_marker && tp->undo_retrans > 0 &&
after(end_seq, tp->undo_marker))
- tp->undo_retrans--;
+ tp->undo_retrans = max_t(int, 0, tp->undo_retrans - pcount);
if ((sacked & TCPCB_SACKED_ACKED) &&
before(start_seq, state->reord))
state->reord = start_seq;
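
A DSACK may cover an skb that was transmitted as several packets, so
decrementing the undo budget by one per DSACK undercounted; the fix
subtracts all pcount covered packets and clamps at zero. The arithmetic in
isolation:

    /* Subtract every DSACKed packet from the undo budget without going
     * negative; mirrors the max_t(int, 0, ...) clamp above.
     */
    static int undo_retrans_after_dsack(int undo_retrans, int pcount)
    {
            int v = undo_retrans - pcount;

            return v > 0 ? v : 0;
    }
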
@@ -1727,8 +1730,11 @@
}
/* Ignore very old stuff early */
- if (!after(sp[used_sacks].end_seq, prior_snd_una))
+ if (!after(sp[used_sacks].end_seq, prior_snd_una)) {
+ if (i == 0)
+ first_sack_index = -1;
continue;
+ }
used_sacks++;
}
@@ -2758,7 +2764,8 @@
} else if (tcp_is_rack(sk)) {
u32 prior_retrans = tp->retrans_out;
- tcp_rack_mark_lost(sk);
+ if (tcp_rack_mark_lost(sk))
+ *ack_flag &= ~FLAG_SET_XMIT_TIMER;
if (prior_retrans > tp->retrans_out)
*ack_flag |= FLAG_LOST_RETRANS;
}
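
tcp_rack_mark_lost() now reports whether it armed the RACK reorder timer;
when it has, clearing FLAG_SET_XMIT_TIMER withdraws the request to rearm
the generic RTO/TLP timer so the shorter reorder timer is not overwritten
(the rearm itself moves below loss detection in a later hunk). The veto in
isolation, with an assumed bit value for the flag:

    #include <stdint.h>

    #define FLAG_SET_XMIT_TIMER 0x1000 /* assumed value for this sketch */

    static void apply_rack_timer_veto(int rack_armed_timer, uint32_t *ack_flag)
    {
            if (rack_armed_timer)
                    *ack_flag &= ~FLAG_SET_XMIT_TIMER;
    }
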
@@ -2939,6 +2946,8 @@
u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
+ if (!delta)
+ delta = 1;
seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
ca_rtt_us = seq_rtt_us;
}
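
TCP_TS_HZ is 1000, so one timestamp tick is one millisecond; an RTT below a
tick therefore computed to 0 us, and callers treat a zero sample as "no
sample", starving RTT estimation on sub-millisecond paths. Rounding the
delta up to one tick keeps such samples valid:

    #include <stdint.h>

    #define USEC_PER_SEC 1000000L
    #define TCP_TS_HZ    1000 /* timestamp clock: one tick per millisecond */

    static long rtt_us_from_tsecr(uint32_t tick_delta)
    {
            if (!tick_delta)
                    tick_delta = 1; /* round sub-tick RTTs up to one tick */
            return tick_delta * (USEC_PER_SEC / TCP_TS_HZ);
    }
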
@@ -3160,6 +3169,7 @@
tp->retransmit_skb_hint = NULL;
if (unlikely(skb == tp->lost_skb_hint))
tp->lost_skb_hint = NULL;
+ tcp_highest_sack_replace(sk, skb, next);
tcp_rtx_queue_unlink_and_free(skb, sk);
}
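
tcp_clean_rtx_queue() frees every fully-acked skb; if the freed skb is also
the cached highest-SACKed skb, tp->highest_sack would be left dangling, so
tcp_highest_sack_replace() repoints the cache to the next skb first. The
generic shape of that pattern:

    #include <stdlib.h>

    struct node {
            struct node *next;
    };

    /* Before unlinking and freeing a node, repoint any auxiliary cursor
     * that may still reference it, so the cursor never dangles.
     */
    static void free_head(struct node **head, struct node **cursor)
    {
            struct node *victim = *head;

            if (!victim)
                    return;
            if (*cursor == victim)
                    *cursor = victim->next;
            *head = victim->next;
            free(victim);
    }
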
@@ -3277,6 +3287,7 @@
return;
if (!after(TCP_SKB_CB(head)->end_seq, tcp_wnd_end(tp))) {
icsk->icsk_backoff = 0;
+ icsk->icsk_probes_tstamp = 0;
inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
/* Socket must be waked up by subsequent tcp_data_snd_check().
* This function is not for random using!
@@ -3284,6 +3295,7 @@
} else {
unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX);
+ when = tcp_clamp_probe0_to_user_timeout(sk, when);
tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
when, TCP_RTO_MAX, NULL);
}
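
Both probe0 hunks serve TCP_USER_TIMEOUT on zero-window connections:
resetting icsk_probes_tstamp once the window opens restarts the timeout
accounting cleanly, and clamping the next probe interval keeps exponential
backoff from overshooting the configured deadline. A userspace analogue of
the clamp performed by tcp_clamp_probe0_to_user_timeout():

    /* All values share one tick unit; user_timeout == 0 means unset. */
    static unsigned long clamp_probe0(unsigned long when,
                                      unsigned long user_timeout,
                                      unsigned long elapsed)
    {
            unsigned long remaining;

            if (!user_timeout)
                    return when;
            if (elapsed >= user_timeout)
                    return 1; /* deadline passed: fire immediately */
            remaining = user_timeout - elapsed;
            return when < remaining ? when : remaining;
    }
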
@@ -3499,10 +3511,8 @@
}
}
-/* This routine deals with acks during a TLP episode.
- * We mark the end of a TLP episode on receiving TLP dupack or when
- * ack is after tlp_high_seq.
- * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe.
+/* This routine deals with acks during a TLP episode and ends an episode by
+ * resetting tlp_high_seq. Ref: TLP algorithm in draft-ietf-tcpm-rack
*/
static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
{
@@ -3511,7 +3521,10 @@
if (before(ack, tp->tlp_high_seq))
return;
- if (flag & FLAG_DSACKING_ACK) {
+ if (!tp->tlp_retrans) {
+ /* TLP of new data has been acknowledged */
+ tp->tlp_high_seq = 0;
+ } else if (flag & FLAG_DSACKING_ACK) {
/* This DSACK means original and TLP probe arrived; no loss */
tp->tlp_high_seq = 0;
} else if (after(ack, tp->tlp_high_seq)) {
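
tp->tlp_retrans records whether the loss probe was a retransmission or new
data. If it was new data, any ACK at or above tlp_high_seq covers the probe
and ends the episode; if it was a retransmission, a DSACK means original
and probe both arrived (no loss), while a plain ACK past tlp_high_seq means
the probe repaired a real loss. The decision table as a sketch:

    #include <stdbool.h>

    enum tlp_verdict {
            TLP_KEEP_WAITING,   /* ACK below tlp_high_seq              */
            TLP_NEW_DATA_ACKED, /* probe carried new data, now acked   */
            TLP_NO_LOSS,        /* DSACK: original and probe arrived   */
            TLP_PROBE_REPAIRED, /* ACK past tlp_high_seq, no DSACK     */
    };

    static enum tlp_verdict tlp_ack_verdict(bool ack_below_high_seq,
                                            bool probe_was_retrans,
                                            bool dsack,
                                            bool ack_past_high_seq)
    {
            if (ack_below_high_seq)
                    return TLP_KEEP_WAITING;
            if (!probe_was_retrans)
                    return TLP_NEW_DATA_ACKED;
            if (dsack)
                    return TLP_NO_LOSS;
            if (ack_past_high_seq)
                    return TLP_PROBE_REPAIRED;
            return TLP_KEEP_WAITING; /* remaining cases resolved later */
    }
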
@@ -3677,6 +3690,15 @@
tcp_in_ack_event(sk, ack_ev_flags);
}
+ /* This is a deviation from RFC3168 since it states that:
+ * "When the TCP data sender is ready to set the CWR bit after reducing
+ * the congestion window, it SHOULD set the CWR bit only on the first
+ * new data packet that it transmits."
+ * We accept CWR on pure ACKs to be more robust
+ * with widely-deployed TCP implementations that do this.
+ */
+ tcp_ecn_accept_cwr(sk, skb);
+
/* We passed data and got it acked, remove any soft error
* log. Something worked...
*/
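
tcp_ecn_accept_cwr() used to run only from the data-queueing path (its old
call site is removed in a later hunk), so a CWR bit carried on a pure ACK
never cleared TCP_ECN_DEMAND_CWR and the receiver kept echoing ECE against
stacks that send CWR that way; hence the deliberate RFC 3168 deviation
described above. The receiver half of the handshake, with an assumed value
for the flag bit:

    #include <stdbool.h>
    #include <stdint.h>

    #define TCP_ECN_DEMAND_CWR 4 /* assumed ecn_flags bit for this sketch */

    /* Echo ECE until the peer answers with CWR; accepting CWR on pure
     * ACKs too prevents an endless ECE echo.
     */
    static void accept_cwr(uint8_t *ecn_flags, bool segment_has_cwr)
    {
            if (segment_has_cwr)
                    *ecn_flags &= ~TCP_ECN_DEMAND_CWR;
    }
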
@@ -3693,9 +3715,6 @@
if (tp->tlp_high_seq)
tcp_process_tlp_ack(sk, ack, flag);
- /* If needed, reset TLP/RTO timer; RACK may later override this. */
- if (flag & FLAG_SET_XMIT_TIMER)
- tcp_set_xmit_timer(sk);
if (tcp_ack_is_dubious(sk, flag)) {
if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP))) {
@@ -3708,6 +3727,10 @@
&rexmit);
}
+ /* If needed, reset TLP/RTO timer when RACK doesn't set. */
+ if (flag & FLAG_SET_XMIT_TIMER)
+ tcp_set_xmit_timer(sk);
+
if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
sk_dst_confirm(sk);
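
This completes the tcp_rack_mark_lost() change above: the RTO/TLP rearm
used to run before loss detection, so a freshly armed RACK reorder timer
was immediately replaced by a full RTO; rearming after
tcp_fastretrans_alert() lets the FLAG_SET_XMIT_TIMER veto take effect. The
resulting order of operations, condensed with hypothetical helpers:

    #include <stdint.h>
    #include <stdio.h>

    #define FLAG_SET_XMIT_TIMER 0x1000 /* assumed value, as in the earlier sketch */

    static void run_loss_detection(uint32_t *flag, int rack_armed_timer)
    {
            if (rack_armed_timer)
                    *flag &= ~FLAG_SET_XMIT_TIMER; /* RACK's veto */
    }

    int main(void)
    {
            uint32_t flag = FLAG_SET_XMIT_TIMER; /* snd_una advanced */

            run_loss_detection(&flag, 1); /* now runs first */
            puts(flag & FLAG_SET_XMIT_TIMER ? "rearm RTO/TLP"
                                            : "keep RACK reorder timer");
            return 0;
    }
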
@@ -4549,6 +4572,7 @@
if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP);
+ sk->sk_data_ready(sk);
tcp_drop(sk, skb);
return;
}
@@ -4582,7 +4606,11 @@
if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb,
skb, &fragstolen)) {
coalesce_done:
- tcp_grow_window(sk, skb);
+ /* For non sack flows, do not grow window to force DUPACK
+ * and trigger fast retransmit.
+ */
+ if (tcp_is_sack(tp))
+ tcp_grow_window(sk, skb);
kfree_skb_partial(skb, fragstolen);
skb = NULL;
goto add_sack;
@@ -4666,7 +4694,11 @@
tcp_sack_new_ofo_skb(sk, seq, end_seq);
end:
if (skb) {
- tcp_grow_window(sk, skb);
+ /* For non sack flows, do not grow window to force DUPACK
+ * and trigger fast retransmit.
+ */
+ if (tcp_is_sack(tp))
+ tcp_grow_window(sk, skb);
skb_condense(skb);
skb_set_owner_r(skb, sk);
}
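
An out-of-order arrival is answered with a duplicate ACK, but a segment
only counts as a duplicate if, among other things, its advertised window is
unchanged; growing the window here would turn each "dupack" into a window
update, so a non-SACK sender would never reach the three-dupack threshold
for fast retransmit. The RFC 5681 test, condensed:

    #include <stdbool.h>
    #include <stdint.h>

    /* Duplicate ACK: no payload, same cumulative ACK, unchanged window.
     * A window update breaks the last condition, which is why the
     * receiver holds its window steady for non-SACK flows.
     */
    static bool is_duplicate_ack(uint32_t ack, uint32_t prev_ack,
                                 uint16_t win, uint16_t prev_win,
                                 bool has_payload)
    {
            return !has_payload && ack == prev_ack && win == prev_win;
    }
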
@@ -4746,7 +4778,9 @@
const struct tcp_sock *tp = tcp_sk(sk);
int avail = tp->rcv_nxt - tp->copied_seq;
- if (avail < sk->sk_rcvlowat && !sock_flag(sk, SOCK_DONE))
+ if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) &&
+ !sock_flag(sk, SOCK_DONE) &&
+ tcp_receive_window(tp) > inet_csk(sk)->icsk_ack.rcv_mss)
return;
sk->sk_data_ready(sk);
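
Waking the reader only at SO_RCVLOWAT can deadlock: under receive-memory
pressure nothing more can be queued, and once the advertised window drops
below one MSS the sender stalls too, so the watermark would never be
reached. The same motive is behind the sk_data_ready() calls added on the
two drop paths. The wake decision, restated:

    #include <stdbool.h>
    #include <stdint.h>

    /* Wake at the low watermark, or whenever waiting longer cannot
     * help: memory pressure, connection done, or a receive window too
     * small for the sender to make progress.
     */
    static bool should_wake_reader(int avail, int rcvlowat,
                                   bool rmem_pressure, bool sock_done,
                                   uint32_t rcv_window, uint32_t rcv_mss)
    {
            if (avail >= rcvlowat)
                    return true;
            return rmem_pressure || sock_done || rcv_window <= rcv_mss;
    }
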
@@ -4765,8 +4799,6 @@
skb_dst_drop(skb);
__skb_pull(skb, tcp_hdr(skb)->doff * 4);
- tcp_ecn_accept_cwr(sk, skb);
-
tp->rx_opt.dsack = 0;
/* Queue data for delivery to the user.
@@ -4785,6 +4817,7 @@
sk_forced_mem_schedule(sk, skb->truesize);
else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
+ sk->sk_data_ready(sk);
goto drop;
}
@@ -5669,6 +5702,8 @@
tcp_data_snd_check(sk);
if (!inet_csk_ack_scheduled(sk))
goto no_ack;
+ } else {
+ tcp_update_wl(tp, TCP_SKB_CB(skb)->seq);
}
__tcp_ack_snd_check(sk, 0);
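
The header-prediction fast path never advanced snd_wl1 while traffic was
one-way; after more than half the sequence space (2 GiB) of such receive
traffic, the stale snd_wl1 makes genuine window updates look old and
tcp_may_update_window() rejects them, freezing the send window. Updating
snd_wl1 on every fast-path segment closes the gap. The wraparound effect:

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    static bool seq_after(uint32_t a, uint32_t b)
    {
            return (int32_t)(a - b) > 0;
    }

    int main(void)
    {
            uint32_t snd_wl1 = 1000;                 /* never updated */
            uint32_t seq = snd_wl1 + (1u << 31) + 1; /* > 2 GiB later */

            /* The stale snd_wl1 now compares as "after" the incoming
             * seq, so a window update keyed on it would be discarded.
             */
            assert(seq_after(snd_wl1, seq));
            return 0;
    }
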
@@ -6091,7 +6126,11 @@
{
struct request_sock *req;
- tcp_try_undo_loss(sk, false);
+ /* If we are still handling the SYNACK RTO, see if timestamp ECR allows
+ * undo. If peer SACKs triggered fast recovery, we can't undo here.
+ */
+ if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
+ tcp_try_undo_loss(sk, false);
/* Reset rtx states to prevent spurious retransmits_timed_out() */
tcp_sk(sk)->retrans_stamp = 0;
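
After a spurious SYNACK retransmission the server sits in the Loss state,
where the timestamp ECR can prove the original SYNACK arrived and the cwnd
reduction can be undone; if the peer's SACKs have instead pushed the
connection into fast recovery, that undo bookkeeping no longer applies, so
the call is now gated on the congestion state. A minimal sketch of the
guard, with an enum standing in for the kernel's tcp_ca_state values:

    #include <stdbool.h>

    enum ca_state { CA_OPEN, CA_DISORDER, CA_CWR, CA_RECOVERY, CA_LOSS };

    /* Undo only while still recovering from the SYNACK RTO; Recovery
     * (SACK-triggered) cannot be unwound from here.
     */
    static bool may_undo_synack_rto(enum ca_state state)
    {
            return state == CA_LOSS;
    }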