Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/include/net/tcp.h b/include/net/tcp.h
index b914959..334b8d1 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -23,9 +23,9 @@
#include <linux/cache.h>
#include <linux/percpu.h>
#include <linux/skbuff.h>
-#include <linux/cryptohash.h>
#include <linux/kref.h>
#include <linux/ktime.h>
+#include <linux/indirect_call_wrapper.h>
#include <net/inet_connection_sock.h>
#include <net/inet_timewait_sock.h>
@@ -39,6 +39,7 @@
#include <net/tcp_states.h>
#include <net/inet_ecn.h>
#include <net/dst.h>
+#include <net/mptcp.h>
#include <linux/seq_file.h>
#include <linux/memcontrol.h>
@@ -47,7 +48,9 @@
extern struct inet_hashinfo tcp_hashinfo;
-extern struct percpu_counter tcp_orphan_count;
+DECLARE_PER_CPU(unsigned int, tcp_orphan_count);
+int tcp_orphan_count_sum(void);
+
void tcp_time_wait(struct sock *sk, int state, int timeo);
#define MAX_TCP_HEADER L1_CACHE_ALIGN(128 + MAX_HEADER)
@@ -125,6 +128,7 @@
* to combine FIN-WAIT-2 timeout with
* TIME-WAIT timer.
*/
+#define TCP_FIN_TIMEOUT_MAX (120 * HZ) /* max TCP_LINGER2 value (two minutes) */
#define TCP_DELACK_MAX ((unsigned)(HZ/5)) /* maximal time to delay before sending an ACK */
#if HZ >= 100
@@ -182,6 +186,7 @@
#define TCPOPT_SACK 5 /* SACK Block */
#define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */
#define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */
+#define TCPOPT_MPTCP 30 /* Multipath TCP (RFC6824) */
#define TCPOPT_FASTOPEN 34 /* Fast open (RFC7413) */
#define TCPOPT_EXP 254 /* Experimental */
/* Magic number to be after the option value for sharing TCP
@@ -287,19 +292,6 @@
void sk_forced_mem_schedule(struct sock *sk, int size);
-static inline bool tcp_too_many_orphans(struct sock *sk, int shift)
-{
- struct percpu_counter *ocp = sk->sk_prot->orphan_count;
- int orphans = percpu_counter_read_positive(ocp);
-
- if (orphans << shift > sysctl_tcp_max_orphans) {
- orphans = percpu_counter_sum_positive(ocp);
- if (orphans << shift > sysctl_tcp_max_orphans)
- return true;
- }
- return false;
-}
-
bool tcp_check_oom(struct sock *sk, int shift);
@@ -328,6 +320,9 @@
size_t size, int flags);
ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
size_t size, int flags);
+int tcp_send_mss(struct sock *sk, int *size_goal, int flags);
+void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle,
+ int size_goal);
void tcp_release_cb(struct sock *sk);
void tcp_wfree(struct sk_buff *skb);
void tcp_write_timer_handler(struct sock *sk);
@@ -388,17 +383,13 @@
bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst);
void tcp_close(struct sock *sk, long timeout);
void tcp_init_sock(struct sock *sk);
-void tcp_init_transfer(struct sock *sk, int bpf_op);
+void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb);
__poll_t tcp_poll(struct file *file, struct socket *sock,
struct poll_table_struct *wait);
int tcp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
-int tcp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
-int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen);
-int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
+int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
+ unsigned int optlen);
void tcp_set_keepalive(struct sock *sk, int val);
void tcp_syn_ack_timeout(const struct request_sock *req);
int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
@@ -431,6 +422,7 @@
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb);
void tcp_v4_mtu_reduced(struct sock *sk);
void tcp_req_err(struct sock *sk, u32 seq, bool abort);
+void tcp_ld_RTO_revert(struct sock *sk, u32 seq);
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
struct sock *tcp_create_openreq_child(const struct sock *sk,
struct request_sock *req,
@@ -452,7 +444,8 @@
struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
struct request_sock *req,
struct tcp_fastopen_cookie *foc,
- enum tcp_synack_type synack_type);
+ enum tcp_synack_type synack_type,
+ struct sk_buff *syn_skb);
int tcp_disconnect(struct sock *sk, int flags);
void tcp_finish_connect(struct sock *sk, struct sk_buff *skb);
@@ -466,6 +459,8 @@
int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
u32 cookie);
struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb);
+struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
+ struct sock *sk, struct sk_buff *skb);
#ifdef CONFIG_SYN_COOKIES
/* Syncookies use a monotonic timer which increments every 60 seconds.
@@ -548,7 +543,7 @@
u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th,
u16 *mssp);
__u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss);
-u64 cookie_init_timestamp(struct request_sock *req);
+u64 cookie_init_timestamp(struct request_sock *req, u64 now);
bool cookie_timestamp_decode(const struct net *net,
struct tcp_options_received *opt);
bool cookie_ecn_ok(const struct tcp_options_received *opt,
@@ -656,7 +651,6 @@
int tcp_mtu_to_mss(struct sock *sk, int pmtu);
int tcp_mss_to_mtu(struct sock *sk, int mss);
void tcp_mtup_init(struct sock *sk);
-void tcp_init_buffer_space(struct sock *sk);
static inline void tcp_bound_rto(const struct sock *sk)
{
@@ -671,6 +665,10 @@
static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
{
+ /* mptcp hooks are only on the slow path */
+ if (sk_is_mptcp((struct sock *)tp))
+ return;
+
tp->pred_flags = htonl((tp->tcp_header_len << 26) |
ntohl(TCP_FLAG_ACK) |
snd_wnd);
@@ -696,7 +694,7 @@
static inline u32 tcp_rto_min(struct sock *sk)
{
const struct dst_entry *dst = __sk_dst_get(sk);
- u32 rto_min = TCP_RTO_MIN;
+ u32 rto_min = inet_csk(sk)->icsk_rto_min;
if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN);
@@ -769,10 +767,16 @@
return div_u64(tp->tcp_mstamp, USEC_PER_SEC / TCP_TS_HZ);
}
+/* Convert a nsec timestamp into TCP TSval timestamp (ms based currently) */
+static inline u32 tcp_ns_to_ts(u64 ns)
+{
+ return div_u64(ns, NSEC_PER_SEC / TCP_TS_HZ);
+}
+
/* Could use tcp_clock_us() / 1000, but this version uses a single divide */
static inline u32 tcp_time_stamp_raw(void)
{
- return div_u64(tcp_clock_ns(), NSEC_PER_SEC / TCP_TS_HZ);
+ return tcp_ns_to_ts(tcp_clock_ns());
}
void tcp_mstamp_refresh(struct tcp_sock *tp);
@@ -784,7 +788,7 @@
static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
{
- return div_u64(skb->skb_mstamp_ns, NSEC_PER_SEC / TCP_TS_HZ);
+ return tcp_ns_to_ts(skb->skb_mstamp_ns);
}
/* provide the departure time in us unit */
@@ -896,6 +900,8 @@
TCP_SKB_CB(skb)->bpf.sk_redir = NULL;
}
+extern const struct inet_connection_sock_af_ops ipv4_specific;
+
#if IS_ENABLED(CONFIG_IPV6)
/* This is the variant of inet6_iif() that must be used by TCP,
* as TCP moves IP6CB into a different location in skb->cb[]
@@ -921,17 +927,14 @@
#endif
return 0;
}
-#endif
-static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb)
-{
-#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
- if (!net->ipv4.sysctl_tcp_l3mdev_accept &&
- skb && ipv4_l3mdev_skb(IPCB(skb)->flags))
- return true;
+extern const struct inet_connection_sock_af_ops ipv6_specific;
+
+INDIRECT_CALLABLE_DECLARE(void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb));
+INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *skb));
+INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *skb));
+
#endif
- return false;
-}
/* TCP_SKB_CB reference means this can not be used from early demux */
static inline int tcp_v4_sdif(struct sk_buff *skb)
@@ -972,6 +975,13 @@
return likely(!TCP_SKB_CB(skb)->eor);
}
+static inline bool tcp_skb_can_collapse(const struct sk_buff *to,
+ const struct sk_buff *from)
+{
+ return likely(tcp_skb_can_collapse_to(to) &&
+ mptcp_skb_can_collapse(to, from));
+}
+
/* Events passed to congestion control interface */
enum tcp_ca_event {
CA_EVENT_TX_START, /* first transmit when no packets in flight */
@@ -1002,6 +1012,7 @@
#define TCP_CONG_NON_RESTRICTED 0x1
/* Requires ECN/ECT set on all packets */
#define TCP_CONG_NEEDS_ECN 0x2
+#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN)
union tcp_cc_info;
@@ -1087,7 +1098,7 @@
void tcp_get_allowed_congestion_control(char *buf, size_t len);
int tcp_set_allowed_congestion_control(char *allowed);
int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
- bool reinit, bool cap_net_admin);
+ bool cap_net_admin);
u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);
@@ -1096,6 +1107,7 @@
void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked);
extern struct tcp_congestion_ops tcp_reno;
+struct tcp_congestion_ops *tcp_ca_find(const char *name);
struct tcp_congestion_ops *tcp_ca_find_key(u32 key);
u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca);
#ifdef CONFIG_INET
@@ -1269,26 +1281,22 @@
return smp_load_acquire(&sk->sk_pacing_status) == SK_PACING_NEEDED;
}
-/* Return in jiffies the delay before one skb is sent.
- * If @skb is NULL, we look at EDT for next packet being sent on the socket.
+/* Estimates in how many jiffies next packet for this flow can be sent.
+ * Scheduling a retransmit timer too early would be silly.
*/
-static inline unsigned long tcp_pacing_delay(const struct sock *sk,
- const struct sk_buff *skb)
+static inline unsigned long tcp_pacing_delay(const struct sock *sk)
{
- s64 pacing_delay = skb ? skb->tstamp : tcp_sk(sk)->tcp_wstamp_ns;
+ s64 delay = tcp_sk(sk)->tcp_wstamp_ns - tcp_sk(sk)->tcp_clock_cache;
- pacing_delay -= tcp_sk(sk)->tcp_clock_cache;
-
- return pacing_delay > 0 ? nsecs_to_jiffies(pacing_delay) : 0;
+ return delay > 0 ? nsecs_to_jiffies(delay) : 0;
}
static inline void tcp_reset_xmit_timer(struct sock *sk,
const int what,
unsigned long when,
- const unsigned long max_when,
- const struct sk_buff *skb)
+ const unsigned long max_when)
{
- inet_csk_reset_xmit_timer(sk, what, when + tcp_pacing_delay(sk, skb),
+ inet_csk_reset_xmit_timer(sk, what, when + tcp_pacing_delay(sk),
max_when);
}
@@ -1316,8 +1324,7 @@
{
if (!tcp_sk(sk)->packets_out && !inet_csk(sk)->icsk_pending)
tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
- tcp_probe0_base(sk), TCP_RTO_MAX,
- NULL);
+ tcp_probe0_base(sk), TCP_RTO_MAX);
}
static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq)
@@ -1357,7 +1364,6 @@
rx_opt->num_sacks = 0;
}
-u32 tcp_default_init_rwnd(u32 mss);
void tcp_cwnd_restart(struct sock *sk, s32 delta);
static inline void tcp_slow_start_after_idle_check(struct sock *sk)
@@ -1402,6 +1408,8 @@
return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf));
}
+void tcp_cleanup_rbuf(struct sock *sk, int copied);
+
/* We provision sk_rcvbuf around 200% of sk_rcvlowat.
* If 87.5 % (7/8) of the space has been consumed, we want to override
* SO_RCVLOWAT constraint, since we are receiving skbs with too small
@@ -1545,8 +1553,9 @@
struct hlist_node node;
u8 keylen;
u8 family; /* AF_INET or AF_INET6 */
- union tcp_md5_addr addr;
u8 prefixlen;
+ union tcp_md5_addr addr;
+ int l3index; /* set if key added with L3 scope */
u8 key[TCP_MD5SIG_MAXKEYLEN];
struct rcu_head rcu;
};
@@ -1590,34 +1599,33 @@
int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
const struct sock *sk, const struct sk_buff *skb);
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
- int family, u8 prefixlen, const u8 *newkey, u8 newkeylen,
- gfp_t gfp);
+ int family, u8 prefixlen, int l3index,
+ const u8 *newkey, u8 newkeylen, gfp_t gfp);
int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr,
- int family, u8 prefixlen);
+ int family, u8 prefixlen, int l3index);
struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
const struct sock *addr_sk);
#ifdef CONFIG_TCP_MD5SIG
#include <linux/jump_label.h>
extern struct static_key_false tcp_md5_needed;
-struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk,
+struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index,
const union tcp_md5_addr *addr,
int family);
static inline struct tcp_md5sig_key *
-tcp_md5_do_lookup(const struct sock *sk,
- const union tcp_md5_addr *addr,
- int family)
+tcp_md5_do_lookup(const struct sock *sk, int l3index,
+ const union tcp_md5_addr *addr, int family)
{
if (!static_branch_unlikely(&tcp_md5_needed))
return NULL;
- return __tcp_md5_do_lookup(sk, addr, family);
+ return __tcp_md5_do_lookup(sk, l3index, addr, family);
}
#define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key)
#else
-static inline struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
- const union tcp_md5_addr *addr,
- int family)
+static inline struct tcp_md5sig_key *
+tcp_md5_do_lookup(const struct sock *sk, int l3index,
+ const union tcp_md5_addr *addr, int family)
{
return NULL;
}
@@ -1781,9 +1789,18 @@
return skb_queue_is_last(&sk->sk_write_queue, skb);
}
+/**
+ * tcp_write_queue_empty - test if any payload (or FIN) is available in write queue
+ * @sk: socket
+ *
+ * Since the write queue can have a temporary empty skb in it,
+ * we must not use "return skb_queue_empty(&sk->sk_write_queue)"
+ */
static inline bool tcp_write_queue_empty(const struct sock *sk)
{
- return skb_queue_empty(&sk->sk_write_queue);
+ const struct tcp_sock *tp = tcp_sk(sk);
+
+ return tp->write_seq == tp->snd_nxt;
}
static inline bool tcp_rtx_queue_empty(const struct sock *sk)
@@ -1920,6 +1937,7 @@
struct seq_net_private p;
enum tcp_seq_states state;
struct sock *syn_wait_sk;
+ struct tcp_seq_afinfo *bpf_seq_afinfo;
int bucket, offset, sbucket, num;
loff_t last_pos;
};
@@ -1932,6 +1950,10 @@
struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
netdev_features_t features);
struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb);
+INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff));
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb));
+INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff));
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb));
int tcp_gro_complete(struct sk_buff *skb);
void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);
@@ -1976,7 +1998,7 @@
const struct sk_buff *skb);
int (*md5_parse)(struct sock *sk,
int optname,
- char __user *optval,
+ sockptr_t optval,
int optlen);
#endif
};
@@ -2005,9 +2027,15 @@
int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
struct flowi *fl, struct request_sock *req,
struct tcp_fastopen_cookie *foc,
- enum tcp_synack_type synack_type);
+ enum tcp_synack_type synack_type,
+ struct sk_buff *syn_skb);
};
+extern const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops;
+#if IS_ENABLED(CONFIG_IPV6)
+extern const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops;
+#endif
+
#ifdef CONFIG_SYN_COOKIES
static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops,
const struct sock *sk, struct sk_buff *skb,
@@ -2160,6 +2188,9 @@
/* diagnostic */
int (*get_info)(const struct sock *sk, struct sk_buff *skb);
size_t (*get_info_size)(const struct sock *sk);
+ /* clone ulp */
+ void (*clone)(const struct request_sock *req, struct sock *newsk,
+ const gfp_t priority);
char name[TCP_ULP_NAME_MAX];
struct module *owner;
@@ -2179,14 +2210,37 @@
struct sk_msg;
struct sk_psock;
-int tcp_bpf_init(struct sock *sk);
-void tcp_bpf_reinit(struct sock *sk);
+#ifdef CONFIG_BPF_STREAM_PARSER
+struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
+void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
+#else
+static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
+{
+}
+#endif /* CONFIG_BPF_STREAM_PARSER */
+
+#ifdef CONFIG_NET_SOCK_MSG
int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
int flags);
-int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int nonblock, int flags, int *addr_len);
int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
struct msghdr *msg, int len, int flags);
+#endif /* CONFIG_NET_SOCK_MSG */
+
+#ifdef CONFIG_CGROUP_BPF
+static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
+ struct sk_buff *skb,
+ unsigned int end_offset)
+{
+ skops->skb = skb;
+ skops->skb_data_end = skb->data + end_offset;
+}
+#else
+static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
+ struct sk_buff *skb,
+ unsigned int end_offset)
+{
+}
+#endif
/* Call BPF_SOCK_OPS program that returns an int. If the return value
* is < 0, then the BPF op failed (for example if the loaded BPF