Blame - net/dccp/ccids/ccid2.c - hafnium/third_party/linux

blob: 842a9c7c73a3f6ef7568e105d38c276547ea29ef [file] [log] [blame]

Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1	/*
				2	* Copyright (c) 2005, 2006 Andrea Bittau <a.bittau@cs.ucl.ac.uk>
				3	*
				4	* Changes to meet Linux coding standards, and DCCP infrastructure fixes.
				5	*
				6	* Copyright (c) 2006 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
				7	*
				8	* This program is free software; you can redistribute it and/or modify
				9	* it under the terms of the GNU General Public License as published by
				10	* the Free Software Foundation; either version 2 of the License, or
				11	* (at your option) any later version.
				12	*
				13	* This program is distributed in the hope that it will be useful,
				14	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				15	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				16	* GNU General Public License for more details.
				17	*
				18	* You should have received a copy of the GNU General Public License
				19	* along with this program; if not, write to the Free Software
				20	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
				21	*/
				22
				23	/*
				24	* This implementation should follow RFC 4341
				25	*/
				26	#include <linux/slab.h>
				27	#include "../feat.h"
				28	#include "ccid2.h"
				29
				30
				/*
				 * Debug tracing for CCID-2. When CONFIG_IP_DCCP_CCID2_DEBUG is defined,
				 * ccid2_pr_debug() forwards to DCCP_PR_DEBUG() with the ccid2_debug flag as
				 * its first argument; otherwise the macro expands to nothing.
				 */
				#if defined(CONFIG_IP_DCCP_CCID2_DEBUG)
				static bool ccid2_debug;
				#define ccid2_pr_debug(fmt, args...) DCCP_PR_DEBUG(ccid2_debug, fmt, ##args)
				#else
				#define ccid2_pr_debug(fmt, args...)
				#endif
				37
				38	static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc)
				39	{
				40	struct ccid2_seq *seqp;
				41	int i;
				42
				43	/* check if we have space to preserve the pointer to the buffer */
				44	if (hc->tx_seqbufc >= (sizeof(hc->tx_seqbuf) /
				45	sizeof(struct ccid2_seq *)))
				46	return -ENOMEM;
				47
				48	/* allocate buffer and initialize linked list */
				49	seqp = kmalloc_array(CCID2_SEQBUF_LEN, sizeof(struct ccid2_seq),
				50	gfp_any());
				51	if (seqp == NULL)
				52	return -ENOMEM;
				53
				54	for (i = 0; i < (CCID2_SEQBUF_LEN - 1); i++) {
				55	seqp[i].ccid2s_next = &seqp[i + 1];
				56	seqp[i + 1].ccid2s_prev = &seqp[i];
				57	}
				58	seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = seqp;
				59	seqp->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1];
				60
				61	/* This is the first allocation. Initiate the head and tail. */
				62	if (hc->tx_seqbufc == 0)
				63	hc->tx_seqh = hc->tx_seqt = seqp;
				64	else {
				65	/* link the existing list with the one we just created */
				66	hc->tx_seqh->ccid2s_next = seqp;
				67	seqp->ccid2s_prev = hc->tx_seqh;
				68
				69	hc->tx_seqt->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1];
				70	seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = hc->tx_seqt;
				71	}
				72
				73	/* store the original pointer to the buffer so we can free it */
				74	hc->tx_seqbuf[hc->tx_seqbufc] = seqp;
				75	hc->tx_seqbufc++;
				76
				77	return 0;
				78	}
				79
				80	static int ccid2_hc_tx_send_packet(struct sock sk, struct sk_buff skb)
				81	{
				82	if (ccid2_cwnd_network_limited(ccid2_hc_tx_sk(sk)))
				83	return CCID_PACKET_WILL_DEQUEUE_LATER;
				84	return CCID_PACKET_SEND_AT_ONCE;
				85	}
				86
				87	static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
				88	{
				89	u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->tx_cwnd, 2);
				90
				91	/*
				92	* Ensure that Ack Ratio does not exceed ceil(cwnd/2), which is (2) from
				93	* RFC 4341, 6.1.2. We ignore the statement that Ack Ratio 2 is always
				94	* acceptable since this causes starvation/deadlock whenever cwnd < 2.
				95	* The same problem arises when Ack Ratio is 0 (ie. Ack Ratio disabled).
				96	*/
				97	if (val == 0 \|\| val > max_ratio) {
				98	DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio);
				99	val = max_ratio;
				100	}
				101	dccp_feat_signal_nn_change(sk, DCCPF_ACK_RATIO,
				102	min_t(u32, val, DCCPF_ACK_RATIO_MAX));
				103	}
				104
				105	static void ccid2_check_l_ack_ratio(struct sock *sk)
				106	{
				107	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
				108
				109	/*
				110	* After a loss, idle period, application limited period, or RTO we
				111	* need to check that the ack ratio is still less than the congestion
				112	* window. Otherwise, we will send an entire congestion window of
				113	* packets and got no response because we haven't sent ack ratio
				114	* packets yet.
				115	* If the ack ratio does need to be reduced, we reduce it to half of
				116	* the congestion window (or 1 if that's zero) instead of to the
				117	* congestion window. This prevents problems if one ack is lost.
				118	*/
				119	if (dccp_feat_nn_get(sk, DCCPF_ACK_RATIO) > hc->tx_cwnd)
				120	ccid2_change_l_ack_ratio(sk, hc->tx_cwnd/2 ? : 1U);
				121	}
				122
				123	static void ccid2_change_l_seq_window(struct sock *sk, u64 val)
				124	{
				125	dccp_feat_signal_nn_change(sk, DCCPF_SEQUENCE_WINDOW,
				126	clamp_val(val, DCCPF_SEQ_WMIN,
				127	DCCPF_SEQ_WMAX));
				128	}
				129
				130	static void dccp_tasklet_schedule(struct sock *sk)
				131	{
				132	struct tasklet_struct *t = &dccp_sk(sk)->dccps_xmitlet;
				133
				134	if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
				135	sock_hold(sk);
				136	__tasklet_schedule(t);
				137	}
				138	}
				139
				140	static void ccid2_hc_tx_rto_expire(struct timer_list *t)
				141	{
				142	struct ccid2_hc_tx_sock *hc = from_timer(hc, t, tx_rtotimer);
				143	struct sock *sk = hc->sk;
				144	const bool sender_was_blocked = ccid2_cwnd_network_limited(hc);
				145
				146	bh_lock_sock(sk);
				147	if (sock_owned_by_user(sk)) {
				148	sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + HZ / 5);
				149	goto out;
				150	}
				151
				152	ccid2_pr_debug("RTO_EXPIRE\n");
				153
				154	if (sk->sk_state == DCCP_CLOSED)
				155	goto out;
				156
				157	/* back-off timer */
				158	hc->tx_rto <<= 1;
				159	if (hc->tx_rto > DCCP_RTO_MAX)
				160	hc->tx_rto = DCCP_RTO_MAX;
				161
				162	/* adjust pipe, cwnd etc */
				163	hc->tx_ssthresh = hc->tx_cwnd / 2;
				164	if (hc->tx_ssthresh < 2)
				165	hc->tx_ssthresh = 2;
				166	hc->tx_cwnd = 1;
				167	hc->tx_pipe = 0;
				168
				169	/* clear state about stuff we sent */
				170	hc->tx_seqt = hc->tx_seqh;
				171	hc->tx_packets_acked = 0;
				172
				173	/* clear ack ratio state. */
				174	hc->tx_rpseq = 0;
				175	hc->tx_rpdupack = -1;
				176	ccid2_change_l_ack_ratio(sk, 1);
				177
				178	/* if we were blocked before, we may now send cwnd=1 packet */
				179	if (sender_was_blocked)
				180	dccp_tasklet_schedule(sk);
				181	/* restart backed-off timer */
				182	sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
				183	out:
				184	bh_unlock_sock(sk);
				185	sock_put(sk);
				186	}
				187
				188	/*
				189	* Congestion window validation (RFC 2861).
				190	*/
				191	static bool ccid2_do_cwv = true;
				192	module_param(ccid2_do_cwv, bool, 0644);
				193	MODULE_PARM_DESC(ccid2_do_cwv, "Perform RFC2861 Congestion Window Validation");
				194
				195	/**
				196	* ccid2_update_used_window - Track how much of cwnd is actually used
				197	* This is done in addition to CWV. The sender needs to have an idea of how many
				198	* packets may be in flight, to set the local Sequence Window value accordingly
				199	* (RFC 4340, 7.5.2). The CWV mechanism is exploited to keep track of the
				200	* maximum-used window. We use an EWMA low-pass filter to filter out noise.
				201	*/
				202	static void ccid2_update_used_window(struct ccid2_hc_tx_sock *hc, u32 new_wnd)
				203	{
				204	hc->tx_expected_wnd = (3 * hc->tx_expected_wnd + new_wnd) / 4;
				205	}
				206
				207	/* This borrows the code of tcp_cwnd_application_limited() */
				208	static void ccid2_cwnd_application_limited(struct sock *sk, const u32 now)
				209	{
				210	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
				211	/* don't reduce cwnd below the initial window (IW) */
				212	u32 init_win = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache),
				213	win_used = max(hc->tx_cwnd_used, init_win);
				214
				215	if (win_used < hc->tx_cwnd) {
				216	hc->tx_ssthresh = max(hc->tx_ssthresh,
				217	(hc->tx_cwnd >> 1) + (hc->tx_cwnd >> 2));
				218	hc->tx_cwnd = (hc->tx_cwnd + win_used) >> 1;
				219	}
				220	hc->tx_cwnd_used = 0;
				221	hc->tx_cwnd_stamp = now;
				222
				223	ccid2_check_l_ack_ratio(sk);
				224	}
				225
				226	/* This borrows the code of tcp_cwnd_restart() */
				227	static void ccid2_cwnd_restart(struct sock *sk, const u32 now)
				228	{
				229	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
				230	u32 cwnd = hc->tx_cwnd, restart_cwnd,
				231	iwnd = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache);
				232	s32 delta = now - hc->tx_lsndtime;
				233
				234	hc->tx_ssthresh = max(hc->tx_ssthresh, (cwnd >> 1) + (cwnd >> 2));
				235
				236	/* don't reduce cwnd below the initial window (IW) */
				237	restart_cwnd = min(cwnd, iwnd);
				238
				239	while ((delta -= hc->tx_rto) >= 0 && cwnd > restart_cwnd)
				240	cwnd >>= 1;
				241	hc->tx_cwnd = max(cwnd, restart_cwnd);
				242	hc->tx_cwnd_stamp = now;
				243	hc->tx_cwnd_used = 0;
				244
				245	ccid2_check_l_ack_ratio(sk);
				246	}
				247
				248	static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
				249	{
				250	struct dccp_sock *dp = dccp_sk(sk);
				251	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
				252	const u32 now = ccid2_jiffies32;
				253	struct ccid2_seq *next;
				254
				255	/* slow-start after idle periods (RFC 2581, RFC 2861) */
				256	if (ccid2_do_cwv && !hc->tx_pipe &&
				257	(s32)(now - hc->tx_lsndtime) >= hc->tx_rto)
				258	ccid2_cwnd_restart(sk, now);
				259
				260	hc->tx_lsndtime = now;
				261	hc->tx_pipe += 1;
				262
				263	/* see whether cwnd was fully used (RFC 2861), update expected window */
				264	if (ccid2_cwnd_network_limited(hc)) {
				265	ccid2_update_used_window(hc, hc->tx_cwnd);
				266	hc->tx_cwnd_used = 0;
				267	hc->tx_cwnd_stamp = now;
				268	} else {
				269	if (hc->tx_pipe > hc->tx_cwnd_used)
				270	hc->tx_cwnd_used = hc->tx_pipe;
				271
				272	ccid2_update_used_window(hc, hc->tx_cwnd_used);
				273
				274	if (ccid2_do_cwv && (s32)(now - hc->tx_cwnd_stamp) >= hc->tx_rto)
				275	ccid2_cwnd_application_limited(sk, now);
				276	}
				277
				278	hc->tx_seqh->ccid2s_seq = dp->dccps_gss;
				279	hc->tx_seqh->ccid2s_acked = 0;
				280	hc->tx_seqh->ccid2s_sent = now;
				281
				282	next = hc->tx_seqh->ccid2s_next;
				283	/* check if we need to alloc more space */
				284	if (next == hc->tx_seqt) {
				285	if (ccid2_hc_tx_alloc_seq(hc)) {
				286	DCCP_CRIT("packet history - out of memory!");
				287	/* FIXME: find a more graceful way to bail out */
				288	return;
				289	}
				290	next = hc->tx_seqh->ccid2s_next;
				291	BUG_ON(next == hc->tx_seqt);
				292	}
				293	hc->tx_seqh = next;
				294
				295	ccid2_pr_debug("cwnd=%d pipe=%d\n", hc->tx_cwnd, hc->tx_pipe);
				296
				297	/*
				298	* FIXME: The code below is broken and the variables have been removed
				299	* from the socket struct. The `ackloss' variable was always set to 0,
				300	* and with arsent there are several problems:
				301	* (i) it doesn't just count the number of Acks, but all sent packets;
				302	* (ii) it is expressed in # of packets, not # of windows, so the
				303	* comparison below uses the wrong formula: Appendix A of RFC 4341
				304	* comes up with the number K = cwnd / (R^2 - R) of consecutive windows
				305	* of data with no lost or marked Ack packets. If arsent were the # of
				306	* consecutive Acks received without loss, then Ack Ratio needs to be
				307	* decreased by 1 when
				308	* arsent >= K * cwnd / R = cwnd^2 / (R^3 - R^2)
				309	* where cwnd / R is the number of Acks received per window of data
				310	* (cf. RFC 4341, App. A). The problems are that
				311	* - arsent counts other packets as well;
				312	* - the comparison uses a formula different from RFC 4341;
				313	* - computing a cubic/quadratic equation each time is too complicated.
				314	* Hence a different algorithm is needed.
				315	*/
				316	#if 0
				317	/* Ack Ratio. Need to maintain a concept of how many windows we sent */
				318	hc->tx_arsent++;
				319	/* We had an ack loss in this window... */
				320	if (hc->tx_ackloss) {
				321	if (hc->tx_arsent >= hc->tx_cwnd) {
				322	hc->tx_arsent = 0;
				323	hc->tx_ackloss = 0;
				324	}
				325	} else {
				326	/* No acks lost up to now... */
				327	/* decrease ack ratio if enough packets were sent */
				328	if (dp->dccps_l_ack_ratio > 1) {
				329	/* XXX don't calculate denominator each time */
				330	int denom = dp->dccps_l_ack_ratio * dp->dccps_l_ack_ratio -
				331	dp->dccps_l_ack_ratio;
				332
				333	denom = hc->tx_cwnd * hc->tx_cwnd / denom;
				334
				335	if (hc->tx_arsent >= denom) {
				336	ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio - 1);
				337	hc->tx_arsent = 0;
				338	}
				339	} else {
				340	/* we can't increase ack ratio further [1] */
				341	hc->tx_arsent = 0; /* or maybe set it to cwnd*/
				342	}
				343	}
				344	#endif
				345
				346	sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
				347
				348	#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
				349	do {
				350	struct ccid2_seq *seqp = hc->tx_seqt;
				351
				352	while (seqp != hc->tx_seqh) {
				353	ccid2_pr_debug("out seq=%llu acked=%d time=%u\n",
				354	(unsigned long long)seqp->ccid2s_seq,
				355	seqp->ccid2s_acked, seqp->ccid2s_sent);
				356	seqp = seqp->ccid2s_next;
				357	}
				358	} while (0);
				359	ccid2_pr_debug("=========\n");
				360	#endif
				361	}
				362
				363	/**
				364	* ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm
				365	* This code is almost identical with TCP's tcp_rtt_estimator(), since
				366	* - it has a higher sampling frequency (recommended by RFC 1323),
				367	* - the RTO does not collapse into RTT due to RTTVAR going towards zero,
				368	* - it is simple (cf. more complex proposals such as Eifel timer or research
				369	* which suggests that the gain should be set according to window size),
				370	* - in tests it was found to work well with CCID2 [gerrit].
				371	*/
				372	static void ccid2_rtt_estimator(struct sock *sk, const long mrtt)
				373	{
				374	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
				375	long m = mrtt ? : 1;
				376
				377	if (hc->tx_srtt == 0) {
				378	/* First measurement m */
				379	hc->tx_srtt = m << 3;
				380	hc->tx_mdev = m << 1;
				381
				382	hc->tx_mdev_max = max(hc->tx_mdev, tcp_rto_min(sk));
				383	hc->tx_rttvar = hc->tx_mdev_max;
				384
				385	hc->tx_rtt_seq = dccp_sk(sk)->dccps_gss;
				386	} else {
				387	/* Update scaled SRTT as SRTT += 1/8 * (m - SRTT) */
				388	m -= (hc->tx_srtt >> 3);
				389	hc->tx_srtt += m;
				390
				391	/* Similarly, update scaled mdev with regard to \|m\| */
				392	if (m < 0) {
				393	m = -m;
				394	m -= (hc->tx_mdev >> 2);
				395	/*
				396	* This neutralises RTO increase when RTT < SRTT - mdev
				397	* (see P. Sarolahti, A. Kuznetsov,"Congestion Control
				398	* in Linux TCP", USENIX 2002, pp. 49-62).
				399	*/
				400	if (m > 0)
				401	m >>= 3;
				402	} else {
				403	m -= (hc->tx_mdev >> 2);
				404	}
				405	hc->tx_mdev += m;
				406
				407	if (hc->tx_mdev > hc->tx_mdev_max) {
				408	hc->tx_mdev_max = hc->tx_mdev;
				409	if (hc->tx_mdev_max > hc->tx_rttvar)
				410	hc->tx_rttvar = hc->tx_mdev_max;
				411	}
				412
				413	/*
				414	* Decay RTTVAR at most once per flight, exploiting that
				415	* 1) pipe <= cwnd <= Sequence_Window = W (RFC 4340, 7.5.2)
				416	* 2) AWL = GSS-W+1 <= GAR <= GSS (RFC 4340, 7.5.1)
				417	* GAR is a useful bound for FlightSize = pipe.
				418	* AWL is probably too low here, as it over-estimates pipe.
				419	*/
				420	if (after48(dccp_sk(sk)->dccps_gar, hc->tx_rtt_seq)) {
				421	if (hc->tx_mdev_max < hc->tx_rttvar)
				422	hc->tx_rttvar -= (hc->tx_rttvar -
				423	hc->tx_mdev_max) >> 2;
				424	hc->tx_rtt_seq = dccp_sk(sk)->dccps_gss;
				425	hc->tx_mdev_max = tcp_rto_min(sk);
				426	}
				427	}
				428
				429	/*
				430	* Set RTO from SRTT and RTTVAR
				431	* As in TCP, 4 * RTTVAR >= TCP_RTO_MIN, giving a minimum RTO of 200 ms.
				432	* This agrees with RFC 4341, 5:
				433	* "Because DCCP does not retransmit data, DCCP does not require
				434	* TCP's recommended minimum timeout of one second".
				435	*/
				436	hc->tx_rto = (hc->tx_srtt >> 3) + hc->tx_rttvar;
				437
				438	if (hc->tx_rto > DCCP_RTO_MAX)
				439	hc->tx_rto = DCCP_RTO_MAX;
				440	}
				441
				442	static void ccid2_new_ack(struct sock sk, struct ccid2_seq seqp,
				443	unsigned int *maxincr)
				444	{
				445	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
				446	struct dccp_sock *dp = dccp_sk(sk);
				447	int r_seq_used = hc->tx_cwnd / dp->dccps_l_ack_ratio;
				448
				449	if (hc->tx_cwnd < dp->dccps_l_seq_win &&
				450	r_seq_used < dp->dccps_r_seq_win) {
				451	if (hc->tx_cwnd < hc->tx_ssthresh) {
				452	if (*maxincr > 0 && ++hc->tx_packets_acked >= 2) {
				453	hc->tx_cwnd += 1;
				454	*maxincr -= 1;
				455	hc->tx_packets_acked = 0;
				456	}
				457	} else if (++hc->tx_packets_acked >= hc->tx_cwnd) {
				458	hc->tx_cwnd += 1;
				459	hc->tx_packets_acked = 0;
				460	}
				461	}
				462
				463	/*
				464	* Adjust the local sequence window and the ack ratio to allow about
				465	* 5 times the number of packets in the network (RFC 4340 7.5.2)
				466	*/
				467	if (r_seq_used * CCID2_WIN_CHANGE_FACTOR >= dp->dccps_r_seq_win)
				468	ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio * 2);
				469	else if (r_seq_used * CCID2_WIN_CHANGE_FACTOR < dp->dccps_r_seq_win/2)
				470	ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio / 2 ? : 1U);
				471
				472	if (hc->tx_cwnd * CCID2_WIN_CHANGE_FACTOR >= dp->dccps_l_seq_win)
				473	ccid2_change_l_seq_window(sk, dp->dccps_l_seq_win * 2);
				474	else if (hc->tx_cwnd * CCID2_WIN_CHANGE_FACTOR < dp->dccps_l_seq_win/2)
				475	ccid2_change_l_seq_window(sk, dp->dccps_l_seq_win / 2);
				476
				477	/*
				478	* FIXME: RTT is sampled several times per acknowledgment (for each
				479	* entry in the Ack Vector), instead of once per Ack (as in TCP SACK).
				480	* This causes the RTT to be over-estimated, since the older entries
				481	* in the Ack Vector have earlier sending times.
				482	* The cleanest solution is to not use the ccid2s_sent field at all
				483	* and instead use DCCP timestamps: requires changes in other places.
				484	*/
				485	ccid2_rtt_estimator(sk, ccid2_jiffies32 - seqp->ccid2s_sent);
				486	}
				487
				488	static void ccid2_congestion_event(struct sock sk, struct ccid2_seq seqp)
				489	{
				490	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
				491
				492	if ((s32)(seqp->ccid2s_sent - hc->tx_last_cong) < 0) {
				493	ccid2_pr_debug("Multiple losses in an RTT---treating as one\n");
				494	return;
				495	}
				496
				497	hc->tx_last_cong = ccid2_jiffies32;
				498
				499	hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U;
				500	hc->tx_ssthresh = max(hc->tx_cwnd, 2U);
				501
				502	ccid2_check_l_ack_ratio(sk);
				503	}
				504
				505	static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type,
				506	u8 option, u8 *optval, u8 optlen)
				507	{
				508	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
				509
				510	switch (option) {
				511	case DCCPO_ACK_VECTOR_0:
				512	case DCCPO_ACK_VECTOR_1:
				513	return dccp_ackvec_parsed_add(&hc->tx_av_chunks, optval, optlen,
				514	option - DCCPO_ACK_VECTOR_0);
				515	}
				516	return 0;
				517	}
				518
				519	static void ccid2_hc_tx_packet_recv(struct sock sk, struct sk_buff skb)
				520	{
				521	struct dccp_sock *dp = dccp_sk(sk);
				522	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
				523	const bool sender_was_blocked = ccid2_cwnd_network_limited(hc);
				524	struct dccp_ackvec_parsed *avp;
				525	u64 ackno, seqno;
				526	struct ccid2_seq *seqp;
				527	int done = 0;
				528	unsigned int maxincr = 0;
				529
				530	/* check reverse path congestion */
				531	seqno = DCCP_SKB_CB(skb)->dccpd_seq;
				532
				533	/* XXX this whole "algorithm" is broken. Need to fix it to keep track
				534	* of the seqnos of the dupacks so that rpseq and rpdupack are correct
				535	* -sorbo.
				536	*/
				537	/* need to bootstrap */
				538	if (hc->tx_rpdupack == -1) {
				539	hc->tx_rpdupack = 0;
				540	hc->tx_rpseq = seqno;
				541	} else {
				542	/* check if packet is consecutive */
				543	if (dccp_delta_seqno(hc->tx_rpseq, seqno) == 1)
				544	hc->tx_rpseq = seqno;
				545	/* it's a later packet */
				546	else if (after48(seqno, hc->tx_rpseq)) {
				547	hc->tx_rpdupack++;
				548
				549	/* check if we got enough dupacks */
				550	if (hc->tx_rpdupack >= NUMDUPACK) {
				551	hc->tx_rpdupack = -1; /* XXX lame */
				552	hc->tx_rpseq = 0;
				553	#ifdef __CCID2_COPES_GRACEFULLY_WITH_ACK_CONGESTION_CONTROL__
				554	/*
				555	* FIXME: Ack Congestion Control is broken; in
				556	* the current state instabilities occurred with
				557	* Ack Ratios greater than 1; causing hang-ups
				558	* and long RTO timeouts. This needs to be fixed
				559	* before opening up dynamic changes. -- gerrit
				560	*/
				561	ccid2_change_l_ack_ratio(sk, 2 * dp->dccps_l_ack_ratio);
				562	#endif
				563	}
				564	}
				565	}
				566
				567	/* check forward path congestion */
				568	if (dccp_packet_without_ack(skb))
				569	return;
				570
				571	/* still didn't send out new data packets */
				572	if (hc->tx_seqh == hc->tx_seqt)
				573	goto done;
				574
				575	ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
				576	if (after48(ackno, hc->tx_high_ack))
				577	hc->tx_high_ack = ackno;
				578
				579	seqp = hc->tx_seqt;
				580	while (before48(seqp->ccid2s_seq, ackno)) {
				581	seqp = seqp->ccid2s_next;
				582	if (seqp == hc->tx_seqh) {
				583	seqp = hc->tx_seqh->ccid2s_prev;
				584	break;
				585	}
				586	}
				587
				588	/*
				589	* In slow-start, cwnd can increase up to a maximum of Ack Ratio/2
				590	* packets per acknowledgement. Rounding up avoids that cwnd is not
				591	* advanced when Ack Ratio is 1 and gives a slight edge otherwise.
				592	*/
				593	if (hc->tx_cwnd < hc->tx_ssthresh)
				594	maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2);
				595
				596	/* go through all ack vectors */
				597	list_for_each_entry(avp, &hc->tx_av_chunks, node) {
				598	/* go through this ack vector */
				599	for (; avp->len--; avp->vec++) {
				600	u64 ackno_end_rl = SUB48(ackno,
				601	dccp_ackvec_runlen(avp->vec));
				602
				603	ccid2_pr_debug("ackvec %llu \|%u,%u\|\n",
				604	(unsigned long long)ackno,
				605	dccp_ackvec_state(avp->vec) >> 6,
				606	dccp_ackvec_runlen(avp->vec));
				607	/* if the seqno we are analyzing is larger than the
				608	* current ackno, then move towards the tail of our
				609	* seqnos.
				610	*/
				611	while (after48(seqp->ccid2s_seq, ackno)) {
				612	if (seqp == hc->tx_seqt) {
				613	done = 1;
				614	break;
				615	}
				616	seqp = seqp->ccid2s_prev;
				617	}
				618	if (done)
				619	break;
				620
				621	/* check all seqnos in the range of the vector
				622	* run length
				623	*/
				624	while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) {
				625	const u8 state = dccp_ackvec_state(avp->vec);
				626
				627	/* new packet received or marked */
				628	if (state != DCCPAV_NOT_RECEIVED &&
				629	!seqp->ccid2s_acked) {
				630	if (state == DCCPAV_ECN_MARKED)
				631	ccid2_congestion_event(sk,
				632	seqp);
				633	else
				634	ccid2_new_ack(sk, seqp,
				635	&maxincr);
				636
				637	seqp->ccid2s_acked = 1;
				638	ccid2_pr_debug("Got ack for %llu\n",
				639	(unsigned long long)seqp->ccid2s_seq);
				640	hc->tx_pipe--;
				641	}
				642	if (seqp == hc->tx_seqt) {
				643	done = 1;
				644	break;
				645	}
				646	seqp = seqp->ccid2s_prev;
				647	}
				648	if (done)
				649	break;
				650
				651	ackno = SUB48(ackno_end_rl, 1);
				652	}
				653	if (done)
				654	break;
				655	}
				656
				657	/* The state about what is acked should be correct now
				658	* Check for NUMDUPACK
				659	*/
				660	seqp = hc->tx_seqt;
				661	while (before48(seqp->ccid2s_seq, hc->tx_high_ack)) {
				662	seqp = seqp->ccid2s_next;
				663	if (seqp == hc->tx_seqh) {
				664	seqp = hc->tx_seqh->ccid2s_prev;
				665	break;
				666	}
				667	}
				668	done = 0;
				669	while (1) {
				670	if (seqp->ccid2s_acked) {
				671	done++;
				672	if (done == NUMDUPACK)
				673	break;
				674	}
				675	if (seqp == hc->tx_seqt)
				676	break;
				677	seqp = seqp->ccid2s_prev;
				678	}
				679
				680	/* If there are at least 3 acknowledgements, anything unacknowledged
				681	* below the last sequence number is considered lost
				682	*/
				683	if (done == NUMDUPACK) {
				684	struct ccid2_seq *last_acked = seqp;
				685
				686	/* check for lost packets */
				687	while (1) {
				688	if (!seqp->ccid2s_acked) {
				689	ccid2_pr_debug("Packet lost: %llu\n",
				690	(unsigned long long)seqp->ccid2s_seq);
				691	/* XXX need to traverse from tail -> head in
				692	* order to detect multiple congestion events in
				693	* one ack vector.
				694	*/
				695	ccid2_congestion_event(sk, seqp);
				696	hc->tx_pipe--;
				697	}
				698	if (seqp == hc->tx_seqt)
				699	break;
				700	seqp = seqp->ccid2s_prev;
				701	}
				702
				703	hc->tx_seqt = last_acked;
				704	}
				705
				706	/* trim acked packets in tail */
				707	while (hc->tx_seqt != hc->tx_seqh) {
				708	if (!hc->tx_seqt->ccid2s_acked)
				709	break;
				710
				711	hc->tx_seqt = hc->tx_seqt->ccid2s_next;
				712	}
				713
				714	/* restart RTO timer if not all outstanding data has been acked */
				715	if (hc->tx_pipe == 0)
				716	sk_stop_timer(sk, &hc->tx_rtotimer);
				717	else
				718	sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
				719	done:
				720	/* check if incoming Acks allow pending packets to be sent */
				721	if (sender_was_blocked && !ccid2_cwnd_network_limited(hc))
				722	dccp_tasklet_schedule(sk);
				723	dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks);
				724	}
				725
				726	static int ccid2_hc_tx_init(struct ccid ccid, struct sock sk)
				727	{
				728	struct ccid2_hc_tx_sock *hc = ccid_priv(ccid);
				729	struct dccp_sock *dp = dccp_sk(sk);
				730	u32 max_ratio;
				731
				732	/* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */
				733	hc->tx_ssthresh = ~0U;
				734
				735	/* Use larger initial windows (RFC 4341, section 5). */
				736	hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache);
				737	hc->tx_expected_wnd = hc->tx_cwnd;
				738
				739	/* Make sure that Ack Ratio is enabled and within bounds. */
				740	max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2);
				741	if (dp->dccps_l_ack_ratio == 0 \|\| dp->dccps_l_ack_ratio > max_ratio)
				742	dp->dccps_l_ack_ratio = max_ratio;
				743
				744	/* XXX init ~ to window size... */
				745	if (ccid2_hc_tx_alloc_seq(hc))
				746	return -ENOMEM;
				747
				748	hc->tx_rto = DCCP_TIMEOUT_INIT;
				749	hc->tx_rpdupack = -1;
				750	hc->tx_last_cong = hc->tx_lsndtime = hc->tx_cwnd_stamp = ccid2_jiffies32;
				751	hc->tx_cwnd_used = 0;
				752	hc->sk = sk;
				753	timer_setup(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, 0);
				754	INIT_LIST_HEAD(&hc->tx_av_chunks);
				755	return 0;
				756	}
				757
				758	static void ccid2_hc_tx_exit(struct sock *sk)
				759	{
				760	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
				761	int i;
				762
				763	sk_stop_timer(sk, &hc->tx_rtotimer);
				764
				765	for (i = 0; i < hc->tx_seqbufc; i++)
				766	kfree(hc->tx_seqbuf[i]);
				767	hc->tx_seqbufc = 0;
				768	dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks);
				769	}
				770
				771	static void ccid2_hc_rx_packet_recv(struct sock sk, struct sk_buff skb)
				772	{
				773	struct ccid2_hc_rx_sock *hc = ccid2_hc_rx_sk(sk);
				774
				775	if (!dccp_data_packet(skb))
				776	return;
				777
				778	if (++hc->rx_num_data_pkts >= dccp_sk(sk)->dccps_r_ack_ratio) {
				779	dccp_send_ack(sk);
				780	hc->rx_num_data_pkts = 0;
				781	}
				782	}
				783
				784	struct ccid_operations ccid2_ops = {
				785	.ccid_id = DCCPC_CCID2,
				786	.ccid_name = "TCP-like",
				787	.ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock),
				788	.ccid_hc_tx_init = ccid2_hc_tx_init,
				789	.ccid_hc_tx_exit = ccid2_hc_tx_exit,
				790	.ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet,
				791	.ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent,
				792	.ccid_hc_tx_parse_options = ccid2_hc_tx_parse_options,
				793	.ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv,
				794	.ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock),
				795	.ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv,
				796	};
				797
				/* Expose the debug switch as a module parameter when debugging is built in. */
				#if defined(CONFIG_IP_DCCP_CCID2_DEBUG)
				module_param(ccid2_debug, bool, 0644);
				MODULE_PARM_DESC(ccid2_debug, "Enable CCID-2 debug messages");
				#endif