Update Linux to v5.4.2

Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index 18d30bb..df98a88 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -1,15 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (C) 2013 Cisco Systems, Inc, 2013.
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Author: Vijay Subramanian <vijaynsu@cisco.com>
  * Author: Mythili Prabhu <mysuryan@cisco.com>
  *
@@ -17,9 +8,7 @@
  * University of Oslo, Norway.
  *
  * References:
- * IETF draft submission: http://tools.ietf.org/html/draft-pan-aqm-pie-00
- * IEEE  Conference on High Performance Switching and Routing 2013 :
- * "PIE: A * Lightweight Control Scheme to Address the Bufferbloat Problem"
+ * RFC 8033: https://tools.ietf.org/html/rfc8033
  */
 
 #include <linux/module.h>
@@ -31,9 +20,9 @@
 #include <net/pkt_sched.h>
 #include <net/inet_ecn.h>
 
-#define QUEUE_THRESHOLD 10000
+#define QUEUE_THRESHOLD 16384
 #define DQCOUNT_INVALID -1
-#define MAX_PROB  0xffffffff
+#define MAX_PROB 0xffffffffffffffff
 #define PIE_SCALE 8
 
 /* parameters used */
@@ -49,14 +38,16 @@
 
 /* variables used */
 struct pie_vars {
-	u32 prob;		/* probability but scaled by u32 limit. */
+	u64 prob;		/* probability but scaled by u64 limit. */
 	psched_time_t burst_time;
 	psched_time_t qdelay;
 	psched_time_t qdelay_old;
 	u64 dq_count;		/* measured in bytes */
 	psched_time_t dq_tstamp;	/* drain rate */
+	u64 accu_prob;		/* accumulated drop probability */
 	u32 avg_dq_rate;	/* bytes per pschedtime tick,scaled */
 	u32 qlen_old;		/* in bytes */
+	u8 accu_prob_overflows;	/* overflows of accu_prob */
 };
 
 /* statistics gathering */
@@ -81,9 +72,9 @@
 {
 	params->alpha = 2;
 	params->beta = 20;
-	params->tupdate = usecs_to_jiffies(30 * USEC_PER_MSEC);	/* 30 ms */
+	params->tupdate = usecs_to_jiffies(15 * USEC_PER_MSEC);	/* 15 ms */
 	params->limit = 1000;	/* default of 1000 packets */
-	params->target = PSCHED_NS2TICKS(20 * NSEC_PER_MSEC);	/* 20 ms */
+	params->target = PSCHED_NS2TICKS(15 * NSEC_PER_MSEC);	/* 15 ms */
 	params->ecn = false;
 	params->bytemode = false;
 }
@@ -91,16 +82,18 @@
 static void pie_vars_init(struct pie_vars *vars)
 {
 	vars->dq_count = DQCOUNT_INVALID;
+	vars->accu_prob = 0;
 	vars->avg_dq_rate = 0;
-	/* default of 100 ms in pschedtime */
-	vars->burst_time = PSCHED_NS2TICKS(100 * NSEC_PER_MSEC);
+	/* default of 150 ms in pschedtime */
+	vars->burst_time = PSCHED_NS2TICKS(150 * NSEC_PER_MSEC);
+	vars->accu_prob_overflows = 0;
 }
 
 static bool drop_early(struct Qdisc *sch, u32 packet_size)
 {
 	struct pie_sched_data *q = qdisc_priv(sch);
-	u32 rnd;
-	u32 local_prob = q->vars.prob;
+	u64 rnd;
+	u64 local_prob = q->vars.prob;
 	u32 mtu = psched_mtu(qdisc_dev(sch));
 
 	/* If there is still burst allowance left skip random early drop */
@@ -110,8 +103,8 @@
 	/* If current delay is less than half of target, and
 	 * if drop prob is low already, disable early_drop
 	 */
-	if ((q->vars.qdelay < q->params.target / 2)
-	    && (q->vars.prob < MAX_PROB / 5))
+	if ((q->vars.qdelay < q->params.target / 2) &&
+	    (q->vars.prob < MAX_PROB / 5))
 		return false;
 
 	/* If we have fewer than 2 mtu-sized packets, disable drop_early,
@@ -124,14 +117,34 @@
 	 * probablity. Smaller packets will have lower drop prob in this case
 	 */
 	if (q->params.bytemode && packet_size <= mtu)
-		local_prob = (local_prob / mtu) * packet_size;
+		local_prob = (u64)packet_size * div_u64(local_prob, mtu);
 	else
 		local_prob = q->vars.prob;
 
-	rnd = prandom_u32();
-	if (rnd < local_prob)
+	if (local_prob == 0) {
+		q->vars.accu_prob = 0;
+		q->vars.accu_prob_overflows = 0;
+	}
+
+	if (local_prob > MAX_PROB - q->vars.accu_prob)
+		q->vars.accu_prob_overflows++;
+
+	q->vars.accu_prob += local_prob;
+
+	if (q->vars.accu_prob_overflows == 0 &&
+	    q->vars.accu_prob < (MAX_PROB / 100) * 85)
+		return false;
+	if (q->vars.accu_prob_overflows == 8 &&
+	    q->vars.accu_prob >= MAX_PROB / 2)
 		return true;
 
+	prandom_bytes(&rnd, 8);
+	if (rnd < local_prob) {
+		q->vars.accu_prob = 0;
+		q->vars.accu_prob_overflows = 0;
+		return true;
+	}
+
 	return false;
 }
 
@@ -168,6 +181,8 @@
 
 out:
 	q->stats.dropped++;
+	q->vars.accu_prob = 0;
+	q->vars.accu_prob_overflows = 0;
 	return qdisc_drop(skb, sch, to_free);
 }
 
@@ -192,7 +207,8 @@
 	if (!opt)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_PIE_MAX, opt, pie_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_PIE_MAX, opt, pie_policy,
+					  NULL);
 	if (err < 0)
 		return err;
 
@@ -209,7 +225,8 @@
 
 	/* tupdate is in jiffies */
 	if (tb[TCA_PIE_TUPDATE])
-		q->params.tupdate = usecs_to_jiffies(nla_get_u32(tb[TCA_PIE_TUPDATE]));
+		q->params.tupdate =
+			usecs_to_jiffies(nla_get_u32(tb[TCA_PIE_TUPDATE]));
 
 	if (tb[TCA_PIE_LIMIT]) {
 		u32 limit = nla_get_u32(tb[TCA_PIE_LIMIT]);
@@ -247,7 +264,6 @@
 
 static void pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb)
 {
-
 	struct pie_sched_data *q = qdisc_priv(sch);
 	int qlen = sch->qstats.backlog;	/* current queue size in bytes */
 
@@ -294,9 +310,9 @@
 			 * dq_count to 0 to re-enter the if block when the next
 			 * packet is dequeued
 			 */
-			if (qlen < QUEUE_THRESHOLD)
+			if (qlen < QUEUE_THRESHOLD) {
 				q->vars.dq_count = DQCOUNT_INVALID;
-			else {
+			} else {
 				q->vars.dq_count = 0;
 				q->vars.dq_tstamp = psched_get_time();
 			}
@@ -317,9 +333,10 @@
 	u32 qlen = sch->qstats.backlog;	/* queue size in bytes */
 	psched_time_t qdelay = 0;	/* in pschedtime */
 	psched_time_t qdelay_old = q->vars.qdelay;	/* in pschedtime */
-	s32 delta = 0;		/* determines the change in probability */
-	u32 oldprob;
-	u32 alpha, beta;
+	s64 delta = 0;		/* determines the change in probability */
+	u64 oldprob;
+	u64 alpha, beta;
+	u32 power;
 	bool update_prob = true;
 
 	q->vars.qdelay_old = q->vars.qdelay;
@@ -339,38 +356,36 @@
 	 * value for alpha as 0.125. In this implementation, we use values 0-32
 	 * passed from user space to represent this. Also, alpha and beta have
 	 * unit of HZ and need to be scaled before they can used to update
-	 * probability. alpha/beta are updated locally below by 1) scaling them
-	 * appropriately 2) scaling down by 16 to come to 0-2 range.
-	 * Please see paper for details.
-	 *
-	 * We scale alpha and beta differently depending on whether we are in
-	 * light, medium or high dropping mode.
+	 * probability. alpha/beta are updated locally below by scaling down
+	 * by 16 to come to 0-2 range.
 	 */
-	if (q->vars.prob < MAX_PROB / 100) {
-		alpha =
-		    (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7;
-		beta =
-		    (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7;
-	} else if (q->vars.prob < MAX_PROB / 10) {
-		alpha =
-		    (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5;
-		beta =
-		    (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5;
-	} else {
-		alpha =
-		    (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
-		beta =
-		    (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+	alpha = ((u64)q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+	beta = ((u64)q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+
+	/* We scale alpha and beta differently depending on how heavy the
+	 * congestion is. Please see RFC 8033 for details.
+	 */
+	if (q->vars.prob < MAX_PROB / 10) {
+		alpha >>= 1;
+		beta >>= 1;
+
+		power = 100;
+		while (q->vars.prob < div_u64(MAX_PROB, power) &&
+		       power <= 1000000) {
+			alpha >>= 2;
+			beta >>= 2;
+			power *= 10;
+		}
 	}
 
 	/* alpha and beta should be between 0 and 32, in multiples of 1/16 */
-	delta += alpha * ((qdelay - q->params.target));
-	delta += beta * ((qdelay - qdelay_old));
+	delta += alpha * (u64)(qdelay - q->params.target);
+	delta += beta * (u64)(qdelay - qdelay_old);
 
 	oldprob = q->vars.prob;
 
 	/* to ensure we increase probability in steps of no more than 2% */
-	if (delta > (s32) (MAX_PROB / (100 / 2)) &&
+	if (delta > (s64)(MAX_PROB / (100 / 2)) &&
 	    q->vars.prob >= MAX_PROB / 10)
 		delta = (MAX_PROB / 100) * 2;
 
@@ -405,8 +420,9 @@
 	 * delay is 0 for 2 consecutive Tupdate periods.
 	 */
 
-	if ((qdelay == 0) && (qdelay_old == 0) && update_prob)
-		q->vars.prob = (q->vars.prob * 98) / 100;
+	if (qdelay == 0 && qdelay_old == 0 && update_prob)
+		/* Reduce drop probability to 98.4% */
+		q->vars.prob -= q->vars.prob / 64u;
 
 	q->vars.qdelay = qdelay;
 	q->vars.qlen_old = qlen;
@@ -419,8 +435,8 @@
 	 */
 	if ((q->vars.qdelay < q->params.target / 2) &&
 	    (q->vars.qdelay_old < q->params.target / 2) &&
-	    (q->vars.prob == 0) &&
-	    (q->vars.avg_dq_rate > 0))
+	    q->vars.prob == 0 &&
+	    q->vars.avg_dq_rate > 0)
 		pie_vars_init(&q->vars);
 }
 
@@ -437,7 +453,6 @@
 	if (q->params.tupdate)
 		mod_timer(&q->adapt_timer, jiffies + q->params.tupdate);
 	spin_unlock(root_lock);
-
 }
 
 static int pie_init(struct Qdisc *sch, struct nlattr *opt,
@@ -468,16 +483,17 @@
 	struct pie_sched_data *q = qdisc_priv(sch);
 	struct nlattr *opts;
 
-	opts = nla_nest_start(skb, TCA_OPTIONS);
-	if (opts == NULL)
+	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
+	if (!opts)
 		goto nla_put_failure;
 
 	/* convert target from pschedtime to us */
 	if (nla_put_u32(skb, TCA_PIE_TARGET,
-			((u32) PSCHED_TICKS2NS(q->params.target)) /
+			((u32)PSCHED_TICKS2NS(q->params.target)) /
 			NSEC_PER_USEC) ||
 	    nla_put_u32(skb, TCA_PIE_LIMIT, sch->limit) ||
-	    nla_put_u32(skb, TCA_PIE_TUPDATE, jiffies_to_usecs(q->params.tupdate)) ||
+	    nla_put_u32(skb, TCA_PIE_TUPDATE,
+			jiffies_to_usecs(q->params.tupdate)) ||
 	    nla_put_u32(skb, TCA_PIE_ALPHA, q->params.alpha) ||
 	    nla_put_u32(skb, TCA_PIE_BETA, q->params.beta) ||
 	    nla_put_u32(skb, TCA_PIE_ECN, q->params.ecn) ||
@@ -489,7 +505,6 @@
 nla_put_failure:
 	nla_nest_cancel(skb, opts);
 	return -1;
-
 }
 
 static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
@@ -497,7 +512,7 @@
 	struct pie_sched_data *q = qdisc_priv(sch);
 	struct tc_pie_xstats st = {
 		.prob		= q->vars.prob,
-		.delay		= ((u32) PSCHED_TICKS2NS(q->vars.qdelay)) /
+		.delay		= ((u32)PSCHED_TICKS2NS(q->vars.qdelay)) /
 				   NSEC_PER_USEC,
 		/* unscale and return dq_rate in bytes per sec */
 		.avg_dq_rate	= q->vars.avg_dq_rate *
@@ -514,8 +529,7 @@
 
 static struct sk_buff *pie_qdisc_dequeue(struct Qdisc *sch)
 {
-	struct sk_buff *skb;
-	skb = qdisc_dequeue_head(sch);
+	struct sk_buff *skb = qdisc_dequeue_head(sch);
 
 	if (!skb)
 		return NULL;
@@ -527,6 +541,7 @@
 static void pie_reset(struct Qdisc *sch)
 {
 	struct pie_sched_data *q = qdisc_priv(sch);
+
 	qdisc_reset_queue(sch);
 	pie_vars_init(&q->vars);
 }
@@ -534,6 +549,7 @@
 static void pie_destroy(struct Qdisc *sch)
 {
 	struct pie_sched_data *q = qdisc_priv(sch);
+
 	q->params.tupdate = 0;
 	del_timer_sync(&q->adapt_timer);
 }