Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index 18d30bb..df98a88 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -1,15 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2013 Cisco Systems, Inc, 2013.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
* Author: Vijay Subramanian <vijaynsu@cisco.com>
* Author: Mythili Prabhu <mysuryan@cisco.com>
*
@@ -17,9 +8,7 @@
* University of Oslo, Norway.
*
* References:
- * IETF draft submission: http://tools.ietf.org/html/draft-pan-aqm-pie-00
- * IEEE Conference on High Performance Switching and Routing 2013 :
- * "PIE: A * Lightweight Control Scheme to Address the Bufferbloat Problem"
+ * RFC 8033: https://tools.ietf.org/html/rfc8033
*/
#include <linux/module.h>
@@ -31,9 +20,9 @@
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
-#define QUEUE_THRESHOLD 10000
+#define QUEUE_THRESHOLD 16384
#define DQCOUNT_INVALID -1
-#define MAX_PROB 0xffffffff
+#define MAX_PROB 0xffffffffffffffff
#define PIE_SCALE 8
/* parameters used */
@@ -49,14 +38,16 @@
/* variables used */
struct pie_vars {
- u32 prob; /* probability but scaled by u32 limit. */
+ u64 prob; /* probability but scaled by u64 limit. */
psched_time_t burst_time;
psched_time_t qdelay;
psched_time_t qdelay_old;
u64 dq_count; /* measured in bytes */
psched_time_t dq_tstamp; /* drain rate */
+ u64 accu_prob; /* accumulated drop probability */
u32 avg_dq_rate; /* bytes per pschedtime tick,scaled */
u32 qlen_old; /* in bytes */
+ u8 accu_prob_overflows; /* overflows of accu_prob */
};
/* statistics gathering */
@@ -81,9 +72,9 @@
{
params->alpha = 2;
params->beta = 20;
- params->tupdate = usecs_to_jiffies(30 * USEC_PER_MSEC); /* 30 ms */
+ params->tupdate = usecs_to_jiffies(15 * USEC_PER_MSEC); /* 15 ms */
params->limit = 1000; /* default of 1000 packets */
- params->target = PSCHED_NS2TICKS(20 * NSEC_PER_MSEC); /* 20 ms */
+ params->target = PSCHED_NS2TICKS(15 * NSEC_PER_MSEC); /* 15 ms */
params->ecn = false;
params->bytemode = false;
}
@@ -91,16 +82,18 @@
static void pie_vars_init(struct pie_vars *vars)
{
vars->dq_count = DQCOUNT_INVALID;
+ vars->accu_prob = 0;
vars->avg_dq_rate = 0;
- /* default of 100 ms in pschedtime */
- vars->burst_time = PSCHED_NS2TICKS(100 * NSEC_PER_MSEC);
+ /* default of 150 ms in pschedtime */
+ vars->burst_time = PSCHED_NS2TICKS(150 * NSEC_PER_MSEC);
+ vars->accu_prob_overflows = 0;
}
static bool drop_early(struct Qdisc *sch, u32 packet_size)
{
struct pie_sched_data *q = qdisc_priv(sch);
- u32 rnd;
- u32 local_prob = q->vars.prob;
+ u64 rnd;
+ u64 local_prob = q->vars.prob;
u32 mtu = psched_mtu(qdisc_dev(sch));
/* If there is still burst allowance left skip random early drop */
@@ -110,8 +103,8 @@
/* If current delay is less than half of target, and
* if drop prob is low already, disable early_drop
*/
- if ((q->vars.qdelay < q->params.target / 2)
- && (q->vars.prob < MAX_PROB / 5))
+ if ((q->vars.qdelay < q->params.target / 2) &&
+ (q->vars.prob < MAX_PROB / 5))
return false;
/* If we have fewer than 2 mtu-sized packets, disable drop_early,
@@ -124,14 +117,34 @@
* probablity. Smaller packets will have lower drop prob in this case
*/
if (q->params.bytemode && packet_size <= mtu)
- local_prob = (local_prob / mtu) * packet_size;
+ local_prob = (u64)packet_size * div_u64(local_prob, mtu);
else
local_prob = q->vars.prob;
- rnd = prandom_u32();
- if (rnd < local_prob)
+ if (local_prob == 0) {
+ q->vars.accu_prob = 0;
+ q->vars.accu_prob_overflows = 0;
+ }
+
+ if (local_prob > MAX_PROB - q->vars.accu_prob)
+ q->vars.accu_prob_overflows++;
+
+ q->vars.accu_prob += local_prob;
+
+ if (q->vars.accu_prob_overflows == 0 &&
+ q->vars.accu_prob < (MAX_PROB / 100) * 85)
+ return false;
+ if (q->vars.accu_prob_overflows == 8 &&
+ q->vars.accu_prob >= MAX_PROB / 2)
return true;
+ prandom_bytes(&rnd, 8);
+ if (rnd < local_prob) {
+ q->vars.accu_prob = 0;
+ q->vars.accu_prob_overflows = 0;
+ return true;
+ }
+
return false;
}
@@ -168,6 +181,8 @@
out:
q->stats.dropped++;
+ q->vars.accu_prob = 0;
+ q->vars.accu_prob_overflows = 0;
return qdisc_drop(skb, sch, to_free);
}
@@ -192,7 +207,8 @@
if (!opt)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_PIE_MAX, opt, pie_policy, NULL);
+ err = nla_parse_nested_deprecated(tb, TCA_PIE_MAX, opt, pie_policy,
+ NULL);
if (err < 0)
return err;
@@ -209,7 +225,8 @@
/* tupdate is in jiffies */
if (tb[TCA_PIE_TUPDATE])
- q->params.tupdate = usecs_to_jiffies(nla_get_u32(tb[TCA_PIE_TUPDATE]));
+ q->params.tupdate =
+ usecs_to_jiffies(nla_get_u32(tb[TCA_PIE_TUPDATE]));
if (tb[TCA_PIE_LIMIT]) {
u32 limit = nla_get_u32(tb[TCA_PIE_LIMIT]);
@@ -247,7 +264,6 @@
static void pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb)
{
-
struct pie_sched_data *q = qdisc_priv(sch);
int qlen = sch->qstats.backlog; /* current queue size in bytes */
@@ -294,9 +310,9 @@
* dq_count to 0 to re-enter the if block when the next
* packet is dequeued
*/
- if (qlen < QUEUE_THRESHOLD)
+ if (qlen < QUEUE_THRESHOLD) {
q->vars.dq_count = DQCOUNT_INVALID;
- else {
+ } else {
q->vars.dq_count = 0;
q->vars.dq_tstamp = psched_get_time();
}
@@ -317,9 +333,10 @@
u32 qlen = sch->qstats.backlog; /* queue size in bytes */
psched_time_t qdelay = 0; /* in pschedtime */
psched_time_t qdelay_old = q->vars.qdelay; /* in pschedtime */
- s32 delta = 0; /* determines the change in probability */
- u32 oldprob;
- u32 alpha, beta;
+ s64 delta = 0; /* determines the change in probability */
+ u64 oldprob;
+ u64 alpha, beta;
+ u32 power;
bool update_prob = true;
q->vars.qdelay_old = q->vars.qdelay;
@@ -339,38 +356,36 @@
* value for alpha as 0.125. In this implementation, we use values 0-32
* passed from user space to represent this. Also, alpha and beta have
* unit of HZ and need to be scaled before they can used to update
- * probability. alpha/beta are updated locally below by 1) scaling them
- * appropriately 2) scaling down by 16 to come to 0-2 range.
- * Please see paper for details.
- *
- * We scale alpha and beta differently depending on whether we are in
- * light, medium or high dropping mode.
+ * probability. alpha/beta are updated locally below by scaling down
+ * by 16 to come to 0-2 range.
*/
- if (q->vars.prob < MAX_PROB / 100) {
- alpha =
- (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7;
- beta =
- (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7;
- } else if (q->vars.prob < MAX_PROB / 10) {
- alpha =
- (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5;
- beta =
- (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5;
- } else {
- alpha =
- (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
- beta =
- (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+ alpha = ((u64)q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+ beta = ((u64)q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+
+ /* We scale alpha and beta differently depending on how heavy the
+ * congestion is. Please see RFC 8033 for details.
+ */
+ if (q->vars.prob < MAX_PROB / 10) {
+ alpha >>= 1;
+ beta >>= 1;
+
+ power = 100;
+ while (q->vars.prob < div_u64(MAX_PROB, power) &&
+ power <= 1000000) {
+ alpha >>= 2;
+ beta >>= 2;
+ power *= 10;
+ }
}
/* alpha and beta should be between 0 and 32, in multiples of 1/16 */
- delta += alpha * ((qdelay - q->params.target));
- delta += beta * ((qdelay - qdelay_old));
+ delta += alpha * (u64)(qdelay - q->params.target);
+ delta += beta * (u64)(qdelay - qdelay_old);
oldprob = q->vars.prob;
/* to ensure we increase probability in steps of no more than 2% */
- if (delta > (s32) (MAX_PROB / (100 / 2)) &&
+ if (delta > (s64)(MAX_PROB / (100 / 2)) &&
q->vars.prob >= MAX_PROB / 10)
delta = (MAX_PROB / 100) * 2;
@@ -405,8 +420,9 @@
* delay is 0 for 2 consecutive Tupdate periods.
*/
- if ((qdelay == 0) && (qdelay_old == 0) && update_prob)
- q->vars.prob = (q->vars.prob * 98) / 100;
+ if (qdelay == 0 && qdelay_old == 0 && update_prob)
+ /* Reduce drop probability to 98.4% */
+ q->vars.prob -= q->vars.prob / 64u;
q->vars.qdelay = qdelay;
q->vars.qlen_old = qlen;
@@ -419,8 +435,8 @@
*/
if ((q->vars.qdelay < q->params.target / 2) &&
(q->vars.qdelay_old < q->params.target / 2) &&
- (q->vars.prob == 0) &&
- (q->vars.avg_dq_rate > 0))
+ q->vars.prob == 0 &&
+ q->vars.avg_dq_rate > 0)
pie_vars_init(&q->vars);
}
@@ -437,7 +453,6 @@
if (q->params.tupdate)
mod_timer(&q->adapt_timer, jiffies + q->params.tupdate);
spin_unlock(root_lock);
-
}
static int pie_init(struct Qdisc *sch, struct nlattr *opt,
@@ -468,16 +483,17 @@
struct pie_sched_data *q = qdisc_priv(sch);
struct nlattr *opts;
- opts = nla_nest_start(skb, TCA_OPTIONS);
- if (opts == NULL)
+ opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
+ if (!opts)
goto nla_put_failure;
/* convert target from pschedtime to us */
if (nla_put_u32(skb, TCA_PIE_TARGET,
- ((u32) PSCHED_TICKS2NS(q->params.target)) /
+ ((u32)PSCHED_TICKS2NS(q->params.target)) /
NSEC_PER_USEC) ||
nla_put_u32(skb, TCA_PIE_LIMIT, sch->limit) ||
- nla_put_u32(skb, TCA_PIE_TUPDATE, jiffies_to_usecs(q->params.tupdate)) ||
+ nla_put_u32(skb, TCA_PIE_TUPDATE,
+ jiffies_to_usecs(q->params.tupdate)) ||
nla_put_u32(skb, TCA_PIE_ALPHA, q->params.alpha) ||
nla_put_u32(skb, TCA_PIE_BETA, q->params.beta) ||
nla_put_u32(skb, TCA_PIE_ECN, q->params.ecn) ||
@@ -489,7 +505,6 @@
nla_put_failure:
nla_nest_cancel(skb, opts);
return -1;
-
}
static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
@@ -497,7 +512,7 @@
struct pie_sched_data *q = qdisc_priv(sch);
struct tc_pie_xstats st = {
.prob = q->vars.prob,
- .delay = ((u32) PSCHED_TICKS2NS(q->vars.qdelay)) /
+ .delay = ((u32)PSCHED_TICKS2NS(q->vars.qdelay)) /
NSEC_PER_USEC,
/* unscale and return dq_rate in bytes per sec */
.avg_dq_rate = q->vars.avg_dq_rate *
@@ -514,8 +529,7 @@
static struct sk_buff *pie_qdisc_dequeue(struct Qdisc *sch)
{
- struct sk_buff *skb;
- skb = qdisc_dequeue_head(sch);
+ struct sk_buff *skb = qdisc_dequeue_head(sch);
if (!skb)
return NULL;
@@ -527,6 +541,7 @@
static void pie_reset(struct Qdisc *sch)
{
struct pie_sched_data *q = qdisc_priv(sch);
+
qdisc_reset_queue(sch);
pie_vars_init(&q->vars);
}
@@ -534,6 +549,7 @@
static void pie_destroy(struct Qdisc *sch)
{
struct pie_sched_data *q = qdisc_priv(sch);
+
q->params.tupdate = 0;
del_timer_sync(&q->adapt_timer);
}