Update Linux to v5.4.148
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.4.148.tar.gz
Change-Id: Ib3d26c5ba9b022e2e03533005c4fed4d7c30b61b
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 69d4676..75132d0 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -231,6 +231,8 @@
index++;
if (index < s_i)
continue;
+ if (IS_ERR(p))
+ continue;
if (jiffy_since &&
time_after(jiffy_since,
@@ -303,6 +305,8 @@
mutex_lock(&idrinfo->lock);
idr_for_each_entry_ul(idr, p, tmp, id) {
+ if (IS_ERR(p))
+ continue;
ret = tcf_idr_release_unsafe(p);
if (ret == ACT_P_DELETED) {
module_put(ops->owner);
@@ -314,7 +318,8 @@
}
mutex_unlock(&idrinfo->lock);
- if (nla_put_u32(skb, TCA_FCNT, n_i))
+ ret = nla_put_u32(skb, TCA_FCNT, n_i);
+ if (ret)
goto nla_put_failure;
nla_nest_end(skb, nest);
@@ -451,17 +456,6 @@
}
EXPORT_SYMBOL(tcf_idr_create);
-void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a)
-{
- struct tcf_idrinfo *idrinfo = tn->idrinfo;
-
- mutex_lock(&idrinfo->lock);
- /* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */
- WARN_ON(!IS_ERR(idr_replace(&idrinfo->action_idr, a, a->tcfa_index)));
- mutex_unlock(&idrinfo->lock);
-}
-EXPORT_SYMBOL(tcf_idr_insert);
-
/* Cleanup idr index that was allocated but not initialized. */
void tcf_idr_cleanup(struct tc_action_net *tn, u32 index)
@@ -715,13 +709,6 @@
return ret;
}
-static int tcf_action_destroy_1(struct tc_action *a, int bind)
-{
- struct tc_action *actions[] = { a, NULL };
-
- return tcf_action_destroy(actions, bind);
-}
-
static int tcf_action_put(struct tc_action *p)
{
return __tcf_action_put(p, false);
@@ -839,6 +826,26 @@
[TCA_ACT_OPTIONS] = { .type = NLA_NESTED },
};
+void tcf_idr_insert_many(struct tc_action *actions[])
+{
+ int i;
+
+ for (i = 0; i < TCA_ACT_MAX_PRIO; i++) {
+ struct tc_action *a = actions[i];
+ struct tcf_idrinfo *idrinfo;
+
+ if (!a)
+ continue;
+ idrinfo = a->idrinfo;
+ mutex_lock(&idrinfo->lock);
+ /* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc if
+ * it is just created, otherwise this is just a nop.
+ */
+ idr_replace(&idrinfo->action_idr, a, a->tcfa_index);
+ mutex_unlock(&idrinfo->lock);
+ }
+}
+
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
struct nlattr *nla, struct nlattr *est,
char *name, int ovr, int bind,
@@ -931,12 +938,8 @@
if (err != ACT_P_CREATED)
module_put(a_o->owner);
- if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN) &&
- !rcu_access_pointer(a->goto_chain)) {
- tcf_action_destroy_1(a, bind);
- NL_SET_ERR_MSG(extack, "can't use goto chain with NULL chain");
- return ERR_PTR(-EINVAL);
- }
+ if (!bind && ovr && err == ACT_P_CREATED)
+ refcount_set(&a->tcfa_refcnt, 2);
return a;
@@ -981,6 +984,11 @@
actions[i - 1] = act;
}
+ /* We have to commit them all together, because if any error happened in
+ * between, we could not handle the failure gracefully.
+ */
+ tcf_idr_insert_many(actions);
+
*attr_size = tcf_action_full_attrs_size(sz);
return i - 1;
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 04b7bd4..b7d83d0 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -361,9 +361,7 @@
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
- if (res == ACT_P_CREATED) {
- tcf_idr_insert(tn, *act);
- } else {
+ if (res != ACT_P_CREATED) {
/* make sure the program being replaced is no longer executing */
synchronize_rcu();
tcf_bpf_cfg_cleanup(&old);
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 2b43cac..b00105e 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -43,17 +43,20 @@
tcf_lastuse_update(&ca->tcf_tm);
bstats_update(&ca->tcf_bstats, skb);
- if (skb->protocol == htons(ETH_P_IP)) {
+ switch (skb_protocol(skb, true)) {
+ case htons(ETH_P_IP):
if (skb->len < sizeof(struct iphdr))
goto out;
proto = NFPROTO_IPV4;
- } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ break;
+ case htons(ETH_P_IPV6):
if (skb->len < sizeof(struct ipv6hdr))
goto out;
proto = NFPROTO_IPV6;
- } else {
+ break;
+ default:
goto out;
}
@@ -136,7 +139,6 @@
ci->net = net;
ci->zone = parm->zone;
- tcf_idr_insert(tn, *a);
ret = ACT_P_CREATED;
} else if (ret > 0) {
ci = to_connmark(*a);
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index d3cfad8..fa1b1fd 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -110,9 +110,6 @@
if (params_new)
kfree_rcu(params_new, rcu);
- if (ret == ACT_P_CREATED)
- tcf_idr_insert(tn, *a);
-
return ret;
put_chain:
if (goto_ch)
@@ -587,7 +584,7 @@
goto drop;
update_flags = params->update_flags;
- protocol = tc_skb_protocol(skb);
+ protocol = skb_protocol(skb, false);
again:
switch (protocol) {
case cpu_to_be16(ETH_P_IP):
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index fcc4602..02d4491 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -100,7 +100,7 @@
{
u8 family = NFPROTO_UNSPEC;
- switch (skb->protocol) {
+ switch (skb_protocol(skb, true)) {
case htons(ETH_P_IP):
family = NFPROTO_IPV4;
break;
@@ -186,7 +186,7 @@
memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
err = nf_ct_frag6_gather(net, skb, user);
if (err && err != -EINPROGRESS)
- goto out_free;
+ return err;
#else
err = -EOPNOTSUPP;
goto out_free;
@@ -222,6 +222,7 @@
const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype)
{
+ __be16 proto = skb_protocol(skb, true);
int hooknum, err = NF_ACCEPT;
/* See HOOK2MANIP(). */
@@ -233,14 +234,13 @@
switch (ctinfo) {
case IP_CT_RELATED:
case IP_CT_RELATED_REPLY:
- if (skb->protocol == htons(ETH_P_IP) &&
+ if (proto == htons(ETH_P_IP) &&
ip_hdr(skb)->protocol == IPPROTO_ICMP) {
if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
hooknum))
err = NF_DROP;
goto out;
- } else if (IS_ENABLED(CONFIG_IPV6) &&
- skb->protocol == htons(ETH_P_IPV6)) {
+ } else if (IS_ENABLED(CONFIG_IPV6) && proto == htons(ETH_P_IPV6)) {
__be16 frag_off;
u8 nexthdr = ipv6_hdr(skb)->nexthdr;
int hdrlen = ipv6_skip_exthdr(skb,
@@ -329,6 +329,7 @@
bool commit)
{
#if IS_ENABLED(CONFIG_NF_NAT)
+ int err;
enum nf_nat_manip_type maniptype;
if (!(ct_action & TCA_CT_ACT_NAT))
@@ -359,7 +360,22 @@
return NF_ACCEPT;
}
- return ct_nat_execute(skb, ct, ctinfo, range, maniptype);
+ err = ct_nat_execute(skb, ct, ctinfo, range, maniptype);
+ if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) {
+ if (ct->status & IPS_SRC_NAT) {
+ if (maniptype == NF_NAT_MANIP_SRC)
+ maniptype = NF_NAT_MANIP_DST;
+ else
+ maniptype = NF_NAT_MANIP_SRC;
+
+ err = ct_nat_execute(skb, ct, ctinfo, range,
+ maniptype);
+ } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
+ err = ct_nat_execute(skb, ct, ctinfo, NULL,
+ NF_NAT_MANIP_SRC);
+ }
+ }
+ return err;
#else
return NF_ACCEPT;
#endif
@@ -458,7 +474,8 @@
/* This will take care of sending queued events
* even if the connection is already confirmed.
*/
- nf_conntrack_confirm(skb);
+ if (nf_conntrack_confirm(skb) != NF_ACCEPT)
+ goto drop;
}
out_push:
@@ -637,9 +654,6 @@
sizeof(p->zone));
}
- if (p->zone == NF_CT_DEFAULT_ZONE_ID)
- return 0;
-
nf_ct_zone_init(&zone, p->zone, NF_CT_DEFAULT_ZONE_DIR, 0);
tmpl = nf_ct_tmpl_alloc(net, &zone, GFP_KERNEL);
if (!tmpl) {
@@ -728,9 +742,7 @@
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
if (params)
- kfree_rcu(params, rcu);
- if (res == ACT_P_CREATED)
- tcf_idr_insert(tn, *a);
+ call_rcu(¶ms->rcu, tcf_ct_params_free);
return res;
@@ -982,4 +994,3 @@
MODULE_AUTHOR("Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>");
MODULE_DESCRIPTION("Connection tracking action");
MODULE_LICENSE("GPL v2");
-
diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
index 0dbcfd1..9722204 100644
--- a/net/sched/act_ctinfo.c
+++ b/net/sched/act_ctinfo.c
@@ -96,19 +96,22 @@
action = READ_ONCE(ca->tcf_action);
wlen = skb_network_offset(skb);
- if (tc_skb_protocol(skb) == htons(ETH_P_IP)) {
+ switch (skb_protocol(skb, true)) {
+ case htons(ETH_P_IP):
wlen += sizeof(struct iphdr);
if (!pskb_may_pull(skb, wlen))
goto out;
proto = NFPROTO_IPV4;
- } else if (tc_skb_protocol(skb) == htons(ETH_P_IPV6)) {
+ break;
+ case htons(ETH_P_IPV6):
wlen += sizeof(struct ipv6hdr);
if (!pskb_may_pull(skb, wlen))
goto out;
proto = NFPROTO_IPV6;
- } else {
+ break;
+ default:
goto out;
}
@@ -266,9 +269,6 @@
if (cp_new)
kfree_rcu(cp_new, rcu);
- if (ret == ACT_P_CREATED)
- tcf_idr_insert(tn, *a);
-
return ret;
put_chain:
@@ -360,6 +360,16 @@
return tcf_idr_search(tn, a, index);
}
+static void tcf_ctinfo_cleanup(struct tc_action *a)
+{
+ struct tcf_ctinfo *ci = to_ctinfo(a);
+ struct tcf_ctinfo_params *cp;
+
+ cp = rcu_dereference_protected(ci->params, 1);
+ if (cp)
+ kfree_rcu(cp, rcu);
+}
+
static struct tc_action_ops act_ctinfo_ops = {
.kind = "ctinfo",
.id = TCA_ID_CTINFO,
@@ -367,6 +377,7 @@
.act = tcf_ctinfo_act,
.dump = tcf_ctinfo_dump,
.init = tcf_ctinfo_init,
+ .cleanup= tcf_ctinfo_cleanup,
.walk = tcf_ctinfo_walker,
.lookup = tcf_ctinfo_search,
.size = sizeof(struct tcf_ctinfo),
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 324f1d1..faf68a4 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -139,8 +139,6 @@
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
- if (ret == ACT_P_CREATED)
- tcf_idr_insert(tn, *a);
return ret;
release_idr:
tcf_idr_release(*a, bind);
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 3a31e24..488d104 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -436,6 +436,25 @@
kfree_rcu(p, rcu);
}
+static int load_metalist(struct nlattr **tb, bool rtnl_held)
+{
+ int i;
+
+ for (i = 1; i < max_metacnt; i++) {
+ if (tb[i]) {
+ void *val = nla_data(tb[i]);
+ int len = nla_len(tb[i]);
+ int rc;
+
+ rc = load_metaops_and_vet(i, val, len, rtnl_held);
+ if (rc != 0)
+ return rc;
+ }
+ }
+
+ return 0;
+}
+
static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
bool exists, bool rtnl_held)
{
@@ -449,10 +468,6 @@
val = nla_data(tb[i]);
len = nla_len(tb[i]);
- rc = load_metaops_and_vet(i, val, len, rtnl_held);
- if (rc != 0)
- return rc;
-
rc = add_metainfo(ife, i, val, len, exists);
if (rc)
return rc;
@@ -508,6 +523,21 @@
if (!p)
return -ENOMEM;
+ if (tb[TCA_IFE_METALST]) {
+ err = nla_parse_nested_deprecated(tb2, IFE_META_MAX,
+ tb[TCA_IFE_METALST], NULL,
+ NULL);
+ if (err) {
+ kfree(p);
+ return err;
+ }
+ err = load_metalist(tb2, rtnl_held);
+ if (err) {
+ kfree(p);
+ return err;
+ }
+ }
+
index = parm->index;
err = tcf_idr_check_alloc(tn, &index, a, bind);
if (err < 0) {
@@ -536,6 +566,9 @@
}
ife = to_ife(*a);
+ if (ret == ACT_P_CREATED)
+ INIT_LIST_HEAD(&ife->metalist);
+
err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
if (err < 0)
goto release_idr;
@@ -565,20 +598,10 @@
p->eth_type = ife_type;
}
-
- if (ret == ACT_P_CREATED)
- INIT_LIST_HEAD(&ife->metalist);
-
if (tb[TCA_IFE_METALST]) {
- err = nla_parse_nested_deprecated(tb2, IFE_META_MAX,
- tb[TCA_IFE_METALST], NULL,
- NULL);
- if (err)
- goto metadata_parse_err;
err = populate_metalist(ife, tb2, exists, rtnl_held);
if (err)
goto metadata_parse_err;
-
} else {
/* if no passed metadata allow list or passed allow-all
* then here we process by adding as many supported metadatum
@@ -603,9 +626,6 @@
if (p)
kfree_rcu(p, rcu);
- if (ret == ACT_P_CREATED)
- tcf_idr_insert(tn, *a);
-
return ret;
metadata_parse_err:
if (goto_ch)
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 214a03d..02b0cb6 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -189,8 +189,6 @@
ipt->tcfi_t = t;
ipt->tcfi_hook = hook;
spin_unlock_bh(&ipt->tcf_lock);
- if (ret == ACT_P_CREATED)
- tcf_idr_insert(tn, *a);
return ret;
err3:
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 08923b2..e3ff884 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -194,8 +194,6 @@
spin_lock(&mirred_list_lock);
list_add(&m->tcfm_list, &mirred_list);
spin_unlock(&mirred_list_lock);
-
- tcf_idr_insert(tn, *a);
}
return ret;
@@ -219,8 +217,10 @@
bool use_reinsert;
bool want_ingress;
bool is_redirect;
+ bool expects_nh;
int m_eaction;
int mac_len;
+ bool at_nh;
rec_level = __this_cpu_inc_return(mirred_rec_level);
if (unlikely(rec_level > MIRRED_RECURSION_LIMIT)) {
@@ -261,19 +261,22 @@
goto out;
}
- /* If action's target direction differs than filter's direction,
- * and devices expect a mac header on xmit, then mac push/pull is
- * needed.
- */
+ /* All mirred/redirected skbs should clear previous ct info */
+ nf_reset_ct(skb2);
+
want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
- if (skb_at_tc_ingress(skb) != want_ingress && m_mac_header_xmit) {
- if (!skb_at_tc_ingress(skb)) {
- /* caught at egress, act ingress: pull mac */
- mac_len = skb_network_header(skb) - skb_mac_header(skb);
+
+ expects_nh = want_ingress || !m_mac_header_xmit;
+ at_nh = skb->data == skb_network_header(skb);
+ if (at_nh != expects_nh) {
+ mac_len = skb_at_tc_ingress(skb) ? skb->mac_len :
+ skb_network_header(skb) - skb_mac_header(skb);
+ if (expects_nh) {
+ /* target device/action expect data at nh */
skb_pull_rcsum(skb2, mac_len);
} else {
- /* caught at ingress, act egress: push mac */
- skb_push_rcsum(skb2, skb->mac_len);
+ /* target device/action expect data at mac */
+ skb_push_rcsum(skb2, mac_len);
}
}
@@ -282,10 +285,8 @@
/* mirror is always swallowed */
if (is_redirect) {
- skb2->tc_redirected = 1;
- skb2->tc_from_ingress = skb2->tc_at_ingress;
- if (skb2->tc_from_ingress)
- skb2->tstamp = 0;
+ skb_set_redirected(skb2, skb2->tc_at_ingress);
+
/* let's the caller reinsert the packet, if possible */
if (use_reinsert) {
res->ingress = want_ingress;
diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
index 4cf6c55..0fccae3 100644
--- a/net/sched/act_mpls.c
+++ b/net/sched/act_mpls.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2019 Netronome Systems, Inc. */
+#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -76,15 +77,20 @@
switch (p->tcfm_action) {
case TCA_MPLS_ACT_POP:
- if (skb_mpls_pop(skb, p->tcfm_proto, mac_len))
+ if (skb_mpls_pop(skb, p->tcfm_proto, mac_len,
+ skb->dev && skb->dev->type == ARPHRD_ETHER))
goto drop;
break;
case TCA_MPLS_ACT_PUSH:
- new_lse = tcf_mpls_get_lse(NULL, p, !eth_p_mpls(skb->protocol));
- if (skb_mpls_push(skb, new_lse, p->tcfm_proto, mac_len))
+ new_lse = tcf_mpls_get_lse(NULL, p, !eth_p_mpls(skb_protocol(skb, true)));
+ if (skb_mpls_push(skb, new_lse, p->tcfm_proto, mac_len,
+ skb->dev && skb->dev->type == ARPHRD_ETHER))
goto drop;
break;
case TCA_MPLS_ACT_MODIFY:
+ if (!pskb_may_pull(skb,
+ skb_network_offset(skb) + MPLS_HLEN))
+ goto drop;
new_lse = tcf_mpls_get_lse(mpls_hdr(skb), p, false);
if (skb_mpls_update_lse(skb, new_lse))
goto drop;
@@ -270,8 +276,6 @@
if (p)
kfree_rcu(p, rcu);
- if (ret == ACT_P_CREATED)
- tcf_idr_insert(tn, *a);
return ret;
put_chain:
if (goto_ch)
@@ -407,6 +411,7 @@
module_init(mpls_init_module);
module_exit(mpls_cleanup_module);
+MODULE_SOFTDEP("post: mpls_gso");
MODULE_AUTHOR("Netronome Systems <oss-drivers@netronome.com>");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("MPLS manipulation actions");
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index ea4c535..8b5d236 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -93,9 +93,6 @@
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
- if (ret == ACT_P_CREATED)
- tcf_idr_insert(tn, *a);
-
return ret;
release_idr:
tcf_idr_release(*a, bind);
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index b5bc631..ff4f243 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -237,8 +237,6 @@
spin_unlock_bh(&p->tcf_lock);
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
- if (ret == ACT_P_CREATED)
- tcf_idr_insert(tn, *a);
return ret;
put_chain:
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 89c04c5..8fd23a8 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -201,8 +201,6 @@
if (new)
kfree_rcu(new, rcu);
- if (ret == ACT_P_CREATED)
- tcf_idr_insert(tn, *a);
return ret;
failure:
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 514456a..74450b0 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -116,8 +116,6 @@
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
- if (ret == ACT_P_CREATED)
- tcf_idr_insert(tn, *a);
return ret;
put_chain:
if (goto_ch)
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 6120e56..6b0617d 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -156,8 +156,6 @@
goto release_idr;
}
- if (ret == ACT_P_CREATED)
- tcf_idr_insert(tn, *a);
return ret;
put_chain:
if (goto_ch)
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 6a8d333..f736da5 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -41,7 +41,7 @@
if (params->flags & SKBEDIT_F_INHERITDSFIELD) {
int wlen = skb_network_offset(skb);
- switch (tc_skb_protocol(skb)) {
+ switch (skb_protocol(skb, true)) {
case htons(ETH_P_IP):
wlen += sizeof(struct iphdr);
if (!pskb_may_pull(skb, wlen))
@@ -214,8 +214,6 @@
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
- if (ret == ACT_P_CREATED)
- tcf_idr_insert(tn, *a);
return ret;
put_chain:
if (goto_ch)
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 888437f..f60d349 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -6,6 +6,7 @@
*/
#include <linux/module.h>
+#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
@@ -33,6 +34,13 @@
tcf_lastuse_update(&d->tcf_tm);
bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
+ action = READ_ONCE(d->tcf_action);
+ if (unlikely(action == TC_ACT_SHOT))
+ goto drop;
+
+ if (!skb->dev || skb->dev->type != ARPHRD_ETHER)
+ return action;
+
/* XXX: if you are going to edit more fields beyond ethernet header
* (example when you add IP header replacement or vlan swap)
* then MAX_EDIT_LEN needs to change appropriately
@@ -41,10 +49,6 @@
if (unlikely(err)) /* best policy is to drop on the floor */
goto drop;
- action = READ_ONCE(d->tcf_action);
- if (unlikely(action == TC_ACT_SHOT))
- goto drop;
-
p = rcu_dereference_bh(d->skbmod_p);
flags = p->flags;
if (flags & SKBMOD_F_DMAC)
@@ -190,8 +194,6 @@
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
- if (ret == ACT_P_CREATED)
- tcf_idr_insert(tn, *a);
return ret;
put_chain:
if (goto_ch)
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index d55669e..a5a2bf0 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -315,7 +315,7 @@
metadata = __ipv6_tun_set_dst(&saddr, &daddr, tos, ttl, dst_port,
0, flags,
- key_id, 0);
+ key_id, opts_len);
} else {
NL_SET_ERR_MSG(extack, "Missing either ipv4 or ipv6 src and dst");
ret = -EINVAL;
@@ -392,9 +392,6 @@
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
- if (ret == ACT_P_CREATED)
- tcf_idr_insert(tn, *a);
-
return ret;
put_chain:
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 08aaf71..7dc76c6 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -70,7 +70,7 @@
/* replace the vid */
tci = (tci & ~VLAN_VID_MASK) | p->tcfv_push_vid;
/* replace prio bits, if tcfv_push_prio specified */
- if (p->tcfv_push_prio) {
+ if (p->tcfv_push_prio_exists) {
tci &= ~VLAN_PRIO_MASK;
tci |= p->tcfv_push_prio << VLAN_PRIO_SHIFT;
}
@@ -107,6 +107,7 @@
struct tc_action_net *tn = net_generic(net, vlan_net_id);
struct nlattr *tb[TCA_VLAN_MAX + 1];
struct tcf_chain *goto_ch = NULL;
+ bool push_prio_exists = false;
struct tcf_vlan_params *p;
struct tc_vlan *parm;
struct tcf_vlan *v;
@@ -175,7 +176,8 @@
push_proto = htons(ETH_P_8021Q);
}
- if (tb[TCA_VLAN_PUSH_VLAN_PRIORITY])
+ push_prio_exists = !!tb[TCA_VLAN_PUSH_VLAN_PRIORITY];
+ if (push_prio_exists)
push_prio = nla_get_u8(tb[TCA_VLAN_PUSH_VLAN_PRIORITY]);
break;
default:
@@ -216,6 +218,7 @@
p->tcfv_action = action;
p->tcfv_push_vid = push_vid;
p->tcfv_push_prio = push_prio;
+ p->tcfv_push_prio_exists = push_prio_exists || action == TCA_VLAN_ACT_PUSH;
p->tcfv_push_proto = push_proto;
spin_lock_bh(&v->tcf_lock);
@@ -228,8 +231,6 @@
if (p)
kfree_rcu(p, rcu);
- if (ret == ACT_P_CREATED)
- tcf_idr_insert(tn, *a);
return ret;
put_chain:
if (goto_ch)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 20d60b8..7f20fd3 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -308,33 +308,12 @@
tcf_proto_destroy(tp, rtnl_held, true, extack);
}
-static int walker_check_empty(struct tcf_proto *tp, void *fh,
- struct tcf_walker *arg)
+static bool tcf_proto_check_delete(struct tcf_proto *tp)
{
- if (fh) {
- arg->nonempty = true;
- return -1;
- }
- return 0;
-}
+ if (tp->ops->delete_empty)
+ return tp->ops->delete_empty(tp);
-static bool tcf_proto_is_empty(struct tcf_proto *tp, bool rtnl_held)
-{
- struct tcf_walker walker = { .fn = walker_check_empty, };
-
- if (tp->ops->walk) {
- tp->ops->walk(tp, &walker, rtnl_held);
- return !walker.nonempty;
- }
- return true;
-}
-
-static bool tcf_proto_check_delete(struct tcf_proto *tp, bool rtnl_held)
-{
- spin_lock(&tp->lock);
- if (tcf_proto_is_empty(tp, rtnl_held))
- tp->deleting = true;
- spin_unlock(&tp->lock);
+ tp->deleting = true;
return tp->deleting;
}
@@ -626,15 +605,15 @@
static int tcf_block_setup(struct tcf_block *block,
struct flow_block_offload *bo);
-static void tc_indr_block_ing_cmd(struct net_device *dev,
- struct tcf_block *block,
- flow_indr_block_bind_cb_t *cb,
- void *cb_priv,
- enum flow_block_command command)
+static void tc_indr_block_cmd(struct net_device *dev, struct tcf_block *block,
+ flow_indr_block_bind_cb_t *cb, void *cb_priv,
+ enum flow_block_command command, bool ingress)
{
struct flow_block_offload bo = {
.command = command,
- .binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS,
+ .binder_type = ingress ?
+ FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS :
+ FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS,
.net = dev_net(dev),
.block_shared = tcf_block_non_null_shared(block),
};
@@ -652,9 +631,10 @@
up_write(&block->cb_lock);
}
-static struct tcf_block *tc_dev_ingress_block(struct net_device *dev)
+static struct tcf_block *tc_dev_block(struct net_device *dev, bool ingress)
{
const struct Qdisc_class_ops *cops;
+ const struct Qdisc_ops *ops;
struct Qdisc *qdisc;
if (!dev_ingress_queue(dev))
@@ -664,24 +644,37 @@
if (!qdisc)
return NULL;
- cops = qdisc->ops->cl_ops;
+ ops = qdisc->ops;
+ if (!ops)
+ return NULL;
+
+ if (!ingress && !strcmp("ingress", ops->id))
+ return NULL;
+
+ cops = ops->cl_ops;
if (!cops)
return NULL;
if (!cops->tcf_block)
return NULL;
- return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL);
+ return cops->tcf_block(qdisc,
+ ingress ? TC_H_MIN_INGRESS : TC_H_MIN_EGRESS,
+ NULL);
}
-static void tc_indr_block_get_and_ing_cmd(struct net_device *dev,
- flow_indr_block_bind_cb_t *cb,
- void *cb_priv,
- enum flow_block_command command)
+static void tc_indr_block_get_and_cmd(struct net_device *dev,
+ flow_indr_block_bind_cb_t *cb,
+ void *cb_priv,
+ enum flow_block_command command)
{
- struct tcf_block *block = tc_dev_ingress_block(dev);
+ struct tcf_block *block;
- tc_indr_block_ing_cmd(dev, block, cb, cb_priv, command);
+ block = tc_dev_block(dev, true);
+ tc_indr_block_cmd(dev, block, cb, cb_priv, command, true);
+
+ block = tc_dev_block(dev, false);
+ tc_indr_block_cmd(dev, block, cb, cb_priv, command, false);
}
static void tc_indr_block_call(struct tcf_block *block,
@@ -1578,7 +1571,7 @@
reclassify:
#endif
for (; tp; tp = rcu_dereference_bh(tp->next)) {
- __be16 protocol = tc_skb_protocol(skb);
+ __be16 protocol = skb_protocol(skb, false);
int err;
if (tp->protocol != protocol &&
@@ -1737,7 +1730,7 @@
* concurrently.
* Mark tp for deletion if it is empty.
*/
- if (!tp_iter || !tcf_proto_check_delete(tp, rtnl_held)) {
+ if (!tp_iter || !tcf_proto_check_delete(tp)) {
mutex_unlock(&chain->filter_chain_lock);
return;
}
@@ -2012,6 +2005,7 @@
err = PTR_ERR(block);
goto errout;
}
+ block->classid = parent;
chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
if (chain_index > TC_ACT_EXT_VAL_MASK) {
@@ -2062,9 +2056,8 @@
&chain_info));
mutex_unlock(&chain->filter_chain_lock);
- tp_new = tcf_proto_create(nla_data(tca[TCA_KIND]),
- protocol, prio, chain, rtnl_held,
- extack);
+ tp_new = tcf_proto_create(name, protocol, prio, chain,
+ rtnl_held, extack);
if (IS_ERR(tp_new)) {
err = PTR_ERR(tp_new);
goto errout_tp;
@@ -2555,12 +2548,10 @@
return skb->len;
parent = tcm->tcm_parent;
- if (!parent) {
+ if (!parent)
q = dev->qdisc;
- parent = q->handle;
- } else {
+ else
q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
- }
if (!q)
goto out;
cops = q->ops->cl_ops;
@@ -2576,6 +2567,7 @@
block = cops->tcf_block(q, cl, NULL);
if (!block)
goto out;
+ parent = block->classid;
if (tcf_block_shared(block))
q = NULL;
}
@@ -2721,13 +2713,19 @@
struct netlink_ext_ack *extack)
{
const struct tcf_proto_ops *ops;
+ char name[IFNAMSIZ];
void *tmplt_priv;
/* If kind is not set, user did not specify template. */
if (!tca[TCA_KIND])
return 0;
- ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), true, extack);
+ if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
+ NL_SET_ERR_MSG(extack, "Specified TC chain template name too long");
+ return -EINVAL;
+ }
+
+ ops = tcf_proto_lookup_ops(name, true, extack);
if (IS_ERR(ops))
return PTR_ERR(ops);
if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
@@ -2868,7 +2866,7 @@
break;
case RTM_GETCHAIN:
err = tc_chain_notify(chain, skb, n->nlmsg_seq,
- n->nlmsg_seq, n->nlmsg_type, true);
+ n->nlmsg_flags, n->nlmsg_type, true);
if (err < 0)
NL_SET_ERR_MSG(extack, "Failed to send chain notify message");
break;
@@ -3028,6 +3026,7 @@
act->type = exts->type = TCA_OLD_COMPAT;
exts->actions[0] = act;
exts->nr_actions = 1;
+ tcf_idr_insert_many(exts->actions);
} else if (exts->action && tb[exts->action]) {
int err;
@@ -3626,9 +3625,9 @@
.size = sizeof(struct tcf_net),
};
-static struct flow_indr_block_ing_entry block_ing_entry = {
- .cb = tc_indr_block_get_and_ing_cmd,
- .list = LIST_HEAD_INIT(block_ing_entry.list),
+static struct flow_indr_block_entry block_entry = {
+ .cb = tc_indr_block_get_and_cmd,
+ .list = LIST_HEAD_INIT(block_entry.list),
};
static int __init tc_filter_init(void)
@@ -3643,7 +3642,7 @@
if (err)
goto err_register_pernet_subsys;
- flow_indr_add_block_ing_cb(&block_ing_entry);
+ flow_indr_add_block_cb(&block_entry);
rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
RTNL_FLAG_DOIT_UNLOCKED);
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 4aafbe3..f256a7c 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -263,12 +263,17 @@
}
}
-static void basic_bind_class(void *fh, u32 classid, unsigned long cl)
+static void basic_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
+ unsigned long base)
{
struct basic_filter *f = fh;
- if (f && f->res.classid == classid)
- f->res.class = cl;
+ if (f && f->res.classid == classid) {
+ if (cl)
+ __tcf_bind_filter(q, &f->res, base);
+ else
+ __tcf_unbind_filter(q, &f->res);
+ }
}
static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh,
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 8229ed4..6e3e63d 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -631,12 +631,17 @@
return -1;
}
-static void cls_bpf_bind_class(void *fh, u32 classid, unsigned long cl)
+static void cls_bpf_bind_class(void *fh, u32 classid, unsigned long cl,
+ void *q, unsigned long base)
{
struct cls_bpf_prog *prog = fh;
- if (prog && prog->res.classid == classid)
- prog->res.class = cl;
+ if (prog && prog->res.classid == classid) {
+ if (cl)
+ __tcf_bind_filter(q, &prog->res, base);
+ else
+ __tcf_unbind_filter(q, &prog->res);
+ }
}
static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg,
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 80ae7b9..ab53a93 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -80,7 +80,7 @@
if (dst)
return ntohl(dst);
- return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
+ return addr_fold(skb_dst(skb)) ^ (__force u16)skb_protocol(skb, true);
}
static u32 flow_get_proto(const struct sk_buff *skb,
@@ -104,7 +104,7 @@
if (flow->ports.ports)
return ntohs(flow->ports.dst);
- return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
+ return addr_fold(skb_dst(skb)) ^ (__force u16)skb_protocol(skb, true);
}
static u32 flow_get_iif(const struct sk_buff *skb)
@@ -151,7 +151,7 @@
static u32 flow_get_nfct_src(const struct sk_buff *skb,
const struct flow_keys *flow)
{
- switch (tc_skb_protocol(skb)) {
+ switch (skb_protocol(skb, true)) {
case htons(ETH_P_IP):
return ntohl(CTTUPLE(skb, src.u3.ip));
case htons(ETH_P_IPV6):
@@ -164,7 +164,7 @@
static u32 flow_get_nfct_dst(const struct sk_buff *skb,
const struct flow_keys *flow)
{
- switch (tc_skb_protocol(skb)) {
+ switch (skb_protocol(skb, true)) {
case htons(ETH_P_IP):
return ntohl(CTTUPLE(skb, dst.u3.ip));
case htons(ETH_P_IPV6):
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 74221e3..c5a0f2c 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -54,8 +54,13 @@
struct flow_dissector_key_ip ip;
struct flow_dissector_key_ip enc_ip;
struct flow_dissector_key_enc_opts enc_opts;
- struct flow_dissector_key_ports tp_min;
- struct flow_dissector_key_ports tp_max;
+ union {
+ struct flow_dissector_key_ports tp;
+ struct {
+ struct flow_dissector_key_ports tp_min;
+ struct flow_dissector_key_ports tp_max;
+ };
+ } tp_range;
struct flow_dissector_key_ct ct;
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
@@ -198,19 +203,19 @@
{
__be16 min_mask, max_mask, min_val, max_val;
- min_mask = htons(filter->mask->key.tp_min.dst);
- max_mask = htons(filter->mask->key.tp_max.dst);
- min_val = htons(filter->key.tp_min.dst);
- max_val = htons(filter->key.tp_max.dst);
+ min_mask = htons(filter->mask->key.tp_range.tp_min.dst);
+ max_mask = htons(filter->mask->key.tp_range.tp_max.dst);
+ min_val = htons(filter->key.tp_range.tp_min.dst);
+ max_val = htons(filter->key.tp_range.tp_max.dst);
if (min_mask && max_mask) {
- if (htons(key->tp.dst) < min_val ||
- htons(key->tp.dst) > max_val)
+ if (htons(key->tp_range.tp.dst) < min_val ||
+ htons(key->tp_range.tp.dst) > max_val)
return false;
/* skb does not have min and max values */
- mkey->tp_min.dst = filter->mkey.tp_min.dst;
- mkey->tp_max.dst = filter->mkey.tp_max.dst;
+ mkey->tp_range.tp_min.dst = filter->mkey.tp_range.tp_min.dst;
+ mkey->tp_range.tp_max.dst = filter->mkey.tp_range.tp_max.dst;
}
return true;
}
@@ -221,19 +226,19 @@
{
__be16 min_mask, max_mask, min_val, max_val;
- min_mask = htons(filter->mask->key.tp_min.src);
- max_mask = htons(filter->mask->key.tp_max.src);
- min_val = htons(filter->key.tp_min.src);
- max_val = htons(filter->key.tp_max.src);
+ min_mask = htons(filter->mask->key.tp_range.tp_min.src);
+ max_mask = htons(filter->mask->key.tp_range.tp_max.src);
+ min_val = htons(filter->key.tp_range.tp_min.src);
+ max_val = htons(filter->key.tp_range.tp_max.src);
if (min_mask && max_mask) {
- if (htons(key->tp.src) < min_val ||
- htons(key->tp.src) > max_val)
+ if (htons(key->tp_range.tp.src) < min_val ||
+ htons(key->tp_range.tp.src) > max_val)
return false;
/* skb does not have min and max values */
- mkey->tp_min.src = filter->mkey.tp_min.src;
- mkey->tp_max.src = filter->mkey.tp_max.src;
+ mkey->tp_range.tp_min.src = filter->mkey.tp_range.tp_min.src;
+ mkey->tp_range.tp_max.src = filter->mkey.tp_range.tp_max.src;
}
return true;
}
@@ -298,13 +303,14 @@
struct cls_fl_filter *f;
list_for_each_entry_rcu(mask, &head->masks, list) {
+ flow_dissector_init_keys(&skb_key.control, &skb_key.basic);
fl_clear_masked_range(&skb_key, mask);
skb_flow_dissect_meta(skb, &mask->dissector, &skb_key);
/* skb_flow_dissect() does not set n_proto in case an unknown
* protocol, so do it rather here.
*/
- skb_key.basic.n_proto = skb->protocol;
+ skb_key.basic.n_proto = skb_protocol(skb, false);
skb_flow_dissect_tunnel_info(skb, &mask->dissector, &skb_key);
skb_flow_dissect_ct(skb, &mask->dissector, &skb_key,
fl_ct_info_to_flower_map,
@@ -684,6 +690,7 @@
.len = 128 / BITS_PER_BYTE },
[TCA_FLOWER_KEY_CT_LABELS_MASK] = { .type = NLA_BINARY,
.len = 128 / BITS_PER_BYTE },
+ [TCA_FLOWER_FLAGS] = { .type = NLA_U32 },
};
static const struct nla_policy
@@ -715,23 +722,25 @@
static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key,
struct fl_flow_key *mask)
{
- fl_set_key_val(tb, &key->tp_min.dst,
- TCA_FLOWER_KEY_PORT_DST_MIN, &mask->tp_min.dst,
- TCA_FLOWER_UNSPEC, sizeof(key->tp_min.dst));
- fl_set_key_val(tb, &key->tp_max.dst,
- TCA_FLOWER_KEY_PORT_DST_MAX, &mask->tp_max.dst,
- TCA_FLOWER_UNSPEC, sizeof(key->tp_max.dst));
- fl_set_key_val(tb, &key->tp_min.src,
- TCA_FLOWER_KEY_PORT_SRC_MIN, &mask->tp_min.src,
- TCA_FLOWER_UNSPEC, sizeof(key->tp_min.src));
- fl_set_key_val(tb, &key->tp_max.src,
- TCA_FLOWER_KEY_PORT_SRC_MAX, &mask->tp_max.src,
- TCA_FLOWER_UNSPEC, sizeof(key->tp_max.src));
+ fl_set_key_val(tb, &key->tp_range.tp_min.dst,
+ TCA_FLOWER_KEY_PORT_DST_MIN, &mask->tp_range.tp_min.dst,
+ TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_min.dst));
+ fl_set_key_val(tb, &key->tp_range.tp_max.dst,
+ TCA_FLOWER_KEY_PORT_DST_MAX, &mask->tp_range.tp_max.dst,
+ TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_max.dst));
+ fl_set_key_val(tb, &key->tp_range.tp_min.src,
+ TCA_FLOWER_KEY_PORT_SRC_MIN, &mask->tp_range.tp_min.src,
+ TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_min.src));
+ fl_set_key_val(tb, &key->tp_range.tp_max.src,
+ TCA_FLOWER_KEY_PORT_SRC_MAX, &mask->tp_range.tp_max.src,
+ TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_max.src));
- if ((mask->tp_min.dst && mask->tp_max.dst &&
- htons(key->tp_max.dst) <= htons(key->tp_min.dst)) ||
- (mask->tp_min.src && mask->tp_max.src &&
- htons(key->tp_max.src) <= htons(key->tp_min.src)))
+ if ((mask->tp_range.tp_min.dst && mask->tp_range.tp_max.dst &&
+ htons(key->tp_range.tp_max.dst) <=
+ htons(key->tp_range.tp_min.dst)) ||
+ (mask->tp_range.tp_min.src && mask->tp_range.tp_max.src &&
+ htons(key->tp_range.tp_max.src) <=
+ htons(key->tp_range.tp_min.src)))
return -EINVAL;
return 0;
@@ -1320,9 +1329,10 @@
FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
FL_KEY_SET_IF_MASKED(mask, keys, cnt,
FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
- if (FL_KEY_IS_MASKED(mask, tp) ||
- FL_KEY_IS_MASKED(mask, tp_min) || FL_KEY_IS_MASKED(mask, tp_max))
- FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_PORTS, tp);
+ FL_KEY_SET_IF_MASKED(mask, keys, cnt,
+ FLOW_DISSECTOR_KEY_PORTS, tp);
+ FL_KEY_SET_IF_MASKED(mask, keys, cnt,
+ FLOW_DISSECTOR_KEY_PORTS_RANGE, tp_range);
FL_KEY_SET_IF_MASKED(mask, keys, cnt,
FLOW_DISSECTOR_KEY_IP, ip);
FL_KEY_SET_IF_MASKED(mask, keys, cnt,
@@ -1371,8 +1381,10 @@
fl_mask_copy(newmask, mask);
- if ((newmask->key.tp_min.dst && newmask->key.tp_max.dst) ||
- (newmask->key.tp_min.src && newmask->key.tp_max.src))
+ if ((newmask->key.tp_range.tp_min.dst &&
+ newmask->key.tp_range.tp_max.dst) ||
+ (newmask->key.tp_range.tp_min.src &&
+ newmask->key.tp_range.tp_max.src))
newmask->flags |= TCA_FLOWER_MASK_FLAGS_RANGE;
err = fl_init_mask_hashtable(newmask);
@@ -1970,18 +1982,22 @@
static int fl_dump_key_port_range(struct sk_buff *skb, struct fl_flow_key *key,
struct fl_flow_key *mask)
{
- if (fl_dump_key_val(skb, &key->tp_min.dst, TCA_FLOWER_KEY_PORT_DST_MIN,
- &mask->tp_min.dst, TCA_FLOWER_UNSPEC,
- sizeof(key->tp_min.dst)) ||
- fl_dump_key_val(skb, &key->tp_max.dst, TCA_FLOWER_KEY_PORT_DST_MAX,
- &mask->tp_max.dst, TCA_FLOWER_UNSPEC,
- sizeof(key->tp_max.dst)) ||
- fl_dump_key_val(skb, &key->tp_min.src, TCA_FLOWER_KEY_PORT_SRC_MIN,
- &mask->tp_min.src, TCA_FLOWER_UNSPEC,
- sizeof(key->tp_min.src)) ||
- fl_dump_key_val(skb, &key->tp_max.src, TCA_FLOWER_KEY_PORT_SRC_MAX,
- &mask->tp_max.src, TCA_FLOWER_UNSPEC,
- sizeof(key->tp_max.src)))
+ if (fl_dump_key_val(skb, &key->tp_range.tp_min.dst,
+ TCA_FLOWER_KEY_PORT_DST_MIN,
+ &mask->tp_range.tp_min.dst, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp_range.tp_min.dst)) ||
+ fl_dump_key_val(skb, &key->tp_range.tp_max.dst,
+ TCA_FLOWER_KEY_PORT_DST_MAX,
+ &mask->tp_range.tp_max.dst, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp_range.tp_max.dst)) ||
+ fl_dump_key_val(skb, &key->tp_range.tp_min.src,
+ TCA_FLOWER_KEY_PORT_SRC_MIN,
+ &mask->tp_range.tp_min.src, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp_range.tp_min.src)) ||
+ fl_dump_key_val(skb, &key->tp_range.tp_max.src,
+ TCA_FLOWER_KEY_PORT_SRC_MAX,
+ &mask->tp_range.tp_max.src, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp_range.tp_max.src)))
return -1;
return 0;
@@ -2497,12 +2513,28 @@
return -EMSGSIZE;
}
-static void fl_bind_class(void *fh, u32 classid, unsigned long cl)
+static void fl_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
+ unsigned long base)
{
struct cls_fl_filter *f = fh;
- if (f && f->res.classid == classid)
- f->res.class = cl;
+ if (f && f->res.classid == classid) {
+ if (cl)
+ __tcf_bind_filter(q, &f->res, base);
+ else
+ __tcf_unbind_filter(q, &f->res);
+ }
+}
+
+static bool fl_delete_empty(struct tcf_proto *tp)
+{
+ struct cls_fl_head *head = fl_head_dereference(tp);
+
+ spin_lock(&tp->lock);
+ tp->deleting = idr_is_empty(&head->handle_idr);
+ spin_unlock(&tp->lock);
+
+ return tp->deleting;
}
static struct tcf_proto_ops cls_fl_ops __read_mostly = {
@@ -2514,6 +2546,7 @@
.put = fl_put,
.change = fl_change,
.delete = fl_delete,
+ .delete_empty = fl_delete_empty,
.walk = fl_walk,
.reoffload = fl_reoffload,
.hw_add = fl_hw_add,
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index c9496c9..ec94529 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -419,12 +419,17 @@
return -1;
}
-static void fw_bind_class(void *fh, u32 classid, unsigned long cl)
+static void fw_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
+ unsigned long base)
{
struct fw_filter *f = fh;
- if (f && f->res.classid == classid)
- f->res.class = cl;
+ if (f && f->res.classid == classid) {
+ if (cl)
+ __tcf_bind_filter(q, &f->res, base);
+ else
+ __tcf_unbind_filter(q, &f->res);
+ }
}
static struct tcf_proto_ops cls_fw_ops __read_mostly = {
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 7fc2eb6..610a0b7 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -157,6 +157,7 @@
static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = {
[TCA_MATCHALL_UNSPEC] = { .type = NLA_UNSPEC },
[TCA_MATCHALL_CLASSID] = { .type = NLA_U32 },
+ [TCA_MATCHALL_FLAGS] = { .type = NLA_U32 },
};
static int mall_set_parms(struct net *net, struct tcf_proto *tp,
@@ -393,12 +394,17 @@
return -1;
}
-static void mall_bind_class(void *fh, u32 classid, unsigned long cl)
+static void mall_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
+ unsigned long base)
{
struct cls_mall_head *head = fh;
- if (head && head->res.classid == classid)
- head->res.class = cl;
+ if (head && head->res.classid == classid) {
+ if (cl)
+ __tcf_bind_filter(q, &head->res, base);
+ else
+ __tcf_unbind_filter(q, &head->res);
+ }
}
static struct tcf_proto_ops cls_mall_ops __read_mostly = {
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 2d9e0b4..5efa3e7 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -534,8 +534,8 @@
fp = &b->ht[h];
for (pfp = rtnl_dereference(*fp); pfp;
fp = &pfp->next, pfp = rtnl_dereference(*fp)) {
- if (pfp == f) {
- *fp = f->next;
+ if (pfp == fold) {
+ rcu_assign_pointer(*fp, fold->next);
break;
}
}
@@ -641,12 +641,17 @@
return -1;
}
-static void route4_bind_class(void *fh, u32 classid, unsigned long cl)
+static void route4_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
+ unsigned long base)
{
struct route4_filter *f = fh;
- if (f && f->res.classid == classid)
- f->res.class = cl;
+ if (f && f->res.classid == classid) {
+ if (cl)
+ __tcf_bind_filter(q, &f->res, base);
+ else
+ __tcf_unbind_filter(q, &f->res);
+ }
}
static struct tcf_proto_ops cls_route4_ops __read_mostly = {
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 2f3c03b..d36949d 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -463,10 +463,8 @@
static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
[TCA_RSVP_CLASSID] = { .type = NLA_U32 },
- [TCA_RSVP_DST] = { .type = NLA_BINARY,
- .len = RSVP_DST_LEN * sizeof(u32) },
- [TCA_RSVP_SRC] = { .type = NLA_BINARY,
- .len = RSVP_DST_LEN * sizeof(u32) },
+ [TCA_RSVP_DST] = { .len = RSVP_DST_LEN * sizeof(u32) },
+ [TCA_RSVP_SRC] = { .len = RSVP_DST_LEN * sizeof(u32) },
[TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) },
};
@@ -738,12 +736,17 @@
return -1;
}
-static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl)
+static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
+ unsigned long base)
{
struct rsvp_filter *f = fh;
- if (f && f->res.classid == classid)
- f->res.class = cl;
+ if (f && f->res.classid == classid) {
+ if (cl)
+ __tcf_bind_filter(q, &f->res, base);
+ else
+ __tcf_unbind_filter(q, &f->res);
+ }
}
static struct tcf_proto_ops RSVP_OPS __read_mostly = {
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index e573e5a..684187a 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -11,6 +11,7 @@
#include <linux/skbuff.h>
#include <linux/errno.h>
#include <linux/slab.h>
+#include <linux/refcount.h>
#include <net/act_api.h>
#include <net/netlink.h>
#include <net/pkt_cls.h>
@@ -26,9 +27,12 @@
#define DEFAULT_HASH_SIZE 64 /* optimized for diffserv */
+struct tcindex_data;
+
struct tcindex_filter_result {
struct tcf_exts exts;
struct tcf_result res;
+ struct tcindex_data *p;
struct rcu_work rwork;
};
@@ -49,6 +53,7 @@
u32 hash; /* hash table size; 0 if undefined */
u32 alloc_hash; /* allocated size */
u32 fall_through; /* 0: only classify if explicit match */
+ refcount_t refcnt; /* a temporary refcnt for perfect hash */
struct rcu_work rwork;
};
@@ -57,6 +62,20 @@
return tcf_exts_has_actions(&r->exts) || r->res.classid;
}
+static void tcindex_data_get(struct tcindex_data *p)
+{
+ refcount_inc(&p->refcnt);
+}
+
+static void tcindex_data_put(struct tcindex_data *p)
+{
+ if (refcount_dec_and_test(&p->refcnt)) {
+ kfree(p->perfect);
+ kfree(p->h);
+ kfree(p);
+ }
+}
+
static struct tcindex_filter_result *tcindex_lookup(struct tcindex_data *p,
u16 key)
{
@@ -132,6 +151,7 @@
p->mask = 0xffff;
p->hash = DEFAULT_HASH_SIZE;
p->fall_through = 1;
+ refcount_set(&p->refcnt, 1); /* Paired with tcindex_destroy_work() */
rcu_assign_pointer(tp->root, p);
return 0;
@@ -141,6 +161,7 @@
{
tcf_exts_destroy(&r->exts);
tcf_exts_put_net(&r->exts);
+ tcindex_data_put(r->p);
}
static void tcindex_destroy_rexts_work(struct work_struct *work)
@@ -212,6 +233,8 @@
else
__tcindex_destroy_fexts(f);
} else {
+ tcindex_data_get(p);
+
if (tcf_exts_get_net(&r->exts))
tcf_queue_work(&r->rwork, tcindex_destroy_rexts_work);
else
@@ -228,9 +251,7 @@
struct tcindex_data,
rwork);
- kfree(p->perfect);
- kfree(p->h);
- kfree(p);
+ tcindex_data_put(p);
}
static inline int
@@ -248,21 +269,28 @@
};
static int tcindex_filter_result_init(struct tcindex_filter_result *r,
+ struct tcindex_data *p,
struct net *net)
{
memset(r, 0, sizeof(*r));
+ r->p = p;
return tcf_exts_init(&r->exts, net, TCA_TCINDEX_ACT,
TCA_TCINDEX_POLICE);
}
+static void tcindex_free_perfect_hash(struct tcindex_data *cp);
+
static void tcindex_partial_destroy_work(struct work_struct *work)
{
struct tcindex_data *p = container_of(to_rcu_work(work),
struct tcindex_data,
rwork);
- kfree(p->perfect);
+ rtnl_lock();
+ if (p->perfect)
+ tcindex_free_perfect_hash(p);
kfree(p);
+ rtnl_unlock();
}
static void tcindex_free_perfect_hash(struct tcindex_data *cp)
@@ -279,7 +307,7 @@
int i, err = 0;
cp->perfect = kcalloc(cp->hash, sizeof(struct tcindex_filter_result),
- GFP_KERNEL);
+ GFP_KERNEL | __GFP_NOWARN);
if (!cp->perfect)
return -ENOMEM;
@@ -288,6 +316,7 @@
TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
if (err < 0)
goto errout;
+ cp->perfect[i].p = cp;
}
return 0;
@@ -332,23 +361,7 @@
cp->alloc_hash = p->alloc_hash;
cp->fall_through = p->fall_through;
cp->tp = tp;
-
- if (p->perfect) {
- int i;
-
- if (tcindex_alloc_perfect_hash(net, cp) < 0)
- goto errout;
- for (i = 0; i < cp->hash; i++)
- cp->perfect[i].res = p->perfect[i].res;
- balloc = 1;
- }
- cp->h = p->h;
-
- err = tcindex_filter_result_init(&new_filter_result, net);
- if (err < 0)
- goto errout1;
- if (old_r)
- cr = r->res;
+ refcount_set(&cp->refcnt, 1); /* Paired with tcindex_destroy_work() */
if (tb[TCA_TCINDEX_HASH])
cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
@@ -356,8 +369,40 @@
if (tb[TCA_TCINDEX_MASK])
cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
- if (tb[TCA_TCINDEX_SHIFT])
+ if (tb[TCA_TCINDEX_SHIFT]) {
cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
+ if (cp->shift > 16) {
+ err = -EINVAL;
+ goto errout;
+ }
+ }
+ if (!cp->hash) {
+ /* Hash not specified, use perfect hash if the upper limit
+ * of the hashing index is below the threshold.
+ */
+ if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD)
+ cp->hash = (cp->mask >> cp->shift) + 1;
+ else
+ cp->hash = DEFAULT_HASH_SIZE;
+ }
+
+ if (p->perfect) {
+ int i;
+
+ if (tcindex_alloc_perfect_hash(net, cp) < 0)
+ goto errout;
+ cp->alloc_hash = cp->hash;
+ for (i = 0; i < min(cp->hash, p->hash); i++)
+ cp->perfect[i].res = p->perfect[i].res;
+ balloc = 1;
+ }
+ cp->h = p->h;
+
+ err = tcindex_filter_result_init(&new_filter_result, cp, net);
+ if (err < 0)
+ goto errout_alloc;
+ if (old_r)
+ cr = r->res;
err = -EBUSY;
@@ -376,16 +421,6 @@
if (tb[TCA_TCINDEX_FALL_THROUGH])
cp->fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]);
- if (!cp->hash) {
- /* Hash not specified, use perfect hash if the upper limit
- * of the hashing index is below the threshold.
- */
- if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD)
- cp->hash = (cp->mask >> cp->shift) + 1;
- else
- cp->hash = DEFAULT_HASH_SIZE;
- }
-
if (!cp->perfect && !cp->h)
cp->alloc_hash = cp->hash;
@@ -431,7 +466,7 @@
goto errout_alloc;
f->key = handle;
f->next = NULL;
- err = tcindex_filter_result_init(&f->result, net);
+ err = tcindex_filter_result_init(&f->result, cp, net);
if (err < 0) {
kfree(f);
goto errout_alloc;
@@ -444,7 +479,7 @@
}
if (old_r && old_r != r) {
- err = tcindex_filter_result_init(old_r, net);
+ err = tcindex_filter_result_init(old_r, cp, net);
if (err < 0) {
kfree(f);
goto errout_alloc;
@@ -484,7 +519,6 @@
tcindex_free_perfect_hash(cp);
else if (balloc == 2)
kfree(cp->h);
-errout1:
tcf_exts_destroy(&new_filter_result.exts);
errout:
kfree(cp);
@@ -569,6 +603,14 @@
for (i = 0; i < p->hash; i++) {
struct tcindex_filter_result *r = p->perfect + i;
+ /* tcf_queue_work() does not guarantee the ordering we
+ * want, so we have to take this refcnt temporarily to
+ * ensure 'p' is freed after all tcindex_filter_result
+ * here. Imperfect hash does not need this, because it
+ * uses linked lists rather than an array.
+ */
+ tcindex_data_get(p);
+
tcf_unbind_filter(tp, &r->res);
if (tcf_exts_get_net(&r->exts))
tcf_queue_work(&r->rwork,
@@ -654,12 +696,17 @@
return -1;
}
-static void tcindex_bind_class(void *fh, u32 classid, unsigned long cl)
+static void tcindex_bind_class(void *fh, u32 classid, unsigned long cl,
+ void *q, unsigned long base)
{
struct tcindex_filter_result *r = fh;
- if (r && r->res.classid == classid)
- r->res.class = cl;
+ if (r && r->res.classid == classid) {
+ if (cl)
+ __tcf_bind_filter(q, &r->res, base);
+ else
+ __tcf_unbind_filter(q, &r->res);
+ }
}
static struct tcf_proto_ops cls_tcindex_ops __read_mostly = {
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index a0e6fac..e15ff33 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -1255,12 +1255,17 @@
return 0;
}
-static void u32_bind_class(void *fh, u32 classid, unsigned long cl)
+static void u32_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
+ unsigned long base)
{
struct tc_u_knode *n = fh;
- if (n && n->res.classid == classid)
- n->res.class = cl;
+ if (n && n->res.classid == classid) {
+ if (cl)
+ __tcf_bind_filter(q, &n->res, base);
+ else
+ __tcf_unbind_filter(q, &n->res);
+ }
}
static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh,
diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c
index df00566..c95cf86 100644
--- a/net/sched/em_ipset.c
+++ b/net/sched/em_ipset.c
@@ -59,7 +59,7 @@
};
int ret, network_offset;
- switch (tc_skb_protocol(skb)) {
+ switch (skb_protocol(skb, true)) {
case htons(ETH_P_IP):
state.pf = NFPROTO_IPV4;
if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
diff --git a/net/sched/em_ipt.c b/net/sched/em_ipt.c
index 9fff648..e2c157d 100644
--- a/net/sched/em_ipt.c
+++ b/net/sched/em_ipt.c
@@ -212,7 +212,7 @@
struct nf_hook_state state;
int ret;
- switch (tc_skb_protocol(skb)) {
+ switch (skb_protocol(skb, true)) {
case htons(ETH_P_IP):
if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
return 0;
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 3177dcb..ad007cd 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -195,7 +195,7 @@
META_COLLECTOR(int_protocol)
{
/* Let userspace take care of the byte ordering */
- dst->value = tc_skb_protocol(skb);
+ dst->value = skb_protocol(skb, false);
}
META_COLLECTOR(int_pkttype)
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 8f2ad70..dd3b8c1 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -238,6 +238,9 @@
goto errout;
if (em->ops->change) {
+ err = -EINVAL;
+ if (em_hdr->flags & TCF_EM_SIMPLE)
+ goto errout;
err = em->ops->change(net, data, data_len, em);
if (err < 0)
goto errout;
@@ -263,12 +266,12 @@
}
em->data = (unsigned long) v;
}
+ em->datalen = data_len;
}
}
em->matchid = em_hdr->matchid;
em->flags = em_hdr->flags;
- em->datalen = data_len;
em->net = net;
err = 0;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 1047825..3b1b5ee 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -409,7 +409,8 @@
{
struct qdisc_rate_table *rtab;
- if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
+ if (tab == NULL || r->rate == 0 ||
+ r->cell_log == 0 || r->cell_log >= 32 ||
nla_len(tab) != TC_RTAB_SIZE) {
NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
return NULL;
@@ -1891,8 +1892,9 @@
struct tcf_bind_args {
struct tcf_walker w;
- u32 classid;
+ unsigned long base;
unsigned long cl;
+ u32 classid;
};
static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
@@ -1903,28 +1905,30 @@
struct Qdisc *q = tcf_block_q(tp->chain->block);
sch_tree_lock(q);
- tp->ops->bind_class(n, a->classid, a->cl);
+ tp->ops->bind_class(n, a->classid, a->cl, q, a->base);
sch_tree_unlock(q);
}
return 0;
}
-static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
- unsigned long new_cl)
+struct tc_bind_class_args {
+ struct qdisc_walker w;
+ unsigned long new_cl;
+ u32 portid;
+ u32 clid;
+};
+
+static int tc_bind_class_walker(struct Qdisc *q, unsigned long cl,
+ struct qdisc_walker *w)
{
+ struct tc_bind_class_args *a = (struct tc_bind_class_args *)w;
const struct Qdisc_class_ops *cops = q->ops->cl_ops;
struct tcf_block *block;
struct tcf_chain *chain;
- unsigned long cl;
- cl = cops->find(q, portid);
- if (!cl)
- return;
- if (!cops->tcf_block)
- return;
block = cops->tcf_block(q, cl, NULL);
if (!block)
- return;
+ return 0;
for (chain = tcf_get_next_chain(block, NULL);
chain;
chain = tcf_get_next_chain(block, chain)) {
@@ -1935,11 +1939,29 @@
struct tcf_bind_args arg = {};
arg.w.fn = tcf_node_bind;
- arg.classid = clid;
- arg.cl = new_cl;
+ arg.classid = a->clid;
+ arg.base = cl;
+ arg.cl = a->new_cl;
tp->ops->walk(tp, &arg.w, true);
}
}
+
+ return 0;
+}
+
+static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
+ unsigned long new_cl)
+{
+ const struct Qdisc_class_ops *cops = q->ops->cl_ops;
+ struct tc_bind_class_args args = {};
+
+ if (!cops->tcf_block)
+ return;
+ args.portid = portid;
+ args.clid = clid;
+ args.new_cl = new_cl;
+ args.w.fn = tc_bind_class_walker;
+ q->ops->cl_ops->walk(q, &args.w);
}
#else
@@ -2135,7 +2157,7 @@
static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
struct tcmsg *tcm, struct netlink_callback *cb,
- int *t_p, int s_t)
+ int *t_p, int s_t, bool recur)
{
struct Qdisc *q;
int b;
@@ -2146,7 +2168,7 @@
if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
return -1;
- if (!qdisc_dev(root))
+ if (!qdisc_dev(root) || !recur)
return 0;
if (tcm->tcm_parent) {
@@ -2181,13 +2203,13 @@
s_t = cb->args[0];
t = 0;
- if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
+ if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t, true) < 0)
goto done;
dev_queue = dev_ingress_queue(dev);
if (dev_queue &&
tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
- &t, s_t) < 0)
+ &t, s_t, false) < 0)
goto done;
done:
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index f4f9b8c..6385995 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -553,16 +553,16 @@
if (!p->link.q)
p->link.q = &noop_qdisc;
pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q);
+ p->link.vcc = NULL;
+ p->link.sock = NULL;
+ p->link.common.classid = sch->handle;
+ p->link.ref = 1;
err = tcf_block_get(&p->link.block, &p->link.filter_list, sch,
extack);
if (err)
return err;
- p->link.vcc = NULL;
- p->link.sock = NULL;
- p->link.common.classid = sch->handle;
- p->link.ref = 1;
tasklet_init(&p->task, sch_atm_dequeue, (unsigned long)sch);
return 0;
}
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 53a80bc..e8eebe4 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -592,7 +592,7 @@
struct nf_conntrack_tuple tuple = {};
bool rev = !skb->_nfct;
- if (tc_skb_protocol(skb) != htons(ETH_P_IP))
+ if (skb_protocol(skb, true) != htons(ETH_P_IP))
return;
if (!nf_ct_get_tuple_skb(&tuple, skb))
@@ -907,7 +907,7 @@
}
tcph = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph);
- if (!tcph)
+ if (!tcph || tcph->doff < 5)
return NULL;
return skb_header_pointer(skb, offset,
@@ -931,6 +931,8 @@
length--;
continue;
}
+ if (length < 2)
+ break;
opsize = *ptr++;
if (opsize < 2 || opsize > length)
break;
@@ -1068,6 +1070,8 @@
length--;
continue;
}
+ if (length < 2)
+ break;
opsize = *ptr++;
if (opsize < 2 || opsize > length)
break;
@@ -1515,32 +1519,51 @@
return idx + (tin << 16);
}
-static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash)
+static u8 cake_handle_diffserv(struct sk_buff *skb, bool wash)
{
- int wlen = skb_network_offset(skb);
+ const int offset = skb_network_offset(skb);
+ u16 *buf, buf_;
u8 dscp;
- switch (tc_skb_protocol(skb)) {
+ switch (skb_protocol(skb, true)) {
case htons(ETH_P_IP):
- wlen += sizeof(struct iphdr);
- if (!pskb_may_pull(skb, wlen) ||
- skb_try_make_writable(skb, wlen))
+ buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_);
+ if (unlikely(!buf))
return 0;
- dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
- if (wash && dscp)
+ /* ToS is in the second byte of iphdr */
+ dscp = ipv4_get_dsfield((struct iphdr *)buf) >> 2;
+
+ if (wash && dscp) {
+ const int wlen = offset + sizeof(struct iphdr);
+
+ if (!pskb_may_pull(skb, wlen) ||
+ skb_try_make_writable(skb, wlen))
+ return 0;
+
ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
+ }
+
return dscp;
case htons(ETH_P_IPV6):
- wlen += sizeof(struct ipv6hdr);
- if (!pskb_may_pull(skb, wlen) ||
- skb_try_make_writable(skb, wlen))
+ buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_);
+ if (unlikely(!buf))
return 0;
- dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
- if (wash && dscp)
+ /* Traffic class is in the first and second bytes of ipv6hdr */
+ dscp = ipv6_get_dsfield((struct ipv6hdr *)buf) >> 2;
+
+ if (wash && dscp) {
+ const int wlen = offset + sizeof(struct ipv6hdr);
+
+ if (!pskb_may_pull(skb, wlen) ||
+ skb_try_make_writable(skb, wlen))
+ return 0;
+
ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
+ }
+
return dscp;
case htons(ETH_P_ARP):
@@ -1557,14 +1580,17 @@
{
struct cake_sched_data *q = qdisc_priv(sch);
u32 tin, mark;
+ bool wash;
u8 dscp;
/* Tin selection: Default to diffserv-based selection, allow overriding
- * using firewall marks or skb->priority.
+ * using firewall marks or skb->priority. Call DSCP parsing early if
+ * wash is enabled, otherwise defer to below to skip unneeded parsing.
*/
- dscp = cake_handle_diffserv(skb,
- q->rate_flags & CAKE_FLAG_WASH);
mark = (skb->mark & q->fwmark_mask) >> q->fwmark_shft;
+ wash = !!(q->rate_flags & CAKE_FLAG_WASH);
+ if (wash)
+ dscp = cake_handle_diffserv(skb, wash);
if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT)
tin = 0;
@@ -1578,6 +1604,8 @@
tin = q->tin_order[TC_H_MIN(skb->priority) - 1];
else {
+ if (!wash)
+ dscp = cake_handle_diffserv(skb, wash);
tin = q->tin_index[dscp];
if (unlikely(tin >= q->tin_cnt))
@@ -1769,7 +1797,7 @@
q->avg_window_begin));
u64 b = q->avg_window_bytes * (u64)NSEC_PER_SEC;
- do_div(b, window_interval);
+ b = div64_u64(b, window_interval);
q->avg_peak_bandwidth =
cake_ewma(q->avg_peak_bandwidth, b,
b > q->avg_peak_bandwidth ? 2 : 8);
@@ -2184,6 +2212,7 @@
[TCA_CAKE_MPU] = { .type = NLA_U32 },
[TCA_CAKE_INGRESS] = { .type = NLA_U32 },
[TCA_CAKE_ACK_FILTER] = { .type = NLA_U32 },
+ [TCA_CAKE_SPLIT_GSO] = { .type = NLA_U32 },
[TCA_CAKE_FWMARK] = { .type = NLA_U32 },
};
@@ -2678,7 +2707,7 @@
qdisc_watchdog_init(&q->watchdog, sch);
if (opt) {
- int err = cake_change(sch, opt, extack);
+ err = cake_change(sch, opt, extack);
if (err)
return err;
@@ -2995,7 +3024,7 @@
PUT_STAT_S32(BLUE_TIMER_US,
ktime_to_us(
ktime_sub(now,
- flow->cvars.blue_timer)));
+ flow->cvars.blue_timer)));
}
if (flow->cvars.dropping) {
PUT_STAT_S32(DROP_NEXT_US,
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 39b427d..e597288 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1614,7 +1614,7 @@
err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
if (err) {
kfree(cl);
- return err;
+ goto failure;
}
if (tca[TCA_RATE]) {
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index b2905b0..2eaac2f 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -181,6 +181,11 @@
s64 credits;
int len;
+ /* The previous packet is still being sent */
+ if (now < q->last) {
+ qdisc_watchdog_schedule_ns(&q->watchdog, q->last);
+ return NULL;
+ }
if (q->credits < 0) {
credits = timediff_to_credits(now - q->last, q->idleslope);
@@ -212,7 +217,12 @@
credits += q->credits;
q->credits = max_t(s64, credits, q->locredit);
- q->last = now;
+ /* Estimate of the transmission of the last byte of the packet in ns */
+ if (unlikely(atomic64_read(&q->port_rate) == 0))
+ q->last = now;
+ else
+ q->last = now + div64_s64(len * NSEC_PER_SEC,
+ atomic64_read(&q->port_rate));
return skb;
}
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index dba7037..e54f6ea 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -323,7 +323,8 @@
sch->q.qlen = 0;
sch->qstats.backlog = 0;
- memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
+ if (q->tab)
+ memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
q->head = q->tail = 0;
red_restart(&q->vars);
}
@@ -350,6 +351,7 @@
struct sk_buff **old = NULL;
unsigned int mask;
u32 max_P;
+ u8 *stab;
if (opt == NULL)
return -EINVAL;
@@ -366,8 +368,8 @@
max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;
ctl = nla_data(tb[TCA_CHOKE_PARMS]);
-
- if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
+ stab = nla_data(tb[TCA_CHOKE_STAB]);
+ if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log, stab))
return -EINVAL;
if (ctl->limit > CHOKE_MAX_QUEUE)
@@ -417,7 +419,7 @@
red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
ctl->Plog, ctl->Scell_log,
- nla_data(tb[TCA_CHOKE_STAB]),
+ stab,
max_P);
red_set_vars(&q->vars);
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 05605b3..76ed1a0 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -210,7 +210,7 @@
if (p->set_tc_index) {
int wlen = skb_network_offset(skb);
- switch (tc_skb_protocol(skb)) {
+ switch (skb_protocol(skb, true)) {
case htons(ETH_P_IP):
wlen += sizeof(struct iphdr);
if (!pskb_may_pull(skb, wlen) ||
@@ -303,7 +303,7 @@
index = skb->tc_index & (p->indices - 1);
pr_debug("index %d->%d\n", skb->tc_index, index);
- switch (tc_skb_protocol(skb)) {
+ switch (skb_protocol(skb, true)) {
case htons(ETH_P_IP):
ipv4_change_dsfield(ip_hdr(skb), p->mv[index].mask,
p->mv[index].value);
@@ -320,7 +320,7 @@
*/
if (p->mv[index].mask != 0xff || p->mv[index].value)
pr_warn("%s: unsupported protocol %d\n",
- __func__, ntohs(tc_skb_protocol(skb)));
+ __func__, ntohs(skb_protocol(skb, true)));
break;
}
@@ -406,7 +406,8 @@
struct dsmark_qdisc_data *p = qdisc_priv(sch);
pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
- qdisc_reset(p->q);
+ if (p->q)
+ qdisc_reset(p->q);
sch->qstats.backlog = 0;
sch->q.qlen = 0;
}
diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
index b1da558..c48f910 100644
--- a/net/sched/sch_etf.c
+++ b/net/sched/sch_etf.c
@@ -82,7 +82,7 @@
if (q->skip_sock_check)
goto skip;
- if (!sk)
+ if (!sk || !sk_fullsock(sk))
return false;
if (!sock_flag(sk, SOCK_TXTIME))
@@ -137,8 +137,9 @@
struct sock_exterr_skb *serr;
struct sk_buff *clone;
ktime_t txtime = skb->tstamp;
+ struct sock *sk = skb->sk;
- if (!skb->sk || !(skb->sk->sk_txtime_report_errors))
+ if (!sk || !sk_fullsock(sk) || !(sk->sk_txtime_report_errors))
return;
clone = skb_clone(skb, GFP_ATOMIC);
@@ -154,7 +155,7 @@
serr->ee.ee_data = (txtime >> 32); /* high part of tstamp */
serr->ee.ee_info = txtime; /* low part of tstamp */
- if (sock_queue_err_skb(skb->sk, clone))
+ if (sock_queue_err_skb(sk, clone))
kfree_skb(clone);
}
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 98dd87c..f757ea9 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -301,6 +301,9 @@
f->socket_hash != sk->sk_hash)) {
f->credit = q->initial_quantum;
f->socket_hash = sk->sk_hash;
+ if (q->rate_enable)
+ smp_store_release(&sk->sk_pacing_status,
+ SK_PACING_FQ);
if (fq_flow_is_throttled(f))
fq_flow_unset_throttled(q, f);
f->time_next_packet = 0ULL;
@@ -322,8 +325,12 @@
fq_flow_set_detached(f);
f->sk = sk;
- if (skb->sk == sk)
+ if (skb->sk == sk) {
f->socket_hash = sk->sk_hash;
+ if (q->rate_enable)
+ smp_store_release(&sk->sk_pacing_status,
+ SK_PACING_FQ);
+ }
f->credit = q->initial_quantum;
rb_link_node(&f->fq_node, parent, p);
@@ -428,17 +435,9 @@
f->qlen++;
qdisc_qstats_backlog_inc(sch, skb);
if (fq_flow_is_detached(f)) {
- struct sock *sk = skb->sk;
-
fq_flow_add_tail(&q->new_flows, f);
if (time_after(jiffies, f->age + q->flow_refill_delay))
f->credit = max_t(u32, f->credit, q->quantum);
- if (sk && q->rate_enable) {
- if (unlikely(smp_load_acquire(&sk->sk_pacing_status) !=
- SK_PACING_FQ))
- smp_store_release(&sk->sk_pacing_status,
- SK_PACING_FQ);
- }
q->inactive_flows--;
}
@@ -746,6 +745,7 @@
[TCA_FQ_FLOW_MAX_RATE] = { .type = NLA_U32 },
[TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 },
[TCA_FQ_FLOW_REFILL_DELAY] = { .type = NLA_U32 },
+ [TCA_FQ_ORPHAN_MASK] = { .type = NLA_U32 },
[TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 },
[TCA_FQ_CE_THRESHOLD] = { .type = NLA_U32 },
};
@@ -788,10 +788,12 @@
if (tb[TCA_FQ_QUANTUM]) {
u32 quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]);
- if (quantum > 0)
+ if (quantum > 0 && quantum <= (1 << 20)) {
q->quantum = quantum;
- else
+ } else {
+ NL_SET_ERR_MSG_MOD(extack, "invalid quantum");
err = -EINVAL;
+ }
}
if (tb[TCA_FQ_INITIAL_QUANTUM])
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index c261c0a..86fb2f9 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -370,6 +370,7 @@
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_FQ_CODEL_MAX + 1];
+ u32 quantum = 0;
int err;
if (!opt)
@@ -387,6 +388,13 @@
q->flows_cnt > 65536)
return -EINVAL;
}
+ if (tb[TCA_FQ_CODEL_QUANTUM]) {
+ quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM]));
+ if (quantum > FQ_CODEL_QUANTUM_MAX) {
+ NL_SET_ERR_MSG(extack, "Invalid quantum");
+ return -EINVAL;
+ }
+ }
sch_tree_lock(sch);
if (tb[TCA_FQ_CODEL_TARGET]) {
@@ -413,11 +421,11 @@
if (tb[TCA_FQ_CODEL_ECN])
q->cparams.ecn = !!nla_get_u32(tb[TCA_FQ_CODEL_ECN]);
- if (tb[TCA_FQ_CODEL_QUANTUM])
- q->quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM]));
+ if (quantum)
+ q->quantum = quantum;
if (tb[TCA_FQ_CODEL_DROP_BATCH_SIZE])
- q->drop_batch_size = min(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE]));
+ q->drop_batch_size = max(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE]));
if (tb[TCA_FQ_CODEL_MEMORY_LIMIT])
q->memory_limit = min(1U << 31, nla_get_u32(tb[TCA_FQ_CODEL_MEMORY_LIMIT]));
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 8769b4b..9bc5cbe 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -35,6 +35,25 @@
const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops;
EXPORT_SYMBOL(default_qdisc_ops);
+static void qdisc_maybe_clear_missed(struct Qdisc *q,
+ const struct netdev_queue *txq)
+{
+ clear_bit(__QDISC_STATE_MISSED, &q->state);
+
+ /* Make sure the below netif_xmit_frozen_or_stopped()
+ * checking happens after clearing STATE_MISSED.
+ */
+ smp_mb__after_atomic();
+
+ /* Checking netif_xmit_frozen_or_stopped() again to
+ * make sure STATE_MISSED is set if the STATE_MISSED
+ * set by netif_tx_wake_queue()'s rescheduling of
+ * net_tx_action() is cleared by the above clear_bit().
+ */
+ if (!netif_xmit_frozen_or_stopped(txq))
+ set_bit(__QDISC_STATE_MISSED, &q->state);
+}
+
/* Main transmission queue. */
/* Modifications to data participating in scheduling must be protected with
@@ -74,6 +93,7 @@
}
} else {
skb = SKB_XOFF_MAGIC;
+ qdisc_maybe_clear_missed(q, txq);
}
}
@@ -242,6 +262,7 @@
}
} else {
skb = NULL;
+ qdisc_maybe_clear_missed(q, txq);
}
if (lock)
spin_unlock(lock);
@@ -251,8 +272,10 @@
*validate = true;
if ((q->flags & TCQ_F_ONETXQUEUE) &&
- netif_xmit_frozen_or_stopped(txq))
+ netif_xmit_frozen_or_stopped(txq)) {
+ qdisc_maybe_clear_missed(q, txq);
return skb;
+ }
skb = qdisc_dequeue_skb_bad_txq(q);
if (unlikely(skb)) {
@@ -311,6 +334,8 @@
HARD_TX_LOCK(dev, txq, smp_processor_id());
if (!netif_xmit_frozen_or_stopped(txq))
skb = dev_hard_start_xmit(skb, dev, txq, &ret);
+ else
+ qdisc_maybe_clear_missed(q, txq);
HARD_TX_UNLOCK(dev, txq);
} else {
@@ -469,6 +494,7 @@
dev_hold(dev);
}
}
+EXPORT_SYMBOL_GPL(__netdev_watchdog_up);
static void dev_watchdog_up(struct net_device *dev)
{
@@ -644,8 +670,10 @@
{
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
struct sk_buff *skb = NULL;
+ bool need_retry = true;
int band;
+retry:
for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
struct skb_array *q = band2list(priv, band);
@@ -656,8 +684,25 @@
}
if (likely(skb)) {
qdisc_update_stats_at_dequeue(qdisc, skb);
+ } else if (need_retry &&
+ test_bit(__QDISC_STATE_MISSED, &qdisc->state)) {
+ /* Delay clearing the STATE_MISSED here to reduce
+ * the overhead of the second spin_trylock() in
+ * qdisc_run_begin() and __netif_schedule() calling
+ * in qdisc_run_end().
+ */
+ clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+
+ /* Make sure dequeuing happens after clearing
+ * STATE_MISSED.
+ */
+ smp_mb__after_atomic();
+
+ need_retry = false;
+
+ goto retry;
} else {
- qdisc->empty = true;
+ WRITE_ONCE(qdisc->empty, true);
}
return skb;
@@ -1125,26 +1170,40 @@
struct netdev_queue *dev_queue,
void *_qdisc_default)
{
+ struct Qdisc *qdisc = rtnl_dereference(dev_queue->qdisc);
struct Qdisc *qdisc_default = _qdisc_default;
- struct Qdisc *qdisc;
- qdisc = rtnl_dereference(dev_queue->qdisc);
if (qdisc) {
- bool nolock = qdisc->flags & TCQ_F_NOLOCK;
-
- if (nolock)
- spin_lock_bh(&qdisc->seqlock);
- spin_lock_bh(qdisc_lock(qdisc));
-
if (!(qdisc->flags & TCQ_F_BUILTIN))
set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);
rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
- qdisc_reset(qdisc);
+ }
+}
- spin_unlock_bh(qdisc_lock(qdisc));
- if (nolock)
- spin_unlock_bh(&qdisc->seqlock);
+static void dev_reset_queue(struct net_device *dev,
+ struct netdev_queue *dev_queue,
+ void *_unused)
+{
+ struct Qdisc *qdisc;
+ bool nolock;
+
+ qdisc = dev_queue->qdisc_sleeping;
+ if (!qdisc)
+ return;
+
+ nolock = qdisc->flags & TCQ_F_NOLOCK;
+
+ if (nolock)
+ spin_lock_bh(&qdisc->seqlock);
+ spin_lock_bh(qdisc_lock(qdisc));
+
+ qdisc_reset(qdisc);
+
+ spin_unlock_bh(qdisc_lock(qdisc));
+ if (nolock) {
+ clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+ spin_unlock_bh(&qdisc->seqlock);
}
}
@@ -1206,12 +1265,20 @@
dev_watchdog_down(dev);
}
- /* Wait for outstanding qdisc-less dev_queue_xmit calls.
+ /* Wait for outstanding qdisc-less dev_queue_xmit calls or
+ * outstanding qdisc enqueuing calls.
* This is avoided if all devices are in dismantle phase :
* Caller will call synchronize_net() for us
*/
synchronize_net();
+ list_for_each_entry(dev, head, close_list) {
+ netdev_for_each_tx_queue(dev, dev_reset_queue, NULL);
+
+ if (dev_ingress_queue(dev))
+ dev_reset_queue(dev, dev_ingress_queue(dev), NULL);
+ }
+
/* Wait for outstanding qdisc_run calls. */
list_for_each_entry(dev, head, close_list) {
while (some_qdisc_is_busy(dev))
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 8599c6f..f4132dc 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -480,7 +480,7 @@
struct gred_sched *table = qdisc_priv(sch);
struct gred_sched_data *q = table->tab[dp];
- if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog)) {
+ if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log, stab)) {
NL_SET_ERR_MSG_MOD(extack, "invalid RED parameters");
return -EINVAL;
}
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index 278c0b2..e79f1af 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -153,6 +153,7 @@
__gnet_stats_copy_queue(&sch->qstats,
qdisc->cpu_qstats,
&qdisc->qstats, qlen);
+ sch->q.qlen += qlen;
} else {
sch->q.qlen += qdisc->q.qlen;
sch->bstats.bytes += qdisc->bstats.bytes;
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 0d0113a..8766ab5 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -411,6 +411,7 @@
__gnet_stats_copy_queue(&sch->qstats,
qdisc->cpu_qstats,
&qdisc->qstats, qlen);
+ sch->q.qlen += qlen;
} else {
sch->q.qlen += qdisc->q.qlen;
sch->bstats.bytes += qdisc->bstats.bytes;
@@ -433,7 +434,7 @@
opt.offset[tc] = dev->tc_to_txq[tc].offset;
}
- if (nla_put(skb, TCA_OPTIONS, NLA_ALIGN(sizeof(opt)), &opt))
+ if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
goto nla_put_failure;
if ((priv->flags & TC_MQPRIO_F_MODE) &&
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 42e557d..f4101a9 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -330,7 +330,7 @@
/* default uniform distribution */
if (dist == NULL)
- return ((rnd % (2 * sigma)) + mu) - sigma;
+ return ((rnd % (2 * (u32)sigma)) + mu) - sigma;
t = dist->table[rnd % dist->size];
x = (sigma % NETEM_DIST_SCALE) * t;
@@ -812,6 +812,10 @@
q->slot_config.max_packets = INT_MAX;
if (q->slot_config.max_bytes == 0)
q->slot_config.max_bytes = INT_MAX;
+
+ /* capping dist_jitter to the range acceptable by tabledist() */
+ q->slot_config.dist_jitter = min_t(__s64, INT_MAX, abs(q->slot_config.dist_jitter));
+
q->slot.packets_left = q->slot_config.max_packets;
q->slot.bytes_left = q->slot_config.max_bytes;
if (q->slot_config.min_delay | q->slot_config.max_delay |
@@ -1037,6 +1041,9 @@
if (tb[TCA_NETEM_SLOT])
get_slot(q, tb[TCA_NETEM_SLOT]);
+ /* capping jitter to the range acceptable by tabledist() */
+ q->jitter = min_t(s64, abs(q->jitter), INT_MAX);
+
return ret;
get_table_failure:
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 18b884c..6479417 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -292,8 +292,14 @@
struct tc_prio_qopt_offload graft_offload;
unsigned long band = arg - 1;
- if (new == NULL)
- new = &noop_qdisc;
+ if (!new) {
+ new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
+ TC_H_MAKE(sch->handle, arg), extack);
+ if (!new)
+ new = &noop_qdisc;
+ else
+ qdisc_hash_add(new, true);
+ }
*old = qdisc_replace(sch, new, &q->queues[band]);
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 0b05ac7..b046fd3 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -485,11 +485,6 @@
if (cl->qdisc != &noop_qdisc)
qdisc_hash_add(cl->qdisc, true);
- sch_tree_lock(sch);
- qdisc_class_hash_insert(&q->clhash, &cl->common);
- sch_tree_unlock(sch);
-
- qdisc_class_hash_grow(sch, &q->clhash);
set_change_agg:
sch_tree_lock(sch);
@@ -507,8 +502,11 @@
}
if (existing)
qfq_deact_rm_from_agg(q, cl);
+ else
+ qdisc_class_hash_insert(&q->clhash, &cl->common);
qfq_add_to_agg(q, new_agg, cl);
sch_tree_unlock(sch);
+ qdisc_class_hash_grow(sch, &q->clhash);
*arg = (unsigned long)cl;
return 0;
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 1695421..7741f10 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -197,6 +197,7 @@
struct tc_red_qopt *ctl;
int err;
u32 max_P;
+ u8 *stab;
if (opt == NULL)
return -EINVAL;
@@ -213,7 +214,9 @@
max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
ctl = nla_data(tb[TCA_RED_PARMS]);
- if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
+ stab = nla_data(tb[TCA_RED_STAB]);
+ if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog,
+ ctl->Scell_log, stab))
return -EINVAL;
if (ctl->limit > 0) {
@@ -238,7 +241,7 @@
red_set_parms(&q->parms,
ctl->qth_min, ctl->qth_max, ctl->Wlog,
ctl->Plog, ctl->Scell_log,
- nla_data(tb[TCA_RED_STAB]),
+ stab,
max_P);
red_set_vars(&q->vars);
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index c787d4d..b92bafa 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -637,8 +637,17 @@
if (ctl->divisor &&
(!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
return -EINVAL;
+
+ /* slot->allot is a short, make sure quantum is not too big. */
+ if (ctl->quantum) {
+ unsigned int scaled = SFQ_ALLOT_SIZE(ctl->quantum);
+
+ if (scaled <= 0 || scaled > SHRT_MAX)
+ return -EINVAL;
+ }
+
if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
- ctl_v1->Wlog))
+ ctl_v1->Wlog, ctl_v1->Scell_log, NULL))
return -EINVAL;
if (ctl_v1 && ctl_v1->qth_min) {
p = kmalloc(sizeof(*p), GFP_KERNEL);
diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c
index 0fb10ab..7a5e4c4 100644
--- a/net/sched/sch_skbprio.c
+++ b/net/sched/sch_skbprio.c
@@ -169,6 +169,9 @@
{
struct tc_skbprio_qopt *ctl = nla_data(opt);
+ if (opt->nla_len != nla_attr_size(sizeof(*ctl)))
+ return -EINVAL;
+
sch->limit = ctl->limit;
return 0;
}
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index c609373..da9ed06 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -31,6 +31,7 @@
#define TXTIME_ASSIST_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST)
#define FULL_OFFLOAD_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)
+#define TAPRIO_FLAGS_INVALID U32_MAX
struct sched_entry {
struct list_head list;
@@ -563,8 +564,10 @@
prio = skb->priority;
tc = netdev_get_prio_tc_map(dev, prio);
- if (!(gate_mask & BIT(tc)))
+ if (!(gate_mask & BIT(tc))) {
+ skb = NULL;
continue;
+ }
len = qdisc_pkt_len(skb);
guard = ktime_add_ns(taprio_get_time(q),
@@ -574,13 +577,17 @@
* guard band ...
*/
if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
- ktime_after(guard, entry->close_time))
+ ktime_after(guard, entry->close_time)) {
+ skb = NULL;
continue;
+ }
/* ... and no budget. */
if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
- atomic_sub_return(len, &entry->budget) < 0)
+ atomic_sub_return(len, &entry->budget) < 0) {
+ skb = NULL;
continue;
+ }
skb = child->ops->dequeue(child);
if (unlikely(!skb))
@@ -766,11 +773,15 @@
[TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 },
[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] = { .type = NLA_S64 },
[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 },
+ [TCA_TAPRIO_ATTR_FLAGS] = { .type = NLA_U32 },
+ [TCA_TAPRIO_ATTR_TXTIME_DELAY] = { .type = NLA_U32 },
};
-static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry,
+static int fill_sched_entry(struct taprio_sched *q, struct nlattr **tb,
+ struct sched_entry *entry,
struct netlink_ext_ack *extack)
{
+ int min_duration = length_to_duration(q, ETH_ZLEN);
u32 interval = 0;
if (tb[TCA_TAPRIO_SCHED_ENTRY_CMD])
@@ -785,7 +796,10 @@
interval = nla_get_u32(
tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]);
- if (interval == 0) {
+ /* The interval should allow at least the minimum ethernet
+ * frame to go out.
+ */
+ if (interval < min_duration) {
NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry");
return -EINVAL;
}
@@ -795,8 +809,9 @@
return 0;
}
-static int parse_sched_entry(struct nlattr *n, struct sched_entry *entry,
- int index, struct netlink_ext_ack *extack)
+static int parse_sched_entry(struct taprio_sched *q, struct nlattr *n,
+ struct sched_entry *entry, int index,
+ struct netlink_ext_ack *extack)
{
struct nlattr *tb[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { };
int err;
@@ -810,10 +825,10 @@
entry->index = index;
- return fill_sched_entry(tb, entry, extack);
+ return fill_sched_entry(q, tb, entry, extack);
}
-static int parse_sched_list(struct nlattr *list,
+static int parse_sched_list(struct taprio_sched *q, struct nlattr *list,
struct sched_gate_list *sched,
struct netlink_ext_ack *extack)
{
@@ -838,7 +853,7 @@
return -ENOMEM;
}
- err = parse_sched_entry(n, entry, i, extack);
+ err = parse_sched_entry(q, n, entry, i, extack);
if (err < 0) {
kfree(entry);
return err;
@@ -853,7 +868,7 @@
return i;
}
-static int parse_taprio_schedule(struct nlattr **tb,
+static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
struct sched_gate_list *new,
struct netlink_ext_ack *extack)
{
@@ -874,8 +889,8 @@
new->cycle_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]);
if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST])
- err = parse_sched_list(
- tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], new, extack);
+ err = parse_sched_list(q, tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST],
+ new, extack);
if (err < 0)
return err;
@@ -885,6 +900,12 @@
list_for_each_entry(entry, &new->entries, list)
cycle = ktime_add_ns(cycle, entry->interval);
+
+ if (!cycle) {
+ NL_SET_ERR_MSG(extack, "'cycle_time' can never be 0");
+ return -EINVAL;
+ }
+
new->cycle_time = cycle;
}
@@ -1168,9 +1189,27 @@
spin_unlock(&q->current_entry_lock);
}
-static void taprio_sched_to_offload(struct taprio_sched *q,
+static u32 tc_map_to_queue_mask(struct net_device *dev, u32 tc_mask)
+{
+ u32 i, queue_mask = 0;
+
+ for (i = 0; i < dev->num_tc; i++) {
+ u32 offset, count;
+
+ if (!(tc_mask & BIT(i)))
+ continue;
+
+ offset = dev->tc_to_txq[i].offset;
+ count = dev->tc_to_txq[i].count;
+
+ queue_mask |= GENMASK(offset + count - 1, offset);
+ }
+
+ return queue_mask;
+}
+
+static void taprio_sched_to_offload(struct net_device *dev,
struct sched_gate_list *sched,
- const struct tc_mqprio_qopt *mqprio,
struct tc_taprio_qopt_offload *offload)
{
struct sched_entry *entry;
@@ -1185,7 +1224,8 @@
e->command = entry->command;
e->interval = entry->interval;
- e->gate_mask = entry->gate_mask;
+ e->gate_mask = tc_map_to_queue_mask(dev, entry->gate_mask);
+
i++;
}
@@ -1193,7 +1233,6 @@
}
static int taprio_enable_offload(struct net_device *dev,
- struct tc_mqprio_qopt *mqprio,
struct taprio_sched *q,
struct sched_gate_list *sched,
struct netlink_ext_ack *extack)
@@ -1215,7 +1254,7 @@
return -ENOMEM;
}
offload->enable = 1;
- taprio_sched_to_offload(q, sched, mqprio, offload);
+ taprio_sched_to_offload(dev, sched, offload);
err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
if (err < 0) {
@@ -1367,6 +1406,33 @@
return 0;
}
+/* The semantics of the 'flags' argument in relation to 'change()'
+ * requests, are interpreted following two rules (which are applied in
+ * this order): (1) an omitted 'flags' argument is interpreted as
+ * zero; (2) the 'flags' of a "running" taprio instance cannot be
+ * changed.
+ */
+static int taprio_new_flags(const struct nlattr *attr, u32 old,
+ struct netlink_ext_ack *extack)
+{
+ u32 new = 0;
+
+ if (attr)
+ new = nla_get_u32(attr);
+
+ if (old != TAPRIO_FLAGS_INVALID && old != new) {
+ NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (!taprio_flags_valid(new)) {
+ NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid");
+ return -EINVAL;
+ }
+
+ return new;
+}
+
static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
@@ -1375,7 +1441,6 @@
struct taprio_sched *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
struct tc_mqprio_qopt *mqprio = NULL;
- u32 taprio_flags = 0;
unsigned long flags;
ktime_t start;
int i, err;
@@ -1388,21 +1453,14 @@
if (tb[TCA_TAPRIO_ATTR_PRIOMAP])
mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]);
- if (tb[TCA_TAPRIO_ATTR_FLAGS]) {
- taprio_flags = nla_get_u32(tb[TCA_TAPRIO_ATTR_FLAGS]);
+ err = taprio_new_flags(tb[TCA_TAPRIO_ATTR_FLAGS],
+ q->flags, extack);
+ if (err < 0)
+ return err;
- if (q->flags != 0 && q->flags != taprio_flags) {
- NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported");
- return -EOPNOTSUPP;
- } else if (!taprio_flags_valid(taprio_flags)) {
- NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid");
- return -EINVAL;
- }
+ q->flags = err;
- q->flags = taprio_flags;
- }
-
- err = taprio_parse_mqprio_opt(dev, mqprio, extack, taprio_flags);
+ err = taprio_parse_mqprio_opt(dev, mqprio, extack, q->flags);
if (err < 0)
return err;
@@ -1428,7 +1486,7 @@
goto free_sched;
}
- err = parse_taprio_schedule(tb, new_admin, extack);
+ err = parse_taprio_schedule(q, tb, new_admin, extack);
if (err < 0)
goto free_sched;
@@ -1444,8 +1502,23 @@
taprio_set_picos_per_byte(dev, q);
- if (FULL_OFFLOAD_IS_ENABLED(taprio_flags))
- err = taprio_enable_offload(dev, mqprio, q, new_admin, extack);
+ if (mqprio) {
+ err = netdev_set_num_tc(dev, mqprio->num_tc);
+ if (err)
+ goto free_sched;
+ for (i = 0; i < mqprio->num_tc; i++)
+ netdev_set_tc_queue(dev, i,
+ mqprio->count[i],
+ mqprio->offset[i]);
+
+ /* Always use supplied priority mappings */
+ for (i = 0; i <= TC_BITMASK; i++)
+ netdev_set_prio_tc_map(dev, i,
+ mqprio->prio_tc_map[i]);
+ }
+
+ if (FULL_OFFLOAD_IS_ENABLED(q->flags))
+ err = taprio_enable_offload(dev, q, new_admin, extack);
else
err = taprio_disable_offload(dev, q, extack);
if (err)
@@ -1464,27 +1537,14 @@
q->txtime_delay = nla_get_u32(tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]);
}
- if (!TXTIME_ASSIST_IS_ENABLED(taprio_flags) &&
- !FULL_OFFLOAD_IS_ENABLED(taprio_flags) &&
+ if (!TXTIME_ASSIST_IS_ENABLED(q->flags) &&
+ !FULL_OFFLOAD_IS_ENABLED(q->flags) &&
!hrtimer_active(&q->advance_timer)) {
hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS);
q->advance_timer.function = advance_sched;
}
- if (mqprio) {
- netdev_set_num_tc(dev, mqprio->num_tc);
- for (i = 0; i < mqprio->num_tc; i++)
- netdev_set_tc_queue(dev, i,
- mqprio->count[i],
- mqprio->offset[i]);
-
- /* Always use supplied priority mappings */
- for (i = 0; i <= TC_BITMASK; i++)
- netdev_set_prio_tc_map(dev, i,
- mqprio->prio_tc_map[i]);
- }
-
- if (FULL_OFFLOAD_IS_ENABLED(taprio_flags)) {
+ if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
q->dequeue = taprio_dequeue_offload;
q->peek = taprio_peek_offload;
} else {
@@ -1501,9 +1561,9 @@
goto unlock;
}
- if (TXTIME_ASSIST_IS_ENABLED(taprio_flags)) {
- setup_txtime(q, new_admin, start);
+ setup_txtime(q, new_admin, start);
+ if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
if (!oper) {
rcu_assign_pointer(q->oper_sched, new_admin);
err = 0;
@@ -1528,7 +1588,7 @@
spin_unlock_irqrestore(&q->current_entry_lock, flags);
- if (FULL_OFFLOAD_IS_ENABLED(taprio_flags))
+ if (FULL_OFFLOAD_IS_ENABLED(q->flags))
taprio_offload_config_changed(q);
}
@@ -1545,6 +1605,21 @@
return err;
}
+static void taprio_reset(struct Qdisc *sch)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ int i;
+
+ hrtimer_cancel(&q->advance_timer);
+ if (q->qdiscs) {
+ for (i = 0; i < dev->num_tx_queues && q->qdiscs[i]; i++)
+ qdisc_reset(q->qdiscs[i]);
+ }
+ sch->qstats.backlog = 0;
+ sch->q.qlen = 0;
+}
+
static void taprio_destroy(struct Qdisc *sch)
{
struct taprio_sched *q = qdisc_priv(sch);
@@ -1555,19 +1630,18 @@
list_del(&q->taprio_list);
spin_unlock(&taprio_list_lock);
- hrtimer_cancel(&q->advance_timer);
taprio_disable_offload(dev, q, NULL);
if (q->qdiscs) {
- for (i = 0; i < dev->num_tx_queues && q->qdiscs[i]; i++)
+ for (i = 0; i < dev->num_tx_queues; i++)
qdisc_put(q->qdiscs[i]);
kfree(q->qdiscs);
}
q->qdiscs = NULL;
- netdev_set_num_tc(dev, 0);
+ netdev_reset_tc(dev);
if (q->oper_sched)
call_rcu(&q->oper_sched->rcu, taprio_free_sched_cb);
@@ -1597,6 +1671,7 @@
* and get the valid one on taprio_change().
*/
q->clockid = -1;
+ q->flags = TAPRIO_FLAGS_INVALID;
spin_lock(&taprio_list_lock);
list_add(&q->taprio_list, &taprio_list);
@@ -1901,6 +1976,7 @@
.init = taprio_init,
.change = taprio_change,
.destroy = taprio_destroy,
+ .reset = taprio_reset,
.peek = taprio_peek,
.dequeue = taprio_dequeue,
.enqueue = taprio_enqueue,
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 689ef6f..6af6b95 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -134,6 +134,9 @@
struct teql_sched_data *dat = qdisc_priv(sch);
struct teql_master *master = dat->m;
+ if (!master)
+ return;
+
prev = master->slaves;
if (prev) {
do {
@@ -239,7 +242,7 @@
char haddr[MAX_ADDR_LEN];
neigh_ha_snapshot(haddr, n, dev);
- err = dev_hard_header(skb, dev, ntohs(tc_skb_protocol(skb)),
+ err = dev_hard_header(skb, dev, ntohs(skb_protocol(skb, false)),
haddr, NULL, skb->len);
if (err < 0)