// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_api.c	Packet scheduler API.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/hrtimer.h>
#include <linux/slab.h>
#include <linux/hashtable.h>

#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>

/*

   Short review.
   -------------

   This file consists of two interrelated parts:

   1. The queueing discipline manager frontend.
   2. The traffic class manager frontend.

   Generally, a queueing discipline ("qdisc") is a black box that is
   able to enqueue packets and to dequeue them (when the device is
   ready to send something) in an order and at times determined by the
   algorithm hidden inside it.

   qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from outside.
   - "schedulers", which split all the packets into "traffic classes",
     using "packet classifiers" (see cls_api.c)

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them etc. etc. etc.

   The goal of the routines in this file is to translate the
   information supplied by the user in the form of handles into a form
   more intelligible to the kernel, to perform some sanity checks and
   the parts of the work that are common to all qdiscs, and to provide
   rtnetlink notifications.

   All the real intelligent work is done inside the qdisc modules.



   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns a skb to send. It is allowed to return NULL,
   but that does not mean the queue is empty; it just means that the
   discipline does not want to send anything at this time.
   The queue is really empty only if q->q.qlen == 0.
   For complicated disciplines with multiple queues, q->q is not the
   real packet queue, but q->q.qlen must nevertheless be valid.

   ---enqueue

   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code.
   NET_XMIT_DROP	- this packet was dropped
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN		- this packet was probably enqueued, but another one was dropped.
     Expected action: back off or ignore

   Auxiliary routines:

   ---peek

   like dequeue but without removing a packet from the queue

   ---reset

   returns the qdisc to its initial state: purge all buffers, clear all
   timers, counters (except for statistics) etc.

   ---init

   initializes a newly created qdisc.

   ---destroy

   destroys resources allocated by init and during the lifetime of the qdisc.

   ---change

   changes qdisc parameters.
 */
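
/*
 * A minimal sketch (not part of this file) of how a trivial FIFO-style
 * qdisc would honour the enqueue/dequeue contract described above; the
 * "toy_" names are hypothetical:
 *
 *	static int toy_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 *			       struct sk_buff **to_free)
 *	{
 *		if (likely(sch->q.qlen < sch->limit))
 *			return qdisc_enqueue_tail(skb, sch);	// 0 on success
 *		return qdisc_drop(skb, sch, to_free);		// NET_XMIT_DROP
 *	}
 *
 *	static struct sk_buff *toy_dequeue(struct Qdisc *sch)
 *	{
 *		// May return NULL; q.qlen stays valid either way.
 *		return qdisc_dequeue_head(sch);
 *	}
 */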

/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK(qdisc_mod_lock);


/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/


/* The list of all installed queueing disciplines. */

static struct Qdisc_ops *qdisc_base;

/* Register/unregister queueing discipline */

int register_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int rc = -EEXIST;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (!strcmp(qops->id, q->id))
			goto out;

	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->peek == NULL) {
		if (qops->dequeue == NULL)
			qops->peek = noop_qdisc_ops.peek;
		else
			goto out_einval;
	}
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	if (qops->cl_ops) {
		const struct Qdisc_class_ops *cops = qops->cl_ops;

		if (!(cops->find && cops->walk && cops->leaf))
			goto out_einval;

		if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
			goto out_einval;
	}

	qops->next = NULL;
	*qp = qops;
	rc = 0;
out:
	write_unlock(&qdisc_mod_lock);
	return rc;

out_einval:
	rc = -EINVAL;
	goto out;
}
EXPORT_SYMBOL(register_qdisc);

int unregister_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int err = -ENOENT;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (q == qops)
			break;
	if (q) {
		*qp = q->next;
		q->next = NULL;
		err = 0;
	}
	write_unlock(&qdisc_mod_lock);
	return err;
}
EXPORT_SYMBOL(unregister_qdisc);
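
/*
 * A minimal sketch of how a scheduler module would use the registration
 * API above; "toy_qdisc_ops" and its callbacks are hypothetical, not
 * part of this file:
 *
 *	static struct Qdisc_ops toy_qdisc_ops __read_mostly = {
 *		.id		= "toy",
 *		.priv_size	= 0,
 *		.enqueue	= toy_enqueue,
 *		.dequeue	= toy_dequeue,
 *		.peek		= qdisc_peek_head,
 *		.owner		= THIS_MODULE,
 *	};
 *
 *	static int __init toy_module_init(void)
 *	{
 *		return register_qdisc(&toy_qdisc_ops);
 *	}
 *
 *	static void __exit toy_module_exit(void)
 *	{
 *		unregister_qdisc(&toy_qdisc_ops);
 *	}
 *	module_init(toy_module_init);
 *	module_exit(toy_module_exit);
 */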

/* Get default qdisc if not otherwise specified */
void qdisc_get_default(char *name, size_t len)
{
	read_lock(&qdisc_mod_lock);
	strlcpy(name, default_qdisc_ops->id, len);
	read_unlock(&qdisc_mod_lock);
}

static struct Qdisc_ops *qdisc_lookup_default(const char *name)
{
	struct Qdisc_ops *q = NULL;

	for (q = qdisc_base; q; q = q->next) {
		if (!strcmp(name, q->id)) {
			if (!try_module_get(q->owner))
				q = NULL;
			break;
		}
	}

	return q;
}

/* Set new default qdisc to use */
int qdisc_set_default(const char *name)
{
	const struct Qdisc_ops *ops;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	write_lock(&qdisc_mod_lock);
	ops = qdisc_lookup_default(name);
	if (!ops) {
		/* Not found, drop lock and try to load module */
		write_unlock(&qdisc_mod_lock);
		request_module("sch_%s", name);
		write_lock(&qdisc_mod_lock);

		ops = qdisc_lookup_default(name);
	}

	if (ops) {
		/* Set new default */
		module_put(default_qdisc_ops->owner);
		default_qdisc_ops = ops;
	}
	write_unlock(&qdisc_mod_lock);

	return ops ? 0 : -ENOENT;
}

#ifdef CONFIG_NET_SCH_DEFAULT
/* Set default value from kernel config */
static int __init sch_default_qdisc(void)
{
	return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
}
late_initcall(sch_default_qdisc);
#endif
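
/*
 * qdisc_get_default()/qdisc_set_default() are what back the
 * net.core.default_qdisc sysctl (the sysctl plumbing itself lives in
 * net/core/sysctl_net_core.c, not here), so from userspace the default
 * scheduler can be inspected or changed with, for example:
 *
 *	sysctl net.core.default_qdisc
 *	sysctl -w net.core.default_qdisc=fq_codel
 */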

/* We know handle. Find qdisc among all qdisc's attached to device
 * (root qdisc, all its children, children of children etc.)
 * Note: caller either uses rtnl or rcu_read_lock()
 */

static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
{
	struct Qdisc *q;

	if (!qdisc_dev(root))
		return (root->handle == handle ? root : NULL);

	if (!(root->flags & TCQ_F_BUILTIN) &&
	    root->handle == handle)
		return root;

	hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
		if (q->handle == handle)
			return q;
	}
	return NULL;
}

void qdisc_hash_add(struct Qdisc *q, bool invisible)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		ASSERT_RTNL();
		hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
		if (invisible)
			q->flags |= TCQ_F_INVISIBLE;
	}
}
EXPORT_SYMBOL(qdisc_hash_add);

void qdisc_hash_del(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		ASSERT_RTNL();
		hash_del_rcu(&q->hash);
	}
}
EXPORT_SYMBOL(qdisc_hash_del);

struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	struct Qdisc *q;

	if (!handle)
		return NULL;
	q = qdisc_match_from_root(dev->qdisc, handle);
	if (q)
		goto out;

	if (dev_ingress_queue(dev))
		q = qdisc_match_from_root(
			dev_ingress_queue(dev)->qdisc_sleeping,
			handle);
out:
	return q;
}

struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
{
	struct netdev_queue *nq;
	struct Qdisc *q;

	if (!handle)
		return NULL;
	q = qdisc_match_from_root(dev->qdisc, handle);
	if (q)
		goto out;

	nq = dev_ingress_queue_rcu(dev);
	if (nq)
		q = qdisc_match_from_root(nq->qdisc_sleeping, handle);
out:
	return q;
}

static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
	unsigned long cl;
	const struct Qdisc_class_ops *cops = p->ops->cl_ops;

	if (cops == NULL)
		return NULL;
	cl = cops->find(p, classid);

	if (cl == 0)
		return NULL;
	return cops->leaf(p, cl);
}

/* Find queueing discipline by name */

static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
{
	struct Qdisc_ops *q = NULL;

	if (kind) {
		read_lock(&qdisc_mod_lock);
		for (q = qdisc_base; q; q = q->next) {
			if (nla_strcmp(kind, q->id) == 0) {
				if (!try_module_get(q->owner))
					q = NULL;
				break;
			}
		}
		read_unlock(&qdisc_mod_lock);
	}
	return q;
}

/* In older versions of iproute2 the linklayer setting was not
 * transferred, and the rate table lookup system has been dropped from
 * the kernel. To stay backward compatible with older iproute2 tc
 * utils, we detect the linklayer setting by checking whether the rate
 * table was modified.
 *
 * For linklayer ATM table entries, the rate table will be aligned to
 * 48 bytes, thus some table entries will contain the same value. The
 * mpu (min packet unit) is also encoded into the old rate table, thus
 * starting from the mpu, we find low and high table entries for
 * mapping this cell. If these entries contain the same value, then
 * the rate table has been modified for linklayer ATM.
 *
 * This is done by rounding mpu to the nearest 48-byte cell/entry,
 * then rounding up to the next cell, calculating the table entry one
 * below it, and comparing the two.
 */
static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
{
	int low       = roundup(r->mpu, 48);
	int high      = roundup(low+1, 48);
	int cell_low  = low >> r->cell_log;
	int cell_high = (high >> r->cell_log) - 1;

	/* rtab is too inaccurate at rates > 100Mbit/s */
	if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
		pr_debug("TC linklayer: Giving up ATM detection\n");
		return TC_LINKLAYER_ETHERNET;
	}

	if ((cell_high > cell_low) && (cell_high < 256)
	    && (rtab[cell_low] == rtab[cell_high])) {
		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
			 cell_low, cell_high, rtab[cell_high]);
		return TC_LINKLAYER_ATM;
	}
	return TC_LINKLAYER_ETHERNET;
}
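
/*
 * A worked example of the detection above (illustrative values,
 * assuming cell_log = 3, i.e. 8-byte cells):
 *
 *	mpu = 96  ->  low  = roundup(96, 48)  = 96
 *	              high = roundup(97, 48)  = 144
 *	              cell_low  = 96 >> 3        = 12
 *	              cell_high = (144 >> 3) - 1 = 17
 *
 * With ATM's 48-byte cell alignment, packet sizes in that span all
 * occupy the same number of 48-byte cells, so rtab[12] == rtab[17] and
 * TC_LINKLAYER_ATM is reported; an unmodified Ethernet table would
 * give rtab[12] != rtab[17].
 */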

static struct qdisc_rate_table *qdisc_rtab_list;

struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
					struct nlattr *tab,
					struct netlink_ext_ack *extack)
{
	struct qdisc_rate_table *rtab;

	if (tab == NULL || r->rate == 0 ||
	    r->cell_log == 0 || r->cell_log >= 32 ||
	    nla_len(tab) != TC_RTAB_SIZE) {
		NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
		return NULL;
	}

	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
		if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
		    !memcmp(&rtab->data, nla_data(tab), 1024)) {
			rtab->refcnt++;
			return rtab;
		}
	}

	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
	if (rtab) {
		rtab->rate = *r;
		rtab->refcnt = 1;
		memcpy(rtab->data, nla_data(tab), 1024);
		if (r->linklayer == TC_LINKLAYER_UNAWARE)
			r->linklayer = __detect_linklayer(r, rtab->data);
		rtab->next = qdisc_rtab_list;
		qdisc_rtab_list = rtab;
	} else {
		NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
	}
	return rtab;
}
EXPORT_SYMBOL(qdisc_get_rtab);

void qdisc_put_rtab(struct qdisc_rate_table *tab)
{
	struct qdisc_rate_table *rtab, **rtabp;

	if (!tab || --tab->refcnt)
		return;

	for (rtabp = &qdisc_rtab_list;
	     (rtab = *rtabp) != NULL;
	     rtabp = &rtab->next) {
		if (rtab == tab) {
			*rtabp = rtab->next;
			kfree(rtab);
			return;
		}
	}
}
EXPORT_SYMBOL(qdisc_put_rtab);

static LIST_HEAD(qdisc_stab_list);

static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
	[TCA_STAB_DATA] = { .type = NLA_BINARY },
};

static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
					       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_STAB_MAX + 1];
	struct qdisc_size_table *stab;
	struct tc_sizespec *s;
	unsigned int tsize = 0;
	u16 *tab = NULL;
	int err;

	err = nla_parse_nested_deprecated(tb, TCA_STAB_MAX, opt, stab_policy,
					  extack);
	if (err < 0)
		return ERR_PTR(err);
	if (!tb[TCA_STAB_BASE]) {
		NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
		return ERR_PTR(-EINVAL);
	}

	s = nla_data(tb[TCA_STAB_BASE]);

	if (s->tsize > 0) {
		if (!tb[TCA_STAB_DATA]) {
			NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
			return ERR_PTR(-EINVAL);
		}
		tab = nla_data(tb[TCA_STAB_DATA]);
		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
	}

	if (tsize != s->tsize || (!tab && tsize > 0)) {
		NL_SET_ERR_MSG(extack, "Invalid size of size table");
		return ERR_PTR(-EINVAL);
	}

	list_for_each_entry(stab, &qdisc_stab_list, list) {
		if (memcmp(&stab->szopts, s, sizeof(*s)))
			continue;
		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
			continue;
		stab->refcnt++;
		return stab;
	}

	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
	if (!stab)
		return ERR_PTR(-ENOMEM);

	stab->refcnt = 1;
	stab->szopts = *s;
	if (tsize > 0)
		memcpy(stab->data, tab, tsize * sizeof(u16));

	list_add_tail(&stab->list, &qdisc_stab_list);

	return stab;
}

void qdisc_put_stab(struct qdisc_size_table *tab)
{
	if (!tab)
		return;

	if (--tab->refcnt == 0) {
		list_del(&tab->list);
		kfree_rcu(tab, rcu);
	}
}
EXPORT_SYMBOL(qdisc_put_stab);

static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
{
	struct nlattr *nest;

	nest = nla_nest_start_noflag(skb, TCA_STAB);
	if (nest == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
		goto nla_put_failure;
	nla_nest_end(skb, nest);

	return skb->len;

nla_put_failure:
	return -1;
}

void __qdisc_calculate_pkt_len(struct sk_buff *skb,
			       const struct qdisc_size_table *stab)
{
	int pkt_len, slot;

	pkt_len = skb->len + stab->szopts.overhead;
	if (unlikely(!stab->szopts.tsize))
		goto out;

	slot = pkt_len + stab->szopts.cell_align;
	if (unlikely(slot < 0))
		slot = 0;

	slot >>= stab->szopts.cell_log;
	if (likely(slot < stab->szopts.tsize))
		pkt_len = stab->data[slot];
	else
		pkt_len = stab->data[stab->szopts.tsize - 1] *
				(slot / stab->szopts.tsize) +
				stab->data[slot % stab->szopts.tsize];

	pkt_len <<= stab->szopts.size_log;
out:
	if (unlikely(pkt_len < 1))
		pkt_len = 1;
	qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
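
/*
 * A small worked example of the size-table lookup above (illustrative
 * numbers only): with overhead = 24, cell_align = 0, cell_log = 6 and
 * tsize = 512, a 40-byte skb is handled as
 *
 *	pkt_len = 40 + 24 = 64
 *	slot    = 64 >> 6 = 1
 *	pkt_len = stab->data[1] << size_log
 *
 * i.e. the table maps each 64-byte "cell" of the adjusted length to a
 * user-supplied cost, which is how ATM/ADSL framing overhead is
 * typically modelled.
 */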

void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
{
	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
		pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
			txt, qdisc->ops->id, qdisc->handle >> 16);
		qdisc->flags |= TCQ_F_WARN_NONWC;
	}
}
EXPORT_SYMBOL(qdisc_warn_nonwc);

static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);

	rcu_read_lock();
	__netif_schedule(qdisc_root(wd->qdisc));
	rcu_read_unlock();

	return HRTIMER_NORESTART;
}

void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
				 clockid_t clockid)
{
	hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED);
	wd->timer.function = qdisc_watchdog;
	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init_clockid);

void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC);
}
EXPORT_SYMBOL(qdisc_watchdog_init);

void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
{
	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(wd->qdisc)->state))
		return;

	if (wd->last_expires == expires)
		return;

	wd->last_expires = expires;
	hrtimer_start(&wd->timer,
		      ns_to_ktime(expires),
		      HRTIMER_MODE_ABS_PINNED);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);

void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
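
/*
 * Typical watchdog usage, sketched from the perspective of a shaping
 * qdisc (the "toy_" struct and field names are hypothetical): when
 * ->dequeue() finds the head packet may not be sent yet, it returns
 * NULL and arms the watchdog so the queue is rescheduled at the right
 * time instead of spinning:
 *
 *	static struct sk_buff *toy_shaper_dequeue(struct Qdisc *sch)
 *	{
 *		struct toy_sched_data *q = qdisc_priv(sch);
 *		u64 now = ktime_get_ns();
 *
 *		if (q->next_send_time > now) {
 *			qdisc_watchdog_schedule_ns(&q->watchdog,
 *						   q->next_send_time);
 *			return NULL;	// not empty, just not ready
 *		}
 *		return qdisc_dequeue_head(sch);
 *	}
 */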

static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
{
	struct hlist_head *h;
	unsigned int i;

	h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);

	if (h != NULL) {
		for (i = 0; i < n; i++)
			INIT_HLIST_HEAD(&h[i]);
	}
	return h;
}

void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
	struct Qdisc_class_common *cl;
	struct hlist_node *next;
	struct hlist_head *nhash, *ohash;
	unsigned int nsize, nmask, osize;
	unsigned int i, h;

	/* Rehash when load factor exceeds 0.75 */
	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
		return;
	nsize = clhash->hashsize * 2;
	nmask = nsize - 1;
	nhash = qdisc_class_hash_alloc(nsize);
	if (nhash == NULL)
		return;

	ohash = clhash->hash;
	osize = clhash->hashsize;

	sch_tree_lock(sch);
	for (i = 0; i < osize; i++) {
		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
			h = qdisc_class_hash(cl->classid, nmask);
			hlist_add_head(&cl->hnode, &nhash[h]);
		}
	}
	clhash->hash = nhash;
	clhash->hashsize = nsize;
	clhash->hashmask = nmask;
	sch_tree_unlock(sch);

	kvfree(ohash);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);

int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
{
	unsigned int size = 4;

	clhash->hash = qdisc_class_hash_alloc(size);
	if (!clhash->hash)
		return -ENOMEM;
	clhash->hashsize  = size;
	clhash->hashmask  = size - 1;
	clhash->hashelems = 0;
	return 0;
}
EXPORT_SYMBOL(qdisc_class_hash_init);

void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	kvfree(clhash->hash);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);

void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	unsigned int h;

	INIT_HLIST_NODE(&cl->hnode);
	h = qdisc_class_hash(cl->classid, clhash->hashmask);
	hlist_add_head(&cl->hnode, &clhash->hash[h]);
	clhash->hashelems++;
}
EXPORT_SYMBOL(qdisc_class_hash_insert);

void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);

/* Allocate a unique handle from the space managed by the kernel.
 * Possible range is [8000-FFFF]:0000 (0x8000 values)
 */
static u32 qdisc_alloc_handle(struct net_device *dev)
{
	int i = 0x8000;
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
		if (!qdisc_lookup(dev, autohandle))
			return autohandle;
		cond_resched();
	} while (--i > 0);

	return 0;
}
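
/*
 * For reference, a qdisc handle is a 32-bit value whose upper 16 bits
 * are the major number and lower 16 bits the minor (always 0 for the
 * qdisc itself). The allocator above therefore cycles through the
 * majors that tc(8) prints as 8000: through ffff:. A sketch of the
 * encoding helpers from <linux/pkt_sched.h>:
 *
 *	#define TC_H_MAJ(h)		((h) & 0xFFFF0000U)
 *	#define TC_H_MIN(h)		((h) & 0x0000FFFFU)
 *	#define TC_H_MAKE(maj, min)	(((maj) & 0xFFFF0000U) | \
 *					 ((min) & 0x0000FFFFU))
 */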

void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
{
	bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;
	bool notify;
	int drops;

	if (n == 0 && len == 0)
		return;
	drops = max_t(int, n, 0);
	rcu_read_lock();
	while ((parentid = sch->parent)) {
		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
			break;

		if (sch->flags & TCQ_F_NOPARENT)
			break;
		/* Notify parent qdisc only if child qdisc becomes empty.
		 *
		 * If the child was empty even before the update, the backlog
		 * counter is corrupted and we skip the notification because
		 * the parent class is already passive.
		 *
		 * If the original child was offloaded then it is allowed
		 * to be seen as empty, so the parent is notified anyway.
		 */
		notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
						       !qdisc_is_offloaded);
		/* TODO: perform the search on a per txq basis */
		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
		if (sch == NULL) {
			WARN_ON_ONCE(parentid != TC_H_ROOT);
			break;
		}
		cops = sch->ops->cl_ops;
		if (notify && cops->qlen_notify) {
			cl = cops->find(sch, parentid);
			cops->qlen_notify(sch, cl);
		}
		sch->q.qlen -= n;
		sch->qstats.backlog -= len;
		__qdisc_qstats_drop(sch, drops);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
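
/*
 * A sketch of the typical caller: a classful qdisc that drops packets
 * outside its own enqueue path (e.g. from a timer) must propagate the
 * queue-length change up the tree, roughly like this ("toy_" names are
 * hypothetical):
 *
 *	unsigned int dropped_pkts = 0, dropped_bytes = 0;
 *	struct sk_buff *skb;
 *
 *	while ((skb = toy_pick_victim(q)) != NULL) {
 *		dropped_pkts++;
 *		dropped_bytes += qdisc_pkt_len(skb);
 *		rtnl_kfree_skbs(skb, skb);
 *	}
 *	qdisc_tree_reduce_backlog(sch, dropped_pkts, dropped_bytes);
 */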

int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type,
			      void *type_data)
{
	struct net_device *dev = qdisc_dev(sch);
	int err;

	sch->flags &= ~TCQ_F_OFFLOADED;
	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return 0;

	err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
	if (err == -EOPNOTSUPP)
		return 0;

	if (!err)
		sch->flags |= TCQ_F_OFFLOADED;

	return err;
}
EXPORT_SYMBOL(qdisc_offload_dump_helper);

void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
				struct Qdisc *new, struct Qdisc *old,
				enum tc_setup_type type, void *type_data,
				struct netlink_ext_ack *extack)
{
	bool any_qdisc_is_offloaded;
	int err;

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return;

	err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);

	/* Don't report error if the graft is part of destroy operation. */
	if (!err || !new || new == &noop_qdisc)
		return;

	/* Don't report error if the parent, the old child and the new
	 * one are not offloaded.
	 */
	any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED;
	any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED;
	any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED;

	if (any_qdisc_is_offloaded)
		NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
}
EXPORT_SYMBOL(qdisc_offload_graft_helper);

static void qdisc_offload_graft_root(struct net_device *dev,
				     struct Qdisc *new, struct Qdisc *old,
				     struct netlink_ext_ack *extack)
{
	struct tc_root_qopt_offload graft_offload = {
		.command	= TC_ROOT_GRAFT,
		.handle		= new ? new->handle : 0,
		.ingress	= (new && new->flags & TCQ_F_INGRESS) ||
				  (old && old->flags & TCQ_F_INGRESS),
	};

	qdisc_offload_graft_helper(dev, NULL, new, old,
				   TC_SETUP_ROOT_QDISC, &graft_offload, extack);
}

static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 portid, u32 seq, u16 flags, int event)
{
	struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
	struct tcmsg *tcm;
	struct nlmsghdr  *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	struct qdisc_size_table *stab;
	u32 block_index;
	__u32 qlen;

	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = refcount_read(&q->refcnt);
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (q->ops->ingress_block_get) {
		block_index = q->ops->ingress_block_get(q);
		if (block_index &&
		    nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index))
			goto nla_put_failure;
	}
	if (q->ops->egress_block_get) {
		block_index = q->ops->egress_block_get(q);
		if (block_index &&
		    nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index))
			goto nla_put_failure;
	}
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
		goto nla_put_failure;
	qlen = qdisc_qlen_sum(q);

	stab = rtnl_dereference(q->stab);
	if (stab && qdisc_dump_stab(skb, stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	if (qdisc_is_percpu_stats(q)) {
		cpu_bstats = q->cpu_bstats;
		cpu_qstats = q->cpu_qstats;
	}

	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
				  &d, cpu_bstats, &q->bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
{
	if (q->flags & TCQ_F_BUILTIN)
		return true;
	if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
		return true;

	return false;
}

static int qdisc_notify(struct net *net, struct sk_buff *oskb,
			struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (old && !tc_qdisc_dump_ignore(old, false)) {
		if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
				  0, RTM_DELQDISC) < 0)
			goto err_out;
	}
	if (new && !tc_qdisc_dump_ignore(new, false)) {
		if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
			goto err_out;
	}

	if (skb->len)
		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				      n->nlmsg_flags & NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}

static void notify_and_destroy(struct net *net, struct sk_buff *skb,
			       struct nlmsghdr *n, u32 clid,
			       struct Qdisc *old, struct Qdisc *new)
{
	if (new || old)
		qdisc_notify(net, skb, n, clid, old, new);

	if (old)
		qdisc_put(old);
}

static void qdisc_clear_nolock(struct Qdisc *sch)
{
	sch->flags &= ~TCQ_F_NOLOCK;
	if (!(sch->flags & TCQ_F_CPUSTATS))
		return;

	free_percpu(sch->cpu_bstats);
	free_percpu(sch->cpu_qstats);
	sch->cpu_bstats = NULL;
	sch->cpu_qstats = NULL;
	sch->flags &= ~TCQ_F_CPUSTATS;
}

/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate send a netlink notification using 'skb'
 * and "n".
 *
 * On success, destroy old qdisc.
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old,
		       struct netlink_ext_ack *extack)
{
	struct Qdisc *q = old;
	struct net *net = dev_net(dev);

	if (parent == NULL) {
		unsigned int i, num_q, ingress;

		ingress = 0;
		num_q = dev->num_tx_queues;
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			num_q = 1;
			ingress = 1;
			if (!dev_ingress_queue(dev)) {
				NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
				return -ENOENT;
			}
		}

		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		qdisc_offload_graft_root(dev, new, old, extack);

		if (new && new->ops->attach)
			goto skip;

		for (i = 0; i < num_q; i++) {
			struct netdev_queue *dev_queue = dev_ingress_queue(dev);

			if (!ingress)
				dev_queue = netdev_get_tx_queue(dev, i);

			old = dev_graft_qdisc(dev_queue, new);
			if (new && i > 0)
				qdisc_refcount_inc(new);

			if (!ingress)
				qdisc_put(old);
		}

skip:
		if (!ingress) {
			notify_and_destroy(net, skb, n, classid,
					   dev->qdisc, new);
			if (new && !new->ops->attach)
				qdisc_refcount_inc(new);
			dev->qdisc = new ? : &noop_qdisc;

			if (new && new->ops->attach)
				new->ops->attach(new);
		} else {
			notify_and_destroy(net, skb, n, classid, old, new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
		unsigned long cl;
		int err;

		/* Only support running class lockless if parent is lockless */
		if (new && (new->flags & TCQ_F_NOLOCK) &&
		    parent && !(parent->flags & TCQ_F_NOLOCK))
			qdisc_clear_nolock(new);

		if (!cops || !cops->graft)
			return -EOPNOTSUPP;

		cl = cops->find(parent, classid);
		if (!cl) {
			NL_SET_ERR_MSG(extack, "Specified class not found");
			return -ENOENT;
		}

		err = cops->graft(parent, cl, new, &old, extack);
		if (err)
			return err;
		notify_and_destroy(net, skb, n, classid, old, new);
	}
	return 0;
}

static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
				   struct netlink_ext_ack *extack)
{
	u32 block_index;

	if (tca[TCA_INGRESS_BLOCK]) {
		block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]);

		if (!block_index) {
			NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0");
			return -EINVAL;
		}
		if (!sch->ops->ingress_block_set) {
			NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported");
			return -EOPNOTSUPP;
		}
		sch->ops->ingress_block_set(sch, block_index);
	}
	if (tca[TCA_EGRESS_BLOCK]) {
		block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]);

		if (!block_index) {
			NL_SET_ERR_MSG(extack, "Egress block index cannot be 0");
			return -EINVAL;
		}
		if (!sch->ops->egress_block_set) {
			NL_SET_ERR_MSG(extack, "Egress block sharing is not supported");
			return -EOPNOTSUPP;
		}
		sch->ops->egress_block_set(sch, block_index);
	}
	return 0;
}

/*
   Allocate and initialize new qdisc.

   Parameters are passed via opt.
 */

static struct Qdisc *qdisc_create(struct net_device *dev,
				  struct netdev_queue *dev_queue,
				  struct Qdisc *p, u32 parent, u32 handle,
				  struct nlattr **tca, int *errp,
				  struct netlink_ext_ack *extack)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_MODULES
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];
		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load. So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request. We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the mean time.
			 */
			rtnl_unlock();
			request_module("sch_%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* We will try again with qdisc_lookup_ops,
				 * so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	err = -ENOENT;
	if (!ops) {
		NL_SET_ERR_MSG(extack, "Specified qdisc not found");
		goto err_out;
	}

	sch = qdisc_alloc(dev_queue, ops, extack);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		sch->flags |= TCQ_F_INGRESS;
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
	} else {
		if (handle == 0) {
			handle = qdisc_alloc_handle(dev);
			if (handle == 0) {
				NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
				err = -ENOSPC;
				goto err_out3;
			}
		}
		if (!netif_is_multiqueue(dev))
			sch->flags |= TCQ_F_ONETXQUEUE;
	}

	sch->handle = handle;

	/* This exists to keep backward compatibility with a userspace
	 * loophole that allowed userspace to get IFF_NO_QUEUE behaviour
	 * on older kernels by setting tx_queue_len=0 (prior to qdisc
	 * init), and then forgetting to reinit tx_queue_len before
	 * again attaching a qdisc.
	 */
	if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
		dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
		netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
	}

	err = qdisc_block_indexes_set(sch, tca, extack);
	if (err)
		goto err_out3;

	if (ops->init) {
		err = ops->init(sch, tca[TCA_OPTIONS], extack);
		if (err != 0)
			goto err_out5;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB], extack);
		if (IS_ERR(stab)) {
			err = PTR_ERR(stab);
			goto err_out4;
		}
		rcu_assign_pointer(sch->stab, stab);
	}
	if (tca[TCA_RATE]) {
		seqcount_t *running;

		err = -EOPNOTSUPP;
		if (sch->flags & TCQ_F_MQROOT) {
			NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
			goto err_out4;
		}

		if (sch->parent != TC_H_ROOT &&
		    !(sch->flags & TCQ_F_INGRESS) &&
		    (!p || !(p->flags & TCQ_F_MQROOT)))
			running = qdisc_root_sleeping_running(sch);
		else
			running = &sch->running;

		err = gen_new_estimator(&sch->bstats,
					sch->cpu_bstats,
					&sch->rate_est,
					NULL,
					running,
					tca[TCA_RATE]);
		if (err) {
			NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
			goto err_out4;
		}
	}

	qdisc_hash_add(sch, false);

	return sch;

err_out5:
	/* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
	if (ops->destroy)
		ops->destroy(sch);
err_out3:
	dev_put(dev);
	qdisc_free(sch);
err_out2:
	module_put(ops->owner);
err_out:
	*errp = err;
	return NULL;

err_out4:
	/*
	 * Any broken qdiscs that would require a ops->reset() here?
	 * The qdisc was never in action so it shouldn't be necessary.
	 */
	qdisc_put_stab(rtnl_dereference(sch->stab));
	if (ops->destroy)
		ops->destroy(sch);
	goto err_out3;
}

static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
			struct netlink_ext_ack *extack)
{
	struct qdisc_size_table *ostab, *stab = NULL;
	int err = 0;

	if (tca[TCA_OPTIONS]) {
		if (!sch->ops->change) {
			NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
			return -EINVAL;
		}
		if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
			NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
			return -EOPNOTSUPP;
		}
		err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
		if (err)
			return err;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB], extack);
		if (IS_ERR(stab))
			return PTR_ERR(stab);
	}

	ostab = rtnl_dereference(sch->stab);
	rcu_assign_pointer(sch->stab, stab);
	qdisc_put_stab(ostab);

	if (tca[TCA_RATE]) {
		/* NB: ignores errors from replace_estimator
		   because change can't be undone. */
		if (sch->flags & TCQ_F_MQROOT)
			goto out;
		gen_replace_estimator(&sch->bstats,
				      sch->cpu_bstats,
				      &sch->rate_est,
				      NULL,
				      qdisc_root_sleeping_running(sch),
				      tca[TCA_RATE]);
	}
out:
	return 0;
}

struct check_loop_arg {
	struct qdisc_walker	w;
	struct Qdisc		*p;
	int			depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl,
			 struct qdisc_walker *w);

static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
{
	struct check_loop_arg	arg;

	if (q->ops->cl_ops == NULL)
		return 0;

	arg.w.stop = arg.w.skip = arg.w.count = 0;
	arg.w.fn = check_loop_fn;
	arg.depth = depth;
	arg.p = p;
	q->ops->cl_ops->walk(q, &arg.w);
	return arg.w.stop ? -ELOOP : 0;
}

static int
check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
{
	struct Qdisc *leaf;
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct check_loop_arg *arg = (struct check_loop_arg *)w;

	leaf = cops->leaf(q, cl);
	if (leaf) {
		if (leaf == arg->p || arg->depth > 7)
			return -ELOOP;
		return check_loop(leaf, arg->p, arg->depth + 1);
	}
	return 0;
}

const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
	[TCA_KIND]		= { .type = NLA_STRING },
	[TCA_RATE]		= { .type = NLA_BINARY,
				    .len = sizeof(struct tc_estimator) },
	[TCA_STAB]		= { .type = NLA_NESTED },
	[TCA_DUMP_INVISIBLE]	= { .type = NLA_FLAG },
	[TCA_CHAIN]		= { .type = NLA_U32 },
	[TCA_INGRESS_BLOCK]	= { .type = NLA_U32 },
	[TCA_EGRESS_BLOCK]	= { .type = NLA_U32 },
};

/*
 * Delete/get qdisc.
 */

static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	if ((n->nlmsg_type != RTM_GETQDISC) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	clid = tcm->tcm_parent;
	if (clid) {
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p) {
					NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
					return -ENOENT;
				}
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}
		if (!q) {
			NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
			return -ENOENT;
		}

		if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
			NL_SET_ERR_MSG(extack, "Invalid handle");
			return -EINVAL;
		}
	} else {
		q = qdisc_lookup(dev, tcm->tcm_handle);
		if (!q) {
			NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
			return -ENOENT;
		}
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
		NL_SET_ERR_MSG(extack, "Invalid qdisc name");
		return -EINVAL;
	}

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid) {
			NL_SET_ERR_MSG(extack, "Classid cannot be zero");
			return -EINVAL;
		}
		if (q->handle == 0) {
			NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
			return -ENOENT;
		}
		err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
		if (err != 0)
			return err;
	} else {
		qdisc_notify(net, skb, n, clid, NULL, q);
	}
	return 0;
}

/*
 * Create/change qdisc.
 */

static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			   struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm;
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q, *p;
	int err;

	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

replay:
	/* Reinit, just in case something touches this. */
	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	tcm = nlmsg_data(n);
	clid = tcm->tcm_parent;
	q = p = NULL;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;


	if (clid) {
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p) {
					NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
					return -ENOENT;
				}
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue_create(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}

		/* It may be default qdisc, ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
					NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
					return -EEXIST;
				}
				if (TC_H_MIN(tcm->tcm_handle)) {
					NL_SET_ERR_MSG(extack, "Invalid minor handle");
					return -EINVAL;
				}
				q = qdisc_lookup(dev, tcm->tcm_handle);
				if (!q)
					goto create_n_graft;
				if (n->nlmsg_flags & NLM_F_EXCL) {
					NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
					return -EEXIST;
				}
				if (tca[TCA_KIND] &&
				    nla_strcmp(tca[TCA_KIND], q->ops->id)) {
					NL_SET_ERR_MSG(extack, "Invalid qdisc name");
					return -EINVAL;
				}
				if (q == p ||
				    (p && check_loop(q, p, 0))) {
					NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
					return -ELOOP;
				}
				qdisc_refcount_inc(q);
				goto graft;
			} else {
				if (!q)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 * We know that some child q is already
				 * attached to this parent and have a
				 * choice: either to change it or to
				 * create/graft a new one.
				 *
				 * 1. We are allowed to create/graft only
				 * if both the CREATE and REPLACE flags
				 * are set.
				 *
				 * 2. If EXCL is set, the requestor wanted
				 * to say that the qdisc tcm_handle is not
				 * expected to exist, so we choose
				 * create/graft too.
				 *
				 * 3. The last case is when no flags are
				 * set. Alas, it is a sort of hole in the
				 * API; we cannot decide what to do
				 * unambiguously. For now we select
				 * create/graft if the user gave a KIND
				 * that does not match the existing one.
				 */
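				/* For intuition, the tc(8) commands map to
				 * these flags roughly as follows (a sketch,
				 * not an exhaustive mapping):
				 *
				 *	tc qdisc add     -> NLM_F_CREATE | NLM_F_EXCL
				 *	tc qdisc replace -> NLM_F_CREATE | NLM_F_REPLACE
				 *	tc qdisc change  -> (no flags)
				 *
				 * So with a child already attached,
				 * "replace" with a different KIND recreates
				 * the qdisc here, "change" (and "replace"
				 * with the same KIND) falls through to
				 * qdisc_change(), and "add" is rejected
				 * further down by the NLM_F_EXCL check.
				 */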
				if ((n->nlmsg_flags & NLM_F_CREATE) &&
				    (n->nlmsg_flags & NLM_F_REPLACE) &&
				    ((n->nlmsg_flags & NLM_F_EXCL) ||
				     (tca[TCA_KIND] &&
				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
					goto create_n_graft;
			}
		}
	} else {
		if (!tcm->tcm_handle) {
			NL_SET_ERR_MSG(extack, "Handle cannot be zero");
			return -EINVAL;
		}
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (!q) {
		NL_SET_ERR_MSG(extack, "Specified qdisc not found");
		return -ENOENT;
	}
	if (n->nlmsg_flags & NLM_F_EXCL) {
		NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
		return -EEXIST;
	}
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
		NL_SET_ERR_MSG(extack, "Invalid qdisc name");
		return -EINVAL;
	}
	err = qdisc_change(q, tca, extack);
	if (err == 0)
		qdisc_notify(net, skb, n, clid, NULL, q);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags & NLM_F_CREATE)) {
		NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
		return -ENOENT;
	}
	if (clid == TC_H_INGRESS) {
		if (dev_ingress_queue(dev)) {
			q = qdisc_create(dev, dev_ingress_queue(dev), p,
					 tcm->tcm_parent, tcm->tcm_parent,
					 tca, &err, extack);
		} else {
			NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
			err = -ENOENT;
		}
	} else {
		struct netdev_queue *dev_queue;

		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
		else if (p)
			dev_queue = p->dev_queue;
		else
			dev_queue = netdev_get_tx_queue(dev, 0);

		q = qdisc_create(dev, dev_queue, p,
				 tcm->tcm_parent, tcm->tcm_handle,
				 tca, &err, extack);
	}
	if (q == NULL) {
		if (err == -EAGAIN)
			goto replay;
		return err;
	}

graft:
	err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
	if (err) {
		if (q)
			qdisc_put(q);
		return err;
	}

	return 0;
}

static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      int *q_idx_p, int s_q_idx, bool recur,
			      bool dump_invisible)
{
	int ret = 0, q_idx = *q_idx_p;
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	q = root;
	if (q_idx < s_q_idx) {
		q_idx++;
	} else {
		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
				  RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

	/* If dumping singletons, there is no qdisc_dev(root) and the singleton
	 * itself has already been dumped.
	 *
	 * If we've already dumped the top-level (ingress) qdisc above and the
	 * global qdisc hashtable, we don't want to hit it again.
	 */
	if (!qdisc_dev(root) || !recur)
		goto out;

	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (q_idx < s_q_idx) {
			q_idx++;
			continue;
		}
		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
				  RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

out:
	*q_idx_p = q_idx;
	return ret;
done:
	ret = -1;
	goto out;
}

static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct net_device *dev;
	const struct nlmsghdr *nlh = cb->nlh;
	struct nlattr *tca[TCA_MAX + 1];
	int err;

	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];

	idx = 0;
	ASSERT_RTNL();

	err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
				     rtm_tca_policy, cb->extack);
	if (err < 0)
		return err;

	for_each_netdev(net, dev) {
		struct netdev_queue *dev_queue;

		if (idx < s_idx)
			goto cont;
		if (idx > s_idx)
			s_q_idx = 0;
		q_idx = 0;

		if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
				       true, tca[TCA_DUMP_INVISIBLE]) < 0)
			goto done;

		dev_queue = dev_ingress_queue(dev);
		if (dev_queue &&
		    tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
				       &q_idx, s_q_idx, false,
				       tca[TCA_DUMP_INVISIBLE]) < 0)
			goto done;

cont:
		idx++;
	}

done:
	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}



/************************************************
 *	Traffic classes manipulation.		*
 ************************************************/

static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 portid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr  *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int tclass_notify(struct net *net, struct sk_buff *oskb,
			 struct nlmsghdr *n, struct Qdisc *q,
			 unsigned long cl, int event)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	int err = 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			     n->nlmsg_flags & NLM_F_ECHO);
	if (err > 0)
		err = 0;
	return err;
}

static int tclass_del_notify(struct net *net,
			     const struct Qdisc_class_ops *cops,
			     struct sk_buff *oskb, struct nlmsghdr *n,
			     struct Qdisc *q, unsigned long cl)
{
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	struct sk_buff *skb;
	int err = 0;

	if (!cops->delete)
		return -EOPNOTSUPP;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
			   RTM_DELTCLASS) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	err = cops->delete(q, cl);
	if (err) {
		kfree_skb(skb);
		return err;
	}

	err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			     n->nlmsg_flags & NLM_F_ECHO);
	if (err > 0)
		err = 0;
	return err;
}

#ifdef CONFIG_NET_CLS

struct tcf_bind_args {
	struct tcf_walker w;
	unsigned long base;
	unsigned long cl;
	u32 classid;
};

static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
{
	struct tcf_bind_args *a = (void *)arg;

	if (tp->ops->bind_class) {
		struct Qdisc *q = tcf_block_q(tp->chain->block);

		sch_tree_lock(q);
		tp->ops->bind_class(n, a->classid, a->cl, q, a->base);
		sch_tree_unlock(q);
	}
	return 0;
}

struct tc_bind_class_args {
	struct qdisc_walker w;
	unsigned long new_cl;
	u32 portid;
	u32 clid;
};

static int tc_bind_class_walker(struct Qdisc *q, unsigned long cl,
				struct qdisc_walker *w)
{
	struct tc_bind_class_args *a = (struct tc_bind_class_args *)w;
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct tcf_block *block;
	struct tcf_chain *chain;

	block = cops->tcf_block(q, cl, NULL);
	if (!block)
		return 0;
	for (chain = tcf_get_next_chain(block, NULL);
	     chain;
	     chain = tcf_get_next_chain(block, chain)) {
		struct tcf_proto *tp;

		for (tp = tcf_get_next_proto(chain, NULL, true);
		     tp; tp = tcf_get_next_proto(chain, tp, true)) {
			struct tcf_bind_args arg = {};

			arg.w.fn = tcf_node_bind;
			arg.classid = a->clid;
			arg.base = cl;
			arg.cl = a->new_cl;
			tp->ops->walk(tp, &arg.w, true);
		}
	}

	return 0;
}

static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct tc_bind_class_args args = {};

	if (!cops->tcf_block)
		return;
	args.portid = portid;
	args.clid = clid;
	args.new_cl = new_cl;
	args.w.fn = tc_bind_class_walker;
	q->ops->cl_ops->walk(q, &args.w);
}

#else

static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
}

#endif

static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
			 struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q = NULL;
	const struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 portid;
	u32 clid;
	u32 qid;
	int err;

	if ((n->nlmsg_type != RTM_GETTCLASS) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT   - class is root, which has no parent.
	   parent == X:0         - parent is root class.
	   parent == X:Y         - parent is a node in hierarchy.
	   parent == 0:Y         - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0         - generate handle from kernel pool.
	   handle == 0:Y         - class is X:Y, where X:0 is qdisc.
	   handle == X:Y         - handle is fully specified.
	   handle == X:0         - root class.
	 */
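
	/* The X:Y notation above maps onto the TC_H_* helpers from
	 * <linux/pkt_sched.h>: the major (qdisc) id occupies the upper
	 * 16 bits of a handle, the minor (class) id the lower 16.
	 * Illustration only, with arbitrary values ("1:2" in tc syntax):
	 *
	 *	u32 h = TC_H_MAKE(1 << 16, 2);
	 *	// TC_H_MAJ(h) == 0x00010000, TC_H_MIN(h) == 0x00000002
	 */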

	/* Step 1. Determine qdisc handle X:0 */

	portid = tcm->tcm_parent;
	clid = tcm->tcm_handle;
	qid = TC_H_MAJ(clid);

	if (portid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(portid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev->qdisc->handle;

		/* Now qid is a genuine qdisc handle, consistent with
		 * both parent and child.
		 *
		 * TC_H_MAJ(portid) may still be unspecified, complete it now.
		 */
		if (portid)
			portid = TC_H_MAKE(qid, portid);
	} else {
		if (qid == 0)
			qid = dev->qdisc->handle;
	}

	/* OK. Locate qdisc */
	q = qdisc_lookup(dev, qid);
	if (!q)
		return -ENOENT;

	/* And check that it supports classes. */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (portid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->find(q, clid);

	if (cl == 0) {
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS ||
		    !(n->nlmsg_flags & NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags & NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = tclass_del_notify(net, cops, skb, n, q, cl);
			/* Unbind the class from its filters by rebinding to class 0. */
			tc_bind_tclass(q, portid, clid, 0);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
		NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes");
		return -EOPNOTSUPP;
	}

	new_cl = cl;
	err = -EOPNOTSUPP;
	if (cops->change)
		err = cops->change(q, clid, portid, tca, &new_cl, extack);
	if (err == 0) {
		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
		/* We just created a new class; do the reverse binding. */
		if (cl != new_cl)
			tc_bind_tclass(q, portid, clid, new_cl);
	}
out:
	return err;
}
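
/* tc_ctl_tclass() is the handler that tc(8) class commands reach over
 * rtnetlink.  An illustrative shell session (assumes an HTB qdisc with
 * handle 1: is already installed on eth0):
 *
 *	# sends RTM_NEWTCLASS with NLM_F_CREATE | NLM_F_EXCL
 *	tc class add dev eth0 parent 1: classid 1:10 htb rate 10mbit
 *
 *	# sends RTM_DELTCLASS
 *	tc class del dev eth0 classid 1:10
 */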

struct qdisc_dump_args {
	struct qdisc_walker w;
	struct sk_buff *skb;
	struct netlink_callback *cb;
};

static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
			    struct qdisc_walker *arg)
{
	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;

	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
			      RTM_NEWTCLASS);
}

static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
				struct tcmsg *tcm, struct netlink_callback *cb,
				int *t_p, int s_t)
{
	struct qdisc_dump_args arg;

	if (tc_qdisc_dump_ignore(q, false) ||
	    *t_p < s_t || !q->ops->cl_ops ||
	    (tcm->tcm_parent &&
	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
		(*t_p)++;
		return 0;
	}
	if (*t_p > s_t)
		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
	arg.w.fn = qdisc_class_dump;
	arg.skb = skb;
	arg.cb = cb;
	arg.w.stop = 0;
	arg.w.skip = cb->args[1];
	arg.w.count = 0;
	q->ops->cl_ops->walk(q, &arg.w);
	cb->args[1] = arg.w.count;
	if (arg.w.stop)
		return -1;
	(*t_p)++;
	return 0;
}
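
/* The cb->args[] slots implement the usual netlink dump resume protocol:
 * cb->args[0] counts qdiscs already fully dumped, cb->args[1] counts
 * classes dumped within the current qdisc.  When the skb fills up the
 * walker sets arg.w.stop, the dump returns early, and the next callback
 * invocation skips the first cb->args[1] classes before resuming.
 */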

static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
			       struct tcmsg *tcm, struct netlink_callback *cb,
			       int *t_p, int s_t, bool recur)
{
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
		return -1;

	if (!qdisc_dev(root) || !recur)
		return 0;

	if (tcm->tcm_parent) {
		q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
		if (q && q != root &&
		    tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
		return 0;
	}
	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
	}

	return 0;
}
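
/* The @recur flag gates the per-device qdisc hash walk above: the caller
 * passes true only for the dev->qdisc root and false for the ingress
 * root, so the hash table is walked at most once per dump and classes
 * are not reported twice.
 */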

static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct net_device *dev;
	int t, s_t;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return 0;
	dev = dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return 0;

	s_t = cb->args[0];
	t = 0;

	if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t, true) < 0)
		goto done;

	dev_queue = dev_ingress_queue(dev);
	if (dev_queue &&
	    tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
				&t, s_t, false) < 0)
		goto done;

done:
	cb->args[0] = t;

	dev_put(dev);
	return skb->len;
}

#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC / hrtimer_resolution);

	return 0;
}
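
/* A plausible reading of /proc/net/psched on a system with 1 ns hrtimer
 * resolution (exact values depend on PSCHED_SHIFT and the clock setup):
 *
 *	$ cat /proc/net/psched
 *	000003e8 00000040 000f4240 3b9aca00
 *
 * i.e. 1000 ns per userspace microsecond, 64 ns per psched tick, the
 * historical constant 1000000, and the timer resolution in Hz.
 */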

static int __net_init psched_net_init(struct net *net)
{
	struct proc_dir_entry *e;

	e = proc_create_single("psched", 0, net->proc_net, psched_show);
	if (e == NULL)
		return -ENOMEM;

	return 0;
}

static void __net_exit psched_net_exit(struct net *net)
{
	remove_proc_entry("psched", net->proc_net);
}
#else
static int __net_init psched_net_init(struct net *net)
{
	return 0;
}

static void __net_exit psched_net_exit(struct net *net)
{
}
#endif

static struct pernet_operations psched_net_ops = {
	.init = psched_net_init,
	.exit = psched_net_exit,
};

static int __init pktsched_init(void)
{
	int err;

	err = register_pernet_subsys(&psched_net_ops);
	if (err) {
		pr_err("pktsched_init: cannot initialize per netns operations\n");
		return err;
	}

	register_qdisc(&pfifo_fast_ops);
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	register_qdisc(&pfifo_head_drop_qdisc_ops);
	register_qdisc(&mq_qdisc_ops);
	register_qdisc(&noqueue_qdisc_ops);

	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
		      0);
	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
		      0);

	return 0;
}

subsys_initcall(pktsched_init);