/*
 *	Linux IPv6 multicast routing support for BSD pim6sd
 *	Based on net/ipv4/ipmr.c.
 *
 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
 *		LSIIT Laboratory, Strasbourg, France
 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
 *		6WIND, Paris, France
 *	Copyright (C)2007,2008 USAGI/WIDE Project
 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#include <linux/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/compat.h>
#include <linux/rhashtable.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>

#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <linux/mroute6.h>
#include <linux/pim.h>
#include <net/addrconf.h>
#include <linux/netfilter_ipv6.h>
#include <linux/export.h>
#include <net/ip6_checksum.h>
#include <linux/netconf.h>

struct ip6mr_rule {
	struct fib_rule		common;
};

struct ip6mr_result {
	struct mr_table	*mrt;
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that changes are serialized via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/* Multicast router control variables */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and protected
   with the weak lock mrt_lock. The queue of unresolved entries is
   protected with the strong spinlock mfc_unres_lock.

   In this case the data path is entirely free of exclusive locks.
 */

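/* Illustrative summary of the locking used below (an editor's sketch
 * derived from this file, not authoritative documentation):
 *
 *	rtnl_lock()			- serializes control-plane changes
 *					  (table, vif and MFC add/delete)
 *	read_lock(&mrt_lock)		- taken on the data path, e.g. in
 *					  ip6_mr_input() and pim6_rcv()
 *	write_lock_bh(&mrt_lock)	- taken for short vif-table updates
 *	spin_lock_bh(&mfc_unres_lock)	- guards mfc_unres_queue and its
 *					  expiry timer
 */
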
static struct kmem_cache *mrt_cachep __read_mostly;

static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr_table *mrt);

static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
			   struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert);
static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
			      int cmd);
static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
			       struct netlink_callback *cb);
static void mroute_clean_tables(struct mr_table *mrt, bool all);
static void ipmr_expire_process(struct timer_list *t);

#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
#define ip6mr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)

static struct mr_table *ip6mr_mr_table_iter(struct net *net,
					    struct mr_table *mrt)
{
	struct mr_table *ret;

	if (!mrt)
		ret = list_entry_rcu(net->ipv6.mr6_tables.next,
				     struct mr_table, list);
	else
		ret = list_entry_rcu(mrt->list.next,
				     struct mr_table, list);

	if (&ret->list == &net->ipv6.mr6_tables)
		return NULL;
	return ret;
}

static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	ip6mr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr_table **mrt)
{
	int err;
	struct ip6mr_result res;
	struct fib_lookup_arg arg = {
		.result = &res,
		.flags = FIB_LOOKUP_NOREF,
	};

	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
			       flowi6_to_flowi(flp6), 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
			     int flags, struct fib_lookup_arg *arg)
{
	struct ip6mr_result *res = arg->result;
	struct mr_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	mrt = ip6mr_get_table(rule->fr_net, rule->table);
	if (!mrt)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	return 1;
}

static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb,
				struct netlink_ext_ack *extack)
{
	return 0;
}

static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}

static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			   struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos = 0;
	return 0;
}

static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.policy		= ip6mr_rule_policy,
	.owner		= THIS_MODULE,
};

static int __net_init ip6mr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr_table *mrt;
	int err;

	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv6.mr6_tables);

	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
	if (IS_ERR(mrt)) {
		err = PTR_ERR(mrt);
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
	if (err < 0)
		goto err2;

	net->ipv6.mr6_rules_ops = ops;
	return 0;

err2:
	ip6mr_free_table(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}

static void __net_exit ip6mr_rules_exit(struct net *net)
{
	struct mr_table *mrt, *next;

	rtnl_lock();
	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
		list_del(&mrt->list);
		ip6mr_free_table(mrt);
	}
	fib_rules_unregister(net->ipv6.mr6_rules_ops);
	rtnl_unlock();
}

static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
{
	return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR);
}

static unsigned int ip6mr_rules_seq_read(struct net *net)
{
	return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
}

bool ip6mr_rule_default(const struct fib_rule *rule)
{
	return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
	       rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
}
EXPORT_SYMBOL(ip6mr_rule_default);
#else
#define ip6mr_for_each_table(mrt, net) \
	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)

static struct mr_table *ip6mr_mr_table_iter(struct net *net,
					    struct mr_table *mrt)
{
	if (!mrt)
		return net->ipv6.mrt6;
	return NULL;
}

static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
	return net->ipv6.mrt6;
}

static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr_table **mrt)
{
	*mrt = net->ipv6.mrt6;
	return 0;
}

static int __net_init ip6mr_rules_init(struct net *net)
{
	struct mr_table *mrt;

	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
	if (IS_ERR(mrt))
		return PTR_ERR(mrt);
	net->ipv6.mrt6 = mrt;
	return 0;
}

static void __net_exit ip6mr_rules_exit(struct net *net)
{
	rtnl_lock();
	ip6mr_free_table(net->ipv6.mrt6);
	net->ipv6.mrt6 = NULL;
	rtnl_unlock();
}

static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
{
	return 0;
}

static unsigned int ip6mr_rules_seq_read(struct net *net)
{
	return 0;
}
#endif

static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
			  const void *ptr)
{
	const struct mfc6_cache_cmp_arg *cmparg = arg->key;
	struct mfc6_cache *c = (struct mfc6_cache *)ptr;

	return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
	       !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
}

static const struct rhashtable_params ip6mr_rht_params = {
	.head_offset = offsetof(struct mr_mfc, mnode),
	.key_offset = offsetof(struct mfc6_cache, cmparg),
	.key_len = sizeof(struct mfc6_cache_cmp_arg),
	.nelem_hint = 3,
	.locks_mul = 1,
	.obj_cmpfn = ip6mr_hash_cmp,
	.automatic_shrinking = true,
};

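/* The MFC hash is keyed on the (origin, group) address pair: the whole
 * struct mfc6_cache_cmp_arg acts as the rhashtable key, and
 * ip6mr_hash_cmp() follows the rhashtable convention of returning 0
 * only when both addresses match.
 */
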
static void ip6mr_new_table_set(struct mr_table *mrt,
				struct net *net)
{
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
}

static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
	.mf6c_origin = IN6ADDR_ANY_INIT,
	.mf6c_mcastgrp = IN6ADDR_ANY_INIT,
};

static struct mr_table_ops ip6mr_mr_table_ops = {
	.rht_params = &ip6mr_rht_params,
	.cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
};

static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, id);
	if (mrt)
		return mrt;

	return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
			      ipmr_expire_process, ip6mr_new_table_set);
}

static void ip6mr_free_table(struct mr_table *mrt)
{
	del_timer_sync(&mrt->ipmr_expire_timer);
	mroute_clean_tables(mrt, true);
	rhltable_destroy(&mrt->mfc_hash);
	kfree(mrt);
}

#ifdef CONFIG_PROC_FS
/* The /proc interfaces to multicast routing
 * /proc/ip6_mr_cache /proc/ip6_mr_vif
 */

static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct mr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return mr_vif_seq_start(seq, pos);
}

static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct mr_vif_iter *iter = seq->private;
	struct mr_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
			   vif - mrt->vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}
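
/* Illustrative /proc/net/ip6_mr_vif output matching the format above
 * (hypothetical interface names and counter values; spacing is
 * approximate):
 *
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags
 *	 0 eth0           9540      42     19080      84 00000
 *	 1 pim6reg           0       0         0       0 00001
 */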

static const struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct mr_mfc_iter *it = seq->private;
		struct mr_table *mrt = it->mrt;

		seq_printf(seq, "%pI6 %pI6 %-3hd",
			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
			   mfc->_c.mfc_parent);

		if (it->cache != &mrt->mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->_c.mfc_un.res.pkt,
				   mfc->_c.mfc_un.res.bytes,
				   mfc->_c.mfc_un.res.wrong_if);
			for (n = mfc->_c.mfc_un.res.minvif;
			     n < mfc->_c.mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(mrt, n) &&
				    mfc->_c.mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d", n,
						   mfc->_c.mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}
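
/* An illustrative /proc/net/ip6_mr_cache line for a resolved entry
 * (hypothetical addresses and counters; %pI6 prints the full,
 * uncompressed address form, so columns are wider than the header):
 *
 *	ff1e:0000:0000:0000:0000:0000:0000:0001 2001:0db8:0000:0000:0000:0000:0000:0001 0        12     1280        0  1:1
 */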

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = mr_mfc_seq_next,
	.stop  = mr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};
#endif

#ifdef CONFIG_IPV6_PIMSM_V2

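/* PIM register packets arrive here via the pim6_protocol handler
 * registered in ip6_mr_init(). An editor's note on the checksum test
 * below: it accepts either convention seen in PIM-SM implementations,
 * a checksum computed over the 8-byte PIM header alone or over the
 * entire register packet, and drops the packet only if both fail.
 */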
static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr *encap;
	struct net_device *reg_dev = NULL;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int reg_vif_num;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     sizeof(*pim), IPPROTO_PIM,
			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto drop;
	reg_vif_num = mrt->mroute_reg_vif_num;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = mrt->vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (!reg_dev)
		goto drop;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = CHECKSUM_NONE;

	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));

	netif_rx(skb);

	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}

static const struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};

/* Service routines creating virtual interfaces: PIMREG */

static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
				struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_oif	= dev->ifindex,
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static int reg_vif_get_iflink(const struct net_device *dev)
{
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
	.ndo_get_iflink = reg_vif_get_iflink,
};

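/* The register vif carries whole multicast packets up to the daemon,
 * so the MTU set below leaves room for the outer 40-byte IPv6 header
 * plus the 8-byte PIM register header, assuming a 1500-byte underlying
 * link MTU.
 */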
static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->needs_free_netdev	= true;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
{
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT6_TABLE_DFLT)
		sprintf(name, "pim6reg");
	else
		sprintf(name, "pim6reg%u", mrt->id);

	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);
	return dev;

failure:
	unregister_netdevice(dev);
	return NULL;
}
#endif

static int call_ip6mr_vif_entry_notifiers(struct net *net,
					  enum fib_event_type event_type,
					  struct vif_device *vif,
					  mifi_t vif_index, u32 tb_id)
{
	return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
				     vif, vif_index, tb_id,
				     &net->ipv6.ipmr_seq);
}

static int call_ip6mr_mfc_entry_notifiers(struct net *net,
					  enum fib_event_type event_type,
					  struct mfc6_cache *mfc, u32 tb_id)
{
	return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
				     &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
}

/* Delete a VIF entry */
static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
		       struct list_head *head)
{
	struct vif_device *v;
	struct net_device *dev;
	struct inet6_dev *in6_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif_table[vifi];

	if (VIF_EXISTS(mrt, vifi))
		call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
					       FIB_EVENT_VIF_DEL, v, vifi,
					       mrt->id);

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	if (vifi + 1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding--;
		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	if ((v->flags & MIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}

static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
{
	struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);

	kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
}

static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			kfree_skb(skb);
	}

	ip6mr_cache_free(c);
}


/* Timer process for all the unresolved queue. */

static void ipmr_do_expire_process(struct mr_table *mrt)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;
	struct mr_mfc *c, *next;

	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
		ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}

static void ipmr_expire_process(struct timer_list *t)
{
	struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		ipmr_do_expire_process(mrt);

	spin_unlock(&mfc_unres_lock);
}
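
/* The expiry timer runs in softirq context, so it only trylocks
 * mfc_unres_lock; on contention it backs off and re-arms itself one
 * jiffy later rather than spinning against a process-context holder.
 */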

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ip6mr_update_thresholds(struct mr_table *mrt,
				    struct mr_mfc *cache,
				    unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXMIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (VIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
	cache->mfc_un.res.lastuse = jiffies;
}

static int mif6_add(struct net *net, struct mr_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding++;
		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	/* Fill in the VIF structures */
	vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
			vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
			MIFF_REGISTER);

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi + 1 > mrt->maxvif)
		mrt->maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
				       v, vifi, mrt->id);
	return 0;
}

static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
					   const struct in6_addr *origin,
					   const struct in6_addr *mcastgrp)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = *origin,
		.mf6c_mcastgrp = *mcastgrp,
	};

	return mr_mfc_find(mrt, &arg);
}

/* Look for a (*,G) entry */
static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
					       struct in6_addr *mcastgrp,
					       mifi_t mifi)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = in6addr_any,
		.mf6c_mcastgrp = *mcastgrp,
	};

	if (ipv6_addr_any(mcastgrp))
		return mr_mfc_find_any_parent(mrt, mifi);
	return mr_mfc_find_any(mrt, mifi, &arg);
}

/* Look for a (S,G,iif) entry if parent != -1 */
static struct mfc6_cache *
ip6mr_cache_find_parent(struct mr_table *mrt,
			const struct in6_addr *origin,
			const struct in6_addr *mcastgrp,
			int parent)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = *origin,
		.mf6c_mcastgrp = *mcastgrp,
	};

	return mr_mfc_find_parent(mrt, &arg, parent);
}

/* Allocate a multicast cache entry */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (!c)
		return NULL;
	c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
	c->_c.mfc_un.res.minvif = MAXMIFS;
	c->_c.free = ip6mr_cache_free_rcu;
	refcount_set(&c->_c.mfc_un.res.refcount, 1);
	return c;
}

static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (!c)
		return NULL;
	skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
	c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
				struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct ipv6hdr));

			if (mr_fill_mroute(mrt, skb, &c->_c,
					   nlmsg_data(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
			}
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			ip6_mr_forward(net, mrt, skb, c);
	}
}

/*
 *	Bounce a cache query up to pim6sd and netlink.
 *
 *	Called under mrt_lock.
 */

static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	struct sock *mroute6_sk;
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = mrt->mroute_reg_vif_num;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

		skb_put(skb, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb);
		skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
		skb_put(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);

		msg->im6_mbz = 0;
		msg->im6_msgtype = assert;
		msg->im6_mif = mifi;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb_dst_set(skb, dst_clone(skb_dst(pkt)));
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	rcu_read_lock();
	mroute6_sk = rcu_dereference(mrt->mroute_sk);
	if (!mroute6_sk) {
		rcu_read_unlock();
		kfree_skb(skb);
		return -EINVAL;
	}

	mrt6msg_netlink_event(mrt, skb);

	/* Deliver to user space multicast routing algorithms */
	ret = sock_queue_rcv_skb(mroute6_sk, skb);
	rcu_read_unlock();
	if (ret < 0) {
		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}

/* Queue a packet for resolution, creating the unresolved cache entry
 * if needed; the unresolved queue is manipulated under mfc_unres_lock.
 */
static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
				  struct sk_buff *skb)
{
	struct mfc6_cache *c;
	bool found = false;
	int err;

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/* Fill in the new cache entry */
		c->_c.mfc_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->_c.list, &mrt->mfc_unres_queue);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);

		ipmr_do_expire_process(mrt);
	}

	/* See if we can append the packet */
	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}
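
/* The unresolved path is deliberately bounded: a table holds at most
 * ten unresolved entries at a time, each entry buffers at most four
 * pending skbs, and entries not resolved within ten seconds are torn
 * down by the expiry timer above.
 */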

/*
 *	MFC6 cache manipulation by user space
 */

static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
			    int parent)
{
	struct mfc6_cache *c;

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	if (!c)
		return -ENOENT;
	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
	list_del_rcu(&c->_c.list);

	call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
				       FIB_EVENT_ENTRY_DEL, c, mrt->id);
	mr6_netlink_event(mrt, c, RTM_DELROUTE);
	mr_cache_put(&c->_c);
	return 0;
}

static int ip6mr_device_event(struct notifier_block *this,
			      unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct vif_device *v;
	int ct;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ip6mr_for_each_table(mrt, net) {
		v = &mrt->vif_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (v->dev == dev)
				mif6_delete(mrt, ct, 1, NULL);
		}
	}

	return NOTIFY_DONE;
}

static unsigned int ip6mr_seq_read(struct net *net)
{
	ASSERT_RTNL();

	return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
}

static int ip6mr_dump(struct net *net, struct notifier_block *nb)
{
	return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
		       ip6mr_mr_table_iter, &mrt_lock);
}

static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};

static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.fib_seq_read	= ip6mr_seq_read,
	.fib_dump	= ip6mr_dump,
	.owner		= THIS_MODULE,
};

static int __net_init ip6mr_notifier_init(struct net *net)
{
	struct fib_notifier_ops *ops;

	net->ipv6.ipmr_seq = 0;

	ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	net->ipv6.ip6mr_notifier_ops = ops;

	return 0;
}

static void __net_exit ip6mr_notifier_exit(struct net *net)
{
	fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
	net->ipv6.ip6mr_notifier_ops = NULL;
}

/* Setup for IP multicast routing */
static int __net_init ip6mr_net_init(struct net *net)
{
	int err;

	err = ip6mr_notifier_init(net);
	if (err)
		return err;

	err = ip6mr_rules_init(net);
	if (err < 0)
		goto ip6mr_rules_fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
			sizeof(struct mr_vif_iter)))
		goto proc_vif_fail;
	if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
			sizeof(struct mr_mfc_iter)))
		goto proc_cache_fail;
#endif

	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	remove_proc_entry("ip6_mr_vif", net->proc_net);
proc_vif_fail:
	ip6mr_rules_exit(net);
#endif
ip6mr_rules_fail:
	ip6mr_notifier_exit(net);
	return err;
}

static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip6_mr_cache", net->proc_net);
	remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
	ip6mr_rules_exit(net);
	ip6mr_notifier_exit(net);
}

static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
};

int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ip6mr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
		pr_err("%s: can't add PIM protocol\n", __func__);
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
				   NULL, ip6mr_rtm_dumproute, 0);
	if (err == 0)
		return 0;

#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
add_proto_fail:
	unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ip6mr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}

void ip6_mr_cleanup(void)
{
	rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}

static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
			 struct mf6cctl *mfc, int mrtsock, int parent)
{
	unsigned char ttls[MAXMIFS];
	struct mfc6_cache *uc, *c;
	struct mr_mfc *_uc;
	bool found;
	int i, err;

	if (mfc->mf6cc_parent >= MAXMIFS)
		return -ENFILE;

	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	if (c) {
		write_lock_bh(&mrt_lock);
		c->_c.mfc_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(mrt, &c->_c, ttls);
		if (!mrtsock)
			c->_c.mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
					       c, mrt->id);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
		return 0;
	}

	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (!c)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->_c.mfc_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(mrt, &c->_c, ttls);
	if (!mrtsock)
		c->_c.mfc_flags |= MFC_STATIC;

	err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
				  ip6mr_rht_params);
	if (err) {
		pr_err("ip6mr: rhtable insert error %d\n", err);
		ip6mr_cache_free(c);
		return err;
	}
	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);

	/* Check to see if we resolved a queued list. If so we
	 * need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
		uc = (struct mfc6_cache *)_uc;
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			list_del(&_uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	if (list_empty(&mrt->mfc_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ip6mr_cache_resolve(net, mrt, uc, c);
		ip6mr_cache_free(uc);
	}
	call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
				       c, mrt->id);
	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
	struct mr_mfc *c, *tmp;
	LIST_HEAD(list);
	int i;

	/* Shut down all active vif entries */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
			continue;
		mif6_delete(mrt, i, 0, &list);
	}
	unregister_netdevice_many(&list);

	/* Wipe the cache */
	list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
		if (!all && (c->mfc_flags & MFC_STATIC))
			continue;
		rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
		list_del_rcu(&c->list);
		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
		mr_cache_put(c);
	}

	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
			list_del(&c->list);
			call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
						       FIB_EVENT_ENTRY_DEL,
						       (struct mfc6_cache *)c,
						       mrt->id);
			mr6_netlink_event(mrt, (struct mfc6_cache *)c,
					  RTM_DELROUTE);
			ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
{
	int err = 0;
	struct net *net = sock_net(sk);

	rtnl_lock();
	write_lock_bh(&mrt_lock);
	if (rtnl_dereference(mrt->mroute_sk)) {
		err = -EADDRINUSE;
	} else {
		rcu_assign_pointer(mrt->mroute_sk, sk);
		sock_set_flag(sk, SOCK_RCU_FREE);
		net->ipv6.devconf_all->mc_forwarding++;
	}
	write_unlock_bh(&mrt_lock);

	if (!err)
		inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     NETCONFA_IFINDEX_ALL,
					     net->ipv6.devconf_all);
	rtnl_unlock();

	return err;
}

int ip6mr_sk_done(struct sock *sk)
{
	int err = -EACCES;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return err;

	rtnl_lock();
	ip6mr_for_each_table(mrt, net) {
		if (sk == rtnl_dereference(mrt->mroute_sk)) {
			write_lock_bh(&mrt_lock);
			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
			/* Note that mroute_sk had SOCK_RCU_FREE set,
			 * so the RCU grace period before sk freeing
			 * is guaranteed by sk_destruct()
			 */
			net->ipv6.devconf_all->mc_forwarding--;
			write_unlock_bh(&mrt_lock);
			inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
						     NETCONFA_MC_FORWARDING,
						     NETCONFA_IFINDEX_ALL,
						     net->ipv6.devconf_all);

			mroute_clean_tables(mrt, false);
			err = 0;
			break;
		}
	}
	rtnl_unlock();

	return err;
}

bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
{
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_oif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		return false;

	return rcu_access_pointer(mrt->mroute_sk);
}
EXPORT_SYMBOL(mroute6_is_socket);

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret, parent = 0;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	if (optname != MRT6_INIT) {
		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
		    !ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(mrt, sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(net, mrt, &vif,
			       sk == rtnl_dereference(mrt->mroute_sk));
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mrt, mifi, 0, NULL);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		parent = -1;
		/* fall through */
	case MRT6_ADD_MFC_PROXY:
	case MRT6_DEL_MFC_PROXY:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		if (parent == 0)
			parent = mfc.mf6cc_parent;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
		else
			ret = ip6mr_mfc_add(net, mrt, &mfc,
					    sk ==
					    rtnl_dereference(mrt->mroute_sk),
					    parent);
		rtnl_unlock();
		return ret;

	/*
	 *	Control PIM assert (to activate pim will activate assert)
	 */
	case MRT6_ASSERT:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mrt->mroute_do_assert = v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = !!v;
		rtnl_lock();
		ret = 0;
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}

#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	case MRT6_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (get_user(v, (u32 __user *)optval))
			return -EFAULT;
		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
		if (v != RT_TABLE_DEFAULT && v >= 100000000)
			return -EINVAL;
		if (sk == rcu_access_pointer(mrt->mroute_sk))
			return -EBUSY;

		rtnl_lock();
		ret = 0;
		mrt = ip6mr_new_table(net, v);
		if (IS_ERR(mrt))
			ret = PTR_ERR(mrt);
		else
			raw6_sk(sk)->ip6mr_table = v;
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}
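
/* Illustrative userspace sequence driving the options above (an
 * editor's sketch, not taken from pim6sd; "ifindex" is a hypothetical
 * physical interface index). The control socket must be a raw ICMPv6
 * socket, as enforced at the top of ip6_mroute_setsockopt(), and the
 * MRT6_* options live at the IPv6 socket level:
 *
 *	int s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	int one = 1;
 *	struct mif6ctl mif = { .mif6c_mifi = 0, .mif6c_pifi = ifindex };
 *
 *	setsockopt(s, IPPROTO_IPV6, MRT6_INIT, &one, sizeof(one));
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MIF, &mif, sizeof(mif));
 *	...
 *	setsockopt(s, IPPROTO_IPV6, MRT6_DONE, NULL, 0);
 */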

/*
 *	Getsock opt support for the multicast routing system.
 */

int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
			  int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = mrt->mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = mrt->mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct vif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.mifi];
		if (VIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		rcu_read_lock();
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->_c.mfc_un.res.pkt;
			sr.bytecnt = c->_c.mfc_un.res.bytes;
			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
			rcu_read_unlock();

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

#ifdef CONFIG_COMPAT
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;
	struct sockaddr_in6 grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};

struct compat_sioc_mif_req6 {
	mifi_t	mifi;
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};

int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct compat_sioc_sg_req6 sr;
	struct compat_sioc_mif_req6 vr;
	struct vif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.mifi];
		if (VIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		rcu_read_lock();
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->_c.mfc_un.res.pkt;
			sr.bytecnt = c->_c.mfc_un.res.bytes;
			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
			rcu_read_unlock();

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
#endif

static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
			IPSTATS_MIB_OUTOCTETS, skb->len);
	return dst_output(net, sk, skb);
}

/*
 *	Processing handlers for ip6mr_forward
 */

static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct vif_device *vif = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi6 fl6;

	if (!vif->dev)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
		goto out_free;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl6 = (struct flowi6) {
		.flowi6_oif = vif->link,
		.daddr = ipv6h->daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		dst_release(dst);
		goto out_free;
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	/*
	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
	 * locally not only before forwarding, but also after forwarding on
	 * all output interfaces. Clearly, if the mrouter runs a
	 * multicasting program, it should receive packets regardless of
	 * which interface the program joined on. If we did not do this,
	 * the program would have to join on all interfaces. On the other
	 * hand, a multihomed host (or router, but not mrouter) cannot join
	 * on more than one interface - that would result in receiving
	 * multiple packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}

static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
	int ct;

	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
		if (mrt->vif_table[ct].dev == dev)
			break;
	}
	return ct;
}
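
/* Note that ip6mr_find_vif() returns -1 when the device backs no vif:
 * the downward scan simply runs off the start of the table, and all
 * callers treat any negative result as "no vif".
 */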

static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
			   struct sk_buff *skb, struct mfc6_cache *c)
{
	int psend = -1;
	int vif, ct;
	int true_vifi = ip6mr_find_vif(mrt, skb->dev);

	vif = c->_c.mfc_parent;
	c->_c.mfc_un.res.pkt++;
	c->_c.mfc_un.res.bytes += skb->len;
	c->_c.mfc_un.res.lastuse = jiffies;

	if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
		struct mfc6_cache *cache_proxy;

		/* For an (*,G) entry, we only check that the incoming
		 * interface is part of the static tree.
		 */
		rcu_read_lock();
		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
		if (cache_proxy &&
		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
			rcu_read_unlock();
			goto forward;
		}
		rcu_read_unlock();
	}

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (mrt->vif_table[vif].dev != skb->dev) {
		c->_c.mfc_un.res.wrong_if++;

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* PIM-SM uses asserts when switching from RPT to SPT,
		       so we cannot check that the packet arrived on an oif.
		       That is bad, but otherwise we would need to move a
		       pretty large chunk of pimd into the kernel. Ough...
		       --ANK
		     */
		    (mrt->mroute_do_pim ||
		     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       c->_c.mfc_un.res.last_assert +
			       MFC_ASSERT_THRESH)) {
			c->_c.mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

forward:
	mrt->vif_table[vif].pkt_in++;
	mrt->vif_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	if (ipv6_addr_any(&c->mf6c_origin) &&
	    ipv6_addr_any(&c->mf6c_mcastgrp)) {
		if (true_vifi >= 0 &&
		    true_vifi != c->_c.mfc_parent &&
		    ipv6_hdr(skb)->hop_limit >
				c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
			/* It's an (*,*) entry and the packet is not coming from
			 * the upstream: forward the packet to the upstream
			 * only.
			 */
			psend = c->_c.mfc_parent;
			goto last_forward;
		}
		goto dont_forward;
	}
	for (ct = c->_c.mfc_un.res.maxvif - 1;
	     ct >= c->_c.mfc_un.res.minvif; ct--) {
		/* For (*,G) entry, don't forward to the incoming interface */
		if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
		    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(net, mrt, skb2,
						       c, psend);
			}
			psend = ct;
		}
	}
last_forward:
	if (psend != -1) {
		ip6mr_forward2(net, mrt, skb, c, psend);
		return;
	}

dont_forward:
	kfree_skb(skb);
}
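
/* The psend dance above avoids one skb copy: every eligible oif except
 * the last gets a clone, while the final destination consumes the
 * original skb itself.
 */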
2141
2142
2143/*
2144 * Multicast packets for forwarding arrive here
2145 */
2146
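/* Reached from the IPv6 input path (ip6_mc_input()) once multicast
 * forwarding is enabled.  Look up the (S,G) entry, falling back to a
 * wildcard (*,G)/(*,*) entry, and either forward the packet or queue it
 * as unresolved, which sends an MRT6MSG_NOCACHE upcall to the daemon.
 */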
int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt,
				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
	if (!cache) {
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt,
						     &ipv6_hdr(skb)->daddr,
						     vif);
	}

	/*
	 * No usable cache entry
	 */
	if (!cache) {
		int vif;

		vif = ip6mr_find_vif(mrt, skb->dev);
		if (vif >= 0) {
			int err = ip6mr_cache_unresolved(mrt, vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(net, mrt, skb, cache);

	read_unlock(&mrt_lock);

	return 0;
}

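/* Back-end for RTM_GETROUTE on a multicast destination: fill @rtm from
 * the cache entry matching the route attached to @skb.  If no entry
 * exists yet, build a header-only dummy skb carrying the route's source
 * and destination and queue it as unresolved so the daemon is notified.
 */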
int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
		    u32 portid)
{
	int err;
	struct mr_table *mrt;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
	if (!cache && skb->dev) {
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
						     vif);
	}

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		dev = skb->dev;
		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* XXX: is queueing a dummy header-only skb really correct? */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		NETLINK_CB(skb2).portid = portid;
		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		iph->saddr = rt->rt6i_src.addr;
		iph->daddr = rt->rt6i_dst.addr;

		err = ip6mr_cache_unresolved(mrt, vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
	read_unlock(&mrt_lock);
	return err;
}

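/* Encode one cache entry as an RTM_NEWROUTE/RTM_DELROUTE message:
 * rtmsg header plus RTA_TABLE, RTA_SRC and RTA_DST, with the iif, oif
 * list and counters appended by the generic mr_fill_mroute().  For an
 * unresolved entry mr_fill_mroute() returns -ENOENT, which intentionally
 * does not abort the dump.
 */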
static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
			     int flags)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;
	int err;

	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = RTNL_FAMILY_IP6MR;
	rtm->rtm_dst_len = 128;
	rtm->rtm_src_len = 128;
	rtm->rtm_tos = 0;
	rtm->rtm_table = mrt->id;
	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
		goto nla_put_failure;
	rtm->rtm_type = RTN_MULTICAST;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	if (c->_c.mfc_flags & MFC_STATIC)
		rtm->rtm_protocol = RTPROT_STATIC;
	else
		rtm->rtm_protocol = RTPROT_MROUTED;
	rtm->rtm_flags = 0;

	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
		goto nla_put_failure;
	err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
	/* do not break the dump if cache is unresolved */
	if (err < 0 && err != -ENOENT)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

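/* Thin adapter for the generic dump helpers, which operate on struct
 * mr_mfc.  The downcast is safe because struct mfc6_cache embeds its
 * struct mr_mfc (_c) as the first member.
 */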
static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			      u32 portid, u32 seq, struct mr_mfc *c,
			      int cmd, int flags)
{
	return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
				 cmd, flags);
}

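/* Worst-case netlink payload for one cache-entry event.  A resolved
 * entry additionally carries RTA_IIF, an RTA_MULTIPATH nest with up to
 * @maxvif rtnexthop entries and the 64-bit aligned RTA_MFC_STATS block.
 */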
static int mr6_msgsize(bool unresolved, int maxvif)
{
	size_t len =
		NLMSG_ALIGN(sizeof(struct rtmsg))
		+ nla_total_size(4)	/* RTA_TABLE */
		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
		;

	if (!unresolved)
		len = len
		      + nla_total_size(4)	/* RTA_IIF */
		      + nla_total_size(0)	/* RTA_MULTIPATH */
		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
						/* RTA_MFC_STATS */
		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
		;

	return len;
}

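/* Broadcast a cache change (@cmd is RTM_NEWROUTE or RTM_DELROUTE) to
 * RTNLGRP_IPV6_MROUTE listeners.  A parent mif index of MAXMIFS or more
 * means the entry is still unresolved, so the smaller message size is
 * reserved.
 */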
static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
			      int cmd)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
			GFP_ATOMIC);
	if (!skb)
		goto errout;

	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
	if (err < 0)
		goto errout;

	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
	return;

errout:
	kfree_skb(skb);
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
}

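/* Netlink payload for one cache report: rtgenmsg header, the fixed
 * IP6MRA_CREPORT_* attributes and the packet bytes that follow the
 * struct mrt6msg in the original upcall.
 */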
static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
{
	size_t len =
		NLMSG_ALIGN(sizeof(struct rtgenmsg))
		+ nla_total_size(1)	/* IP6MRA_CREPORT_MSGTYPE */
		+ nla_total_size(4)	/* IP6MRA_CREPORT_MIF_ID */
					/* IP6MRA_CREPORT_SRC_ADDR */
		+ nla_total_size(sizeof(struct in6_addr))
					/* IP6MRA_CREPORT_DST_ADDR */
		+ nla_total_size(sizeof(struct in6_addr))
					/* IP6MRA_CREPORT_PKT */
		+ nla_total_size(payloadlen)
		;

	return len;
}

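/* Mirror a daemon upcall (e.g. MRT6MSG_NOCACHE or MRT6MSG_WRONGMIF) to
 * RTNLGRP_IPV6_MROUTE_R as an RTM_NEWCACHEREPORT message, so listeners
 * other than the owning mroute6 socket can observe cache reports.  @pkt
 * still has the struct mrt6msg at its transport header when we get here.
 */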
static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
{
	struct net *net = read_pnet(&mrt->net);
	struct nlmsghdr *nlh;
	struct rtgenmsg *rtgenm;
	struct mrt6msg *msg;
	struct sk_buff *skb;
	struct nlattr *nla;
	int payloadlen;

	payloadlen = pkt->len - sizeof(struct mrt6msg);
	msg = (struct mrt6msg *)skb_transport_header(pkt);

	skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
	if (!skb)
		goto errout;

	nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
			sizeof(struct rtgenmsg), 0);
	if (!nlh)
		goto errout;
	rtgenm = nlmsg_data(nlh);
	rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
	if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
	    nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
	    nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
			     &msg->im6_src) ||
	    nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
			     &msg->im6_dst))
		goto nla_put_failure;

	nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
	if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
				  nla_data(nla), payloadlen))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);

	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
	return;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
errout:
	kfree_skb(skb);
	rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
}

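/* RTM_GETROUTE dump callback: iterate every mr_table and its unresolved
 * queue (under mfc_unres_lock) via the generic walker, encoding each
 * entry with _ip6mr_fill_mroute().
 */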
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
				_ip6mr_fill_mroute, &mfc_unres_lock);
}