- patches.arch/x86_mce_intel_decode_physical_address.patch:
[linux-flexiantxendom0-3.2.10.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/mroute6.h>
38 #include <linux/init.h>
39 #include <linux/if_arp.h>
40 #include <linux/reserve.h>
41 #include <linux/proc_fs.h>
42 #include <linux/seq_file.h>
43 #include <linux/nsproxy.h>
44 #include <linux/slab.h>
45 #include <net/net_namespace.h>
46 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
52 #include <net/tcp.h>
53 #include <linux/rtnetlink.h>
54 #include <net/dst.h>
55 #include <net/xfrm.h>
56 #include <net/netevent.h>
57 #include <net/netlink.h>
58
59 #include <asm/uaccess.h>
60
61 #ifdef CONFIG_SYSCTL
62 #include <linux/sysctl.h>
63 #endif
64
/* Debug verbosity: a value of 3 or more enables RDBG/RT6_TRACE output. */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
/* Tracing compiled out: RDBG expands to nothing, RT6_TRACE to an empty statement. */
#define RDBG(x)
#define RT6_TRACE(x...) do { } while (0)
#endif

/* When non-zero, ip6_pol_route() clones off-link routes via rt6_alloc_clone(). */
#define CLONE_OFFLINK_ROUTE 0
77
78 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
79 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
80 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81 static void             ip6_dst_destroy(struct dst_entry *);
82 static void             ip6_dst_ifdown(struct dst_entry *,
83                                        struct net_device *dev, int how);
84 static int               ip6_dst_gc(struct dst_ops *ops);
85
86 static int              ip6_pkt_discard(struct sk_buff *skb);
87 static int              ip6_pkt_discard_out(struct sk_buff *skb);
88 static void             ip6_link_failure(struct sk_buff *skb);
89 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
90
91 #ifdef CONFIG_IPV6_ROUTE_INFO
92 static struct rt6_info *rt6_add_route_info(struct net *net,
93                                            struct in6_addr *prefix, int prefixlen,
94                                            struct in6_addr *gwaddr, int ifindex,
95                                            unsigned pref);
96 static struct rt6_info *rt6_get_route_info(struct net *net,
97                                            struct in6_addr *prefix, int prefixlen,
98                                            struct in6_addr *gwaddr, int ifindex);
99 #endif
100
101 static struct dst_ops ip6_dst_ops_template = {
102         .family                 =       AF_INET6,
103         .protocol               =       cpu_to_be16(ETH_P_IPV6),
104         .gc                     =       ip6_dst_gc,
105         .gc_thresh              =       1024,
106         .check                  =       ip6_dst_check,
107         .destroy                =       ip6_dst_destroy,
108         .ifdown                 =       ip6_dst_ifdown,
109         .negative_advice        =       ip6_negative_advice,
110         .link_failure           =       ip6_link_failure,
111         .update_pmtu            =       ip6_rt_update_pmtu,
112         .local_out              =       __ip6_local_out,
113         .entries                =       ATOMIC_INIT(0),
114 };
115
116 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
117 {
118 }
119
120 static struct dst_ops ip6_dst_blackhole_ops = {
121         .family                 =       AF_INET6,
122         .protocol               =       cpu_to_be16(ETH_P_IPV6),
123         .destroy                =       ip6_dst_destroy,
124         .check                  =       ip6_dst_check,
125         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
126         .entries                =       ATOMIC_INIT(0),
127 };
128
129 static struct rt6_info ip6_null_entry_template = {
130         .u = {
131                 .dst = {
132                         .__refcnt       = ATOMIC_INIT(1),
133                         .__use          = 1,
134                         .obsolete       = -1,
135                         .error          = -ENETUNREACH,
136                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
137                         .input          = ip6_pkt_discard,
138                         .output         = ip6_pkt_discard_out,
139                 }
140         },
141         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
142         .rt6i_protocol  = RTPROT_KERNEL,
143         .rt6i_metric    = ~(u32) 0,
144         .rt6i_ref       = ATOMIC_INIT(1),
145 };
146
147 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
148
149 static int ip6_pkt_prohibit(struct sk_buff *skb);
150 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
151
152 static struct rt6_info ip6_prohibit_entry_template = {
153         .u = {
154                 .dst = {
155                         .__refcnt       = ATOMIC_INIT(1),
156                         .__use          = 1,
157                         .obsolete       = -1,
158                         .error          = -EACCES,
159                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
160                         .input          = ip6_pkt_prohibit,
161                         .output         = ip6_pkt_prohibit_out,
162                 }
163         },
164         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
165         .rt6i_protocol  = RTPROT_KERNEL,
166         .rt6i_metric    = ~(u32) 0,
167         .rt6i_ref       = ATOMIC_INIT(1),
168 };
169
170 static struct rt6_info ip6_blk_hole_entry_template = {
171         .u = {
172                 .dst = {
173                         .__refcnt       = ATOMIC_INIT(1),
174                         .__use          = 1,
175                         .obsolete       = -1,
176                         .error          = -EINVAL,
177                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
178                         .input          = dst_discard,
179                         .output         = dst_discard,
180                 }
181         },
182         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
183         .rt6i_protocol  = RTPROT_KERNEL,
184         .rt6i_metric    = ~(u32) 0,
185         .rt6i_ref       = ATOMIC_INIT(1),
186 };
187
188 #endif
189
/* Allocate a dst entry from @ops and hand it back typed as an rt6_info. */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct dst_entry *entry = dst_alloc(ops);

	return (struct rt6_info *)entry;
}
195
196 static void ip6_dst_destroy(struct dst_entry *dst)
197 {
198         struct rt6_info *rt = (struct rt6_info *)dst;
199         struct inet6_dev *idev = rt->rt6i_idev;
200
201         if (idev != NULL) {
202                 rt->rt6i_idev = NULL;
203                 in6_dev_put(idev);
204         }
205 }
206
207 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
208                            int how)
209 {
210         struct rt6_info *rt = (struct rt6_info *)dst;
211         struct inet6_dev *idev = rt->rt6i_idev;
212         struct net_device *loopback_dev =
213                 dev_net(dev)->loopback_dev;
214
215         if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
216                 struct inet6_dev *loopback_idev =
217                         in6_dev_get(loopback_dev);
218                 if (loopback_idev != NULL) {
219                         rt->rt6i_idev = loopback_idev;
220                         in6_dev_put(idev);
221                 }
222         }
223 }
224
225 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
226 {
227         return (rt->rt6i_flags & RTF_EXPIRES &&
228                 time_after(jiffies, rt->rt6i_expires));
229 }
230
231 static inline int rt6_need_strict(struct in6_addr *daddr)
232 {
233         return (ipv6_addr_type(daddr) &
234                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK));
235 }
236
237 /*
238  *      Route lookup. Any table->tb6_lock is implied.
239  */
240
241 static inline struct rt6_info *rt6_device_match(struct net *net,
242                                                     struct rt6_info *rt,
243                                                     struct in6_addr *saddr,
244                                                     int oif,
245                                                     int flags)
246 {
247         struct rt6_info *local = NULL;
248         struct rt6_info *sprt;
249
250         if (!oif && ipv6_addr_any(saddr))
251                 goto out;
252
253         for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
254                 struct net_device *dev = sprt->rt6i_dev;
255
256                 if (oif) {
257                         if (dev->ifindex == oif)
258                                 return sprt;
259                         if (dev->flags & IFF_LOOPBACK) {
260                                 if (sprt->rt6i_idev == NULL ||
261                                     sprt->rt6i_idev->dev->ifindex != oif) {
262                                         if (flags & RT6_LOOKUP_F_IFACE && oif)
263                                                 continue;
264                                         if (local && (!oif ||
265                                                       local->rt6i_idev->dev->ifindex == oif))
266                                                 continue;
267                                 }
268                                 local = sprt;
269                         }
270                 } else {
271                         if (ipv6_chk_addr(net, saddr, dev,
272                                           flags & RT6_LOOKUP_F_IFACE))
273                                 return sprt;
274                 }
275         }
276
277         if (oif) {
278                 if (local)
279                         return local;
280
281                 if (flags & RT6_LOOKUP_F_IFACE)
282                         return net->ipv6.ip6_null_entry;
283         }
284 out:
285         return rt;
286 }
287
288 #ifdef CONFIG_IPV6_ROUTER_PREF
289 static void rt6_probe(struct rt6_info *rt)
290 {
291         struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
292         /*
293          * Okay, this does not seem to be appropriate
294          * for now, however, we need to check if it
295          * is really so; aka Router Reachability Probing.
296          *
297          * Router Reachability Probe MUST be rate-limited
298          * to no more than one per minute.
299          */
300         if (!neigh || (neigh->nud_state & NUD_VALID))
301                 return;
302         read_lock_bh(&neigh->lock);
303         if (!(neigh->nud_state & NUD_VALID) &&
304             time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
305                 struct in6_addr mcaddr;
306                 struct in6_addr *target;
307
308                 neigh->updated = jiffies;
309                 read_unlock_bh(&neigh->lock);
310
311                 target = (struct in6_addr *)&neigh->primary_key;
312                 addrconf_addr_solict_mult(target, &mcaddr);
313                 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
314         } else
315                 read_unlock_bh(&neigh->lock);
316 }
317 #else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* intentionally empty */
}
321 #endif
322
323 /*
324  * Default Router Selection (RFC 2461 6.3.6)
325  */
326 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
327 {
328         struct net_device *dev = rt->rt6i_dev;
329         if (!oif || dev->ifindex == oif)
330                 return 2;
331         if ((dev->flags & IFF_LOOPBACK) &&
332             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
333                 return 1;
334         return 0;
335 }
336
337 static inline int rt6_check_neigh(struct rt6_info *rt)
338 {
339         struct neighbour *neigh = rt->rt6i_nexthop;
340         int m;
341         if (rt->rt6i_flags & RTF_NONEXTHOP ||
342             !(rt->rt6i_flags & RTF_GATEWAY))
343                 m = 1;
344         else if (neigh) {
345                 read_lock_bh(&neigh->lock);
346                 if (neigh->nud_state & NUD_VALID)
347                         m = 2;
348 #ifdef CONFIG_IPV6_ROUTER_PREF
349                 else if (neigh->nud_state & NUD_FAILED)
350                         m = 0;
351 #endif
352                 else
353                         m = 1;
354                 read_unlock_bh(&neigh->lock);
355         } else
356                 m = 0;
357         return m;
358 }
359
360 static int rt6_score_route(struct rt6_info *rt, int oif,
361                            int strict)
362 {
363         int m, n;
364
365         m = rt6_check_dev(rt, oif);
366         if (!m && (strict & RT6_LOOKUP_F_IFACE))
367                 return -1;
368 #ifdef CONFIG_IPV6_ROUTER_PREF
369         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
370 #endif
371         n = rt6_check_neigh(rt);
372         if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
373                 return -1;
374         return m;
375 }
376
377 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
378                                    int *mpri, struct rt6_info *match)
379 {
380         int m;
381
382         if (rt6_check_expired(rt))
383                 goto out;
384
385         m = rt6_score_route(rt, oif, strict);
386         if (m < 0)
387                 goto out;
388
389         if (m > *mpri) {
390                 if (strict & RT6_LOOKUP_F_REACHABLE)
391                         rt6_probe(match);
392                 *mpri = m;
393                 match = rt;
394         } else if (strict & RT6_LOOKUP_F_REACHABLE) {
395                 rt6_probe(rt);
396         }
397
398 out:
399         return match;
400 }
401
402 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
403                                      struct rt6_info *rr_head,
404                                      u32 metric, int oif, int strict)
405 {
406         struct rt6_info *rt, *match;
407         int mpri = -1;
408
409         match = NULL;
410         for (rt = rr_head; rt && rt->rt6i_metric == metric;
411              rt = rt->u.dst.rt6_next)
412                 match = find_match(rt, oif, strict, &mpri, match);
413         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
414              rt = rt->u.dst.rt6_next)
415                 match = find_match(rt, oif, strict, &mpri, match);
416
417         return match;
418 }
419
420 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
421 {
422         struct rt6_info *match, *rt0;
423         struct net *net;
424
425         RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
426                   __func__, fn->leaf, oif);
427
428         rt0 = fn->rr_ptr;
429         if (!rt0)
430                 fn->rr_ptr = rt0 = fn->leaf;
431
432         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
433
434         if (!match &&
435             (strict & RT6_LOOKUP_F_REACHABLE)) {
436                 struct rt6_info *next = rt0->u.dst.rt6_next;
437
438                 /* no entries matched; do round-robin */
439                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
440                         next = fn->leaf;
441
442                 if (next != rt0)
443                         fn->rr_ptr = next;
444         }
445
446         RT6_TRACE("%s() => %p\n",
447                   __func__, match);
448
449         net = dev_net(rt0->rt6i_dev);
450         return (match ? match : net->ipv6.ip6_null_entry);
451 }
452
453 #ifdef CONFIG_IPV6_ROUTE_INFO
454 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
455                   struct in6_addr *gwaddr)
456 {
457         struct net *net = dev_net(dev);
458         struct route_info *rinfo = (struct route_info *) opt;
459         struct in6_addr prefix_buf, *prefix;
460         unsigned int pref;
461         unsigned long lifetime;
462         struct rt6_info *rt;
463
464         if (len < sizeof(struct route_info)) {
465                 return -EINVAL;
466         }
467
468         /* Sanity check for prefix_len and length */
469         if (rinfo->length > 3) {
470                 return -EINVAL;
471         } else if (rinfo->prefix_len > 128) {
472                 return -EINVAL;
473         } else if (rinfo->prefix_len > 64) {
474                 if (rinfo->length < 2) {
475                         return -EINVAL;
476                 }
477         } else if (rinfo->prefix_len > 0) {
478                 if (rinfo->length < 1) {
479                         return -EINVAL;
480                 }
481         }
482
483         pref = rinfo->route_pref;
484         if (pref == ICMPV6_ROUTER_PREF_INVALID)
485                 return -EINVAL;
486
487         lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
488
489         if (rinfo->length == 3)
490                 prefix = (struct in6_addr *)rinfo->prefix;
491         else {
492                 /* this function is safe */
493                 ipv6_addr_prefix(&prefix_buf,
494                                  (struct in6_addr *)rinfo->prefix,
495                                  rinfo->prefix_len);
496                 prefix = &prefix_buf;
497         }
498
499         rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
500                                 dev->ifindex);
501
502         if (rt && !lifetime) {
503                 ip6_del_rt(rt);
504                 rt = NULL;
505         }
506
507         if (!rt && lifetime)
508                 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
509                                         pref);
510         else if (rt)
511                 rt->rt6i_flags = RTF_ROUTEINFO |
512                                  (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
513
514         if (rt) {
515                 if (!addrconf_finite_timeout(lifetime)) {
516                         rt->rt6i_flags &= ~RTF_EXPIRES;
517                 } else {
518                         rt->rt6i_expires = jiffies + HZ * lifetime;
519                         rt->rt6i_flags |= RTF_EXPIRES;
520                 }
521                 dst_release(&rt->u.dst);
522         }
523         return 0;
524 }
525 #endif
526
/*
 * BACKTRACK - climb the fib6 tree when the lookup produced the null entry.
 *
 * Expects the enclosing function to provide the variables 'rt' and 'fn'
 * and the labels 'restart' and 'out'.  While 'rt' is the namespace's
 * ip6_null_entry, moves 'fn' to its parent (or into the parent's source
 * subtree via fib6_lookup() on @saddr) until a node carrying RTN_RTINFO
 * is found (goto restart) or the top-level root is reached (goto out).
 */
#define BACKTRACK(__net, saddr)			\
do { \
	if (rt == __net->ipv6.ip6_null_entry) { \
		struct fib6_node *parent; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			parent = fn->parent; \
			if (FIB6_SUBTREE(parent) && FIB6_SUBTREE(parent) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(parent), NULL, saddr); \
			else \
				fn = parent; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
544
545 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
546                                              struct fib6_table *table,
547                                              struct flowi *fl, int flags)
548 {
549         struct fib6_node *fn;
550         struct rt6_info *rt;
551
552         read_lock_bh(&table->tb6_lock);
553         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
554 restart:
555         rt = fn->leaf;
556         rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
557         BACKTRACK(net, &fl->fl6_src);
558 out:
559         dst_use(&rt->u.dst, jiffies);
560         read_unlock_bh(&table->tb6_lock);
561         return rt;
562
563 }
564
565 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
566                             const struct in6_addr *saddr, int oif, int strict)
567 {
568         struct flowi fl = {
569                 .oif = oif,
570                 .nl_u = {
571                         .ip6_u = {
572                                 .daddr = *daddr,
573                         },
574                 },
575         };
576         struct dst_entry *dst;
577         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
578
579         if (saddr) {
580                 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
581                 flags |= RT6_LOOKUP_F_HAS_SADDR;
582         }
583
584         dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
585         if (dst->error == 0)
586                 return (struct rt6_info *) dst;
587
588         dst_release(dst);
589
590         return NULL;
591 }
592
593 EXPORT_SYMBOL(rt6_lookup);
594
595 /* ip6_ins_rt is called with FREE table->tb6_lock.
596    It takes new route entry, the addition fails by any reason the
597    route is freed. In any case, if caller does not hold it, it may
598    be destroyed.
599  */
600
601 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
602 {
603         int err;
604         struct fib6_table *table;
605
606         table = rt->rt6i_table;
607         write_lock_bh(&table->tb6_lock);
608         err = fib6_add(&table->tb6_root, rt, info);
609         write_unlock_bh(&table->tb6_lock);
610
611         return err;
612 }
613
614 int ip6_ins_rt(struct rt6_info *rt)
615 {
616         struct nl_info info = {
617                 .nl_net = dev_net(rt->rt6i_dev),
618         };
619         return __ip6_ins_rt(rt, &info);
620 }
621
622 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
623                                       struct in6_addr *saddr)
624 {
625         struct rt6_info *rt;
626
627         /*
628          *      Clone the route.
629          */
630
631         rt = ip6_rt_copy(ort);
632
633         if (rt) {
634                 struct neighbour *neigh;
635                 int attempts = !in_softirq();
636
637                 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
638                         if (rt->rt6i_dst.plen != 128 &&
639                             ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
640                                 rt->rt6i_flags |= RTF_ANYCAST;
641                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
642                 }
643
644                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
645                 rt->rt6i_dst.plen = 128;
646                 rt->rt6i_flags |= RTF_CACHE;
647                 rt->u.dst.flags |= DST_HOST;
648
649 #ifdef CONFIG_IPV6_SUBTREES
650                 if (rt->rt6i_src.plen && saddr) {
651                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
652                         rt->rt6i_src.plen = 128;
653                 }
654 #endif
655
656         retry:
657                 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
658                 if (IS_ERR(neigh)) {
659                         struct net *net = dev_net(rt->rt6i_dev);
660                         int saved_rt_min_interval =
661                                 net->ipv6.sysctl.ip6_rt_gc_min_interval;
662                         int saved_rt_elasticity =
663                                 net->ipv6.sysctl.ip6_rt_gc_elasticity;
664
665                         if (attempts-- > 0) {
666                                 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
667                                 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
668
669                                 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
670
671                                 net->ipv6.sysctl.ip6_rt_gc_elasticity =
672                                         saved_rt_elasticity;
673                                 net->ipv6.sysctl.ip6_rt_gc_min_interval =
674                                         saved_rt_min_interval;
675                                 goto retry;
676                         }
677
678                         if (net_ratelimit())
679                                 printk(KERN_WARNING
680                                        "Neighbour table overflow.\n");
681                         dst_free(&rt->u.dst);
682                         return NULL;
683                 }
684                 rt->rt6i_nexthop = neigh;
685
686         }
687
688         return rt;
689 }
690
691 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
692 {
693         struct rt6_info *rt = ip6_rt_copy(ort);
694         if (rt) {
695                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
696                 rt->rt6i_dst.plen = 128;
697                 rt->rt6i_flags |= RTF_CACHE;
698                 rt->u.dst.flags |= DST_HOST;
699                 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
700         }
701         return rt;
702 }
703
704 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
705                                       struct flowi *fl, int flags)
706 {
707         struct fib6_node *fn;
708         struct rt6_info *rt, *nrt;
709         int strict = 0;
710         int attempts = 3;
711         int err;
712         int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
713
714         strict |= flags & RT6_LOOKUP_F_IFACE;
715
716 relookup:
717         read_lock_bh(&table->tb6_lock);
718
719 restart_2:
720         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
721
722 restart:
723         rt = rt6_select(fn, oif, strict | reachable);
724
725         BACKTRACK(net, &fl->fl6_src);
726         if (rt == net->ipv6.ip6_null_entry ||
727             rt->rt6i_flags & RTF_CACHE)
728                 goto out;
729
730         dst_hold(&rt->u.dst);
731         read_unlock_bh(&table->tb6_lock);
732
733         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
734                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
735         else {
736 #if CLONE_OFFLINK_ROUTE
737                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
738 #else
739                 goto out2;
740 #endif
741         }
742
743         dst_release(&rt->u.dst);
744         rt = nrt ? : net->ipv6.ip6_null_entry;
745
746         dst_hold(&rt->u.dst);
747         if (nrt) {
748                 err = ip6_ins_rt(nrt);
749                 if (!err)
750                         goto out2;
751         }
752
753         if (--attempts <= 0)
754                 goto out2;
755
756         /*
757          * Race condition! In the gap, when table->tb6_lock was
758          * released someone could insert this route.  Relookup.
759          */
760         dst_release(&rt->u.dst);
761         goto relookup;
762
763 out:
764         if (reachable) {
765                 reachable = 0;
766                 goto restart_2;
767         }
768         dst_hold(&rt->u.dst);
769         read_unlock_bh(&table->tb6_lock);
770 out2:
771         rt->u.dst.lastuse = jiffies;
772         rt->u.dst.__use++;
773
774         return rt;
775 }
776
777 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
778                                             struct flowi *fl, int flags)
779 {
780         return ip6_pol_route(net, table, fl->iif, fl, flags);
781 }
782
783 void ip6_route_input(struct sk_buff *skb)
784 {
785         struct ipv6hdr *iph = ipv6_hdr(skb);
786         struct net *net = dev_net(skb->dev);
787         int flags = RT6_LOOKUP_F_HAS_SADDR;
788         struct flowi fl = {
789                 .iif = skb->dev->ifindex,
790                 .nl_u = {
791                         .ip6_u = {
792                                 .daddr = iph->daddr,
793                                 .saddr = iph->saddr,
794                                 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
795                         },
796                 },
797                 .mark = skb->mark,
798                 .proto = iph->nexthdr,
799         };
800
801         if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
802                 flags |= RT6_LOOKUP_F_IFACE;
803
804         skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
805 }
806
807 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
808                                              struct flowi *fl, int flags)
809 {
810         return ip6_pol_route(net, table, fl->oif, fl, flags);
811 }
812
813 struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
814                                     struct flowi *fl)
815 {
816         int flags = 0;
817
818         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
819                 flags |= RT6_LOOKUP_F_IFACE;
820
821         if (!ipv6_addr_any(&fl->fl6_src))
822                 flags |= RT6_LOOKUP_F_HAS_SADDR;
823         else if (sk)
824                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
825
826         return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
827 }
828
829 EXPORT_SYMBOL(ip6_route_output);
830
831 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
832 {
833         struct rt6_info *ort = (struct rt6_info *) *dstp;
834         struct rt6_info *rt = (struct rt6_info *)
835                 dst_alloc(&ip6_dst_blackhole_ops);
836         struct dst_entry *new = NULL;
837
838         if (rt) {
839                 new = &rt->u.dst;
840
841                 atomic_set(&new->__refcnt, 1);
842                 new->__use = 1;
843                 new->input = dst_discard;
844                 new->output = dst_discard;
845
846                 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
847                 new->dev = ort->u.dst.dev;
848                 if (new->dev)
849                         dev_hold(new->dev);
850                 rt->rt6i_idev = ort->rt6i_idev;
851                 if (rt->rt6i_idev)
852                         in6_dev_hold(rt->rt6i_idev);
853                 rt->rt6i_expires = 0;
854
855                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
856                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
857                 rt->rt6i_metric = 0;
858
859                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
860 #ifdef CONFIG_IPV6_SUBTREES
861                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
862 #endif
863
864                 dst_free(new);
865         }
866
867         dst_release(*dstp);
868         *dstp = new;
869         return (new ? 0 : -ENOMEM);
870 }
871 EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
872
873 /*
874  *      Destination cache support functions
875  */
876
877 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
878 {
879         struct rt6_info *rt;
880
881         rt = (struct rt6_info *) dst;
882
883         if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
884                 return dst;
885
886         return NULL;
887 }
888
889 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
890 {
891         struct rt6_info *rt = (struct rt6_info *) dst;
892
893         if (rt) {
894                 if (rt->rt6i_flags & RTF_CACHE) {
895                         if (rt6_check_expired(rt)) {
896                                 ip6_del_rt(rt);
897                                 dst = NULL;
898                         }
899                 } else {
900                         dst_release(dst);
901                         dst = NULL;
902                 }
903         }
904         return dst;
905 }
906
907 static void ip6_link_failure(struct sk_buff *skb)
908 {
909         struct rt6_info *rt;
910
911         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
912
913         rt = (struct rt6_info *) skb_dst(skb);
914         if (rt) {
915                 if (rt->rt6i_flags&RTF_CACHE) {
916                         dst_set_expires(&rt->u.dst, 0);
917                         rt->rt6i_flags |= RTF_EXPIRES;
918                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
919                         rt->rt6i_node->fn_sernum = -1;
920         }
921 }
922
923 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
924 {
925         struct rt6_info *rt6 = (struct rt6_info*)dst;
926
927         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
928                 rt6->rt6i_flags |= RTF_MODIFIED;
929                 if (mtu < IPV6_MIN_MTU) {
930                         mtu = IPV6_MIN_MTU;
931                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
932                 }
933                 dst->metrics[RTAX_MTU-1] = mtu;
934                 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
935         }
936 }
937
938 static int ipv6_get_mtu(struct net_device *dev);
939
940 static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
941 {
942         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
943
944         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
945                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
946
947         /*
948          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
949          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
950          * IPV6_MAXPLEN is also valid and means: "any MSS,
951          * rely only on pmtu discovery"
952          */
953         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
954                 mtu = IPV6_MAXPLEN;
955         return mtu;
956 }
957
958 static struct dst_entry *icmp6_dst_gc_list;
959 static DEFINE_SPINLOCK(icmp6_dst_lock);
960
961 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
962                                   struct neighbour *neigh,
963                                   const struct in6_addr *addr)
964 {
965         struct rt6_info *rt;
966         struct inet6_dev *idev = in6_dev_get(dev);
967         struct net *net = dev_net(dev);
968
969         if (unlikely(idev == NULL))
970                 return NULL;
971
972         rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
973         if (unlikely(rt == NULL)) {
974                 in6_dev_put(idev);
975                 goto out;
976         }
977
978         dev_hold(dev);
979         if (neigh)
980                 neigh_hold(neigh);
981         else {
982                 neigh = ndisc_get_neigh(dev, addr);
983                 if (IS_ERR(neigh))
984                         neigh = NULL;
985         }
986
987         rt->rt6i_dev      = dev;
988         rt->rt6i_idev     = idev;
989         rt->rt6i_nexthop  = neigh;
990         atomic_set(&rt->u.dst.__refcnt, 1);
991         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
992         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
993         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
994         rt->u.dst.output  = ip6_output;
995
996 #if 0   /* there's no chance to use these for ndisc */
997         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
998                                 ? DST_HOST
999                                 : 0;
1000         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1001         rt->rt6i_dst.plen = 128;
1002 #endif
1003
1004         spin_lock_bh(&icmp6_dst_lock);
1005         rt->u.dst.next = icmp6_dst_gc_list;
1006         icmp6_dst_gc_list = &rt->u.dst;
1007         spin_unlock_bh(&icmp6_dst_lock);
1008
1009         fib6_force_start_gc(net);
1010
1011 out:
1012         return &rt->u.dst;
1013 }
1014
1015 int icmp6_dst_gc(void)
1016 {
1017         struct dst_entry *dst, *next, **pprev;
1018         int more = 0;
1019
1020         next = NULL;
1021
1022         spin_lock_bh(&icmp6_dst_lock);
1023         pprev = &icmp6_dst_gc_list;
1024
1025         while ((dst = *pprev) != NULL) {
1026                 if (!atomic_read(&dst->__refcnt)) {
1027                         *pprev = dst->next;
1028                         dst_free(dst);
1029                 } else {
1030                         pprev = &dst->next;
1031                         ++more;
1032                 }
1033         }
1034
1035         spin_unlock_bh(&icmp6_dst_lock);
1036
1037         return more;
1038 }
1039
1040 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1041                             void *arg)
1042 {
1043         struct dst_entry *dst, **pprev;
1044
1045         spin_lock_bh(&icmp6_dst_lock);
1046         pprev = &icmp6_dst_gc_list;
1047         while ((dst = *pprev) != NULL) {
1048                 struct rt6_info *rt = (struct rt6_info *) dst;
1049                 if (func(rt, arg)) {
1050                         *pprev = dst->next;
1051                         dst_free(dst);
1052                 } else {
1053                         pprev = &dst->next;
1054                 }
1055         }
1056         spin_unlock_bh(&icmp6_dst_lock);
1057 }
1058
1059 static int ip6_dst_gc(struct dst_ops *ops)
1060 {
1061         unsigned long now = jiffies;
1062         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1063         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1064         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1065         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1066         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1067         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1068
1069         if (time_after(rt_last_gc + rt_min_interval, now) &&
1070             atomic_read(&ops->entries) <= rt_max_size)
1071                 goto out;
1072
1073         net->ipv6.ip6_rt_gc_expire++;
1074         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1075         net->ipv6.ip6_rt_last_gc = now;
1076         if (atomic_read(&ops->entries) < ops->gc_thresh)
1077                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1078 out:
1079         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1080         return (atomic_read(&ops->entries) > rt_max_size);
1081 }
1082
1083 /* Clean host part of a prefix. Not necessary in radix tree,
1084    but results in cleaner routing tables.
1085
1086    Remove it only when all the things will work!
1087  */
1088
1089 static int ipv6_get_mtu(struct net_device *dev)
1090 {
1091         int mtu = IPV6_MIN_MTU;
1092         struct inet6_dev *idev;
1093
1094         idev = in6_dev_get(dev);
1095         if (idev) {
1096                 mtu = idev->cnf.mtu6;
1097                 in6_dev_put(idev);
1098         }
1099         return mtu;
1100 }
1101
1102 int ip6_dst_hoplimit(struct dst_entry *dst)
1103 {
1104         int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
1105         if (hoplimit < 0) {
1106                 struct net_device *dev = dst->dev;
1107                 struct inet6_dev *idev = in6_dev_get(dev);
1108                 if (idev) {
1109                         hoplimit = idev->cnf.hop_limit;
1110                         in6_dev_put(idev);
1111                 } else
1112                         hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1113         }
1114         return hoplimit;
1115 }
1116
1117 /*
1118  *
1119  */
1120
1121 int ip6_route_add(struct fib6_config *cfg)
1122 {
1123         int err;
1124         struct net *net = cfg->fc_nlinfo.nl_net;
1125         struct rt6_info *rt = NULL;
1126         struct net_device *dev = NULL;
1127         struct inet6_dev *idev = NULL;
1128         struct fib6_table *table;
1129         int addr_type;
1130
1131         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1132                 return -EINVAL;
1133 #ifndef CONFIG_IPV6_SUBTREES
1134         if (cfg->fc_src_len)
1135                 return -EINVAL;
1136 #endif
1137         if (cfg->fc_ifindex) {
1138                 err = -ENODEV;
1139                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1140                 if (!dev)
1141                         goto out;
1142                 idev = in6_dev_get(dev);
1143                 if (!idev)
1144                         goto out;
1145         }
1146
1147         if (cfg->fc_metric == 0)
1148                 cfg->fc_metric = IP6_RT_PRIO_USER;
1149
1150         table = fib6_new_table(net, cfg->fc_table);
1151         if (table == NULL) {
1152                 err = -ENOBUFS;
1153                 goto out;
1154         }
1155
1156         rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1157
1158         if (rt == NULL) {
1159                 err = -ENOMEM;
1160                 goto out;
1161         }
1162
1163         rt->u.dst.obsolete = -1;
1164         rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1165                                 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1166                                 0;
1167
1168         if (cfg->fc_protocol == RTPROT_UNSPEC)
1169                 cfg->fc_protocol = RTPROT_BOOT;
1170         rt->rt6i_protocol = cfg->fc_protocol;
1171
1172         addr_type = ipv6_addr_type(&cfg->fc_dst);
1173
1174         if (addr_type & IPV6_ADDR_MULTICAST)
1175                 rt->u.dst.input = ip6_mc_input;
1176         else
1177                 rt->u.dst.input = ip6_forward;
1178
1179         rt->u.dst.output = ip6_output;
1180
1181         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1182         rt->rt6i_dst.plen = cfg->fc_dst_len;
1183         if (rt->rt6i_dst.plen == 128)
1184                rt->u.dst.flags = DST_HOST;
1185
1186 #ifdef CONFIG_IPV6_SUBTREES
1187         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1188         rt->rt6i_src.plen = cfg->fc_src_len;
1189 #endif
1190
1191         rt->rt6i_metric = cfg->fc_metric;
1192
1193         /* We cannot add true routes via loopback here,
1194            they would result in kernel looping; promote them to reject routes
1195          */
1196         if ((cfg->fc_flags & RTF_REJECT) ||
1197             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1198                 /* hold loopback dev/idev if we haven't done so. */
1199                 if (dev != net->loopback_dev) {
1200                         if (dev) {
1201                                 dev_put(dev);
1202                                 in6_dev_put(idev);
1203                         }
1204                         dev = net->loopback_dev;
1205                         dev_hold(dev);
1206                         idev = in6_dev_get(dev);
1207                         if (!idev) {
1208                                 err = -ENODEV;
1209                                 goto out;
1210                         }
1211                 }
1212                 rt->u.dst.output = ip6_pkt_discard_out;
1213                 rt->u.dst.input = ip6_pkt_discard;
1214                 rt->u.dst.error = -ENETUNREACH;
1215                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1216                 goto install_route;
1217         }
1218
1219         if (cfg->fc_flags & RTF_GATEWAY) {
1220                 struct in6_addr *gw_addr;
1221                 int gwa_type;
1222
1223                 gw_addr = &cfg->fc_gateway;
1224                 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1225                 gwa_type = ipv6_addr_type(gw_addr);
1226
1227                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1228                         struct rt6_info *grt;
1229
1230                         /* IPv6 strictly inhibits using not link-local
1231                            addresses as nexthop address.
1232                            Otherwise, router will not able to send redirects.
1233                            It is very good, but in some (rare!) circumstances
1234                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1235                            some exceptions. --ANK
1236                          */
1237                         err = -EINVAL;
1238                         if (!(gwa_type&IPV6_ADDR_UNICAST))
1239                                 goto out;
1240
1241                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1242
1243                         err = -EHOSTUNREACH;
1244                         if (grt == NULL)
1245                                 goto out;
1246                         if (dev) {
1247                                 if (dev != grt->rt6i_dev) {
1248                                         dst_release(&grt->u.dst);
1249                                         goto out;
1250                                 }
1251                         } else {
1252                                 dev = grt->rt6i_dev;
1253                                 idev = grt->rt6i_idev;
1254                                 dev_hold(dev);
1255                                 in6_dev_hold(grt->rt6i_idev);
1256                         }
1257                         if (!(grt->rt6i_flags&RTF_GATEWAY))
1258                                 err = 0;
1259                         dst_release(&grt->u.dst);
1260
1261                         if (err)
1262                                 goto out;
1263                 }
1264                 err = -EINVAL;
1265                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1266                         goto out;
1267         }
1268
1269         err = -ENODEV;
1270         if (dev == NULL)
1271                 goto out;
1272
1273         if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1274                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1275                 if (IS_ERR(rt->rt6i_nexthop)) {
1276                         err = PTR_ERR(rt->rt6i_nexthop);
1277                         rt->rt6i_nexthop = NULL;
1278                         goto out;
1279                 }
1280         }
1281
1282         rt->rt6i_flags = cfg->fc_flags;
1283
1284 install_route:
1285         if (cfg->fc_mx) {
1286                 struct nlattr *nla;
1287                 int remaining;
1288
1289                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1290                         int type = nla_type(nla);
1291
1292                         if (type) {
1293                                 if (type > RTAX_MAX) {
1294                                         err = -EINVAL;
1295                                         goto out;
1296                                 }
1297
1298                                 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1299                         }
1300                 }
1301         }
1302
1303         if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
1304                 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1305         if (!dst_mtu(&rt->u.dst))
1306                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1307         if (!dst_metric(&rt->u.dst, RTAX_ADVMSS))
1308                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1309         rt->u.dst.dev = dev;
1310         rt->rt6i_idev = idev;
1311         rt->rt6i_table = table;
1312
1313         cfg->fc_nlinfo.nl_net = dev_net(dev);
1314
1315         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1316
1317 out:
1318         if (dev)
1319                 dev_put(dev);
1320         if (idev)
1321                 in6_dev_put(idev);
1322         if (rt)
1323                 dst_free(&rt->u.dst);
1324         return err;
1325 }
1326
1327 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1328 {
1329         int err;
1330         struct fib6_table *table;
1331         struct net *net = dev_net(rt->rt6i_dev);
1332
1333         if (rt == net->ipv6.ip6_null_entry)
1334                 return -ENOENT;
1335
1336         table = rt->rt6i_table;
1337         write_lock_bh(&table->tb6_lock);
1338
1339         err = fib6_del(rt, info);
1340         dst_release(&rt->u.dst);
1341
1342         write_unlock_bh(&table->tb6_lock);
1343
1344         return err;
1345 }
1346
1347 int ip6_del_rt(struct rt6_info *rt)
1348 {
1349         struct nl_info info = {
1350                 .nl_net = dev_net(rt->rt6i_dev),
1351         };
1352         return __ip6_del_rt(rt, &info);
1353 }
1354
1355 static int ip6_route_del(struct fib6_config *cfg)
1356 {
1357         struct fib6_table *table;
1358         struct fib6_node *fn;
1359         struct rt6_info *rt;
1360         int err = -ESRCH;
1361
1362         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1363         if (table == NULL)
1364                 return err;
1365
1366         read_lock_bh(&table->tb6_lock);
1367
1368         fn = fib6_locate(&table->tb6_root,
1369                          &cfg->fc_dst, cfg->fc_dst_len,
1370                          &cfg->fc_src, cfg->fc_src_len);
1371
1372         if (fn) {
1373                 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1374                         if (cfg->fc_ifindex &&
1375                             (rt->rt6i_dev == NULL ||
1376                              rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1377                                 continue;
1378                         if (cfg->fc_flags & RTF_GATEWAY &&
1379                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1380                                 continue;
1381                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1382                                 continue;
1383                         dst_hold(&rt->u.dst);
1384                         read_unlock_bh(&table->tb6_lock);
1385
1386                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1387                 }
1388         }
1389         read_unlock_bh(&table->tb6_lock);
1390
1391         return err;
1392 }
1393
1394 /*
1395  *      Handle redirects
1396  */
1397 struct ip6rd_flowi {
1398         struct flowi fl;
1399         struct in6_addr gateway;
1400 };
1401
1402 static struct rt6_info *__ip6_route_redirect(struct net *net,
1403                                              struct fib6_table *table,
1404                                              struct flowi *fl,
1405                                              int flags)
1406 {
1407         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1408         struct rt6_info *rt;
1409         struct fib6_node *fn;
1410
1411         /*
1412          * Get the "current" route for this destination and
1413          * check if the redirect has come from approriate router.
1414          *
1415          * RFC 2461 specifies that redirects should only be
1416          * accepted if they come from the nexthop to the target.
1417          * Due to the way the routes are chosen, this notion
1418          * is a bit fuzzy and one might need to check all possible
1419          * routes.
1420          */
1421
1422         read_lock_bh(&table->tb6_lock);
1423         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1424 restart:
1425         for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1426                 /*
1427                  * Current route is on-link; redirect is always invalid.
1428                  *
1429                  * Seems, previous statement is not true. It could
1430                  * be node, which looks for us as on-link (f.e. proxy ndisc)
1431                  * But then router serving it might decide, that we should
1432                  * know truth 8)8) --ANK (980726).
1433                  */
1434                 if (rt6_check_expired(rt))
1435                         continue;
1436                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1437                         continue;
1438                 if (fl->oif != rt->rt6i_dev->ifindex)
1439                         continue;
1440                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1441                         continue;
1442                 break;
1443         }
1444
1445         if (!rt)
1446                 rt = net->ipv6.ip6_null_entry;
1447         BACKTRACK(net, &fl->fl6_src);
1448 out:
1449         dst_hold(&rt->u.dst);
1450
1451         read_unlock_bh(&table->tb6_lock);
1452
1453         return rt;
1454 };
1455
1456 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1457                                            struct in6_addr *src,
1458                                            struct in6_addr *gateway,
1459                                            struct net_device *dev)
1460 {
1461         int flags = RT6_LOOKUP_F_HAS_SADDR;
1462         struct net *net = dev_net(dev);
1463         struct ip6rd_flowi rdfl = {
1464                 .fl = {
1465                         .oif = dev->ifindex,
1466                         .nl_u = {
1467                                 .ip6_u = {
1468                                         .daddr = *dest,
1469                                         .saddr = *src,
1470                                 },
1471                         },
1472                 },
1473         };
1474
1475         ipv6_addr_copy(&rdfl.gateway, gateway);
1476
1477         if (rt6_need_strict(dest))
1478                 flags |= RT6_LOOKUP_F_IFACE;
1479
1480         return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
1481                                                    flags, __ip6_route_redirect);
1482 }
1483
1484 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1485                   struct in6_addr *saddr,
1486                   struct neighbour *neigh, u8 *lladdr, int on_link)
1487 {
1488         struct rt6_info *rt, *nrt = NULL;
1489         struct netevent_redirect netevent;
1490         struct net *net = dev_net(neigh->dev);
1491
1492         rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1493
1494         if (rt == net->ipv6.ip6_null_entry) {
1495                 if (net_ratelimit())
1496                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1497                                "for redirect target\n");
1498                 goto out;
1499         }
1500
1501         /*
1502          *      We have finally decided to accept it.
1503          */
1504
1505         neigh_update(neigh, lladdr, NUD_STALE,
1506                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1507                      NEIGH_UPDATE_F_OVERRIDE|
1508                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1509                                      NEIGH_UPDATE_F_ISROUTER))
1510                      );
1511
1512         /*
1513          * Redirect received -> path was valid.
1514          * Look, redirects are sent only in response to data packets,
1515          * so that this nexthop apparently is reachable. --ANK
1516          */
1517         dst_confirm(&rt->u.dst);
1518
1519         /* Duplicate redirect: silently ignore. */
1520         if (neigh == rt->u.dst.neighbour)
1521                 goto out;
1522
1523         nrt = ip6_rt_copy(rt);
1524         if (nrt == NULL)
1525                 goto out;
1526
1527         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1528         if (on_link)
1529                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1530
1531         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1532         nrt->rt6i_dst.plen = 128;
1533         nrt->u.dst.flags |= DST_HOST;
1534
1535         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1536         nrt->rt6i_nexthop = neigh_clone(neigh);
1537         /* Reset pmtu, it may be better */
1538         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1539         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
1540                                                         dst_mtu(&nrt->u.dst));
1541
1542         if (ip6_ins_rt(nrt))
1543                 goto out;
1544
1545         netevent.old = &rt->u.dst;
1546         netevent.new = &nrt->u.dst;
1547         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1548
1549         if (rt->rt6i_flags&RTF_CACHE) {
1550                 ip6_del_rt(rt);
1551                 return;
1552         }
1553
1554 out:
1555         dst_release(&rt->u.dst);
1556 }
1557
1558 /*
1559  *      Handle ICMP "packet too big" messages
1560  *      i.e. Path MTU discovery
1561  */
1562
1563 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1564                         struct net_device *dev, u32 pmtu)
1565 {
1566         struct rt6_info *rt, *nrt;
1567         struct net *net = dev_net(dev);
1568         int allfrag = 0;
1569
1570         rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
1571         if (rt == NULL)
1572                 return;
1573
1574         if (pmtu >= dst_mtu(&rt->u.dst))
1575                 goto out;
1576
1577         if (pmtu < IPV6_MIN_MTU) {
1578                 /*
1579                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1580                  * MTU (1280) and a fragment header should always be included
1581                  * after a node receiving Too Big message reporting PMTU is
1582                  * less than the IPv6 Minimum Link MTU.
1583                  */
1584                 pmtu = IPV6_MIN_MTU;
1585                 allfrag = 1;
1586         }
1587
1588         /* New mtu received -> path was valid.
1589            They are sent only in response to data packets,
1590            so that this nexthop apparently is reachable. --ANK
1591          */
1592         dst_confirm(&rt->u.dst);
1593
1594         /* Host route. If it is static, it would be better
1595            not to override it, but add new one, so that
1596            when cache entry will expire old pmtu
1597            would return automatically.
1598          */
1599         if (rt->rt6i_flags & RTF_CACHE) {
1600                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1601                 if (allfrag)
1602                         rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1603                 dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1604                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1605                 goto out;
1606         }
1607
1608         /* Network route.
1609            Two cases are possible:
1610            1. It is connected route. Action: COW
1611            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1612          */
1613         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1614                 nrt = rt6_alloc_cow(rt, daddr, saddr);
1615         else
1616                 nrt = rt6_alloc_clone(rt, daddr);
1617
1618         if (nrt) {
1619                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1620                 if (allfrag)
1621                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1622
1623                 /* According to RFC 1981, detecting PMTU increase shouldn't be
1624                  * happened within 5 mins, the recommended timer is 10 mins.
1625                  * Here this route expiration time is set to ip6_rt_mtu_expires
1626                  * which is 10 mins. After 10 mins the decreased pmtu is expired
1627                  * and detecting PMTU increase will be automatically happened.
1628                  */
1629                 dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1630                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1631
1632                 ip6_ins_rt(nrt);
1633         }
1634 out:
1635         dst_release(&rt->u.dst);
1636 }
1637
1638 /*
1639  *      Misc support functions
1640  */
1641
1642 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1643 {
1644         struct net *net = dev_net(ort->rt6i_dev);
1645         struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1646
1647         if (rt) {
1648                 rt->u.dst.input = ort->u.dst.input;
1649                 rt->u.dst.output = ort->u.dst.output;
1650
1651                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1652                 rt->u.dst.error = ort->u.dst.error;
1653                 rt->u.dst.dev = ort->u.dst.dev;
1654                 if (rt->u.dst.dev)
1655                         dev_hold(rt->u.dst.dev);
1656                 rt->rt6i_idev = ort->rt6i_idev;
1657                 if (rt->rt6i_idev)
1658                         in6_dev_hold(rt->rt6i_idev);
1659                 rt->u.dst.lastuse = jiffies;
1660                 rt->rt6i_expires = 0;
1661
1662                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1663                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1664                 rt->rt6i_metric = 0;
1665
1666                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1667 #ifdef CONFIG_IPV6_SUBTREES
1668                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1669 #endif
1670                 rt->rt6i_table = ort->rt6i_table;
1671         }
1672         return rt;
1673 }
1674
1675 #ifdef CONFIG_IPV6_ROUTE_INFO
1676 static struct rt6_info *rt6_get_route_info(struct net *net,
1677                                            struct in6_addr *prefix, int prefixlen,
1678                                            struct in6_addr *gwaddr, int ifindex)
1679 {
1680         struct fib6_node *fn;
1681         struct rt6_info *rt = NULL;
1682         struct fib6_table *table;
1683
1684         table = fib6_get_table(net, RT6_TABLE_INFO);
1685         if (table == NULL)
1686                 return NULL;
1687
1688         write_lock_bh(&table->tb6_lock);
1689         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1690         if (!fn)
1691                 goto out;
1692
1693         for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1694                 if (rt->rt6i_dev->ifindex != ifindex)
1695                         continue;
1696                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1697                         continue;
1698                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1699                         continue;
1700                 dst_hold(&rt->u.dst);
1701                 break;
1702         }
1703 out:
1704         write_unlock_bh(&table->tb6_lock);
1705         return rt;
1706 }
1707
1708 static struct rt6_info *rt6_add_route_info(struct net *net,
1709                                            struct in6_addr *prefix, int prefixlen,
1710                                            struct in6_addr *gwaddr, int ifindex,
1711                                            unsigned pref)
1712 {
1713         struct fib6_config cfg = {
1714                 .fc_table       = RT6_TABLE_INFO,
1715                 .fc_metric      = IP6_RT_PRIO_USER,
1716                 .fc_ifindex     = ifindex,
1717                 .fc_dst_len     = prefixlen,
1718                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1719                                   RTF_UP | RTF_PREF(pref),
1720                 .fc_nlinfo.pid = 0,
1721                 .fc_nlinfo.nlh = NULL,
1722                 .fc_nlinfo.nl_net = net,
1723         };
1724
1725         ipv6_addr_copy(&cfg.fc_dst, prefix);
1726         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1727
1728         /* We should treat it as a default route if prefix length is 0. */
1729         if (!prefixlen)
1730                 cfg.fc_flags |= RTF_DEFAULT;
1731
1732         ip6_route_add(&cfg);
1733
1734         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1735 }
1736 #endif
1737
1738 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1739 {
1740         struct rt6_info *rt;
1741         struct fib6_table *table;
1742
1743         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1744         if (table == NULL)
1745                 return NULL;
1746
1747         write_lock_bh(&table->tb6_lock);
1748         for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1749                 if (dev == rt->rt6i_dev &&
1750                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1751                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1752                         break;
1753         }
1754         if (rt)
1755                 dst_hold(&rt->u.dst);
1756         write_unlock_bh(&table->tb6_lock);
1757         return rt;
1758 }
1759
1760 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1761                                      struct net_device *dev,
1762                                      unsigned int pref)
1763 {
1764         struct fib6_config cfg = {
1765                 .fc_table       = RT6_TABLE_DFLT,
1766                 .fc_metric      = IP6_RT_PRIO_USER,
1767                 .fc_ifindex     = dev->ifindex,
1768                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1769                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1770                 .fc_nlinfo.pid = 0,
1771                 .fc_nlinfo.nlh = NULL,
1772                 .fc_nlinfo.nl_net = dev_net(dev),
1773         };
1774
1775         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1776
1777         ip6_route_add(&cfg);
1778
1779         return rt6_get_dflt_router(gwaddr, dev);
1780 }
1781
1782 void rt6_purge_dflt_routers(struct net *net)
1783 {
1784         struct rt6_info *rt;
1785         struct fib6_table *table;
1786
1787         /* NOTE: Keep consistent with rt6_get_dflt_router */
1788         table = fib6_get_table(net, RT6_TABLE_DFLT);
1789         if (table == NULL)
1790                 return;
1791
1792 restart:
1793         read_lock_bh(&table->tb6_lock);
1794         for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1795                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1796                         dst_hold(&rt->u.dst);
1797                         read_unlock_bh(&table->tb6_lock);
1798                         ip6_del_rt(rt);
1799                         goto restart;
1800                 }
1801         }
1802         read_unlock_bh(&table->tb6_lock);
1803 }
1804
1805 static void rtmsg_to_fib6_config(struct net *net,
1806                                  struct in6_rtmsg *rtmsg,
1807                                  struct fib6_config *cfg)
1808 {
1809         memset(cfg, 0, sizeof(*cfg));
1810
1811         cfg->fc_table = RT6_TABLE_MAIN;
1812         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1813         cfg->fc_metric = rtmsg->rtmsg_metric;
1814         cfg->fc_expires = rtmsg->rtmsg_info;
1815         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1816         cfg->fc_src_len = rtmsg->rtmsg_src_len;
1817         cfg->fc_flags = rtmsg->rtmsg_flags;
1818
1819         cfg->fc_nlinfo.nl_net = net;
1820
1821         ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1822         ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1823         ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1824 }
1825
1826 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1827 {
1828         struct fib6_config cfg;
1829         struct in6_rtmsg rtmsg;
1830         int err;
1831
1832         switch(cmd) {
1833         case SIOCADDRT:         /* Add a route */
1834         case SIOCDELRT:         /* Delete a route */
1835                 if (!capable(CAP_NET_ADMIN))
1836                         return -EPERM;
1837                 err = copy_from_user(&rtmsg, arg,
1838                                      sizeof(struct in6_rtmsg));
1839                 if (err)
1840                         return -EFAULT;
1841
1842                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1843
1844                 rtnl_lock();
1845                 switch (cmd) {
1846                 case SIOCADDRT:
1847                         err = ip6_route_add(&cfg);
1848                         break;
1849                 case SIOCDELRT:
1850                         err = ip6_route_del(&cfg);
1851                         break;
1852                 default:
1853                         err = -EINVAL;
1854                 }
1855                 rtnl_unlock();
1856
1857                 return err;
1858         }
1859
1860         return -EINVAL;
1861 }
1862
1863 /*
1864  *      Drop the packet on the floor
1865  */
1866
1867 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1868 {
1869         int type;
1870         struct dst_entry *dst = skb_dst(skb);
1871         switch (ipstats_mib_noroutes) {
1872         case IPSTATS_MIB_INNOROUTES:
1873                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1874                 if (type == IPV6_ADDR_ANY) {
1875                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1876                                       IPSTATS_MIB_INADDRERRORS);
1877                         break;
1878                 }
1879                 /* FALLTHROUGH */
1880         case IPSTATS_MIB_OUTNOROUTES:
1881                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1882                               ipstats_mib_noroutes);
1883                 break;
1884         }
1885         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1886         kfree_skb(skb);
1887         return 0;
1888 }
1889
1890 static int ip6_pkt_discard(struct sk_buff *skb)
1891 {
1892         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1893 }
1894
1895 static int ip6_pkt_discard_out(struct sk_buff *skb)
1896 {
1897         skb->dev = skb_dst(skb)->dev;
1898         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1899 }
1900
1901 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1902
1903 static int ip6_pkt_prohibit(struct sk_buff *skb)
1904 {
1905         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
1906 }
1907
1908 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1909 {
1910         skb->dev = skb_dst(skb)->dev;
1911         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1912 }
1913
1914 #endif
1915
1916 /*
1917  *      Allocate a dst for local (unicast / anycast) address.
1918  */
1919
1920 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1921                                     const struct in6_addr *addr,
1922                                     int anycast)
1923 {
1924         struct net *net = dev_net(idev->dev);
1925         struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1926         struct neighbour *neigh;
1927
1928         if (rt == NULL)
1929                 return ERR_PTR(-ENOMEM);
1930
1931         dev_hold(net->loopback_dev);
1932         in6_dev_hold(idev);
1933
1934         rt->u.dst.flags = DST_HOST;
1935         rt->u.dst.input = ip6_input;
1936         rt->u.dst.output = ip6_output;
1937         rt->rt6i_dev = net->loopback_dev;
1938         rt->rt6i_idev = idev;
1939         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1940         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1941         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1942         rt->u.dst.obsolete = -1;
1943
1944         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1945         if (anycast)
1946                 rt->rt6i_flags |= RTF_ANYCAST;
1947         else
1948                 rt->rt6i_flags |= RTF_LOCAL;
1949         neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1950         if (IS_ERR(neigh)) {
1951                 dst_free(&rt->u.dst);
1952
1953                 /* We are casting this because that is the return
1954                  * value type.  But an errno encoded pointer is the
1955                  * same regardless of the underlying pointer type,
1956                  * and that's what we are returning.  So this is OK.
1957                  */
1958                 return (struct rt6_info *) neigh;
1959         }
1960         rt->rt6i_nexthop = neigh;
1961
1962         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1963         rt->rt6i_dst.plen = 128;
1964         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1965
1966         atomic_set(&rt->u.dst.__refcnt, 1);
1967
1968         return rt;
1969 }
1970
/* Argument bundle handed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device to match; NULL matches any device */
	struct net *net;	/* namespace whose null entry must be kept */
};
1975
1976 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1977 {
1978         struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
1979         struct net *net = ((struct arg_dev_net *)arg)->net;
1980
1981         if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
1982             rt != net->ipv6.ip6_null_entry) {
1983                 RT6_TRACE("deleted by ifdown %p\n", rt);
1984                 return -1;
1985         }
1986         return 0;
1987 }
1988
1989 void rt6_ifdown(struct net *net, struct net_device *dev)
1990 {
1991         struct arg_dev_net adn = {
1992                 .dev = dev,
1993                 .net = net,
1994         };
1995
1996         fib6_clean_all(net, fib6_ifdown, 0, &adn);
1997         icmp6_clean_all(fib6_ifdown, &adn);
1998 }
1999
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU */
};
2005
2006 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2007 {
2008         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2009         struct inet6_dev *idev;
2010         struct net *net = dev_net(arg->dev);
2011
2012         /* In IPv6 pmtu discovery is not optional,
2013            so that RTAX_MTU lock cannot disable it.
2014            We still use this lock to block changes
2015            caused by addrconf/ndisc.
2016         */
2017
2018         idev = __in6_dev_get(arg->dev);
2019         if (idev == NULL)
2020                 return 0;
2021
2022         /* For administrative MTU increase, there is no way to discover
2023            IPv6 PMTU increase, so PMTU increase should be updated here.
2024            Since RFC 1981 doesn't include administrative MTU increase
2025            update PMTU increase is a MUST. (i.e. jumbo frame)
2026          */
2027         /*
2028            If new MTU is less than route PMTU, this new MTU will be the
2029            lowest MTU in the path, update the route PMTU to reflect PMTU
2030            decreases; if new MTU is greater than route PMTU, and the
2031            old MTU is the lowest MTU in the path, update the route PMTU
2032            to reflect the increase. In this case if the other nodes' MTU
2033            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2034            PMTU discouvery.
2035          */
2036         if (rt->rt6i_dev == arg->dev &&
2037             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
2038             (dst_mtu(&rt->u.dst) >= arg->mtu ||
2039              (dst_mtu(&rt->u.dst) < arg->mtu &&
2040               dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
2041                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
2042                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
2043         }
2044         return 0;
2045 }
2046
2047 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2048 {
2049         struct rt6_mtu_change_arg arg = {
2050                 .dev = dev,
2051                 .mtu = mtu,
2052         };
2053
2054         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2055 }
2056
2057 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2058         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2059         [RTA_OIF]               = { .type = NLA_U32 },
2060         [RTA_IIF]               = { .type = NLA_U32 },
2061         [RTA_PRIORITY]          = { .type = NLA_U32 },
2062         [RTA_METRICS]           = { .type = NLA_NESTED },
2063 };
2064
2065 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2066                               struct fib6_config *cfg)
2067 {
2068         struct rtmsg *rtm;
2069         struct nlattr *tb[RTA_MAX+1];
2070         int err;
2071
2072         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2073         if (err < 0)
2074                 goto errout;
2075
2076         err = -EINVAL;
2077         rtm = nlmsg_data(nlh);
2078         memset(cfg, 0, sizeof(*cfg));
2079
2080         cfg->fc_table = rtm->rtm_table;
2081         cfg->fc_dst_len = rtm->rtm_dst_len;
2082         cfg->fc_src_len = rtm->rtm_src_len;
2083         cfg->fc_flags = RTF_UP;
2084         cfg->fc_protocol = rtm->rtm_protocol;
2085
2086         if (rtm->rtm_type == RTN_UNREACHABLE)
2087                 cfg->fc_flags |= RTF_REJECT;
2088
2089         cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2090         cfg->fc_nlinfo.nlh = nlh;
2091         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2092
2093         if (tb[RTA_GATEWAY]) {
2094                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2095                 cfg->fc_flags |= RTF_GATEWAY;
2096         }
2097
2098         if (tb[RTA_DST]) {
2099                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2100
2101                 if (nla_len(tb[RTA_DST]) < plen)
2102                         goto errout;
2103
2104                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2105         }
2106
2107         if (tb[RTA_SRC]) {
2108                 int plen = (rtm->rtm_src_len + 7) >> 3;
2109
2110                 if (nla_len(tb[RTA_SRC]) < plen)
2111                         goto errout;
2112
2113                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2114         }
2115
2116         if (tb[RTA_OIF])
2117                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2118
2119         if (tb[RTA_PRIORITY])
2120                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2121
2122         if (tb[RTA_METRICS]) {
2123                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2124                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2125         }
2126
2127         if (tb[RTA_TABLE])
2128                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2129
2130         err = 0;
2131 errout:
2132         return err;
2133 }
2134
2135 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2136 {
2137         struct fib6_config cfg;
2138         int err;
2139
2140         err = rtm_to_fib6_config(skb, nlh, &cfg);
2141         if (err < 0)
2142                 return err;
2143
2144         return ip6_route_del(&cfg);
2145 }
2146
2147 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2148 {
2149         struct fib6_config cfg;
2150         int err;
2151
2152         err = rtm_to_fib6_config(skb, nlh, &cfg);
2153         if (err < 0)
2154                 return err;
2155
2156         return ip6_route_add(&cfg);
2157 }
2158
2159 static inline size_t rt6_nlmsg_size(void)
2160 {
2161         return NLMSG_ALIGN(sizeof(struct rtmsg))
2162                + nla_total_size(16) /* RTA_SRC */
2163                + nla_total_size(16) /* RTA_DST */
2164                + nla_total_size(16) /* RTA_GATEWAY */
2165                + nla_total_size(16) /* RTA_PREFSRC */
2166                + nla_total_size(4) /* RTA_TABLE */
2167                + nla_total_size(4) /* RTA_IIF */
2168                + nla_total_size(4) /* RTA_OIF */
2169                + nla_total_size(4) /* RTA_PRIORITY */
2170                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2171                + nla_total_size(sizeof(struct rta_cacheinfo));
2172 }
2173
2174 static int rt6_fill_node(struct net *net,
2175                          struct sk_buff *skb, struct rt6_info *rt,
2176                          struct in6_addr *dst, struct in6_addr *src,
2177                          int iif, int type, u32 pid, u32 seq,
2178                          int prefix, int nowait, unsigned int flags)
2179 {
2180         struct rtmsg *rtm;
2181         struct nlmsghdr *nlh;
2182         long expires;
2183         u32 table;
2184
2185         if (prefix) {   /* user wants prefix routes only */
2186                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2187                         /* success since this is not a prefix route */
2188                         return 1;
2189                 }
2190         }
2191
2192         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2193         if (nlh == NULL)
2194                 return -EMSGSIZE;
2195
2196         rtm = nlmsg_data(nlh);
2197         rtm->rtm_family = AF_INET6;
2198         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2199         rtm->rtm_src_len = rt->rt6i_src.plen;
2200         rtm->rtm_tos = 0;
2201         if (rt->rt6i_table)
2202                 table = rt->rt6i_table->tb6_id;
2203         else
2204                 table = RT6_TABLE_UNSPEC;
2205         rtm->rtm_table = table;
2206         NLA_PUT_U32(skb, RTA_TABLE, table);
2207         if (rt->rt6i_flags&RTF_REJECT)
2208                 rtm->rtm_type = RTN_UNREACHABLE;
2209         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2210                 rtm->rtm_type = RTN_LOCAL;
2211         else
2212                 rtm->rtm_type = RTN_UNICAST;
2213         rtm->rtm_flags = 0;
2214         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2215         rtm->rtm_protocol = rt->rt6i_protocol;
2216         if (rt->rt6i_flags&RTF_DYNAMIC)
2217                 rtm->rtm_protocol = RTPROT_REDIRECT;
2218         else if (rt->rt6i_flags & RTF_ADDRCONF)
2219                 rtm->rtm_protocol = RTPROT_KERNEL;
2220         else if (rt->rt6i_flags&RTF_DEFAULT)
2221                 rtm->rtm_protocol = RTPROT_RA;
2222
2223         if (rt->rt6i_flags&RTF_CACHE)
2224                 rtm->rtm_flags |= RTM_F_CLONED;
2225
2226         if (dst) {
2227                 NLA_PUT(skb, RTA_DST, 16, dst);
2228                 rtm->rtm_dst_len = 128;
2229         } else if (rtm->rtm_dst_len)
2230                 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2231 #ifdef CONFIG_IPV6_SUBTREES
2232         if (src) {
2233                 NLA_PUT(skb, RTA_SRC, 16, src);
2234                 rtm->rtm_src_len = 128;
2235         } else if (rtm->rtm_src_len)
2236                 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2237 #endif
2238         if (iif) {
2239 #ifdef CONFIG_IPV6_MROUTE
2240                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2241                         int err = ip6mr_get_route(net, skb, rtm, nowait);
2242                         if (err <= 0) {
2243                                 if (!nowait) {
2244                                         if (err == 0)
2245                                                 return 0;
2246                                         goto nla_put_failure;
2247                                 } else {
2248                                         if (err == -EMSGSIZE)
2249                                                 goto nla_put_failure;
2250                                 }
2251                         }
2252                 } else
2253 #endif
2254                         NLA_PUT_U32(skb, RTA_IIF, iif);
2255         } else if (dst) {
2256                 struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst);
2257                 struct in6_addr saddr_buf;
2258                 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2259                                        dst, 0, &saddr_buf) == 0)
2260                         NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2261         }
2262
2263         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2264                 goto nla_put_failure;
2265
2266         if (rt->u.dst.neighbour)
2267                 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2268
2269         if (rt->u.dst.dev)
2270                 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2271
2272         NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2273
2274         if (!(rt->rt6i_flags & RTF_EXPIRES))
2275                 expires = 0;
2276         else if (rt->rt6i_expires - jiffies < INT_MAX)
2277                 expires = rt->rt6i_expires - jiffies;
2278         else
2279                 expires = INT_MAX;
2280
2281         if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2282                                expires, rt->u.dst.error) < 0)
2283                 goto nla_put_failure;
2284
2285         return nlmsg_end(skb, nlh);
2286
2287 nla_put_failure:
2288         nlmsg_cancel(skb, nlh);
2289         return -EMSGSIZE;
2290 }
2291
2292 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2293 {
2294         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2295         int prefix;
2296
2297         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2298                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2299                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2300         } else
2301                 prefix = 0;
2302
2303         return rt6_fill_node(arg->net,
2304                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2305                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2306                      prefix, 0, NLM_F_MULTI);
2307 }
2308
2309 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2310 {
2311         struct net *net = sock_net(in_skb->sk);
2312         struct nlattr *tb[RTA_MAX+1];
2313         struct rt6_info *rt;
2314         struct sk_buff *skb;
2315         struct rtmsg *rtm;
2316         struct flowi fl;
2317         int err, iif = 0;
2318
2319         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2320         if (err < 0)
2321                 goto errout;
2322
2323         err = -EINVAL;
2324         memset(&fl, 0, sizeof(fl));
2325
2326         if (tb[RTA_SRC]) {
2327                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2328                         goto errout;
2329
2330                 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2331         }
2332
2333         if (tb[RTA_DST]) {
2334                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2335                         goto errout;
2336
2337                 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2338         }
2339
2340         if (tb[RTA_IIF])
2341                 iif = nla_get_u32(tb[RTA_IIF]);
2342
2343         if (tb[RTA_OIF])
2344                 fl.oif = nla_get_u32(tb[RTA_OIF]);
2345
2346         if (iif) {
2347                 struct net_device *dev;
2348                 dev = __dev_get_by_index(net, iif);
2349                 if (!dev) {
2350                         err = -ENODEV;
2351                         goto errout;
2352                 }
2353         }
2354
2355         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2356         if (skb == NULL) {
2357                 err = -ENOBUFS;
2358                 goto errout;
2359         }
2360
2361         /* Reserve room for dummy headers, this skb can pass
2362            through good chunk of routing engine.
2363          */
2364         skb_reset_mac_header(skb);
2365         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2366
2367         rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
2368         skb_dst_set(skb, &rt->u.dst);
2369
2370         err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2371                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2372                             nlh->nlmsg_seq, 0, 0, 0);
2373         if (err < 0) {
2374                 kfree_skb(skb);
2375                 goto errout;
2376         }
2377
2378         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2379 errout:
2380         return err;
2381 }
2382
2383 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2384 {
2385         struct sk_buff *skb;
2386         struct net *net = info->nl_net;
2387         u32 seq;
2388         int err;
2389
2390         err = -ENOBUFS;
2391         seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2392
2393         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2394         if (skb == NULL)
2395                 goto errout;
2396
2397         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2398                                 event, info->pid, seq, 0, 0, 0);
2399         if (err < 0) {
2400                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2401                 WARN_ON(err == -EMSGSIZE);
2402                 kfree_skb(skb);
2403                 goto errout;
2404         }
2405         rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2406                     info->nlh, gfp_any());
2407         return;
2408 errout:
2409         if (err < 0)
2410                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2411 }
2412
2413 static int ip6_route_dev_notify(struct notifier_block *this,
2414                                 unsigned long event, void *data)
2415 {
2416         struct net_device *dev = (struct net_device *)data;
2417         struct net *net = dev_net(dev);
2418
2419         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2420                 net->ipv6.ip6_null_entry->u.dst.dev = dev;
2421                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2422 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2423                 net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
2424                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2425                 net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
2426                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2427 #endif
2428         }
2429
2430         return NOTIFY_OK;
2431 }
2432
2433 /*
2434  *      /proc
2435  */
2436
2437 #ifdef CONFIG_PROC_FS
2438
/*
 * NOTE(review): presumably the byte length of one /proc route line;
 * not referenced by the seq_file code visible in this part of the
 * file -- confirm before relying on it.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2440
/*
 * NOTE(review): buffer bookkeeping structure that the seq_file based
 * /proc handlers visible in this part of the file do not use --
 * confirm whether it is still needed.
 */
struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2449
2450 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2451 {
2452         struct seq_file *m = p_arg;
2453
2454         seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2455
2456 #ifdef CONFIG_IPV6_SUBTREES
2457         seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2458 #else
2459         seq_puts(m, "00000000000000000000000000000000 00 ");
2460 #endif
2461
2462         if (rt->rt6i_nexthop) {
2463                 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
2464         } else {
2465                 seq_puts(m, "00000000000000000000000000000000");
2466         }
2467         seq_printf(m, " %08x %08x %08x %08x %8s\n",
2468                    rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2469                    rt->u.dst.__use, rt->rt6i_flags,
2470                    rt->rt6i_dev ? rt->rt6i_dev->name : "");
2471         return 0;
2472 }
2473
2474 static int ipv6_route_show(struct seq_file *m, void *v)
2475 {
2476         struct net *net = (struct net *)m->private;
2477         fib6_clean_all(net, rt6_info_route, 0, m);
2478         return 0;
2479 }
2480
2481 static int ipv6_route_open(struct inode *inode, struct file *file)
2482 {
2483         return single_open_net(inode, file, ipv6_route_show);
2484 }
2485
2486 static const struct file_operations ipv6_route_proc_fops = {
2487         .owner          = THIS_MODULE,
2488         .open           = ipv6_route_open,
2489         .read           = seq_read,
2490         .llseek         = seq_lseek,
2491         .release        = single_release_net,
2492 };
2493
2494 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2495 {
2496         struct net *net = (struct net *)seq->private;
2497         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2498                    net->ipv6.rt6_stats->fib_nodes,
2499                    net->ipv6.rt6_stats->fib_route_nodes,
2500                    net->ipv6.rt6_stats->fib_rt_alloc,
2501                    net->ipv6.rt6_stats->fib_rt_entries,
2502                    net->ipv6.rt6_stats->fib_rt_cache,
2503                    atomic_read(&net->ipv6.ip6_dst_ops.entries),
2504                    net->ipv6.rt6_stats->fib_discarded_routes);
2505
2506         return 0;
2507 }
2508
2509 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2510 {
2511         return single_open_net(inode, file, rt6_stats_seq_show);
2512 }
2513
2514 static const struct file_operations rt6_stats_seq_fops = {
2515         .owner   = THIS_MODULE,
2516         .open    = rt6_stats_seq_open,
2517         .read    = seq_read,
2518         .llseek  = seq_lseek,
2519         .release = single_release_net,
2520 };
2521 #endif  /* CONFIG_PROC_FS */
2522
2523 #ifdef CONFIG_SYSCTL
2524
2525 static
2526 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2527                               void __user *buffer, size_t *lenp, loff_t *ppos)
2528 {
2529         struct net *net = current->nsproxy->net_ns;
2530         int delay = net->ipv6.sysctl.flush_delay;
2531         if (write) {
2532                 proc_dointvec(ctl, write, buffer, lenp, ppos);
2533                 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2534                 return 0;
2535         } else
2536                 return -EINVAL;
2537 }
2538
2539 static int
2540 proc_dointvec_route(struct ctl_table *table, int write,
2541                 void __user *buffer, size_t *lenp, loff_t *ppos)
2542 {
2543         struct net *net = container_of(table->data, struct net,
2544                                        ipv6.sysctl.ip6_rt_max_size);
2545         ctl_table tmp = *table;
2546         int new_size, ret;
2547
2548         mutex_lock(&net->ipv6.sysctl.ip6_rt_lock);
2549         if (write) {
2550                 tmp.data = &new_size;
2551                 table = &tmp;
2552         }
2553
2554         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2555
2556         if (!ret && write) {
2557                 ret = mem_reserve_kmem_cache_set(&net->ipv6.ip6_rt_reserve,
2558                                 net->ipv6.ip6_dst_ops.kmem_cachep, new_size);
2559                 if (!ret)
2560                         net->ipv6.sysctl.ip6_rt_max_size = new_size;
2561         }
2562         mutex_unlock(&net->ipv6.sysctl.ip6_rt_lock);
2563
2564         return ret;
2565 }
2566
2567 ctl_table ipv6_route_table_template[] = {
2568         {
2569                 .procname       =       "flush",
2570                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2571                 .maxlen         =       sizeof(int),
2572                 .mode           =       0200,
2573                 .proc_handler   =       ipv6_sysctl_rtcache_flush
2574         },
2575         {
2576                 .procname       =       "gc_thresh",
2577                 .data           =       &ip6_dst_ops_template.gc_thresh,
2578                 .maxlen         =       sizeof(int),
2579                 .mode           =       0644,
2580                 .proc_handler   =       proc_dointvec,
2581         },
2582         {
2583                 .procname       =       "max_size",
2584                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2585                 .maxlen         =       sizeof(int),
2586                 .mode           =       0644,
2587                 .proc_handler   =       proc_dointvec_route,
2588         },
2589         {
2590                 .procname       =       "gc_min_interval",
2591                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2592                 .maxlen         =       sizeof(int),
2593                 .mode           =       0644,
2594                 .proc_handler   =       proc_dointvec_jiffies,
2595         },
2596         {
2597                 .procname       =       "gc_timeout",
2598                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2599                 .maxlen         =       sizeof(int),
2600                 .mode           =       0644,
2601                 .proc_handler   =       proc_dointvec_jiffies,
2602         },
2603         {
2604                 .procname       =       "gc_interval",
2605                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2606                 .maxlen         =       sizeof(int),
2607                 .mode           =       0644,
2608                 .proc_handler   =       proc_dointvec_jiffies,
2609         },
2610         {
2611                 .procname       =       "gc_elasticity",
2612                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2613                 .maxlen         =       sizeof(int),
2614                 .mode           =       0644,
2615                 .proc_handler   =       proc_dointvec_jiffies,
2616         },
2617         {
2618                 .procname       =       "mtu_expires",
2619                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2620                 .maxlen         =       sizeof(int),
2621                 .mode           =       0644,
2622                 .proc_handler   =       proc_dointvec_jiffies,
2623         },
2624         {
2625                 .procname       =       "min_adv_mss",
2626                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2627                 .maxlen         =       sizeof(int),
2628                 .mode           =       0644,
2629                 .proc_handler   =       proc_dointvec_jiffies,
2630         },
2631         {
2632                 .procname       =       "gc_min_interval_ms",
2633                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2634                 .maxlen         =       sizeof(int),
2635                 .mode           =       0644,
2636                 .proc_handler   =       proc_dointvec_ms_jiffies,
2637         },
2638         { }
2639 };
2640
2641 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2642 {
2643         struct ctl_table *table;
2644
2645         table = kmemdup(ipv6_route_table_template,
2646                         sizeof(ipv6_route_table_template),
2647                         GFP_KERNEL);
2648
2649         if (table) {
2650                 table[0].data = &net->ipv6.sysctl.flush_delay;
2651                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2652                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2653                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2654                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2655                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2656                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2657                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2658                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2659                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2660         }
2661
2662         mutex_init(&net->ipv6.sysctl.ip6_rt_lock);
2663
2664         return table;
2665 }
2666 #endif
2667
2668 static int __net_init ip6_route_net_init(struct net *net)
2669 {
2670         int ret = -ENOMEM;
2671
2672         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2673                sizeof(net->ipv6.ip6_dst_ops));
2674
2675         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2676                                            sizeof(*net->ipv6.ip6_null_entry),
2677                                            GFP_KERNEL);
2678         if (!net->ipv6.ip6_null_entry)
2679                 goto out_ip6_dst_ops;
2680         net->ipv6.ip6_null_entry->u.dst.path =
2681                 (struct dst_entry *)net->ipv6.ip6_null_entry;
2682         net->ipv6.ip6_null_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
2683
2684 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2685         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2686                                                sizeof(*net->ipv6.ip6_prohibit_entry),
2687                                                GFP_KERNEL);
2688         if (!net->ipv6.ip6_prohibit_entry)
2689                 goto out_ip6_null_entry;
2690         net->ipv6.ip6_prohibit_entry->u.dst.path =
2691                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2692         net->ipv6.ip6_prohibit_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
2693
2694         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2695                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
2696                                                GFP_KERNEL);
2697         if (!net->ipv6.ip6_blk_hole_entry)
2698                 goto out_ip6_prohibit_entry;
2699         net->ipv6.ip6_blk_hole_entry->u.dst.path =
2700                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2701         net->ipv6.ip6_blk_hole_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
2702 #endif
2703
2704         net->ipv6.sysctl.flush_delay = 0;
2705         net->ipv6.sysctl.ip6_rt_max_size = 4096;
2706         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2707         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2708         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2709         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2710         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2711         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2712
2713         mem_reserve_init(&net->ipv6.ip6_rt_reserve, "IPv6 route cache",
2714                          &net_rx_reserve);
2715         ret = mem_reserve_kmem_cache_set(&net->ipv6.ip6_rt_reserve,
2716                         net->ipv6.ip6_dst_ops.kmem_cachep,
2717                         net->ipv6.sysctl.ip6_rt_max_size);
2718         if (ret)
2719                 goto out_reserve_fail;
2720
2721 #ifdef CONFIG_PROC_FS
2722         proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2723         proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2724 #endif
2725         net->ipv6.ip6_rt_gc_expire = 30*HZ;
2726
2727         ret = 0;
2728 out:
2729         return ret;
2730
2731 out_reserve_fail:
2732         mem_reserve_disconnect(&net->ipv6.ip6_rt_reserve);
2733 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2734         kfree(net->ipv6.ip6_blk_hole_entry);
2735 out_ip6_prohibit_entry:
2736         kfree(net->ipv6.ip6_prohibit_entry);
2737 out_ip6_null_entry:
2738 #endif
2739         kfree(net->ipv6.ip6_null_entry);
2740 out_ip6_dst_ops:
2741         goto out;
2742 }
2743
2744 static void __net_exit ip6_route_net_exit(struct net *net)
2745 {
2746 #ifdef CONFIG_PROC_FS
2747         proc_net_remove(net, "ipv6_route");
2748         proc_net_remove(net, "rt6_stats");
2749 #endif
2750         mem_reserve_disconnect(&net->ipv6.ip6_rt_reserve);
2751         kfree(net->ipv6.ip6_null_entry);
2752 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2753         kfree(net->ipv6.ip6_prohibit_entry);
2754         kfree(net->ipv6.ip6_blk_hole_entry);
2755 #endif
2756 }
2757
2758 static struct pernet_operations ip6_route_net_ops = {
2759         .init = ip6_route_net_init,
2760         .exit = ip6_route_net_exit,
2761 };
2762
2763 static struct notifier_block ip6_route_dev_notifier = {
2764         .notifier_call = ip6_route_dev_notify,
2765         .priority = 0,
2766 };
2767
2768 int __init ip6_route_init(void)
2769 {
2770         int ret;
2771
2772         ret = -ENOMEM;
2773         ip6_dst_ops_template.kmem_cachep =
2774                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2775                                   SLAB_HWCACHE_ALIGN, NULL);
2776         if (!ip6_dst_ops_template.kmem_cachep)
2777                 goto out;
2778
2779         ret = register_pernet_subsys(&ip6_route_net_ops);
2780         if (ret)
2781                 goto out_kmem_cache;
2782
2783         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2784
2785         /* Registering of the loopback is done before this portion of code,
2786          * the loopback reference in rt6_info will not be taken, do it
2787          * manually for init_net */
2788         init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
2789         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2790   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2791         init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
2792         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2793         init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
2794         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2795   #endif
2796         ret = fib6_init();
2797         if (ret)
2798                 goto out_register_subsys;
2799
2800         ret = xfrm6_init();
2801         if (ret)
2802                 goto out_fib6_init;
2803
2804         ret = fib6_rules_init();
2805         if (ret)
2806                 goto xfrm6_init;
2807
2808         ret = -ENOBUFS;
2809         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2810             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2811             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2812                 goto fib6_rules_init;
2813
2814         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2815         if (ret)
2816                 goto fib6_rules_init;
2817
2818 out:
2819         return ret;
2820
2821 fib6_rules_init:
2822         fib6_rules_cleanup();
2823 xfrm6_init:
2824         xfrm6_fini();
2825 out_fib6_init:
2826         fib6_gc_cleanup();
2827 out_register_subsys:
2828         unregister_pernet_subsys(&ip6_route_net_ops);
2829 out_kmem_cache:
2830         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2831         goto out;
2832 }
2833
2834 void ip6_route_cleanup(void)
2835 {
2836         unregister_netdevice_notifier(&ip6_route_dev_notifier);
2837         fib6_rules_cleanup();
2838         xfrm6_fini();
2839         fib6_gc_cleanup();
2840         unregister_pernet_subsys(&ip6_route_net_ops);
2841         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2842 }