224c4006a9756e83898013b2c68a07eb13025999
[linux-flexiantxendom0-3.2.10.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  */
26
27 #include <linux/config.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/netlink.h>
39 #include <linux/if_arp.h>
40
41 #ifdef  CONFIG_PROC_FS
42 #include <linux/proc_fs.h>
43 #include <linux/seq_file.h>
44 #endif
45
46 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
52 #include <net/tcp.h>
53 #include <linux/rtnetlink.h>
54 #include <net/dst.h>
55 #include <net/xfrm.h>
56
57 #include <asm/uaccess.h>
58
59 #ifdef CONFIG_SYSCTL
60 #include <linux/sysctl.h>
61 #endif
62
63 /* Set to 3 to get tracing. */
64 #define RT6_DEBUG 2
65
/*
 * With RT6_DEBUG >= 3, RDBG(x) expands to "printk x" (so x must be a
 * parenthesised argument list) and RT6_TRACE() to a KERN_DEBUG printk.
 * At lower levels RDBG() expands to nothing and RT6_TRACE() to an empty
 * statement, so call sites compile away.
 */
66 #if RT6_DEBUG >= 3
67 #define RDBG(x) printk x
68 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
69 #else
70 #define RDBG(x)
71 #define RT6_TRACE(x...) do { ; } while (0)
72 #endif
73
74
/*
 * Tunables for the IPv6 route cache.  The comments describe only how the
 * visible part of this file uses each value.
 */
/* ip6_dst_gc() reports pressure once the dst entry count exceeds this. */
75 static int ip6_rt_max_size = 4096;
/* Minimum spacing (jiffies) between ip6_dst_gc() runs while under the limit. */
76 static int ip6_rt_gc_min_interval = HZ / 2;
/* ip6_dst_gc() resets its expiry to half of this once below gc_thresh. */
77 static int ip6_rt_gc_timeout = 60*HZ;
/* Non-static; not referenced in this part of the file. */
78 int ip6_rt_gc_interval = 30*HZ;
/* Shift used by ip6_dst_gc() to decay its expiry on every call. */
79 static int ip6_rt_gc_elasticity = 9;
/* Not referenced in this part of the file. */
80 static int ip6_rt_mtu_expires = 10*60*HZ;
/* Lower bound applied by ipv6_advmss(). */
81 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
82
83 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
84 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
85 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86 static int               ip6_dst_gc(void);
87
88 static int              ip6_pkt_discard(struct sk_buff *skb);
89 static void             ip6_link_failure(struct sk_buff *skb);
90 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
91
/*
 * dst_ops table used for every IPv6 route allocated via ip6_dst_alloc().
 * It wires the destination-cache callbacks to the handlers defined in this
 * file and sizes each entry as a struct rt6_info.
 */
92 static struct dst_ops ip6_dst_ops = {
93         .family                 =       AF_INET6,
94         .protocol               =       __constant_htons(ETH_P_IPV6),
95         .gc                     =       ip6_dst_gc,
            /* ip6_dst_gc() compares the live entry count against this. */
96         .gc_thresh              =       1024,
97         .check                  =       ip6_dst_check,
98         .negative_advice        =       ip6_negative_advice,
99         .link_failure           =       ip6_link_failure,
100         .update_pmtu            =       ip6_rt_update_pmtu,
101         .entry_size             =       sizeof(struct rt6_info),
102 };
103
/*
 * Sentinel "no route" entry.  It is handed back by rt6_device_match() on a
 * strict miss, by rt6_best_dflt() when no default route exists and by
 * rt6_cow() when the copy fails.  Its dst error is -ENETUNREACH and both
 * its input and output handlers are ip6_pkt_discard; rt6_lookup() turns any
 * entry with a non-zero error into a NULL return.
 */
104 struct rt6_info ip6_null_entry = {
105         .u = {
106                 .dst = {
107                         .__refcnt       = ATOMIC_INIT(1),
108                         .__use          = 1,
109                         .dev            = &loopback_dev,
110                         .obsolete       = -1,
111                         .error          = -ENETUNREACH,
112                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
113                         .input          = ip6_pkt_discard,
114                         .output         = ip6_pkt_discard,
115                         .ops            = &ip6_dst_ops,
                            /* The entry is its own path. */
116                         .path           = (struct dst_entry*)&ip6_null_entry,
117                 }
118         },
            /* RTF_NONEXTHOP keeps the lookup paths from trying to clone it. */
119         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
120         .rt6i_metric    = ~(u32) 0,
121         .rt6i_ref       = ATOMIC_INIT(1),
122 };
123
/*
 * Root node of the IPv6 routing table; this is the tree passed to
 * fib6_lookup(), fib6_locate() and fib6_add() throughout this file.
 * Its leaf starts out as ip6_null_entry.
 */
124 struct fib6_node ip6_routing_table = {
125         .leaf           = &ip6_null_entry,
126         .fn_flags       = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
127 };
128
129 /* Protects all the ip6 fib */
/*
 * Taken for reading (with BH disabled) by the lookup paths and for writing
 * by rt6_ins(), ip6_del_rt() and ndisc_dst_alloc().
 */
130
131 rwlock_t rt6_lock = RW_LOCK_UNLOCKED;
132
133
134 /* allocate dst with ip6_dst_ops */
135 static __inline__ struct rt6_info *ip6_dst_alloc(void)
136 {
137         return dst_alloc(&ip6_dst_ops);
138 }
139
140 /*
141  *      Route lookup. Any rt6_lock is implied.
142  */
143
144 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
145                                                     int oif,
146                                                     int strict)
147 {
148         struct rt6_info *local = NULL;
149         struct rt6_info *sprt;
150
151         if (oif) {
152                 for (sprt = rt; sprt; sprt = sprt->u.next) {
153                         struct net_device *dev = sprt->rt6i_dev;
154                         if (dev->ifindex == oif)
155                                 return sprt;
156                         if (dev->flags&IFF_LOOPBACK)
157                                 local = sprt;
158                 }
159
160                 if (local)
161                         return local;
162
163                 if (strict)
164                         return &ip6_null_entry;
165         }
166         return rt;
167 }
168
169 /*
170  *      pointer to the last default router chosen. BH is disabled locally.
171  */
/*
 * rt6_best_dflt() updates the pointer under rt6_dflt_lock and ip6_del_rt()
 * resets it to NULL under the same lock.
 */
172 static struct rt6_info *rt6_dflt_pointer;
173 static spinlock_t rt6_dflt_lock = SPIN_LOCK_UNLOCKED;
174
175 /* Default Router Selection (RFC 2461 6.3.6) */
176 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
177 {
178         struct rt6_info *match = NULL;
179         struct rt6_info *sprt;
180         int mpri = 0;
181
182         for (sprt = rt; sprt; sprt = sprt->u.next) {
183                 struct neighbour *neigh;
184                 int m = 0;
185
186                 if (!oif ||
187                     (sprt->rt6i_dev &&
188                      sprt->rt6i_dev->ifindex == oif))
189                         m += 8;
190
191                 if (sprt == rt6_dflt_pointer)
192                         m += 4;
193
194                 if ((neigh = sprt->rt6i_nexthop) != NULL) {
195                         read_lock_bh(&neigh->lock);
196                         switch (neigh->nud_state) {
197                         case NUD_REACHABLE:
198                                 m += 3;
199                                 break;
200
201                         case NUD_STALE:
202                         case NUD_DELAY:
203                         case NUD_PROBE:
204                                 m += 2;
205                                 break;
206
207                         case NUD_NOARP:
208                         case NUD_PERMANENT:
209                                 m += 1;
210                                 break;
211
212                         case NUD_INCOMPLETE:
213                         default:
214                                 read_unlock_bh(&neigh->lock);
215                                 continue;
216                         }
217                         read_unlock_bh(&neigh->lock);
218                 } else {
219                         continue;
220                 }
221
222                 if (m > mpri || m >= 12) {
223                         match = sprt;
224                         mpri = m;
225                         if (m >= 12) {
226                                 /* we choose the lastest default router if it
227                                  * is in (probably) reachable state.
228                                  * If route changed, we should do pmtu
229                                  * discovery. --yoshfuji
230                                  */
231                                 break;
232                         }
233                 }
234         }
235
236         spin_lock(&rt6_dflt_lock);
237         if (!match) {
238                 /*
239                  *      No default routers are known to be reachable.
240                  *      SHOULD round robin
241                  */
242                 if (rt6_dflt_pointer) {
243                         for (sprt = rt6_dflt_pointer->u.next;
244                              sprt; sprt = sprt->u.next) {
245                                 if (sprt->u.dst.obsolete <= 0 &&
246                                     sprt->u.dst.error == 0) {
247                                         match = sprt;
248                                         break;
249                                 }
250                         }
251                         for (sprt = rt;
252                              !match && sprt;
253                              sprt = sprt->u.next) {
254                                 if (sprt->u.dst.obsolete <= 0 &&
255                                     sprt->u.dst.error == 0) {
256                                         match = sprt;
257                                         break;
258                                 }
259                                 if (sprt == rt6_dflt_pointer)
260                                         break;
261                         }
262                 }
263         }
264
265         if (match) {
266                 if (rt6_dflt_pointer != match)
267                         RT6_TRACE("changed default router: %p->%p\n",
268                                   rt6_dflt_pointer, match);
269                 rt6_dflt_pointer = match;
270         }
271         spin_unlock(&rt6_dflt_lock);
272
273         if (!match) {
274                 /*
275                  * Last Resort: if no default routers found, 
276                  * use addrconf default route.
277                  * We don't record this route.
278                  */
279                 for (sprt = ip6_routing_table.leaf;
280                      sprt; sprt = sprt->u.next) {
281                         if ((sprt->rt6i_flags & RTF_DEFAULT) &&
282                             (!oif ||
283                              (sprt->rt6i_dev &&
284                               sprt->rt6i_dev->ifindex == oif))) {
285                                 match = sprt;
286                                 break;
287                         }
288                 }
289                 if (!match) {
290                         /* no default route.  give up. */
291                         match = &ip6_null_entry;
292                 }
293         }
294
295         return match;
296 }
297
298 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
299                             int oif, int strict)
300 {
301         struct fib6_node *fn;
302         struct rt6_info *rt;
303
304         read_lock_bh(&rt6_lock);
305         fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
306         rt = rt6_device_match(fn->leaf, oif, strict);
307         dst_hold(&rt->u.dst);
308         rt->u.dst.__use++;
309         read_unlock_bh(&rt6_lock);
310
311         rt->u.dst.lastuse = jiffies;
312         if (rt->u.dst.error == 0)
313                 return rt;
314         dst_release(&rt->u.dst);
315         return NULL;
316 }
317
318 /* rt6_ins is called with rt6_lock NOT held (it takes the write lock itself).
319    It takes a new route entry; if the addition fails for any reason, the
320    route is freed. In any case, if the caller does not hold a reference
321    to it, the route may be destroyed.
322  */
323
324 static int rt6_ins(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
325 {
326         int err;
327
328         write_lock_bh(&rt6_lock);
329         err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr);
330         write_unlock_bh(&rt6_lock);
331
332         return err;
333 }
334
335 /* No rt6_lock! If COW failed, the function returns dead route entry
336    with dst->error set to errno value.
337  */
338
339 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
340                                 struct in6_addr *saddr)
341 {
342         int err;
343         struct rt6_info *rt;
344
345         /*
346          *      Clone the route.
347          */
348
349         rt = ip6_rt_copy(ort);
350
351         if (rt) {
352                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
353
354                 if (!(rt->rt6i_flags&RTF_GATEWAY))
355                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
356
357                 rt->rt6i_dst.plen = 128;
358                 rt->rt6i_flags |= RTF_CACHE;
359                 rt->u.dst.flags |= DST_HOST;
360
361 #ifdef CONFIG_IPV6_SUBTREES
362                 if (rt->rt6i_src.plen && saddr) {
363                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
364                         rt->rt6i_src.plen = 128;
365                 }
366 #endif
367
368                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
369
370                 dst_hold(&rt->u.dst);
371
372                 err = rt6_ins(rt, NULL, NULL);
373                 if (err == 0)
374                         return rt;
375
376                 rt->u.dst.error = err;
377
378                 return rt;
379         }
380         dst_hold(&ip6_null_entry.u.dst);
381         return &ip6_null_entry;
382 }
383
/*
 * BACKTRACK() - climb towards the root after a strict lookup miss.
 *
 * Expects the enclosing function to provide the locals `rt`, `fn` and
 * `strict` and the labels `restart` and `out`.  When `rt` is the null
 * entry and `strict` is set, it walks fn->parent:
 *   - at a node flagged RTN_ROOT it takes a reference on `rt` and jumps
 *     to `out`;
 *   - at a node flagged RTN_RTINFO it jumps to `restart`.
 * If the parent chain ends, `fn` is left NULL and execution falls through.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement, including inside an unbraced if/else.
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry && strict) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_ROOT) { \
				dst_hold(&rt->u.dst); \
				goto out; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
395
396
397 void ip6_route_input(struct sk_buff *skb)
398 {
399         struct fib6_node *fn;
400         struct rt6_info *rt;
401         int strict;
402         int attempts = 3;
403
404         strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
405
406 relookup:
407         read_lock_bh(&rt6_lock);
408
409         fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
410                          &skb->nh.ipv6h->saddr);
411
412 restart:
413         rt = fn->leaf;
414
415         if ((rt->rt6i_flags & RTF_CACHE)) {
416                 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
417                 BACKTRACK();
418                 dst_hold(&rt->u.dst);
419                 goto out;
420         }
421
422         rt = rt6_device_match(rt, skb->dev->ifindex, 0);
423         BACKTRACK();
424
425         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
426                 read_unlock_bh(&rt6_lock);
427
428                 rt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
429                              &skb->nh.ipv6h->saddr);
430                         
431                 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
432                         goto out2;
433                 /* Race condition! In the gap, when rt6_lock was
434                    released someone could insert this route.  Relookup.
435                 */
436                 dst_release(&rt->u.dst);
437                 goto relookup;
438         }
439         dst_hold(&rt->u.dst);
440
441 out:
442         read_unlock_bh(&rt6_lock);
443 out2:
444         rt->u.dst.lastuse = jiffies;
445         rt->u.dst.__use++;
446         skb->dst = (struct dst_entry *) rt;
447 }
448
449 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
450 {
451         struct fib6_node *fn;
452         struct rt6_info *rt;
453         int strict;
454         int attempts = 3;
455
456         strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
457
458 relookup:
459         read_lock_bh(&rt6_lock);
460
461         fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
462
463 restart:
464         rt = fn->leaf;
465
466         if ((rt->rt6i_flags & RTF_CACHE)) {
467                 rt = rt6_device_match(rt, fl->oif, strict);
468                 BACKTRACK();
469                 dst_hold(&rt->u.dst);
470                 goto out;
471         }
472         if (rt->rt6i_flags & RTF_DEFAULT) {
473                 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
474                         rt = rt6_best_dflt(rt, fl->oif);
475         } else {
476                 rt = rt6_device_match(rt, fl->oif, strict);
477                 BACKTRACK();
478         }
479
480         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
481                 read_unlock_bh(&rt6_lock);
482
483                 rt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src);
484
485                 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
486                         goto out2;
487
488                 /* Race condition! In the gap, when rt6_lock was
489                    released someone could insert this route.  Relookup.
490                 */
491                 dst_release(&rt->u.dst);
492                 goto relookup;
493         }
494         dst_hold(&rt->u.dst);
495
496 out:
497         read_unlock_bh(&rt6_lock);
498 out2:
499         rt->u.dst.lastuse = jiffies;
500         rt->u.dst.__use++;
501         return &rt->u.dst;
502 }
503
504
505 /*
506  *      Destination cache support functions
507  */
508
509 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
510 {
511         struct rt6_info *rt;
512
513         rt = (struct rt6_info *) dst;
514
515         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
516                 return dst;
517
518         dst_release(dst);
519         return NULL;
520 }
521
522 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
523 {
524         struct rt6_info *rt = (struct rt6_info *) dst;
525
526         if (rt) {
527                 if (rt->rt6i_flags & RTF_CACHE)
528                         ip6_del_rt(rt, NULL, NULL);
529                 else
530                         dst_release(dst);
531         }
532         return NULL;
533 }
534
535 static void ip6_link_failure(struct sk_buff *skb)
536 {
537         struct rt6_info *rt;
538
539         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
540
541         rt = (struct rt6_info *) skb->dst;
542         if (rt) {
543                 if (rt->rt6i_flags&RTF_CACHE) {
544                         dst_set_expires(&rt->u.dst, 0);
545                         rt->rt6i_flags |= RTF_EXPIRES;
546                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
547                         rt->rt6i_node->fn_sernum = -1;
548         }
549 }
550
551 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
552 {
553         struct rt6_info *rt6 = (struct rt6_info*)dst;
554
555         if (mtu < dst_pmtu(dst) && rt6->rt6i_dst.plen == 128) {
556                 rt6->rt6i_flags |= RTF_MODIFIED;
557                 dst->metrics[RTAX_MTU-1] = mtu;
558         }
559 }
560
561 /* Protected by rt6_lock.  */
562 static struct dst_entry *ndisc_dst_gc_list;
563
564 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
565                                   struct neighbour *neigh,
566                                   struct in6_addr *addr,
567                                   int (*output)(struct sk_buff *))
568 {
569         struct rt6_info *rt = ip6_dst_alloc();
570
571         if (unlikely(rt == NULL))
572                 goto out;
573
574         if (dev)
575                 dev_hold(dev);
576         if (neigh)
577                 neigh_hold(neigh);
578         else
579                 neigh = ndisc_get_neigh(dev, addr);
580
581         rt->rt6i_dev      = dev;
582         rt->rt6i_nexthop  = neigh;
583         rt->rt6i_expires  = 0;
584         rt->rt6i_flags    = RTF_LOCAL;
585         rt->rt6i_metric   = 0;
586         atomic_set(&rt->u.dst.__refcnt, 1);
587         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
588         rt->u.dst.output  = output;
589
590         write_lock_bh(&rt6_lock);
591         rt->u.dst.next = ndisc_dst_gc_list;
592         ndisc_dst_gc_list = &rt->u.dst;
593         write_unlock_bh(&rt6_lock);
594
595         fib6_force_start_gc();
596
597 out:
598         return (struct dst_entry *)rt;
599 }
600
601 int ndisc_dst_gc(int *more)
602 {
603         struct dst_entry *dst, *next, **pprev;
604         int freed;
605
606         next = NULL;
607         pprev = &ndisc_dst_gc_list;
608         freed = 0;
609         while ((dst = *pprev) != NULL) {
610                 if (!atomic_read(&dst->__refcnt)) {
611                         *pprev = dst->next;
612                         dst_free(dst);
613                         freed++;
614                 } else {
615                         pprev = &dst->next;
616                         (*more)++;
617                 }
618         }
619
620         return freed;
621 }
622
623 static int ip6_dst_gc(void)
624 {
625         static unsigned expire = 30*HZ;
626         static unsigned long last_gc;
627         unsigned long now = jiffies;
628
629         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
630             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
631                 goto out;
632
633         expire++;
634         fib6_run_gc(expire);
635         last_gc = now;
636         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
637                 expire = ip6_rt_gc_timeout>>1;
638
639 out:
640         expire -= expire>>ip6_rt_gc_elasticity;
641         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
642 }
643
644 /* Clean host part of a prefix. Not necessary in radix tree,
645    but results in cleaner routing tables.
646
647    Remove it only when all the things will work!
648  */
649
650 static int ipv6_get_mtu(struct net_device *dev)
651 {
652         int mtu = IPV6_MIN_MTU;
653         struct inet6_dev *idev;
654
655         idev = in6_dev_get(dev);
656         if (idev) {
657                 mtu = idev->cnf.mtu6;
658                 in6_dev_put(idev);
659         }
660         return mtu;
661 }
662
663 static inline unsigned int ipv6_advmss(unsigned int mtu)
664 {
665         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
666
667         if (mtu < ip6_rt_min_advmss)
668                 mtu = ip6_rt_min_advmss;
669
670         /*
671          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
672          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
673          * IPV6_MAXPLEN is also valid and means: "any MSS, 
674          * rely only on pmtu discovery"
675          */
676         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
677                 mtu = IPV6_MAXPLEN;
678         return mtu;
679 }
680
681 static int ipv6_get_hoplimit(struct net_device *dev)
682 {
683         int hoplimit = ipv6_devconf.hop_limit;
684         struct inet6_dev *idev;
685
686         idev = in6_dev_get(dev);
687         if (idev) {
688                 hoplimit = idev->cnf.hop_limit;
689                 in6_dev_put(idev);
690         }
691         return hoplimit;
692 }
693
694 /*
695  *
696  */
697
/*
 * Build a route from @rtmsg and insert it into the routing table.
 *
 * @rtmsg:   route description.  Note that a zero rtmsg_metric is rewritten
 *           in place to IP6_RT_PRIO_USER.
 * @nlh:     optional netlink header; when present it supplies the route's
 *           protocol, and it is passed on to rt6_ins().
 * @_rtattr: optional array of rtattr pointers; an RTA_METRICS entry
 *           supplies per-route metrics.  Passed on to rt6_ins().
 *
 * Returns the result of rt6_ins() once the route has been assembled, or a
 * negative errno: -EINVAL (bad prefix length, source prefix without
 * CONFIG_IPV6_SUBTREES, bad gateway, bad metric type), -ENOMEM,
 * -ENODEV (no usable device), -EHOSTUNREACH (gateway not directly
 * reachable), or the error from the neighbour lookup.
 *
 * On every failure path the device reference taken here is dropped and
 * the half-built route is released with dst_free().
 */
698 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
699 {
700         int err;
701         struct rtmsg *r;
702         struct rtattr **rta;
703         struct rt6_info *rt;
704         struct net_device *dev = NULL;
705         int addr_type;
706
707         rta = (struct rtattr **) _rtattr;
708
709         if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
710                 return -EINVAL;
711 #ifndef CONFIG_IPV6_SUBTREES
712         if (rtmsg->rtmsg_src_len)
713                 return -EINVAL;
714 #endif
715         if (rtmsg->rtmsg_metric == 0)
716                 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
717
718         rt = ip6_dst_alloc();
719
720         if (rt == NULL)
721                 return -ENOMEM;
722
723         rt->u.dst.obsolete = -1;
724         rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info);
725         if (nlh && (r = NLMSG_DATA(nlh))) {
726                 rt->rt6i_protocol = r->rtm_protocol;
727         } else {
728                 rt->rt6i_protocol = RTPROT_BOOT;
729         }
730
731         addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
732
            /* Input handler depends on whether the destination is multicast. */
733         if (addr_type & IPV6_ADDR_MULTICAST)
734                 rt->u.dst.input = ip6_mc_input;
735         else
736                 rt->u.dst.input = ip6_forward;
737
738         rt->u.dst.output = ip6_output;
739
            /* From here on `dev`, when non-NULL, carries a reference. */
740         if (rtmsg->rtmsg_ifindex) {
741                 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
742                 err = -ENODEV;
743                 if (dev == NULL)
744                         goto out;
745         }
746
747         ipv6_addr_prefix(&rt->rt6i_dst.addr, 
748                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
749         rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
750         if (rt->rt6i_dst.plen == 128)
751                rt->u.dst.flags = DST_HOST;
752
753 #ifdef CONFIG_IPV6_SUBTREES
754         ipv6_addr_prefix(&rt->rt6i_src.addr, 
755                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
756         rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
757 #endif
758
759         rt->rt6i_metric = rtmsg->rtmsg_metric;
760
761         /* We cannot add true routes via loopback here,
762            they would result in kernel looping; promote them to reject routes
763          */
            /*
             * Reject routes are bound to loopback_dev, discard traffic in both
             * directions and skip the gateway/neighbour setup below.
             */
764         if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
765             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
766                 if (dev)
767                         dev_put(dev);
768                 dev = &loopback_dev;
769                 dev_hold(dev);
770                 rt->u.dst.output = ip6_pkt_discard;
771                 rt->u.dst.input = ip6_pkt_discard;
772                 rt->u.dst.error = -ENETUNREACH;
773                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
774                 goto install_route;
775         }
776
777         if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
778                 struct in6_addr *gw_addr;
779                 int gwa_type;
780
781                 gw_addr = &rtmsg->rtmsg_gateway;
782                 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
783                 gwa_type = ipv6_addr_type(gw_addr);
784
785                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
786                         struct rt6_info *grt;
787
788                         /* IPv6 strictly inhibits using not link-local
789                            addresses as nexthop address.
790                            Otherwise, router will not able to send redirects.
791                            It is very good, but in some (rare!) circumstances
792                            (SIT, PtP, NBMA NOARP links) it is handy to allow
793                            some exceptions. --ANK
794                          */
795                         err = -EINVAL;
796                         if (!(gwa_type&IPV6_ADDR_UNICAST))
797                                 goto out;
798
                            /*
                             * The gateway must itself be reachable through a
                             * non-gateway route on the same device; if no device
                             * was given, the one of that route is adopted.
                             */
799                         grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
800
801                         err = -EHOSTUNREACH;
802                         if (grt == NULL)
803                                 goto out;
804                         if (dev) {
805                                 if (dev != grt->rt6i_dev) {
806                                         dst_release(&grt->u.dst);
807                                         goto out;
808                                 }
809                         } else {
810                                 dev = grt->rt6i_dev;
811                                 dev_hold(dev);
812                         }
813                         if (!(grt->rt6i_flags&RTF_GATEWAY))
814                                 err = 0;
815                         dst_release(&grt->u.dst);
816
817                         if (err)
818                                 goto out;
819                 }
                    /* A gateway route needs a real (non-loopback) device. */
820                 err = -EINVAL;
821                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
822                         goto out;
823         }
824
825         err = -ENODEV;
826         if (dev == NULL)
827                 goto out;
828
829         if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
830                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
831                 if (IS_ERR(rt->rt6i_nexthop)) {
832                         err = PTR_ERR(rt->rt6i_nexthop);
                            /* Do not leave an error pointer in the route. */
833                         rt->rt6i_nexthop = NULL;
834                         goto out;
835                 }
836         }
837
838         rt->rt6i_flags = rtmsg->rtmsg_flags;
839
840 install_route:
            /* Copy caller-supplied metrics; type 0 is skipped, > RTAX_MAX fails. */
841         if (rta && rta[RTA_METRICS-1]) {
842                 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
843                 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
844
845                 while (RTA_OK(attr, attrlen)) {
846                         unsigned flavor = attr->rta_type;
847                         if (flavor) {
848                                 if (flavor > RTAX_MAX) {
849                                         err = -EINVAL;
850                                         goto out;
851                                 }
852                                 rt->u.dst.metrics[flavor-1] =
853                                         *(u32 *)RTA_DATA(attr);
854                         }
855                         attr = RTA_NEXT(attr, attrlen);
856                 }
857         }
858
            /* Fill in defaults for the metrics that are still zero. */
859         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) {
860                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
861                         rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
862                                 IPV6_DEFAULT_MCASTHOPS;
863                 else
864                         rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
865                                 ipv6_get_hoplimit(dev);
866         }
867
868         if (!rt->u.dst.metrics[RTAX_MTU-1])
869                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
870         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
871                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
            /* The device reference held in `dev` is handed over to the route. */
872         rt->u.dst.dev = dev;
873         return rt6_ins(rt, nlh, _rtattr);
874
875 out:
            /*
             * rt->u.dst.dev is only assigned on the success path above, so the
             * device reference is dropped explicitly here.
             */
876         if (dev)
877                 dev_put(dev);
878         dst_free((struct dst_entry *) rt);
879         return err;
880 }
881
882 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
883 {
884         int err;
885
886         write_lock_bh(&rt6_lock);
887
888         spin_lock_bh(&rt6_dflt_lock);
889         rt6_dflt_pointer = NULL;
890         spin_unlock_bh(&rt6_dflt_lock);
891
892         dst_release(&rt->u.dst);
893
894         err = fib6_del(rt, nlh, _rtattr);
895         write_unlock_bh(&rt6_lock);
896
897         return err;
898 }
899
900 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
901 {
902         struct fib6_node *fn;
903         struct rt6_info *rt;
904         int err = -ESRCH;
905
906         read_lock_bh(&rt6_lock);
907
908         fn = fib6_locate(&ip6_routing_table,
909                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
910                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
911         
912         if (fn) {
913                 for (rt = fn->leaf; rt; rt = rt->u.next) {
914                         if (rtmsg->rtmsg_ifindex &&
915                             (rt->rt6i_dev == NULL ||
916                              rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
917                                 continue;
918                         if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
919                             ipv6_addr_cmp(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
920                                 continue;
921                         if (rtmsg->rtmsg_metric &&
922                             rtmsg->rtmsg_metric != rt->rt6i_metric)
923                                 continue;
924                         dst_hold(&rt->u.dst);
925                         read_unlock_bh(&rt6_lock);
926
927                         return ip6_del_rt(rt, nlh, _rtattr);
928                 }
929         }
930         read_unlock_bh(&rt6_lock);
931
932         return err;
933 }
934
935 /*
936  *      Handle redirects
937  */
938 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
939                   struct neighbour *neigh, int on_link)
940 {
941         struct rt6_info *rt, *nrt;
942
943         /* Locate old route to this destination. */
944         rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
945
946         if (rt == NULL)
947                 return;
948
949         if (neigh->dev != rt->rt6i_dev)
950                 goto out;
951
952         /* Redirect received -> path was valid.
953            Look, redirects are sent only in response to data packets,
954            so that this nexthop apparently is reachable. --ANK
955          */
956         dst_confirm(&rt->u.dst);
957
958         /* Duplicate redirect: silently ignore. */
959         if (neigh == rt->u.dst.neighbour)
960                 goto out;
961
962         /* Current route is on-link; redirect is always invalid.
963            
964            Seems, previous statement is not true. It could
965            be node, which looks for us as on-link (f.e. proxy ndisc)
966            But then router serving it might decide, that we should
967            know truth 8)8) --ANK (980726).
968          */
969         if (!(rt->rt6i_flags&RTF_GATEWAY))
970                 goto out;
971
972         /*
973          *      RFC 2461 specifies that redirects should only be
974          *      accepted if they come from the nexthop to the target.
975          *      Due to the way default routers are chosen, this notion
976          *      is a bit fuzzy and one might need to check all default
977          *      routers.
978          */
979
980         if (ipv6_addr_cmp(saddr, &rt->rt6i_gateway)) {
981                 if (rt->rt6i_flags & RTF_DEFAULT) {
982                         struct rt6_info *rt1;
983
984                         read_lock(&rt6_lock);
985                         for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
986                                 if (!ipv6_addr_cmp(saddr, &rt1->rt6i_gateway)) {
987                                         dst_hold(&rt1->u.dst);
988                                         dst_release(&rt->u.dst);
989                                         read_unlock(&rt6_lock);
990                                         rt = rt1;
991                                         goto source_ok;
992                                 }
993                         }
994                         read_unlock(&rt6_lock);
995                 }
996                 if (net_ratelimit())
997                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
998                                "for redirect target\n");
999                 goto out;
1000         }
1001
1002 source_ok:
1003
1004         /*
1005          *      We have finally decided to accept it.
1006          */
1007
1008         nrt = ip6_rt_copy(rt);
1009         if (nrt == NULL)
1010                 goto out;
1011
1012         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1013         if (on_link)
1014                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1015
1016         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1017         nrt->rt6i_dst.plen = 128;
1018         nrt->u.dst.flags |= DST_HOST;
1019
1020         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1021         nrt->rt6i_nexthop = neigh_clone(neigh);
1022         /* Reset pmtu, it may be better */
1023         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1024         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&nrt->u.dst));
1025
1026         if (rt6_ins(nrt, NULL, NULL))
1027                 goto out;
1028
1029         if (rt->rt6i_flags&RTF_CACHE) {
1030                 ip6_del_rt(rt, NULL, NULL);
1031                 return;
1032         }
1033
1034 out:
1035         dst_release(&rt->u.dst);
1036         return;
1037 }
1038
1039 /*
1040  *      Handle ICMP "packet too big" messages
1041  *      i.e. Path MTU discovery
1042  */
1043
1044 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1045                         struct net_device *dev, u32 pmtu)
1046 {
1047         struct rt6_info *rt, *nrt;
1048
1049         if (pmtu < IPV6_MIN_MTU) {
1050                 if (net_ratelimit())
1051                         printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n",
1052                                pmtu);
1053                 /* According to RFC1981, the PMTU is set to the IPv6 minimum
1054                    link MTU if the node receives a Packet Too Big message
1055                    reporting next-hop MTU that is less than the IPv6 minimum MTU.
1056                    */
1057                 pmtu = IPV6_MIN_MTU;
1058         }
1059
1060         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1061
1062         if (rt == NULL)
1063                 return;
1064
1065         if (pmtu >= dst_pmtu(&rt->u.dst))
1066                 goto out;
1067
1068         /* New mtu received -> path was valid.
1069            They are sent only in response to data packets,
1070            so that this nexthop apparently is reachable. --ANK
1071          */
1072         dst_confirm(&rt->u.dst);
1073
1074         /* Host route. If it is static, it would be better
1075            not to override it, but add new one, so that
1076            when cache entry will expire old pmtu
1077            would return automatically.
1078          */
1079         if (rt->rt6i_flags & RTF_CACHE) {
1080                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1081                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1082                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1083                 goto out;
1084         }
1085
1086         /* Network route.
1087            Two cases are possible:
1088            1. It is connected route. Action: COW
1089            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1090          */
1091         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1092                 nrt = rt6_cow(rt, daddr, saddr);
1093                 if (!nrt->u.dst.error) {
1094                         nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1095                         /* According to RFC 1981, detecting PMTU increase shouldn't be
1096                            happened within 5 mins, the recommended timer is 10 mins.
1097                            Here this route expiration time is set to ip6_rt_mtu_expires
1098                            which is 10 mins. After 10 mins the decreased pmtu is expired
1099                            and detecting PMTU increase will be automatically happened.
1100                          */
1101                         dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1102                         nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1103                 }
1104                 dst_release(&nrt->u.dst);
1105         } else {
1106                 nrt = ip6_rt_copy(rt);
1107                 if (nrt == NULL)
1108                         goto out;
1109                 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1110                 nrt->rt6i_dst.plen = 128;
1111                 nrt->u.dst.flags |= DST_HOST;
1112                 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1113                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1114                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1115                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1116                 rt6_ins(nrt, NULL, NULL);
1117         }
1118
1119 out:
1120         dst_release(&rt->u.dst);
1121 }
1122
1123 /*
1124  *      Misc support functions
1125  */
1126
1127 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1128 {
1129         struct rt6_info *rt = ip6_dst_alloc();
1130
1131         BUG_ON(ort->rt6i_flags & RTF_NDISC);
1132
1133         if (rt) {
1134                 rt->u.dst.input = ort->u.dst.input;
1135                 rt->u.dst.output = ort->u.dst.output;
1136
1137                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1138                 rt->u.dst.dev = ort->u.dst.dev;
1139                 if (rt->u.dst.dev)
1140                         dev_hold(rt->u.dst.dev);
1141                 rt->u.dst.lastuse = jiffies;
1142                 rt->rt6i_expires = 0;
1143
1144                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1145                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1146                 rt->rt6i_metric = 0;
1147
1148                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1149 #ifdef CONFIG_IPV6_SUBTREES
1150                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1151 #endif
1152         }
1153         return rt;
1154 }
1155
1156 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1157 {       
1158         struct rt6_info *rt;
1159         struct fib6_node *fn;
1160
1161         fn = &ip6_routing_table;
1162
1163         write_lock_bh(&rt6_lock);
1164         for (rt = fn->leaf; rt; rt=rt->u.next) {
1165                 if (dev == rt->rt6i_dev &&
1166                     ipv6_addr_cmp(&rt->rt6i_gateway, addr) == 0)
1167                         break;
1168         }
1169         if (rt)
1170                 dst_hold(&rt->u.dst);
1171         write_unlock_bh(&rt6_lock);
1172         return rt;
1173 }
1174
1175 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1176                                      struct net_device *dev)
1177 {
1178         struct in6_rtmsg rtmsg;
1179
1180         memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1181         rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1182         ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1183         rtmsg.rtmsg_metric = 1024;
1184         rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP;
1185
1186         rtmsg.rtmsg_ifindex = dev->ifindex;
1187
1188         ip6_route_add(&rtmsg, NULL, NULL);
1189         return rt6_get_dflt_router(gwaddr, dev);
1190 }
1191
1192 void rt6_purge_dflt_routers(int last_resort)
1193 {
1194         struct rt6_info *rt;
1195         u32 flags;
1196
1197         if (last_resort)
1198                 flags = RTF_ALLONLINK;
1199         else
1200                 flags = RTF_DEFAULT | RTF_ADDRCONF;     
1201
1202 restart:
1203         read_lock_bh(&rt6_lock);
1204         for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1205                 if (rt->rt6i_flags & flags) {
1206                         dst_hold(&rt->u.dst);
1207
1208                         spin_lock_bh(&rt6_dflt_lock);
1209                         rt6_dflt_pointer = NULL;
1210                         spin_unlock_bh(&rt6_dflt_lock);
1211
1212                         read_unlock_bh(&rt6_lock);
1213
1214                         ip6_del_rt(rt, NULL, NULL);
1215
1216                         goto restart;
1217                 }
1218         }
1219         read_unlock_bh(&rt6_lock);
1220 }
1221
1222 int ipv6_route_ioctl(unsigned int cmd, void *arg)
1223 {
1224         struct in6_rtmsg rtmsg;
1225         int err;
1226
1227         switch(cmd) {
1228         case SIOCADDRT:         /* Add a route */
1229         case SIOCDELRT:         /* Delete a route */
1230                 if (!capable(CAP_NET_ADMIN))
1231                         return -EPERM;
1232                 err = copy_from_user(&rtmsg, arg,
1233                                      sizeof(struct in6_rtmsg));
1234                 if (err)
1235                         return -EFAULT;
1236                         
1237                 rtnl_lock();
1238                 switch (cmd) {
1239                 case SIOCADDRT:
1240                         err = ip6_route_add(&rtmsg, NULL, NULL);
1241                         break;
1242                 case SIOCDELRT:
1243                         err = ip6_route_del(&rtmsg, NULL, NULL);
1244                         break;
1245                 default:
1246                         err = -EINVAL;
1247                 }
1248                 rtnl_unlock();
1249
1250                 return err;
1251         };
1252
1253         return -EINVAL;
1254 }
1255
1256 /*
1257  *      Drop the packet on the floor
1258  */
1259
1260 int ip6_pkt_discard(struct sk_buff *skb)
1261 {
1262         IP6_INC_STATS(Ip6OutNoRoutes);
1263         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1264         kfree_skb(skb);
1265         return 0;
1266 }
1267
1268 /*
1269  *      Add address
1270  */
1271
1272 int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev, int anycast)
1273 {
1274         struct rt6_info *rt = ip6_dst_alloc();
1275
1276         if (rt == NULL)
1277                 return -ENOMEM;
1278
1279         dev_hold(&loopback_dev);
1280
1281         rt->u.dst.flags = DST_HOST;
1282         rt->u.dst.input = ip6_input;
1283         rt->u.dst.output = ip6_output;
1284         rt->rt6i_dev = &loopback_dev;
1285         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1286         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
1287         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ipv6_get_hoplimit(rt->rt6i_dev);
1288         rt->u.dst.obsolete = -1;
1289
1290         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1291         if (!anycast)
1292                 rt->rt6i_flags |= RTF_LOCAL;
1293         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1294         if (rt->rt6i_nexthop == NULL) {
1295                 dst_free((struct dst_entry *) rt);
1296                 return -ENOMEM;
1297         }
1298
1299         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1300         rt->rt6i_dst.plen = 128;
1301         rt6_ins(rt, NULL, NULL);
1302
1303         return 0;
1304 }
1305
1306 /* Delete address. Warning: you should check that this address
1307    disappeared before calling this function.
1308  */
1309
1310 int ip6_rt_addr_del(struct in6_addr *addr, struct net_device *dev)
1311 {
1312         struct rt6_info *rt;
1313         int err = -ENOENT;
1314
1315         rt = rt6_lookup(addr, NULL, loopback_dev.ifindex, 1);
1316         if (rt) {
1317                 if (rt->rt6i_dst.plen == 128)
1318                         err = ip6_del_rt(rt, NULL, NULL);
1319                 else
1320                         dst_release(&rt->u.dst);
1321         }
1322
1323         return err;
1324 }
1325
1326 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1327 {
1328         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1329             rt != &ip6_null_entry) {
1330                 RT6_TRACE("deleted by ifdown %p\n", rt);
1331                 return -1;
1332         }
1333         return 0;
1334 }
1335
1336 void rt6_ifdown(struct net_device *dev)
1337 {
1338         write_lock_bh(&rt6_lock);
1339         fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1340         write_unlock_bh(&rt6_lock);
1341 }
1342
/* Argument block passed from rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* its new MTU */
};
1348
1349 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1350 {
1351         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1352         struct inet6_dev *idev;
1353
1354         /* In IPv6 pmtu discovery is not optional,
1355            so that RTAX_MTU lock cannot disable it.
1356            We still use this lock to block changes
1357            caused by addrconf/ndisc.
1358         */
1359
1360         idev = __in6_dev_get(arg->dev);
1361         if (idev == NULL)
1362                 return 0;
1363
1364         /* For administrative MTU increase, there is no way to discover
1365            IPv6 PMTU increase, so PMTU increase should be updated here.
1366            Since RFC 1981 doesn't include administrative MTU increase
1367            update PMTU increase is a MUST. (i.e. jumbo frame)
1368          */
1369         /*
1370            If new MTU is less than route PMTU, this new MTU will be the
1371            lowest MTU in the path, update the route PMTU to reflect PMTU
1372            decreases; if new MTU is greater than route PMTU, and the
1373            old MTU is the lowest MTU in the path, update the route PMTU
1374            to reflect the increase. In this case if the other nodes' MTU
1375            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1376            PMTU discouvery.
1377          */
1378         if (rt->rt6i_dev == arg->dev &&
1379             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1380             (dst_pmtu(&rt->u.dst) > arg->mtu ||
1381              (dst_pmtu(&rt->u.dst) < arg->mtu &&
1382               dst_pmtu(&rt->u.dst) == idev->cnf.mtu6)))
1383                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1384         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1385         return 0;
1386 }
1387
1388 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1389 {
1390         struct rt6_mtu_change_arg arg;
1391
1392         arg.dev = dev;
1393         arg.mtu = mtu;
1394         read_lock_bh(&rt6_lock);
1395         fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1396         read_unlock_bh(&rt6_lock);
1397 }
1398
1399 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1400                               struct in6_rtmsg *rtmsg)
1401 {
1402         memset(rtmsg, 0, sizeof(*rtmsg));
1403
1404         rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1405         rtmsg->rtmsg_src_len = r->rtm_src_len;
1406         rtmsg->rtmsg_flags = RTF_UP;
1407         if (r->rtm_type == RTN_UNREACHABLE)
1408                 rtmsg->rtmsg_flags |= RTF_REJECT;
1409
1410         if (rta[RTA_GATEWAY-1]) {
1411                 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1412                         return -EINVAL;
1413                 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1414                 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1415         }
1416         if (rta[RTA_DST-1]) {
1417                 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1418                         return -EINVAL;
1419                 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1420         }
1421         if (rta[RTA_SRC-1]) {
1422                 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1423                         return -EINVAL;
1424                 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1425         }
1426         if (rta[RTA_OIF-1]) {
1427                 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1428                         return -EINVAL;
1429                 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1430         }
1431         if (rta[RTA_PRIORITY-1]) {
1432                 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1433                         return -EINVAL;
1434                 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1435         }
1436         return 0;
1437 }
1438
1439 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1440 {
1441         struct rtmsg *r = NLMSG_DATA(nlh);
1442         struct in6_rtmsg rtmsg;
1443
1444         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1445                 return -EINVAL;
1446         return ip6_route_del(&rtmsg, nlh, arg);
1447 }
1448
1449 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1450 {
1451         struct rtmsg *r = NLMSG_DATA(nlh);
1452         struct in6_rtmsg rtmsg;
1453
1454         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1455                 return -EINVAL;
1456         return ip6_route_add(&rtmsg, nlh, arg);
1457 }
1458
/* Per-call context handed to rt6_dump_route() through the tree walker. */
struct rt6_rtnl_dump_arg
{
	struct sk_buff *skb;		/* buffer the dump is written into */
	struct netlink_callback *cb;	/* netlink dump state of the request */
};
1464
1465 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1466                          struct in6_addr *dst,
1467                          struct in6_addr *src,
1468                          int iif,
1469                          int type, u32 pid, u32 seq,
1470                          struct nlmsghdr *in_nlh, int prefix)
1471 {
1472         struct rtmsg *rtm;
1473         struct nlmsghdr  *nlh;
1474         unsigned char    *b = skb->tail;
1475         struct rta_cacheinfo ci;
1476
1477         if (prefix) {   /* user wants prefix routes only */
1478                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1479                         /* success since this is not a prefix route */
1480                         return 1;
1481                 }
1482         }
1483
1484         if (!pid && in_nlh) {
1485                 pid = in_nlh->nlmsg_pid;
1486         }
1487
1488         nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
1489         rtm = NLMSG_DATA(nlh);
1490         rtm->rtm_family = AF_INET6;
1491         rtm->rtm_dst_len = rt->rt6i_dst.plen;
1492         rtm->rtm_src_len = rt->rt6i_src.plen;
1493         rtm->rtm_tos = 0;
1494         rtm->rtm_table = RT_TABLE_MAIN;
1495         if (rt->rt6i_flags&RTF_REJECT)
1496                 rtm->rtm_type = RTN_UNREACHABLE;
1497         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1498                 rtm->rtm_type = RTN_LOCAL;
1499         else
1500                 rtm->rtm_type = RTN_UNICAST;
1501         rtm->rtm_flags = 0;
1502         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1503         rtm->rtm_protocol = rt->rt6i_protocol;
1504         if (rt->rt6i_flags&RTF_DYNAMIC)
1505                 rtm->rtm_protocol = RTPROT_REDIRECT;
1506         else if (rt->rt6i_flags&(RTF_ADDRCONF|RTF_ALLONLINK))
1507                 rtm->rtm_protocol = RTPROT_KERNEL;
1508         else if (rt->rt6i_flags&RTF_DEFAULT)
1509                 rtm->rtm_protocol = RTPROT_RA;
1510
1511         if (rt->rt6i_flags&RTF_CACHE)
1512                 rtm->rtm_flags |= RTM_F_CLONED;
1513
1514         if (dst) {
1515                 RTA_PUT(skb, RTA_DST, 16, dst);
1516                 rtm->rtm_dst_len = 128;
1517         } else if (rtm->rtm_dst_len)
1518                 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1519 #ifdef CONFIG_IPV6_SUBTREES
1520         if (src) {
1521                 RTA_PUT(skb, RTA_SRC, 16, src);
1522                 rtm->rtm_src_len = 128;
1523         } else if (rtm->rtm_src_len)
1524                 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1525 #endif
1526         if (iif)
1527                 RTA_PUT(skb, RTA_IIF, 4, &iif);
1528         else if (dst) {
1529                 struct in6_addr saddr_buf;
1530                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1531                         RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1532         }
1533         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1534                 goto rtattr_failure;
1535         if (rt->u.dst.neighbour)
1536                 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1537         if (rt->u.dst.dev)
1538                 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1539         RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1540         ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1541         if (rt->rt6i_expires)
1542                 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1543         else
1544                 ci.rta_expires = 0;
1545         ci.rta_used = rt->u.dst.__use;
1546         ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1547         ci.rta_error = rt->u.dst.error;
1548         ci.rta_id = 0;
1549         ci.rta_ts = 0;
1550         ci.rta_tsage = 0;
1551         RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1552         nlh->nlmsg_len = skb->tail - b;
1553         return skb->len;
1554
1555 nlmsg_failure:
1556 rtattr_failure:
1557         skb_trim(skb, b - skb->data);
1558         return -1;
1559 }
1560
1561 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1562 {
1563         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1564         int prefix;
1565
1566         if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1567                 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1568                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1569         } else
1570                 prefix = 0;
1571
1572         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1573                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1574                      NULL, prefix);
1575 }
1576
1577 static int fib6_dump_node(struct fib6_walker_t *w)
1578 {
1579         int res;
1580         struct rt6_info *rt;
1581
1582         for (rt = w->leaf; rt; rt = rt->u.next) {
1583                 res = rt6_dump_route(rt, w->args);
1584                 if (res < 0) {
1585                         /* Frame is full, suspend walking */
1586                         w->leaf = rt;
1587                         return 1;
1588                 }
1589                 BUG_TRAP(res!=0);
1590         }
1591         w->leaf = NULL;
1592         return 0;
1593 }
1594
1595 static void fib6_dump_end(struct netlink_callback *cb)
1596 {
1597         struct fib6_walker_t *w = (void*)cb->args[0];
1598
1599         if (w) {
1600                 cb->args[0] = 0;
1601                 fib6_walker_unlink(w);
1602                 kfree(w);
1603         }
1604         if (cb->args[1]) {
1605                 cb->done = (void*)cb->args[1];
1606                 cb->args[1] = 0;
1607         }
1608 }
1609
1610 static int fib6_dump_done(struct netlink_callback *cb)
1611 {
1612         fib6_dump_end(cb);
1613         return cb->done(cb);
1614 }
1615
1616 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1617 {
1618         struct rt6_rtnl_dump_arg arg;
1619         struct fib6_walker_t *w;
1620         int res;
1621
1622         arg.skb = skb;
1623         arg.cb = cb;
1624
1625         w = (void*)cb->args[0];
1626         if (w == NULL) {
1627                 /* New dump:
1628                  * 
1629                  * 1. hook callback destructor.
1630                  */
1631                 cb->args[1] = (long)cb->done;
1632                 cb->done = fib6_dump_done;
1633
1634                 /*
1635                  * 2. allocate and initialize walker.
1636                  */
1637                 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1638                 if (w == NULL)
1639                         return -ENOMEM;
1640                 RT6_TRACE("dump<%p", w);
1641                 memset(w, 0, sizeof(*w));
1642                 w->root = &ip6_routing_table;
1643                 w->func = fib6_dump_node;
1644                 w->args = &arg;
1645                 cb->args[0] = (long)w;
1646                 read_lock_bh(&rt6_lock);
1647                 res = fib6_walk(w);
1648                 read_unlock_bh(&rt6_lock);
1649         } else {
1650                 w->args = &arg;
1651                 read_lock_bh(&rt6_lock);
1652                 res = fib6_walk_continue(w);
1653                 read_unlock_bh(&rt6_lock);
1654         }
1655 #if RT6_DEBUG >= 3
1656         if (res <= 0 && skb->len == 0)
1657                 RT6_TRACE("%p>dump end\n", w);
1658 #endif
1659         res = res < 0 ? res : skb->len;
1660         /* res < 0 is an error. (really, impossible)
1661            res == 0 means that dump is complete, but skb still can contain data.
1662            res > 0 dump is not complete, but frame is full.
1663          */
1664         /* Destroy walker, if dump of this table is complete. */
1665         if (res <= 0)
1666                 fib6_dump_end(cb);
1667         return res;
1668 }
1669
1670 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1671 {
1672         struct rtattr **rta = arg;
1673         int iif = 0;
1674         int err = -ENOBUFS;
1675         struct sk_buff *skb;
1676         struct flowi fl;
1677         struct rt6_info *rt;
1678
1679         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1680         if (skb == NULL)
1681                 goto out;
1682
1683         /* Reserve room for dummy headers, this skb can pass
1684            through good chunk of routing engine.
1685          */
1686         skb->mac.raw = skb->data;
1687         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1688
1689         memset(&fl, 0, sizeof(fl));
1690         if (rta[RTA_SRC-1])
1691                 ipv6_addr_copy(&fl.fl6_src,
1692                                (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1693         if (rta[RTA_DST-1])
1694                 ipv6_addr_copy(&fl.fl6_dst,
1695                                (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1696
1697         if (rta[RTA_IIF-1])
1698                 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1699
1700         if (iif) {
1701                 struct net_device *dev;
1702                 dev = __dev_get_by_index(iif);
1703                 if (!dev) {
1704                         err = -ENODEV;
1705                         goto out_free;
1706                 }
1707         }
1708
1709         fl.oif = 0;
1710         if (rta[RTA_OIF-1])
1711                 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1712
1713         rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1714
1715         skb->dst = &rt->u.dst;
1716
1717         NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1718         err = rt6_fill_node(skb, rt, 
1719                             &fl.fl6_dst, &fl.fl6_src,
1720                             iif,
1721                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1722                             nlh->nlmsg_seq, nlh, 0);
1723         if (err < 0) {
1724                 err = -EMSGSIZE;
1725                 goto out_free;
1726         }
1727
1728         err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1729         if (err > 0)
1730                 err = 0;
1731 out:
1732         return err;
1733 out_free:
1734         kfree_skb(skb);
1735         goto out;       
1736 }
1737
1738 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh)
1739 {
1740         struct sk_buff *skb;
1741         int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1742
1743         skb = alloc_skb(size, gfp_any());
1744         if (!skb) {
1745                 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1746                 return;
1747         }
1748         if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0) < 0) {
1749                 kfree_skb(skb);
1750                 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1751                 return;
1752         }
1753         NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
1754         netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
1755 }
1756
1757 /*
1758  *      /proc
1759  */
1760
1761 #ifdef CONFIG_PROC_FS
1762
/*
 * Length of one route record in the /proc output; rt6_info_route() uses
 * it to turn a byte offset into a number of records to skip.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* State shared between the /proc read and rt6_info_route(). */
struct rt6_proc_arg
{
	char *buffer;	/* output buffer */
	int offset;	/* requested byte offset into the listing */
	int length;	/* limit; no record is added once len reaches it */
	int skip;	/* records skipped so far */
	int len;	/* bytes written to buffer so far */
};
1773
1774 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1775 {
1776         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1777         int i;
1778
1779         if (arg->skip < arg->offset / RT6_INFO_LEN) {
1780                 arg->skip++;
1781                 return 0;
1782         }
1783
1784         if (arg->len >= arg->length)
1785                 return 0;
1786
1787         for (i=0; i<16; i++) {
1788                 sprintf(arg->buffer + arg->len, "%02x",
1789                         rt->rt6i_dst.addr.s6_addr[i]);
1790                 arg->len += 2;
1791         }
1792         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1793                             rt->rt6i_dst.plen);
1794
1795 #ifdef CONFIG_IPV6_SUBTREES
1796         for (i=0; i<16; i++) {
1797                 sprintf(arg->buffer + arg->len, "%02x",
1798                         rt->rt6i_src.addr.s6_addr[i]);
1799                 arg->len += 2;
1800         }
1801         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1802                             rt->rt6i_src.plen);
1803 #else
1804         sprintf(arg->buffer + arg->len,
1805                 "00000000000000000000000000000000 00 ");
1806         arg->len += 36;
1807 #endif
1808
1809         if (rt->rt6i_nexthop) {
1810                 for (i=0; i<16; i++) {
1811                         sprintf(arg->buffer + arg->len, "%02x",
1812                                 rt->rt6i_nexthop->primary_key[i]);
1813                         arg->len += 2;
1814                 }
1815         } else {
1816                 sprintf(arg->buffer + arg->len,
1817                         "00000000000000000000000000000000");
1818                 arg->len += 32;
1819         }
1820         arg->len += sprintf(arg->buffer + arg->len,
1821                             " %08x %08x %08x %08x %8s\n",
1822                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1823                             rt->u.dst.__use, rt->rt6i_flags, 
1824                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
1825         return 0;
1826 }
1827
1828 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1829 {
1830         struct rt6_proc_arg arg;
1831         arg.buffer = buffer;
1832         arg.offset = offset;
1833         arg.length = length;
1834         arg.skip = 0;
1835         arg.len = 0;
1836
1837         read_lock_bh(&rt6_lock);
1838         fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1839         read_unlock_bh(&rt6_lock);
1840
1841         *start = buffer;
1842         if (offset)
1843                 *start += offset % RT6_INFO_LEN;
1844
1845         arg.len -= offset % RT6_INFO_LEN;
1846
1847         if (arg.len > length)
1848                 arg.len = length;
1849         if (arg.len < 0)
1850                 arg.len = 0;
1851
1852         return arg.len;
1853 }
1854
1855 extern struct rt6_statistics rt6_stats;
1856
1857 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1858 {
1859         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1860                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1861                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1862                       rt6_stats.fib_rt_cache,
1863                       atomic_read(&ip6_dst_ops.entries),
1864                       rt6_stats.fib_discarded_routes);
1865
1866         return 0;
1867 }
1868
1869 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1870 {
1871         return single_open(file, rt6_stats_seq_show, NULL);
1872 }
1873
1874 static struct file_operations rt6_stats_seq_fops = {
1875         .owner   = THIS_MODULE,
1876         .open    = rt6_stats_seq_open,
1877         .read    = seq_read,
1878         .llseek  = seq_lseek,
1879         .release = single_release,
1880 };
1881 #endif  /* CONFIG_PROC_FS */
1882
1883 #ifdef CONFIG_SYSCTL
1884
1885 static int flush_delay;
1886
1887 static
1888 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1889                               void *buffer, size_t *lenp)
1890 {
1891         if (write) {
1892                 proc_dointvec(ctl, write, filp, buffer, lenp);
1893                 if (flush_delay < 0)
1894                         flush_delay = 0;
1895                 fib6_run_gc((unsigned long)flush_delay);
1896                 return 0;
1897         } else
1898                 return -EINVAL;
1899 }
1900
1901 ctl_table ipv6_route_table[] = {
1902         {
1903                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH, 
1904                 .procname       =       "flush",
1905                 .data           =       &flush_delay,
1906                 .maxlen         =       sizeof(int),
1907                 .mode           =       0644,
1908                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
1909         },
1910         {
1911                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
1912                 .procname       =       "gc_thresh",
1913                 .data           =       &ip6_dst_ops.gc_thresh,
1914                 .maxlen         =       sizeof(int),
1915                 .mode           =       0644,
1916                 .proc_handler   =       &proc_dointvec,
1917         },
1918         {
1919                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
1920                 .procname       =       "max_size",
1921                 .data           =       &ip6_rt_max_size,
1922                 .maxlen         =       sizeof(int),
1923                 .mode           =       0644,
1924                 .proc_handler   =       &proc_dointvec,
1925         },
1926         {
1927                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
1928                 .procname       =       "gc_min_interval",
1929                 .data           =       &ip6_rt_gc_min_interval,
1930                 .maxlen         =       sizeof(int),
1931                 .mode           =       0644,
1932                 .proc_handler   =       &proc_dointvec_jiffies,
1933                 .strategy       =       &sysctl_jiffies,
1934         },
1935         {
1936                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
1937                 .procname       =       "gc_timeout",
1938                 .data           =       &ip6_rt_gc_timeout,
1939                 .maxlen         =       sizeof(int),
1940                 .mode           =       0644,
1941                 .proc_handler   =       &proc_dointvec_jiffies,
1942                 .strategy       =       &sysctl_jiffies,
1943         },
1944         {
1945                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
1946                 .procname       =       "gc_interval",
1947                 .data           =       &ip6_rt_gc_interval,
1948                 .maxlen         =       sizeof(int),
1949                 .mode           =       0644,
1950                 .proc_handler   =       &proc_dointvec_jiffies,
1951                 .strategy       =       &sysctl_jiffies,
1952         },
1953         {
1954                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
1955                 .procname       =       "gc_elasticity",
1956                 .data           =       &ip6_rt_gc_elasticity,
1957                 .maxlen         =       sizeof(int),
1958                 .mode           =       0644,
1959                 .proc_handler   =       &proc_dointvec_jiffies,
1960                 .strategy       =       &sysctl_jiffies,
1961         },
1962         {
1963                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
1964                 .procname       =       "mtu_expires",
1965                 .data           =       &ip6_rt_mtu_expires,
1966                 .maxlen         =       sizeof(int),
1967                 .mode           =       0644,
1968                 .proc_handler   =       &proc_dointvec_jiffies,
1969                 .strategy       =       &sysctl_jiffies,
1970         },
1971         {
1972                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
1973                 .procname       =       "min_adv_mss",
1974                 .data           =       &ip6_rt_min_advmss,
1975                 .maxlen         =       sizeof(int),
1976                 .mode           =       0644,
1977                 .proc_handler   =       &proc_dointvec_jiffies,
1978                 .strategy       =       &sysctl_jiffies,
1979         },
1980         { .ctl_name = 0 }
1981 };
1982
1983 #endif
1984
1985 void __init ip6_route_init(void)
1986 {
1987         struct proc_dir_entry *p;
1988
1989         ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
1990                                                      sizeof(struct rt6_info),
1991                                                      0, SLAB_HWCACHE_ALIGN,
1992                                                      NULL, NULL);
1993         fib6_init();
1994 #ifdef  CONFIG_PROC_FS
1995         p = proc_net_create("ipv6_route", 0, rt6_proc_info);
1996         if (p)
1997                 p->owner = THIS_MODULE;
1998
1999         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2000 #endif
2001 #ifdef CONFIG_XFRM
2002         xfrm6_init();
2003 #endif
2004 }
2005
2006 #ifdef MODULE
2007 void ip6_route_cleanup(void)
2008 {
2009 #ifdef CONFIG_PROC_FS
2010         proc_net_remove("ipv6_route");
2011         proc_net_remove("rt6_stats");
2012 #endif
2013 #ifdef CONFIG_XFRM
2014         xfrm6_fini();
2015 #endif
2016         rt6_ifdown(NULL);
2017         fib6_gc_cleanup();
2018         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2019 }
2020 #endif  /* MODULE */