Linux-2.6.12-rc2
[linux-flexiantxendom0-natty.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  */
26
27 #include <linux/config.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/netlink.h>
39 #include <linux/if_arp.h>
40
41 #ifdef  CONFIG_PROC_FS
42 #include <linux/proc_fs.h>
43 #include <linux/seq_file.h>
44 #endif
45
46 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
52 #include <net/tcp.h>
53 #include <linux/rtnetlink.h>
54 #include <net/dst.h>
55 #include <net/xfrm.h>
56
57 #include <asm/uaccess.h>
58
59 #ifdef CONFIG_SYSCTL
60 #include <linux/sysctl.h>
61 #endif
62
/* Tracing level: define as 3 (or higher) to turn RDBG/RT6_TRACE into printk. */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
/* Tracing off: RDBG vanishes, RT6_TRACE becomes an empty statement. */
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
/* Tracing on: RDBG takes a parenthesised printk argument list. */
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
73
74
75 static int ip6_rt_max_size = 4096;
76 static int ip6_rt_gc_min_interval = HZ / 2;
77 static int ip6_rt_gc_timeout = 60*HZ;
78 int ip6_rt_gc_interval = 30*HZ;
79 static int ip6_rt_gc_elasticity = 9;
80 static int ip6_rt_mtu_expires = 10*60*HZ;
81 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
82
83 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
84 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
85 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86 static void             ip6_dst_destroy(struct dst_entry *);
87 static void             ip6_dst_ifdown(struct dst_entry *,
88                                        struct net_device *dev, int how);
89 static int               ip6_dst_gc(void);
90
91 static int              ip6_pkt_discard(struct sk_buff *skb);
92 static int              ip6_pkt_discard_out(struct sk_buff *skb);
93 static void             ip6_link_failure(struct sk_buff *skb);
94 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
95
96 static struct dst_ops ip6_dst_ops = {
97         .family                 =       AF_INET6,
98         .protocol               =       __constant_htons(ETH_P_IPV6),
99         .gc                     =       ip6_dst_gc,
100         .gc_thresh              =       1024,
101         .check                  =       ip6_dst_check,
102         .destroy                =       ip6_dst_destroy,
103         .ifdown                 =       ip6_dst_ifdown,
104         .negative_advice        =       ip6_negative_advice,
105         .link_failure           =       ip6_link_failure,
106         .update_pmtu            =       ip6_rt_update_pmtu,
107         .entry_size             =       sizeof(struct rt6_info),
108 };
109
110 struct rt6_info ip6_null_entry = {
111         .u = {
112                 .dst = {
113                         .__refcnt       = ATOMIC_INIT(1),
114                         .__use          = 1,
115                         .dev            = &loopback_dev,
116                         .obsolete       = -1,
117                         .error          = -ENETUNREACH,
118                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
119                         .input          = ip6_pkt_discard,
120                         .output         = ip6_pkt_discard_out,
121                         .ops            = &ip6_dst_ops,
122                         .path           = (struct dst_entry*)&ip6_null_entry,
123                 }
124         },
125         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
126         .rt6i_metric    = ~(u32) 0,
127         .rt6i_ref       = ATOMIC_INIT(1),
128 };
129
130 struct fib6_node ip6_routing_table = {
131         .leaf           = &ip6_null_entry,
132         .fn_flags       = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
133 };
134
135 /* Protects all the ip6 fib */
136
137 DEFINE_RWLOCK(rt6_lock);
138
139
140 /* allocate dst with ip6_dst_ops */
141 static __inline__ struct rt6_info *ip6_dst_alloc(void)
142 {
143         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
144 }
145
146 static void ip6_dst_destroy(struct dst_entry *dst)
147 {
148         struct rt6_info *rt = (struct rt6_info *)dst;
149         struct inet6_dev *idev = rt->rt6i_idev;
150
151         if (idev != NULL) {
152                 rt->rt6i_idev = NULL;
153                 in6_dev_put(idev);
154         }       
155 }
156
157 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
158                            int how)
159 {
160         struct rt6_info *rt = (struct rt6_info *)dst;
161         struct inet6_dev *idev = rt->rt6i_idev;
162
163         if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
164                 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
165                 if (loopback_idev != NULL) {
166                         rt->rt6i_idev = loopback_idev;
167                         in6_dev_put(idev);
168                 }
169         }
170 }
171
172 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
173 {
174         return (rt->rt6i_flags & RTF_EXPIRES &&
175                 time_after(jiffies, rt->rt6i_expires));
176 }
177
178 /*
179  *      Route lookup. Any rt6_lock is implied.
180  */
181
182 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
183                                                     int oif,
184                                                     int strict)
185 {
186         struct rt6_info *local = NULL;
187         struct rt6_info *sprt;
188
189         if (oif) {
190                 for (sprt = rt; sprt; sprt = sprt->u.next) {
191                         struct net_device *dev = sprt->rt6i_dev;
192                         if (dev->ifindex == oif)
193                                 return sprt;
194                         if (dev->flags & IFF_LOOPBACK) {
195                                 if (sprt->rt6i_idev == NULL ||
196                                     sprt->rt6i_idev->dev->ifindex != oif) {
197                                         if (strict && oif)
198                                                 continue;
199                                         if (local && (!oif || 
200                                                       local->rt6i_idev->dev->ifindex == oif))
201                                                 continue;
202                                 }
203                                 local = sprt;
204                         }
205                 }
206
207                 if (local)
208                         return local;
209
210                 if (strict)
211                         return &ip6_null_entry;
212         }
213         return rt;
214 }
215
216 /*
217  *      pointer to the last default router chosen. BH is disabled locally.
218  */
219 static struct rt6_info *rt6_dflt_pointer;
220 static DEFINE_SPINLOCK(rt6_dflt_lock);
221
222 void rt6_reset_dflt_pointer(struct rt6_info *rt)
223 {
224         spin_lock_bh(&rt6_dflt_lock);
225         if (rt == NULL || rt == rt6_dflt_pointer) {
226                 RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
227                 rt6_dflt_pointer = NULL;
228         }
229         spin_unlock_bh(&rt6_dflt_lock);
230 }
231
232 /* Default Router Selection (RFC 2461 6.3.6) */
233 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
234 {
235         struct rt6_info *match = NULL;
236         struct rt6_info *sprt;
237         int mpri = 0;
238
239         for (sprt = rt; sprt; sprt = sprt->u.next) {
240                 struct neighbour *neigh;
241                 int m = 0;
242
243                 if (!oif ||
244                     (sprt->rt6i_dev &&
245                      sprt->rt6i_dev->ifindex == oif))
246                         m += 8;
247
248                 if (rt6_check_expired(sprt))
249                         continue;
250
251                 if (sprt == rt6_dflt_pointer)
252                         m += 4;
253
254                 if ((neigh = sprt->rt6i_nexthop) != NULL) {
255                         read_lock_bh(&neigh->lock);
256                         switch (neigh->nud_state) {
257                         case NUD_REACHABLE:
258                                 m += 3;
259                                 break;
260
261                         case NUD_STALE:
262                         case NUD_DELAY:
263                         case NUD_PROBE:
264                                 m += 2;
265                                 break;
266
267                         case NUD_NOARP:
268                         case NUD_PERMANENT:
269                                 m += 1;
270                                 break;
271
272                         case NUD_INCOMPLETE:
273                         default:
274                                 read_unlock_bh(&neigh->lock);
275                                 continue;
276                         }
277                         read_unlock_bh(&neigh->lock);
278                 } else {
279                         continue;
280                 }
281
282                 if (m > mpri || m >= 12) {
283                         match = sprt;
284                         mpri = m;
285                         if (m >= 12) {
286                                 /* we choose the last default router if it
287                                  * is in (probably) reachable state.
288                                  * If route changed, we should do pmtu
289                                  * discovery. --yoshfuji
290                                  */
291                                 break;
292                         }
293                 }
294         }
295
296         spin_lock(&rt6_dflt_lock);
297         if (!match) {
298                 /*
299                  *      No default routers are known to be reachable.
300                  *      SHOULD round robin
301                  */
302                 if (rt6_dflt_pointer) {
303                         for (sprt = rt6_dflt_pointer->u.next;
304                              sprt; sprt = sprt->u.next) {
305                                 if (sprt->u.dst.obsolete <= 0 &&
306                                     sprt->u.dst.error == 0 &&
307                                     !rt6_check_expired(sprt)) {
308                                         match = sprt;
309                                         break;
310                                 }
311                         }
312                         for (sprt = rt;
313                              !match && sprt;
314                              sprt = sprt->u.next) {
315                                 if (sprt->u.dst.obsolete <= 0 &&
316                                     sprt->u.dst.error == 0 &&
317                                     !rt6_check_expired(sprt)) {
318                                         match = sprt;
319                                         break;
320                                 }
321                                 if (sprt == rt6_dflt_pointer)
322                                         break;
323                         }
324                 }
325         }
326
327         if (match) {
328                 if (rt6_dflt_pointer != match)
329                         RT6_TRACE("changed default router: %p->%p\n",
330                                   rt6_dflt_pointer, match);
331                 rt6_dflt_pointer = match;
332         }
333         spin_unlock(&rt6_dflt_lock);
334
335         if (!match) {
336                 /*
337                  * Last Resort: if no default routers found, 
338                  * use addrconf default route.
339                  * We don't record this route.
340                  */
341                 for (sprt = ip6_routing_table.leaf;
342                      sprt; sprt = sprt->u.next) {
343                         if (!rt6_check_expired(sprt) &&
344                             (sprt->rt6i_flags & RTF_DEFAULT) &&
345                             (!oif ||
346                              (sprt->rt6i_dev &&
347                               sprt->rt6i_dev->ifindex == oif))) {
348                                 match = sprt;
349                                 break;
350                         }
351                 }
352                 if (!match) {
353                         /* no default route.  give up. */
354                         match = &ip6_null_entry;
355                 }
356         }
357
358         return match;
359 }
360
361 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
362                             int oif, int strict)
363 {
364         struct fib6_node *fn;
365         struct rt6_info *rt;
366
367         read_lock_bh(&rt6_lock);
368         fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
369         rt = rt6_device_match(fn->leaf, oif, strict);
370         dst_hold(&rt->u.dst);
371         rt->u.dst.__use++;
372         read_unlock_bh(&rt6_lock);
373
374         rt->u.dst.lastuse = jiffies;
375         if (rt->u.dst.error == 0)
376                 return rt;
377         dst_release(&rt->u.dst);
378         return NULL;
379 }
380
381 /* ip6_ins_rt is called with FREE rt6_lock.
382    It takes new route entry, the addition fails by any reason the
383    route is freed. In any case, if caller does not hold it, it may
384    be destroyed.
385  */
386
387 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
388 {
389         int err;
390
391         write_lock_bh(&rt6_lock);
392         err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr);
393         write_unlock_bh(&rt6_lock);
394
395         return err;
396 }
397
398 /* No rt6_lock! If COW failed, the function returns dead route entry
399    with dst->error set to errno value.
400  */
401
402 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
403                                 struct in6_addr *saddr)
404 {
405         int err;
406         struct rt6_info *rt;
407
408         /*
409          *      Clone the route.
410          */
411
412         rt = ip6_rt_copy(ort);
413
414         if (rt) {
415                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
416
417                 if (!(rt->rt6i_flags&RTF_GATEWAY))
418                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
419
420                 rt->rt6i_dst.plen = 128;
421                 rt->rt6i_flags |= RTF_CACHE;
422                 rt->u.dst.flags |= DST_HOST;
423
424 #ifdef CONFIG_IPV6_SUBTREES
425                 if (rt->rt6i_src.plen && saddr) {
426                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
427                         rt->rt6i_src.plen = 128;
428                 }
429 #endif
430
431                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
432
433                 dst_hold(&rt->u.dst);
434
435                 err = ip6_ins_rt(rt, NULL, NULL);
436                 if (err == 0)
437                         return rt;
438
439                 rt->u.dst.error = err;
440
441                 return rt;
442         }
443         dst_hold(&ip6_null_entry.u.dst);
444         return &ip6_null_entry;
445 }
446
/*
 * Helper for ip6_route_input()/ip6_route_output().  Relies on the caller's
 * locals `rt`, `fn` and `strict` and on its `restart` and `out` labels.
 *
 * When a strict lookup ended on the null entry, walk up the fib tree:
 *  - a parent carrying route info (RTN_RTINFO) restarts matching there;
 *  - reaching a root node (RTN_ROOT) takes a reference on `rt` and jumps
 *    to `out`;
 *  - running out of parents simply falls through.
 *
 * Wrapped in do { } while (0) so that `BACKTRACK();` is a single statement
 * and cannot capture a following `else`.
 */
#define BACKTRACK() \
do { \
        if (rt == &ip6_null_entry && strict) { \
                while ((fn = fn->parent) != NULL) { \
                        if (fn->fn_flags & RTN_ROOT) { \
                                dst_hold(&rt->u.dst); \
                                goto out; \
                        } \
                        if (fn->fn_flags & RTN_RTINFO) \
                                goto restart; \
                } \
        } \
} while (0)
458
459
460 void ip6_route_input(struct sk_buff *skb)
461 {
462         struct fib6_node *fn;
463         struct rt6_info *rt;
464         int strict;
465         int attempts = 3;
466
467         strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
468
469 relookup:
470         read_lock_bh(&rt6_lock);
471
472         fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
473                          &skb->nh.ipv6h->saddr);
474
475 restart:
476         rt = fn->leaf;
477
478         if ((rt->rt6i_flags & RTF_CACHE)) {
479                 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
480                 BACKTRACK();
481                 dst_hold(&rt->u.dst);
482                 goto out;
483         }
484
485         rt = rt6_device_match(rt, skb->dev->ifindex, 0);
486         BACKTRACK();
487
488         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
489                 struct rt6_info *nrt;
490                 dst_hold(&rt->u.dst);
491                 read_unlock_bh(&rt6_lock);
492
493                 nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
494                               &skb->nh.ipv6h->saddr);
495
496                 dst_release(&rt->u.dst);
497                 rt = nrt;
498
499                 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
500                         goto out2;
501
502                 /* Race condition! In the gap, when rt6_lock was
503                    released someone could insert this route.  Relookup.
504                 */
505                 dst_release(&rt->u.dst);
506                 goto relookup;
507         }
508         dst_hold(&rt->u.dst);
509
510 out:
511         read_unlock_bh(&rt6_lock);
512 out2:
513         rt->u.dst.lastuse = jiffies;
514         rt->u.dst.__use++;
515         skb->dst = (struct dst_entry *) rt;
516 }
517
518 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
519 {
520         struct fib6_node *fn;
521         struct rt6_info *rt;
522         int strict;
523         int attempts = 3;
524
525         strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
526
527 relookup:
528         read_lock_bh(&rt6_lock);
529
530         fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
531
532 restart:
533         rt = fn->leaf;
534
535         if ((rt->rt6i_flags & RTF_CACHE)) {
536                 rt = rt6_device_match(rt, fl->oif, strict);
537                 BACKTRACK();
538                 dst_hold(&rt->u.dst);
539                 goto out;
540         }
541         if (rt->rt6i_flags & RTF_DEFAULT) {
542                 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
543                         rt = rt6_best_dflt(rt, fl->oif);
544         } else {
545                 rt = rt6_device_match(rt, fl->oif, strict);
546                 BACKTRACK();
547         }
548
549         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
550                 struct rt6_info *nrt;
551                 dst_hold(&rt->u.dst);
552                 read_unlock_bh(&rt6_lock);
553
554                 nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src);
555
556                 dst_release(&rt->u.dst);
557                 rt = nrt;
558
559                 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
560                         goto out2;
561
562                 /* Race condition! In the gap, when rt6_lock was
563                    released someone could insert this route.  Relookup.
564                 */
565                 dst_release(&rt->u.dst);
566                 goto relookup;
567         }
568         dst_hold(&rt->u.dst);
569
570 out:
571         read_unlock_bh(&rt6_lock);
572 out2:
573         rt->u.dst.lastuse = jiffies;
574         rt->u.dst.__use++;
575         return &rt->u.dst;
576 }
577
578
579 /*
580  *      Destination cache support functions
581  */
582
583 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
584 {
585         struct rt6_info *rt;
586
587         rt = (struct rt6_info *) dst;
588
589         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
590                 return dst;
591
592         return NULL;
593 }
594
595 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
596 {
597         struct rt6_info *rt = (struct rt6_info *) dst;
598
599         if (rt) {
600                 if (rt->rt6i_flags & RTF_CACHE)
601                         ip6_del_rt(rt, NULL, NULL);
602                 else
603                         dst_release(dst);
604         }
605         return NULL;
606 }
607
608 static void ip6_link_failure(struct sk_buff *skb)
609 {
610         struct rt6_info *rt;
611
612         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
613
614         rt = (struct rt6_info *) skb->dst;
615         if (rt) {
616                 if (rt->rt6i_flags&RTF_CACHE) {
617                         dst_set_expires(&rt->u.dst, 0);
618                         rt->rt6i_flags |= RTF_EXPIRES;
619                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
620                         rt->rt6i_node->fn_sernum = -1;
621         }
622 }
623
624 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
625 {
626         struct rt6_info *rt6 = (struct rt6_info*)dst;
627
628         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
629                 rt6->rt6i_flags |= RTF_MODIFIED;
630                 if (mtu < IPV6_MIN_MTU) {
631                         mtu = IPV6_MIN_MTU;
632                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
633                 }
634                 dst->metrics[RTAX_MTU-1] = mtu;
635         }
636 }
637
/* Singly linked list of dsts made by ndisc_dst_alloc(); protected by rt6_lock. */
static struct dst_entry *ndisc_dst_gc_list;

/* Defined further down; needed by ndisc_dst_alloc(). */
static int ipv6_get_mtu(struct net_device *dev);
641
642 static inline unsigned int ipv6_advmss(unsigned int mtu)
643 {
644         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
645
646         if (mtu < ip6_rt_min_advmss)
647                 mtu = ip6_rt_min_advmss;
648
649         /*
650          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
651          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
652          * IPV6_MAXPLEN is also valid and means: "any MSS, 
653          * rely only on pmtu discovery"
654          */
655         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
656                 mtu = IPV6_MAXPLEN;
657         return mtu;
658 }
659
660 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
661                                   struct neighbour *neigh,
662                                   struct in6_addr *addr,
663                                   int (*output)(struct sk_buff *))
664 {
665         struct rt6_info *rt;
666         struct inet6_dev *idev = in6_dev_get(dev);
667
668         if (unlikely(idev == NULL))
669                 return NULL;
670
671         rt = ip6_dst_alloc();
672         if (unlikely(rt == NULL)) {
673                 in6_dev_put(idev);
674                 goto out;
675         }
676
677         dev_hold(dev);
678         if (neigh)
679                 neigh_hold(neigh);
680         else
681                 neigh = ndisc_get_neigh(dev, addr);
682
683         rt->rt6i_dev      = dev;
684         rt->rt6i_idev     = idev;
685         rt->rt6i_nexthop  = neigh;
686         atomic_set(&rt->u.dst.__refcnt, 1);
687         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
688         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
689         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
690         rt->u.dst.output  = output;
691
692 #if 0   /* there's no chance to use these for ndisc */
693         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 
694                                 ? DST_HOST 
695                                 : 0;
696         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
697         rt->rt6i_dst.plen = 128;
698 #endif
699
700         write_lock_bh(&rt6_lock);
701         rt->u.dst.next = ndisc_dst_gc_list;
702         ndisc_dst_gc_list = &rt->u.dst;
703         write_unlock_bh(&rt6_lock);
704
705         fib6_force_start_gc();
706
707 out:
708         return (struct dst_entry *)rt;
709 }
710
711 int ndisc_dst_gc(int *more)
712 {
713         struct dst_entry *dst, *next, **pprev;
714         int freed;
715
716         next = NULL;
717         pprev = &ndisc_dst_gc_list;
718         freed = 0;
719         while ((dst = *pprev) != NULL) {
720                 if (!atomic_read(&dst->__refcnt)) {
721                         *pprev = dst->next;
722                         dst_free(dst);
723                         freed++;
724                 } else {
725                         pprev = &dst->next;
726                         (*more)++;
727                 }
728         }
729
730         return freed;
731 }
732
733 static int ip6_dst_gc(void)
734 {
735         static unsigned expire = 30*HZ;
736         static unsigned long last_gc;
737         unsigned long now = jiffies;
738
739         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
740             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
741                 goto out;
742
743         expire++;
744         fib6_run_gc(expire);
745         last_gc = now;
746         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
747                 expire = ip6_rt_gc_timeout>>1;
748
749 out:
750         expire -= expire>>ip6_rt_gc_elasticity;
751         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
752 }
753
754 /* Clean host part of a prefix. Not necessary in radix tree,
755    but results in cleaner routing tables.
756
757    Remove it only when all the things will work!
758  */
759
760 static int ipv6_get_mtu(struct net_device *dev)
761 {
762         int mtu = IPV6_MIN_MTU;
763         struct inet6_dev *idev;
764
765         idev = in6_dev_get(dev);
766         if (idev) {
767                 mtu = idev->cnf.mtu6;
768                 in6_dev_put(idev);
769         }
770         return mtu;
771 }
772
773 int ipv6_get_hoplimit(struct net_device *dev)
774 {
775         int hoplimit = ipv6_devconf.hop_limit;
776         struct inet6_dev *idev;
777
778         idev = in6_dev_get(dev);
779         if (idev) {
780                 hoplimit = idev->cnf.hop_limit;
781                 in6_dev_put(idev);
782         }
783         return hoplimit;
784 }
785
786 /*
787  *
788  */
789
790 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
791 {
792         int err;
793         struct rtmsg *r;
794         struct rtattr **rta;
795         struct rt6_info *rt = NULL;
796         struct net_device *dev = NULL;
797         struct inet6_dev *idev = NULL;
798         int addr_type;
799
800         rta = (struct rtattr **) _rtattr;
801
802         if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
803                 return -EINVAL;
804 #ifndef CONFIG_IPV6_SUBTREES
805         if (rtmsg->rtmsg_src_len)
806                 return -EINVAL;
807 #endif
808         if (rtmsg->rtmsg_ifindex) {
809                 err = -ENODEV;
810                 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
811                 if (!dev)
812                         goto out;
813                 idev = in6_dev_get(dev);
814                 if (!idev)
815                         goto out;
816         }
817
818         if (rtmsg->rtmsg_metric == 0)
819                 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
820
821         rt = ip6_dst_alloc();
822
823         if (rt == NULL) {
824                 err = -ENOMEM;
825                 goto out;
826         }
827
828         rt->u.dst.obsolete = -1;
829         rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info);
830         if (nlh && (r = NLMSG_DATA(nlh))) {
831                 rt->rt6i_protocol = r->rtm_protocol;
832         } else {
833                 rt->rt6i_protocol = RTPROT_BOOT;
834         }
835
836         addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
837
838         if (addr_type & IPV6_ADDR_MULTICAST)
839                 rt->u.dst.input = ip6_mc_input;
840         else
841                 rt->u.dst.input = ip6_forward;
842
843         rt->u.dst.output = ip6_output;
844
845         ipv6_addr_prefix(&rt->rt6i_dst.addr, 
846                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
847         rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
848         if (rt->rt6i_dst.plen == 128)
849                rt->u.dst.flags = DST_HOST;
850
851 #ifdef CONFIG_IPV6_SUBTREES
852         ipv6_addr_prefix(&rt->rt6i_src.addr, 
853                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
854         rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
855 #endif
856
857         rt->rt6i_metric = rtmsg->rtmsg_metric;
858
859         /* We cannot add true routes via loopback here,
860            they would result in kernel looping; promote them to reject routes
861          */
862         if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
863             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
864                 /* hold loopback dev/idev if we haven't done so. */
865                 if (dev != &loopback_dev) {
866                         if (dev) {
867                                 dev_put(dev);
868                                 in6_dev_put(idev);
869                         }
870                         dev = &loopback_dev;
871                         dev_hold(dev);
872                         idev = in6_dev_get(dev);
873                         if (!idev) {
874                                 err = -ENODEV;
875                                 goto out;
876                         }
877                 }
878                 rt->u.dst.output = ip6_pkt_discard_out;
879                 rt->u.dst.input = ip6_pkt_discard;
880                 rt->u.dst.error = -ENETUNREACH;
881                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
882                 goto install_route;
883         }
884
885         if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
886                 struct in6_addr *gw_addr;
887                 int gwa_type;
888
889                 gw_addr = &rtmsg->rtmsg_gateway;
890                 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
891                 gwa_type = ipv6_addr_type(gw_addr);
892
893                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
894                         struct rt6_info *grt;
895
896                         /* IPv6 strictly inhibits using not link-local
897                            addresses as nexthop address.
898                            Otherwise, router will not able to send redirects.
899                            It is very good, but in some (rare!) circumstances
900                            (SIT, PtP, NBMA NOARP links) it is handy to allow
901                            some exceptions. --ANK
902                          */
903                         err = -EINVAL;
904                         if (!(gwa_type&IPV6_ADDR_UNICAST))
905                                 goto out;
906
907                         grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
908
909                         err = -EHOSTUNREACH;
910                         if (grt == NULL)
911                                 goto out;
912                         if (dev) {
913                                 if (dev != grt->rt6i_dev) {
914                                         dst_release(&grt->u.dst);
915                                         goto out;
916                                 }
917                         } else {
918                                 dev = grt->rt6i_dev;
919                                 idev = grt->rt6i_idev;
920                                 dev_hold(dev);
921                                 in6_dev_hold(grt->rt6i_idev);
922                         }
923                         if (!(grt->rt6i_flags&RTF_GATEWAY))
924                                 err = 0;
925                         dst_release(&grt->u.dst);
926
927                         if (err)
928                                 goto out;
929                 }
930                 err = -EINVAL;
931                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
932                         goto out;
933         }
934
935         err = -ENODEV;
936         if (dev == NULL)
937                 goto out;
938
939         if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
940                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
941                 if (IS_ERR(rt->rt6i_nexthop)) {
942                         err = PTR_ERR(rt->rt6i_nexthop);
943                         rt->rt6i_nexthop = NULL;
944                         goto out;
945                 }
946         }
947
948         rt->rt6i_flags = rtmsg->rtmsg_flags;
949
950 install_route:
951         if (rta && rta[RTA_METRICS-1]) {
952                 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
953                 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
954
955                 while (RTA_OK(attr, attrlen)) {
956                         unsigned flavor = attr->rta_type;
957                         if (flavor) {
958                                 if (flavor > RTAX_MAX) {
959                                         err = -EINVAL;
960                                         goto out;
961                                 }
962                                 rt->u.dst.metrics[flavor-1] =
963                                         *(u32 *)RTA_DATA(attr);
964                         }
965                         attr = RTA_NEXT(attr, attrlen);
966                 }
967         }
968
969         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
970                 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
971         if (!rt->u.dst.metrics[RTAX_MTU-1])
972                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
973         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
974                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
975         rt->u.dst.dev = dev;
976         rt->rt6i_idev = idev;
977         return ip6_ins_rt(rt, nlh, _rtattr);
978
979 out:
980         if (dev)
981                 dev_put(dev);
982         if (idev)
983                 in6_dev_put(idev);
984         if (rt)
985                 dst_free((struct dst_entry *) rt);
986         return err;
987 }
988
989 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
990 {
991         int err;
992
993         write_lock_bh(&rt6_lock);
994
995         rt6_reset_dflt_pointer(NULL);
996
997         err = fib6_del(rt, nlh, _rtattr);
998         dst_release(&rt->u.dst);
999
1000         write_unlock_bh(&rt6_lock);
1001
1002         return err;
1003 }
1004
1005 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
1006 {
1007         struct fib6_node *fn;
1008         struct rt6_info *rt;
1009         int err = -ESRCH;
1010
1011         read_lock_bh(&rt6_lock);
1012
1013         fn = fib6_locate(&ip6_routing_table,
1014                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1015                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1016         
1017         if (fn) {
1018                 for (rt = fn->leaf; rt; rt = rt->u.next) {
1019                         if (rtmsg->rtmsg_ifindex &&
1020                             (rt->rt6i_dev == NULL ||
1021                              rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1022                                 continue;
1023                         if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1024                             !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1025                                 continue;
1026                         if (rtmsg->rtmsg_metric &&
1027                             rtmsg->rtmsg_metric != rt->rt6i_metric)
1028                                 continue;
1029                         dst_hold(&rt->u.dst);
1030                         read_unlock_bh(&rt6_lock);
1031
1032                         return ip6_del_rt(rt, nlh, _rtattr);
1033                 }
1034         }
1035         read_unlock_bh(&rt6_lock);
1036
1037         return err;
1038 }
1039
1040 /*
1041  *      Handle redirects
1042  */
/*
 * rt6_redirect - act on a redirect for @dest received from @saddr.
 *
 * @dest:    destination the redirect applies to
 * @saddr:   source address of the redirect (compared against the gateway
 *           of the current route)
 * @neigh:   neighbour entry for the new next hop
 * @lladdr:  link-layer address passed on to neigh_update()
 * @on_link: non-zero when the new route must not carry RTF_GATEWAY
 *
 * The current route to @dest is looked up on neigh->dev.  The redirect is
 * dropped when there is no route, when the route uses another device, when
 * the route is not a gateway route, or when @saddr is not the gateway of
 * that route (for RTF_DEFAULT routes, the leaf list of the table root is
 * also searched for a route whose gateway equals @saddr).
 *
 * On acceptance the neighbour is updated, the route is confirmed, and a
 * /128 RTF_CACHE|RTF_DYNAMIC copy pointing at @neigh is inserted.  If the
 * old route was itself a cache entry it is deleted.
 *
 * The reference obtained on the route is released on every path: through
 * dst_release() at "out", or through ip6_del_rt(), which releases it itself.
 */
1043 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1044                   struct neighbour *neigh, u8 *lladdr, int on_link)
1045 {
1046         struct rt6_info *rt, *nrt;
1047
1048         /* Locate old route to this destination. */
1049         rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1050
1051         if (rt == NULL)
1052                 return;
1053
             /* The redirect must concern the device the route goes out on. */
1054         if (neigh->dev != rt->rt6i_dev)
1055                 goto out;
1056
1057         /*
1058          * Current route is on-link; redirect is always invalid.
1059          * 
1060          * Seems, previous statement is not true. It could
1061          * be node, which looks for us as on-link (f.e. proxy ndisc)
1062          * But then router serving it might decide, that we should
1063          * know truth 8)8) --ANK (980726).
1064          */
1065         if (!(rt->rt6i_flags&RTF_GATEWAY))
1066                 goto out;
1067
1068         /*
1069          *      RFC 2461 specifies that redirects should only be
1070          *      accepted if they come from the nexthop to the target.
1071          *      Due to the way default routers are chosen, this notion
1072          *      is a bit fuzzy and one might need to check all default
1073          *      routers.
1074          */
1075         if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1076                 if (rt->rt6i_flags & RTF_DEFAULT) {
1077                         struct rt6_info *rt1;
1078
1079                         read_lock(&rt6_lock);
1080                         for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1081                                 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
                                             /* Swap references: hold the matching
                                                route, drop the one looked up above. */
1082                                         dst_hold(&rt1->u.dst);
1083                                         dst_release(&rt->u.dst);
1084                                         read_unlock(&rt6_lock);
1085                                         rt = rt1;
1086                                         goto source_ok;
1087                                 }
1088                         }
1089                         read_unlock(&rt6_lock);
1090                 }
1091                 if (net_ratelimit())
1092                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1093                                "for redirect target\n");
1094                 goto out;
1095         }
1096
1097 source_ok:
1098
1099         /*
1100          *      We have finally decided to accept it.
1101          */
1102
1103         neigh_update(neigh, lladdr, NUD_STALE, 
1104                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1105                      NEIGH_UPDATE_F_OVERRIDE|
1106                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1107                                      NEIGH_UPDATE_F_ISROUTER))
1108                      );
1109
1110         /*
1111          * Redirect received -> path was valid.
1112          * Look, redirects are sent only in response to data packets,
1113          * so that this nexthop apparently is reachable. --ANK
1114          */
1115         dst_confirm(&rt->u.dst);
1116
1117         /* Duplicate redirect: silently ignore. */
1118         if (neigh == rt->u.dst.neighbour)
1119                 goto out;
1120
1121         nrt = ip6_rt_copy(rt);
1122         if (nrt == NULL)
1123                 goto out;
1124
             /* The copy's flags are replaced wholesale, not inherited. */
1125         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1126         if (on_link)
1127                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1128
             /* Turn the copy into a host (/128) route for @dest. */
1129         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1130         nrt->rt6i_dst.plen = 128;
1131         nrt->u.dst.flags |= DST_HOST;
1132
1133         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1134         nrt->rt6i_nexthop = neigh_clone(neigh);
1135         /* Reset pmtu, it may be better */
1136         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1137         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1138
1139         if (ip6_ins_rt(nrt, NULL, NULL))
1140                 goto out;
1141
1142         if (rt->rt6i_flags&RTF_CACHE) {
                     /* ip6_del_rt() drops our reference on rt, so return
                        directly instead of going through "out". */
1143                 ip6_del_rt(rt, NULL, NULL);
1144                 return;
1145         }
1146
1147 out:
1148         dst_release(&rt->u.dst);
1149         return;
1150 }
1151
1152 /*
1153  *      Handle ICMP "packet too big" messages
1154  *      i.e. Path MTU discovery
1155  */
1156
/*
 * rt6_pmtu_discovery - record a smaller path MTU for @daddr.
 *
 * @daddr: destination the new path MTU applies to
 * @saddr: source address used for the route lookup
 * @dev:   device whose ifindex restricts the route lookup
 * @pmtu:  reported path MTU
 *
 * Nothing happens when no route is found or when @pmtu is not smaller
 * than the route's current MTU.  A @pmtu below IPV6_MIN_MTU is raised to
 * IPV6_MIN_MTU and RTAX_FEATURE_ALLFRAG is set on the affected route.
 *
 * An RTF_CACHE route is updated in place.  Otherwise a new host route is
 * created: through rt6_cow() when the route has no nexthop and is not
 * RTF_NONEXTHOP, or through ip6_rt_copy() + ip6_ins_rt() in the other
 * case.  All routes touched get an expiry of ip6_rt_mtu_expires.
 */
1157 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1158                         struct net_device *dev, u32 pmtu)
1159 {
1160         struct rt6_info *rt, *nrt;
1161         int allfrag = 0;
1162
1163         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1164         if (rt == NULL)
1165                 return;
1166
             /* Only MTU decreases are handled here. */
1167         if (pmtu >= dst_mtu(&rt->u.dst))
1168                 goto out;
1169
1170         if (pmtu < IPV6_MIN_MTU) {
1171                 /*
1172                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link 
1173                  * MTU (1280) and a fragment header should always be included
1174                  * after a node receiving Too Big message reporting PMTU is
1175                  * less than the IPv6 Minimum Link MTU.
1176                  */
1177                 pmtu = IPV6_MIN_MTU;
1178                 allfrag = 1;
1179         }
1180
1181         /* New mtu received -> path was valid.
1182            They are sent only in response to data packets,
1183            so that this nexthop apparently is reachable. --ANK
1184          */
1185         dst_confirm(&rt->u.dst);
1186
1187         /* Host route. If it is static, it would be better
1188            not to override it, but add new one, so that
1189            when cache entry will expire old pmtu
1190            would return automatically.
1191          */
1192         if (rt->rt6i_flags & RTF_CACHE) {
1193                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1194                 if (allfrag)
1195                         rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1196                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1197                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1198                 goto out;
1199         }
1200
1201         /* Network route.
1202            Two cases are possible:
1203            1. It is connected route. Action: COW
1204            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1205          */
1206         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1207                 nrt = rt6_cow(rt, daddr, saddr);
                     /* nrt is dereferenced without a NULL check; presumably
                        rt6_cow() always returns a route and reports failure
                        through u.dst.error -- verify against rt6_cow(). */
1208                 if (!nrt->u.dst.error) {
1209                         nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1210                         if (allfrag)
1211                                 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1212                         /* According to RFC 1981, detecting PMTU increase shouldn't be
1213                            happened within 5 mins, the recommended timer is 10 mins.
1214                            Here this route expiration time is set to ip6_rt_mtu_expires
1215                            which is 10 mins. After 10 mins the decreased pmtu is expired
1216                            and detecting PMTU increase will be automatically happened.
1217                          */
1218                         dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1219                         nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1220                 }
1221                 dst_release(&nrt->u.dst);
1222         } else {
1223                 nrt = ip6_rt_copy(rt);
1224                 if (nrt == NULL)
1225                         goto out;
                     /* Narrow the copy to a /128 host route for daddr. */
1226                 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1227                 nrt->rt6i_dst.plen = 128;
1228                 nrt->u.dst.flags |= DST_HOST;
1229                 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1230                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1231                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1232                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1233                 if (allfrag)
1234                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
                     /* The return value of the insertion is not checked. */
1235                 ip6_ins_rt(nrt, NULL, NULL);
1236         }
1237
1238 out:
1239         dst_release(&rt->u.dst);
1240 }
1241
1242 /*
1243  *      Misc support functions
1244  */
1245
1246 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1247 {
1248         struct rt6_info *rt = ip6_dst_alloc();
1249
1250         if (rt) {
1251                 rt->u.dst.input = ort->u.dst.input;
1252                 rt->u.dst.output = ort->u.dst.output;
1253
1254                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1255                 rt->u.dst.dev = ort->u.dst.dev;
1256                 if (rt->u.dst.dev)
1257                         dev_hold(rt->u.dst.dev);
1258                 rt->rt6i_idev = ort->rt6i_idev;
1259                 if (rt->rt6i_idev)
1260                         in6_dev_hold(rt->rt6i_idev);
1261                 rt->u.dst.lastuse = jiffies;
1262                 rt->rt6i_expires = 0;
1263
1264                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1265                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1266                 rt->rt6i_metric = 0;
1267
1268                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1269 #ifdef CONFIG_IPV6_SUBTREES
1270                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1271 #endif
1272         }
1273         return rt;
1274 }
1275
1276 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1277 {       
1278         struct rt6_info *rt;
1279         struct fib6_node *fn;
1280
1281         fn = &ip6_routing_table;
1282
1283         write_lock_bh(&rt6_lock);
1284         for (rt = fn->leaf; rt; rt=rt->u.next) {
1285                 if (dev == rt->rt6i_dev &&
1286                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1287                         break;
1288         }
1289         if (rt)
1290                 dst_hold(&rt->u.dst);
1291         write_unlock_bh(&rt6_lock);
1292         return rt;
1293 }
1294
1295 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1296                                      struct net_device *dev)
1297 {
1298         struct in6_rtmsg rtmsg;
1299
1300         memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1301         rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1302         ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1303         rtmsg.rtmsg_metric = 1024;
1304         rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1305
1306         rtmsg.rtmsg_ifindex = dev->ifindex;
1307
1308         ip6_route_add(&rtmsg, NULL, NULL);
1309         return rt6_get_dflt_router(gwaddr, dev);
1310 }
1311
1312 void rt6_purge_dflt_routers(void)
1313 {
1314         struct rt6_info *rt;
1315
1316 restart:
1317         read_lock_bh(&rt6_lock);
1318         for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1319                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1320                         dst_hold(&rt->u.dst);
1321
1322                         rt6_reset_dflt_pointer(NULL);
1323
1324                         read_unlock_bh(&rt6_lock);
1325
1326                         ip6_del_rt(rt, NULL, NULL);
1327
1328                         goto restart;
1329                 }
1330         }
1331         read_unlock_bh(&rt6_lock);
1332 }
1333
1334 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1335 {
1336         struct in6_rtmsg rtmsg;
1337         int err;
1338
1339         switch(cmd) {
1340         case SIOCADDRT:         /* Add a route */
1341         case SIOCDELRT:         /* Delete a route */
1342                 if (!capable(CAP_NET_ADMIN))
1343                         return -EPERM;
1344                 err = copy_from_user(&rtmsg, arg,
1345                                      sizeof(struct in6_rtmsg));
1346                 if (err)
1347                         return -EFAULT;
1348                         
1349                 rtnl_lock();
1350                 switch (cmd) {
1351                 case SIOCADDRT:
1352                         err = ip6_route_add(&rtmsg, NULL, NULL);
1353                         break;
1354                 case SIOCDELRT:
1355                         err = ip6_route_del(&rtmsg, NULL, NULL);
1356                         break;
1357                 default:
1358                         err = -EINVAL;
1359                 }
1360                 rtnl_unlock();
1361
1362                 return err;
1363         };
1364
1365         return -EINVAL;
1366 }
1367
1368 /*
1369  *      Drop the packet on the floor
1370  */
1371
1372 int ip6_pkt_discard(struct sk_buff *skb)
1373 {
1374         IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1375         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1376         kfree_skb(skb);
1377         return 0;
1378 }
1379
1380 int ip6_pkt_discard_out(struct sk_buff *skb)
1381 {
1382         skb->dev = skb->dst->dev;
1383         return ip6_pkt_discard(skb);
1384 }
1385
1386 /*
1387  *      Allocate a dst for local (unicast / anycast) address.
1388  */
1389
1390 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1391                                     const struct in6_addr *addr,
1392                                     int anycast)
1393 {
1394         struct rt6_info *rt = ip6_dst_alloc();
1395
1396         if (rt == NULL)
1397                 return ERR_PTR(-ENOMEM);
1398
1399         dev_hold(&loopback_dev);
1400         in6_dev_hold(idev);
1401
1402         rt->u.dst.flags = DST_HOST;
1403         rt->u.dst.input = ip6_input;
1404         rt->u.dst.output = ip6_output;
1405         rt->rt6i_dev = &loopback_dev;
1406         rt->rt6i_idev = idev;
1407         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1408         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1409         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1410         rt->u.dst.obsolete = -1;
1411
1412         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1413         if (!anycast)
1414                 rt->rt6i_flags |= RTF_LOCAL;
1415         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1416         if (rt->rt6i_nexthop == NULL) {
1417                 dst_free((struct dst_entry *) rt);
1418                 return ERR_PTR(-ENOMEM);
1419         }
1420
1421         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1422         rt->rt6i_dst.plen = 128;
1423
1424         atomic_set(&rt->u.dst.__refcnt, 1);
1425
1426         return rt;
1427 }
1428
1429 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1430 {
1431         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1432             rt != &ip6_null_entry) {
1433                 RT6_TRACE("deleted by ifdown %p\n", rt);
1434                 return -1;
1435         }
1436         return 0;
1437 }
1438
1439 void rt6_ifdown(struct net_device *dev)
1440 {
1441         write_lock_bh(&rt6_lock);
1442         fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1443         write_unlock_bh(&rt6_lock);
1444 }
1445
/* Argument bundle handed from rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
1451
1452 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1453 {
1454         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1455         struct inet6_dev *idev;
1456
1457         /* In IPv6 pmtu discovery is not optional,
1458            so that RTAX_MTU lock cannot disable it.
1459            We still use this lock to block changes
1460            caused by addrconf/ndisc.
1461         */
1462
1463         idev = __in6_dev_get(arg->dev);
1464         if (idev == NULL)
1465                 return 0;
1466
1467         /* For administrative MTU increase, there is no way to discover
1468            IPv6 PMTU increase, so PMTU increase should be updated here.
1469            Since RFC 1981 doesn't include administrative MTU increase
1470            update PMTU increase is a MUST. (i.e. jumbo frame)
1471          */
1472         /*
1473            If new MTU is less than route PMTU, this new MTU will be the
1474            lowest MTU in the path, update the route PMTU to reflect PMTU
1475            decreases; if new MTU is greater than route PMTU, and the
1476            old MTU is the lowest MTU in the path, update the route PMTU
1477            to reflect the increase. In this case if the other nodes' MTU
1478            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1479            PMTU discouvery.
1480          */
1481         if (rt->rt6i_dev == arg->dev &&
1482             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1483             (dst_mtu(&rt->u.dst) > arg->mtu ||
1484              (dst_mtu(&rt->u.dst) < arg->mtu &&
1485               dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1486                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1487         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1488         return 0;
1489 }
1490
1491 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1492 {
1493         struct rt6_mtu_change_arg arg;
1494
1495         arg.dev = dev;
1496         arg.mtu = mtu;
1497         read_lock_bh(&rt6_lock);
1498         fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1499         read_unlock_bh(&rt6_lock);
1500 }
1501
1502 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1503                               struct in6_rtmsg *rtmsg)
1504 {
1505         memset(rtmsg, 0, sizeof(*rtmsg));
1506
1507         rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1508         rtmsg->rtmsg_src_len = r->rtm_src_len;
1509         rtmsg->rtmsg_flags = RTF_UP;
1510         if (r->rtm_type == RTN_UNREACHABLE)
1511                 rtmsg->rtmsg_flags |= RTF_REJECT;
1512
1513         if (rta[RTA_GATEWAY-1]) {
1514                 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1515                         return -EINVAL;
1516                 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1517                 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1518         }
1519         if (rta[RTA_DST-1]) {
1520                 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1521                         return -EINVAL;
1522                 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1523         }
1524         if (rta[RTA_SRC-1]) {
1525                 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1526                         return -EINVAL;
1527                 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1528         }
1529         if (rta[RTA_OIF-1]) {
1530                 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1531                         return -EINVAL;
1532                 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1533         }
1534         if (rta[RTA_PRIORITY-1]) {
1535                 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1536                         return -EINVAL;
1537                 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1538         }
1539         return 0;
1540 }
1541
1542 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1543 {
1544         struct rtmsg *r = NLMSG_DATA(nlh);
1545         struct in6_rtmsg rtmsg;
1546
1547         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1548                 return -EINVAL;
1549         return ip6_route_del(&rtmsg, nlh, arg);
1550 }
1551
1552 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1553 {
1554         struct rtmsg *r = NLMSG_DATA(nlh);
1555         struct in6_rtmsg rtmsg;
1556
1557         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1558                 return -EINVAL;
1559         return ip6_route_add(&rtmsg, nlh, arg);
1560 }
1561
/* Per-call state that inet6_dump_fib() hands to the tree walker. */
struct rt6_rtnl_dump_arg {
	struct sk_buff *skb;		/* buffer the dump is written into */
	struct netlink_callback *cb;	/* netlink callback of the dump */
};
1567
/*
 * rt6_fill_node - append a netlink route message describing @rt to @skb.
 *
 * @skb:    buffer the message is appended to
 * @rt:     route to describe
 * @dst:    when non-NULL, reported as a /128 destination instead of the
 *          route's own prefix
 * @src:    likewise for the source (only with CONFIG_IPV6_SUBTREES)
 * @iif:    when non-zero, emitted as RTA_IIF
 * @type:   netlink message type
 * @pid:    netlink pid; when zero and @in_nlh is given, in_nlh->nlmsg_pid
 *          is used instead
 * @seq:    netlink sequence number
 * @in_nlh: optional request header, only consulted for the pid fallback
 * @prefix: when non-zero, routes without RTF_PREFIX_RT are skipped
 *
 * Returns 1 when the route was skipped because of @prefix, skb->len after
 * a message was added, or -1 after trimming @skb back to its length on
 * entry when the message could not be built.
 */
1568 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1569                          struct in6_addr *dst,
1570                          struct in6_addr *src,
1571                          int iif,
1572                          int type, u32 pid, u32 seq,
1573                          struct nlmsghdr *in_nlh, int prefix)
1574 {
1575         struct rtmsg *rtm;
1576         struct nlmsghdr  *nlh;
             /* Remember the current tail so a partial message can be undone. */
1577         unsigned char    *b = skb->tail;
1578         struct rta_cacheinfo ci;
1579
1580         if (prefix) {   /* user wants prefix routes only */
1581                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1582                         /* success since this is not a prefix route */
1583                         return 1;
1584                 }
1585         }
1586
1587         if (!pid && in_nlh) {
1588                 pid = in_nlh->nlmsg_pid;
1589         }
1590
             /* NOTE(review): NLMSG_PUT and RTA_PUT are macros defined elsewhere;
                presumably they jump to nlmsg_failure / rtattr_failure below
                when the skb has no room -- confirm against their definitions. */
1591         nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
1592         rtm = NLMSG_DATA(nlh);
1593         rtm->rtm_family = AF_INET6;
1594         rtm->rtm_dst_len = rt->rt6i_dst.plen;
1595         rtm->rtm_src_len = rt->rt6i_src.plen;
1596         rtm->rtm_tos = 0;
1597         rtm->rtm_table = RT_TABLE_MAIN;
             /* Route type: reject routes first, then routes on a loopback
                device, everything else is reported as unicast. */
1598         if (rt->rt6i_flags&RTF_REJECT)
1599                 rtm->rtm_type = RTN_UNREACHABLE;
1600         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1601                 rtm->rtm_type = RTN_LOCAL;
1602         else
1603                 rtm->rtm_type = RTN_UNICAST;
1604         rtm->rtm_flags = 0;
1605         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
             /* The stored protocol is overridden by the first matching flag. */
1606         rtm->rtm_protocol = rt->rt6i_protocol;
1607         if (rt->rt6i_flags&RTF_DYNAMIC)
1608                 rtm->rtm_protocol = RTPROT_REDIRECT;
1609         else if (rt->rt6i_flags & RTF_ADDRCONF)
1610                 rtm->rtm_protocol = RTPROT_KERNEL;
1611         else if (rt->rt6i_flags&RTF_DEFAULT)
1612                 rtm->rtm_protocol = RTPROT_RA;
1613
1614         if (rt->rt6i_flags&RTF_CACHE)
1615                 rtm->rtm_flags |= RTM_F_CLONED;
1616
             /* An explicit dst replaces the route prefix and is always /128. */
1617         if (dst) {
1618                 RTA_PUT(skb, RTA_DST, 16, dst);
1619                 rtm->rtm_dst_len = 128;
1620         } else if (rtm->rtm_dst_len)
1621                 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1622 #ifdef CONFIG_IPV6_SUBTREES
1623         if (src) {
1624                 RTA_PUT(skb, RTA_SRC, 16, src);
1625                 rtm->rtm_src_len = 128;
1626         } else if (rtm->rtm_src_len)
1627                 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1628 #endif
1629         if (iif)
1630                 RTA_PUT(skb, RTA_IIF, 4, &iif);
1631         else if (dst) {
                     /* No input interface: report the source address that
                        ipv6_get_saddr() picks for dst, if it finds one. */
1632                 struct in6_addr saddr_buf;
1633                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1634                         RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1635         }
1636         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1637                 goto rtattr_failure;
1638         if (rt->u.dst.neighbour)
1639                 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1640         if (rt->u.dst.dev)
1641                 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1642         RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
             /* Cache info: every field of ci is assigned before it is emitted. */
1643         ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1644         if (rt->rt6i_expires)
1645                 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1646         else
1647                 ci.rta_expires = 0;
1648         ci.rta_used = rt->u.dst.__use;
1649         ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1650         ci.rta_error = rt->u.dst.error;
1651         ci.rta_id = 0;
1652         ci.rta_ts = 0;
1653         ci.rta_tsage = 0;
1654         RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
             /* Final message length: everything appended since entry. */
1655         nlh->nlmsg_len = skb->tail - b;
1656         return skb->len;
1657
1658 nlmsg_failure:
1659 rtattr_failure:
             /* Drop whatever was appended and report failure. */
1660         skb_trim(skb, b - skb->data);
1661         return -1;
1662 }
1663
1664 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1665 {
1666         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1667         int prefix;
1668
1669         if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1670                 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1671                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1672         } else
1673                 prefix = 0;
1674
1675         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1676                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1677                      NULL, prefix);
1678 }
1679
1680 static int fib6_dump_node(struct fib6_walker_t *w)
1681 {
1682         int res;
1683         struct rt6_info *rt;
1684
1685         for (rt = w->leaf; rt; rt = rt->u.next) {
1686                 res = rt6_dump_route(rt, w->args);
1687                 if (res < 0) {
1688                         /* Frame is full, suspend walking */
1689                         w->leaf = rt;
1690                         return 1;
1691                 }
1692                 BUG_TRAP(res!=0);
1693         }
1694         w->leaf = NULL;
1695         return 0;
1696 }
1697
1698 static void fib6_dump_end(struct netlink_callback *cb)
1699 {
1700         struct fib6_walker_t *w = (void*)cb->args[0];
1701
1702         if (w) {
1703                 cb->args[0] = 0;
1704                 fib6_walker_unlink(w);
1705                 kfree(w);
1706         }
1707         if (cb->args[1]) {
1708                 cb->done = (void*)cb->args[1];
1709                 cb->args[1] = 0;
1710         }
1711 }
1712
1713 static int fib6_dump_done(struct netlink_callback *cb)
1714 {
1715         fib6_dump_end(cb);
1716         return cb->done(cb);
1717 }
1718
/*
 * inet6_dump_fib - netlink dump callback for the IPv6 routing table.
 *
 * @skb: buffer to fill with route messages
 * @cb:  dump state; args[0] holds the tree walker between calls and
 *       args[1] the done callback that was in place before this dump
 *       installed fib6_dump_done
 *
 * On the first call (no walker in cb->args[0]) the done callback is
 * hooked, a walker is allocated and the walk is started.  Later calls
 * resume the walk.  The walk itself runs under the rt6_lock read lock.
 *
 * Returns -ENOMEM when the walker cannot be allocated, a negative walk
 * result, or otherwise skb->len.  When the returned value is <= 0 the
 * walker is freed and the original done callback restored.
 */
1719 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1720 {
1721         struct rt6_rtnl_dump_arg arg;
1722         struct fib6_walker_t *w;
1723         int res;
1724
1725         arg.skb = skb;
1726         arg.cb = cb;
1727
1728         w = (void*)cb->args[0];
1729         if (w == NULL) {
1730                 /* New dump:
1731                  * 
1732                  * 1. hook callback destructor.
1733                  */
1734                 cb->args[1] = (long)cb->done;
1735                 cb->done = fib6_dump_done;
1736
1737                 /*
1738                  * 2. allocate and initialize walker.
1739                  */
1740                 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1741                 if (w == NULL)
1742                         return -ENOMEM;
1743                 RT6_TRACE("dump<%p", w);
1744                 memset(w, 0, sizeof(*w));
1745                 w->root = &ip6_routing_table;
1746                 w->func = fib6_dump_node;
1747                 w->args = &arg;
1748                 cb->args[0] = (long)w;
1749                 read_lock_bh(&rt6_lock);
1750                 res = fib6_walk(w);
1751                 read_unlock_bh(&rt6_lock);
1752         } else {
                     /* arg lives on this call's stack, so the walker must be
                        pointed at the current one before resuming. */
1753                 w->args = &arg;
1754                 read_lock_bh(&rt6_lock);
1755                 res = fib6_walk_continue(w);
1756                 read_unlock_bh(&rt6_lock);
1757         }
1758 #if RT6_DEBUG >= 3
1759         if (res <= 0 && skb->len == 0)
1760                 RT6_TRACE("%p>dump end\n", w);
1761 #endif
             /* Keep a negative walk result; otherwise report the bytes queued. */
1762         res = res < 0 ? res : skb->len;
1763         /* res < 0 is an error. (really, impossible)
1764            res == 0 means that dump is complete, but skb still can contain data.
1765            res > 0 dump is not complete, but frame is full.
1766          */
1767         /* Destroy walker, if dump of this table is complete. */
1768         if (res <= 0)
1769                 fib6_dump_end(cb);
1770         return res;
1771 }
1772
1773 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1774 {
1775         struct rtattr **rta = arg;
1776         int iif = 0;
1777         int err = -ENOBUFS;
1778         struct sk_buff *skb;
1779         struct flowi fl;
1780         struct rt6_info *rt;
1781
1782         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1783         if (skb == NULL)
1784                 goto out;
1785
1786         /* Reserve room for dummy headers, this skb can pass
1787            through good chunk of routing engine.
1788          */
1789         skb->mac.raw = skb->data;
1790         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1791
1792         memset(&fl, 0, sizeof(fl));
1793         if (rta[RTA_SRC-1])
1794                 ipv6_addr_copy(&fl.fl6_src,
1795                                (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1796         if (rta[RTA_DST-1])
1797                 ipv6_addr_copy(&fl.fl6_dst,
1798                                (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1799
1800         if (rta[RTA_IIF-1])
1801                 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1802
1803         if (iif) {
1804                 struct net_device *dev;
1805                 dev = __dev_get_by_index(iif);
1806                 if (!dev) {
1807                         err = -ENODEV;
1808                         goto out_free;
1809                 }
1810         }
1811
1812         fl.oif = 0;
1813         if (rta[RTA_OIF-1])
1814                 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1815
1816         rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1817
1818         skb->dst = &rt->u.dst;
1819
1820         NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1821         err = rt6_fill_node(skb, rt, 
1822                             &fl.fl6_dst, &fl.fl6_src,
1823                             iif,
1824                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1825                             nlh->nlmsg_seq, nlh, 0);
1826         if (err < 0) {
1827                 err = -EMSGSIZE;
1828                 goto out_free;
1829         }
1830
1831         err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1832         if (err > 0)
1833                 err = 0;
1834 out:
1835         return err;
1836 out_free:
1837         kfree_skb(skb);
1838         goto out;       
1839 }
1840
1841 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh)
1842 {
1843         struct sk_buff *skb;
1844         int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1845
1846         skb = alloc_skb(size, gfp_any());
1847         if (!skb) {
1848                 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1849                 return;
1850         }
1851         if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0) < 0) {
1852                 kfree_skb(skb);
1853                 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1854                 return;
1855         }
1856         NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
1857         netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
1858 }
1859
1860 /*
1861  *      /proc
1862  */
1863
1864 #ifdef CONFIG_PROC_FS
1865
/*
 * Nominal length in bytes of one text record; used to translate a byte
 * offset into a number of whole records to skip.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* Cursor shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg {
	char *buffer;	/* output buffer the records are formatted into */
	int offset;	/* byte offset requested by the reader */
	int length;	/* number of bytes requested by the reader */
	int skip;	/* records skipped so far to reach 'offset' */
	int len;	/* bytes written into 'buffer' so far */
};
1876
1877 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1878 {
1879         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1880         int i;
1881
1882         if (arg->skip < arg->offset / RT6_INFO_LEN) {
1883                 arg->skip++;
1884                 return 0;
1885         }
1886
1887         if (arg->len >= arg->length)
1888                 return 0;
1889
1890         for (i=0; i<16; i++) {
1891                 sprintf(arg->buffer + arg->len, "%02x",
1892                         rt->rt6i_dst.addr.s6_addr[i]);
1893                 arg->len += 2;
1894         }
1895         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1896                             rt->rt6i_dst.plen);
1897
1898 #ifdef CONFIG_IPV6_SUBTREES
1899         for (i=0; i<16; i++) {
1900                 sprintf(arg->buffer + arg->len, "%02x",
1901                         rt->rt6i_src.addr.s6_addr[i]);
1902                 arg->len += 2;
1903         }
1904         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1905                             rt->rt6i_src.plen);
1906 #else
1907         sprintf(arg->buffer + arg->len,
1908                 "00000000000000000000000000000000 00 ");
1909         arg->len += 36;
1910 #endif
1911
1912         if (rt->rt6i_nexthop) {
1913                 for (i=0; i<16; i++) {
1914                         sprintf(arg->buffer + arg->len, "%02x",
1915                                 rt->rt6i_nexthop->primary_key[i]);
1916                         arg->len += 2;
1917                 }
1918         } else {
1919                 sprintf(arg->buffer + arg->len,
1920                         "00000000000000000000000000000000");
1921                 arg->len += 32;
1922         }
1923         arg->len += sprintf(arg->buffer + arg->len,
1924                             " %08x %08x %08x %08x %8s\n",
1925                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1926                             rt->u.dst.__use, rt->rt6i_flags, 
1927                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
1928         return 0;
1929 }
1930
1931 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1932 {
1933         struct rt6_proc_arg arg;
1934         arg.buffer = buffer;
1935         arg.offset = offset;
1936         arg.length = length;
1937         arg.skip = 0;
1938         arg.len = 0;
1939
1940         read_lock_bh(&rt6_lock);
1941         fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1942         read_unlock_bh(&rt6_lock);
1943
1944         *start = buffer;
1945         if (offset)
1946                 *start += offset % RT6_INFO_LEN;
1947
1948         arg.len -= offset % RT6_INFO_LEN;
1949
1950         if (arg.len > length)
1951                 arg.len = length;
1952         if (arg.len < 0)
1953                 arg.len = 0;
1954
1955         return arg.len;
1956 }
1957
1958 extern struct rt6_statistics rt6_stats;
1959
1960 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1961 {
1962         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1963                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1964                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1965                       rt6_stats.fib_rt_cache,
1966                       atomic_read(&ip6_dst_ops.entries),
1967                       rt6_stats.fib_discarded_routes);
1968
1969         return 0;
1970 }
1971
1972 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1973 {
1974         return single_open(file, rt6_stats_seq_show, NULL);
1975 }
1976
1977 static struct file_operations rt6_stats_seq_fops = {
1978         .owner   = THIS_MODULE,
1979         .open    = rt6_stats_seq_open,
1980         .read    = seq_read,
1981         .llseek  = seq_lseek,
1982         .release = single_release,
1983 };
1984 #endif  /* CONFIG_PROC_FS */
1985
1986 #ifdef CONFIG_SYSCTL
1987
1988 static int flush_delay;
1989
1990 static
1991 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1992                               void __user *buffer, size_t *lenp, loff_t *ppos)
1993 {
1994         if (write) {
1995                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1996                 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
1997                 return 0;
1998         } else
1999                 return -EINVAL;
2000 }
2001
2002 ctl_table ipv6_route_table[] = {
2003         {
2004                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH, 
2005                 .procname       =       "flush",
2006                 .data           =       &flush_delay,
2007                 .maxlen         =       sizeof(int),
2008                 .mode           =       0644,
2009                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
2010         },
2011         {
2012                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
2013                 .procname       =       "gc_thresh",
2014                 .data           =       &ip6_dst_ops.gc_thresh,
2015                 .maxlen         =       sizeof(int),
2016                 .mode           =       0644,
2017                 .proc_handler   =       &proc_dointvec,
2018         },
2019         {
2020                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
2021                 .procname       =       "max_size",
2022                 .data           =       &ip6_rt_max_size,
2023                 .maxlen         =       sizeof(int),
2024                 .mode           =       0644,
2025                 .proc_handler   =       &proc_dointvec,
2026         },
2027         {
2028                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2029                 .procname       =       "gc_min_interval",
2030                 .data           =       &ip6_rt_gc_min_interval,
2031                 .maxlen         =       sizeof(int),
2032                 .mode           =       0644,
2033                 .proc_handler   =       &proc_dointvec_jiffies,
2034                 .strategy       =       &sysctl_jiffies,
2035         },
2036         {
2037                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2038                 .procname       =       "gc_timeout",
2039                 .data           =       &ip6_rt_gc_timeout,
2040                 .maxlen         =       sizeof(int),
2041                 .mode           =       0644,
2042                 .proc_handler   =       &proc_dointvec_jiffies,
2043                 .strategy       =       &sysctl_jiffies,
2044         },
2045         {
2046                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2047                 .procname       =       "gc_interval",
2048                 .data           =       &ip6_rt_gc_interval,
2049                 .maxlen         =       sizeof(int),
2050                 .mode           =       0644,
2051                 .proc_handler   =       &proc_dointvec_jiffies,
2052                 .strategy       =       &sysctl_jiffies,
2053         },
2054         {
2055                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2056                 .procname       =       "gc_elasticity",
2057                 .data           =       &ip6_rt_gc_elasticity,
2058                 .maxlen         =       sizeof(int),
2059                 .mode           =       0644,
2060                 .proc_handler   =       &proc_dointvec_jiffies,
2061                 .strategy       =       &sysctl_jiffies,
2062         },
2063         {
2064                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2065                 .procname       =       "mtu_expires",
2066                 .data           =       &ip6_rt_mtu_expires,
2067                 .maxlen         =       sizeof(int),
2068                 .mode           =       0644,
2069                 .proc_handler   =       &proc_dointvec_jiffies,
2070                 .strategy       =       &sysctl_jiffies,
2071         },
2072         {
2073                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2074                 .procname       =       "min_adv_mss",
2075                 .data           =       &ip6_rt_min_advmss,
2076                 .maxlen         =       sizeof(int),
2077                 .mode           =       0644,
2078                 .proc_handler   =       &proc_dointvec_jiffies,
2079                 .strategy       =       &sysctl_jiffies,
2080         },
2081         {
2082                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2083                 .procname       =       "gc_min_interval_ms",
2084                 .data           =       &ip6_rt_gc_min_interval,
2085                 .maxlen         =       sizeof(int),
2086                 .mode           =       0644,
2087                 .proc_handler   =       &proc_dointvec_ms_jiffies,
2088                 .strategy       =       &sysctl_ms_jiffies,
2089         },
2090         { .ctl_name = 0 }
2091 };
2092
2093 #endif
2094
2095 void __init ip6_route_init(void)
2096 {
2097         struct proc_dir_entry *p;
2098
2099         ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2100                                                      sizeof(struct rt6_info),
2101                                                      0, SLAB_HWCACHE_ALIGN,
2102                                                      NULL, NULL);
2103         if (!ip6_dst_ops.kmem_cachep)
2104                 panic("cannot create ip6_dst_cache");
2105
2106         fib6_init();
2107 #ifdef  CONFIG_PROC_FS
2108         p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2109         if (p)
2110                 p->owner = THIS_MODULE;
2111
2112         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2113 #endif
2114 #ifdef CONFIG_XFRM
2115         xfrm6_init();
2116 #endif
2117 }
2118
2119 void ip6_route_cleanup(void)
2120 {
2121 #ifdef CONFIG_PROC_FS
2122         proc_net_remove("ipv6_route");
2123         proc_net_remove("rt6_stats");
2124 #endif
2125 #ifdef CONFIG_XFRM
2126         xfrm6_fini();
2127 #endif
2128         rt6_ifdown(NULL);
2129         fib6_gc_cleanup();
2130         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2131 }