- patches.fixes/patch-2.6.11-rc1: 2.6.11-rc1.
[linux-flexiantxendom0-3.2.10.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  */
26
27 #include <linux/config.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/netlink.h>
39 #include <linux/if_arp.h>
40
41 #ifdef  CONFIG_PROC_FS
42 #include <linux/proc_fs.h>
43 #include <linux/seq_file.h>
44 #endif
45
46 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
52 #include <net/tcp.h>
53 #include <linux/rtnetlink.h>
54 #include <net/dst.h>
55 #include <net/xfrm.h>
56
57 #include <asm/uaccess.h>
58
59 #ifdef CONFIG_SYSCTL
60 #include <linux/sysctl.h>
61 #endif
62
63 /* Debug level. Set to 3 to get tracing: RDBG and RT6_TRACE expand to printk only when RT6_DEBUG >= 3. */
64 #define RT6_DEBUG 2
65
66 #if RT6_DEBUG >= 3
67 #define RDBG(x) printk x	/* x must be a complete, parenthesised printk argument list */
68 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
69 #else
70 #define RDBG(x)
71 #define RT6_TRACE(x...) do { ; } while (0)	/* compiled out, still a single statement */
72 #endif
73
74
75 static int ip6_rt_max_size = 4096;	/* ip6_dst_gc() reports pressure while the dst entry count exceeds this */
76 static int ip6_rt_gc_min_interval = HZ / 2;	/* ip6_dst_gc() skips a run if the last one was more recent than this (jiffies) */
77 static int ip6_rt_gc_timeout = 60*HZ;	/* half of this becomes the GC expiry once the entry count drops below gc_thresh */
78 int ip6_rt_gc_interval = 30*HZ;	/* non-static: not referenced in this part of the file */
79 static int ip6_rt_gc_elasticity = 9;	/* ip6_dst_gc() decays its expiry by expire >> elasticity on every call */
80 static int ip6_rt_mtu_expires = 10*60*HZ;	/* NOTE(review): presumably the lifetime of learned PMTU values; not used in this part of the file */
81 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;	/* lower bound applied by ipv6_advmss() */
82
83 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
84 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
85 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86 static void             ip6_dst_destroy(struct dst_entry *);
87 static void             ip6_dst_ifdown(struct dst_entry *, int how);
88 static int               ip6_dst_gc(void);
89
90 static int              ip6_pkt_discard(struct sk_buff *skb);
91 static int              ip6_pkt_discard_out(struct sk_buff *skb);
92 static void             ip6_link_failure(struct sk_buff *skb);
93 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
94
95 static struct dst_ops ip6_dst_ops = {	/* dst cache operations for IPv6 routes; passed to dst_alloc() by ip6_dst_alloc() */
96         .family                 =       AF_INET6,
97         .protocol               =       __constant_htons(ETH_P_IPV6),
98         .gc                     =       ip6_dst_gc,
99         .gc_thresh              =       1024,	/* ip6_dst_gc() resets its expiry when the entry count is below this */
100         .check                  =       ip6_dst_check,
101         .destroy                =       ip6_dst_destroy,
102         .ifdown                 =       ip6_dst_ifdown,
103         .negative_advice        =       ip6_negative_advice,
104         .link_failure           =       ip6_link_failure,
105         .update_pmtu            =       ip6_rt_update_pmtu,
106         .entry_size             =       sizeof(struct rt6_info),
107 };
108
109 struct rt6_info ip6_null_entry = {	/* sentinel "no route" entry: returned by rt6_device_match() on a strict miss, by rt6_cow() when the copy fails and by rt6_best_dflt() when nothing matches */
110         .u = {
111                 .dst = {
112                         .__refcnt       = ATOMIC_INIT(1),
113                         .__use          = 1,
114                         .dev            = &loopback_dev,
115                         .obsolete       = -1,
116                         .error          = -ENETUNREACH,	/* non-zero error makes rt6_lookup() return NULL for this entry */
117                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
118                         .input          = ip6_pkt_discard,
119                         .output         = ip6_pkt_discard_out,
120                         .ops            = &ip6_dst_ops,
121                         .path           = (struct dst_entry*)&ip6_null_entry,	/* points back at itself */
122                 }
123         },
124         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),	/* RTF_NONEXTHOP keeps the route functions from trying to clone it */
125         .rt6i_metric    = ~(u32) 0,
126         .rt6i_ref       = ATOMIC_INIT(1),
127 };
128
129 struct fib6_node ip6_routing_table = {	/* root node handed to fib6_lookup()/fib6_add() throughout this file */
130         .leaf           = &ip6_null_entry,	/* initial leaf is the null entry */
131         .fn_flags       = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
132 };
133
134 /* Protects all the ip6 fib; ndisc_dst_alloc() also takes it to update ndisc_dst_gc_list. */
135
136 DEFINE_RWLOCK(rt6_lock);
137
138
139 /* allocate dst with ip6_dst_ops */
140 static __inline__ struct rt6_info *ip6_dst_alloc(void)
141 {
142         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
143 }
144
145 static void ip6_dst_destroy(struct dst_entry *dst)
146 {
147         struct rt6_info *rt = (struct rt6_info *)dst;
148         struct inet6_dev *idev = rt->rt6i_idev;
149
150         if (idev != NULL) {
151                 rt->rt6i_idev = NULL;
152                 in6_dev_put(idev);
153         }       
154 }
155
156 static void ip6_dst_ifdown(struct dst_entry *dst, int how)
157 {
158         struct rt6_info *rt = (struct rt6_info *)dst;
159         struct inet6_dev *idev = rt->rt6i_idev;
160
161         if (idev != NULL && idev->dev != &loopback_dev) {
162                 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
163                 if (loopback_idev != NULL) {
164                         rt->rt6i_idev = loopback_idev;
165                         in6_dev_put(idev);
166                 }
167         }
168 }
169
170 /*
171  *      Route lookup. Any rt6_lock is implied.
172  */
173
174 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
175                                                     int oif,
176                                                     int strict)
177 {
178         struct rt6_info *local = NULL;
179         struct rt6_info *sprt;
180
181         if (oif) {
182                 for (sprt = rt; sprt; sprt = sprt->u.next) {
183                         struct net_device *dev = sprt->rt6i_dev;
184                         if (dev->ifindex == oif)
185                                 return sprt;
186                         if (dev->flags & IFF_LOOPBACK) {
187                                 if (sprt->rt6i_idev == NULL ||
188                                     sprt->rt6i_idev->dev->ifindex != oif) {
189                                         if (strict && oif)
190                                                 continue;
191                                         if (local && (!oif || 
192                                                       local->rt6i_idev->dev->ifindex == oif))
193                                                 continue;
194                                 }
195                                 local = sprt;
196                         }
197                 }
198
199                 if (local)
200                         return local;
201
202                 if (strict)
203                         return &ip6_null_entry;
204         }
205         return rt;
206 }
207
208 /*
209  *      pointer to the last default router chosen. BH is disabled locally.
210  */
211 static struct rt6_info *rt6_dflt_pointer;
212 static DEFINE_SPINLOCK(rt6_dflt_lock);	/* held while rt6_dflt_pointer is updated (rt6_reset_dflt_pointer, rt6_best_dflt) */
213
214 void rt6_reset_dflt_pointer(struct rt6_info *rt)
215 {
216         spin_lock_bh(&rt6_dflt_lock);
217         if (rt == NULL || rt == rt6_dflt_pointer) {
218                 RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
219                 rt6_dflt_pointer = NULL;
220         }
221         spin_unlock_bh(&rt6_dflt_lock);
222 }
223
224 /* Default Router Selection (RFC 2461 6.3.6) */
225 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
226 {
227         struct rt6_info *match = NULL;
228         struct rt6_info *sprt;
229         int mpri = 0;
230
231         for (sprt = rt; sprt; sprt = sprt->u.next) {
232                 struct neighbour *neigh;
233                 int m = 0;
234
235                 if (!oif ||
236                     (sprt->rt6i_dev &&
237                      sprt->rt6i_dev->ifindex == oif))
238                         m += 8;
239
240                 if ((sprt->rt6i_flags & RTF_EXPIRES) &&
241                     time_after(jiffies, sprt->rt6i_expires))
242                         continue;
243
244                 if (sprt == rt6_dflt_pointer)
245                         m += 4;
246
247                 if ((neigh = sprt->rt6i_nexthop) != NULL) {
248                         read_lock_bh(&neigh->lock);
249                         switch (neigh->nud_state) {
250                         case NUD_REACHABLE:
251                                 m += 3;
252                                 break;
253
254                         case NUD_STALE:
255                         case NUD_DELAY:
256                         case NUD_PROBE:
257                                 m += 2;
258                                 break;
259
260                         case NUD_NOARP:
261                         case NUD_PERMANENT:
262                                 m += 1;
263                                 break;
264
265                         case NUD_INCOMPLETE:
266                         default:
267                                 read_unlock_bh(&neigh->lock);
268                                 continue;
269                         }
270                         read_unlock_bh(&neigh->lock);
271                 } else {
272                         continue;
273                 }
274
275                 if (m > mpri || m >= 12) {
276                         match = sprt;
277                         mpri = m;
278                         if (m >= 12) {
279                                 /* we choose the last default router if it
280                                  * is in (probably) reachable state.
281                                  * If route changed, we should do pmtu
282                                  * discovery. --yoshfuji
283                                  */
284                                 break;
285                         }
286                 }
287         }
288
289         spin_lock(&rt6_dflt_lock);
290         if (!match) {
291                 /*
292                  *      No default routers are known to be reachable.
293                  *      SHOULD round robin
294                  */
295                 if (rt6_dflt_pointer) {
296                         for (sprt = rt6_dflt_pointer->u.next;
297                              sprt; sprt = sprt->u.next) {
298                                 if (sprt->u.dst.obsolete <= 0 &&
299                                     sprt->u.dst.error == 0) {
300                                         match = sprt;
301                                         break;
302                                 }
303                         }
304                         for (sprt = rt;
305                              !match && sprt;
306                              sprt = sprt->u.next) {
307                                 if (sprt->u.dst.obsolete <= 0 &&
308                                     sprt->u.dst.error == 0) {
309                                         match = sprt;
310                                         break;
311                                 }
312                                 if (sprt == rt6_dflt_pointer)
313                                         break;
314                         }
315                 }
316         }
317
318         if (match) {
319                 if (rt6_dflt_pointer != match)
320                         RT6_TRACE("changed default router: %p->%p\n",
321                                   rt6_dflt_pointer, match);
322                 rt6_dflt_pointer = match;
323         }
324         spin_unlock(&rt6_dflt_lock);
325
326         if (!match) {
327                 /*
328                  * Last Resort: if no default routers found, 
329                  * use addrconf default route.
330                  * We don't record this route.
331                  */
332                 for (sprt = ip6_routing_table.leaf;
333                      sprt; sprt = sprt->u.next) {
334                         if ((sprt->rt6i_flags & RTF_DEFAULT) &&
335                             (!oif ||
336                              (sprt->rt6i_dev &&
337                               sprt->rt6i_dev->ifindex == oif))) {
338                                 match = sprt;
339                                 break;
340                         }
341                 }
342                 if (!match) {
343                         /* no default route.  give up. */
344                         match = &ip6_null_entry;
345                 }
346         }
347
348         return match;
349 }
350
351 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
352                             int oif, int strict)
353 {
354         struct fib6_node *fn;
355         struct rt6_info *rt;
356
357         read_lock_bh(&rt6_lock);
358         fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
359         rt = rt6_device_match(fn->leaf, oif, strict);
360         dst_hold(&rt->u.dst);
361         rt->u.dst.__use++;
362         read_unlock_bh(&rt6_lock);
363
364         rt->u.dst.lastuse = jiffies;
365         if (rt->u.dst.error == 0)
366                 return rt;
367         dst_release(&rt->u.dst);
368         return NULL;
369 }
370
371 /* ip6_ins_rt is called with FREE rt6_lock.
372    It takes new route entry, the addition fails by any reason the
373    route is freed. In any case, if caller does not hold it, it may
374    be destroyed.
375  */
376
377 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
378 {
379         int err;
380
381         write_lock_bh(&rt6_lock);
382         err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr);
383         write_unlock_bh(&rt6_lock);
384
385         return err;
386 }
387
388 /* No rt6_lock! If COW failed, the function returns dead route entry
389    with dst->error set to errno value.
390  */
391
392 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
393                                 struct in6_addr *saddr)
394 {
395         int err;
396         struct rt6_info *rt;
397
398         /*
399          *      Clone the route.
400          */
401
402         rt = ip6_rt_copy(ort);
403
404         if (rt) {
405                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
406
407                 if (!(rt->rt6i_flags&RTF_GATEWAY))
408                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
409
410                 rt->rt6i_dst.plen = 128;
411                 rt->rt6i_flags |= RTF_CACHE;
412                 rt->u.dst.flags |= DST_HOST;
413
414 #ifdef CONFIG_IPV6_SUBTREES
415                 if (rt->rt6i_src.plen && saddr) {
416                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
417                         rt->rt6i_src.plen = 128;
418                 }
419 #endif
420
421                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
422
423                 dst_hold(&rt->u.dst);
424
425                 err = ip6_ins_rt(rt, NULL, NULL);
426                 if (err == 0)
427                         return rt;
428
429                 rt->u.dst.error = err;
430
431                 return rt;
432         }
433         dst_hold(&ip6_null_entry.u.dst);
434         return &ip6_null_entry;
435 }
436
/*
 * BACKTRACK() - retry a strict lookup higher up the tree.
 *
 * Expects the locals `rt', `fn' and `strict' and the labels `out' and
 * `restart' in the enclosing function.  When the current result is the
 * null entry and the lookup is strict, walk up through fn->parent:
 *  - on reaching a root node, take a reference on rt and jump to `out';
 *  - on reaching a node that carries route info, jump to `restart'.
 * If the walk runs off the top, execution simply continues after the macro.
 *
 * Wrapped in do { } while (0) so that "BACKTRACK();" is exactly one
 * statement and is safe in unbraced if/else bodies.
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry && strict) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_ROOT) { \
				dst_hold(&rt->u.dst); \
				goto out; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
448
449
450 void ip6_route_input(struct sk_buff *skb)
451 {
452         struct fib6_node *fn;
453         struct rt6_info *rt;
454         int strict;
455         int attempts = 3;
456
457         strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
458
459 relookup:
460         read_lock_bh(&rt6_lock);
461
462         fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
463                          &skb->nh.ipv6h->saddr);
464
465 restart:
466         rt = fn->leaf;
467
468         if ((rt->rt6i_flags & RTF_CACHE)) {
469                 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
470                 BACKTRACK();
471                 dst_hold(&rt->u.dst);
472                 goto out;
473         }
474
475         rt = rt6_device_match(rt, skb->dev->ifindex, 0);
476         BACKTRACK();
477
478         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
479                 struct rt6_info *nrt;
480                 dst_hold(&rt->u.dst);
481                 read_unlock_bh(&rt6_lock);
482
483                 nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
484                               &skb->nh.ipv6h->saddr);
485
486                 dst_release(&rt->u.dst);
487                 rt = nrt;
488
489                 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
490                         goto out2;
491
492                 /* Race condition! In the gap, when rt6_lock was
493                    released someone could insert this route.  Relookup.
494                 */
495                 dst_release(&rt->u.dst);
496                 goto relookup;
497         }
498         dst_hold(&rt->u.dst);
499
500 out:
501         read_unlock_bh(&rt6_lock);
502 out2:
503         rt->u.dst.lastuse = jiffies;
504         rt->u.dst.__use++;
505         skb->dst = (struct dst_entry *) rt;
506 }
507
508 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
509 {
510         struct fib6_node *fn;
511         struct rt6_info *rt;
512         int strict;
513         int attempts = 3;
514
515         strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
516
517 relookup:
518         read_lock_bh(&rt6_lock);
519
520         fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
521
522 restart:
523         rt = fn->leaf;
524
525         if ((rt->rt6i_flags & RTF_CACHE)) {
526                 rt = rt6_device_match(rt, fl->oif, strict);
527                 BACKTRACK();
528                 dst_hold(&rt->u.dst);
529                 goto out;
530         }
531         if (rt->rt6i_flags & RTF_DEFAULT) {
532                 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
533                         rt = rt6_best_dflt(rt, fl->oif);
534         } else {
535                 rt = rt6_device_match(rt, fl->oif, strict);
536                 BACKTRACK();
537         }
538
539         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
540                 struct rt6_info *nrt;
541                 dst_hold(&rt->u.dst);
542                 read_unlock_bh(&rt6_lock);
543
544                 nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src);
545
546                 dst_release(&rt->u.dst);
547                 rt = nrt;
548
549                 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
550                         goto out2;
551
552                 /* Race condition! In the gap, when rt6_lock was
553                    released someone could insert this route.  Relookup.
554                 */
555                 dst_release(&rt->u.dst);
556                 goto relookup;
557         }
558         dst_hold(&rt->u.dst);
559
560 out:
561         read_unlock_bh(&rt6_lock);
562 out2:
563         rt->u.dst.lastuse = jiffies;
564         rt->u.dst.__use++;
565         return &rt->u.dst;
566 }
567
568
569 /*
570  *      Destination cache support functions
571  */
572
573 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
574 {
575         struct rt6_info *rt;
576
577         rt = (struct rt6_info *) dst;
578
579         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
580                 return dst;
581
582         dst_release(dst);
583         return NULL;
584 }
585
586 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
587 {
588         struct rt6_info *rt = (struct rt6_info *) dst;
589
590         if (rt) {
591                 if (rt->rt6i_flags & RTF_CACHE)
592                         ip6_del_rt(rt, NULL, NULL);
593                 else
594                         dst_release(dst);
595         }
596         return NULL;
597 }
598
599 static void ip6_link_failure(struct sk_buff *skb)
600 {
601         struct rt6_info *rt;
602
603         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
604
605         rt = (struct rt6_info *) skb->dst;
606         if (rt) {
607                 if (rt->rt6i_flags&RTF_CACHE) {
608                         dst_set_expires(&rt->u.dst, 0);
609                         rt->rt6i_flags |= RTF_EXPIRES;
610                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
611                         rt->rt6i_node->fn_sernum = -1;
612         }
613 }
614
615 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
616 {
617         struct rt6_info *rt6 = (struct rt6_info*)dst;
618
619         if (mtu < dst_pmtu(dst) && rt6->rt6i_dst.plen == 128) {
620                 rt6->rt6i_flags |= RTF_MODIFIED;
621                 if (mtu < IPV6_MIN_MTU)
622                         mtu = IPV6_MIN_MTU;
623                 dst->metrics[RTAX_MTU-1] = mtu;
624         }
625 }
626
627 /* Protected by rt6_lock.  */
628 static struct dst_entry *ndisc_dst_gc_list;	/* linked through dst->next; filled by ndisc_dst_alloc(), reaped by ndisc_dst_gc() */
629 static int ipv6_get_mtu(struct net_device *dev);
630
631 static inline unsigned int ipv6_advmss(unsigned int mtu)
632 {
633         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
634
635         if (mtu < ip6_rt_min_advmss)
636                 mtu = ip6_rt_min_advmss;
637
638         /*
639          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
640          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
641          * IPV6_MAXPLEN is also valid and means: "any MSS, 
642          * rely only on pmtu discovery"
643          */
644         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
645                 mtu = IPV6_MAXPLEN;
646         return mtu;
647 }
648
649 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
650                                   struct neighbour *neigh,
651                                   struct in6_addr *addr,
652                                   int (*output)(struct sk_buff *))
653 {
654         struct rt6_info *rt;
655         struct inet6_dev *idev = in6_dev_get(dev);
656
657         if (unlikely(idev == NULL))
658                 return NULL;
659
660         rt = ip6_dst_alloc();
661         if (unlikely(rt == NULL)) {
662                 in6_dev_put(idev);
663                 goto out;
664         }
665
666         dev_hold(dev);
667         if (neigh)
668                 neigh_hold(neigh);
669         else
670                 neigh = ndisc_get_neigh(dev, addr);
671
672         rt->rt6i_dev      = dev;
673         rt->rt6i_idev     = idev;
674         rt->rt6i_nexthop  = neigh;
675         atomic_set(&rt->u.dst.__refcnt, 1);
676         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
677         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
678         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
679         rt->u.dst.output  = output;
680
681 #if 0   /* there's no chance to use these for ndisc */
682         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 
683                                 ? DST_HOST 
684                                 : 0;
685         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
686         rt->rt6i_dst.plen = 128;
687 #endif
688
689         write_lock_bh(&rt6_lock);
690         rt->u.dst.next = ndisc_dst_gc_list;
691         ndisc_dst_gc_list = &rt->u.dst;
692         write_unlock_bh(&rt6_lock);
693
694         fib6_force_start_gc();
695
696 out:
697         return (struct dst_entry *)rt;
698 }
699
700 int ndisc_dst_gc(int *more)
701 {
702         struct dst_entry *dst, *next, **pprev;
703         int freed;
704
705         next = NULL;
706         pprev = &ndisc_dst_gc_list;
707         freed = 0;
708         while ((dst = *pprev) != NULL) {
709                 if (!atomic_read(&dst->__refcnt)) {
710                         *pprev = dst->next;
711                         dst_free(dst);
712                         freed++;
713                 } else {
714                         pprev = &dst->next;
715                         (*more)++;
716                 }
717         }
718
719         return freed;
720 }
721
722 static int ip6_dst_gc(void)
723 {
724         static unsigned expire = 30*HZ;
725         static unsigned long last_gc;
726         unsigned long now = jiffies;
727
728         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
729             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
730                 goto out;
731
732         expire++;
733         fib6_run_gc(expire);
734         last_gc = now;
735         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
736                 expire = ip6_rt_gc_timeout>>1;
737
738 out:
739         expire -= expire>>ip6_rt_gc_elasticity;
740         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
741 }
742
743 /* Clean host part of a prefix. Not necessary in radix tree,
744    but results in cleaner routing tables.
745
746    Remove it only when all the things will work!
747  */
748
749 static int ipv6_get_mtu(struct net_device *dev)
750 {
751         int mtu = IPV6_MIN_MTU;
752         struct inet6_dev *idev;
753
754         idev = in6_dev_get(dev);
755         if (idev) {
756                 mtu = idev->cnf.mtu6;
757                 in6_dev_put(idev);
758         }
759         return mtu;
760 }
761
762 static int ipv6_get_hoplimit(struct net_device *dev)
763 {
764         int hoplimit = ipv6_devconf.hop_limit;
765         struct inet6_dev *idev;
766
767         idev = in6_dev_get(dev);
768         if (idev) {
769                 hoplimit = idev->cnf.hop_limit;
770                 in6_dev_put(idev);
771         }
772         return hoplimit;
773 }
774
775 /*
776  *
777  */
778
779 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
780 {
781         int err;
782         struct rtmsg *r;
783         struct rtattr **rta;
784         struct rt6_info *rt = NULL;
785         struct net_device *dev = NULL;
786         struct inet6_dev *idev = NULL;
787         int addr_type;
788
789         rta = (struct rtattr **) _rtattr;
790
791         if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
792                 return -EINVAL;
793 #ifndef CONFIG_IPV6_SUBTREES
794         if (rtmsg->rtmsg_src_len)
795                 return -EINVAL;
796 #endif
797         if (rtmsg->rtmsg_ifindex) {
798                 err = -ENODEV;
799                 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
800                 if (!dev)
801                         goto out;
802                 idev = in6_dev_get(dev);
803                 if (!idev)
804                         goto out;
805         }
806
807         if (rtmsg->rtmsg_metric == 0)
808                 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
809
810         rt = ip6_dst_alloc();
811
812         if (rt == NULL) {
813                 err = -ENOMEM;
814                 goto out;
815         }
816
817         rt->u.dst.obsolete = -1;
818         rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info);
819         if (nlh && (r = NLMSG_DATA(nlh))) {
820                 rt->rt6i_protocol = r->rtm_protocol;
821         } else {
822                 rt->rt6i_protocol = RTPROT_BOOT;
823         }
824
825         addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
826
827         if (addr_type & IPV6_ADDR_MULTICAST)
828                 rt->u.dst.input = ip6_mc_input;
829         else
830                 rt->u.dst.input = ip6_forward;
831
832         rt->u.dst.output = ip6_output;
833
834         ipv6_addr_prefix(&rt->rt6i_dst.addr, 
835                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
836         rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
837         if (rt->rt6i_dst.plen == 128)
838                rt->u.dst.flags = DST_HOST;
839
840 #ifdef CONFIG_IPV6_SUBTREES
841         ipv6_addr_prefix(&rt->rt6i_src.addr, 
842                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
843         rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
844 #endif
845
846         rt->rt6i_metric = rtmsg->rtmsg_metric;
847
848         /* We cannot add true routes via loopback here,
849            they would result in kernel looping; promote them to reject routes
850          */
851         if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
852             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
853                 /* hold loopback dev/idev if we haven't done so. */
854                 if (dev != &loopback_dev) {
855                         if (dev) {
856                                 dev_put(dev);
857                                 in6_dev_put(idev);
858                         }
859                         dev = &loopback_dev;
860                         dev_hold(dev);
861                         idev = in6_dev_get(dev);
862                         if (!idev) {
863                                 err = -ENODEV;
864                                 goto out;
865                         }
866                 }
867                 rt->u.dst.output = ip6_pkt_discard_out;
868                 rt->u.dst.input = ip6_pkt_discard;
869                 rt->u.dst.error = -ENETUNREACH;
870                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
871                 goto install_route;
872         }
873
874         if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
875                 struct in6_addr *gw_addr;
876                 int gwa_type;
877
878                 gw_addr = &rtmsg->rtmsg_gateway;
879                 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
880                 gwa_type = ipv6_addr_type(gw_addr);
881
882                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
883                         struct rt6_info *grt;
884
885                         /* IPv6 strictly inhibits using not link-local
886                            addresses as nexthop address.
887                            Otherwise, router will not able to send redirects.
888                            It is very good, but in some (rare!) circumstances
889                            (SIT, PtP, NBMA NOARP links) it is handy to allow
890                            some exceptions. --ANK
891                          */
892                         err = -EINVAL;
893                         if (!(gwa_type&IPV6_ADDR_UNICAST))
894                                 goto out;
895
896                         grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
897
898                         err = -EHOSTUNREACH;
899                         if (grt == NULL)
900                                 goto out;
901                         if (dev) {
902                                 if (dev != grt->rt6i_dev) {
903                                         dst_release(&grt->u.dst);
904                                         goto out;
905                                 }
906                         } else {
907                                 dev = grt->rt6i_dev;
908                                 idev = grt->rt6i_idev;
909                                 dev_hold(dev);
910                                 in6_dev_hold(grt->rt6i_idev);
911                         }
912                         if (!(grt->rt6i_flags&RTF_GATEWAY))
913                                 err = 0;
914                         dst_release(&grt->u.dst);
915
916                         if (err)
917                                 goto out;
918                 }
919                 err = -EINVAL;
920                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
921                         goto out;
922         }
923
924         err = -ENODEV;
925         if (dev == NULL)
926                 goto out;
927
928         if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
929                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
930                 if (IS_ERR(rt->rt6i_nexthop)) {
931                         err = PTR_ERR(rt->rt6i_nexthop);
932                         rt->rt6i_nexthop = NULL;
933                         goto out;
934                 }
935         }
936
937         rt->rt6i_flags = rtmsg->rtmsg_flags;
938
939 install_route:
940         if (rta && rta[RTA_METRICS-1]) {
941                 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
942                 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
943
944                 while (RTA_OK(attr, attrlen)) {
945                         unsigned flavor = attr->rta_type;
946                         if (flavor) {
947                                 if (flavor > RTAX_MAX) {
948                                         err = -EINVAL;
949                                         goto out;
950                                 }
951                                 rt->u.dst.metrics[flavor-1] =
952                                         *(u32 *)RTA_DATA(attr);
953                         }
954                         attr = RTA_NEXT(attr, attrlen);
955                 }
956         }
957
958         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) {
959                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
960                         rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
961                                 IPV6_DEFAULT_MCASTHOPS;
962                 else
963                         rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
964                                 ipv6_get_hoplimit(dev);
965         }
966
967         if (!rt->u.dst.metrics[RTAX_MTU-1])
968                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
969         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
970                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
971         rt->u.dst.dev = dev;
972         rt->rt6i_idev = idev;
973         return ip6_ins_rt(rt, nlh, _rtattr);
974
975 out:
976         if (dev)
977                 dev_put(dev);
978         if (idev)
979                 in6_dev_put(idev);
980         if (rt)
981                 dst_free((struct dst_entry *) rt);
982         return err;
983 }
984
985 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
986 {
987         int err;
988
989         write_lock_bh(&rt6_lock);
990
991         rt6_reset_dflt_pointer(NULL);
992
993         err = fib6_del(rt, nlh, _rtattr);
994         dst_release(&rt->u.dst);
995
996         write_unlock_bh(&rt6_lock);
997
998         return err;
999 }
1000
1001 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
1002 {
1003         struct fib6_node *fn;
1004         struct rt6_info *rt;
1005         int err = -ESRCH;
1006
1007         read_lock_bh(&rt6_lock);
1008
1009         fn = fib6_locate(&ip6_routing_table,
1010                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1011                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1012         
1013         if (fn) {
1014                 for (rt = fn->leaf; rt; rt = rt->u.next) {
1015                         if (rtmsg->rtmsg_ifindex &&
1016                             (rt->rt6i_dev == NULL ||
1017                              rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1018                                 continue;
1019                         if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1020                             !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1021                                 continue;
1022                         if (rtmsg->rtmsg_metric &&
1023                             rtmsg->rtmsg_metric != rt->rt6i_metric)
1024                                 continue;
1025                         dst_hold(&rt->u.dst);
1026                         read_unlock_bh(&rt6_lock);
1027
1028                         return ip6_del_rt(rt, nlh, _rtattr);
1029                 }
1030         }
1031         read_unlock_bh(&rt6_lock);
1032
1033         return err;
1034 }
1035
1036 /*
1037  *      Handle redirects
1038  */
1039 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1040                   struct neighbour *neigh, u8 *lladdr, int on_link)
1041 {
1042         struct rt6_info *rt, *nrt;
1043
1044         /* Locate old route to this destination. */
1045         rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1046
1047         if (rt == NULL)
1048                 return;
1049
1050         if (neigh->dev != rt->rt6i_dev)
1051                 goto out;
1052
1053         /*
1054          * Current route is on-link; redirect is always invalid.
1055          * 
1056          * Seems, previous statement is not true. It could
1057          * be node, which looks for us as on-link (f.e. proxy ndisc)
1058          * But then router serving it might decide, that we should
1059          * know truth 8)8) --ANK (980726).
1060          */
1061         if (!(rt->rt6i_flags&RTF_GATEWAY))
1062                 goto out;
1063
1064         /*
1065          *      RFC 2461 specifies that redirects should only be
1066          *      accepted if they come from the nexthop to the target.
1067          *      Due to the way default routers are chosen, this notion
1068          *      is a bit fuzzy and one might need to check all default
1069          *      routers.
1070          */
1071         if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1072                 if (rt->rt6i_flags & RTF_DEFAULT) {
1073                         struct rt6_info *rt1;
1074
1075                         read_lock(&rt6_lock);
1076                         for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1077                                 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1078                                         dst_hold(&rt1->u.dst);
1079                                         dst_release(&rt->u.dst);
1080                                         read_unlock(&rt6_lock);
1081                                         rt = rt1;
1082                                         goto source_ok;
1083                                 }
1084                         }
1085                         read_unlock(&rt6_lock);
1086                 }
1087                 if (net_ratelimit())
1088                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1089                                "for redirect target\n");
1090                 goto out;
1091         }
1092
1093 source_ok:
1094
1095         /*
1096          *      We have finally decided to accept it.
1097          */
1098
1099         neigh_update(neigh, lladdr, NUD_STALE, 
1100                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1101                      NEIGH_UPDATE_F_OVERRIDE|
1102                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1103                                      NEIGH_UPDATE_F_ISROUTER))
1104                      );
1105
1106         /*
1107          * Redirect received -> path was valid.
1108          * Look, redirects are sent only in response to data packets,
1109          * so that this nexthop apparently is reachable. --ANK
1110          */
1111         dst_confirm(&rt->u.dst);
1112
1113         /* Duplicate redirect: silently ignore. */
1114         if (neigh == rt->u.dst.neighbour)
1115                 goto out;
1116
1117         nrt = ip6_rt_copy(rt);
1118         if (nrt == NULL)
1119                 goto out;
1120
1121         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1122         if (on_link)
1123                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1124
1125         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1126         nrt->rt6i_dst.plen = 128;
1127         nrt->u.dst.flags |= DST_HOST;
1128
1129         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1130         nrt->rt6i_nexthop = neigh_clone(neigh);
1131         /* Reset pmtu, it may be better */
1132         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1133         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&nrt->u.dst));
1134
1135         if (ip6_ins_rt(nrt, NULL, NULL))
1136                 goto out;
1137
1138         if (rt->rt6i_flags&RTF_CACHE) {
1139                 ip6_del_rt(rt, NULL, NULL);
1140                 return;
1141         }
1142
1143 out:
1144         dst_release(&rt->u.dst);
1145         return;
1146 }
1147
1148 /*
1149  *      Handle ICMP "packet too big" messages
1150  *      i.e. Path MTU discovery
1151  */
1152
1153 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1154                         struct net_device *dev, u32 pmtu)
1155 {
1156         struct rt6_info *rt, *nrt;
1157
1158         if (pmtu < IPV6_MIN_MTU) {
1159                 if (net_ratelimit())
1160                         printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n",
1161                                pmtu);
1162                 /* According to RFC1981, the PMTU is set to the IPv6 minimum
1163                    link MTU if the node receives a Packet Too Big message
1164                    reporting next-hop MTU that is less than the IPv6 minimum MTU.
1165                    */
1166                 pmtu = IPV6_MIN_MTU;
1167         }
1168
1169         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1170
1171         if (rt == NULL)
1172                 return;
1173
1174         if (pmtu >= dst_pmtu(&rt->u.dst))
1175                 goto out;
1176
1177         /* New mtu received -> path was valid.
1178            They are sent only in response to data packets,
1179            so that this nexthop apparently is reachable. --ANK
1180          */
1181         dst_confirm(&rt->u.dst);
1182
1183         /* Host route. If it is static, it would be better
1184            not to override it, but add new one, so that
1185            when cache entry will expire old pmtu
1186            would return automatically.
1187          */
1188         if (rt->rt6i_flags & RTF_CACHE) {
1189                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1190                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1191                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1192                 goto out;
1193         }
1194
1195         /* Network route.
1196            Two cases are possible:
1197            1. It is connected route. Action: COW
1198            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1199          */
1200         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1201                 nrt = rt6_cow(rt, daddr, saddr);
1202                 if (!nrt->u.dst.error) {
1203                         nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1204                         /* According to RFC 1981, detecting PMTU increase shouldn't be
1205                            happened within 5 mins, the recommended timer is 10 mins.
1206                            Here this route expiration time is set to ip6_rt_mtu_expires
1207                            which is 10 mins. After 10 mins the decreased pmtu is expired
1208                            and detecting PMTU increase will be automatically happened.
1209                          */
1210                         dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1211                         nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1212                 }
1213                 dst_release(&nrt->u.dst);
1214         } else {
1215                 nrt = ip6_rt_copy(rt);
1216                 if (nrt == NULL)
1217                         goto out;
1218                 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1219                 nrt->rt6i_dst.plen = 128;
1220                 nrt->u.dst.flags |= DST_HOST;
1221                 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1222                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1223                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1224                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1225                 ip6_ins_rt(nrt, NULL, NULL);
1226         }
1227
1228 out:
1229         dst_release(&rt->u.dst);
1230 }
1231
1232 /*
1233  *      Misc support functions
1234  */
1235
1236 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1237 {
1238         struct rt6_info *rt = ip6_dst_alloc();
1239
1240         if (rt) {
1241                 rt->u.dst.input = ort->u.dst.input;
1242                 rt->u.dst.output = ort->u.dst.output;
1243
1244                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1245                 rt->u.dst.dev = ort->u.dst.dev;
1246                 if (rt->u.dst.dev)
1247                         dev_hold(rt->u.dst.dev);
1248                 rt->rt6i_idev = ort->rt6i_idev;
1249                 if (rt->rt6i_idev)
1250                         in6_dev_hold(rt->rt6i_idev);
1251                 rt->u.dst.lastuse = jiffies;
1252                 rt->rt6i_expires = 0;
1253
1254                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1255                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1256                 rt->rt6i_metric = 0;
1257
1258                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1259 #ifdef CONFIG_IPV6_SUBTREES
1260                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1261 #endif
1262         }
1263         return rt;
1264 }
1265
1266 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1267 {       
1268         struct rt6_info *rt;
1269         struct fib6_node *fn;
1270
1271         fn = &ip6_routing_table;
1272
1273         write_lock_bh(&rt6_lock);
1274         for (rt = fn->leaf; rt; rt=rt->u.next) {
1275                 if (dev == rt->rt6i_dev &&
1276                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1277                         break;
1278         }
1279         if (rt)
1280                 dst_hold(&rt->u.dst);
1281         write_unlock_bh(&rt6_lock);
1282         return rt;
1283 }
1284
1285 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1286                                      struct net_device *dev)
1287 {
1288         struct in6_rtmsg rtmsg;
1289
1290         memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1291         rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1292         ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1293         rtmsg.rtmsg_metric = 1024;
1294         rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1295
1296         rtmsg.rtmsg_ifindex = dev->ifindex;
1297
1298         ip6_route_add(&rtmsg, NULL, NULL);
1299         return rt6_get_dflt_router(gwaddr, dev);
1300 }
1301
1302 void rt6_purge_dflt_routers(void)
1303 {
1304         struct rt6_info *rt;
1305
1306 restart:
1307         read_lock_bh(&rt6_lock);
1308         for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1309                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1310                         dst_hold(&rt->u.dst);
1311
1312                         rt6_reset_dflt_pointer(NULL);
1313
1314                         read_unlock_bh(&rt6_lock);
1315
1316                         ip6_del_rt(rt, NULL, NULL);
1317
1318                         goto restart;
1319                 }
1320         }
1321         read_unlock_bh(&rt6_lock);
1322 }
1323
1324 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1325 {
1326         struct in6_rtmsg rtmsg;
1327         int err;
1328
1329         switch(cmd) {
1330         case SIOCADDRT:         /* Add a route */
1331         case SIOCDELRT:         /* Delete a route */
1332                 if (!capable(CAP_NET_ADMIN))
1333                         return -EPERM;
1334                 err = copy_from_user(&rtmsg, arg,
1335                                      sizeof(struct in6_rtmsg));
1336                 if (err)
1337                         return -EFAULT;
1338                         
1339                 rtnl_lock();
1340                 switch (cmd) {
1341                 case SIOCADDRT:
1342                         err = ip6_route_add(&rtmsg, NULL, NULL);
1343                         break;
1344                 case SIOCDELRT:
1345                         err = ip6_route_del(&rtmsg, NULL, NULL);
1346                         break;
1347                 default:
1348                         err = -EINVAL;
1349                 }
1350                 rtnl_unlock();
1351
1352                 return err;
1353         };
1354
1355         return -EINVAL;
1356 }
1357
1358 /*
1359  *      Drop the packet on the floor
1360  */
1361
1362 int ip6_pkt_discard(struct sk_buff *skb)
1363 {
1364         IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1365         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1366         kfree_skb(skb);
1367         return 0;
1368 }
1369
1370 int ip6_pkt_discard_out(struct sk_buff *skb)
1371 {
1372         skb->dev = skb->dst->dev;
1373         return ip6_pkt_discard(skb);
1374 }
1375
1376 /*
1377  *      Allocate a dst for local (unicast / anycast) address.
1378  */
1379
1380 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1381                                     const struct in6_addr *addr,
1382                                     int anycast)
1383 {
1384         struct rt6_info *rt = ip6_dst_alloc();
1385
1386         if (rt == NULL)
1387                 return ERR_PTR(-ENOMEM);
1388
1389         dev_hold(&loopback_dev);
1390         in6_dev_hold(idev);
1391
1392         rt->u.dst.flags = DST_HOST;
1393         rt->u.dst.input = ip6_input;
1394         rt->u.dst.output = ip6_output;
1395         rt->rt6i_dev = &loopback_dev;
1396         rt->rt6i_idev = idev;
1397         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1398         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
1399         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ipv6_get_hoplimit(rt->rt6i_dev);
1400         rt->u.dst.obsolete = -1;
1401
1402         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1403         if (!anycast)
1404                 rt->rt6i_flags |= RTF_LOCAL;
1405         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1406         if (rt->rt6i_nexthop == NULL) {
1407                 dst_free((struct dst_entry *) rt);
1408                 return ERR_PTR(-ENOMEM);
1409         }
1410
1411         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1412         rt->rt6i_dst.plen = 128;
1413
1414         atomic_set(&rt->u.dst.__refcnt, 1);
1415
1416         return rt;
1417 }
1418
1419 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1420 {
1421         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1422             rt != &ip6_null_entry) {
1423                 RT6_TRACE("deleted by ifdown %p\n", rt);
1424                 return -1;
1425         }
1426         return 0;
1427 }
1428
1429 void rt6_ifdown(struct net_device *dev)
1430 {
1431         write_lock_bh(&rt6_lock);
1432         fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1433         write_unlock_bh(&rt6_lock);
1434 }
1435
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU value */
};
1441
1442 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1443 {
1444         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1445         struct inet6_dev *idev;
1446
1447         /* In IPv6 pmtu discovery is not optional,
1448            so that RTAX_MTU lock cannot disable it.
1449            We still use this lock to block changes
1450            caused by addrconf/ndisc.
1451         */
1452
1453         idev = __in6_dev_get(arg->dev);
1454         if (idev == NULL)
1455                 return 0;
1456
1457         /* For administrative MTU increase, there is no way to discover
1458            IPv6 PMTU increase, so PMTU increase should be updated here.
1459            Since RFC 1981 doesn't include administrative MTU increase
1460            update PMTU increase is a MUST. (i.e. jumbo frame)
1461          */
1462         /*
1463            If new MTU is less than route PMTU, this new MTU will be the
1464            lowest MTU in the path, update the route PMTU to reflect PMTU
1465            decreases; if new MTU is greater than route PMTU, and the
1466            old MTU is the lowest MTU in the path, update the route PMTU
1467            to reflect the increase. In this case if the other nodes' MTU
1468            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1469            PMTU discouvery.
1470          */
1471         if (rt->rt6i_dev == arg->dev &&
1472             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1473             (dst_pmtu(&rt->u.dst) > arg->mtu ||
1474              (dst_pmtu(&rt->u.dst) < arg->mtu &&
1475               dst_pmtu(&rt->u.dst) == idev->cnf.mtu6)))
1476                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1477         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1478         return 0;
1479 }
1480
1481 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1482 {
1483         struct rt6_mtu_change_arg arg;
1484
1485         arg.dev = dev;
1486         arg.mtu = mtu;
1487         read_lock_bh(&rt6_lock);
1488         fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1489         read_unlock_bh(&rt6_lock);
1490 }
1491
1492 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1493                               struct in6_rtmsg *rtmsg)
1494 {
1495         memset(rtmsg, 0, sizeof(*rtmsg));
1496
1497         rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1498         rtmsg->rtmsg_src_len = r->rtm_src_len;
1499         rtmsg->rtmsg_flags = RTF_UP;
1500         if (r->rtm_type == RTN_UNREACHABLE)
1501                 rtmsg->rtmsg_flags |= RTF_REJECT;
1502
1503         if (rta[RTA_GATEWAY-1]) {
1504                 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1505                         return -EINVAL;
1506                 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1507                 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1508         }
1509         if (rta[RTA_DST-1]) {
1510                 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1511                         return -EINVAL;
1512                 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1513         }
1514         if (rta[RTA_SRC-1]) {
1515                 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1516                         return -EINVAL;
1517                 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1518         }
1519         if (rta[RTA_OIF-1]) {
1520                 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1521                         return -EINVAL;
1522                 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1523         }
1524         if (rta[RTA_PRIORITY-1]) {
1525                 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1526                         return -EINVAL;
1527                 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1528         }
1529         return 0;
1530 }
1531
1532 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1533 {
1534         struct rtmsg *r = NLMSG_DATA(nlh);
1535         struct in6_rtmsg rtmsg;
1536
1537         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1538                 return -EINVAL;
1539         return ip6_route_del(&rtmsg, nlh, arg);
1540 }
1541
1542 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1543 {
1544         struct rtmsg *r = NLMSG_DATA(nlh);
1545         struct in6_rtmsg rtmsg;
1546
1547         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1548                 return -EINVAL;
1549         return ip6_route_add(&rtmsg, nlh, arg);
1550 }
1551
/* Per-call state passed through the FIB walker to rt6_dump_route(). */
struct rt6_rtnl_dump_arg {
	struct sk_buff *skb;		/* buffer the dump is written to */
	struct netlink_callback *cb;	/* netlink dump callback state */
};
1557
1558 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1559                          struct in6_addr *dst,
1560                          struct in6_addr *src,
1561                          int iif,
1562                          int type, u32 pid, u32 seq,
1563                          struct nlmsghdr *in_nlh, int prefix)
1564 {
1565         struct rtmsg *rtm;
1566         struct nlmsghdr  *nlh;
1567         unsigned char    *b = skb->tail;
1568         struct rta_cacheinfo ci;
1569
1570         if (prefix) {   /* user wants prefix routes only */
1571                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1572                         /* success since this is not a prefix route */
1573                         return 1;
1574                 }
1575         }
1576
1577         if (!pid && in_nlh) {
1578                 pid = in_nlh->nlmsg_pid;
1579         }
1580
1581         nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
1582         rtm = NLMSG_DATA(nlh);
1583         rtm->rtm_family = AF_INET6;
1584         rtm->rtm_dst_len = rt->rt6i_dst.plen;
1585         rtm->rtm_src_len = rt->rt6i_src.plen;
1586         rtm->rtm_tos = 0;
1587         rtm->rtm_table = RT_TABLE_MAIN;
1588         if (rt->rt6i_flags&RTF_REJECT)
1589                 rtm->rtm_type = RTN_UNREACHABLE;
1590         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1591                 rtm->rtm_type = RTN_LOCAL;
1592         else
1593                 rtm->rtm_type = RTN_UNICAST;
1594         rtm->rtm_flags = 0;
1595         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1596         rtm->rtm_protocol = rt->rt6i_protocol;
1597         if (rt->rt6i_flags&RTF_DYNAMIC)
1598                 rtm->rtm_protocol = RTPROT_REDIRECT;
1599         else if (rt->rt6i_flags & RTF_ADDRCONF)
1600                 rtm->rtm_protocol = RTPROT_KERNEL;
1601         else if (rt->rt6i_flags&RTF_DEFAULT)
1602                 rtm->rtm_protocol = RTPROT_RA;
1603
1604         if (rt->rt6i_flags&RTF_CACHE)
1605                 rtm->rtm_flags |= RTM_F_CLONED;
1606
1607         if (dst) {
1608                 RTA_PUT(skb, RTA_DST, 16, dst);
1609                 rtm->rtm_dst_len = 128;
1610         } else if (rtm->rtm_dst_len)
1611                 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1612 #ifdef CONFIG_IPV6_SUBTREES
1613         if (src) {
1614                 RTA_PUT(skb, RTA_SRC, 16, src);
1615                 rtm->rtm_src_len = 128;
1616         } else if (rtm->rtm_src_len)
1617                 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1618 #endif
1619         if (iif)
1620                 RTA_PUT(skb, RTA_IIF, 4, &iif);
1621         else if (dst) {
1622                 struct in6_addr saddr_buf;
1623                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1624                         RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1625         }
1626         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1627                 goto rtattr_failure;
1628         if (rt->u.dst.neighbour)
1629                 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1630         if (rt->u.dst.dev)
1631                 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1632         RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1633         ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1634         if (rt->rt6i_expires)
1635                 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1636         else
1637                 ci.rta_expires = 0;
1638         ci.rta_used = rt->u.dst.__use;
1639         ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1640         ci.rta_error = rt->u.dst.error;
1641         ci.rta_id = 0;
1642         ci.rta_ts = 0;
1643         ci.rta_tsage = 0;
1644         RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1645         nlh->nlmsg_len = skb->tail - b;
1646         return skb->len;
1647
1648 nlmsg_failure:
1649 rtattr_failure:
1650         skb_trim(skb, b - skb->data);
1651         return -1;
1652 }
1653
1654 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1655 {
1656         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1657         int prefix;
1658
1659         if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1660                 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1661                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1662         } else
1663                 prefix = 0;
1664
1665         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1666                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1667                      NULL, prefix);
1668 }
1669
1670 static int fib6_dump_node(struct fib6_walker_t *w)
1671 {
1672         int res;
1673         struct rt6_info *rt;
1674
1675         for (rt = w->leaf; rt; rt = rt->u.next) {
1676                 res = rt6_dump_route(rt, w->args);
1677                 if (res < 0) {
1678                         /* Frame is full, suspend walking */
1679                         w->leaf = rt;
1680                         return 1;
1681                 }
1682                 BUG_TRAP(res!=0);
1683         }
1684         w->leaf = NULL;
1685         return 0;
1686 }
1687
1688 static void fib6_dump_end(struct netlink_callback *cb)
1689 {
1690         struct fib6_walker_t *w = (void*)cb->args[0];
1691
1692         if (w) {
1693                 cb->args[0] = 0;
1694                 fib6_walker_unlink(w);
1695                 kfree(w);
1696         }
1697         if (cb->args[1]) {
1698                 cb->done = (void*)cb->args[1];
1699                 cb->args[1] = 0;
1700         }
1701 }
1702
1703 static int fib6_dump_done(struct netlink_callback *cb)
1704 {
1705         fib6_dump_end(cb);
1706         return cb->done(cb);
1707 }
1708
1709 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1710 {
1711         struct rt6_rtnl_dump_arg arg;
1712         struct fib6_walker_t *w;
1713         int res;
1714
1715         arg.skb = skb;
1716         arg.cb = cb;
1717
1718         w = (void*)cb->args[0];
1719         if (w == NULL) {
1720                 /* New dump:
1721                  * 
1722                  * 1. hook callback destructor.
1723                  */
1724                 cb->args[1] = (long)cb->done;
1725                 cb->done = fib6_dump_done;
1726
1727                 /*
1728                  * 2. allocate and initialize walker.
1729                  */
1730                 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1731                 if (w == NULL)
1732                         return -ENOMEM;
1733                 RT6_TRACE("dump<%p", w);
1734                 memset(w, 0, sizeof(*w));
1735                 w->root = &ip6_routing_table;
1736                 w->func = fib6_dump_node;
1737                 w->args = &arg;
1738                 cb->args[0] = (long)w;
1739                 read_lock_bh(&rt6_lock);
1740                 res = fib6_walk(w);
1741                 read_unlock_bh(&rt6_lock);
1742         } else {
1743                 w->args = &arg;
1744                 read_lock_bh(&rt6_lock);
1745                 res = fib6_walk_continue(w);
1746                 read_unlock_bh(&rt6_lock);
1747         }
1748 #if RT6_DEBUG >= 3
1749         if (res <= 0 && skb->len == 0)
1750                 RT6_TRACE("%p>dump end\n", w);
1751 #endif
1752         res = res < 0 ? res : skb->len;
1753         /* res < 0 is an error. (really, impossible)
1754            res == 0 means that dump is complete, but skb still can contain data.
1755            res > 0 dump is not complete, but frame is full.
1756          */
1757         /* Destroy walker, if dump of this table is complete. */
1758         if (res <= 0)
1759                 fib6_dump_end(cb);
1760         return res;
1761 }
1762
1763 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1764 {
1765         struct rtattr **rta = arg;
1766         int iif = 0;
1767         int err = -ENOBUFS;
1768         struct sk_buff *skb;
1769         struct flowi fl;
1770         struct rt6_info *rt;
1771
1772         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1773         if (skb == NULL)
1774                 goto out;
1775
1776         /* Reserve room for dummy headers, this skb can pass
1777            through good chunk of routing engine.
1778          */
1779         skb->mac.raw = skb->data;
1780         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1781
1782         memset(&fl, 0, sizeof(fl));
1783         if (rta[RTA_SRC-1])
1784                 ipv6_addr_copy(&fl.fl6_src,
1785                                (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1786         if (rta[RTA_DST-1])
1787                 ipv6_addr_copy(&fl.fl6_dst,
1788                                (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1789
1790         if (rta[RTA_IIF-1])
1791                 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1792
1793         if (iif) {
1794                 struct net_device *dev;
1795                 dev = __dev_get_by_index(iif);
1796                 if (!dev) {
1797                         err = -ENODEV;
1798                         goto out_free;
1799                 }
1800         }
1801
1802         fl.oif = 0;
1803         if (rta[RTA_OIF-1])
1804                 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1805
1806         rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1807
1808         skb->dst = &rt->u.dst;
1809
1810         NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1811         err = rt6_fill_node(skb, rt, 
1812                             &fl.fl6_dst, &fl.fl6_src,
1813                             iif,
1814                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1815                             nlh->nlmsg_seq, nlh, 0);
1816         if (err < 0) {
1817                 err = -EMSGSIZE;
1818                 goto out_free;
1819         }
1820
1821         err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1822         if (err > 0)
1823                 err = 0;
1824 out:
1825         return err;
1826 out_free:
1827         kfree_skb(skb);
1828         goto out;       
1829 }
1830
1831 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh)
1832 {
1833         struct sk_buff *skb;
1834         int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1835
1836         skb = alloc_skb(size, gfp_any());
1837         if (!skb) {
1838                 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1839                 return;
1840         }
1841         if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0) < 0) {
1842                 kfree_skb(skb);
1843                 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1844                 return;
1845         }
1846         NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
1847         netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
1848 }
1849
1850 /*
1851  *      /proc
1852  */
1853
1854 #ifdef CONFIG_PROC_FS
1855
/*
 *      Nominal size in bytes of one route line in /proc/net/ipv6_route.
 *      The terms add up to 150, which is the length of a line written
 *      by rt6_info_route() when the device name is at most 8 characters.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/*
 *      State shared between rt6_proc_info() and rt6_info_route()
 *      while the routing table is formatted into a /proc buffer.
 */
struct rt6_proc_arg
{
        char *buffer;   /* output buffer                              */
        int offset;     /* byte offset requested by the reader        */
        int length;     /* number of bytes requested by the reader    */
        int skip;       /* routes skipped so far to reach the offset  */
        int len;        /* bytes written to buffer so far             */
};
1866
1867 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1868 {
1869         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1870         int i;
1871
1872         if (arg->skip < arg->offset / RT6_INFO_LEN) {
1873                 arg->skip++;
1874                 return 0;
1875         }
1876
1877         if (arg->len >= arg->length)
1878                 return 0;
1879
1880         for (i=0; i<16; i++) {
1881                 sprintf(arg->buffer + arg->len, "%02x",
1882                         rt->rt6i_dst.addr.s6_addr[i]);
1883                 arg->len += 2;
1884         }
1885         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1886                             rt->rt6i_dst.plen);
1887
1888 #ifdef CONFIG_IPV6_SUBTREES
1889         for (i=0; i<16; i++) {
1890                 sprintf(arg->buffer + arg->len, "%02x",
1891                         rt->rt6i_src.addr.s6_addr[i]);
1892                 arg->len += 2;
1893         }
1894         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1895                             rt->rt6i_src.plen);
1896 #else
1897         sprintf(arg->buffer + arg->len,
1898                 "00000000000000000000000000000000 00 ");
1899         arg->len += 36;
1900 #endif
1901
1902         if (rt->rt6i_nexthop) {
1903                 for (i=0; i<16; i++) {
1904                         sprintf(arg->buffer + arg->len, "%02x",
1905                                 rt->rt6i_nexthop->primary_key[i]);
1906                         arg->len += 2;
1907                 }
1908         } else {
1909                 sprintf(arg->buffer + arg->len,
1910                         "00000000000000000000000000000000");
1911                 arg->len += 32;
1912         }
1913         arg->len += sprintf(arg->buffer + arg->len,
1914                             " %08x %08x %08x %08x %8s\n",
1915                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1916                             rt->u.dst.__use, rt->rt6i_flags, 
1917                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
1918         return 0;
1919 }
1920
1921 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1922 {
1923         struct rt6_proc_arg arg;
1924         arg.buffer = buffer;
1925         arg.offset = offset;
1926         arg.length = length;
1927         arg.skip = 0;
1928         arg.len = 0;
1929
1930         read_lock_bh(&rt6_lock);
1931         fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1932         read_unlock_bh(&rt6_lock);
1933
1934         *start = buffer;
1935         if (offset)
1936                 *start += offset % RT6_INFO_LEN;
1937
1938         arg.len -= offset % RT6_INFO_LEN;
1939
1940         if (arg.len > length)
1941                 arg.len = length;
1942         if (arg.len < 0)
1943                 arg.len = 0;
1944
1945         return arg.len;
1946 }
1947
1948 extern struct rt6_statistics rt6_stats;
1949
1950 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1951 {
1952         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1953                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1954                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1955                       rt6_stats.fib_rt_cache,
1956                       atomic_read(&ip6_dst_ops.entries),
1957                       rt6_stats.fib_discarded_routes);
1958
1959         return 0;
1960 }
1961
1962 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1963 {
1964         return single_open(file, rt6_stats_seq_show, NULL);
1965 }
1966
1967 static struct file_operations rt6_stats_seq_fops = {
1968         .owner   = THIS_MODULE,
1969         .open    = rt6_stats_seq_open,
1970         .read    = seq_read,
1971         .llseek  = seq_lseek,
1972         .release = single_release,
1973 };
1974 #endif  /* CONFIG_PROC_FS */
1975
1976 #ifdef CONFIG_SYSCTL
1977
1978 static int flush_delay;
1979
1980 static
1981 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1982                               void __user *buffer, size_t *lenp, loff_t *ppos)
1983 {
1984         if (write) {
1985                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1986                 if (flush_delay < 0)
1987                         flush_delay = 0;
1988                 fib6_run_gc((unsigned long)flush_delay);
1989                 return 0;
1990         } else
1991                 return -EINVAL;
1992 }
1993
1994 ctl_table ipv6_route_table[] = {
1995         {
1996                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH, 
1997                 .procname       =       "flush",
1998                 .data           =       &flush_delay,
1999                 .maxlen         =       sizeof(int),
2000                 .mode           =       0644,
2001                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
2002         },
2003         {
2004                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
2005                 .procname       =       "gc_thresh",
2006                 .data           =       &ip6_dst_ops.gc_thresh,
2007                 .maxlen         =       sizeof(int),
2008                 .mode           =       0644,
2009                 .proc_handler   =       &proc_dointvec,
2010         },
2011         {
2012                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
2013                 .procname       =       "max_size",
2014                 .data           =       &ip6_rt_max_size,
2015                 .maxlen         =       sizeof(int),
2016                 .mode           =       0644,
2017                 .proc_handler   =       &proc_dointvec,
2018         },
2019         {
2020                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2021                 .procname       =       "gc_min_interval",
2022                 .data           =       &ip6_rt_gc_min_interval,
2023                 .maxlen         =       sizeof(int),
2024                 .mode           =       0644,
2025                 .proc_handler   =       &proc_dointvec_jiffies,
2026                 .strategy       =       &sysctl_jiffies,
2027         },
2028         {
2029                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2030                 .procname       =       "gc_timeout",
2031                 .data           =       &ip6_rt_gc_timeout,
2032                 .maxlen         =       sizeof(int),
2033                 .mode           =       0644,
2034                 .proc_handler   =       &proc_dointvec_jiffies,
2035                 .strategy       =       &sysctl_jiffies,
2036         },
2037         {
2038                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2039                 .procname       =       "gc_interval",
2040                 .data           =       &ip6_rt_gc_interval,
2041                 .maxlen         =       sizeof(int),
2042                 .mode           =       0644,
2043                 .proc_handler   =       &proc_dointvec_jiffies,
2044                 .strategy       =       &sysctl_jiffies,
2045         },
2046         {
2047                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2048                 .procname       =       "gc_elasticity",
2049                 .data           =       &ip6_rt_gc_elasticity,
2050                 .maxlen         =       sizeof(int),
2051                 .mode           =       0644,
2052                 .proc_handler   =       &proc_dointvec_jiffies,
2053                 .strategy       =       &sysctl_jiffies,
2054         },
2055         {
2056                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2057                 .procname       =       "mtu_expires",
2058                 .data           =       &ip6_rt_mtu_expires,
2059                 .maxlen         =       sizeof(int),
2060                 .mode           =       0644,
2061                 .proc_handler   =       &proc_dointvec_jiffies,
2062                 .strategy       =       &sysctl_jiffies,
2063         },
2064         {
2065                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2066                 .procname       =       "min_adv_mss",
2067                 .data           =       &ip6_rt_min_advmss,
2068                 .maxlen         =       sizeof(int),
2069                 .mode           =       0644,
2070                 .proc_handler   =       &proc_dointvec_jiffies,
2071                 .strategy       =       &sysctl_jiffies,
2072         },
2073         { .ctl_name = 0 }
2074 };
2075
2076 #endif
2077
2078 void __init ip6_route_init(void)
2079 {
2080         struct proc_dir_entry *p;
2081
2082         ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2083                                                      sizeof(struct rt6_info),
2084                                                      0, SLAB_HWCACHE_ALIGN,
2085                                                      NULL, NULL);
2086         if (!ip6_dst_ops.kmem_cachep)
2087                 panic("cannot create ip6_dst_cache");
2088
2089         fib6_init();
2090 #ifdef  CONFIG_PROC_FS
2091         p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2092         if (p)
2093                 p->owner = THIS_MODULE;
2094
2095         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2096 #endif
2097 #ifdef CONFIG_XFRM
2098         xfrm6_init();
2099 #endif
2100 }
2101
2102 void __exit ip6_route_cleanup(void)
2103 {
2104 #ifdef CONFIG_PROC_FS
2105         proc_net_remove("ipv6_route");
2106         proc_net_remove("rt6_stats");
2107 #endif
2108 #ifdef CONFIG_XFRM
2109         xfrm6_fini();
2110 #endif
2111         rt6_ifdown(NULL);
2112         fib6_gc_cleanup();
2113         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2114 }