update to 2.6.9-rc1
[linux-flexiantxendom0-3.2.10.git] / net / ipv4 / fib_semantics.c
1 /*
2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
3  *              operating system.  INET is implemented using the  BSD Socket
4  *              interface as the means of communication with the user level.
5  *
6  *              IPv4 Forwarding Information Base: semantics.
7  *
8  * Version:     $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
9  *
10  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  */
17
18 #include <linux/config.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <asm/bitops.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/jiffies.h>
25 #include <linux/mm.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
30 #include <linux/in.h>
31 #include <linux/inet.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/netlink.h>
37 #include <linux/init.h>
38
39 #include <net/ip.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
42 #include <net/tcp.h>
43 #include <net/sock.h>
44 #include <net/ip_fib.h>
45
46 #define FSprintk(a...)
47
48 static struct fib_info  *fib_info_list;
49 static rwlock_t fib_info_lock = RW_LOCK_UNLOCKED;
50 int fib_info_cnt;
51
52 #define for_fib_info() { struct fib_info *fi; \
53         for (fi = fib_info_list; fi; fi = fi->fib_next)
54
55 #define endfor_fib_info() }
56
#ifdef CONFIG_IP_ROUTE_MULTIPATH

static spinlock_t fib_multipath_lock = SPIN_LOCK_UNLOCKED;

/*
 * Iterate over all fib_nhs nexthops of "fi".  Both macros open a scope
 * declaring the index "nhsel" and the cursor "nh"; the scope must be
 * closed with endfor_nexthops().  for_nexthops() yields a read-only
 * cursor, change_nexthops() a writable one.
 */
#define for_nexthops(fi) \
	{ \
		int nhsel; \
		const struct fib_nh *nh; \
		for (nhsel = 0, nh = (fi)->fib_nh; \
		     nhsel < (fi)->fib_nhs; \
		     nh++, nhsel++)

#define change_nexthops(fi) \
	{ \
		int nhsel; \
		struct fib_nh *nh; \
		for (nhsel = 0, nh = (struct fib_nh *)((fi)->fib_nh); \
		     nhsel < (fi)->fib_nhs; \
		     nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/*
 * Without multipath only the first nexthop is visited.  The hope is that
 * gcc optimizes the single-iteration dummy loop away.
 */

#define for_nexthops(fi) \
	{ \
		int nhsel = 0; \
		const struct fib_nh *nh = (fi)->fib_nh; \
		for (nhsel = 0; nhsel < 1; nhsel++)

#define change_nexthops(fi) \
	{ \
		int nhsel = 0; \
		struct fib_nh *nh = (struct fib_nh *)((fi)->fib_nh); \
		for (nhsel = 0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

/* Closes the scope opened by for_nexthops() / change_nexthops(). */
#define endfor_nexthops(fi) \
	}
80
81
82 static struct 
83 {
84         int     error;
85         u8      scope;
86 } fib_props[RTA_MAX + 1] = {
87         {
88                 .error  = 0,
89                 .scope  = RT_SCOPE_NOWHERE,
90         },      /* RTN_UNSPEC */
91         {
92                 .error  = 0,
93                 .scope  = RT_SCOPE_UNIVERSE,
94         },      /* RTN_UNICAST */
95         {
96                 .error  = 0,
97                 .scope  = RT_SCOPE_HOST,
98         },      /* RTN_LOCAL */
99         {
100                 .error  = 0,
101                 .scope  = RT_SCOPE_LINK,
102         },      /* RTN_BROADCAST */
103         {
104                 .error  = 0,
105                 .scope  = RT_SCOPE_LINK,
106         },      /* RTN_ANYCAST */
107         {
108                 .error  = 0,
109                 .scope  = RT_SCOPE_UNIVERSE,
110         },      /* RTN_MULTICAST */
111         {
112                 .error  = -EINVAL,
113                 .scope  = RT_SCOPE_UNIVERSE,
114         },      /* RTN_BLACKHOLE */
115         {
116                 .error  = -EHOSTUNREACH,
117                 .scope  = RT_SCOPE_UNIVERSE,
118         },      /* RTN_UNREACHABLE */
119         {
120                 .error  = -EACCES,
121                 .scope  = RT_SCOPE_UNIVERSE,
122         },      /* RTN_PROHIBIT */
123         {
124                 .error  = -EAGAIN,
125                 .scope  = RT_SCOPE_UNIVERSE,
126         },      /* RTN_THROW */
127         {
128                 .error  = -EINVAL,
129                 .scope  = RT_SCOPE_NOWHERE,
130         },      /* RTN_NAT */
131         {
132                 .error  = -EINVAL,
133                 .scope  = RT_SCOPE_NOWHERE,
134         },      /* RTN_XRESOLVE */
135 };
136
137
138 /* Release a nexthop info record */
139
140 void free_fib_info(struct fib_info *fi)
141 {
142         if (fi->fib_dead == 0) {
143                 printk("Freeing alive fib_info %p\n", fi);
144                 return;
145         }
146         change_nexthops(fi) {
147                 if (nh->nh_dev)
148                         dev_put(nh->nh_dev);
149                 nh->nh_dev = NULL;
150         } endfor_nexthops(fi);
151         fib_info_cnt--;
152         kfree(fi);
153 }
154
155 void fib_release_info(struct fib_info *fi)
156 {
157         write_lock(&fib_info_lock);
158         if (fi && --fi->fib_treeref == 0) {
159                 if (fi->fib_next)
160                         fi->fib_next->fib_prev = fi->fib_prev;
161                 if (fi->fib_prev)
162                         fi->fib_prev->fib_next = fi->fib_next;
163                 if (fi == fib_info_list)
164                         fib_info_list = fi->fib_next;
165                 fi->fib_dead = 1;
166                 fib_info_put(fi);
167         }
168         write_unlock(&fib_info_lock);
169 }
170
171 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
172 {
173         const struct fib_nh *onh = ofi->fib_nh;
174
175         for_nexthops(fi) {
176                 if (nh->nh_oif != onh->nh_oif ||
177                     nh->nh_gw  != onh->nh_gw ||
178                     nh->nh_scope != onh->nh_scope ||
179 #ifdef CONFIG_IP_ROUTE_MULTIPATH
180                     nh->nh_weight != onh->nh_weight ||
181 #endif
182 #ifdef CONFIG_NET_CLS_ROUTE
183                     nh->nh_tclassid != onh->nh_tclassid ||
184 #endif
185                     ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
186                         return -1;
187                 onh++;
188         } endfor_nexthops(fi);
189         return 0;
190 }
191
192 static __inline__ struct fib_info * fib_find_info(const struct fib_info *nfi)
193 {
194         for_fib_info() {
195                 if (fi->fib_nhs != nfi->fib_nhs)
196                         continue;
197                 if (nfi->fib_protocol == fi->fib_protocol &&
198                     nfi->fib_prefsrc == fi->fib_prefsrc &&
199                     nfi->fib_priority == fi->fib_priority &&
200                     memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(fi->fib_metrics)) == 0 &&
201                     ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
202                     (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
203                         return fi;
204         } endfor_fib_info();
205         return NULL;
206 }
207
208 /* Check, that the gateway is already configured.
209    Used only by redirect accept routine.
210  */
211
212 int ip_fib_check_default(u32 gw, struct net_device *dev)
213 {
214         read_lock(&fib_info_lock);
215         for_fib_info() {
216                 if (fi->fib_flags & RTNH_F_DEAD)
217                         continue;
218                 for_nexthops(fi) {
219                         if (nh->nh_dev == dev && nh->nh_gw == gw &&
220                             nh->nh_scope == RT_SCOPE_LINK &&
221                             !(nh->nh_flags&RTNH_F_DEAD)) {
222                                 read_unlock(&fib_info_lock);
223                                 return 0;
224                         }
225                 } endfor_nexthops(fi);
226         } endfor_fib_info();
227         read_unlock(&fib_info_lock);
228         return -1;
229 }
230
231 #ifdef CONFIG_IP_ROUTE_MULTIPATH
232
233 static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
234 {
235         while (RTA_OK(attr,attrlen)) {
236                 if (attr->rta_type == type)
237                         return *(u32*)RTA_DATA(attr);
238                 attr = RTA_NEXT(attr, attrlen);
239         }
240         return 0;
241 }
242
243 static int
244 fib_count_nexthops(struct rtattr *rta)
245 {
246         int nhs = 0;
247         struct rtnexthop *nhp = RTA_DATA(rta);
248         int nhlen = RTA_PAYLOAD(rta);
249
250         while (nhlen >= (int)sizeof(struct rtnexthop)) {
251                 if ((nhlen -= nhp->rtnh_len) < 0)
252                         return 0;
253                 nhs++;
254                 nhp = RTNH_NEXT(nhp);
255         };
256         return nhs;
257 }
258
259 static int
260 fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
261 {
262         struct rtnexthop *nhp = RTA_DATA(rta);
263         int nhlen = RTA_PAYLOAD(rta);
264
265         change_nexthops(fi) {
266                 int attrlen = nhlen - sizeof(struct rtnexthop);
267                 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
268                         return -EINVAL;
269                 nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
270                 nh->nh_oif = nhp->rtnh_ifindex;
271                 nh->nh_weight = nhp->rtnh_hops + 1;
272                 if (attrlen) {
273                         nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
274 #ifdef CONFIG_NET_CLS_ROUTE
275                         nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
276 #endif
277                 }
278                 nhp = RTNH_NEXT(nhp);
279         } endfor_nexthops(fi);
280         return 0;
281 }
282
283 #endif
284
285 int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
286                  struct fib_info *fi)
287 {
288 #ifdef CONFIG_IP_ROUTE_MULTIPATH
289         struct rtnexthop *nhp;
290         int nhlen;
291 #endif
292
293         if (rta->rta_priority &&
294             *rta->rta_priority != fi->fib_priority)
295                 return 1;
296
297         if (rta->rta_oif || rta->rta_gw) {
298                 if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
299                     (!rta->rta_gw  || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
300                         return 0;
301                 return 1;
302         }
303
304 #ifdef CONFIG_IP_ROUTE_MULTIPATH
305         if (rta->rta_mp == NULL)
306                 return 0;
307         nhp = RTA_DATA(rta->rta_mp);
308         nhlen = RTA_PAYLOAD(rta->rta_mp);
309         
310         for_nexthops(fi) {
311                 int attrlen = nhlen - sizeof(struct rtnexthop);
312                 u32 gw;
313
314                 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
315                         return -EINVAL;
316                 if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
317                         return 1;
318                 if (attrlen) {
319                         gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
320                         if (gw && gw != nh->nh_gw)
321                                 return 1;
322 #ifdef CONFIG_NET_CLS_ROUTE
323                         gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
324                         if (gw && gw != nh->nh_tclassid)
325                                 return 1;
326 #endif
327                 }
328                 nhp = RTNH_NEXT(nhp);
329         } endfor_nexthops(fi);
330 #endif
331         return 0;
332 }
333
334
335 /*
336    Picture
337    -------
338
   The semantics of a nexthop are very messy for historical reasons.
   We have to take into account that:
341    a) gateway can be actually local interface address,
342       so that gatewayed route is direct.
343    b) gateway must be on-link address, possibly
344       described not by an ifaddr, but also by a direct route.
345    c) If both gateway and interface are specified, they should not
346       contradict.
347    d) If we use tunnel routes, gateway could be not on-link.
348
349    Attempt to reconcile all of these (alas, self-contradictory) conditions
350    results in pretty ugly and hairy code with obscure logic.
351
   I chose to generalize it instead, so that the size
   of the code practically does not increase, but it becomes
   much more general.
355    Every prefix is assigned a "scope" value: "host" is local address,
356    "link" is direct route,
357    [ ... "site" ... "interior" ... ]
358    and "universe" is true gateway route with global meaning.
359
360    Every prefix refers to a set of "nexthop"s (gw, oif),
361    where gw must have narrower scope. This recursion stops
362    when gw has LOCAL scope or if "nexthop" is declared ONLINK,
363    which means that gw is forced to be on link.
364
   The code is still hairy, but now it is apparently logically
   consistent and very flexible. E.g. as a by-product it allows
   independent exterior and interior routing processes
   to co-exist in peace.
369
370    Normally it looks as following.
371
372    {universe prefix}  -> (gw, oif) [scope link]
373                           |
374                           |-> {link prefix} -> (gw, oif) [scope local]
375                                                 |
376                                                 |-> {local prefix} (terminal node)
377  */
378
379 static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
380 {
381         int err;
382
383         if (nh->nh_gw) {
384                 struct fib_result res;
385
386 #ifdef CONFIG_IP_ROUTE_PERVASIVE
387                 if (nh->nh_flags&RTNH_F_PERVASIVE)
388                         return 0;
389 #endif
390                 if (nh->nh_flags&RTNH_F_ONLINK) {
391                         struct net_device *dev;
392
393                         if (r->rtm_scope >= RT_SCOPE_LINK)
394                                 return -EINVAL;
395                         if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
396                                 return -EINVAL;
397                         if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
398                                 return -ENODEV;
399                         if (!(dev->flags&IFF_UP))
400                                 return -ENETDOWN;
401                         nh->nh_dev = dev;
402                         dev_hold(dev);
403                         nh->nh_scope = RT_SCOPE_LINK;
404                         return 0;
405                 }
406                 {
407                         struct flowi fl = { .nl_u = { .ip4_u =
408                                                       { .daddr = nh->nh_gw,
409                                                         .scope = r->rtm_scope + 1 } },
410                                             .oif = nh->nh_oif };
411
412                         /* It is not necessary, but requires a bit of thinking */
413                         if (fl.fl4_scope < RT_SCOPE_LINK)
414                                 fl.fl4_scope = RT_SCOPE_LINK;
415                         if ((err = fib_lookup(&fl, &res)) != 0)
416                                 return err;
417                 }
418                 err = -EINVAL;
419                 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
420                         goto out;
421                 nh->nh_scope = res.scope;
422                 nh->nh_oif = FIB_RES_OIF(res);
423                 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
424                         goto out;
425                 dev_hold(nh->nh_dev);
426                 err = -ENETDOWN;
427                 if (!(nh->nh_dev->flags & IFF_UP))
428                         goto out;
429                 err = 0;
430 out:
431                 fib_res_put(&res);
432                 return err;
433         } else {
434                 struct in_device *in_dev;
435
436                 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
437                         return -EINVAL;
438
439                 in_dev = inetdev_by_index(nh->nh_oif);
440                 if (in_dev == NULL)
441                         return -ENODEV;
442                 if (!(in_dev->dev->flags&IFF_UP)) {
443                         in_dev_put(in_dev);
444                         return -ENETDOWN;
445                 }
446                 nh->nh_dev = in_dev->dev;
447                 dev_hold(nh->nh_dev);
448                 nh->nh_scope = RT_SCOPE_HOST;
449                 in_dev_put(in_dev);
450         }
451         return 0;
452 }
453
454 struct fib_info *
455 fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
456                 const struct nlmsghdr *nlh, int *errp)
457 {
458         int err;
459         struct fib_info *fi = NULL;
460         struct fib_info *ofi;
461 #ifdef CONFIG_IP_ROUTE_MULTIPATH
462         int nhs = 1;
463 #else
464         const int nhs = 1;
465 #endif
466
467         /* Fast check to catch the most weird cases */
468         if (fib_props[r->rtm_type].scope > r->rtm_scope)
469                 goto err_inval;
470
471 #ifdef CONFIG_IP_ROUTE_MULTIPATH
472         if (rta->rta_mp) {
473                 nhs = fib_count_nexthops(rta->rta_mp);
474                 if (nhs == 0)
475                         goto err_inval;
476         }
477 #endif
478
479         fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
480         err = -ENOBUFS;
481         if (fi == NULL)
482                 goto failure;
483         fib_info_cnt++;
484         memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh));
485
486         fi->fib_protocol = r->rtm_protocol;
487         fi->fib_nhs = nhs;
488         fi->fib_flags = r->rtm_flags;
489         if (rta->rta_priority)
490                 fi->fib_priority = *rta->rta_priority;
491         if (rta->rta_mx) {
492                 int attrlen = RTA_PAYLOAD(rta->rta_mx);
493                 struct rtattr *attr = RTA_DATA(rta->rta_mx);
494
495                 while (RTA_OK(attr, attrlen)) {
496                         unsigned flavor = attr->rta_type;
497                         if (flavor) {
498                                 if (flavor > RTAX_MAX)
499                                         goto err_inval;
500                                 fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
501                         }
502                         attr = RTA_NEXT(attr, attrlen);
503                 }
504         }
505         if (rta->rta_prefsrc)
506                 memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);
507
508         if (rta->rta_mp) {
509 #ifdef CONFIG_IP_ROUTE_MULTIPATH
510                 if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
511                         goto failure;
512                 if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
513                         goto err_inval;
514                 if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
515                         goto err_inval;
516 #ifdef CONFIG_NET_CLS_ROUTE
517                 if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4))
518                         goto err_inval;
519 #endif
520 #else
521                 goto err_inval;
522 #endif
523         } else {
524                 struct fib_nh *nh = fi->fib_nh;
525                 if (rta->rta_oif)
526                         nh->nh_oif = *rta->rta_oif;
527                 if (rta->rta_gw)
528                         memcpy(&nh->nh_gw, rta->rta_gw, 4);
529 #ifdef CONFIG_NET_CLS_ROUTE
530                 if (rta->rta_flow)
531                         memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
532 #endif
533                 nh->nh_flags = r->rtm_flags;
534 #ifdef CONFIG_IP_ROUTE_MULTIPATH
535                 nh->nh_weight = 1;
536 #endif
537         }
538
539         if (fib_props[r->rtm_type].error) {
540                 if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
541                         goto err_inval;
542                 goto link_it;
543         }
544
545         if (r->rtm_scope > RT_SCOPE_HOST)
546                 goto err_inval;
547
548         if (r->rtm_scope == RT_SCOPE_HOST) {
549                 struct fib_nh *nh = fi->fib_nh;
550
551                 /* Local address is added. */
552                 if (nhs != 1 || nh->nh_gw)
553                         goto err_inval;
554                 nh->nh_scope = RT_SCOPE_NOWHERE;
555                 nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
556                 err = -ENODEV;
557                 if (nh->nh_dev == NULL)
558                         goto failure;
559         } else {
560                 change_nexthops(fi) {
561                         if ((err = fib_check_nh(r, fi, nh)) != 0)
562                                 goto failure;
563                 } endfor_nexthops(fi)
564         }
565
566         if (fi->fib_prefsrc) {
567                 if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
568                     memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
569                         if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
570                                 goto err_inval;
571         }
572
573 link_it:
574         if ((ofi = fib_find_info(fi)) != NULL) {
575                 fi->fib_dead = 1;
576                 free_fib_info(fi);
577                 ofi->fib_treeref++;
578                 return ofi;
579         }
580
581         fi->fib_treeref++;
582         atomic_inc(&fi->fib_clntref);
583         write_lock(&fib_info_lock);
584         fi->fib_next = fib_info_list;
585         fi->fib_prev = NULL;
586         if (fib_info_list)
587                 fib_info_list->fib_prev = fi;
588         fib_info_list = fi;
589         write_unlock(&fib_info_lock);
590         return fi;
591
592 err_inval:
593         err = -EINVAL;
594
595 failure:
596         *errp = err;
597         if (fi) {
598                 fi->fib_dead = 1;
599                 free_fib_info(fi);
600         }
601         return NULL;
602 }
603
604 int 
605 fib_semantic_match(int type, struct fib_info *fi, const struct flowi *flp, struct fib_result *res)
606 {
607         int err = fib_props[type].error;
608
609         if (err == 0) {
610                 if (fi->fib_flags&RTNH_F_DEAD)
611                         return 1;
612
613                 res->fi = fi;
614
615                 switch (type) {
616                 case RTN_UNICAST:
617                 case RTN_LOCAL:
618                 case RTN_BROADCAST:
619                 case RTN_ANYCAST:
620                 case RTN_MULTICAST:
621                         for_nexthops(fi) {
622                                 if (nh->nh_flags&RTNH_F_DEAD)
623                                         continue;
624                                 if (!flp->oif || flp->oif == nh->nh_oif)
625                                         break;
626                         }
627 #ifdef CONFIG_IP_ROUTE_MULTIPATH
628                         if (nhsel < fi->fib_nhs) {
629                                 res->nh_sel = nhsel;
630                                 atomic_inc(&fi->fib_clntref);
631                                 return 0;
632                         }
633 #else
634                         if (nhsel < 1) {
635                                 atomic_inc(&fi->fib_clntref);
636                                 return 0;
637                         }
638 #endif
639                         endfor_nexthops(fi);
640                         res->fi = NULL;
641                         return 1;
642                 default:
643                         res->fi = NULL;
644                         printk(KERN_DEBUG "impossible 102\n");
645                         return -EINVAL;
646                 }
647         }
648         return err;
649 }
650
651 /* Find appropriate source address to this destination */
652
653 u32 __fib_res_prefsrc(struct fib_result *res)
654 {
655         return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
656 }
657
658 int
659 fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
660               u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
661               struct fib_info *fi)
662 {
663         struct rtmsg *rtm;
664         struct nlmsghdr  *nlh;
665         unsigned char    *b = skb->tail;
666
667         nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*rtm));
668         rtm = NLMSG_DATA(nlh);
669         rtm->rtm_family = AF_INET;
670         rtm->rtm_dst_len = dst_len;
671         rtm->rtm_src_len = 0;
672         rtm->rtm_tos = tos;
673         rtm->rtm_table = tb_id;
674         rtm->rtm_type = type;
675         rtm->rtm_flags = fi->fib_flags;
676         rtm->rtm_scope = scope;
677         if (rtm->rtm_dst_len)
678                 RTA_PUT(skb, RTA_DST, 4, dst);
679         rtm->rtm_protocol = fi->fib_protocol;
680         if (fi->fib_priority)
681                 RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
682 #ifdef CONFIG_NET_CLS_ROUTE
683         if (fi->fib_nh[0].nh_tclassid)
684                 RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
685 #endif
686         if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
687                 goto rtattr_failure;
688         if (fi->fib_prefsrc)
689                 RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
690         if (fi->fib_nhs == 1) {
691                 if (fi->fib_nh->nh_gw)
692                         RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
693                 if (fi->fib_nh->nh_oif)
694                         RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
695         }
696 #ifdef CONFIG_IP_ROUTE_MULTIPATH
697         if (fi->fib_nhs > 1) {
698                 struct rtnexthop *nhp;
699                 struct rtattr *mp_head;
700                 if (skb_tailroom(skb) <= RTA_SPACE(0))
701                         goto rtattr_failure;
702                 mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));
703
704                 for_nexthops(fi) {
705                         if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
706                                 goto rtattr_failure;
707                         nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
708                         nhp->rtnh_flags = nh->nh_flags & 0xFF;
709                         nhp->rtnh_hops = nh->nh_weight-1;
710                         nhp->rtnh_ifindex = nh->nh_oif;
711                         if (nh->nh_gw)
712                                 RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
713                         nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
714                 } endfor_nexthops(fi);
715                 mp_head->rta_type = RTA_MULTIPATH;
716                 mp_head->rta_len = skb->tail - (u8*)mp_head;
717         }
718 #endif
719         nlh->nlmsg_len = skb->tail - b;
720         return skb->len;
721
722 nlmsg_failure:
723 rtattr_failure:
724         skb_trim(skb, b - skb->data);
725         return -1;
726 }
727
728 #ifndef CONFIG_IP_NOSIOCRT
729
730 int
731 fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
732                     struct kern_rta *rta, struct rtentry *r)
733 {
734         int    plen;
735         u32    *ptr;
736
737         memset(rtm, 0, sizeof(*rtm));
738         memset(rta, 0, sizeof(*rta));
739
740         if (r->rt_dst.sa_family != AF_INET)
741                 return -EAFNOSUPPORT;
742
743         /* Check mask for validity:
744            a) it must be contiguous.
745            b) destination must have all host bits clear.
746            c) if application forgot to set correct family (AF_INET),
747               reject request unless it is absolutely clear i.e.
748               both family and mask are zero.
749          */
750         plen = 32;
751         ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
752         if (!(r->rt_flags&RTF_HOST)) {
753                 u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
754                 if (r->rt_genmask.sa_family != AF_INET) {
755                         if (mask || r->rt_genmask.sa_family)
756                                 return -EAFNOSUPPORT;
757                 }
758                 if (bad_mask(mask, *ptr))
759                         return -EINVAL;
760                 plen = inet_mask_len(mask);
761         }
762
763         nl->nlmsg_flags = NLM_F_REQUEST;
764         nl->nlmsg_pid = 0;
765         nl->nlmsg_seq = 0;
766         nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
767         if (cmd == SIOCDELRT) {
768                 nl->nlmsg_type = RTM_DELROUTE;
769                 nl->nlmsg_flags = 0;
770         } else {
771                 nl->nlmsg_type = RTM_NEWROUTE;
772                 nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
773                 rtm->rtm_protocol = RTPROT_BOOT;
774         }
775
776         rtm->rtm_dst_len = plen;
777         rta->rta_dst = ptr;
778
779         if (r->rt_metric) {
780                 *(u32*)&r->rt_pad3 = r->rt_metric - 1;
781                 rta->rta_priority = (u32*)&r->rt_pad3;
782         }
783         if (r->rt_flags&RTF_REJECT) {
784                 rtm->rtm_scope = RT_SCOPE_HOST;
785                 rtm->rtm_type = RTN_UNREACHABLE;
786                 return 0;
787         }
788         rtm->rtm_scope = RT_SCOPE_NOWHERE;
789         rtm->rtm_type = RTN_UNICAST;
790
791         if (r->rt_dev) {
792                 char *colon;
793                 struct net_device *dev;
794                 char   devname[IFNAMSIZ];
795
796                 if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
797                         return -EFAULT;
798                 devname[IFNAMSIZ-1] = 0;
799                 colon = strchr(devname, ':');
800                 if (colon)
801                         *colon = 0;
802                 dev = __dev_get_by_name(devname);
803                 if (!dev)
804                         return -ENODEV;
805                 rta->rta_oif = &dev->ifindex;
806                 if (colon) {
807                         struct in_ifaddr *ifa;
808                         struct in_device *in_dev = __in_dev_get(dev);
809                         if (!in_dev)
810                                 return -ENODEV;
811                         *colon = ':';
812                         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
813                                 if (strcmp(ifa->ifa_label, devname) == 0)
814                                         break;
815                         if (ifa == NULL)
816                                 return -ENODEV;
817                         rta->rta_prefsrc = &ifa->ifa_local;
818                 }
819         }
820
821         ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
822         if (r->rt_gateway.sa_family == AF_INET && *ptr) {
823                 rta->rta_gw = ptr;
824                 if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
825                         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
826         }
827
828         if (cmd == SIOCDELRT)
829                 return 0;
830
831         if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
832                 return -EINVAL;
833
834         if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
835                 rtm->rtm_scope = RT_SCOPE_LINK;
836
837         if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
838                 struct rtattr *rec;
839                 struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
840                 if (mx == NULL)
841                         return -ENOMEM;
842                 rta->rta_mx = mx;
843                 mx->rta_type = RTA_METRICS;
844                 mx->rta_len  = RTA_LENGTH(0);
845                 if (r->rt_flags&RTF_MTU) {
846                         rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
847                         rec->rta_type = RTAX_ADVMSS;
848                         rec->rta_len = RTA_LENGTH(4);
849                         mx->rta_len += RTA_LENGTH(4);
850                         *(u32*)RTA_DATA(rec) = r->rt_mtu - 40;
851                 }
852                 if (r->rt_flags&RTF_WINDOW) {
853                         rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
854                         rec->rta_type = RTAX_WINDOW;
855                         rec->rta_len = RTA_LENGTH(4);
856                         mx->rta_len += RTA_LENGTH(4);
857                         *(u32*)RTA_DATA(rec) = r->rt_window;
858                 }
859                 if (r->rt_flags&RTF_IRTT) {
860                         rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
861                         rec->rta_type = RTAX_RTT;
862                         rec->rta_len = RTA_LENGTH(4);
863                         mx->rta_len += RTA_LENGTH(4);
864                         *(u32*)RTA_DATA(rec) = r->rt_irtt<<3;
865                 }
866         }
867         return 0;
868 }
869
870 #endif
871
872 /*
873    Update FIB if:
874    - local address disappeared -> we must delete all the entries
875      referring to it.
876    - device went down -> we must shutdown all nexthops going via it.
877  */
878
879 int fib_sync_down(u32 local, struct net_device *dev, int force)
880 {
881         int ret = 0;
882         int scope = RT_SCOPE_NOWHERE;
883         
884         if (force)
885                 scope = -1;
886
887         for_fib_info() {
888                 if (local && fi->fib_prefsrc == local) {
889                         fi->fib_flags |= RTNH_F_DEAD;
890                         ret++;
891                 } else if (dev && fi->fib_nhs) {
892                         int dead = 0;
893
894                         change_nexthops(fi) {
895                                 if (nh->nh_flags&RTNH_F_DEAD)
896                                         dead++;
897                                 else if (nh->nh_dev == dev &&
898                                          nh->nh_scope != scope) {
899                                         nh->nh_flags |= RTNH_F_DEAD;
900 #ifdef CONFIG_IP_ROUTE_MULTIPATH
901                                         spin_lock_bh(&fib_multipath_lock);
902                                         fi->fib_power -= nh->nh_power;
903                                         nh->nh_power = 0;
904                                         spin_unlock_bh(&fib_multipath_lock);
905 #endif
906                                         dead++;
907                                 }
908 #ifdef CONFIG_IP_ROUTE_MULTIPATH
909                                 if (force > 1 && nh->nh_dev == dev) {
910                                         dead = fi->fib_nhs;
911                                         break;
912                                 }
913 #endif
914                         } endfor_nexthops(fi)
915                         if (dead == fi->fib_nhs) {
916                                 fi->fib_flags |= RTNH_F_DEAD;
917                                 ret++;
918                         }
919                 }
920         } endfor_fib_info();
921         return ret;
922 }
923
924 #ifdef CONFIG_IP_ROUTE_MULTIPATH
925
926 /*
927    Dead device goes up. We wake up dead nexthops.
928    It takes sense only on multipath routes.
929  */
930
931 int fib_sync_up(struct net_device *dev)
932 {
933         int ret = 0;
934
935         if (!(dev->flags&IFF_UP))
936                 return 0;
937
938         for_fib_info() {
939                 int alive = 0;
940
941                 change_nexthops(fi) {
942                         if (!(nh->nh_flags&RTNH_F_DEAD)) {
943                                 alive++;
944                                 continue;
945                         }
946                         if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
947                                 continue;
948                         if (nh->nh_dev != dev || __in_dev_get(dev) == NULL)
949                                 continue;
950                         alive++;
951                         spin_lock_bh(&fib_multipath_lock);
952                         nh->nh_power = 0;
953                         nh->nh_flags &= ~RTNH_F_DEAD;
954                         spin_unlock_bh(&fib_multipath_lock);
955                 } endfor_nexthops(fi)
956
957                 if (alive > 0) {
958                         fi->fib_flags &= ~RTNH_F_DEAD;
959                         ret++;
960                 }
961         } endfor_fib_info();
962         return ret;
963 }
964
965 /*
966    The algorithm is suboptimal, but it provides really
967    fair weighted route distribution.
968  */
969
970 void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
971 {
972         struct fib_info *fi = res->fi;
973         int w;
974
975         spin_lock_bh(&fib_multipath_lock);
976         if (fi->fib_power <= 0) {
977                 int power = 0;
978                 change_nexthops(fi) {
979                         if (!(nh->nh_flags&RTNH_F_DEAD)) {
980                                 power += nh->nh_weight;
981                                 nh->nh_power = nh->nh_weight;
982                         }
983                 } endfor_nexthops(fi);
984                 fi->fib_power = power;
985                 if (power <= 0) {
986                         spin_unlock_bh(&fib_multipath_lock);
987                         /* Race condition: route has just become dead. */
988                         res->nh_sel = 0;
989                         return;
990                 }
991         }
992
993
994         /* w should be random number [0..fi->fib_power-1],
995            it is pretty bad approximation.
996          */
997
998         w = jiffies % fi->fib_power;
999
1000         change_nexthops(fi) {
1001                 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1002                         if ((w -= nh->nh_power) <= 0) {
1003                                 nh->nh_power--;
1004                                 fi->fib_power--;
1005                                 res->nh_sel = nhsel;
1006                                 spin_unlock_bh(&fib_multipath_lock);
1007                                 return;
1008                         }
1009                 }
1010         } endfor_nexthops(fi);
1011
1012         /* Race condition: route has just become dead. */
1013         res->nh_sel = 0;
1014         spin_unlock_bh(&fib_multipath_lock);
1015 }
1016 #endif