/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IPv4 Forwarding Information Base: semantics.
 *
 * Version:	$Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
#include <linux/config.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/skbuff.h>
#include <linux/netlink.h>
#include <linux/init.h>

#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <net/ip_fib.h>
46 #define FSprintk(a...)
48 static struct fib_info *fib_info_list;
49 static rwlock_t fib_info_lock = RW_LOCK_UNLOCKED;
52 #define for_fib_info() { struct fib_info *fi; \
53 for (fi = fib_info_list; fi; fi = fi->fib_next)
55 #define endfor_fib_info() }
#ifdef CONFIG_IP_ROUTE_MULTIPATH

/* Protects the per-fib_info nexthop power/weight accounting used by
 * the weighted multipath selection below. */
static spinlock_t fib_multipath_lock = SPIN_LOCK_UNLOCKED;

/* Iterate over the nexthop array of `fi`, binding `nh` (read-only) and the
 * index `nhsel` in the body.  Close with endfor_nexthops(). */
#define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)

/* Same iteration, but `nh` is writable (const is cast away). */
#define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/* Hope, that gcc will optimize it to get rid of dummy loop */

#define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
for (nhsel=0; nhsel < 1; nhsel++)

#define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
for (nhsel=0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

#define endfor_nexthops(fi) }
86 } fib_props[RTA_MAX + 1] = {
89 .scope = RT_SCOPE_NOWHERE,
93 .scope = RT_SCOPE_UNIVERSE,
97 .scope = RT_SCOPE_HOST,
101 .scope = RT_SCOPE_LINK,
102 }, /* RTN_BROADCAST */
105 .scope = RT_SCOPE_LINK,
109 .scope = RT_SCOPE_UNIVERSE,
110 }, /* RTN_MULTICAST */
113 .scope = RT_SCOPE_UNIVERSE,
114 }, /* RTN_BLACKHOLE */
116 .error = -EHOSTUNREACH,
117 .scope = RT_SCOPE_UNIVERSE,
118 }, /* RTN_UNREACHABLE */
121 .scope = RT_SCOPE_UNIVERSE,
122 }, /* RTN_PROHIBIT */
125 .scope = RT_SCOPE_UNIVERSE,
129 .scope = RT_SCOPE_NOWHERE,
133 .scope = RT_SCOPE_NOWHERE,
134 }, /* RTN_XRESOLVE */
138 /* Release a nexthop info record */
140 void free_fib_info(struct fib_info *fi)
142 if (fi->fib_dead == 0) {
143 printk("Freeing alive fib_info %p\n", fi);
146 change_nexthops(fi) {
150 } endfor_nexthops(fi);
155 void fib_release_info(struct fib_info *fi)
157 write_lock(&fib_info_lock);
158 if (fi && --fi->fib_treeref == 0) {
160 fi->fib_next->fib_prev = fi->fib_prev;
162 fi->fib_prev->fib_next = fi->fib_next;
163 if (fi == fib_info_list)
164 fib_info_list = fi->fib_next;
168 write_unlock(&fib_info_lock);
171 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
173 const struct fib_nh *onh = ofi->fib_nh;
176 if (nh->nh_oif != onh->nh_oif ||
177 nh->nh_gw != onh->nh_gw ||
178 nh->nh_scope != onh->nh_scope ||
179 #ifdef CONFIG_IP_ROUTE_MULTIPATH
180 nh->nh_weight != onh->nh_weight ||
182 #ifdef CONFIG_NET_CLS_ROUTE
183 nh->nh_tclassid != onh->nh_tclassid ||
185 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
188 } endfor_nexthops(fi);
192 static __inline__ struct fib_info * fib_find_info(const struct fib_info *nfi)
195 if (fi->fib_nhs != nfi->fib_nhs)
197 if (nfi->fib_protocol == fi->fib_protocol &&
198 nfi->fib_prefsrc == fi->fib_prefsrc &&
199 nfi->fib_priority == fi->fib_priority &&
200 memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(fi->fib_metrics)) == 0 &&
201 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
202 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
208 /* Check, that the gateway is already configured.
209 Used only by redirect accept routine.
212 int ip_fib_check_default(u32 gw, struct net_device *dev)
214 read_lock(&fib_info_lock);
216 if (fi->fib_flags & RTNH_F_DEAD)
219 if (nh->nh_dev == dev && nh->nh_gw == gw &&
220 nh->nh_scope == RT_SCOPE_LINK &&
221 !(nh->nh_flags&RTNH_F_DEAD)) {
222 read_unlock(&fib_info_lock);
225 } endfor_nexthops(fi);
227 read_unlock(&fib_info_lock);
231 #ifdef CONFIG_IP_ROUTE_MULTIPATH
233 static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
235 while (RTA_OK(attr,attrlen)) {
236 if (attr->rta_type == type)
237 return *(u32*)RTA_DATA(attr);
238 attr = RTA_NEXT(attr, attrlen);
244 fib_count_nexthops(struct rtattr *rta)
247 struct rtnexthop *nhp = RTA_DATA(rta);
248 int nhlen = RTA_PAYLOAD(rta);
250 while (nhlen >= (int)sizeof(struct rtnexthop)) {
251 if ((nhlen -= nhp->rtnh_len) < 0)
254 nhp = RTNH_NEXT(nhp);
260 fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
262 struct rtnexthop *nhp = RTA_DATA(rta);
263 int nhlen = RTA_PAYLOAD(rta);
265 change_nexthops(fi) {
266 int attrlen = nhlen - sizeof(struct rtnexthop);
267 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
269 nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
270 nh->nh_oif = nhp->rtnh_ifindex;
271 nh->nh_weight = nhp->rtnh_hops + 1;
273 nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
274 #ifdef CONFIG_NET_CLS_ROUTE
275 nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
278 nhp = RTNH_NEXT(nhp);
279 } endfor_nexthops(fi);
285 int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
288 #ifdef CONFIG_IP_ROUTE_MULTIPATH
289 struct rtnexthop *nhp;
293 if (rta->rta_priority &&
294 *rta->rta_priority != fi->fib_priority)
297 if (rta->rta_oif || rta->rta_gw) {
298 if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
299 (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
304 #ifdef CONFIG_IP_ROUTE_MULTIPATH
305 if (rta->rta_mp == NULL)
307 nhp = RTA_DATA(rta->rta_mp);
308 nhlen = RTA_PAYLOAD(rta->rta_mp);
311 int attrlen = nhlen - sizeof(struct rtnexthop);
314 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
316 if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
319 gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
320 if (gw && gw != nh->nh_gw)
322 #ifdef CONFIG_NET_CLS_ROUTE
323 gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
324 if (gw && gw != nh->nh_tclassid)
328 nhp = RTNH_NEXT(nhp);
329 } endfor_nexthops(fi);
/*
   Picture
   -------

   Semantics of nexthop is very messy by historical reasons.
   We have to take into account, that:
   a) gateway can be actually local interface address,
      so that gatewayed route is direct.
   b) gateway must be on-link address, possibly
      described not by an ifaddr, but also by a direct route.
   c) If both gateway and interface are specified, they should not
      contradict.
   d) If we use tunnel routes, gateway could be not on-link.

   Attempt to reconcile all of these (alas, self-contradictory) conditions
   results in pretty ugly and hairy code with obscure logic.

   I chose to generalize it instead, so that the size
   of code does not increase practically, but it becomes
   much more general.
   Every prefix is assigned a "scope" value: "host" is local address,
   "link" is direct route,
   [ ... "site" ... "interior" ... ]
   and "universe" is true gateway route with global meaning.

   Every prefix refers to a set of "nexthop"s (gw, oif),
   where gw must have narrower scope. This recursion stops
   when gw has LOCAL scope or if "nexthop" is declared ONLINK,
   which means that gw is forced to be on link.

   Code is still hairy, but now it is apparently logically
   consistent and very flexible. F.e. as by-product it allows
   to co-exist in peace independent exterior and interior
   routing processes.

   Normally it looks as following.

   {universe prefix}  -> (gw, oif) [scope link]
                          |
                          |-> {link prefix} -> (gw, oif) [scope local]
                                                |
                                                |-> {local prefix} (terminal node)
 */
379 static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
384 struct fib_result res;
386 #ifdef CONFIG_IP_ROUTE_PERVASIVE
387 if (nh->nh_flags&RTNH_F_PERVASIVE)
390 if (nh->nh_flags&RTNH_F_ONLINK) {
391 struct net_device *dev;
393 if (r->rtm_scope >= RT_SCOPE_LINK)
395 if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
397 if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
399 if (!(dev->flags&IFF_UP))
403 nh->nh_scope = RT_SCOPE_LINK;
407 struct flowi fl = { .nl_u = { .ip4_u =
408 { .daddr = nh->nh_gw,
409 .scope = r->rtm_scope + 1 } },
412 /* It is not necessary, but requires a bit of thinking */
413 if (fl.fl4_scope < RT_SCOPE_LINK)
414 fl.fl4_scope = RT_SCOPE_LINK;
415 if ((err = fib_lookup(&fl, &res)) != 0)
419 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
421 nh->nh_scope = res.scope;
422 nh->nh_oif = FIB_RES_OIF(res);
423 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
425 dev_hold(nh->nh_dev);
427 if (!(nh->nh_dev->flags & IFF_UP))
434 struct in_device *in_dev;
436 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
439 in_dev = inetdev_by_index(nh->nh_oif);
442 if (!(in_dev->dev->flags&IFF_UP)) {
446 nh->nh_dev = in_dev->dev;
447 dev_hold(nh->nh_dev);
448 nh->nh_scope = RT_SCOPE_HOST;
455 fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
456 const struct nlmsghdr *nlh, int *errp)
459 struct fib_info *fi = NULL;
460 struct fib_info *ofi;
461 #ifdef CONFIG_IP_ROUTE_MULTIPATH
467 /* Fast check to catch the most weird cases */
468 if (fib_props[r->rtm_type].scope > r->rtm_scope)
471 #ifdef CONFIG_IP_ROUTE_MULTIPATH
473 nhs = fib_count_nexthops(rta->rta_mp);
479 fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
484 memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh));
486 fi->fib_protocol = r->rtm_protocol;
488 fi->fib_flags = r->rtm_flags;
489 if (rta->rta_priority)
490 fi->fib_priority = *rta->rta_priority;
492 int attrlen = RTA_PAYLOAD(rta->rta_mx);
493 struct rtattr *attr = RTA_DATA(rta->rta_mx);
495 while (RTA_OK(attr, attrlen)) {
496 unsigned flavor = attr->rta_type;
498 if (flavor > RTAX_MAX)
500 fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
502 attr = RTA_NEXT(attr, attrlen);
505 if (rta->rta_prefsrc)
506 memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);
509 #ifdef CONFIG_IP_ROUTE_MULTIPATH
510 if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
512 if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
514 if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
516 #ifdef CONFIG_NET_CLS_ROUTE
517 if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4))
524 struct fib_nh *nh = fi->fib_nh;
526 nh->nh_oif = *rta->rta_oif;
528 memcpy(&nh->nh_gw, rta->rta_gw, 4);
529 #ifdef CONFIG_NET_CLS_ROUTE
531 memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
533 nh->nh_flags = r->rtm_flags;
534 #ifdef CONFIG_IP_ROUTE_MULTIPATH
539 if (fib_props[r->rtm_type].error) {
540 if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
545 if (r->rtm_scope > RT_SCOPE_HOST)
548 if (r->rtm_scope == RT_SCOPE_HOST) {
549 struct fib_nh *nh = fi->fib_nh;
551 /* Local address is added. */
552 if (nhs != 1 || nh->nh_gw)
554 nh->nh_scope = RT_SCOPE_NOWHERE;
555 nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
557 if (nh->nh_dev == NULL)
560 change_nexthops(fi) {
561 if ((err = fib_check_nh(r, fi, nh)) != 0)
563 } endfor_nexthops(fi)
566 if (fi->fib_prefsrc) {
567 if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
568 memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
569 if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
574 if ((ofi = fib_find_info(fi)) != NULL) {
582 atomic_inc(&fi->fib_clntref);
583 write_lock(&fib_info_lock);
584 fi->fib_next = fib_info_list;
587 fib_info_list->fib_prev = fi;
589 write_unlock(&fib_info_lock);
605 fib_semantic_match(int type, struct fib_info *fi, const struct flowi *flp, struct fib_result *res)
607 int err = fib_props[type].error;
610 if (fi->fib_flags&RTNH_F_DEAD)
622 if (nh->nh_flags&RTNH_F_DEAD)
624 if (!flp->oif || flp->oif == nh->nh_oif)
627 #ifdef CONFIG_IP_ROUTE_MULTIPATH
628 if (nhsel < fi->fib_nhs) {
630 atomic_inc(&fi->fib_clntref);
635 atomic_inc(&fi->fib_clntref);
644 printk(KERN_DEBUG "impossible 102\n");
651 /* Find appropriate source address to this destination */
653 u32 __fib_res_prefsrc(struct fib_result *res)
655 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
659 fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
660 u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
664 struct nlmsghdr *nlh;
665 unsigned char *b = skb->tail;
667 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*rtm));
668 rtm = NLMSG_DATA(nlh);
669 rtm->rtm_family = AF_INET;
670 rtm->rtm_dst_len = dst_len;
671 rtm->rtm_src_len = 0;
673 rtm->rtm_table = tb_id;
674 rtm->rtm_type = type;
675 rtm->rtm_flags = fi->fib_flags;
676 rtm->rtm_scope = scope;
677 if (rtm->rtm_dst_len)
678 RTA_PUT(skb, RTA_DST, 4, dst);
679 rtm->rtm_protocol = fi->fib_protocol;
680 if (fi->fib_priority)
681 RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
682 #ifdef CONFIG_NET_CLS_ROUTE
683 if (fi->fib_nh[0].nh_tclassid)
684 RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
686 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
689 RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
690 if (fi->fib_nhs == 1) {
691 if (fi->fib_nh->nh_gw)
692 RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
693 if (fi->fib_nh->nh_oif)
694 RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
696 #ifdef CONFIG_IP_ROUTE_MULTIPATH
697 if (fi->fib_nhs > 1) {
698 struct rtnexthop *nhp;
699 struct rtattr *mp_head;
700 if (skb_tailroom(skb) <= RTA_SPACE(0))
702 mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));
705 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
707 nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
708 nhp->rtnh_flags = nh->nh_flags & 0xFF;
709 nhp->rtnh_hops = nh->nh_weight-1;
710 nhp->rtnh_ifindex = nh->nh_oif;
712 RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
713 nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
714 } endfor_nexthops(fi);
715 mp_head->rta_type = RTA_MULTIPATH;
716 mp_head->rta_len = skb->tail - (u8*)mp_head;
719 nlh->nlmsg_len = skb->tail - b;
724 skb_trim(skb, b - skb->data);
728 #ifndef CONFIG_IP_NOSIOCRT
731 fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
732 struct kern_rta *rta, struct rtentry *r)
737 memset(rtm, 0, sizeof(*rtm));
738 memset(rta, 0, sizeof(*rta));
740 if (r->rt_dst.sa_family != AF_INET)
741 return -EAFNOSUPPORT;
743 /* Check mask for validity:
744 a) it must be contiguous.
745 b) destination must have all host bits clear.
746 c) if application forgot to set correct family (AF_INET),
747 reject request unless it is absolutely clear i.e.
748 both family and mask are zero.
751 ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
752 if (!(r->rt_flags&RTF_HOST)) {
753 u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
754 if (r->rt_genmask.sa_family != AF_INET) {
755 if (mask || r->rt_genmask.sa_family)
756 return -EAFNOSUPPORT;
758 if (bad_mask(mask, *ptr))
760 plen = inet_mask_len(mask);
763 nl->nlmsg_flags = NLM_F_REQUEST;
766 nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
767 if (cmd == SIOCDELRT) {
768 nl->nlmsg_type = RTM_DELROUTE;
771 nl->nlmsg_type = RTM_NEWROUTE;
772 nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
773 rtm->rtm_protocol = RTPROT_BOOT;
776 rtm->rtm_dst_len = plen;
780 *(u32*)&r->rt_pad3 = r->rt_metric - 1;
781 rta->rta_priority = (u32*)&r->rt_pad3;
783 if (r->rt_flags&RTF_REJECT) {
784 rtm->rtm_scope = RT_SCOPE_HOST;
785 rtm->rtm_type = RTN_UNREACHABLE;
788 rtm->rtm_scope = RT_SCOPE_NOWHERE;
789 rtm->rtm_type = RTN_UNICAST;
793 struct net_device *dev;
794 char devname[IFNAMSIZ];
796 if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
798 devname[IFNAMSIZ-1] = 0;
799 colon = strchr(devname, ':');
802 dev = __dev_get_by_name(devname);
805 rta->rta_oif = &dev->ifindex;
807 struct in_ifaddr *ifa;
808 struct in_device *in_dev = __in_dev_get(dev);
812 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
813 if (strcmp(ifa->ifa_label, devname) == 0)
817 rta->rta_prefsrc = &ifa->ifa_local;
821 ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
822 if (r->rt_gateway.sa_family == AF_INET && *ptr) {
824 if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
825 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
828 if (cmd == SIOCDELRT)
831 if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
834 if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
835 rtm->rtm_scope = RT_SCOPE_LINK;
837 if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
839 struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
843 mx->rta_type = RTA_METRICS;
844 mx->rta_len = RTA_LENGTH(0);
845 if (r->rt_flags&RTF_MTU) {
846 rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
847 rec->rta_type = RTAX_ADVMSS;
848 rec->rta_len = RTA_LENGTH(4);
849 mx->rta_len += RTA_LENGTH(4);
850 *(u32*)RTA_DATA(rec) = r->rt_mtu - 40;
852 if (r->rt_flags&RTF_WINDOW) {
853 rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
854 rec->rta_type = RTAX_WINDOW;
855 rec->rta_len = RTA_LENGTH(4);
856 mx->rta_len += RTA_LENGTH(4);
857 *(u32*)RTA_DATA(rec) = r->rt_window;
859 if (r->rt_flags&RTF_IRTT) {
860 rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
861 rec->rta_type = RTAX_RTT;
862 rec->rta_len = RTA_LENGTH(4);
863 mx->rta_len += RTA_LENGTH(4);
864 *(u32*)RTA_DATA(rec) = r->rt_irtt<<3;
874 - local address disappeared -> we must delete all the entries
876 - device went down -> we must shutdown all nexthops going via it.
879 int fib_sync_down(u32 local, struct net_device *dev, int force)
882 int scope = RT_SCOPE_NOWHERE;
888 if (local && fi->fib_prefsrc == local) {
889 fi->fib_flags |= RTNH_F_DEAD;
891 } else if (dev && fi->fib_nhs) {
894 change_nexthops(fi) {
895 if (nh->nh_flags&RTNH_F_DEAD)
897 else if (nh->nh_dev == dev &&
898 nh->nh_scope != scope) {
899 nh->nh_flags |= RTNH_F_DEAD;
900 #ifdef CONFIG_IP_ROUTE_MULTIPATH
901 spin_lock_bh(&fib_multipath_lock);
902 fi->fib_power -= nh->nh_power;
904 spin_unlock_bh(&fib_multipath_lock);
908 #ifdef CONFIG_IP_ROUTE_MULTIPATH
909 if (force > 1 && nh->nh_dev == dev) {
914 } endfor_nexthops(fi)
915 if (dead == fi->fib_nhs) {
916 fi->fib_flags |= RTNH_F_DEAD;
924 #ifdef CONFIG_IP_ROUTE_MULTIPATH
927 Dead device goes up. We wake up dead nexthops.
928 It takes sense only on multipath routes.
931 int fib_sync_up(struct net_device *dev)
935 if (!(dev->flags&IFF_UP))
941 change_nexthops(fi) {
942 if (!(nh->nh_flags&RTNH_F_DEAD)) {
946 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
948 if (nh->nh_dev != dev || __in_dev_get(dev) == NULL)
951 spin_lock_bh(&fib_multipath_lock);
953 nh->nh_flags &= ~RTNH_F_DEAD;
954 spin_unlock_bh(&fib_multipath_lock);
955 } endfor_nexthops(fi)
958 fi->fib_flags &= ~RTNH_F_DEAD;
966 The algorithm is suboptimal, but it provides really
967 fair weighted route distribution.
970 void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
972 struct fib_info *fi = res->fi;
975 spin_lock_bh(&fib_multipath_lock);
976 if (fi->fib_power <= 0) {
978 change_nexthops(fi) {
979 if (!(nh->nh_flags&RTNH_F_DEAD)) {
980 power += nh->nh_weight;
981 nh->nh_power = nh->nh_weight;
983 } endfor_nexthops(fi);
984 fi->fib_power = power;
986 spin_unlock_bh(&fib_multipath_lock);
987 /* Race condition: route has just become dead. */
994 /* w should be random number [0..fi->fib_power-1],
995 it is pretty bad approximation.
998 w = jiffies % fi->fib_power;
1000 change_nexthops(fi) {
1001 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1002 if ((w -= nh->nh_power) <= 0) {
1005 res->nh_sel = nhsel;
1006 spin_unlock_bh(&fib_multipath_lock);
1010 } endfor_nexthops(fi);
1012 /* Race condition: route has just become dead. */
1014 spin_unlock_bh(&fib_multipath_lock);