ipv4: Fix erroneous uses of ifa_address.
[linux-flexiantxendom0-natty.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <asm/system.h>
31 #include <linux/bitops.h>
32 #include <linux/capability.h>
33 #include <linux/module.h>
34 #include <linux/types.h>
35 #include <linux/kernel.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58
59 #include <net/arp.h>
60 #include <net/ip.h>
61 #include <net/route.h>
62 #include <net/ip_fib.h>
63 #include <net/rtnetlink.h>
64 #include <net/net_namespace.h>
65
/*
 * IPv4 per-device configuration block with redirect acceptance, redirect
 * sending, secure redirects and shared-media preset to 1.  Every entry not
 * listed stays zero.  Slots are indexed by (IPV4_DEVCONF_* - 1).
 */
66 static struct ipv4_devconf ipv4_devconf = {
67         .data = {
68                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
69                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
70                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
71                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
72         },
73 };
74
/*
 * Configuration block with the same presets as ipv4_devconf plus
 * ACCEPT_SOURCE_ROUTE set to 1.  Slots are indexed by (IPV4_DEVCONF_* - 1).
 * NOTE(review): presumably the template behind net->ipv4.devconf_dflt that
 * inetdev_init() copies into new devices -- the wiring is not in this chunk.
 */
75 static struct ipv4_devconf ipv4_devconf_dflt = {
76         .data = {
77                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
78                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
79                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
80                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
81                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
82         },
83 };
84
/*
 * Read attribute @attr from the default device configuration of namespace
 * @net.  @net is parenthesised so that any pointer expression (for example
 * "p + 1" or "cond ? a : b") can be passed, not just a plain identifier.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
87
/*
 * Netlink attribute policy handed to nlmsg_parse() by the RTM_NEWADDR and
 * RTM_DELADDR handlers in this file: the three address attributes are
 * 32-bit values and the label is a string of at most IFNAMSIZ - 1 bytes.
 */
88 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
89         [IFA_LOCAL]             = { .type = NLA_U32 },
90         [IFA_ADDRESS]           = { .type = NLA_U32 },
91         [IFA_BROADCAST]         = { .type = NLA_U32 },
92         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
93 };
94
/* Forward declaration: address-change netlink notification (defined later). */
95 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
96
/*
 * Notifier chain invoked with NETDEV_UP / NETDEV_DOWN and the affected
 * in_ifaddr whenever an address is inserted or removed in this file.
 */
97 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
98 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
99                          int destroy);
/* Without CONFIG_SYSCTL the per-device sysctl hooks compile to empty stubs. */
100 #ifdef CONFIG_SYSCTL
101 static void devinet_sysctl_register(struct in_device *idev);
102 static void devinet_sysctl_unregister(struct in_device *idev);
103 #else
104 static inline void devinet_sysctl_register(struct in_device *idev)
105 {
106 }
107 static inline void devinet_sysctl_unregister(struct in_device *idev)
108 {
109 }
110 #endif
111
112 /* Locks all the inet devices. */
113
114 static struct in_ifaddr *inet_alloc_ifa(void)
115 {
116         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
117 }
118
119 static void inet_rcu_free_ifa(struct rcu_head *head)
120 {
121         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
122         if (ifa->ifa_dev)
123                 in_dev_put(ifa->ifa_dev);
124         kfree(ifa);
125 }
126
127 static inline void inet_free_ifa(struct in_ifaddr *ifa)
128 {
129         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
130 }
131
132 void in_dev_finish_destroy(struct in_device *idev)
133 {
134         struct net_device *dev = idev->dev;
135
136         WARN_ON(idev->ifa_list);
137         WARN_ON(idev->mc_list);
138 #ifdef NET_REFCNT_DEBUG
139         printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
140                idev, dev ? dev->name : "NIL");
141 #endif
142         dev_put(dev);
143         if (!idev->dead)
144                 pr_err("Freeing alive in_device %p\n", idev);
145         else
146                 kfree(idev);
147 }
148 EXPORT_SYMBOL(in_dev_finish_destroy);
149
150 static struct in_device *inetdev_init(struct net_device *dev)
151 {
152         struct in_device *in_dev;
153
154         ASSERT_RTNL();
155
156         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
157         if (!in_dev)
158                 goto out;
159         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
160                         sizeof(in_dev->cnf));
161         in_dev->cnf.sysctl = NULL;
162         in_dev->dev = dev;
163         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
164         if (!in_dev->arp_parms)
165                 goto out_kfree;
166         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
167                 dev_disable_lro(dev);
168         /* Reference in_dev->dev */
169         dev_hold(dev);
170         /* Account for reference dev->ip_ptr (below) */
171         in_dev_hold(in_dev);
172
173         devinet_sysctl_register(in_dev);
174         ip_mc_init_dev(in_dev);
175         if (dev->flags & IFF_UP)
176                 ip_mc_up(in_dev);
177
178         /* we can receive as soon as ip_ptr is set -- do this last */
179         rcu_assign_pointer(dev->ip_ptr, in_dev);
180 out:
181         return in_dev;
182 out_kfree:
183         kfree(in_dev);
184         in_dev = NULL;
185         goto out;
186 }
187
188 static void in_dev_rcu_put(struct rcu_head *head)
189 {
190         struct in_device *idev = container_of(head, struct in_device, rcu_head);
191         in_dev_put(idev);
192 }
193
194 static void inetdev_destroy(struct in_device *in_dev)
195 {
196         struct in_ifaddr *ifa;
197         struct net_device *dev;
198
199         ASSERT_RTNL();
200
201         dev = in_dev->dev;
202
203         in_dev->dead = 1;
204
205         ip_mc_destroy_dev(in_dev);
206
207         while ((ifa = in_dev->ifa_list) != NULL) {
208                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
209                 inet_free_ifa(ifa);
210         }
211
212         rcu_assign_pointer(dev->ip_ptr, NULL);
213
214         devinet_sysctl_unregister(in_dev);
215         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
216         arp_ifdown(dev);
217
218         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
219 }
220
221 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
222 {
223         rcu_read_lock();
224         for_primary_ifa(in_dev) {
225                 if (inet_ifa_match(a, ifa)) {
226                         if (!b || inet_ifa_match(b, ifa)) {
227                                 rcu_read_unlock();
228                                 return 1;
229                         }
230                 }
231         } endfor_ifa(in_dev);
232         rcu_read_unlock();
233         return 0;
234 }
235
/*
 * Remove the address *@ifap from @in_dev's address list (RTNL must be held).
 *
 * @in_dev:  device state owning the list
 * @ifap:    the link (list head or a predecessor's ifa_next) that currently
 *           points at the address to remove
 * @destroy: when non-zero the removed address is released with
 *           inet_free_ifa(); otherwise the caller keeps it
 * @nlh/@pid: passed through to rtmsg_ifa() for the netlink notifications
 *
 * When a primary address is removed, its secondaries (same mask, same
 * subnet) are either all deleted or, when IN_DEV_PROMOTE_SECONDARIES() is
 * set, the first one is promoted to primary and fib_add_ifaddr() is called
 * for the remaining ones.
 */
236 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
237                          int destroy, struct nlmsghdr *nlh, u32 pid)
238 {
239         struct in_ifaddr *promote = NULL;
240         struct in_ifaddr *ifa, *ifa1 = *ifap;
241         struct in_ifaddr *last_prim = in_dev->ifa_list;
242         struct in_ifaddr *prev_prom = NULL;
243         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
244
245         ASSERT_RTNL();
246
247         /* 1. Deleting primary ifaddr forces deletion all secondaries
248          * unless alias promotion is set
249          **/
250
251         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
252                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
253
254                 while ((ifa = *ifap1) != NULL) {
                        /* Remember the last primary whose scope is not below
                         * ifa1's: a promoted address is re-linked after it.
                         */
255                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
256                             ifa1->ifa_scope <= ifa->ifa_scope)
257                                 last_prim = ifa;
258
                        /* Not a secondary of ifa1's subnet: step over it and
                         * keep it as predecessor of a possible promotion.
                         */
259                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
260                             ifa1->ifa_mask != ifa->ifa_mask ||
261                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
262                                 ifap1 = &ifa->ifa_next;
263                                 prev_prom = ifa;
264                                 continue;
265                         }
266
267                         if (!do_promote) {
268                                 *ifap1 = ifa->ifa_next;
269
270                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
271                                 blocking_notifier_call_chain(&inetaddr_chain,
272                                                 NETDEV_DOWN, ifa);
273                                 inet_free_ifa(ifa);
274                         } else {
275                                 promote = ifa;
276                                 break;
277                         }
278                 }
279         }
280
281         /* 2. Unlink it */
282
283         *ifap = ifa1->ifa_next;
284
285         /* 3. Announce address deletion */
286
287         /* Send message first, then call notifier.
288            At first sight, FIB update triggered by notifier
289            will refer to already deleted ifaddr, that could confuse
290            netlink listeners. It is not true: look, gated sees
291            that route deleted and if it still thinks that ifaddr
292            is valid, it will try to restore deleted routes... Grr.
293            So that, this order is correct.
294          */
295         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
296         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
297
298         if (promote) {
299
                /* Move the promoted entry so it sits right after last_prim;
                 * with no predecessor recorded it stays where it is.
                 */
300                 if (prev_prom) {
301                         prev_prom->ifa_next = promote->ifa_next;
302                         promote->ifa_next = last_prim->ifa_next;
303                         last_prim->ifa_next = promote;
304                 }
305
306                 promote->ifa_flags &= ~IFA_F_SECONDARY;
307                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
308                 blocking_notifier_call_chain(&inetaddr_chain,
309                                 NETDEV_UP, promote);
                /* Call fib_add_ifaddr() for the entries after the promoted
                 * one that share the deleted address's mask and subnet.
                 */
310                 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
311                         if (ifa1->ifa_mask != ifa->ifa_mask ||
312                             !inet_ifa_match(ifa1->ifa_address, ifa))
313                                         continue;
314                         fib_add_ifaddr(ifa);
315                 }
316
317         }
318         if (destroy)
319                 inet_free_ifa(ifa1);
320 }
321
/*
 * Convenience wrapper around __inet_del_ifa() for callers that have no
 * netlink request: passes a NULL header and pid 0.
 */
322 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
323                          int destroy)
324 {
325         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
326 }
327
328 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
329                              u32 pid)
330 {
331         struct in_device *in_dev = ifa->ifa_dev;
332         struct in_ifaddr *ifa1, **ifap, **last_primary;
333
334         ASSERT_RTNL();
335
336         if (!ifa->ifa_local) {
337                 inet_free_ifa(ifa);
338                 return 0;
339         }
340
341         ifa->ifa_flags &= ~IFA_F_SECONDARY;
342         last_primary = &in_dev->ifa_list;
343
344         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
345              ifap = &ifa1->ifa_next) {
346                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
347                     ifa->ifa_scope <= ifa1->ifa_scope)
348                         last_primary = &ifa1->ifa_next;
349                 if (ifa1->ifa_mask == ifa->ifa_mask &&
350                     inet_ifa_match(ifa1->ifa_address, ifa)) {
351                         if (ifa1->ifa_local == ifa->ifa_local) {
352                                 inet_free_ifa(ifa);
353                                 return -EEXIST;
354                         }
355                         if (ifa1->ifa_scope != ifa->ifa_scope) {
356                                 inet_free_ifa(ifa);
357                                 return -EINVAL;
358                         }
359                         ifa->ifa_flags |= IFA_F_SECONDARY;
360                 }
361         }
362
363         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
364                 net_srandom(ifa->ifa_local);
365                 ifap = last_primary;
366         }
367
368         ifa->ifa_next = *ifap;
369         *ifap = ifa;
370
371         /* Send message first, then call notifier.
372            Notifier will trigger FIB update, so that
373            listeners of netlink will know about new ifaddr */
374         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
375         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
376
377         return 0;
378 }
379
/*
 * Convenience wrapper around __inet_insert_ifa() for callers that have no
 * netlink request: passes a NULL header and pid 0 and returns its result.
 */
380 static int inet_insert_ifa(struct in_ifaddr *ifa)
381 {
382         return __inet_insert_ifa(ifa, NULL, 0);
383 }
384
385 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
386 {
387         struct in_device *in_dev = __in_dev_get_rtnl(dev);
388
389         ASSERT_RTNL();
390
391         if (!in_dev) {
392                 inet_free_ifa(ifa);
393                 return -ENOBUFS;
394         }
395         ipv4_devconf_setall(in_dev);
396         if (ifa->ifa_dev != in_dev) {
397                 WARN_ON(ifa->ifa_dev);
398                 in_dev_hold(in_dev);
399                 ifa->ifa_dev = in_dev;
400         }
401         if (ipv4_is_loopback(ifa->ifa_local))
402                 ifa->ifa_scope = RT_SCOPE_HOST;
403         return inet_insert_ifa(ifa);
404 }
405
406 /* Caller must hold RCU or RTNL :
407  * We dont take a reference on found in_device
408  */
409 struct in_device *inetdev_by_index(struct net *net, int ifindex)
410 {
411         struct net_device *dev;
412         struct in_device *in_dev = NULL;
413
414         rcu_read_lock();
415         dev = dev_get_by_index_rcu(net, ifindex);
416         if (dev)
417                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
418         rcu_read_unlock();
419         return in_dev;
420 }
421 EXPORT_SYMBOL(inetdev_by_index);
422
423 /* Called only from RTNL semaphored context. No locks. */
424
425 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
426                                     __be32 mask)
427 {
428         ASSERT_RTNL();
429
430         for_primary_ifa(in_dev) {
431                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
432                         return ifa;
433         } endfor_ifa(in_dev);
434         return NULL;
435 }
436
437 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
438 {
439         struct net *net = sock_net(skb->sk);
440         struct nlattr *tb[IFA_MAX+1];
441         struct in_device *in_dev;
442         struct ifaddrmsg *ifm;
443         struct in_ifaddr *ifa, **ifap;
444         int err = -EINVAL;
445
446         ASSERT_RTNL();
447
448         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
449         if (err < 0)
450                 goto errout;
451
452         ifm = nlmsg_data(nlh);
453         in_dev = inetdev_by_index(net, ifm->ifa_index);
454         if (in_dev == NULL) {
455                 err = -ENODEV;
456                 goto errout;
457         }
458
459         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
460              ifap = &ifa->ifa_next) {
461                 if (tb[IFA_LOCAL] &&
462                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
463                         continue;
464
465                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
466                         continue;
467
468                 if (tb[IFA_ADDRESS] &&
469                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
470                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
471                         continue;
472
473                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
474                 return 0;
475         }
476
477         err = -EADDRNOTAVAIL;
478 errout:
479         return err;
480 }
481
482 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
483 {
484         struct nlattr *tb[IFA_MAX+1];
485         struct in_ifaddr *ifa;
486         struct ifaddrmsg *ifm;
487         struct net_device *dev;
488         struct in_device *in_dev;
489         int err;
490
491         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
492         if (err < 0)
493                 goto errout;
494
495         ifm = nlmsg_data(nlh);
496         err = -EINVAL;
497         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
498                 goto errout;
499
500         dev = __dev_get_by_index(net, ifm->ifa_index);
501         err = -ENODEV;
502         if (dev == NULL)
503                 goto errout;
504
505         in_dev = __in_dev_get_rtnl(dev);
506         err = -ENOBUFS;
507         if (in_dev == NULL)
508                 goto errout;
509
510         ifa = inet_alloc_ifa();
511         if (ifa == NULL)
512                 /*
513                  * A potential indev allocation can be left alive, it stays
514                  * assigned to its device and is destroy with it.
515                  */
516                 goto errout;
517
518         ipv4_devconf_setall(in_dev);
519         in_dev_hold(in_dev);
520
521         if (tb[IFA_ADDRESS] == NULL)
522                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
523
524         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
525         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
526         ifa->ifa_flags = ifm->ifa_flags;
527         ifa->ifa_scope = ifm->ifa_scope;
528         ifa->ifa_dev = in_dev;
529
530         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
531         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
532
533         if (tb[IFA_BROADCAST])
534                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
535
536         if (tb[IFA_LABEL])
537                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
538         else
539                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
540
541         return ifa;
542
543 errout:
544         return ERR_PTR(err);
545 }
546
547 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
548 {
549         struct net *net = sock_net(skb->sk);
550         struct in_ifaddr *ifa;
551
552         ASSERT_RTNL();
553
554         ifa = rtm_to_ifaddr(net, nlh);
555         if (IS_ERR(ifa))
556                 return PTR_ERR(ifa);
557
558         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
559 }
560
561 /*
562  *      Determine a default network mask, based on the IP address.
563  */
564
565 static inline int inet_abc_len(__be32 addr)
566 {
567         int rc = -1;    /* Something else, probably a multicast. */
568
569         if (ipv4_is_zeronet(addr))
570                 rc = 0;
571         else {
572                 __u32 haddr = ntohl(addr);
573
574                 if (IN_CLASSA(haddr))
575                         rc = 8;
576                 else if (IN_CLASSB(haddr))
577                         rc = 16;
578                 else if (IN_CLASSC(haddr))
579                         rc = 24;
580         }
581
582         return rc;
583 }
584
585
/*
 * Handle the IPv4 interface-address ioctls.
 *
 * @net: network namespace used for the device lookup
 * @cmd: SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR, SIOCGIFNETMASK,
 *       SIOCSIFADDR, SIOCSIFBRDADDR, SIOCSIFDSTADDR, SIOCSIFNETMASK or
 *       SIOCSIFFLAGS; anything else yields -EINVAL
 * @arg: user pointer to a struct ifreq (read, and written back for the
 *       SIOCGIF* requests)
 *
 * Flow: copy the ifreq in, cut an ":alias" suffix off the name for the
 * device lookup, validate the command (all SIOCSIF* requests need
 * CAP_NET_ADMIN; the address-setting ones also need AF_INET), take RTNL,
 * locate the address by label (and, for SIOCGIF* requests made with an
 * AF_INET address, by local address first), then dispatch.
 *
 * Returns 0 on success or a negative errno: -EFAULT, -EACCES, -EINVAL,
 * -ENODEV, -EADDRNOTAVAIL, -ENOBUFS, or whatever dev_change_flags() /
 * inet_set_ifa() return.  The results of the inet_insert_ifa() calls in
 * the SIOCSIFBRDADDR, SIOCSIFDSTADDR and SIOCSIFNETMASK cases are not
 * propagated.
 */
586 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
587 {
588         struct ifreq ifr;
589         struct sockaddr_in sin_orig;
590         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
591         struct in_device *in_dev;
592         struct in_ifaddr **ifap = NULL;
593         struct in_ifaddr *ifa = NULL;
594         struct net_device *dev;
595         char *colon;
596         int ret = -EFAULT;
597         int tryaddrmatch = 0;
598
599         /*
600          *      Fetch the caller's info block into kernel space
601          */
602
603         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
604                 goto out;
605         ifr.ifr_name[IFNAMSIZ - 1] = 0;
606
607         /* save original address for comparison */
608         memcpy(&sin_orig, sin, sizeof(*sin));
609
        /* Temporarily cut "name:alias" down to "name" for the device
         * lookup; the colon is restored once the device has been found.
         */
610         colon = strchr(ifr.ifr_name, ':');
611         if (colon)
612                 *colon = 0;
613
614         dev_load(net, ifr.ifr_name);
615
616         switch (cmd) {
617         case SIOCGIFADDR:       /* Get interface address */
618         case SIOCGIFBRDADDR:    /* Get the broadcast address */
619         case SIOCGIFDSTADDR:    /* Get the destination address */
620         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
621                 /* Note that these ioctls will not sleep,
622                    so that we do not impose a lock.
623                    One day we will be forced to put shlock here (I mean SMP)
624                  */
625                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
626                 memset(sin, 0, sizeof(*sin));
627                 sin->sin_family = AF_INET;
628                 break;
629
630         case SIOCSIFFLAGS:
631                 ret = -EACCES;
632                 if (!capable(CAP_NET_ADMIN))
633                         goto out;
634                 break;
635         case SIOCSIFADDR:       /* Set interface address (and family) */
636         case SIOCSIFBRDADDR:    /* Set the broadcast address */
637         case SIOCSIFDSTADDR:    /* Set the destination address */
638         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
639                 ret = -EACCES;
640                 if (!capable(CAP_NET_ADMIN))
641                         goto out;
642                 ret = -EINVAL;
643                 if (sin->sin_family != AF_INET)
644                         goto out;
645                 break;
646         default:
647                 ret = -EINVAL;
648                 goto out;
649         }
650
651         rtnl_lock();
652
653         ret = -ENODEV;
654         dev = __dev_get_by_name(net, ifr.ifr_name);
655         if (!dev)
656                 goto done;
657
658         if (colon)
659                 *colon = ':';
660
661         in_dev = __in_dev_get_rtnl(dev);
662         if (in_dev) {
663                 if (tryaddrmatch) {
664                         /* Matthias Andree */
665                         /* compare label and address (4.4BSD style) */
666                         /* note: we only do this for a limited set of ioctls
667                            and only if the original address family was AF_INET.
668                            This is checked above. */
669                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
670                              ifap = &ifa->ifa_next) {
671                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
672                                     sin_orig.sin_addr.s_addr ==
673                                                         ifa->ifa_local) {
674                                         break; /* found */
675                                 }
676                         }
677                 }
678                 /* we didn't get a match, maybe the application is
679                    4.3BSD-style and passed in junk so we fall back to
680                    comparing just the label */
681                 if (!ifa) {
682                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
683                              ifap = &ifa->ifa_next)
684                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
685                                         break;
686                 }
687         }
688
        /* Only SIOCSIFADDR (creates the address) and SIOCSIFFLAGS (handles
         * the missing-address case itself) may proceed without a match.
         */
689         ret = -EADDRNOTAVAIL;
690         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
691                 goto done;
692
693         switch (cmd) {
694         case SIOCGIFADDR:       /* Get interface address */
695                 sin->sin_addr.s_addr = ifa->ifa_local;
696                 goto rarok;
697
698         case SIOCGIFBRDADDR:    /* Get the broadcast address */
699                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
700                 goto rarok;
701
702         case SIOCGIFDSTADDR:    /* Get the destination address */
703                 sin->sin_addr.s_addr = ifa->ifa_address;
704                 goto rarok;
705
706         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
707                 sin->sin_addr.s_addr = ifa->ifa_mask;
708                 goto rarok;
709
710         case SIOCSIFFLAGS:
                /* On an alias ("name:N") only clearing IFF_UP has an effect:
                 * it deletes that address.  Otherwise change device flags.
                 */
711                 if (colon) {
712                         ret = -EADDRNOTAVAIL;
713                         if (!ifa)
714                                 break;
715                         ret = 0;
716                         if (!(ifr.ifr_flags & IFF_UP))
717                                 inet_del_ifa(in_dev, ifap, 1);
718                         break;
719                 }
720                 ret = dev_change_flags(dev, ifr.ifr_flags);
721                 break;
722
723         case SIOCSIFADDR:       /* Set interface address (and family) */
724                 ret = -EINVAL;
725                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
726                         break;
727
728                 if (!ifa) {
729                         ret = -ENOBUFS;
730                         ifa = inet_alloc_ifa();
731                         if (!ifa)
732                                 break;
733                         if (colon)
734                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
735                         else
736                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
737                 } else {
738                         ret = 0;
739                         if (ifa->ifa_local == sin->sin_addr.s_addr)
740                                 break;
                        /* Unlink without freeing: the same in_ifaddr is
                         * updated and re-inserted below.
                         */
741                         inet_del_ifa(in_dev, ifap, 0);
742                         ifa->ifa_broadcast = 0;
743                         ifa->ifa_scope = 0;
744                 }
745
746                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
747
748                 if (!(dev->flags & IFF_POINTOPOINT)) {
749                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
750                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
751                         if ((dev->flags & IFF_BROADCAST) &&
752                             ifa->ifa_prefixlen < 31)
753                                 ifa->ifa_broadcast = ifa->ifa_address |
754                                                      ~ifa->ifa_mask;
755                 } else {
756                         ifa->ifa_prefixlen = 32;
757                         ifa->ifa_mask = inet_make_mask(32);
758                 }
759                 ret = inet_set_ifa(dev, ifa);
760                 break;
761
762         case SIOCSIFBRDADDR:    /* Set the broadcast address */
763                 ret = 0;
764                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
765                         inet_del_ifa(in_dev, ifap, 0);
766                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
767                         inet_insert_ifa(ifa);
768                 }
769                 break;
770
771         case SIOCSIFDSTADDR:    /* Set the destination address */
772                 ret = 0;
773                 if (ifa->ifa_address == sin->sin_addr.s_addr)
774                         break;
775                 ret = -EINVAL;
776                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
777                         break;
778                 ret = 0;
779                 inet_del_ifa(in_dev, ifap, 0);
780                 ifa->ifa_address = sin->sin_addr.s_addr;
781                 inet_insert_ifa(ifa);
782                 break;
783
784         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
785
786                 /*
787                  *      The mask we set must be legal.
788                  */
789                 ret = -EINVAL;
790                 if (bad_mask(sin->sin_addr.s_addr, 0))
791                         break;
792                 ret = 0;
793                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
794                         __be32 old_mask = ifa->ifa_mask;
795                         inet_del_ifa(in_dev, ifap, 0);
796                         ifa->ifa_mask = sin->sin_addr.s_addr;
797                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
798
799                         /* See if current broadcast address matches
800                          * with current netmask, then recalculate
801                          * the broadcast address. Otherwise it's a
802                          * funny address, so don't touch it since
803                          * the user seems to know what (s)he's doing...
804                          */
805                         if ((dev->flags & IFF_BROADCAST) &&
806                             (ifa->ifa_prefixlen < 31) &&
807                             (ifa->ifa_broadcast ==
808                              (ifa->ifa_local|~old_mask))) {
809                                 ifa->ifa_broadcast = (ifa->ifa_local |
810                                                       ~sin->sin_addr.s_addr);
811                         }
812                         inet_insert_ifa(ifa);
813                 }
814                 break;
815         }
816 done:
817         rtnl_unlock();
818 out:
819         return ret;
/* Read requests: drop RTNL first, then copy the filled-in ifreq back. */
820 rarok:
821         rtnl_unlock();
822         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
823         goto out;
824 }
825
826 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
827 {
828         struct in_device *in_dev = __in_dev_get_rtnl(dev);
829         struct in_ifaddr *ifa;
830         struct ifreq ifr;
831         int done = 0;
832
833         if (!in_dev)
834                 goto out;
835
836         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
837                 if (!buf) {
838                         done += sizeof(ifr);
839                         continue;
840                 }
841                 if (len < (int) sizeof(ifr))
842                         break;
843                 memset(&ifr, 0, sizeof(struct ifreq));
844                 if (ifa->ifa_label)
845                         strcpy(ifr.ifr_name, ifa->ifa_label);
846                 else
847                         strcpy(ifr.ifr_name, dev->name);
848
849                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
850                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
851                                                                 ifa->ifa_local;
852
853                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
854                         done = -EFAULT;
855                         break;
856                 }
857                 buf  += sizeof(struct ifreq);
858                 len  -= sizeof(struct ifreq);
859                 done += sizeof(struct ifreq);
860         }
861 out:
862         return done;
863 }
864
865 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
866 {
867         __be32 addr = 0;
868         struct in_device *in_dev;
869         struct net *net = dev_net(dev);
870
871         rcu_read_lock();
872         in_dev = __in_dev_get_rcu(dev);
873         if (!in_dev)
874                 goto no_in_dev;
875
876         for_primary_ifa(in_dev) {
877                 if (ifa->ifa_scope > scope)
878                         continue;
879                 if (!dst || inet_ifa_match(dst, ifa)) {
880                         addr = ifa->ifa_local;
881                         break;
882                 }
883                 if (!addr)
884                         addr = ifa->ifa_local;
885         } endfor_ifa(in_dev);
886
887         if (addr)
888                 goto out_unlock;
889 no_in_dev:
890
891         /* Not loopback addresses on loopback should be preferred
892            in this case. It is importnat that lo is the first interface
893            in dev_base list.
894          */
895         for_each_netdev_rcu(net, dev) {
896                 in_dev = __in_dev_get_rcu(dev);
897                 if (!in_dev)
898                         continue;
899
900                 for_primary_ifa(in_dev) {
901                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
902                             ifa->ifa_scope <= scope) {
903                                 addr = ifa->ifa_local;
904                                 goto out_unlock;
905                         }
906                 } endfor_ifa(in_dev);
907         }
908 out_unlock:
909         rcu_read_unlock();
910         return addr;
911 }
912 EXPORT_SYMBOL(inet_select_addr);
913
/*
 * Search the address list of one in_device for a local address that
 * satisfies the caller's constraints.
 *
 * @in_dev: device whose addresses are scanned
 * @dst:    when non-zero, some address on the device must match it via
 *          inet_ifa_match() (presumably a same-subnet test)
 * @local:  when non-zero, the address returned must equal it; 0 lets the
 *          function pick one
 * @scope:  maximum ifa_scope allowed for the returned address
 *
 * Two pieces of state are tracked while walking the list:
 *   addr - the candidate local address (first one that fits local/scope)
 *   same - set once an entry matches both the local and dst constraints
 *
 * Returns the candidate address only if "same" ended up set, else 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
                              __be32 local, int scope)
{
        int same = 0;
        __be32 addr = 0;

        for_ifa(in_dev) {
                /* Latch the first address that equals local (or any, if
                 * local is 0) and is within the permitted scope.
                 */
                if (!addr &&
                    (local == ifa->ifa_local || !local) &&
                    ifa->ifa_scope <= scope) {
                        addr = ifa->ifa_local;
                        /* Subnet test already passed earlier: done. */
                        if (same)
                                break;
                }
                if (!same) {
                        /* Does this entry match both local and dst? */
                        same = (!local || inet_ifa_match(local, ifa)) &&
                                (!dst || inet_ifa_match(dst, ifa));
                        if (same && addr) {
                                /* A fixed local, or no dst constraint:
                                 * the candidate needs no further checks.
                                 */
                                if (local || !dst)
                                        break;
                                /* Is the selected addr into dst subnet? */
                                if (inet_ifa_match(addr, ifa))
                                        break;
                                /* No, then can we use new local src? */
                                if (ifa->ifa_scope <= scope) {
                                        addr = ifa->ifa_local;
                                        break;
                                }
                                /* search for large dst subnet for addr */
                                same = 0;
                        }
                }
        } endfor_ifa(in_dev);

        return same ? addr : 0;
}
950
951 /*
952  * Confirm that local IP address exists using wildcards:
953  * - in_dev: only on this interface, 0=any interface
954  * - dst: only in the same subnet as dst, 0=any dst
955  * - local: address, 0=autoselect the local address
956  * - scope: maximum allowed scope value for the local address
957  */
958 __be32 inet_confirm_addr(struct in_device *in_dev,
959                          __be32 dst, __be32 local, int scope)
960 {
961         __be32 addr = 0;
962         struct net_device *dev;
963         struct net *net;
964
965         if (scope != RT_SCOPE_LINK)
966                 return confirm_addr_indev(in_dev, dst, local, scope);
967
968         net = dev_net(in_dev->dev);
969         rcu_read_lock();
970         for_each_netdev_rcu(net, dev) {
971                 in_dev = __in_dev_get_rcu(dev);
972                 if (in_dev) {
973                         addr = confirm_addr_indev(in_dev, dst, local, scope);
974                         if (addr)
975                                 break;
976                 }
977         }
978         rcu_read_unlock();
979
980         return addr;
981 }
982
983 /*
984  *      Device notifier
985  */
986
987 int register_inetaddr_notifier(struct notifier_block *nb)
988 {
989         return blocking_notifier_chain_register(&inetaddr_chain, nb);
990 }
991 EXPORT_SYMBOL(register_inetaddr_notifier);
992
993 int unregister_inetaddr_notifier(struct notifier_block *nb)
994 {
995         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
996 }
997 EXPORT_SYMBOL(unregister_inetaddr_notifier);
998
999 /* Rename ifa_labels for a device name change. Make some effort to preserve
1000  * existing alias numbering and to create unique labels if possible.
1001 */
1002 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1003 {
1004         struct in_ifaddr *ifa;
1005         int named = 0;
1006
1007         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1008                 char old[IFNAMSIZ], *dot;
1009
1010                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1011                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1012                 if (named++ == 0)
1013                         goto skip;
1014                 dot = strchr(old, ':');
1015                 if (dot == NULL) {
1016                         sprintf(old, ":%d", named);
1017                         dot = old;
1018                 }
1019                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1020                         strcat(ifa->ifa_label, dot);
1021                 else
1022                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1023 skip:
1024                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1025         }
1026 }
1027
/*
 * True when @mtu is large enough for IPv4 to stay enabled on a device.
 * The threshold of 68 is presumably the RFC 791 minimum datagram size
 * every host must accept -- confirm before changing it.
 */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
        return !(mtu < 68);
}
1032
1033 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1034                                         struct in_device *in_dev)
1035
1036 {
1037         struct in_ifaddr *ifa = in_dev->ifa_list;
1038
1039         if (!ifa)
1040                 return;
1041
1042         arp_send(ARPOP_REQUEST, ETH_P_ARP,
1043                  ifa->ifa_local, dev,
1044                  ifa->ifa_local, NULL,
1045                  dev->dev_addr, NULL);
1046 }
1047
/*
 * Netdevice notifier callback that keeps the IPv4 per-device state
 * (struct in_device) in step with device lifecycle events.
 *
 * @this:  the notifier block (unused here)
 * @event: NETDEV_* event code
 * @ptr:   the struct net_device the event refers to
 *
 * Returns NOTIFY_DONE, except when inetdev_init() fails on
 * NETDEV_REGISTER, in which case notifier_from_errno(-ENOMEM) is returned.
 *
 * Called only under RTNL semaphore
 */

static int inetdev_event(struct notifier_block *this, unsigned long event,
                         void *ptr)
{
        struct net_device *dev = ptr;
        struct in_device *in_dev = __in_dev_get_rtnl(dev);

        ASSERT_RTNL();

        /* No IPv4 state yet: only REGISTER and CHANGEMTU can create it. */
        if (!in_dev) {
                if (event == NETDEV_REGISTER) {
                        in_dev = inetdev_init(dev);
                        if (!in_dev)
                                return notifier_from_errno(-ENOMEM);
                        if (dev->flags & IFF_LOOPBACK) {
                                IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
                                IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
                        }
                } else if (event == NETDEV_CHANGEMTU) {
                        /* Re-enabling IP */
                        if (inetdev_valid_mtu(dev->mtu))
                                in_dev = inetdev_init(dev);
                }
                goto out;
        }

        switch (event) {
        case NETDEV_REGISTER:
                /* REGISTER for a device that already has an in_device is
                 * unexpected; log it and clear the device's ip_ptr.
                 */
                printk(KERN_DEBUG "inetdev_event: bug\n");
                rcu_assign_pointer(dev->ip_ptr, NULL);
                break;
        case NETDEV_UP:
                if (!inetdev_valid_mtu(dev->mtu))
                        break;
                /* Loopback devices get INADDR_LOOPBACK/8, host scope,
                 * configured automatically when they come up.
                 */
                if (dev->flags & IFF_LOOPBACK) {
                        struct in_ifaddr *ifa = inet_alloc_ifa();

                        if (ifa) {
                                ifa->ifa_local =
                                  ifa->ifa_address = htonl(INADDR_LOOPBACK);
                                ifa->ifa_prefixlen = 8;
                                ifa->ifa_mask = inet_make_mask(8);
                                in_dev_hold(in_dev);
                                ifa->ifa_dev = in_dev;
                                ifa->ifa_scope = RT_SCOPE_HOST;
                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
                                inet_insert_ifa(ifa);
                        }
                }
                ip_mc_up(in_dev);
                /* fall through */
        case NETDEV_CHANGEADDR:
                /* UP and CHANGEADDR announce themselves only when the
                 * arp_notify setting is enabled for this device.
                 */
                if (!IN_DEV_ARP_NOTIFY(in_dev))
                        break;
                /* fall through */
        case NETDEV_NOTIFY_PEERS:
                /* Send gratuitous ARP to notify of link change */
                inetdev_send_gratuitous_arp(dev, in_dev);
                break;
        case NETDEV_DOWN:
                ip_mc_down(in_dev);
                break;
        case NETDEV_PRE_TYPE_CHANGE:
                ip_mc_unmap(in_dev);
                break;
        case NETDEV_POST_TYPE_CHANGE:
                ip_mc_remap(in_dev);
                break;
        case NETDEV_CHANGEMTU:
                if (inetdev_valid_mtu(dev->mtu))
                        break;
                /* disable IP when MTU is not enough: fall through and
                 * tear the in_device down exactly as for UNREGISTER
                 */
        case NETDEV_UNREGISTER:
                inetdev_destroy(in_dev);
                break;
        case NETDEV_CHANGENAME:
                /* Do not notify about label change, this event is
                 * not interesting to applications using netlink.
                 *
                 * NOTE(review): inetdev_changename() in this file does
                 * send RTM_NEWADDR for every address, so the sentence
                 * above looks stale -- confirm intent.
                 */
                inetdev_changename(dev, in_dev);

                /* Re-register the sysctl entries for this device after
                 * the rename.
                 */
                devinet_sysctl_unregister(in_dev);
                devinet_sysctl_register(in_dev);
                break;
        }
out:
        return NOTIFY_DONE;
}
1137
/* Notifier block that routes netdevice events to inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
        .notifier_call = inetdev_event,
};
1141
1142 static inline size_t inet_nlmsg_size(void)
1143 {
1144         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1145                + nla_total_size(4) /* IFA_ADDRESS */
1146                + nla_total_size(4) /* IFA_LOCAL */
1147                + nla_total_size(4) /* IFA_BROADCAST */
1148                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1149 }
1150
1151 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1152                             u32 pid, u32 seq, int event, unsigned int flags)
1153 {
1154         struct ifaddrmsg *ifm;
1155         struct nlmsghdr  *nlh;
1156
1157         nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1158         if (nlh == NULL)
1159                 return -EMSGSIZE;
1160
1161         ifm = nlmsg_data(nlh);
1162         ifm->ifa_family = AF_INET;
1163         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1164         ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1165         ifm->ifa_scope = ifa->ifa_scope;
1166         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1167
1168         if (ifa->ifa_address)
1169                 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1170
1171         if (ifa->ifa_local)
1172                 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1173
1174         if (ifa->ifa_broadcast)
1175                 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1176
1177         if (ifa->ifa_label[0])
1178                 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1179
1180         return nlmsg_end(skb, nlh);
1181
1182 nla_put_failure:
1183         nlmsg_cancel(skb, nlh);
1184         return -EMSGSIZE;
1185 }
1186
/*
 * Netlink dump callback: emit an RTM_NEWADDR (NLM_F_MULTI) message for
 * every IPv4 address in the socket's network namespace.
 *
 * The dump is resumable.  The position is kept in cb->args[]:
 *   args[0] - index of the dev_index_head hash bucket
 *   args[1] - position of the device within that bucket
 *   args[2] - position of the address within that device
 * When inet_fill_ifaddr() reports that no more fits (<= 0), the current
 * position is saved and the function returns; a later call picks up
 * from there.
 *
 * Returns skb->len.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct net *net = sock_net(skb->sk);
        int h, s_h;
        int idx, s_idx;
        int ip_idx, s_ip_idx;
        struct net_device *dev;
        struct in_device *in_dev;
        struct in_ifaddr *ifa;
        struct hlist_head *head;
        struct hlist_node *node;

        /* Restore the cursor saved by the previous invocation. */
        s_h = cb->args[0];
        s_idx = idx = cb->args[1];
        s_ip_idx = ip_idx = cb->args[2];

        /* s_idx only applies to the first bucket visited; reset after it. */
        for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
                idx = 0;
                head = &net->dev_index_head[h];
                rcu_read_lock();
                hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
                        /* Skip devices already dumped in this bucket. */
                        if (idx < s_idx)
                                goto cont;
                        /* Past the resume device: start its addresses
                         * from the beginning.
                         */
                        if (h > s_h || idx > s_idx)
                                s_ip_idx = 0;
                        in_dev = __in_dev_get_rcu(dev);
                        if (!in_dev)
                                goto cont;

                        for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
                             ifa = ifa->ifa_next, ip_idx++) {
                                /* Skip addresses already dumped. */
                                if (ip_idx < s_ip_idx)
                                        continue;
                                if (inet_fill_ifaddr(skb, ifa,
                                             NETLINK_CB(cb->skb).pid,
                                             cb->nlh->nlmsg_seq,
                                             RTM_NEWADDR, NLM_F_MULTI) <= 0) {
                                        /* Leaving the loop early: drop
                                         * the RCU read lock first.
                                         */
                                        rcu_read_unlock();
                                        goto done;
                                }
                        }
cont:
                        idx++;
                }
                rcu_read_unlock();
        }

done:
        /* Save the cursor for the next invocation. */
        cb->args[0] = h;
        cb->args[1] = idx;
        cb->args[2] = ip_idx;

        return skb->len;
}
1241
1242 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1243                       u32 pid)
1244 {
1245         struct sk_buff *skb;
1246         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1247         int err = -ENOBUFS;
1248         struct net *net;
1249
1250         net = dev_net(ifa->ifa_dev->dev);
1251         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1252         if (skb == NULL)
1253                 goto errout;
1254
1255         err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1256         if (err < 0) {
1257                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1258                 WARN_ON(err == -EMSGSIZE);
1259                 kfree_skb(skb);
1260                 goto errout;
1261         }
1262         rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1263         return;
1264 errout:
1265         if (err < 0)
1266                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1267 }
1268
1269 static size_t inet_get_link_af_size(const struct net_device *dev)
1270 {
1271         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1272
1273         if (!in_dev)
1274                 return 0;
1275
1276         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1277 }
1278
1279 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1280 {
1281         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1282         struct nlattr *nla;
1283         int i;
1284
1285         if (!in_dev)
1286                 return -ENODATA;
1287
1288         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1289         if (nla == NULL)
1290                 return -EMSGSIZE;
1291
1292         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1293                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1294
1295         return 0;
1296 }
1297
/*
 * Netlink attribute policy used by inet_validate_link_af():
 * IFLA_INET_CONF must be a nested attribute.
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
        [IFLA_INET_CONF]        = { .type = NLA_NESTED },
};
1301
1302 static int inet_validate_link_af(const struct net_device *dev,
1303                                  const struct nlattr *nla)
1304 {
1305         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1306         int err, rem;
1307
1308         if (dev && !__in_dev_get_rtnl(dev))
1309                 return -EAFNOSUPPORT;
1310
1311         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1312         if (err < 0)
1313                 return err;
1314
1315         if (tb[IFLA_INET_CONF]) {
1316                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1317                         int cfgid = nla_type(a);
1318
1319                         if (nla_len(a) < 4)
1320                                 return -EINVAL;
1321
1322                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1323                                 return -EINVAL;
1324                 }
1325         }
1326
1327         return 0;
1328 }
1329
1330 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1331 {
1332         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1333         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1334         int rem;
1335
1336         if (!in_dev)
1337                 return -EAFNOSUPPORT;
1338
1339         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1340                 BUG();
1341
1342         if (tb[IFLA_INET_CONF]) {
1343                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1344                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1345         }
1346
1347         return 0;
1348 }
1349
1350 #ifdef CONFIG_SYSCTL
1351
1352 static void devinet_copy_dflt_conf(struct net *net, int i)
1353 {
1354         struct net_device *dev;
1355
1356         rcu_read_lock();
1357         for_each_netdev_rcu(net, dev) {
1358                 struct in_device *in_dev;
1359
1360                 in_dev = __in_dev_get_rcu(dev);
1361                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1362                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1363         }
1364         rcu_read_unlock();
1365 }
1366
1367 /* called with RTNL locked */
1368 static void inet_forward_change(struct net *net)
1369 {
1370         struct net_device *dev;
1371         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1372
1373         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1374         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1375
1376         for_each_netdev(net, dev) {
1377                 struct in_device *in_dev;
1378                 if (on)
1379                         dev_disable_lro(dev);
1380                 rcu_read_lock();
1381                 in_dev = __in_dev_get_rcu(dev);
1382                 if (in_dev)
1383                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1384                 rcu_read_unlock();
1385         }
1386 }
1387
1388 static int devinet_conf_proc(ctl_table *ctl, int write,
1389                              void __user *buffer,
1390                              size_t *lenp, loff_t *ppos)
1391 {
1392         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1393
1394         if (write) {
1395                 struct ipv4_devconf *cnf = ctl->extra1;
1396                 struct net *net = ctl->extra2;
1397                 int i = (int *)ctl->data - cnf->data;
1398
1399                 set_bit(i, cnf->state);
1400
1401                 if (cnf == net->ipv4.devconf_dflt)
1402                         devinet_copy_dflt_conf(net, i);
1403         }
1404
1405         return ret;
1406 }
1407
1408 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1409                                   void __user *buffer,
1410                                   size_t *lenp, loff_t *ppos)
1411 {
1412         int *valp = ctl->data;
1413         int val = *valp;
1414         loff_t pos = *ppos;
1415         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1416
1417         if (write && *valp != val) {
1418                 struct net *net = ctl->extra2;
1419
1420                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1421                         if (!rtnl_trylock()) {
1422                                 /* Restore the original values before restarting */
1423                                 *valp = val;
1424                                 *ppos = pos;
1425                                 return restart_syscall();
1426                         }
1427                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1428                                 inet_forward_change(net);
1429                         } else if (*valp) {
1430                                 struct ipv4_devconf *cnf = ctl->extra1;
1431                                 struct in_device *idev =
1432                                         container_of(cnf, struct in_device, cnf);
1433                                 dev_disable_lro(idev->dev);
1434                         }
1435                         rtnl_unlock();
1436                         rt_cache_flush(net, 0);
1437                 }
1438         }
1439
1440         return ret;
1441 }
1442
1443 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1444                                 void __user *buffer,
1445                                 size_t *lenp, loff_t *ppos)
1446 {
1447         int *valp = ctl->data;
1448         int val = *valp;
1449         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1450         struct net *net = ctl->extra2;
1451
1452         if (write && *valp != val)
1453                 rt_cache_flush(net, 0);
1454
1455         return ret;
1456 }
1457
/*
 * Build one ctl_table initializer for devconf attribute @attr.
 *   name - procfs file name
 *   mval - file mode
 *   proc - proc_handler to install
 * .data points into the global ipv4_devconf template at index
 * IPV4_DEVCONF_<attr> - 1 (attribute ids are 1-based, the array is
 * 0-based); __devinet_sysctl_register() later rebases it onto the
 * actual config instance.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
        { \
                .procname       = name, \
                .data           = ipv4_devconf.data + \
                                  IPV4_DEVCONF_ ## attr - 1, \
                .maxlen         = sizeof(int), \
                .mode           = mval, \
                .proc_handler   = proc, \
                .extra1         = &ipv4_devconf, \
        }

/* Read-write (0644) entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only (0444) entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write (0644) entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Read-write entry whose handler, ipv4_doint_and_flush(), also flushes
 * the route cache when the value changes.
 */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
        DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1480
/*
 * Template for the per-config sysctl directory net/ipv4/conf/<name>/.
 * __devinet_sysctl_register() duplicates this structure with kmemdup()
 * and fixes up the data/extra1/extra2 pointers of each entry for the
 * config instance being registered.
 *
 *   sysctl_header - handle returned by register_net_sysctl_table()
 *   devinet_vars  - the entries; sized __IPV4_DEVCONF_MAX, the last slot
 *                   is left untouched by the fix-up loop (presumably
 *                   the zeroed terminator)
 *   dev_name      - private copy of the directory name
 */
static struct devinet_sysctl_table {
        struct ctl_table_header *sysctl_header;
        struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
        char *dev_name;
} devinet_sysctl = {
        .devinet_vars = {
                DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
                                             devinet_sysctl_forward),
                DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
                DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
                DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
                                        "accept_source_route"),
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
                DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
                DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
                DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
                DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
                DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
                DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
                DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
                DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),

                /* Entries below also flush the route cache on change. */
                DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
                                              "force_igmp_version"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
                                              "promote_secondaries"),
        },
};
1520
1521 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1522                                         struct ipv4_devconf *p)
1523 {
1524         int i;
1525         struct devinet_sysctl_table *t;
1526
1527 #define DEVINET_CTL_PATH_DEV    3
1528
1529         struct ctl_path devinet_ctl_path[] = {
1530                 { .procname = "net",  },
1531                 { .procname = "ipv4", },
1532                 { .procname = "conf", },
1533                 { /* to be set */ },
1534                 { },
1535         };
1536
1537         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1538         if (!t)
1539                 goto out;
1540
1541         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1542                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1543                 t->devinet_vars[i].extra1 = p;
1544                 t->devinet_vars[i].extra2 = net;
1545         }
1546
1547         /*
1548          * Make a copy of dev_name, because '.procname' is regarded as const
1549          * by sysctl and we wouldn't want anyone to change it under our feet
1550          * (see SIOCSIFNAME).
1551          */
1552         t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1553         if (!t->dev_name)
1554                 goto free;
1555
1556         devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1557
1558         t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1559                         t->devinet_vars);
1560         if (!t->sysctl_header)
1561                 goto free_procname;
1562
1563         p->sysctl = t;
1564         return 0;
1565
1566 free_procname:
1567         kfree(t->dev_name);
1568 free:
1569         kfree(t);
1570 out:
1571         return -ENOBUFS;
1572 }
1573
1574 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1575 {
1576         struct devinet_sysctl_table *t = cnf->sysctl;
1577
1578         if (t == NULL)
1579                 return;
1580
1581         cnf->sysctl = NULL;
1582         unregister_sysctl_table(t->sysctl_header);
1583         kfree(t->dev_name);
1584         kfree(t);
1585 }
1586
1587 static void devinet_sysctl_register(struct in_device *idev)
1588 {
1589         neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1590         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1591                                         &idev->cnf);
1592 }
1593
1594 static void devinet_sysctl_unregister(struct in_device *idev)
1595 {
1596         __devinet_sysctl_unregister(&idev->cnf);
1597         neigh_sysctl_unregister(idev->arp_parms);
1598 }
1599
/*
 * sysctl table for the "ip_forward" file.  As written it refers to the
 * global ipv4_devconf and init_net; devinet_init_net() duplicates it for
 * other namespaces and repoints data/extra1/extra2.  It is registered
 * under net_ipv4_path and backed by the FORWARDING slot of the "all"
 * config.
 */
static struct ctl_table ctl_forward_entry[] = {
        {
                .procname       = "ip_forward",
                .data           = &ipv4_devconf.data[
                                        IPV4_DEVCONF_FORWARDING - 1],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = devinet_sysctl_forward,
                .extra1         = &ipv4_devconf,
                .extra2         = &init_net,
        },
        { },
};
1613
/* sysctl path "net/ipv4", under which ctl_forward_entry is registered. */
static __net_initdata struct ctl_path net_ipv4_path[] = {
        { .procname = "net", },
        { .procname = "ipv4", },
        { },
};
1619 #endif
1620
/*
 * Per-namespace init: set up the "all" and "default" devconf instances
 * and, with CONFIG_SYSCTL, their sysctl directories plus the ip_forward
 * entry.
 *
 * init_net uses the static ipv4_devconf / ipv4_devconf_dflt objects and
 * the static ctl_forward_entry table directly; every other namespace
 * gets kmemdup()ed copies.
 *
 * Returns 0 on success.  On failure everything set up so far is undone
 * and a negative errno is returned (-ENOMEM for allocation failures or a
 * failed ip_forward registration, otherwise the error from
 * __devinet_sysctl_register()).
 */
static __net_init int devinet_init_net(struct net *net)
{
        int err;
        struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
        struct ctl_table *tbl = ctl_forward_entry;
        struct ctl_table_header *forw_hdr;
#endif

        err = -ENOMEM;
        all = &ipv4_devconf;
        dflt = &ipv4_devconf_dflt;

        /* Non-initial namespaces work on private copies. */
        if (!net_eq(net, &init_net)) {
                all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
                if (all == NULL)
                        goto err_alloc_all;

                dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
                if (dflt == NULL)
                        goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
                tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
                if (tbl == NULL)
                        goto err_alloc_ctl;

                /* Point the copied ip_forward entry at this namespace's
                 * "all" config instead of the global one.
                 */
                tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
                tbl[0].extra1 = all;
                tbl[0].extra2 = net;
#endif
        }

#ifdef CONFIG_SYSCTL
        err = __devinet_sysctl_register(net, "all", all);
        if (err < 0)
                goto err_reg_all;

        err = __devinet_sysctl_register(net, "default", dflt);
        if (err < 0)
                goto err_reg_dflt;

        err = -ENOMEM;
        forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
        if (forw_hdr == NULL)
                goto err_reg_ctl;
        net->ipv4.forw_hdr = forw_hdr;
#endif

        net->ipv4.devconf_all = all;
        net->ipv4.devconf_dflt = dflt;
        return 0;

        /* Unwind in reverse order; the static init_net objects are
         * never freed.
         */
#ifdef CONFIG_SYSCTL
err_reg_ctl:
        __devinet_sysctl_unregister(dflt);
err_reg_dflt:
        __devinet_sysctl_unregister(all);
err_reg_all:
        if (tbl != ctl_forward_entry)
                kfree(tbl);
err_alloc_ctl:
#endif
        if (dflt != &ipv4_devconf_dflt)
                kfree(dflt);
err_alloc_dflt:
        if (all != &ipv4_devconf)
                kfree(all);
err_alloc_all:
        return err;
}
1692
1693 static __net_exit void devinet_exit_net(struct net *net)
1694 {
1695 #ifdef CONFIG_SYSCTL
1696         struct ctl_table *tbl;
1697
1698         tbl = net->ipv4.forw_hdr->ctl_table_arg;
1699         unregister_net_sysctl_table(net->ipv4.forw_hdr);
1700         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1701         __devinet_sysctl_unregister(net->ipv4.devconf_all);
1702         kfree(tbl);
1703 #endif
1704         kfree(net->ipv4.devconf_dflt);
1705         kfree(net->ipv4.devconf_all);
1706 }
1707
/* Per-namespace hooks; registered from devinet_init(). */
static __net_initdata struct pernet_operations devinet_ops = {
        .init = devinet_init_net,
        .exit = devinet_exit_net,
};
1712
/*
 * AF_INET link attribute operations, registered with rtnl_af_register()
 * in devinet_init(): dumping, sizing, validating and applying the
 * IFLA_INET_CONF devconf values.
 */
static struct rtnl_af_ops inet_af_ops = {
        .family           = AF_INET,
        .fill_link_af     = inet_fill_link_af,
        .get_link_af_size = inet_get_link_af_size,
        .validate_link_af = inet_validate_link_af,
        .set_link_af      = inet_set_link_af,
};
1720
/*
 * Boot-time initialisation of IPv4 device support: registers the
 * per-namespace operations, the PF_INET gifconf handler, the netdevice
 * notifier, the AF_INET link attribute ops and the rtnetlink handlers
 * for address add/delete/dump.  Return values of the registration calls
 * are not checked here.
 */
void __init devinet_init(void)
{
        register_pernet_subsys(&devinet_ops);

        register_gifconf(PF_INET, inet_gifconf);
        register_netdevice_notifier(&ip_netdev_notifier);

        rtnl_af_register(&inet_af_ops);

        /* RTM_GETADDR is served only through the dump callback. */
        rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
        rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
        rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
}
1734