7ee9c3426f44a24b141492b266223b319e8a3f95
[linux-flexiantxendom0-natty.git] / net / netfilter / ipvs / ip_vs_ctl.c
1 /*
2  * IPVS         An implementation of the IP virtual server support for the
3  *              LINUX operating system.  IPVS is now implemented as a module
4  *              over the NetFilter framework. IPVS can be used to build a
5  *              high-performance and highly available server based on a
6  *              cluster of servers.
7  *
8  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
9  *              Peter Kese <peter.kese@ijs.si>
10  *              Julian Anastasov <ja@ssi.bg>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  *
17  * Changes:
18  *
19  */
20
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/capability.h>
28 #include <linux/fs.h>
29 #include <linux/sysctl.h>
30 #include <linux/proc_fs.h>
31 #include <linux/workqueue.h>
32 #include <linux/swap.h>
33 #include <linux/seq_file.h>
34
35 #include <linux/netfilter.h>
36 #include <linux/netfilter_ipv4.h>
37 #include <linux/mutex.h>
38
39 #include <net/net_namespace.h>
40 #include <net/ip.h>
41 #ifdef CONFIG_IP_VS_IPV6
42 #include <net/ipv6.h>
43 #include <net/ip6_route.h>
44 #endif
45 #include <net/route.h>
46 #include <net/sock.h>
47 #include <net/genetlink.h>
48
49 #include <asm/uaccess.h>
50
51 #include <net/ip_vs.h>
52
53 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
54 static DEFINE_MUTEX(__ip_vs_mutex);
55
56 /* lock for service table */
57 static DEFINE_RWLOCK(__ip_vs_svc_lock);
58
59 /* lock for table with the real services */
60 static DEFINE_RWLOCK(__ip_vs_rs_lock);
61
62 /* lock for state and timeout tables */
63 static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
64
65 /* lock for drop entry handling */
66 static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
67
68 /* lock for drop packet handling */
69 static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
70
71 /* 1/rate drop and drop-entry variables */
72 int ip_vs_drop_rate = 0;
73 int ip_vs_drop_counter = 0;
74 static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
75
76 /* number of virtual services */
77 static int ip_vs_num_services = 0;
78
79 /* sysctl variables */
80 static int sysctl_ip_vs_drop_entry = 0;
81 static int sysctl_ip_vs_drop_packet = 0;
82 static int sysctl_ip_vs_secure_tcp = 0;
83 static int sysctl_ip_vs_amemthresh = 1024;
84 static int sysctl_ip_vs_am_droprate = 10;
85 int sysctl_ip_vs_cache_bypass = 0;
86 int sysctl_ip_vs_expire_nodest_conn = 0;
87 int sysctl_ip_vs_expire_quiescent_template = 0;
88 int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
89 int sysctl_ip_vs_nat_icmp_send = 0;
90
91
#ifdef CONFIG_IP_VS_DEBUG
/* Debug verbosity; only compiled in when CONFIG_IP_VS_DEBUG is set. */
static int sysctl_ip_vs_debug_level;

/*
 *	Accessor for the file-local debug level.
 *	Returns the current value of sysctl_ip_vs_debug_level.
 */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif
100
#ifdef CONFIG_IP_VS_IPV6
/*
 *	Tell whether an IPv6 address is local to this host.
 *
 *	The address is routed in init_net with an unspecified source and no
 *	output interface; it is treated as local when the lookup succeeds and
 *	the resulting route points at a loopback device.
 *	Approach taken from rt6_fill_node() in net/ipv6/route.c.
 *
 *	@addr:	IPv6 address to test
 *
 *	Returns 1 if the address is local, 0 otherwise.
 */
static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
{
	struct dst_entry *dst;
	int is_local;
	struct flowi fl = {
		.oif = 0,
		.nl_u = {
			.ip6_u = {
				.daddr = *addr,
				.saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
	};

	dst = ip6_route_output(&init_net, NULL, &fl);

	/*
	 * A failed lookup still hands back a dst entry with ->error set;
	 * such an entry must not be mistaken for a local route.
	 */
	is_local = dst && !dst->error && dst->dev &&
		   (dst->dev->flags & IFF_LOOPBACK);

	/*
	 * ip6_route_output() returns the entry with a reference held.
	 * Drop it here, otherwise every call leaks one dst reference.
	 * dst_release() tolerates a NULL pointer.
	 */
	dst_release(dst);

	return is_local;
}
#endif
121 /*
122  *      update_defense_level is called from keventd and from sysctl,
123  *      so it needs to protect itself from softirqs
124  */
125 static void update_defense_level(void)
126 {
127         struct sysinfo i;
128         static int old_secure_tcp = 0;
129         int availmem;
130         int nomem;
131         int to_change = -1;
132
133         /* we only count free and buffered memory (in pages) */
134         si_meminfo(&i);
135         availmem = i.freeram + i.bufferram;
136         /* however in linux 2.5 the i.bufferram is total page cache size,
137            we need adjust it */
138         /* si_swapinfo(&i); */
139         /* availmem = availmem - (i.totalswap - i.freeswap); */
140
141         nomem = (availmem < sysctl_ip_vs_amemthresh);
142
143         local_bh_disable();
144
145         /* drop_entry */
146         spin_lock(&__ip_vs_dropentry_lock);
147         switch (sysctl_ip_vs_drop_entry) {
148         case 0:
149                 atomic_set(&ip_vs_dropentry, 0);
150                 break;
151         case 1:
152                 if (nomem) {
153                         atomic_set(&ip_vs_dropentry, 1);
154                         sysctl_ip_vs_drop_entry = 2;
155                 } else {
156                         atomic_set(&ip_vs_dropentry, 0);
157                 }
158                 break;
159         case 2:
160                 if (nomem) {
161                         atomic_set(&ip_vs_dropentry, 1);
162                 } else {
163                         atomic_set(&ip_vs_dropentry, 0);
164                         sysctl_ip_vs_drop_entry = 1;
165                 };
166                 break;
167         case 3:
168                 atomic_set(&ip_vs_dropentry, 1);
169                 break;
170         }
171         spin_unlock(&__ip_vs_dropentry_lock);
172
173         /* drop_packet */
174         spin_lock(&__ip_vs_droppacket_lock);
175         switch (sysctl_ip_vs_drop_packet) {
176         case 0:
177                 ip_vs_drop_rate = 0;
178                 break;
179         case 1:
180                 if (nomem) {
181                         ip_vs_drop_rate = ip_vs_drop_counter
182                                 = sysctl_ip_vs_amemthresh /
183                                 (sysctl_ip_vs_amemthresh-availmem);
184                         sysctl_ip_vs_drop_packet = 2;
185                 } else {
186                         ip_vs_drop_rate = 0;
187                 }
188                 break;
189         case 2:
190                 if (nomem) {
191                         ip_vs_drop_rate = ip_vs_drop_counter
192                                 = sysctl_ip_vs_amemthresh /
193                                 (sysctl_ip_vs_amemthresh-availmem);
194                 } else {
195                         ip_vs_drop_rate = 0;
196                         sysctl_ip_vs_drop_packet = 1;
197                 }
198                 break;
199         case 3:
200                 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
201                 break;
202         }
203         spin_unlock(&__ip_vs_droppacket_lock);
204
205         /* secure_tcp */
206         write_lock(&__ip_vs_securetcp_lock);
207         switch (sysctl_ip_vs_secure_tcp) {
208         case 0:
209                 if (old_secure_tcp >= 2)
210                         to_change = 0;
211                 break;
212         case 1:
213                 if (nomem) {
214                         if (old_secure_tcp < 2)
215                                 to_change = 1;
216                         sysctl_ip_vs_secure_tcp = 2;
217                 } else {
218                         if (old_secure_tcp >= 2)
219                                 to_change = 0;
220                 }
221                 break;
222         case 2:
223                 if (nomem) {
224                         if (old_secure_tcp < 2)
225                                 to_change = 1;
226                 } else {
227                         if (old_secure_tcp >= 2)
228                                 to_change = 0;
229                         sysctl_ip_vs_secure_tcp = 1;
230                 }
231                 break;
232         case 3:
233                 if (old_secure_tcp < 2)
234                         to_change = 1;
235                 break;
236         }
237         old_secure_tcp = sysctl_ip_vs_secure_tcp;
238         if (to_change >= 0)
239                 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
240         write_unlock(&__ip_vs_securetcp_lock);
241
242         local_bh_enable();
243 }
244
245
246 /*
247  *      Timer for checking the defense
248  */
249 #define DEFENSE_TIMER_PERIOD    1*HZ
250 static void defense_work_handler(struct work_struct *work);
251 static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
252
253 static void defense_work_handler(struct work_struct *work)
254 {
255         update_defense_level();
256         if (atomic_read(&ip_vs_dropentry))
257                 ip_vs_random_dropentry();
258
259         schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
260 }
261
262 int
263 ip_vs_use_count_inc(void)
264 {
265         return try_module_get(THIS_MODULE);
266 }
267
268 void
269 ip_vs_use_count_dec(void)
270 {
271         module_put(THIS_MODULE);
272 }
273
274
275 /*
276  *      Hash table: for virtual service lookups
277  */
278 #define IP_VS_SVC_TAB_BITS 8
279 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
280 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
281
282 /* the service table hashed by <protocol, addr, port> */
283 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
284 /* the service table hashed by fwmark */
285 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
286
287 /*
288  *      Hash table: for real service lookups
289  */
290 #define IP_VS_RTAB_BITS 4
291 #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
292 #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
293
294 static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
295
296 /*
297  *      Trash for destinations
298  */
299 static LIST_HEAD(ip_vs_dest_trash);
300
301 /*
302  *      FTP & NULL virtual service counters
303  */
304 static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
305 static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
306
307
308 /*
309  *      Returns hash value for virtual service
310  */
311 static __inline__ unsigned
312 ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
313                   __be16 port)
314 {
315         register unsigned porth = ntohs(port);
316         __be32 addr_fold = addr->ip;
317
318 #ifdef CONFIG_IP_VS_IPV6
319         if (af == AF_INET6)
320                 addr_fold = addr->ip6[0]^addr->ip6[1]^
321                             addr->ip6[2]^addr->ip6[3];
322 #endif
323
324         return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
325                 & IP_VS_SVC_TAB_MASK;
326 }
327
328 /*
329  *      Returns hash value of fwmark for virtual service lookup
330  */
331 static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
332 {
333         return fwmark & IP_VS_SVC_TAB_MASK;
334 }
335
336 /*
337  *      Hashes a service in the ip_vs_svc_table by <proto,addr,port>
338  *      or in the ip_vs_svc_fwm_table by fwmark.
339  *      Should be called with locked tables.
340  */
341 static int ip_vs_svc_hash(struct ip_vs_service *svc)
342 {
343         unsigned hash;
344
345         if (svc->flags & IP_VS_SVC_F_HASHED) {
346                 pr_err("%s(): request for already hashed, called from %pF\n",
347                        __func__, __builtin_return_address(0));
348                 return 0;
349         }
350
351         if (svc->fwmark == 0) {
352                 /*
353                  *  Hash it by <protocol,addr,port> in ip_vs_svc_table
354                  */
355                 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
356                                          svc->port);
357                 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
358         } else {
359                 /*
360                  *  Hash it by fwmark in ip_vs_svc_fwm_table
361                  */
362                 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
363                 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
364         }
365
366         svc->flags |= IP_VS_SVC_F_HASHED;
367         /* increase its refcnt because it is referenced by the svc table */
368         atomic_inc(&svc->refcnt);
369         return 1;
370 }
371
372
373 /*
374  *      Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
375  *      Should be called with locked tables.
376  */
377 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
378 {
379         if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
380                 pr_err("%s(): request for unhash flagged, called from %pF\n",
381                        __func__, __builtin_return_address(0));
382                 return 0;
383         }
384
385         if (svc->fwmark == 0) {
386                 /* Remove it from the ip_vs_svc_table table */
387                 list_del(&svc->s_list);
388         } else {
389                 /* Remove it from the ip_vs_svc_fwm_table table */
390                 list_del(&svc->f_list);
391         }
392
393         svc->flags &= ~IP_VS_SVC_F_HASHED;
394         atomic_dec(&svc->refcnt);
395         return 1;
396 }
397
398
399 /*
400  *      Get service by {proto,addr,port} in the service table.
401  */
402 static inline struct ip_vs_service *
403 __ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
404                     __be16 vport)
405 {
406         unsigned hash;
407         struct ip_vs_service *svc;
408
409         /* Check for "full" addressed entries */
410         hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
411
412         list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
413                 if ((svc->af == af)
414                     && ip_vs_addr_equal(af, &svc->addr, vaddr)
415                     && (svc->port == vport)
416                     && (svc->protocol == protocol)) {
417                         /* HIT */
418                         atomic_inc(&svc->usecnt);
419                         return svc;
420                 }
421         }
422
423         return NULL;
424 }
425
426
427 /*
428  *      Get service by {fwmark} in the service table.
429  */
430 static inline struct ip_vs_service *
431 __ip_vs_svc_fwm_get(int af, __u32 fwmark)
432 {
433         unsigned hash;
434         struct ip_vs_service *svc;
435
436         /* Check for fwmark addressed entries */
437         hash = ip_vs_svc_fwm_hashkey(fwmark);
438
439         list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
440                 if (svc->fwmark == fwmark && svc->af == af) {
441                         /* HIT */
442                         atomic_inc(&svc->usecnt);
443                         return svc;
444                 }
445         }
446
447         return NULL;
448 }
449
450 struct ip_vs_service *
451 ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
452                   const union nf_inet_addr *vaddr, __be16 vport)
453 {
454         struct ip_vs_service *svc;
455
456         read_lock(&__ip_vs_svc_lock);
457
458         /*
459          *      Check the table hashed by fwmark first
460          */
461         if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark)))
462                 goto out;
463
464         /*
465          *      Check the table hashed by <protocol,addr,port>
466          *      for "full" addressed entries
467          */
468         svc = __ip_vs_service_get(af, protocol, vaddr, vport);
469
470         if (svc == NULL
471             && protocol == IPPROTO_TCP
472             && atomic_read(&ip_vs_ftpsvc_counter)
473             && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
474                 /*
475                  * Check if ftp service entry exists, the packet
476                  * might belong to FTP data connections.
477                  */
478                 svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT);
479         }
480
481         if (svc == NULL
482             && atomic_read(&ip_vs_nullsvc_counter)) {
483                 /*
484                  * Check if the catch-all port (port zero) exists
485                  */
486                 svc = __ip_vs_service_get(af, protocol, vaddr, 0);
487         }
488
489   out:
490         read_unlock(&__ip_vs_svc_lock);
491
492         IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
493                       fwmark, ip_vs_proto_name(protocol),
494                       IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
495                       svc ? "hit" : "not hit");
496
497         return svc;
498 }
499
500
501 static inline void
502 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
503 {
504         atomic_inc(&svc->refcnt);
505         dest->svc = svc;
506 }
507
508 static inline void
509 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
510 {
511         struct ip_vs_service *svc = dest->svc;
512
513         dest->svc = NULL;
514         if (atomic_dec_and_test(&svc->refcnt))
515                 kfree(svc);
516 }
517
518
519 /*
520  *      Returns hash value for real service
521  */
522 static inline unsigned ip_vs_rs_hashkey(int af,
523                                             const union nf_inet_addr *addr,
524                                             __be16 port)
525 {
526         register unsigned porth = ntohs(port);
527         __be32 addr_fold = addr->ip;
528
529 #ifdef CONFIG_IP_VS_IPV6
530         if (af == AF_INET6)
531                 addr_fold = addr->ip6[0]^addr->ip6[1]^
532                             addr->ip6[2]^addr->ip6[3];
533 #endif
534
535         return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
536                 & IP_VS_RTAB_MASK;
537 }
538
539 /*
540  *      Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
541  *      should be called with locked tables.
542  */
543 static int ip_vs_rs_hash(struct ip_vs_dest *dest)
544 {
545         unsigned hash;
546
547         if (!list_empty(&dest->d_list)) {
548                 return 0;
549         }
550
551         /*
552          *      Hash by proto,addr,port,
553          *      which are the parameters of the real service.
554          */
555         hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
556
557         list_add(&dest->d_list, &ip_vs_rtable[hash]);
558
559         return 1;
560 }
561
562 /*
563  *      UNhashes ip_vs_dest from ip_vs_rtable.
564  *      should be called with locked tables.
565  */
566 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
567 {
568         /*
569          * Remove it from the ip_vs_rtable table.
570          */
571         if (!list_empty(&dest->d_list)) {
572                 list_del(&dest->d_list);
573                 INIT_LIST_HEAD(&dest->d_list);
574         }
575
576         return 1;
577 }
578
579 /*
580  *      Lookup real service by <proto,addr,port> in the real service table.
581  */
582 struct ip_vs_dest *
583 ip_vs_lookup_real_service(int af, __u16 protocol,
584                           const union nf_inet_addr *daddr,
585                           __be16 dport)
586 {
587         unsigned hash;
588         struct ip_vs_dest *dest;
589
590         /*
591          *      Check for "full" addressed entries
592          *      Return the first found entry
593          */
594         hash = ip_vs_rs_hashkey(af, daddr, dport);
595
596         read_lock(&__ip_vs_rs_lock);
597         list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
598                 if ((dest->af == af)
599                     && ip_vs_addr_equal(af, &dest->addr, daddr)
600                     && (dest->port == dport)
601                     && ((dest->protocol == protocol) ||
602                         dest->vfwmark)) {
603                         /* HIT */
604                         read_unlock(&__ip_vs_rs_lock);
605                         return dest;
606                 }
607         }
608         read_unlock(&__ip_vs_rs_lock);
609
610         return NULL;
611 }
612
613 /*
614  *      Lookup destination by {addr,port} in the given service
615  */
616 static struct ip_vs_dest *
617 ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
618                   __be16 dport)
619 {
620         struct ip_vs_dest *dest;
621
622         /*
623          * Find the destination for the given service
624          */
625         list_for_each_entry(dest, &svc->destinations, n_list) {
626                 if ((dest->af == svc->af)
627                     && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
628                     && (dest->port == dport)) {
629                         /* HIT */
630                         return dest;
631                 }
632         }
633
634         return NULL;
635 }
636
637 /*
638  * Find destination by {daddr,dport,vaddr,protocol}
639  * Cretaed to be used in ip_vs_process_message() in
640  * the backup synchronization daemon. It finds the
641  * destination to be bound to the received connection
642  * on the backup.
643  *
644  * ip_vs_lookup_real_service() looked promissing, but
645  * seems not working as expected.
646  */
647 struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
648                                    __be16 dport,
649                                    const union nf_inet_addr *vaddr,
650                                    __be16 vport, __u16 protocol)
651 {
652         struct ip_vs_dest *dest;
653         struct ip_vs_service *svc;
654
655         svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
656         if (!svc)
657                 return NULL;
658         dest = ip_vs_lookup_dest(svc, daddr, dport);
659         if (dest)
660                 atomic_inc(&dest->refcnt);
661         ip_vs_service_put(svc);
662         return dest;
663 }
664
665 /*
666  *  Lookup dest by {svc,addr,port} in the destination trash.
667  *  The destination trash is used to hold the destinations that are removed
668  *  from the service table but are still referenced by some conn entries.
669  *  The reason to add the destination trash is when the dest is temporary
670  *  down (either by administrator or by monitor program), the dest can be
671  *  picked back from the trash, the remaining connections to the dest can
672  *  continue, and the counting information of the dest is also useful for
673  *  scheduling.
674  */
675 static struct ip_vs_dest *
676 ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
677                      __be16 dport)
678 {
679         struct ip_vs_dest *dest, *nxt;
680
681         /*
682          * Find the destination in trash
683          */
684         list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
685                 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
686                               "dest->refcnt=%d\n",
687                               dest->vfwmark,
688                               IP_VS_DBG_ADDR(svc->af, &dest->addr),
689                               ntohs(dest->port),
690                               atomic_read(&dest->refcnt));
691                 if (dest->af == svc->af &&
692                     ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
693                     dest->port == dport &&
694                     dest->vfwmark == svc->fwmark &&
695                     dest->protocol == svc->protocol &&
696                     (svc->fwmark ||
697                      (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
698                       dest->vport == svc->port))) {
699                         /* HIT */
700                         return dest;
701                 }
702
703                 /*
704                  * Try to purge the destination from trash if not referenced
705                  */
706                 if (atomic_read(&dest->refcnt) == 1) {
707                         IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
708                                       "from trash\n",
709                                       dest->vfwmark,
710                                       IP_VS_DBG_ADDR(svc->af, &dest->addr),
711                                       ntohs(dest->port));
712                         list_del(&dest->n_list);
713                         ip_vs_dst_reset(dest);
714                         __ip_vs_unbind_svc(dest);
715                         kfree(dest);
716                 }
717         }
718
719         return NULL;
720 }
721
722
723 /*
724  *  Clean up all the destinations in the trash
725  *  Called by the ip_vs_control_cleanup()
726  *
727  *  When the ip_vs_control_clearup is activated by ipvs module exit,
728  *  the service tables must have been flushed and all the connections
729  *  are expired, and the refcnt of each destination in the trash must
730  *  be 1, so we simply release them here.
731  */
732 static void ip_vs_trash_cleanup(void)
733 {
734         struct ip_vs_dest *dest, *nxt;
735
736         list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
737                 list_del(&dest->n_list);
738                 ip_vs_dst_reset(dest);
739                 __ip_vs_unbind_svc(dest);
740                 kfree(dest);
741         }
742 }
743
744
745 static void
746 ip_vs_zero_stats(struct ip_vs_stats *stats)
747 {
748         spin_lock_bh(&stats->lock);
749
750         memset(&stats->ustats, 0, sizeof(stats->ustats));
751         ip_vs_zero_estimator(stats);
752
753         spin_unlock_bh(&stats->lock);
754 }
755
756 /*
757  *      Update a destination in the given service
758  */
759 static void
760 __ip_vs_update_dest(struct ip_vs_service *svc,
761                     struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
762 {
763         int conn_flags;
764
765         /* set the weight and the flags */
766         atomic_set(&dest->weight, udest->weight);
767         conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
768
769         /* check if local node and update the flags */
770 #ifdef CONFIG_IP_VS_IPV6
771         if (svc->af == AF_INET6) {
772                 if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
773                         conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
774                                 | IP_VS_CONN_F_LOCALNODE;
775                 }
776         } else
777 #endif
778                 if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
779                         conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
780                                 | IP_VS_CONN_F_LOCALNODE;
781                 }
782
783         /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
784         if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
785                 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
786         } else {
787                 /*
788                  *    Put the real service in ip_vs_rtable if not present.
789                  *    For now only for NAT!
790                  */
791                 write_lock_bh(&__ip_vs_rs_lock);
792                 ip_vs_rs_hash(dest);
793                 write_unlock_bh(&__ip_vs_rs_lock);
794         }
795         atomic_set(&dest->conn_flags, conn_flags);
796
797         /* bind the service */
798         if (!dest->svc) {
799                 __ip_vs_bind_svc(dest, svc);
800         } else {
801                 if (dest->svc != svc) {
802                         __ip_vs_unbind_svc(dest);
803                         ip_vs_zero_stats(&dest->stats);
804                         __ip_vs_bind_svc(dest, svc);
805                 }
806         }
807
808         /* set the dest status flags */
809         dest->flags |= IP_VS_DEST_F_AVAILABLE;
810
811         if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
812                 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
813         dest->u_threshold = udest->u_threshold;
814         dest->l_threshold = udest->l_threshold;
815 }
816
817
818 /*
819  *      Create a destination for the given service
820  */
821 static int
822 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
823                struct ip_vs_dest **dest_p)
824 {
825         struct ip_vs_dest *dest;
826         unsigned atype;
827
828         EnterFunction(2);
829
830 #ifdef CONFIG_IP_VS_IPV6
831         if (svc->af == AF_INET6) {
832                 atype = ipv6_addr_type(&udest->addr.in6);
833                 if ((!(atype & IPV6_ADDR_UNICAST) ||
834                         atype & IPV6_ADDR_LINKLOCAL) &&
835                         !__ip_vs_addr_is_local_v6(&udest->addr.in6))
836                         return -EINVAL;
837         } else
838 #endif
839         {
840                 atype = inet_addr_type(&init_net, udest->addr.ip);
841                 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
842                         return -EINVAL;
843         }
844
845         dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
846         if (dest == NULL) {
847                 pr_err("%s(): no memory.\n", __func__);
848                 return -ENOMEM;
849         }
850
851         dest->af = svc->af;
852         dest->protocol = svc->protocol;
853         dest->vaddr = svc->addr;
854         dest->vport = svc->port;
855         dest->vfwmark = svc->fwmark;
856         ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
857         dest->port = udest->port;
858
859         atomic_set(&dest->activeconns, 0);
860         atomic_set(&dest->inactconns, 0);
861         atomic_set(&dest->persistconns, 0);
862         atomic_set(&dest->refcnt, 0);
863
864         INIT_LIST_HEAD(&dest->d_list);
865         spin_lock_init(&dest->dst_lock);
866         spin_lock_init(&dest->stats.lock);
867         __ip_vs_update_dest(svc, dest, udest);
868         ip_vs_new_estimator(&dest->stats);
869
870         *dest_p = dest;
871
872         LeaveFunction(2);
873         return 0;
874 }
875
876
877 /*
878  *      Add a destination into an existing service
879  */
880 static int
881 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
882 {
883         struct ip_vs_dest *dest;
884         union nf_inet_addr daddr;
885         __be16 dport = udest->port;
886         int ret;
887
888         EnterFunction(2);
889
890         if (udest->weight < 0) {
891                 pr_err("%s(): server weight less than zero\n", __func__);
892                 return -ERANGE;
893         }
894
895         if (udest->l_threshold > udest->u_threshold) {
896                 pr_err("%s(): lower threshold is higher than upper threshold\n",
897                         __func__);
898                 return -ERANGE;
899         }
900
901         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
902
903         /*
904          * Check if the dest already exists in the list
905          */
906         dest = ip_vs_lookup_dest(svc, &daddr, dport);
907
908         if (dest != NULL) {
909                 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
910                 return -EEXIST;
911         }
912
913         /*
914          * Check if the dest already exists in the trash and
915          * is from the same service
916          */
917         dest = ip_vs_trash_get_dest(svc, &daddr, dport);
918
919         if (dest != NULL) {
920                 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
921                               "dest->refcnt=%d, service %u/%s:%u\n",
922                               IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
923                               atomic_read(&dest->refcnt),
924                               dest->vfwmark,
925                               IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
926                               ntohs(dest->vport));
927
928                 __ip_vs_update_dest(svc, dest, udest);
929
930                 /*
931                  * Get the destination from the trash
932                  */
933                 list_del(&dest->n_list);
934
935                 ip_vs_new_estimator(&dest->stats);
936
937                 write_lock_bh(&__ip_vs_svc_lock);
938
939                 /*
940                  * Wait until all other svc users go away.
941                  */
942                 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
943
944                 list_add(&dest->n_list, &svc->destinations);
945                 svc->num_dests++;
946
947                 /* call the update_service function of its scheduler */
948                 if (svc->scheduler->update_service)
949                         svc->scheduler->update_service(svc);
950
951                 write_unlock_bh(&__ip_vs_svc_lock);
952                 return 0;
953         }
954
955         /*
956          * Allocate and initialize the dest structure
957          */
958         ret = ip_vs_new_dest(svc, udest, &dest);
959         if (ret) {
960                 return ret;
961         }
962
963         /*
964          * Add the dest entry into the list
965          */
966         atomic_inc(&dest->refcnt);
967
968         write_lock_bh(&__ip_vs_svc_lock);
969
970         /*
971          * Wait until all other svc users go away.
972          */
973         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
974
975         list_add(&dest->n_list, &svc->destinations);
976         svc->num_dests++;
977
978         /* call the update_service function of its scheduler */
979         if (svc->scheduler->update_service)
980                 svc->scheduler->update_service(svc);
981
982         write_unlock_bh(&__ip_vs_svc_lock);
983
984         LeaveFunction(2);
985
986         return 0;
987 }
988
989
990 /*
991  *      Edit a destination in the given service
992  */
993 static int
994 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
995 {
996         struct ip_vs_dest *dest;
997         union nf_inet_addr daddr;
998         __be16 dport = udest->port;
999
1000         EnterFunction(2);
1001
1002         if (udest->weight < 0) {
1003                 pr_err("%s(): server weight less than zero\n", __func__);
1004                 return -ERANGE;
1005         }
1006
1007         if (udest->l_threshold > udest->u_threshold) {
1008                 pr_err("%s(): lower threshold is higher than upper threshold\n",
1009                         __func__);
1010                 return -ERANGE;
1011         }
1012
1013         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
1014
1015         /*
1016          *  Lookup the destination list
1017          */
1018         dest = ip_vs_lookup_dest(svc, &daddr, dport);
1019
1020         if (dest == NULL) {
1021                 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1022                 return -ENOENT;
1023         }
1024
1025         __ip_vs_update_dest(svc, dest, udest);
1026
1027         write_lock_bh(&__ip_vs_svc_lock);
1028
1029         /* Wait until all other svc users go away */
1030         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1031
1032         /* call the update_service, because server weight may be changed */
1033         if (svc->scheduler->update_service)
1034                 svc->scheduler->update_service(svc);
1035
1036         write_unlock_bh(&__ip_vs_svc_lock);
1037
1038         LeaveFunction(2);
1039
1040         return 0;
1041 }
1042
1043
1044 /*
1045  *      Delete a destination (must be already unlinked from the service)
1046  */
1047 static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1048 {
1049         ip_vs_kill_estimator(&dest->stats);
1050
1051         /*
1052          *  Remove it from the d-linked list with the real services.
1053          */
1054         write_lock_bh(&__ip_vs_rs_lock);
1055         ip_vs_rs_unhash(dest);
1056         write_unlock_bh(&__ip_vs_rs_lock);
1057
1058         /*
1059          *  Decrease the refcnt of the dest, and free the dest
1060          *  if nobody refers to it (refcnt=0). Otherwise, throw
1061          *  the destination into the trash.
1062          */
1063         if (atomic_dec_and_test(&dest->refcnt)) {
1064                 ip_vs_dst_reset(dest);
1065                 /* simply decrease svc->refcnt here, let the caller check
1066                    and release the service if nobody refers to it.
1067                    Only user context can release destination and service,
1068                    and only one user context can update virtual service at a
1069                    time, so the operation here is OK */
1070                 atomic_dec(&dest->svc->refcnt);
1071                 kfree(dest);
1072         } else {
1073                 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1074                               "dest->refcnt=%d\n",
1075                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1076                               ntohs(dest->port),
1077                               atomic_read(&dest->refcnt));
1078                 list_add(&dest->n_list, &ip_vs_dest_trash);
1079                 atomic_inc(&dest->refcnt);
1080         }
1081 }
1082
1083
1084 /*
1085  *      Unlink a destination from the given service
1086  */
1087 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1088                                 struct ip_vs_dest *dest,
1089                                 int svcupd)
1090 {
1091         dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1092
1093         /*
1094          *  Remove it from the d-linked destination list.
1095          */
1096         list_del(&dest->n_list);
1097         svc->num_dests--;
1098
1099         /*
1100          *  Call the update_service function of its scheduler
1101          */
1102         if (svcupd && svc->scheduler->update_service)
1103                         svc->scheduler->update_service(svc);
1104 }
1105
1106
1107 /*
1108  *      Delete a destination server in the given service
1109  */
1110 static int
1111 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1112 {
1113         struct ip_vs_dest *dest;
1114         __be16 dport = udest->port;
1115
1116         EnterFunction(2);
1117
1118         dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1119
1120         if (dest == NULL) {
1121                 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1122                 return -ENOENT;
1123         }
1124
1125         write_lock_bh(&__ip_vs_svc_lock);
1126
1127         /*
1128          *      Wait until all other svc users go away.
1129          */
1130         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1131
1132         /*
1133          *      Unlink dest from the service
1134          */
1135         __ip_vs_unlink_dest(svc, dest, 1);
1136
1137         write_unlock_bh(&__ip_vs_svc_lock);
1138
1139         /*
1140          *      Delete the destination
1141          */
1142         __ip_vs_del_dest(dest);
1143
1144         LeaveFunction(2);
1145
1146         return 0;
1147 }
1148
1149
1150 /*
1151  *      Add a service into the service hash table
1152  */
1153 static int
1154 ip_vs_add_service(struct ip_vs_service_user_kern *u,
1155                   struct ip_vs_service **svc_p)
1156 {
1157         int ret = 0;
1158         struct ip_vs_scheduler *sched = NULL;
1159         struct ip_vs_service *svc = NULL;
1160
1161         /* increase the module use count */
1162         ip_vs_use_count_inc();
1163
1164         /* Lookup the scheduler by 'u->sched_name' */
1165         sched = ip_vs_scheduler_get(u->sched_name);
1166         if (sched == NULL) {
1167                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1168                 ret = -ENOENT;
1169                 goto out_mod_dec;
1170         }
1171
1172 #ifdef CONFIG_IP_VS_IPV6
1173         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1174                 ret = -EINVAL;
1175                 goto out_err;
1176         }
1177 #endif
1178
1179         svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1180         if (svc == NULL) {
1181                 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1182                 ret = -ENOMEM;
1183                 goto out_err;
1184         }
1185
1186         /* I'm the first user of the service */
1187         atomic_set(&svc->usecnt, 1);
1188         atomic_set(&svc->refcnt, 0);
1189
1190         svc->af = u->af;
1191         svc->protocol = u->protocol;
1192         ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1193         svc->port = u->port;
1194         svc->fwmark = u->fwmark;
1195         svc->flags = u->flags;
1196         svc->timeout = u->timeout * HZ;
1197         svc->netmask = u->netmask;
1198
1199         INIT_LIST_HEAD(&svc->destinations);
1200         rwlock_init(&svc->sched_lock);
1201         spin_lock_init(&svc->stats.lock);
1202
1203         /* Bind the scheduler */
1204         ret = ip_vs_bind_scheduler(svc, sched);
1205         if (ret)
1206                 goto out_err;
1207         sched = NULL;
1208
1209         /* Update the virtual service counters */
1210         if (svc->port == FTPPORT)
1211                 atomic_inc(&ip_vs_ftpsvc_counter);
1212         else if (svc->port == 0)
1213                 atomic_inc(&ip_vs_nullsvc_counter);
1214
1215         ip_vs_new_estimator(&svc->stats);
1216
1217         /* Count only IPv4 services for old get/setsockopt interface */
1218         if (svc->af == AF_INET)
1219                 ip_vs_num_services++;
1220
1221         /* Hash the service into the service table */
1222         write_lock_bh(&__ip_vs_svc_lock);
1223         ip_vs_svc_hash(svc);
1224         write_unlock_bh(&__ip_vs_svc_lock);
1225
1226         *svc_p = svc;
1227         return 0;
1228
1229   out_err:
1230         if (svc != NULL) {
1231                 if (svc->scheduler)
1232                         ip_vs_unbind_scheduler(svc);
1233                 if (svc->inc) {
1234                         local_bh_disable();
1235                         ip_vs_app_inc_put(svc->inc);
1236                         local_bh_enable();
1237                 }
1238                 kfree(svc);
1239         }
1240         ip_vs_scheduler_put(sched);
1241
1242   out_mod_dec:
1243         /* decrease the module use count */
1244         ip_vs_use_count_dec();
1245
1246         return ret;
1247 }
1248
1249
1250 /*
1251  *      Edit a service and bind it with a new scheduler
1252  */
1253 static int
1254 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1255 {
1256         struct ip_vs_scheduler *sched, *old_sched;
1257         int ret = 0;
1258
1259         /*
1260          * Lookup the scheduler, by 'u->sched_name'
1261          */
1262         sched = ip_vs_scheduler_get(u->sched_name);
1263         if (sched == NULL) {
1264                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1265                 return -ENOENT;
1266         }
1267         old_sched = sched;
1268
1269 #ifdef CONFIG_IP_VS_IPV6
1270         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1271                 ret = -EINVAL;
1272                 goto out;
1273         }
1274 #endif
1275
1276         write_lock_bh(&__ip_vs_svc_lock);
1277
1278         /*
1279          * Wait until all other svc users go away.
1280          */
1281         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1282
1283         /*
1284          * Set the flags and timeout value
1285          */
1286         svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1287         svc->timeout = u->timeout * HZ;
1288         svc->netmask = u->netmask;
1289
1290         old_sched = svc->scheduler;
1291         if (sched != old_sched) {
1292                 /*
1293                  * Unbind the old scheduler
1294                  */
1295                 if ((ret = ip_vs_unbind_scheduler(svc))) {
1296                         old_sched = sched;
1297                         goto out_unlock;
1298                 }
1299
1300                 /*
1301                  * Bind the new scheduler
1302                  */
1303                 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1304                         /*
1305                          * If ip_vs_bind_scheduler fails, restore the old
1306                          * scheduler.
1307                          * The main reason of failure is out of memory.
1308                          *
1309                          * The question is if the old scheduler can be
1310                          * restored all the time. TODO: if it cannot be
1311                          * restored some time, we must delete the service,
1312                          * otherwise the system may crash.
1313                          */
1314                         ip_vs_bind_scheduler(svc, old_sched);
1315                         old_sched = sched;
1316                         goto out_unlock;
1317                 }
1318         }
1319
1320   out_unlock:
1321         write_unlock_bh(&__ip_vs_svc_lock);
1322 #ifdef CONFIG_IP_VS_IPV6
1323   out:
1324 #endif
1325
1326         if (old_sched)
1327                 ip_vs_scheduler_put(old_sched);
1328
1329         return ret;
1330 }
1331
1332
1333 /*
1334  *      Delete a service from the service list
1335  *      - The service must be unlinked, unlocked and not referenced!
1336  *      - We are called under _bh lock
1337  */
1338 static void __ip_vs_del_service(struct ip_vs_service *svc)
1339 {
1340         struct ip_vs_dest *dest, *nxt;
1341         struct ip_vs_scheduler *old_sched;
1342
1343         /* Count only IPv4 services for old get/setsockopt interface */
1344         if (svc->af == AF_INET)
1345                 ip_vs_num_services--;
1346
1347         ip_vs_kill_estimator(&svc->stats);
1348
1349         /* Unbind scheduler */
1350         old_sched = svc->scheduler;
1351         ip_vs_unbind_scheduler(svc);
1352         if (old_sched)
1353                 ip_vs_scheduler_put(old_sched);
1354
1355         /* Unbind app inc */
1356         if (svc->inc) {
1357                 ip_vs_app_inc_put(svc->inc);
1358                 svc->inc = NULL;
1359         }
1360
1361         /*
1362          *    Unlink the whole destination list
1363          */
1364         list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1365                 __ip_vs_unlink_dest(svc, dest, 0);
1366                 __ip_vs_del_dest(dest);
1367         }
1368
1369         /*
1370          *    Update the virtual service counters
1371          */
1372         if (svc->port == FTPPORT)
1373                 atomic_dec(&ip_vs_ftpsvc_counter);
1374         else if (svc->port == 0)
1375                 atomic_dec(&ip_vs_nullsvc_counter);
1376
1377         /*
1378          *    Free the service if nobody refers to it
1379          */
1380         if (atomic_read(&svc->refcnt) == 0)
1381                 kfree(svc);
1382
1383         /* decrease the module use count */
1384         ip_vs_use_count_dec();
1385 }
1386
1387 /*
1388  *      Delete a service from the service list
1389  */
1390 static int ip_vs_del_service(struct ip_vs_service *svc)
1391 {
1392         if (svc == NULL)
1393                 return -EEXIST;
1394
1395         /*
1396          * Unhash it from the service table
1397          */
1398         write_lock_bh(&__ip_vs_svc_lock);
1399
1400         ip_vs_svc_unhash(svc);
1401
1402         /*
1403          * Wait until all the svc users go away.
1404          */
1405         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1406
1407         __ip_vs_del_service(svc);
1408
1409         write_unlock_bh(&__ip_vs_svc_lock);
1410
1411         return 0;
1412 }
1413
1414
1415 /*
1416  *      Flush all the virtual services
1417  */
1418 static int ip_vs_flush(void)
1419 {
1420         int idx;
1421         struct ip_vs_service *svc, *nxt;
1422
1423         /*
1424          * Flush the service table hashed by <protocol,addr,port>
1425          */
1426         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1427                 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1428                         write_lock_bh(&__ip_vs_svc_lock);
1429                         ip_vs_svc_unhash(svc);
1430                         /*
1431                          * Wait until all the svc users go away.
1432                          */
1433                         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1434                         __ip_vs_del_service(svc);
1435                         write_unlock_bh(&__ip_vs_svc_lock);
1436                 }
1437         }
1438
1439         /*
1440          * Flush the service table hashed by fwmark
1441          */
1442         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1443                 list_for_each_entry_safe(svc, nxt,
1444                                          &ip_vs_svc_fwm_table[idx], f_list) {
1445                         write_lock_bh(&__ip_vs_svc_lock);
1446                         ip_vs_svc_unhash(svc);
1447                         /*
1448                          * Wait until all the svc users go away.
1449                          */
1450                         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1451                         __ip_vs_del_service(svc);
1452                         write_unlock_bh(&__ip_vs_svc_lock);
1453                 }
1454         }
1455
1456         return 0;
1457 }
1458
1459
1460 /*
1461  *      Zero counters in a service or all services
1462  */
1463 static int ip_vs_zero_service(struct ip_vs_service *svc)
1464 {
1465         struct ip_vs_dest *dest;
1466
1467         write_lock_bh(&__ip_vs_svc_lock);
1468         list_for_each_entry(dest, &svc->destinations, n_list) {
1469                 ip_vs_zero_stats(&dest->stats);
1470         }
1471         ip_vs_zero_stats(&svc->stats);
1472         write_unlock_bh(&__ip_vs_svc_lock);
1473         return 0;
1474 }
1475
1476 static int ip_vs_zero_all(void)
1477 {
1478         int idx;
1479         struct ip_vs_service *svc;
1480
1481         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1482                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1483                         ip_vs_zero_service(svc);
1484                 }
1485         }
1486
1487         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1488                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1489                         ip_vs_zero_service(svc);
1490                 }
1491         }
1492
1493         ip_vs_zero_stats(&ip_vs_stats);
1494         return 0;
1495 }
1496
1497
1498 static int
1499 proc_do_defense_mode(ctl_table *table, int write,
1500                      void __user *buffer, size_t *lenp, loff_t *ppos)
1501 {
1502         int *valp = table->data;
1503         int val = *valp;
1504         int rc;
1505
1506         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1507         if (write && (*valp != val)) {
1508                 if ((*valp < 0) || (*valp > 3)) {
1509                         /* Restore the correct value */
1510                         *valp = val;
1511                 } else {
1512                         update_defense_level();
1513                 }
1514         }
1515         return rc;
1516 }
1517
1518
1519 static int
1520 proc_do_sync_threshold(ctl_table *table, int write,
1521                        void __user *buffer, size_t *lenp, loff_t *ppos)
1522 {
1523         int *valp = table->data;
1524         int val[2];
1525         int rc;
1526
1527         /* backup the value first */
1528         memcpy(val, valp, sizeof(val));
1529
1530         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1531         if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1532                 /* Restore the correct value */
1533                 memcpy(valp, val, sizeof(val));
1534         }
1535         return rc;
1536 }
1537
1538
/*
 *      IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
 *
 *      Every entry is registered with mode 0644. Entries using
 *      proc_dointvec are stored without further validation here; the
 *      entries using proc_do_defense_mode and proc_do_sync_threshold go
 *      through the handlers of the same name defined in this file.
 *      The table is terminated by an empty entry.
 */

static struct ctl_table vs_vars[] = {
        {
                .procname       = "amemthresh",
                .data           = &sysctl_ip_vs_amemthresh,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
#ifdef CONFIG_IP_VS_DEBUG
        /* Only present when IPVS debugging is compiled in */
        {
                .procname       = "debug_level",
                .data           = &sysctl_ip_vs_debug_level,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
#endif
        {
                .procname       = "am_droprate",
                .data           = &sysctl_ip_vs_am_droprate,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
        /* The next three entries share proc_do_defense_mode */
        {
                .procname       = "drop_entry",
                .data           = &sysctl_ip_vs_drop_entry,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_do_defense_mode,
        },
        {
                .procname       = "drop_packet",
                .data           = &sysctl_ip_vs_drop_packet,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_do_defense_mode,
        },
        {
                .procname       = "secure_tcp",
                .data           = &sysctl_ip_vs_secure_tcp,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_do_defense_mode,
        },
#if 0
        /* Compiled out: per-state timeout entries backed by
           vs_timeout_table_dos */
        {
                .procname       = "timeout_established",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_jiffies,
        },
        {
                .procname       = "timeout_synsent",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_jiffies,
        },
        {
                .procname       = "timeout_synrecv",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_jiffies,
        },
        {
                .procname       = "timeout_finwait",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_jiffies,
        },
        {
                .procname       = "timeout_timewait",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_jiffies,
        },
        {
                .procname       = "timeout_close",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_jiffies,
        },
        {
                .procname       = "timeout_closewait",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_jiffies,
        },
        {
                .procname       = "timeout_lastack",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_jiffies,
        },
        {
                .procname       = "timeout_listen",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_jiffies,
        },
        {
                .procname       = "timeout_synack",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_jiffies,
        },
        {
                .procname       = "timeout_udp",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_jiffies,
        },
        {
                .procname       = "timeout_icmp",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_jiffies,
        },
#endif
        {
                .procname       = "cache_bypass",
                .data           = &sysctl_ip_vs_cache_bypass,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
        {
                .procname       = "expire_nodest_conn",
                .data           = &sysctl_ip_vs_expire_nodest_conn,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
        {
                .procname       = "expire_quiescent_template",
                .data           = &sysctl_ip_vs_expire_quiescent_template,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
        /* maxlen covers the whole sysctl_ip_vs_sync_threshold object,
           which proc_do_sync_threshold treats as a pair of ints */
        {
                .procname       = "sync_threshold",
                .data           = &sysctl_ip_vs_sync_threshold,
                .maxlen         = sizeof(sysctl_ip_vs_sync_threshold),
                .mode           = 0644,
                .proc_handler   = proc_do_sync_threshold,
        },
        {
                .procname       = "nat_icmp_send",
                .data           = &sysctl_ip_vs_nat_icmp_send,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
        { }
};
1711
/*
 *      sysctl path "net" / "ipv4" / "vs", terminated by an empty entry.
 *      Exported (GPL only) so that code outside this file can use the
 *      same path.
 */
const struct ctl_path net_vs_ctl_path[] = {
        { .procname = "net", },
        { .procname = "ipv4", },
        { .procname = "vs", },
        { }
};
EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1719
/* Header of the sysctl table; presumably set when vs_vars is registered
   elsewhere in this file -- TODO confirm against the init/cleanup code */
static struct ctl_table_header * sysctl_header;
1721
1722 #ifdef CONFIG_PROC_FS
1723
/*
 *      Position of the seq_file walk over the service tables, kept in
 *      seq->private.
 */
struct ip_vs_iter {
        struct list_head *table;        /* ip_vs_svc_table or ip_vs_svc_fwm_table */
        int bucket;                     /* index of the current hash bucket in table */
};
1728
1729 /*
1730  *      Write the contents of the VS rule table to a PROCfs file.
1731  *      (It is kept just for backward compatibility)
1732  */
1733 static inline const char *ip_vs_fwd_name(unsigned flags)
1734 {
1735         switch (flags & IP_VS_CONN_F_FWD_MASK) {
1736         case IP_VS_CONN_F_LOCALNODE:
1737                 return "Local";
1738         case IP_VS_CONN_F_TUNNEL:
1739                 return "Tunnel";
1740         case IP_VS_CONN_F_DROUTE:
1741                 return "Route";
1742         default:
1743                 return "Masq";
1744         }
1745 }
1746
1747
1748 /* Get the Nth entry in the two lists */
1749 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1750 {
1751         struct ip_vs_iter *iter = seq->private;
1752         int idx;
1753         struct ip_vs_service *svc;
1754
1755         /* look in hash by protocol */
1756         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1757                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1758                         if (pos-- == 0){
1759                                 iter->table = ip_vs_svc_table;
1760                                 iter->bucket = idx;
1761                                 return svc;
1762                         }
1763                 }
1764         }
1765
1766         /* keep looking in fwmark */
1767         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1768                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1769                         if (pos-- == 0) {
1770                                 iter->table = ip_vs_svc_fwm_table;
1771                                 iter->bucket = idx;
1772                                 return svc;
1773                         }
1774                 }
1775         }
1776
1777         return NULL;
1778 }
1779
1780 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1781 __acquires(__ip_vs_svc_lock)
1782 {
1783
1784         read_lock_bh(&__ip_vs_svc_lock);
1785         return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1786 }
1787
1788
1789 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1790 {
1791         struct list_head *e;
1792         struct ip_vs_iter *iter;
1793         struct ip_vs_service *svc;
1794
1795         ++*pos;
1796         if (v == SEQ_START_TOKEN)
1797                 return ip_vs_info_array(seq,0);
1798
1799         svc = v;
1800         iter = seq->private;
1801
1802         if (iter->table == ip_vs_svc_table) {
1803                 /* next service in table hashed by protocol */
1804                 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1805                         return list_entry(e, struct ip_vs_service, s_list);
1806
1807
1808                 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1809                         list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1810                                             s_list) {
1811                                 return svc;
1812                         }
1813                 }
1814
1815                 iter->table = ip_vs_svc_fwm_table;
1816                 iter->bucket = -1;
1817                 goto scan_fwmark;
1818         }
1819
1820         /* next service in hashed by fwmark */
1821         if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1822                 return list_entry(e, struct ip_vs_service, f_list);
1823
1824  scan_fwmark:
1825         while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1826                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1827                                     f_list)
1828                         return svc;
1829         }
1830
1831         return NULL;
1832 }
1833
/*
 *      seq_file stop callback: drops the read side of __ip_vs_svc_lock
 *      that ip_vs_info_seq_start took. Neither argument is used.
 */
static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
__releases(__ip_vs_svc_lock)
{
        read_unlock_bh(&__ip_vs_svc_lock);
}
1839
1840
1841 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1842 {
1843         if (v == SEQ_START_TOKEN) {
1844                 seq_printf(seq,
1845                         "IP Virtual Server version %d.%d.%d (size=%d)\n",
1846                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1847                 seq_puts(seq,
1848                          "Prot LocalAddress:Port Scheduler Flags\n");
1849                 seq_puts(seq,
1850                          "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1851         } else {
1852                 const struct ip_vs_service *svc = v;
1853                 const struct ip_vs_iter *iter = seq->private;
1854                 const struct ip_vs_dest *dest;
1855
1856                 if (iter->table == ip_vs_svc_table) {
1857 #ifdef CONFIG_IP_VS_IPV6
1858                         if (svc->af == AF_INET6)
1859                                 seq_printf(seq, "%s  [%pI6]:%04X %s ",
1860                                            ip_vs_proto_name(svc->protocol),
1861                                            &svc->addr.in6,
1862                                            ntohs(svc->port),
1863                                            svc->scheduler->name);
1864                         else
1865 #endif
1866                                 seq_printf(seq, "%s  %08X:%04X %s ",
1867                                            ip_vs_proto_name(svc->protocol),
1868                                            ntohl(svc->addr.ip),
1869                                            ntohs(svc->port),
1870                                            svc->scheduler->name);
1871                 } else {
1872                         seq_printf(seq, "FWM  %08X %s ",
1873                                    svc->fwmark, svc->scheduler->name);
1874                 }
1875
1876                 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1877                         seq_printf(seq, "persistent %d %08X\n",
1878                                 svc->timeout,
1879                                 ntohl(svc->netmask));
1880                 else
1881                         seq_putc(seq, '\n');
1882
1883                 list_for_each_entry(dest, &svc->destinations, n_list) {
1884 #ifdef CONFIG_IP_VS_IPV6
1885                         if (dest->af == AF_INET6)
1886                                 seq_printf(seq,
1887                                            "  -> [%pI6]:%04X"
1888                                            "      %-7s %-6d %-10d %-10d\n",
1889                                            &dest->addr.in6,
1890                                            ntohs(dest->port),
1891                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1892                                            atomic_read(&dest->weight),
1893                                            atomic_read(&dest->activeconns),
1894                                            atomic_read(&dest->inactconns));
1895                         else
1896 #endif
1897                                 seq_printf(seq,
1898                                            "  -> %08X:%04X      "
1899                                            "%-7s %-6d %-10d %-10d\n",
1900                                            ntohl(dest->addr.ip),
1901                                            ntohs(dest->port),
1902                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1903                                            atomic_read(&dest->weight),
1904                                            atomic_read(&dest->activeconns),
1905                                            atomic_read(&dest->inactconns));
1906
1907                 }
1908         }
1909         return 0;
1910 }
1911
/* seq_file iterator callbacks, installed by ip_vs_info_open() */
static const struct seq_operations ip_vs_info_seq_ops = {
        .start = ip_vs_info_seq_start,
        .next  = ip_vs_info_seq_next,
        .stop  = ip_vs_info_seq_stop,
        .show  = ip_vs_info_seq_show,
};
1918
/*
 * ->open handler: sets up a seq_file using ip_vs_info_seq_ops and asks for
 * a private area of sizeof(struct ip_vs_iter), which the iterator callbacks
 * read through seq->private.  Returns whatever seq_open_private() returns.
 */
static int ip_vs_info_open(struct inode *inode, struct file *file)
{
        return seq_open_private(file, &ip_vs_info_seq_ops,
                        sizeof(struct ip_vs_iter));
}
1924
/*
 * File operations built on the generic seq_file helpers; release uses
 * seq_release_private to pair with seq_open_private() in ip_vs_info_open().
 */
static const struct file_operations ip_vs_info_fops = {
        .owner   = THIS_MODULE,
        .open    = ip_vs_info_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release_private,
};
1932
1933 #endif
1934
/*
 * Global statistics object (non-static); only its spinlock is initialised
 * explicitly here, every other member starts out zeroed.
 */
struct ip_vs_stats ip_vs_stats = {
        .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
};
1938
1939 #ifdef CONFIG_PROC_FS
1940 static int ip_vs_stats_show(struct seq_file *seq, void *v)
1941 {
1942
1943 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
1944         seq_puts(seq,
1945                  "   Total Incoming Outgoing         Incoming         Outgoing\n");
1946         seq_printf(seq,
1947                    "   Conns  Packets  Packets            Bytes            Bytes\n");
1948
1949         spin_lock_bh(&ip_vs_stats.lock);
1950         seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
1951                    ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
1952                    (unsigned long long) ip_vs_stats.ustats.inbytes,
1953                    (unsigned long long) ip_vs_stats.ustats.outbytes);
1954
1955 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1956         seq_puts(seq,
1957                    " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
1958         seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1959                         ip_vs_stats.ustats.cps,
1960                         ip_vs_stats.ustats.inpps,
1961                         ip_vs_stats.ustats.outpps,
1962                         ip_vs_stats.ustats.inbps,
1963                         ip_vs_stats.ustats.outbps);
1964         spin_unlock_bh(&ip_vs_stats.lock);
1965
1966         return 0;
1967 }
1968
/* ->open handler: one-shot seq_file whose only record is ip_vs_stats_show() */
static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
{
        return single_open(file, ip_vs_stats_show, NULL);
}
1973
/* File operations for the statistics file; single_release pairs with single_open() */
static const struct file_operations ip_vs_stats_fops = {
        .owner = THIS_MODULE,
        .open = ip_vs_stats_seq_open,
        .read = seq_read,
        .llseek = seq_lseek,
        .release = single_release,
};
1981
1982 #endif
1983
1984 /*
1985  *      Set timeout values for tcp tcpfin udp in the timeout_table.
1986  */
1987 static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
1988 {
1989         IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1990                   u->tcp_timeout,
1991                   u->tcp_fin_timeout,
1992                   u->udp_timeout);
1993
1994 #ifdef CONFIG_IP_VS_PROTO_TCP
1995         if (u->tcp_timeout) {
1996                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
1997                         = u->tcp_timeout * HZ;
1998         }
1999
2000         if (u->tcp_fin_timeout) {
2001                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
2002                         = u->tcp_fin_timeout * HZ;
2003         }
2004 #endif
2005
2006 #ifdef CONFIG_IP_VS_PROTO_UDP
2007         if (u->udp_timeout) {
2008                 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
2009                         = u->udp_timeout * HZ;
2010         }
2011 #endif
2012         return 0;
2013 }
2014
2015
/* Index of a set command inside set_arglen[] */
#define SET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
/* Argument sizes of the individual set commands */
#define SERVICE_ARG_LEN         (sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN         (sizeof(struct ip_vs_service_user) +    \
                                 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN         (sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN          (sizeof(struct ip_vs_daemon_user))
/* Size of the on-stack argument buffer in do_ip_vs_set_ctl() */
#define MAX_ARG_LEN             SVCDEST_ARG_LEN

/*
 * Exact optlen expected for each set command, indexed by SET_CMDID(cmd).
 * do_ip_vs_set_ctl() rejects a request whose length differs.
 * NOTE(review): entries are unsigned char, so every *_ARG_LEN must stay
 * below 256 -- confirm whenever one of the user structs grows.
 */
static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
        [SET_CMDID(IP_VS_SO_SET_ADD)]           = SERVICE_ARG_LEN,
        [SET_CMDID(IP_VS_SO_SET_EDIT)]          = SERVICE_ARG_LEN,
        [SET_CMDID(IP_VS_SO_SET_DEL)]           = SERVICE_ARG_LEN,
        [SET_CMDID(IP_VS_SO_SET_FLUSH)]         = 0,
        [SET_CMDID(IP_VS_SO_SET_ADDDEST)]       = SVCDEST_ARG_LEN,
        [SET_CMDID(IP_VS_SO_SET_DELDEST)]       = SVCDEST_ARG_LEN,
        [SET_CMDID(IP_VS_SO_SET_EDITDEST)]      = SVCDEST_ARG_LEN,
        [SET_CMDID(IP_VS_SO_SET_TIMEOUT)]       = TIMEOUT_ARG_LEN,
        [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]   = DAEMON_ARG_LEN,
        [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]    = DAEMON_ARG_LEN,
        [SET_CMDID(IP_VS_SO_SET_ZERO)]          = SERVICE_ARG_LEN,
};
2037
2038 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2039                                   struct ip_vs_service_user *usvc_compat)
2040 {
2041         usvc->af                = AF_INET;
2042         usvc->protocol          = usvc_compat->protocol;
2043         usvc->addr.ip           = usvc_compat->addr;
2044         usvc->port              = usvc_compat->port;
2045         usvc->fwmark            = usvc_compat->fwmark;
2046
2047         /* Deep copy of sched_name is not needed here */
2048         usvc->sched_name        = usvc_compat->sched_name;
2049
2050         usvc->flags             = usvc_compat->flags;
2051         usvc->timeout           = usvc_compat->timeout;
2052         usvc->netmask           = usvc_compat->netmask;
2053 }
2054
2055 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2056                                    struct ip_vs_dest_user *udest_compat)
2057 {
2058         udest->addr.ip          = udest_compat->addr;
2059         udest->port             = udest_compat->port;
2060         udest->conn_flags       = udest_compat->conn_flags;
2061         udest->weight           = udest_compat->weight;
2062         udest->u_threshold      = udest_compat->u_threshold;
2063         udest->l_threshold      = udest_compat->l_threshold;
2064 }
2065
2066 static int
2067 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2068 {
2069         int ret;
2070         unsigned char arg[MAX_ARG_LEN];
2071         struct ip_vs_service_user *usvc_compat;
2072         struct ip_vs_service_user_kern usvc;
2073         struct ip_vs_service *svc;
2074         struct ip_vs_dest_user *udest_compat;
2075         struct ip_vs_dest_user_kern udest;
2076
2077         if (!capable(CAP_NET_ADMIN))
2078                 return -EPERM;
2079
2080         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2081                 return -EINVAL;
2082         if (len < 0 || len >  MAX_ARG_LEN)
2083                 return -EINVAL;
2084         if (len != set_arglen[SET_CMDID(cmd)]) {
2085                 pr_err("set_ctl: len %u != %u\n",
2086                        len, set_arglen[SET_CMDID(cmd)]);
2087                 return -EINVAL;
2088         }
2089
2090         if (copy_from_user(arg, user, len) != 0)
2091                 return -EFAULT;
2092
2093         /* increase the module use count */
2094         ip_vs_use_count_inc();
2095
2096         if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2097                 ret = -ERESTARTSYS;
2098                 goto out_dec;
2099         }
2100
2101         if (cmd == IP_VS_SO_SET_FLUSH) {
2102                 /* Flush the virtual service */
2103                 ret = ip_vs_flush();
2104                 goto out_unlock;
2105         } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2106                 /* Set timeout values for (tcp tcpfin udp) */
2107                 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
2108                 goto out_unlock;
2109         } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2110                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2111                 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
2112                 goto out_unlock;
2113         } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2114                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2115                 ret = stop_sync_thread(dm->state);
2116                 goto out_unlock;
2117         }
2118
2119         usvc_compat = (struct ip_vs_service_user *)arg;
2120         udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2121
2122         /* We only use the new structs internally, so copy userspace compat
2123          * structs to extended internal versions */
2124         ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2125         ip_vs_copy_udest_compat(&udest, udest_compat);
2126
2127         if (cmd == IP_VS_SO_SET_ZERO) {
2128                 /* if no service address is set, zero counters in all */
2129                 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2130                         ret = ip_vs_zero_all();
2131                         goto out_unlock;
2132                 }
2133         }
2134
2135         /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2136         if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2137             usvc.protocol != IPPROTO_SCTP) {
2138                 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2139                        usvc.protocol, &usvc.addr.ip,
2140                        ntohs(usvc.port), usvc.sched_name);
2141                 ret = -EFAULT;
2142                 goto out_unlock;
2143         }
2144
2145         /* Lookup the exact service by <protocol, addr, port> or fwmark */
2146         if (usvc.fwmark == 0)
2147                 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
2148                                           &usvc.addr, usvc.port);
2149         else
2150                 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
2151
2152         if (cmd != IP_VS_SO_SET_ADD
2153             && (svc == NULL || svc->protocol != usvc.protocol)) {
2154                 ret = -ESRCH;
2155                 goto out_unlock;
2156         }
2157
2158         switch (cmd) {
2159         case IP_VS_SO_SET_ADD:
2160                 if (svc != NULL)
2161                         ret = -EEXIST;
2162                 else
2163                         ret = ip_vs_add_service(&usvc, &svc);
2164                 break;
2165         case IP_VS_SO_SET_EDIT:
2166                 ret = ip_vs_edit_service(svc, &usvc);
2167                 break;
2168         case IP_VS_SO_SET_DEL:
2169                 ret = ip_vs_del_service(svc);
2170                 if (!ret)
2171                         goto out_unlock;
2172                 break;
2173         case IP_VS_SO_SET_ZERO:
2174                 ret = ip_vs_zero_service(svc);
2175                 break;
2176         case IP_VS_SO_SET_ADDDEST:
2177                 ret = ip_vs_add_dest(svc, &udest);
2178                 break;
2179         case IP_VS_SO_SET_EDITDEST:
2180                 ret = ip_vs_edit_dest(svc, &udest);
2181                 break;
2182         case IP_VS_SO_SET_DELDEST:
2183                 ret = ip_vs_del_dest(svc, &udest);
2184                 break;
2185         default:
2186                 ret = -EINVAL;
2187         }
2188
2189         if (svc)
2190                 ip_vs_service_put(svc);
2191
2192   out_unlock:
2193         mutex_unlock(&__ip_vs_mutex);
2194   out_dec:
2195         /* decrease the module use count */
2196         ip_vs_use_count_dec();
2197
2198         return ret;
2199 }
2200
2201
/*
 * Copy the user-visible counters src->ustats into @dst.  src->lock is held
 * (bottom halves disabled) for the duration of the copy.
 */
static void
ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
{
        spin_lock_bh(&src->lock);
        memcpy(dst, &src->ustats, sizeof(*dst));
        spin_unlock_bh(&src->lock);
}
2209
2210 static void
2211 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2212 {
2213         dst->protocol = src->protocol;
2214         dst->addr = src->addr.ip;
2215         dst->port = src->port;
2216         dst->fwmark = src->fwmark;
2217         strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2218         dst->flags = src->flags;
2219         dst->timeout = src->timeout / HZ;
2220         dst->netmask = src->netmask;
2221         dst->num_dests = src->num_dests;
2222         ip_vs_copy_stats(&dst->stats, &src->stats);
2223 }
2224
2225 static inline int
2226 __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2227                             struct ip_vs_get_services __user *uptr)
2228 {
2229         int idx, count=0;
2230         struct ip_vs_service *svc;
2231         struct ip_vs_service_entry entry;
2232         int ret = 0;
2233
2234         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2235                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2236                         /* Only expose IPv4 entries to old interface */
2237                         if (svc->af != AF_INET)
2238                                 continue;
2239
2240                         if (count >= get->num_services)
2241                                 goto out;
2242                         memset(&entry, 0, sizeof(entry));
2243                         ip_vs_copy_service(&entry, svc);
2244                         if (copy_to_user(&uptr->entrytable[count],
2245                                          &entry, sizeof(entry))) {
2246                                 ret = -EFAULT;
2247                                 goto out;
2248                         }
2249                         count++;
2250                 }
2251         }
2252
2253         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2254                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2255                         /* Only expose IPv4 entries to old interface */
2256                         if (svc->af != AF_INET)
2257                                 continue;
2258
2259                         if (count >= get->num_services)
2260                                 goto out;
2261                         memset(&entry, 0, sizeof(entry));
2262                         ip_vs_copy_service(&entry, svc);
2263                         if (copy_to_user(&uptr->entrytable[count],
2264                                          &entry, sizeof(entry))) {
2265                                 ret = -EFAULT;
2266                                 goto out;
2267                         }
2268                         count++;
2269                 }
2270         }
2271   out:
2272         return ret;
2273 }
2274
2275 static inline int
2276 __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2277                          struct ip_vs_get_dests __user *uptr)
2278 {
2279         struct ip_vs_service *svc;
2280         union nf_inet_addr addr = { .ip = get->addr };
2281         int ret = 0;
2282
2283         if (get->fwmark)
2284                 svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
2285         else
2286                 svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
2287                                           get->port);
2288
2289         if (svc) {
2290                 int count = 0;
2291                 struct ip_vs_dest *dest;
2292                 struct ip_vs_dest_entry entry;
2293
2294                 list_for_each_entry(dest, &svc->destinations, n_list) {
2295                         if (count >= get->num_dests)
2296                                 break;
2297
2298                         entry.addr = dest->addr.ip;
2299                         entry.port = dest->port;
2300                         entry.conn_flags = atomic_read(&dest->conn_flags);
2301                         entry.weight = atomic_read(&dest->weight);
2302                         entry.u_threshold = dest->u_threshold;
2303                         entry.l_threshold = dest->l_threshold;
2304                         entry.activeconns = atomic_read(&dest->activeconns);
2305                         entry.inactconns = atomic_read(&dest->inactconns);
2306                         entry.persistconns = atomic_read(&dest->persistconns);
2307                         ip_vs_copy_stats(&entry.stats, &dest->stats);
2308                         if (copy_to_user(&uptr->entrytable[count],
2309                                          &entry, sizeof(entry))) {
2310                                 ret = -EFAULT;
2311                                 break;
2312                         }
2313                         count++;
2314                 }
2315                 ip_vs_service_put(svc);
2316         } else
2317                 ret = -ESRCH;
2318         return ret;
2319 }
2320
2321 static inline void
2322 __ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2323 {
2324 #ifdef CONFIG_IP_VS_PROTO_TCP
2325         u->tcp_timeout =
2326                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2327         u->tcp_fin_timeout =
2328                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2329 #endif
2330 #ifdef CONFIG_IP_VS_PROTO_UDP
2331         u->udp_timeout =
2332                 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2333 #endif
2334 }
2335
2336
/* Index of a get command inside get_arglen[] */
#define GET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
/* Size of the fixed-size argument (header) of each get command */
#define GET_INFO_ARG_LEN        (sizeof(struct ip_vs_getinfo))
#define GET_SERVICES_ARG_LEN    (sizeof(struct ip_vs_get_services))
#define GET_SERVICE_ARG_LEN     (sizeof(struct ip_vs_service_entry))
#define GET_DESTS_ARG_LEN       (sizeof(struct ip_vs_get_dests))
#define GET_TIMEOUT_ARG_LEN     (sizeof(struct ip_vs_timeout_user))
#define GET_DAEMON_ARG_LEN      (sizeof(struct ip_vs_daemon_user) * 2)

/*
 * Minimum optlen for each get command, indexed by GET_CMDID(cmd).
 * do_ip_vs_get_ctl() rejects shorter buffers and copies exactly this many
 * bytes in from userspace.  The 64 for IP_VS_SO_GET_VERSION matches the
 * size of the version string buffer used there.
 */
static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
        [GET_CMDID(IP_VS_SO_GET_VERSION)]       = 64,
        [GET_CMDID(IP_VS_SO_GET_INFO)]          = GET_INFO_ARG_LEN,
        [GET_CMDID(IP_VS_SO_GET_SERVICES)]      = GET_SERVICES_ARG_LEN,
        [GET_CMDID(IP_VS_SO_GET_SERVICE)]       = GET_SERVICE_ARG_LEN,
        [GET_CMDID(IP_VS_SO_GET_DESTS)]         = GET_DESTS_ARG_LEN,
        [GET_CMDID(IP_VS_SO_GET_TIMEOUT)]       = GET_TIMEOUT_ARG_LEN,
        [GET_CMDID(IP_VS_SO_GET_DAEMON)]        = GET_DAEMON_ARG_LEN,
};
2354
2355 static int
2356 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2357 {
2358         unsigned char arg[128];
2359         int ret = 0;
2360         unsigned int copylen;
2361
2362         if (!capable(CAP_NET_ADMIN))
2363                 return -EPERM;
2364
2365         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2366                 return -EINVAL;
2367
2368         if (*len < get_arglen[GET_CMDID(cmd)]) {
2369                 pr_err("get_ctl: len %u < %u\n",
2370                        *len, get_arglen[GET_CMDID(cmd)]);
2371                 return -EINVAL;
2372         }
2373
2374         copylen = get_arglen[GET_CMDID(cmd)];
2375         if (copylen > 128)
2376                 return -EINVAL;
2377
2378         if (copy_from_user(arg, user, copylen) != 0)
2379                 return -EFAULT;
2380
2381         if (mutex_lock_interruptible(&__ip_vs_mutex))
2382                 return -ERESTARTSYS;
2383
2384         switch (cmd) {
2385         case IP_VS_SO_GET_VERSION:
2386         {
2387                 char buf[64];
2388
2389                 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2390                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2391                 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2392                         ret = -EFAULT;
2393                         goto out;
2394                 }
2395                 *len = strlen(buf)+1;
2396         }
2397         break;
2398
2399         case IP_VS_SO_GET_INFO:
2400         {
2401                 struct ip_vs_getinfo info;
2402                 info.version = IP_VS_VERSION_CODE;
2403                 info.size = ip_vs_conn_tab_size;
2404                 info.num_services = ip_vs_num_services;
2405                 if (copy_to_user(user, &info, sizeof(info)) != 0)
2406                         ret = -EFAULT;
2407         }
2408         break;
2409
2410         case IP_VS_SO_GET_SERVICES:
2411         {
2412                 struct ip_vs_get_services *get;
2413                 int size;
2414
2415                 get = (struct ip_vs_get_services *)arg;
2416                 size = sizeof(*get) +
2417                         sizeof(struct ip_vs_service_entry) * get->num_services;
2418                 if (*len != size) {
2419                         pr_err("length: %u != %u\n", *len, size);
2420                         ret = -EINVAL;
2421                         goto out;
2422                 }
2423                 ret = __ip_vs_get_service_entries(get, user);
2424         }
2425         break;
2426
2427         case IP_VS_SO_GET_SERVICE:
2428         {
2429                 struct ip_vs_service_entry *entry;
2430                 struct ip_vs_service *svc;
2431                 union nf_inet_addr addr;
2432
2433                 entry = (struct ip_vs_service_entry *)arg;
2434                 addr.ip = entry->addr;
2435                 if (entry->fwmark)
2436                         svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
2437                 else
2438                         svc = __ip_vs_service_get(AF_INET, entry->protocol,
2439                                                   &addr, entry->port);
2440                 if (svc) {
2441                         ip_vs_copy_service(entry, svc);
2442                         if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2443                                 ret = -EFAULT;
2444                         ip_vs_service_put(svc);
2445                 } else
2446                         ret = -ESRCH;
2447         }
2448         break;
2449
2450         case IP_VS_SO_GET_DESTS:
2451         {
2452                 struct ip_vs_get_dests *get;
2453                 int size;
2454
2455                 get = (struct ip_vs_get_dests *)arg;
2456                 size = sizeof(*get) +
2457                         sizeof(struct ip_vs_dest_entry) * get->num_dests;
2458                 if (*len != size) {
2459                         pr_err("length: %u != %u\n", *len, size);
2460                         ret = -EINVAL;
2461                         goto out;
2462                 }
2463                 ret = __ip_vs_get_dest_entries(get, user);
2464         }
2465         break;
2466
2467         case IP_VS_SO_GET_TIMEOUT:
2468         {
2469                 struct ip_vs_timeout_user t;
2470
2471                 __ip_vs_get_timeouts(&t);
2472                 if (copy_to_user(user, &t, sizeof(t)) != 0)
2473                         ret = -EFAULT;
2474         }
2475         break;
2476
2477         case IP_VS_SO_GET_DAEMON:
2478         {
2479                 struct ip_vs_daemon_user d[2];
2480
2481                 memset(&d, 0, sizeof(d));
2482                 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2483                         d[0].state = IP_VS_STATE_MASTER;
2484                         strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
2485                         d[0].syncid = ip_vs_master_syncid;
2486                 }
2487                 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2488                         d[1].state = IP_VS_STATE_BACKUP;
2489                         strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
2490                         d[1].syncid = ip_vs_backup_syncid;
2491                 }
2492                 if (copy_to_user(user, &d, sizeof(d)) != 0)
2493                         ret = -EFAULT;
2494         }
2495         break;
2496
2497         default:
2498                 ret = -EINVAL;
2499         }
2500
2501   out:
2502         mutex_unlock(&__ip_vs_mutex);
2503         return ret;
2504 }
2505
2506
/*
 * Netfilter sockopt registration for the PF_INET IPVS commands.
 * Both option ranges start at IP_VS_BASE_CTL; the *_optmax members are set
 * to MAX+1 -- presumably an exclusive upper bound, confirm against
 * struct nf_sockopt_ops.
 */
static struct nf_sockopt_ops ip_vs_sockopts = {
        .pf             = PF_INET,
        .set_optmin     = IP_VS_BASE_CTL,
        .set_optmax     = IP_VS_SO_SET_MAX+1,
        .set            = do_ip_vs_set_ctl,
        .get_optmin     = IP_VS_BASE_CTL,
        .get_optmax     = IP_VS_SO_GET_MAX+1,
        .get            = do_ip_vs_get_ctl,
        .owner          = THIS_MODULE,
};
2517
2518 /*
2519  * Generic Netlink interface
2520  */
2521
2522 /* IPVS genetlink family */
2523 static struct genl_family ip_vs_genl_family = {
2524         .id             = GENL_ID_GENERATE,
2525         .hdrsize        = 0,
2526         .name           = IPVS_GENL_NAME,
2527         .version        = IPVS_GENL_VERSION,
2528         .maxattr        = IPVS_CMD_MAX,
2529 };
2530
/*
 * Policy used for first-level command attributes: three nested containers
 * (service, destination, daemon) and the three 32-bit timeout values.
 */
static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
        [IPVS_CMD_ATTR_SERVICE]         = { .type = NLA_NESTED },
        [IPVS_CMD_ATTR_DEST]            = { .type = NLA_NESTED },
        [IPVS_CMD_ATTR_DAEMON]          = { .type = NLA_NESTED },
        [IPVS_CMD_ATTR_TIMEOUT_TCP]     = { .type = NLA_U32 },
        [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
        [IPVS_CMD_ATTR_TIMEOUT_UDP]     = { .type = NLA_U32 },
};
2540
/*
 * Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON.
 * The interface name is a NUL-terminated string with .len set to
 * IP_VS_IFNAME_MAXLEN.
 */
static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
        [IPVS_DAEMON_ATTR_STATE]        = { .type = NLA_U32 },
        [IPVS_DAEMON_ATTR_MCAST_IFN]    = { .type = NLA_NUL_STRING,
                                            .len = IP_VS_IFNAME_MAXLEN },
        [IPVS_DAEMON_ATTR_SYNC_ID]      = { .type = NLA_U32 },
};
2548
/*
 * Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE.
 * Address and flags are binary blobs with .len set to
 * sizeof(union nf_inet_addr) and sizeof(struct ip_vs_flags) respectively;
 * statistics are a nested container.
 */
static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
        [IPVS_SVC_ATTR_AF]              = { .type = NLA_U16 },
        [IPVS_SVC_ATTR_PROTOCOL]        = { .type = NLA_U16 },
        [IPVS_SVC_ATTR_ADDR]            = { .type = NLA_BINARY,
                                            .len = sizeof(union nf_inet_addr) },
        [IPVS_SVC_ATTR_PORT]            = { .type = NLA_U16 },
        [IPVS_SVC_ATTR_FWMARK]          = { .type = NLA_U32 },
        [IPVS_SVC_ATTR_SCHED_NAME]      = { .type = NLA_NUL_STRING,
                                            .len = IP_VS_SCHEDNAME_MAXLEN },
        [IPVS_SVC_ATTR_FLAGS]           = { .type = NLA_BINARY,
                                            .len = sizeof(struct ip_vs_flags) },
        [IPVS_SVC_ATTR_TIMEOUT]         = { .type = NLA_U32 },
        [IPVS_SVC_ATTR_NETMASK]         = { .type = NLA_U32 },
        [IPVS_SVC_ATTR_STATS]           = { .type = NLA_NESTED },
};
2565
/*
 * Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST.
 * The address is a binary blob with .len set to sizeof(union nf_inet_addr);
 * statistics are a nested container.
 */
static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
        [IPVS_DEST_ATTR_ADDR]           = { .type = NLA_BINARY,
                                            .len = sizeof(union nf_inet_addr) },
        [IPVS_DEST_ATTR_PORT]           = { .type = NLA_U16 },
        [IPVS_DEST_ATTR_FWD_METHOD]     = { .type = NLA_U32 },
        [IPVS_DEST_ATTR_WEIGHT]         = { .type = NLA_U32 },
        [IPVS_DEST_ATTR_U_THRESH]       = { .type = NLA_U32 },
        [IPVS_DEST_ATTR_L_THRESH]       = { .type = NLA_U32 },
        [IPVS_DEST_ATTR_ACTIVE_CONNS]   = { .type = NLA_U32 },
        [IPVS_DEST_ATTR_INACT_CONNS]    = { .type = NLA_U32 },
        [IPVS_DEST_ATTR_PERSIST_CONNS]  = { .type = NLA_U32 },
        [IPVS_DEST_ATTR_STATS]          = { .type = NLA_NESTED },
};
2580
/*
 * Append the counters and rates of @stats to @skb, wrapped in a nested
 * attribute of type @container_type.
 *
 * Returns 0 on success, or -EMSGSIZE if the nest cannot be started or an
 * attribute cannot be added; in the latter case the partially built nest
 * is cancelled.
 */
static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
                                 struct ip_vs_stats *stats)
{
        struct nlattr *nl_stats = nla_nest_start(skb, container_type);
        if (!nl_stats)
                return -EMSGSIZE;

        /* values are read under the stats lock */
        spin_lock_bh(&stats->lock);

        /*
         * NOTE(review): the NLA_PUT_* macros are expected to jump to
         * nla_put_failure when the attribute cannot be added -- the label
         * has no other user in this function.
         */
        NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
        NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
        NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
        NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
        NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
        NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
        NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
        NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
        NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
        NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);

        spin_unlock_bh(&stats->lock);

        nla_nest_end(skb, nl_stats);

        return 0;

nla_put_failure:
        /* the lock is still held on this path: release it before bailing out */
        spin_unlock_bh(&stats->lock);
        nla_nest_cancel(skb, nl_stats);
        return -EMSGSIZE;
}
2612
2613 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2614                                    struct ip_vs_service *svc)
2615 {
2616         struct nlattr *nl_service;
2617         struct ip_vs_flags flags = { .flags = svc->flags,
2618                                      .mask = ~0 };
2619
2620         nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2621         if (!nl_service)
2622                 return -EMSGSIZE;
2623
2624         NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
2625
2626         if (svc->fwmark) {
2627                 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2628         } else {
2629                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2630                 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2631                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2632         }
2633
2634         NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2635         NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2636         NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2637         NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2638
2639         if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2640                 goto nla_put_failure;
2641
2642         nla_nest_end(skb, nl_service);
2643
2644         return 0;
2645
2646 nla_put_failure:
2647         nla_nest_cancel(skb, nl_service);
2648         return -EMSGSIZE;
2649 }
2650
2651 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2652                                    struct ip_vs_service *svc,
2653                                    struct netlink_callback *cb)
2654 {
2655         void *hdr;
2656
2657         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2658                           &ip_vs_genl_family, NLM_F_MULTI,
2659                           IPVS_CMD_NEW_SERVICE);
2660         if (!hdr)
2661                 return -EMSGSIZE;
2662
2663         if (ip_vs_genl_fill_service(skb, svc) < 0)
2664                 goto nla_put_failure;
2665
2666         return genlmsg_end(skb, hdr);
2667
2668 nla_put_failure:
2669         genlmsg_cancel(skb, hdr);
2670         return -EMSGSIZE;
2671 }
2672
2673 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2674                                     struct netlink_callback *cb)
2675 {
2676         int idx = 0, i;
2677         int start = cb->args[0];
2678         struct ip_vs_service *svc;
2679
2680         mutex_lock(&__ip_vs_mutex);
2681         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2682                 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2683                         if (++idx <= start)
2684                                 continue;
2685                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2686                                 idx--;
2687                                 goto nla_put_failure;
2688                         }
2689                 }
2690         }
2691
2692         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2693                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2694                         if (++idx <= start)
2695                                 continue;
2696                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2697                                 idx--;
2698                                 goto nla_put_failure;
2699                         }
2700                 }
2701         }
2702
2703 nla_put_failure:
2704         mutex_unlock(&__ip_vs_mutex);
2705         cb->args[0] = idx;
2706
2707         return skb->len;
2708 }
2709
2710 static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2711                                     struct nlattr *nla, int full_entry)
2712 {
2713         struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2714         struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2715
2716         /* Parse mandatory identifying service fields first */
2717         if (nla == NULL ||
2718             nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2719                 return -EINVAL;
2720
2721         nla_af          = attrs[IPVS_SVC_ATTR_AF];
2722         nla_protocol    = attrs[IPVS_SVC_ATTR_PROTOCOL];
2723         nla_addr        = attrs[IPVS_SVC_ATTR_ADDR];
2724         nla_port        = attrs[IPVS_SVC_ATTR_PORT];
2725         nla_fwmark      = attrs[IPVS_SVC_ATTR_FWMARK];
2726
2727         if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2728                 return -EINVAL;
2729
2730         memset(usvc, 0, sizeof(*usvc));
2731
2732         usvc->af = nla_get_u16(nla_af);
2733 #ifdef CONFIG_IP_VS_IPV6
2734         if (usvc->af != AF_INET && usvc->af != AF_INET6)
2735 #else
2736         if (usvc->af != AF_INET)
2737 #endif
2738                 return -EAFNOSUPPORT;
2739
2740         if (nla_fwmark) {
2741                 usvc->protocol = IPPROTO_TCP;
2742                 usvc->fwmark = nla_get_u32(nla_fwmark);
2743         } else {
2744                 usvc->protocol = nla_get_u16(nla_protocol);
2745                 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2746                 usvc->port = nla_get_u16(nla_port);
2747                 usvc->fwmark = 0;
2748         }
2749
2750         /* If a full entry was requested, check for the additional fields */
2751         if (full_entry) {
2752                 struct nlattr *nla_sched, *nla_flags, *nla_timeout,
2753                               *nla_netmask;
2754                 struct ip_vs_flags flags;
2755                 struct ip_vs_service *svc;
2756
2757                 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2758                 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2759                 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2760                 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2761
2762                 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2763                         return -EINVAL;
2764
2765                 nla_memcpy(&flags, nla_flags, sizeof(flags));
2766
2767                 /* prefill flags from service if it already exists */
2768                 if (usvc->fwmark)
2769                         svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
2770                 else
2771                         svc = __ip_vs_service_get(usvc->af, usvc->protocol,
2772                                                   &usvc->addr, usvc->port);
2773                 if (svc) {
2774                         usvc->flags = svc->flags;
2775                         ip_vs_service_put(svc);
2776                 } else
2777                         usvc->flags = 0;
2778
2779                 /* set new flags from userland */
2780                 usvc->flags = (usvc->flags & ~flags.mask) |
2781                               (flags.flags & flags.mask);
2782                 usvc->sched_name = nla_data(nla_sched);
2783                 usvc->timeout = nla_get_u32(nla_timeout);
2784                 usvc->netmask = nla_get_u32(nla_netmask);
2785         }
2786
2787         return 0;
2788 }
2789
2790 static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2791 {
2792         struct ip_vs_service_user_kern usvc;
2793         int ret;
2794
2795         ret = ip_vs_genl_parse_service(&usvc, nla, 0);
2796         if (ret)
2797                 return ERR_PTR(ret);
2798
2799         if (usvc.fwmark)
2800                 return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
2801         else
2802                 return __ip_vs_service_get(usvc.af, usvc.protocol,
2803                                            &usvc.addr, usvc.port);
2804 }
2805
2806 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2807 {
2808         struct nlattr *nl_dest;
2809
2810         nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2811         if (!nl_dest)
2812                 return -EMSGSIZE;
2813
2814         NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2815         NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2816
2817         NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2818                     atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2819         NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2820         NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2821         NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2822         NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2823                     atomic_read(&dest->activeconns));
2824         NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2825                     atomic_read(&dest->inactconns));
2826         NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2827                     atomic_read(&dest->persistconns));
2828
2829         if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2830                 goto nla_put_failure;
2831
2832         nla_nest_end(skb, nl_dest);
2833
2834         return 0;
2835
2836 nla_put_failure:
2837         nla_nest_cancel(skb, nl_dest);
2838         return -EMSGSIZE;
2839 }
2840
2841 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2842                                 struct netlink_callback *cb)
2843 {
2844         void *hdr;
2845
2846         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2847                           &ip_vs_genl_family, NLM_F_MULTI,
2848                           IPVS_CMD_NEW_DEST);
2849         if (!hdr)
2850                 return -EMSGSIZE;
2851
2852         if (ip_vs_genl_fill_dest(skb, dest) < 0)
2853                 goto nla_put_failure;
2854
2855         return genlmsg_end(skb, hdr);
2856
2857 nla_put_failure:
2858         genlmsg_cancel(skb, hdr);
2859         return -EMSGSIZE;
2860 }
2861
2862 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2863                                  struct netlink_callback *cb)
2864 {
2865         int idx = 0;
2866         int start = cb->args[0];
2867         struct ip_vs_service *svc;
2868         struct ip_vs_dest *dest;
2869         struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2870
2871         mutex_lock(&__ip_vs_mutex);
2872
2873         /* Try to find the service for which to dump destinations */
2874         if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2875                         IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2876                 goto out_err;
2877
2878         svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2879         if (IS_ERR(svc) || svc == NULL)
2880                 goto out_err;
2881
2882         /* Dump the destinations */
2883         list_for_each_entry(dest, &svc->destinations, n_list) {
2884                 if (++idx <= start)
2885                         continue;
2886                 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2887                         idx--;
2888                         goto nla_put_failure;
2889                 }
2890         }
2891
2892 nla_put_failure:
2893         cb->args[0] = idx;
2894         ip_vs_service_put(svc);
2895
2896 out_err:
2897         mutex_unlock(&__ip_vs_mutex);
2898
2899         return skb->len;
2900 }
2901
2902 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
2903                                  struct nlattr *nla, int full_entry)
2904 {
2905         struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2906         struct nlattr *nla_addr, *nla_port;
2907
2908         /* Parse mandatory identifying destination fields first */
2909         if (nla == NULL ||
2910             nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2911                 return -EINVAL;
2912
2913         nla_addr        = attrs[IPVS_DEST_ATTR_ADDR];
2914         nla_port        = attrs[IPVS_DEST_ATTR_PORT];
2915
2916         if (!(nla_addr && nla_port))
2917                 return -EINVAL;
2918
2919         memset(udest, 0, sizeof(*udest));
2920
2921         nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2922         udest->port = nla_get_u16(nla_port);
2923
2924         /* If a full entry was requested, check for the additional fields */
2925         if (full_entry) {
2926                 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2927                               *nla_l_thresh;
2928
2929                 nla_fwd         = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2930                 nla_weight      = attrs[IPVS_DEST_ATTR_WEIGHT];
2931                 nla_u_thresh    = attrs[IPVS_DEST_ATTR_U_THRESH];
2932                 nla_l_thresh    = attrs[IPVS_DEST_ATTR_L_THRESH];
2933
2934                 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2935                         return -EINVAL;
2936
2937                 udest->conn_flags = nla_get_u32(nla_fwd)
2938                                     & IP_VS_CONN_F_FWD_MASK;
2939                 udest->weight = nla_get_u32(nla_weight);
2940                 udest->u_threshold = nla_get_u32(nla_u_thresh);
2941                 udest->l_threshold = nla_get_u32(nla_l_thresh);
2942         }
2943
2944         return 0;
2945 }
2946
2947 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2948                                   const char *mcast_ifn, __be32 syncid)
2949 {
2950         struct nlattr *nl_daemon;
2951
2952         nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2953         if (!nl_daemon)
2954                 return -EMSGSIZE;
2955
2956         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2957         NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2958         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2959
2960         nla_nest_end(skb, nl_daemon);
2961
2962         return 0;
2963
2964 nla_put_failure:
2965         nla_nest_cancel(skb, nl_daemon);
2966         return -EMSGSIZE;
2967 }
2968
2969 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2970                                   const char *mcast_ifn, __be32 syncid,
2971                                   struct netlink_callback *cb)
2972 {
2973         void *hdr;
2974         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2975                           &ip_vs_genl_family, NLM_F_MULTI,
2976                           IPVS_CMD_NEW_DAEMON);
2977         if (!hdr)
2978                 return -EMSGSIZE;
2979
2980         if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2981                 goto nla_put_failure;
2982
2983         return genlmsg_end(skb, hdr);
2984
2985 nla_put_failure:
2986         genlmsg_cancel(skb, hdr);
2987         return -EMSGSIZE;
2988 }
2989
2990 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
2991                                    struct netlink_callback *cb)
2992 {
2993         mutex_lock(&__ip_vs_mutex);
2994         if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
2995                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
2996                                            ip_vs_master_mcast_ifn,
2997                                            ip_vs_master_syncid, cb) < 0)
2998                         goto nla_put_failure;
2999
3000                 cb->args[0] = 1;
3001         }
3002
3003         if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3004                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3005                                            ip_vs_backup_mcast_ifn,
3006                                            ip_vs_backup_syncid, cb) < 0)
3007                         goto nla_put_failure;
3008
3009                 cb->args[1] = 1;
3010         }
3011
3012 nla_put_failure:
3013         mutex_unlock(&__ip_vs_mutex);
3014
3015         return skb->len;
3016 }
3017
3018 static int ip_vs_genl_new_daemon(struct nlattr **attrs)
3019 {
3020         if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3021               attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3022               attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3023                 return -EINVAL;
3024
3025         return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3026                                  nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3027                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3028 }
3029
3030 static int ip_vs_genl_del_daemon(struct nlattr **attrs)
3031 {
3032         if (!attrs[IPVS_DAEMON_ATTR_STATE])
3033                 return -EINVAL;
3034
3035         return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3036 }
3037
3038 static int ip_vs_genl_set_config(struct nlattr **attrs)
3039 {
3040         struct ip_vs_timeout_user t;
3041
3042         __ip_vs_get_timeouts(&t);
3043
3044         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3045                 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3046
3047         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3048                 t.tcp_fin_timeout =
3049                         nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3050
3051         if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3052                 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3053
3054         return ip_vs_set_timeout(&t);
3055 }
3056
3057 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3058 {
3059         struct ip_vs_service *svc = NULL;
3060         struct ip_vs_service_user_kern usvc;
3061         struct ip_vs_dest_user_kern udest;
3062         int ret = 0, cmd;
3063         int need_full_svc = 0, need_full_dest = 0;
3064
3065         cmd = info->genlhdr->cmd;
3066
3067         mutex_lock(&__ip_vs_mutex);
3068
3069         if (cmd == IPVS_CMD_FLUSH) {
3070                 ret = ip_vs_flush();
3071                 goto out;
3072         } else if (cmd == IPVS_CMD_SET_CONFIG) {
3073                 ret = ip_vs_genl_set_config(info->attrs);
3074                 goto out;
3075         } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3076                    cmd == IPVS_CMD_DEL_DAEMON) {
3077
3078                 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3079
3080                 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3081                     nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3082                                      info->attrs[IPVS_CMD_ATTR_DAEMON],
3083                                      ip_vs_daemon_policy)) {
3084                         ret = -EINVAL;
3085                         goto out;
3086                 }
3087
3088                 if (cmd == IPVS_CMD_NEW_DAEMON)
3089                         ret = ip_vs_genl_new_daemon(daemon_attrs);
3090                 else
3091                         ret = ip_vs_genl_del_daemon(daemon_attrs);
3092                 goto out;
3093         } else if (cmd == IPVS_CMD_ZERO &&
3094                    !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3095                 ret = ip_vs_zero_all();
3096                 goto out;
3097         }
3098
3099         /* All following commands require a service argument, so check if we
3100          * received a valid one. We need a full service specification when
3101          * adding / editing a service. Only identifying members otherwise. */
3102         if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3103                 need_full_svc = 1;
3104
3105         ret = ip_vs_genl_parse_service(&usvc,
3106                                        info->attrs[IPVS_CMD_ATTR_SERVICE],
3107                                        need_full_svc);
3108         if (ret)
3109                 goto out;
3110
3111         /* Lookup the exact service by <protocol, addr, port> or fwmark */
3112         if (usvc.fwmark == 0)
3113                 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
3114                                           &usvc.addr, usvc.port);
3115         else
3116                 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
3117
3118         /* Unless we're adding a new service, the service must already exist */
3119         if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3120                 ret = -ESRCH;
3121                 goto out;
3122         }
3123
3124         /* Destination commands require a valid destination argument. For
3125          * adding / editing a destination, we need a full destination
3126          * specification. */
3127         if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3128             cmd == IPVS_CMD_DEL_DEST) {
3129                 if (cmd != IPVS_CMD_DEL_DEST)
3130                         need_full_dest = 1;
3131
3132                 ret = ip_vs_genl_parse_dest(&udest,
3133                                             info->attrs[IPVS_CMD_ATTR_DEST],
3134                                             need_full_dest);
3135                 if (ret)
3136                         goto out;
3137         }
3138
3139         switch (cmd) {
3140         case IPVS_CMD_NEW_SERVICE:
3141                 if (svc == NULL)
3142                         ret = ip_vs_add_service(&usvc, &svc);
3143                 else
3144                         ret = -EEXIST;
3145                 break;
3146         case IPVS_CMD_SET_SERVICE:
3147                 ret = ip_vs_edit_service(svc, &usvc);
3148                 break;
3149         case IPVS_CMD_DEL_SERVICE:
3150                 ret = ip_vs_del_service(svc);
3151                 break;
3152         case IPVS_CMD_NEW_DEST:
3153                 ret = ip_vs_add_dest(svc, &udest);
3154                 break;
3155         case IPVS_CMD_SET_DEST:
3156                 ret = ip_vs_edit_dest(svc, &udest);
3157                 break;
3158         case IPVS_CMD_DEL_DEST:
3159                 ret = ip_vs_del_dest(svc, &udest);
3160                 break;
3161         case IPVS_CMD_ZERO:
3162                 ret = ip_vs_zero_service(svc);
3163                 break;
3164         default:
3165                 ret = -EINVAL;
3166         }
3167
3168 out:
3169         if (svc)
3170                 ip_vs_service_put(svc);
3171         mutex_unlock(&__ip_vs_mutex);
3172
3173         return ret;
3174 }
3175
3176 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3177 {
3178         struct sk_buff *msg;
3179         void *reply;
3180         int ret, cmd, reply_cmd;
3181
3182         cmd = info->genlhdr->cmd;
3183
3184         if (cmd == IPVS_CMD_GET_SERVICE)
3185                 reply_cmd = IPVS_CMD_NEW_SERVICE;
3186         else if (cmd == IPVS_CMD_GET_INFO)
3187                 reply_cmd = IPVS_CMD_SET_INFO;
3188         else if (cmd == IPVS_CMD_GET_CONFIG)
3189                 reply_cmd = IPVS_CMD_SET_CONFIG;
3190         else {
3191                 pr_err("unknown Generic Netlink command\n");
3192                 return -EINVAL;
3193         }
3194
3195         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3196         if (!msg)
3197                 return -ENOMEM;
3198
3199         mutex_lock(&__ip_vs_mutex);
3200
3201         reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3202         if (reply == NULL)
3203                 goto nla_put_failure;
3204
3205         switch (cmd) {
3206         case IPVS_CMD_GET_SERVICE:
3207         {
3208                 struct ip_vs_service *svc;
3209
3210                 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3211                 if (IS_ERR(svc)) {
3212                         ret = PTR_ERR(svc);
3213                         goto out_err;
3214                 } else if (svc) {
3215                         ret = ip_vs_genl_fill_service(msg, svc);
3216                         ip_vs_service_put(svc);
3217                         if (ret)
3218                                 goto nla_put_failure;
3219                 } else {
3220                         ret = -ESRCH;
3221                         goto out_err;
3222                 }
3223
3224                 break;
3225         }
3226
3227         case IPVS_CMD_GET_CONFIG:
3228         {
3229                 struct ip_vs_timeout_user t;
3230
3231                 __ip_vs_get_timeouts(&t);
3232 #ifdef CONFIG_IP_VS_PROTO_TCP
3233                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3234                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3235                             t.tcp_fin_timeout);
3236 #endif
3237 #ifdef CONFIG_IP_VS_PROTO_UDP
3238                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3239 #endif
3240
3241                 break;
3242         }
3243
3244         case IPVS_CMD_GET_INFO:
3245                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3246                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3247                             ip_vs_conn_tab_size);
3248                 break;
3249         }
3250
3251         genlmsg_end(msg, reply);
3252         ret = genlmsg_reply(msg, info);
3253         goto out;
3254
3255 nla_put_failure:
3256         pr_err("not enough space in Netlink message\n");
3257         ret = -EMSGSIZE;
3258
3259 out_err:
3260         nlmsg_free(msg);
3261 out:
3262         mutex_unlock(&__ip_vs_mutex);
3263
3264         return ret;
3265 }
3266
3267
3268 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3269         {
3270                 .cmd    = IPVS_CMD_NEW_SERVICE,
3271                 .flags  = GENL_ADMIN_PERM,
3272                 .policy = ip_vs_cmd_policy,
3273                 .doit   = ip_vs_genl_set_cmd,
3274         },
3275         {
3276                 .cmd    = IPVS_CMD_SET_SERVICE,
3277                 .flags  = GENL_ADMIN_PERM,
3278                 .policy = ip_vs_cmd_policy,
3279                 .doit   = ip_vs_genl_set_cmd,
3280         },
3281         {
3282                 .cmd    = IPVS_CMD_DEL_SERVICE,
3283                 .flags  = GENL_ADMIN_PERM,
3284                 .policy = ip_vs_cmd_policy,
3285                 .doit   = ip_vs_genl_set_cmd,
3286         },
3287         {
3288                 .cmd    = IPVS_CMD_GET_SERVICE,
3289                 .flags  = GENL_ADMIN_PERM,
3290                 .doit   = ip_vs_genl_get_cmd,
3291                 .dumpit = ip_vs_genl_dump_services,
3292                 .policy = ip_vs_cmd_policy,
3293         },
3294         {
3295                 .cmd    = IPVS_CMD_NEW_DEST,
3296                 .flags  = GENL_ADMIN_PERM,
3297                 .policy = ip_vs_cmd_policy,
3298                 .doit   = ip_vs_genl_set_cmd,
3299         },
3300         {
3301                 .cmd    = IPVS_CMD_SET_DEST,
3302                 .flags  = GENL_ADMIN_PERM,
3303                 .policy = ip_vs_cmd_policy,
3304                 .doit   = ip_vs_genl_set_cmd,
3305         },
3306         {
3307                 .cmd    = IPVS_CMD_DEL_DEST,
3308                 .flags  = GENL_ADMIN_PERM,
3309                 .policy = ip_vs_cmd_policy,
3310                 .doit   = ip_vs_genl_set_cmd,
3311         },
3312         {
3313                 .cmd    = IPVS_CMD_GET_DEST,
3314                 .flags  = GENL_ADMIN_PERM,
3315                 .policy = ip_vs_cmd_policy,
3316                 .dumpit = ip_vs_genl_dump_dests,
3317         },
3318         {
3319                 .cmd    = IPVS_CMD_NEW_DAEMON,
3320                 .flags  = GENL_ADMIN_PERM,
3321                 .policy = ip_vs_cmd_policy,
3322                 .doit   = ip_vs_genl_set_cmd,
3323         },
3324         {
3325                 .cmd    = IPVS_CMD_DEL_DAEMON,
3326                 .flags  = GENL_ADMIN_PERM,
3327                 .policy = ip_vs_cmd_policy,
3328                 .doit   = ip_vs_genl_set_cmd,
3329         },
3330         {
3331                 .cmd    = IPVS_CMD_GET_DAEMON,
3332                 .flags  = GENL_ADMIN_PERM,
3333                 .dumpit = ip_vs_genl_dump_daemons,
3334         },
3335         {
3336                 .cmd    = IPVS_CMD_SET_CONFIG,
3337                 .flags  = GENL_ADMIN_PERM,
3338                 .policy = ip_vs_cmd_policy,
3339                 .doit   = ip_vs_genl_set_cmd,
3340         },
3341         {
3342                 .cmd    = IPVS_CMD_GET_CONFIG,
3343                 .flags  = GENL_ADMIN_PERM,
3344                 .doit   = ip_vs_genl_get_cmd,
3345         },
3346         {
3347                 .cmd    = IPVS_CMD_GET_INFO,
3348                 .flags  = GENL_ADMIN_PERM,
3349                 .doit   = ip_vs_genl_get_cmd,
3350         },
3351         {
3352                 .cmd    = IPVS_CMD_ZERO,
3353                 .flags  = GENL_ADMIN_PERM,
3354                 .policy = ip_vs_cmd_policy,
3355                 .doit   = ip_vs_genl_set_cmd,
3356         },
3357         {
3358                 .cmd    = IPVS_CMD_FLUSH,
3359                 .flags  = GENL_ADMIN_PERM,
3360                 .doit   = ip_vs_genl_set_cmd,
3361         },
3362 };
3363
3364 static int __init ip_vs_genl_register(void)
3365 {
3366         return genl_register_family_with_ops(&ip_vs_genl_family,
3367                 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
3368 }
3369
3370 static void ip_vs_genl_unregister(void)
3371 {
3372         genl_unregister_family(&ip_vs_genl_family);
3373 }
3374
3375 /* End of Generic Netlink interface definitions */
3376
3377
3378 int __init ip_vs_control_init(void)
3379 {
3380         int ret;
3381         int idx;
3382
3383         EnterFunction(2);
3384
3385         ret = nf_register_sockopt(&ip_vs_sockopts);
3386         if (ret) {
3387                 pr_err("cannot register sockopt.\n");
3388                 return ret;
3389         }
3390
3391         ret = ip_vs_genl_register();
3392         if (ret) {
3393                 pr_err("cannot register Generic Netlink interface.\n");
3394                 nf_unregister_sockopt(&ip_vs_sockopts);
3395                 return ret;
3396         }
3397
3398         proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3399         proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
3400
3401         sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
3402
3403         /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3404         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
3405                 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3406                 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3407         }
3408         for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
3409                 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3410         }
3411
3412         ip_vs_new_estimator(&ip_vs_stats);
3413
3414         /* Hook the defense timer */
3415         schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3416
3417         LeaveFunction(2);
3418         return 0;
3419 }
3420
3421
3422 void ip_vs_control_cleanup(void)
3423 {
3424         EnterFunction(2);
3425         ip_vs_trash_cleanup();
3426         cancel_rearming_delayed_work(&defense_work);
3427         cancel_work_sync(&defense_work.work);
3428         ip_vs_kill_estimator(&ip_vs_stats);
3429         unregister_sysctl_table(sysctl_header);
3430         proc_net_remove(&init_net, "ip_vs_stats");
3431         proc_net_remove(&init_net, "ip_vs");
3432         ip_vs_genl_unregister();
3433         nf_unregister_sockopt(&ip_vs_sockopts);
3434         LeaveFunction(2);
3435 }