net/ipv4/ipvs/ip_vs_ctl.c
1 /*
2  * IPVS         An implementation of the IP virtual server support for the
3  *              LINUX operating system.  IPVS is now implemented as a module
4  *              over the NetFilter framework. IPVS can be used to build a
5  *              high-performance and highly available server based on a
6  *              cluster of servers.
7  *
8  * Version:     $Id: ip_vs_ctl.c,v 1.36 2003/06/08 09:31:19 wensong Exp $
9  *
10  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
11  *              Peter Kese <peter.kese@ijs.si>
12  *              Julian Anastasov <ja@ssi.bg>
13  *
14  *              This program is free software; you can redistribute it and/or
15  *              modify it under the terms of the GNU General Public License
16  *              as published by the Free Software Foundation; either version
17  *              2 of the License, or (at your option) any later version.
18  *
19  * Changes:
20  *
21  */
22
23 #include <linux/config.h>
24 #include <linux/kernel.h>
25 #include <linux/module.h>
26 #include <linux/init.h>
27 #include <linux/types.h>
28 #include <linux/errno.h>
29 #include <linux/fs.h>
30 #include <linux/sysctl.h>
31 #include <linux/proc_fs.h>
32 #include <linux/timer.h>
33 #include <linux/swap.h>
35 #include <linux/seq_file.h>
36
37 #include <linux/netfilter.h>
38 #include <linux/netfilter_ipv4.h>
39
40 #include <net/ip.h>
41 #include <net/sock.h>
42
43 #include <asm/uaccess.h>
44
45 #include <net/ip_vs.h>
46
47 /* semaphore for IPVS sockopts; [gs]etsockopt may sleep */
48 static DECLARE_MUTEX(__ip_vs_mutex);
49
50 /* lock for service table */
51 static rwlock_t __ip_vs_svc_lock = RW_LOCK_UNLOCKED;
52
53 /* lock for table with the real services */
54 static rwlock_t __ip_vs_rs_lock = RW_LOCK_UNLOCKED;
55
56 /* lock for state and timeout tables */
57 static rwlock_t __ip_vs_securetcp_lock = RW_LOCK_UNLOCKED;
58
59 /* lock for drop entry handling */
60 static spinlock_t __ip_vs_dropentry_lock = SPIN_LOCK_UNLOCKED;
61
62 /* lock for drop packet handling */
63 static spinlock_t __ip_vs_droppacket_lock = SPIN_LOCK_UNLOCKED;
64
65 /* 1/rate drop and drop-entry variables */
66 int ip_vs_drop_rate = 0;
67 int ip_vs_drop_counter = 0;
68 atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
69
70 /* number of virtual services */
71 static int ip_vs_num_services = 0;
72
73 /* sysctl variables */
74 static int sysctl_ip_vs_drop_entry = 0;
75 static int sysctl_ip_vs_drop_packet = 0;
76 static int sysctl_ip_vs_secure_tcp = 0;
77 static int sysctl_ip_vs_amemthresh = 1024;
78 static int sysctl_ip_vs_am_droprate = 10;
79 int sysctl_ip_vs_cache_bypass = 0;
80 int sysctl_ip_vs_expire_nodest_conn = 0;
81 int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
82 int sysctl_ip_vs_nat_icmp_send = 0;
83
84
85 #ifdef CONFIG_IP_VS_DEBUG
86 static int sysctl_ip_vs_debug_level = 0;
87
88 int ip_vs_get_debug_level(void)
89 {
90         return sysctl_ip_vs_debug_level;
91 }
92 #endif
93
94 /*
95  *      update_defense_level is called from timer bh and from sysctl.
96  */
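/*
 *      The drop_entry, drop_packet and secure_tcp defense strategies below
 *      share the same sysctl mode values: 0 means the strategy is never
 *      used, 3 means it is always on, and 1/2 form an automatic mode in
 *      which the strategy is switched on (state 2) while available memory
 *      is below amemthresh and switched back off (state 1) once memory
 *      recovers.
 */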
97 static void update_defense_level(void)
98 {
99         struct sysinfo i;
100         static int old_secure_tcp = 0;
101         int availmem;
102         int nomem;
103         int to_change = -1;
104
105         /* we only count free and buffered memory (in pages) */
106         si_meminfo(&i);
107         availmem = i.freeram + i.bufferram;
108         /* however, in Linux 2.5 i.bufferram is the total page cache
109            size, so we need to adjust it */
110         /* si_swapinfo(&i); */
111         /* availmem = availmem - (i.totalswap - i.freeswap); */
112
113         nomem = (availmem < sysctl_ip_vs_amemthresh);
114
115         /* drop_entry */
116         spin_lock(&__ip_vs_dropentry_lock);
117         switch (sysctl_ip_vs_drop_entry) {
118         case 0:
119                 atomic_set(&ip_vs_dropentry, 0);
120                 break;
121         case 1:
122                 if (nomem) {
123                         atomic_set(&ip_vs_dropentry, 1);
124                         sysctl_ip_vs_drop_entry = 2;
125                 } else {
126                         atomic_set(&ip_vs_dropentry, 0);
127                 }
128                 break;
129         case 2:
130                 if (nomem) {
131                         atomic_set(&ip_vs_dropentry, 1);
132                 } else {
133                         atomic_set(&ip_vs_dropentry, 0);
134                         sysctl_ip_vs_drop_entry = 1;
135                 }
136                 break;
137         case 3:
138                 atomic_set(&ip_vs_dropentry, 1);
139                 break;
140         }
141         spin_unlock(&__ip_vs_dropentry_lock);
142
143         /* drop_packet */
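        /*
         * In the automatic modes the drop rate below is
         * amemthresh / (amemthresh - availmem) and is used as a 1/rate
         * drop: for example, with amemthresh 1024 and availmem 512 the
         * rate is 2, i.e. roughly one of every two packets is dropped.
         */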
144         spin_lock(&__ip_vs_droppacket_lock);
145         switch (sysctl_ip_vs_drop_packet) {
146         case 0:
147                 ip_vs_drop_rate = 0;
148                 break;
149         case 1:
150                 if (nomem) {
151                         ip_vs_drop_rate = ip_vs_drop_counter
152                                 = sysctl_ip_vs_amemthresh /
153                                 (sysctl_ip_vs_amemthresh-availmem);
154                         sysctl_ip_vs_drop_packet = 2;
155                 } else {
156                         ip_vs_drop_rate = 0;
157                 }
158                 break;
159         case 2:
160                 if (nomem) {
161                         ip_vs_drop_rate = ip_vs_drop_counter
162                                 = sysctl_ip_vs_amemthresh /
163                                 (sysctl_ip_vs_amemthresh-availmem);
164                 } else {
165                         ip_vs_drop_rate = 0;
166                         sysctl_ip_vs_drop_packet = 1;
167                 }
168                 break;
169         case 3:
170                 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
171                 break;
172         }
173         spin_unlock(&__ip_vs_droppacket_lock);
174
175         /* secure_tcp */
176         write_lock(&__ip_vs_securetcp_lock);
177         switch (sysctl_ip_vs_secure_tcp) {
178         case 0:
179                 if (old_secure_tcp >= 2)
180                         to_change = 0;
181                 break;
182         case 1:
183                 if (nomem) {
184                         if (old_secure_tcp < 2)
185                                 to_change = 1;
186                         sysctl_ip_vs_secure_tcp = 2;
187                 } else {
188                         if (old_secure_tcp >= 2)
189                                 to_change = 0;
190                 }
191                 break;
192         case 2:
193                 if (nomem) {
194                         if (old_secure_tcp < 2)
195                                 to_change = 1;
196                 } else {
197                         if (old_secure_tcp >= 2)
198                                 to_change = 0;
199                         sysctl_ip_vs_secure_tcp = 1;
200                 }
201                 break;
202         case 3:
203                 if (old_secure_tcp < 2)
204                         to_change = 1;
205                 break;
206         }
207         old_secure_tcp = sysctl_ip_vs_secure_tcp;
208         if (to_change >= 0)
209                 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
210         write_unlock(&__ip_vs_securetcp_lock);
211 }
212
213
214 /*
215  *      Timer for checking the defense
216  */
217 static struct timer_list defense_timer;
218 #define DEFENSE_TIMER_PERIOD    1*HZ
219
220 static void defense_timer_handler(unsigned long data)
221 {
222         update_defense_level();
223         if (atomic_read(&ip_vs_dropentry))
224                 ip_vs_random_dropentry();
225
226         mod_timer(&defense_timer, jiffies + DEFENSE_TIMER_PERIOD);
227 }
228
229
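/*
 *      Keep the IPVS module pinned while it is referenced (e.g. while
 *      virtual services exist), so that it cannot be unloaded from
 *      under them.
 */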
230 int
231 ip_vs_use_count_inc(void)
232 {
233         return try_module_get(THIS_MODULE);
234 }
235
236 void
237 ip_vs_use_count_dec(void)
238 {
239         module_put(THIS_MODULE);
240 }
241
242
243 /*
244  *      Hash table: for virtual service lookups
245  */
246 #define IP_VS_SVC_TAB_BITS 8
247 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
248 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
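/* 8 bits give 256 buckets; the mask keeps the low 8 bits of the hash key */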
249
250 /* the service table hashed by <protocol, addr, port> */
251 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
252 /* the service table hashed by fwmark */
253 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
254
255 /*
256  *      Hash table: for real service lookups
257  */
258 #define IP_VS_RTAB_BITS 4
259 #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
260 #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
261
262 static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
263
264 /*
265  *      Trash for destinations
266  */
267 static LIST_HEAD(ip_vs_dest_trash);
268
269 /*
270  *      FTP & NULL virtual service counters
271  */
272 static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
273 static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
274
275
276 /*
277  *      Returns hash value for virtual service
278  */
279 static __inline__ unsigned
280 ip_vs_svc_hashkey(unsigned proto, __u32 addr, __u16 port)
281 {
282         register unsigned porth = ntohs(port);
283
284         return (proto^ntohl(addr)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
285                 & IP_VS_SVC_TAB_MASK;
286 }
287
288 /*
289  *      Returns hash value of fwmark for virtual service lookup
290  */
291 static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
292 {
293         return fwmark & IP_VS_SVC_TAB_MASK;
294 }
295
296 /*
297  *      Hashes a service in the ip_vs_svc_table by <proto,addr,port>
298  *      or in the ip_vs_svc_fwm_table by fwmark.
299  *      Should be called with locked tables.
300  */
301 static int ip_vs_svc_hash(struct ip_vs_service *svc)
302 {
303         unsigned hash;
304
305         if (svc->flags & IP_VS_SVC_F_HASHED) {
306                 IP_VS_ERR("ip_vs_svc_hash(): request to hash an already hashed "
307                           "service, called from %p\n", __builtin_return_address(0));
308                 return 0;
309         }
310
311         if (svc->fwmark == 0) {
312                 /*
313                  *  Hash it by <protocol,addr,port> in ip_vs_svc_table
314                  */
315                 hash = ip_vs_svc_hashkey(svc->protocol, svc->addr, svc->port);
316                 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
317         } else {
318                 /*
319                  *  Hash it by fwmark in ip_vs_svc_fwm_table
320                  */
321                 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
322                 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
323         }
324
325         svc->flags |= IP_VS_SVC_F_HASHED;
326         /* increase its refcnt because it is referenced by the svc table */
327         atomic_inc(&svc->refcnt);
328         return 1;
329 }
330
331
332 /*
333  *      Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
334  *      Should be called with locked tables.
335  */
336 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
337 {
338         if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
339                 IP_VS_ERR("ip_vs_svc_unhash(): request to unhash a service that "
340                           "is not hashed, called from %p\n", __builtin_return_address(0));
341                 return 0;
342         }
343
344         if (svc->fwmark == 0) {
345                 /* Remove it from the ip_vs_svc_table */
346                 list_del(&svc->s_list);
347         } else {
348                 /* Remove it from the ip_vs_svc_fwm_table */
349                 list_del(&svc->f_list);
350         }
351
352         svc->flags &= ~IP_VS_SVC_F_HASHED;
353         atomic_dec(&svc->refcnt);
354         return 1;
355 }
356
357
358 /*
359  *      Get service by {proto,addr,port} in the service table.
360  */
361 static __inline__ struct ip_vs_service *
362 __ip_vs_service_get(__u16 protocol, __u32 vaddr, __u16 vport)
363 {
364         unsigned hash;
365         struct ip_vs_service *svc;
366
367         /* Check for "full" addressed entries */
368         hash = ip_vs_svc_hashkey(protocol, vaddr, vport);
369
370         list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
371                 if ((svc->addr == vaddr)
372                     && (svc->port == vport)
373                     && (svc->protocol == protocol)) {
374                         /* HIT */
375                         atomic_inc(&svc->usecnt);
376                         return svc;
377                 }
378         }
379
380         return NULL;
381 }
382
383
384 /*
385  *      Get service by {fwmark} in the service table.
386  */
387 static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
388 {
389         unsigned hash;
390         struct ip_vs_service *svc;
391
392         /* Check for fwmark addressed entries */
393         hash = ip_vs_svc_fwm_hashkey(fwmark);
394
395         list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
396                 if (svc->fwmark == fwmark) {
397                         /* HIT */
398                         atomic_inc(&svc->usecnt);
399                         return svc;
400                 }
401         }
402
403         return NULL;
404 }
405
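/*
 *      Lookup a virtual service, trying in order: the fwmark table, the
 *      <protocol,addr,port> table, the FTP control service (so that FTP
 *      data connections can still be matched), and finally the port-zero
 *      catch-all service, if one exists.
 */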
406 struct ip_vs_service *
407 ip_vs_service_get(__u32 fwmark, __u16 protocol, __u32 vaddr, __u16 vport)
408 {
409         struct ip_vs_service *svc;
410
411         read_lock(&__ip_vs_svc_lock);
412
413         /*
414          *      Check the table hashed by fwmark first
415          */
416         if (fwmark && (svc = __ip_vs_svc_fwm_get(fwmark)))
417                 goto out;
418
419         /*
420          *      Check the table hashed by <protocol,addr,port>
421          *      for "full" addressed entries
422          */
423         svc = __ip_vs_service_get(protocol, vaddr, vport);
424
425         if (svc == NULL
426             && protocol == IPPROTO_TCP
427             && atomic_read(&ip_vs_ftpsvc_counter)
428             && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
429                 /*
430                  * Check if ftp service entry exists, the packet
431                  * might belong to FTP data connections.
432                  */
433                 svc = __ip_vs_service_get(protocol, vaddr, FTPPORT);
434         }
435
436         if (svc == NULL
437             && atomic_read(&ip_vs_nullsvc_counter)) {
438                 /*
439                  * Check if the catch-all port (port zero) exists
440                  */
441                 svc = __ip_vs_service_get(protocol, vaddr, 0);
442         }
443
444   out:
445         read_unlock(&__ip_vs_svc_lock);
446
447         IP_VS_DBG(6, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
448                   fwmark, ip_vs_proto_name(protocol),
449                   NIPQUAD(vaddr), ntohs(vport),
450                   svc?"hit":"not hit");
451
452         return svc;
453 }
454
455
456 static inline void
457 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
458 {
459         atomic_inc(&svc->refcnt);
460         dest->svc = svc;
461 }
462
463 static inline void
464 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
465 {
466         struct ip_vs_service *svc = dest->svc;
467
468         dest->svc = NULL;
469         if (atomic_dec_and_test(&svc->refcnt))
470                 kfree(svc);
471 }
472
473
474 /*
475  *      Returns hash value for real service
476  */
477 static __inline__ unsigned ip_vs_rs_hashkey(__u32 addr, __u16 port)
478 {
479         register unsigned porth = ntohs(port);
480
481         return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth)
482                 & IP_VS_RTAB_MASK;
483 }
484
485 /*
486  *      Hashes ip_vs_dest into ip_vs_rtable by its <addr,port>.
487  *      Should be called with locked tables.
488  */
489 static int ip_vs_rs_hash(struct ip_vs_dest *dest)
490 {
491         unsigned hash;
492
493         if (!list_empty(&dest->d_list)) {
494                 return 0;
495         }
496
497         /*
498          *      Hash by the addr and port of the real service
499          *      (the protocol is matched separately at lookup time).
500          */
501         hash = ip_vs_rs_hashkey(dest->addr, dest->port);
502         list_add(&dest->d_list, &ip_vs_rtable[hash]);
503
504         return 1;
505 }
506
507 /*
508  *      Unhashes ip_vs_dest from ip_vs_rtable.
509  *      Should be called with locked tables.
510  */
511 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
512 {
513         /*
514          * Remove it from the ip_vs_rtable.
515          */
516         if (!list_empty(&dest->d_list)) {
517                 list_del(&dest->d_list);
518                 INIT_LIST_HEAD(&dest->d_list);
519         }
520
521         return 1;
522 }
523
524 /*
525  *      Lookup real service by <proto,addr,port> in the real service table.
526  */
527 struct ip_vs_dest *
528 ip_vs_lookup_real_service(__u16 protocol, __u32 daddr, __u16 dport)
529 {
530         unsigned hash;
531         struct ip_vs_dest *dest;
532
533         /*
534          *      Check for "full" addressed entries
535          *      Return the first found entry
536          */
537         hash = ip_vs_rs_hashkey(daddr, dport);
538
539         read_lock(&__ip_vs_rs_lock);
540         list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
541                 if ((dest->addr == daddr)
542                     && (dest->port == dport)
543                     && ((dest->protocol == protocol) ||
544                         dest->vfwmark)) {
545                         /* HIT */
546                         read_unlock(&__ip_vs_rs_lock);
547                         return dest;
548                 }
549         }
550         read_unlock(&__ip_vs_rs_lock);
551
552         return NULL;
553 }
554
555 /*
556  *      Lookup destination by {addr,port} in the given service
557  */
558 static struct ip_vs_dest *
559 ip_vs_lookup_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport)
560 {
561         struct ip_vs_dest *dest;
562
563         /*
564          * Find the destination for the given service
565          */
566         list_for_each_entry(dest, &svc->destinations, n_list) {
567                 if ((dest->addr == daddr) && (dest->port == dport)) {
568                         /* HIT */
569                         return dest;
570                 }
571         }
572
573         return NULL;
574 }
575
576
577 /*
578  *  Lookup dest by {svc,addr,port} in the destination trash.
579  *  The destination trash is used to hold the destinations that are removed
580  *  from the service table but are still referenced by some conn entries.
581  *  The trash exists because a dest may be taken down only temporarily
582  *  (either by the administrator or by a monitor program); it can then be
583  *  picked back from the trash, the remaining connections to it can
584  *  continue, and its counting information remains useful for
585  *  scheduling.
586  */
587 static struct ip_vs_dest *
588 ip_vs_trash_get_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport)
589 {
590         struct ip_vs_dest *dest, *nxt;
591
592         /*
593          * Find the destination in trash
594          */
595         list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
596                 IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
597                           "refcnt=%d\n",
598                           dest->vfwmark,
599                           NIPQUAD(dest->addr), ntohs(dest->port),
600                           atomic_read(&dest->refcnt));
601                 if (dest->addr == daddr &&
602                     dest->port == dport &&
603                     dest->vfwmark == svc->fwmark &&
604                     dest->protocol == svc->protocol &&
605                     (svc->fwmark ||
606                      (dest->vaddr == svc->addr &&
607                       dest->vport == svc->port))) {
608                         /* HIT */
609                         return dest;
610                 }
611
612                 /*
613                  * Try to purge the destination from trash if not referenced
614                  */
615                 if (atomic_read(&dest->refcnt) == 1) {
616                         IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u "
617                                   "from trash\n",
618                                   dest->vfwmark,
619                                   NIPQUAD(dest->addr), ntohs(dest->port));
620                         list_del(&dest->n_list);
621                         ip_vs_dst_reset(dest);
622                         __ip_vs_unbind_svc(dest);
623                         kfree(dest);
624                 }
625         }
626
627         return NULL;
628 }
629
630
631 /*
632  *  Clean up all the destinations in the trash
633  *  Called by the ip_vs_control_cleanup()
634  *
635  *  When ip_vs_control_cleanup() runs at IPVS module exit, the service
636  *  tables have already been flushed and all the connections have
637  *  expired, so the refcnt of each destination in the trash must be 1
638  *  and we can simply release them here.
639  */
640 static void ip_vs_trash_cleanup(void)
641 {
642         struct ip_vs_dest *dest, *nxt;
643
644         list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
645                 list_del(&dest->n_list);
646                 ip_vs_dst_reset(dest);
647                 __ip_vs_unbind_svc(dest);
648                 kfree(dest);
649         }
650 }
651
652
653 static void
654 ip_vs_zero_stats(struct ip_vs_stats *stats)
655 {
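        /* zero every counter that precedes the lock member, leaving the
           spinlock itself untouched */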
656         spin_lock_bh(&stats->lock);
657         memset(stats, 0, (char *)&stats->lock - (char *)stats);
658         spin_unlock_bh(&stats->lock);
659         ip_vs_zero_estimator(stats);
660 }
661
662 /*
663  *      Update a destination in the given service
664  */
665 static void
666 __ip_vs_update_dest(struct ip_vs_service *svc,
667                     struct ip_vs_dest *dest, struct ip_vs_dest_user *udest)
668 {
669         int conn_flags;
670
671         /* set the weight and the flags */
672         atomic_set(&dest->weight, udest->weight);
673         conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
674
675         /* check if local node and update the flags */
676         if (inet_addr_type(udest->addr) == RTN_LOCAL) {
677                 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
678                         | IP_VS_CONN_F_LOCALNODE;
679         }
680
681         /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
682         if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
683                 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
684         } else {
685                 /*
686                  *    Put the real service in ip_vs_rtable if not present.
687                  *    For now only for NAT!
688                  */
689                 write_lock_bh(&__ip_vs_rs_lock);
690                 ip_vs_rs_hash(dest);
691                 write_unlock_bh(&__ip_vs_rs_lock);
692         }
693         atomic_set(&dest->conn_flags, conn_flags);
694
695         /* bind the service */
696         if (!dest->svc) {
697                 __ip_vs_bind_svc(dest, svc);
698         } else {
699                 if (dest->svc != svc) {
700                         __ip_vs_unbind_svc(dest);
701                         ip_vs_zero_stats(&dest->stats);
702                         __ip_vs_bind_svc(dest, svc);
703                 }
704         }
705
706         /* set the dest status flags */
707         dest->flags |= IP_VS_DEST_F_AVAILABLE;
708
709         if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
710                 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
711         dest->u_threshold = udest->u_threshold;
712         dest->l_threshold = udest->l_threshold;
713 }
714
715
716 /*
717  *      Create a destination for the given service
718  */
719 static int
720 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
721                struct ip_vs_dest **dest_p)
722 {
723         struct ip_vs_dest *dest;
724         unsigned atype;
725
726         EnterFunction(2);
727
728         atype = inet_addr_type(udest->addr);
729         if (atype != RTN_LOCAL && atype != RTN_UNICAST)
730                 return -EINVAL;
731
732         dest = kmalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
733         if (dest == NULL) {
734                 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
735                 return -ENOMEM;
736         }
737         memset(dest, 0, sizeof(struct ip_vs_dest));
738
739         dest->protocol = svc->protocol;
740         dest->vaddr = svc->addr;
741         dest->vport = svc->port;
742         dest->vfwmark = svc->fwmark;
743         dest->addr = udest->addr;
744         dest->port = udest->port;
745
746         atomic_set(&dest->activeconns, 0);
747         atomic_set(&dest->inactconns, 0);
748         atomic_set(&dest->persistconns, 0);
749         atomic_set(&dest->refcnt, 0);
750
751         INIT_LIST_HEAD(&dest->d_list);
752         dest->dst_lock = SPIN_LOCK_UNLOCKED;
753         dest->stats.lock = SPIN_LOCK_UNLOCKED;
754         __ip_vs_update_dest(svc, dest, udest);
755         ip_vs_new_estimator(&dest->stats);
756
757         *dest_p = dest;
758
759         LeaveFunction(2);
760         return 0;
761 }
762
763
764 /*
765  *      Add a destination into an existing service
766  */
767 static int
768 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
769 {
770         struct ip_vs_dest *dest;
771         __u32 daddr = udest->addr;
772         __u16 dport = udest->port;
773         int ret;
774
775         EnterFunction(2);
776
777         if (udest->weight < 0) {
778                 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
779                 return -ERANGE;
780         }
781
782         if (udest->l_threshold > udest->u_threshold) {
783                 IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
784                           "upper threshold\n");
785                 return -ERANGE;
786         }
787
788         /*
789          * Check if the dest already exists in the list
790          */
791         dest = ip_vs_lookup_dest(svc, daddr, dport);
792         if (dest != NULL) {
793                 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
794                 return -EEXIST;
795         }
796
797         /*
798          * Check if the dest already exists in the trash and
799          * is from the same service
800          */
801         dest = ip_vs_trash_get_dest(svc, daddr, dport);
802         if (dest != NULL) {
803                 IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
804                           "refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
805                           NIPQUAD(daddr), ntohs(dport),
806                           atomic_read(&dest->refcnt),
807                           dest->vfwmark,
808                           NIPQUAD(dest->vaddr),
809                           ntohs(dest->vport));
810                 __ip_vs_update_dest(svc, dest, udest);
811
812                 /*
813                  * Get the destination from the trash
814                  */
815                 list_del(&dest->n_list);
816
817                 ip_vs_new_estimator(&dest->stats);
818
819                 write_lock_bh(&__ip_vs_svc_lock);
820
821                 /*
822                  * Wait until all other svc users go away.
823                  */
824                 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
825
826                 list_add(&dest->n_list, &svc->destinations);
827                 svc->num_dests++;
828
829                 /* call the update_service function of its scheduler */
830                 svc->scheduler->update_service(svc);
831
832                 write_unlock_bh(&__ip_vs_svc_lock);
833                 return 0;
834         }
835
836         /*
837          * Allocate and initialize the dest structure
838          */
839         ret = ip_vs_new_dest(svc, udest, &dest);
840         if (ret) {
841                 return ret;
842         }
843
844         /*
845          * Add the dest entry into the list
846          */
847         atomic_inc(&dest->refcnt);
848
849         write_lock_bh(&__ip_vs_svc_lock);
850
851         /*
852          * Wait until all other svc users go away.
853          */
854         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
855
856         list_add(&dest->n_list, &svc->destinations);
857         svc->num_dests++;
858
859         /* call the update_service function of its scheduler */
860         svc->scheduler->update_service(svc);
861
862         write_unlock_bh(&__ip_vs_svc_lock);
863
864         LeaveFunction(2);
865
866         return 0;
867 }
868
869
870 /*
871  *      Edit a destination in the given service
872  */
873 static int
874 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
875 {
876         struct ip_vs_dest *dest;
877         __u32 daddr = udest->addr;
878         __u16 dport = udest->port;
879
880         EnterFunction(2);
881
882         if (udest->weight < 0) {
883                 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
884                 return -ERANGE;
885         }
886
887         if (udest->l_threshold > udest->u_threshold) {
888                 IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
889                           "upper threshold\n");
890                 return -ERANGE;
891         }
892
893         /*
894          *  Lookup the destination list
895          */
896         dest = ip_vs_lookup_dest(svc, daddr, dport);
897         if (dest == NULL) {
898                 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
899                 return -ENOENT;
900         }
901
902         __ip_vs_update_dest(svc, dest, udest);
903
904         /* call the update_service, because server weight may be changed */
905         svc->scheduler->update_service(svc);
906
907         LeaveFunction(2);
908
909         return 0;
910 }
911
912
913 /*
914  *      Delete a destination (must be already unlinked from the service)
915  */
916 static void __ip_vs_del_dest(struct ip_vs_dest *dest)
917 {
918         ip_vs_kill_estimator(&dest->stats);
919
920         /*
921          *  Remove it from the d-linked list with the real services.
922          */
923         write_lock_bh(&__ip_vs_rs_lock);
924         ip_vs_rs_unhash(dest);
925         write_unlock_bh(&__ip_vs_rs_lock);
926
927         /*
928          *  Decrease the refcnt of the dest, and free the dest
929          *  if nobody refers to it (refcnt=0). Otherwise, throw
930          *  the destination into the trash.
931          */
932         if (atomic_dec_and_test(&dest->refcnt)) {
933                 ip_vs_dst_reset(dest);
934                 /* simply decrease svc->refcnt here; let the caller check
935                    and release the service if nobody refers to it.
936                    Only user context can release destinations and services,
937                    and only one user context can update a virtual service
938                    at a time, so the operation here is safe */
939                 atomic_dec(&dest->svc->refcnt);
940                 kfree(dest);
941         } else {
942                 IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, refcnt=%d\n",
943                           NIPQUAD(dest->addr), ntohs(dest->port),
944                           atomic_read(&dest->refcnt));
945                 list_add(&dest->n_list, &ip_vs_dest_trash);
946                 atomic_inc(&dest->refcnt);
947         }
948 }
949
950
951 /*
952  *      Unlink a destination from the given service
953  */
954 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
955                                 struct ip_vs_dest *dest,
956                                 int svcupd)
957 {
958         dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
959
960         /*
961          *  Remove it from the d-linked destination list.
962          */
963         list_del(&dest->n_list);
964         svc->num_dests--;
965         if (svcupd) {
966                 /*
967                  *  Call the update_service function of its scheduler
968                  */
969                 svc->scheduler->update_service(svc);
970         }
971 }
972
973
974 /*
975  *      Delete a destination server in the given service
976  */
977 static int
978 ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest)
979 {
980         struct ip_vs_dest *dest;
981         __u32 daddr = udest->addr;
982         __u16 dport = udest->port;
983
984         EnterFunction(2);
985
986         dest = ip_vs_lookup_dest(svc, daddr, dport);
987         if (dest == NULL) {
988                 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
989                 return -ENOENT;
990         }
991
992         write_lock_bh(&__ip_vs_svc_lock);
993
994         /*
995          *      Wait until all other svc users go away.
996          */
997         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
998
999         /*
1000          *      Unlink dest from the service
1001          */
1002         __ip_vs_unlink_dest(svc, dest, 1);
1003
1004         write_unlock_bh(&__ip_vs_svc_lock);
1005
1006         /*
1007          *      Delete the destination
1008          */
1009         __ip_vs_del_dest(dest);
1010
1011         LeaveFunction(2);
1012
1013         return 0;
1014 }
1015
1016
1017 /*
1018  *      Add a service into the service hash table
1019  */
1020 static int
1021 ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
1022 {
1023         int ret = 0;
1024         struct ip_vs_scheduler *sched = NULL;
1025         struct ip_vs_service *svc = NULL;
1026
1027         /* increase the module use count */
1028         ip_vs_use_count_inc();
1029
1030         /* Lookup the scheduler by 'u->sched_name' */
1031         sched = ip_vs_scheduler_get(u->sched_name);
1032         if (sched == NULL) {
1033                 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1034                            u->sched_name);
1035                 ret = -ENOENT;
1036                 goto out_mod_dec;
1037         }
1038
1039         svc = (struct ip_vs_service *)
1040                 kmalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1041         if (svc == NULL) {
1042                 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1043                 ret = -ENOMEM;
1044                 goto out_err;
1045         }
1046         memset(svc, 0, sizeof(struct ip_vs_service));
1047
1048         /* I'm the first user of the service */
1049         atomic_set(&svc->usecnt, 1);
1050         atomic_set(&svc->refcnt, 0);
1051
1052         svc->protocol = u->protocol;
1053         svc->addr = u->addr;
1054         svc->port = u->port;
1055         svc->fwmark = u->fwmark;
1056         svc->flags = u->flags;
1057         svc->timeout = u->timeout * HZ;
1058         svc->netmask = u->netmask;
1059
1060         INIT_LIST_HEAD(&svc->destinations);
1061         svc->sched_lock = RW_LOCK_UNLOCKED;
1062         svc->stats.lock = SPIN_LOCK_UNLOCKED;
1063
1064         /* Bind the scheduler */
1065         ret = ip_vs_bind_scheduler(svc, sched);
1066         if (ret)
1067                 goto out_err;
1068         sched = NULL;
1069
1070         /* Update the virtual service counters */
1071         if (svc->port == FTPPORT)
1072                 atomic_inc(&ip_vs_ftpsvc_counter);
1073         else if (svc->port == 0)
1074                 atomic_inc(&ip_vs_nullsvc_counter);
1075
1076         ip_vs_new_estimator(&svc->stats);
1077         ip_vs_num_services++;
1078
1079         /* Hash the service into the service table */
1080         write_lock_bh(&__ip_vs_svc_lock);
1081         ip_vs_svc_hash(svc);
1082         write_unlock_bh(&__ip_vs_svc_lock);
1083
1084         *svc_p = svc;
1085         return 0;
1086
1087   out_err:
1088         if (svc != NULL) {
1089                 if (svc->scheduler)
1090                         ip_vs_unbind_scheduler(svc);
1091                 if (svc->inc) {
1092                         local_bh_disable();
1093                         ip_vs_app_inc_put(svc->inc);
1094                         local_bh_enable();
1095                 }
1096                 kfree(svc);
1097         }
1098         ip_vs_scheduler_put(sched);
1099
1100   out_mod_dec:
1101         /* decrease the module use count */
1102         ip_vs_use_count_dec();
1103
1104         return ret;
1105 }
1106
1107
1108 /*
1109  *      Edit a service and bind it with a new scheduler
1110  */
1111 static int
1112 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u)
1113 {
1114         struct ip_vs_scheduler *sched, *old_sched;
1115         int ret = 0;
1116
1117         /*
1118          * Lookup the scheduler by 'u->sched_name'
1119          */
1120         sched = ip_vs_scheduler_get(u->sched_name);
1121         if (sched == NULL) {
1122                 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1123                            u->sched_name);
1124                 return -ENOENT;
1125         }
1126         old_sched = sched;
1127
1128         write_lock_bh(&__ip_vs_svc_lock);
1129
1130         /*
1131          * Wait until all other svc users go away.
1132          */
1133         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1134
1135         /*
1136          * Set the flags and timeout value
1137          */
1138         svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1139         svc->timeout = u->timeout * HZ;
1140         svc->netmask = u->netmask;
1141
1142         old_sched = svc->scheduler;
1143         if (sched != old_sched) {
1144                 /*
1145                  * Unbind the old scheduler
1146                  */
1147                 if ((ret = ip_vs_unbind_scheduler(svc))) {
1148                         old_sched = sched;
1149                         goto out;
1150                 }
1151
1152                 /*
1153                  * Bind the new scheduler
1154                  */
1155                 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1156                         /*
1157                          * If ip_vs_bind_scheduler fails, restore the old
1158                          * scheduler.
1159                          * The most likely reason for failure is lack of
1160                          * memory.
1161                          *
1162                          * The question is whether the old scheduler can
1163                          * always be restored. TODO: if it ever cannot be, we
1164                          * must delete the service, otherwise the system may crash.
1165                          */
1166                         ip_vs_bind_scheduler(svc, old_sched);
1167                         old_sched = sched;
1168                         goto out;
1169                 }
1170         }
1171
1172   out:
1173         write_unlock_bh(&__ip_vs_svc_lock);
1174
1175         if (old_sched)
1176                 ip_vs_scheduler_put(old_sched);
1177
1178         return ret;
1179 }
1180
1181
1182 /*
1183  *      Delete a service from the service list
1184  *      - The service must be unlinked, unlocked and not referenced!
1185  *      - We are called under _bh lock
1186  */
1187 static void __ip_vs_del_service(struct ip_vs_service *svc)
1188 {
1189         struct ip_vs_dest *dest, *nxt;
1190         struct ip_vs_scheduler *old_sched;
1191
1192         ip_vs_num_services--;
1193         ip_vs_kill_estimator(&svc->stats);
1194
1195         /* Unbind scheduler */
1196         old_sched = svc->scheduler;
1197         ip_vs_unbind_scheduler(svc);
1198         if (old_sched)
1199                 ip_vs_scheduler_put(old_sched);
1200
1201         /* Unbind app inc */
1202         if (svc->inc) {
1203                 ip_vs_app_inc_put(svc->inc);
1204                 svc->inc = NULL;
1205         }
1206
1207         /*
1208          *    Unlink the whole destination list
1209          */
1210         list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1211                 __ip_vs_unlink_dest(svc, dest, 0);
1212                 __ip_vs_del_dest(dest);
1213         }
1214
1215         /*
1216          *    Update the virtual service counters
1217          */
1218         if (svc->port == FTPPORT)
1219                 atomic_dec(&ip_vs_ftpsvc_counter);
1220         else if (svc->port == 0)
1221                 atomic_dec(&ip_vs_nullsvc_counter);
1222
1223         /*
1224          *    Free the service if nobody refers to it
1225          */
1226         if (atomic_read(&svc->refcnt) == 0)
1227                 kfree(svc);
1228
1229         /* decrease the module use count */
1230         ip_vs_use_count_dec();
1231 }
1232
1233 /*
1234  *      Delete a service from the service list
1235  */
1236 static int ip_vs_del_service(struct ip_vs_service *svc)
1237 {
1238         if (svc == NULL)
1239                 return -EEXIST;
1240
1241         /*
1242          * Unhash it from the service table
1243          */
1244         write_lock_bh(&__ip_vs_svc_lock);
1245
1246         ip_vs_svc_unhash(svc);
1247
1248         /*
1249          * Wait until all the svc users go away.
1250          */
1251         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1252
1253         __ip_vs_del_service(svc);
1254
1255         write_unlock_bh(&__ip_vs_svc_lock);
1256
1257         return 0;
1258 }
1259
1260
1261 /*
1262  *      Flush all the virtual services
1263  */
1264 static int ip_vs_flush(void)
1265 {
1266         int idx;
1267         struct ip_vs_service *svc, *nxt;
1268
1269         /*
1270          * Flush the service table hashed by <protocol,addr,port>
1271          */
1272         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1273                 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1274                         write_lock_bh(&__ip_vs_svc_lock);
1275                         ip_vs_svc_unhash(svc);
1276                         /*
1277                          * Wait until all the svc users go away.
1278                          */
1279                         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1280                         __ip_vs_del_service(svc);
1281                         write_unlock_bh(&__ip_vs_svc_lock);
1282                 }
1283         }
1284
1285         /*
1286          * Flush the service table hashed by fwmark
1287          */
1288         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1289                 list_for_each_entry_safe(svc, nxt,
1290                                          &ip_vs_svc_fwm_table[idx], f_list) {
1291                         write_lock_bh(&__ip_vs_svc_lock);
1292                         ip_vs_svc_unhash(svc);
1293                         /*
1294                          * Wait until all the svc users go away.
1295                          */
1296                         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1297                         __ip_vs_del_service(svc);
1298                         write_unlock_bh(&__ip_vs_svc_lock);
1299                 }
1300         }
1301
1302         return 0;
1303 }
1304
1305
1306 /*
1307  *      Zero counters in a service or all services
1308  */
1309 static int ip_vs_zero_service(struct ip_vs_service *svc)
1310 {
1311         struct ip_vs_dest *dest;
1312
1313         write_lock_bh(&__ip_vs_svc_lock);
1314         list_for_each_entry(dest, &svc->destinations, n_list) {
1315                 ip_vs_zero_stats(&dest->stats);
1316         }
1317         ip_vs_zero_stats(&svc->stats);
1318         write_unlock_bh(&__ip_vs_svc_lock);
1319         return 0;
1320 }
1321
1322 static int ip_vs_zero_all(void)
1323 {
1324         int idx;
1325         struct ip_vs_service *svc;
1326
1327         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1328                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1329                         ip_vs_zero_service(svc);
1330                 }
1331         }
1332
1333         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1334                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1335                         ip_vs_zero_service(svc);
1336                 }
1337         }
1338
1339         ip_vs_zero_stats(&ip_vs_stats);
1340         return 0;
1341 }
1342
1343
1344 static int
1345 proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1346                      void *buffer, size_t *lenp)
1347 {
1348         int *valp = table->data;
1349         int val = *valp;
1350         int rc;
1351
1352         rc = proc_dointvec(table, write, filp, buffer, lenp);
1353         if (write && (*valp != val)) {
1354                 if ((*valp < 0) || (*valp > 3)) {
1355                         /* Restore the correct value */
1356                         *valp = val;
1357                 } else {
1358                         local_bh_disable();
1359                         update_defense_level();
1360                         local_bh_enable();
1361                 }
1362         }
1363         return rc;
1364 }
1365
1366
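/*
 *      sync_threshold takes a pair of non-negative integers with the first
 *      strictly smaller than the second; an invalid write is rejected and
 *      the previous values are restored.
 */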
1367 static int
1368 proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
1369                        void *buffer, size_t *lenp)
1370 {
1371         int *valp = table->data;
1372         int val[2];
1373         int rc;
1374
1375         /* backup the value first */
1376         memcpy(val, valp, sizeof(val));
1377
1378         rc = proc_dointvec(table, write, filp, buffer, lenp);
1379         if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1380                 /* Restore the correct value */
1381                 memcpy(valp, val, sizeof(val));
1382         }
1383         return rc;
1384 }
1385
1386
1387 /*
1388  *      IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1389  */
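/*
 *      The variables are exported as a chain of sysctl directories:
 *      root_dir ("net") -> ipv4_dir ("ipv4") -> vs_dir ("vs") -> vs_vars,
 *      with sysctl_header storing the resulting registration handle.
 */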
1390 struct ip_vs_sysctl_table {
1391         struct ctl_table_header *sysctl_header;
1392         ctl_table vs_vars[NET_IPV4_VS_LAST];
1393         ctl_table vs_dir[2];
1394         ctl_table ipv4_dir[2];
1395         ctl_table root_dir[2];
1396 };
1397
1398 static struct ip_vs_sysctl_table ipv4_vs_table = {
1399         NULL,
1400         {{NET_IPV4_VS_AMEMTHRESH, "amemthresh",
1401           &sysctl_ip_vs_amemthresh, sizeof(int), 0644, NULL,
1402           &proc_dointvec},
1403 #ifdef CONFIG_IP_VS_DEBUG
1404          {NET_IPV4_VS_DEBUG_LEVEL, "debug_level",
1405           &sysctl_ip_vs_debug_level, sizeof(int), 0644, NULL,
1406           &proc_dointvec},
1407 #endif
1408          {NET_IPV4_VS_AMDROPRATE, "am_droprate",
1409           &sysctl_ip_vs_am_droprate, sizeof(int), 0644, NULL,
1410           &proc_dointvec},
1411          {NET_IPV4_VS_DROP_ENTRY, "drop_entry",
1412           &sysctl_ip_vs_drop_entry, sizeof(int), 0644, NULL,
1413           &proc_do_defense_mode},
1414          {NET_IPV4_VS_DROP_PACKET, "drop_packet",
1415           &sysctl_ip_vs_drop_packet, sizeof(int), 0644, NULL,
1416           &proc_do_defense_mode},
1417          {NET_IPV4_VS_SECURE_TCP, "secure_tcp",
1418           &sysctl_ip_vs_secure_tcp, sizeof(int), 0644, NULL,
1419           &proc_do_defense_mode},
1420 #if 0
1421          {NET_IPV4_VS_TO_ES, "timeout_established",
1422           &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1423           sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1424          {NET_IPV4_VS_TO_SS, "timeout_synsent",
1425           &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1426           sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1427          {NET_IPV4_VS_TO_SR, "timeout_synrecv",
1428           &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1429           sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1430          {NET_IPV4_VS_TO_FW, "timeout_finwait",
1431           &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1432           sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1433          {NET_IPV4_VS_TO_TW, "timeout_timewait",
1434           &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1435           sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1436          {NET_IPV4_VS_TO_CL, "timeout_close",
1437           &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1438           sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1439          {NET_IPV4_VS_TO_CW, "timeout_closewait",
1440           &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1441           sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1442          {NET_IPV4_VS_TO_LA, "timeout_lastack",
1443           &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1444           sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1445          {NET_IPV4_VS_TO_LI, "timeout_listen",
1446           &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1447           sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1448          {NET_IPV4_VS_TO_SA, "timeout_synack",
1449           &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1450           sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1451          {NET_IPV4_VS_TO_UDP, "timeout_udp",
1452           &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1453           sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1454          {NET_IPV4_VS_TO_ICMP, "timeout_icmp",
1455           &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1456           sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1457 #endif
1458          {NET_IPV4_VS_CACHE_BYPASS, "cache_bypass",
1459           &sysctl_ip_vs_cache_bypass, sizeof(int), 0644, NULL,
1460           &proc_dointvec},
1461          {NET_IPV4_VS_EXPIRE_NODEST_CONN, "expire_nodest_conn",
1462           &sysctl_ip_vs_expire_nodest_conn, sizeof(int), 0644, NULL,
1463           &proc_dointvec},
1464          {NET_IPV4_VS_SYNC_THRESHOLD, "sync_threshold",
1465           &sysctl_ip_vs_sync_threshold, sizeof(sysctl_ip_vs_sync_threshold),
1466           0644, NULL, &proc_do_sync_threshold},
1467          {NET_IPV4_VS_NAT_ICMP_SEND, "nat_icmp_send",
1468           &sysctl_ip_vs_nat_icmp_send, sizeof(int), 0644, NULL,
1469           &proc_dointvec},
1470          {0}},
1471         {{NET_IPV4_VS, "vs", NULL, 0, 0555, ipv4_vs_table.vs_vars},
1472          {0}},
1473         {{NET_IPV4, "ipv4", NULL, 0, 0555, ipv4_vs_table.vs_dir},
1474          {0}},
1475         {{CTL_NET, "net", NULL, 0, 0555, ipv4_vs_table.ipv4_dir},
1476          {0}}
1477 };
1478
1479 #ifdef CONFIG_PROC_FS
1480
1481 struct ip_vs_iter {
1482         struct list_head *table;
1483         int bucket;
1484 };
1485
1486 /*
1487  *      Write the contents of the VS rule table to a PROCfs file.
1488  *      (It is kept just for backward compatibility)
1489  */
1490 static inline const char *ip_vs_fwd_name(unsigned flags)
1491 {
1492         switch (flags & IP_VS_CONN_F_FWD_MASK) {
1493         case IP_VS_CONN_F_LOCALNODE:
1494                 return "Local";
1495         case IP_VS_CONN_F_TUNNEL:
1496                 return "Tunnel";
1497         case IP_VS_CONN_F_DROUTE:
1498                 return "Route";
1499         default:
1500                 return "Masq";
1501         }
1502 }
1503
1504
1505 /* Get the Nth entry in the two lists */
1506 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1507 {
1508         struct ip_vs_iter *iter = seq->private;
1509         int idx;
1510         struct ip_vs_service *svc;
1511
1512         /* look in hash by protocol */
1513         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1514                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1515                         if (pos-- == 0){
1516                                 iter->table = ip_vs_svc_table;
1517                                 iter->bucket = idx;
1518                                 return svc;
1519                         }
1520                 }
1521         }
1522
1523         /* keep looking in fwmark */
1524         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1525                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1526                         if (pos-- == 0) {
1527                                 iter->table = ip_vs_svc_fwm_table;
1528                                 iter->bucket = idx;
1529                                 return svc;
1530                         }
1531                 }
1532         }
1533
1534         return NULL;
1535 }
1536
1537 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1538 {
1539
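        /* position 0 is the header line (SEQ_START_TOKEN); table entries
           start at *pos - 1 */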
1540         read_lock_bh(&__ip_vs_svc_lock);
1541         return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1542 }
1543
1544
1545 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1546 {
1547         struct list_head *e;
1548         struct ip_vs_iter *iter;
1549         struct ip_vs_service *svc;
1550
1551         ++*pos;
1552         if (v == SEQ_START_TOKEN)
1553                 return ip_vs_info_array(seq,0);
1554
1555         svc = v;
1556         iter = seq->private;
1557
1558         if (iter->table == ip_vs_svc_table) {
1559                 /* next service in table hashed by protocol */
1560                 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1561                         return list_entry(e, struct ip_vs_service, s_list);
1562
1563
1564                 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1565                         list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1566                                             s_list) {
1567                                 return svc;
1568                         }
1569                 }
1570
1571                 iter->table = ip_vs_svc_fwm_table;
1572                 iter->bucket = -1;
1573                 goto scan_fwmark;
1574         }
1575
1576         /* next service in hashed by fwmark */
1577         if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1578                 return list_entry(e, struct ip_vs_service, f_list);
1579
1580  scan_fwmark:
1581         while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1582                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1583                                     f_list)
1584                         return svc;
1585         }
1586
1587         return NULL;
1588 }
1589
1590 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1591 {
1592         read_unlock_bh(&__ip_vs_svc_lock);
1593 }
1594
1595
1596 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1597 {
1598         if (v == SEQ_START_TOKEN) {
1599                 seq_printf(seq,
1600                         "IP Virtual Server version %d.%d.%d (size=%d)\n",
1601                         NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1602                 seq_puts(seq,
1603                          "Prot LocalAddress:Port Scheduler Flags\n");
1604                 seq_puts(seq,
1605                          "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1606         } else {
1607                 const struct ip_vs_service *svc = v;
1608                 const struct ip_vs_iter *iter = seq->private;
1609                 const struct ip_vs_dest *dest;
1610
1611                 if (iter->table == ip_vs_svc_table)
1612                         seq_printf(seq, "%s  %08X:%04X %s ",
1613                                    ip_vs_proto_name(svc->protocol),
1614                                    ntohl(svc->addr),
1615                                    ntohs(svc->port),
1616                                    svc->scheduler->name);
1617                 else
1618                         seq_printf(seq, "FWM  %08X %s ",
1619                                    svc->fwmark, svc->scheduler->name);
1620
1621                 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1622                         seq_printf(seq, "persistent %d %08X\n",
1623                                 svc->timeout,
1624                                 ntohl(svc->netmask));
1625                 else
1626                         seq_putc(seq, '\n');
1627
1628                 list_for_each_entry(dest, &svc->destinations, n_list) {
1629                         seq_printf(seq,
1630                                    "  -> %08X:%04X      %-7s %-6d %-10d %-10d\n",
1631                                    ntohl(dest->addr), ntohs(dest->port),
1632                                    ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1633                                    atomic_read(&dest->weight),
1634                                    atomic_read(&dest->activeconns),
1635                                    atomic_read(&dest->inactconns));
1636                 }
1637         }
1638         return 0;
1639 }
1640
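/*
 * Illustrative /proc/net/ip_vs output produced by the handlers above.
 * The concrete addresses, counters, scheduler names and the version
 * number are made up for the example; note that addresses, ports,
 * fwmarks and the persistence netmask are printed in hexadecimal:
 *
 *	IP Virtual Server version 1.2.1 (size=4096)
 *	Prot LocalAddress:Port Scheduler Flags
 *	  -> RemoteAddress:Port Forward Weight ActiveConn InActConn
 *	TCP  C0A80001:0050 wlc persistent 300 FFFFFF00
 *	  -> 0A000001:0050      Masq    1      12         3
 *	  -> 0A000002:0050      Route   2      25         7
 *	FWM  00000001 rr
 *	  -> 0A000003:0050      Tunnel  1      4          0
 */
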
1641 static struct seq_operations ip_vs_info_seq_ops = {
1642         .start = ip_vs_info_seq_start,
1643         .next  = ip_vs_info_seq_next,
1644         .stop  = ip_vs_info_seq_stop,
1645         .show  = ip_vs_info_seq_show,
1646 };
1647
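/*
 * Open handler for /proc/net/ip_vs: allocate a per-reader iterator,
 * attach it as the seq_file private data, and let seq_release_private()
 * free it again when the file is closed.
 */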
1648 static int ip_vs_info_open(struct inode *inode, struct file *file)
1649 {
1650         struct seq_file *seq;
1651         int rc = -ENOMEM;
1652         struct ip_vs_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
1653
1654         if (!s)
1655                 goto out;
1656
1657         rc = seq_open(file, &ip_vs_info_seq_ops);
1658         if (rc)
1659                 goto out_kfree;
1660
1661         seq          = file->private_data;
1662         seq->private = s;
1663         memset(s, 0, sizeof(*s));
1664 out:
1665         return rc;
1666 out_kfree:
1667         kfree(s);
1668         goto out;
1669 }
1670
1671 static struct file_operations ip_vs_info_fops = {
1672         .owner   = THIS_MODULE,
1673         .open    = ip_vs_info_open,
1674         .read    = seq_read,
1675         .llseek  = seq_lseek,
1676         .release = seq_release_private,
1677 };
1678
1679 #endif
1680
1681 struct ip_vs_stats ip_vs_stats;
1682
1683 #ifdef CONFIG_PROC_FS
1684 static int ip_vs_stats_show(struct seq_file *seq, void *v)
1685 {
1686
1687 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
1688         seq_puts(seq,
1689                  "   Total Incoming Outgoing         Incoming         Outgoing\n");
1690         seq_printf(seq,
1691                    "   Conns  Packets  Packets            Bytes            Bytes\n");
1692
1693         spin_lock_bh(&ip_vs_stats.lock);
1694         seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns,
1695                    ip_vs_stats.inpkts, ip_vs_stats.outpkts,
1696                    (unsigned long long) ip_vs_stats.inbytes,
1697                    (unsigned long long) ip_vs_stats.outbytes);
1698
1699 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1700         seq_puts(seq,
1701                    " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
1702         seq_printf(seq, "%8X %8X %8X %16X %16X\n",
1703                         ip_vs_stats.cps,
1704                         ip_vs_stats.inpps,
1705                         ip_vs_stats.outpps,
1706                         ip_vs_stats.inbps,
1707                         ip_vs_stats.outbps);
1708         spin_unlock_bh(&ip_vs_stats.lock);
1709
1710         return 0;
1711 }
1712
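/*
 * Note: every counter emitted by ip_vs_stats_show() above is printed in
 * hexadecimal (%X / %LX), so a line such as
 *
 *	      1C       6F       5A             2D3F             41E8
 *
 * (values made up for illustration) must be parsed as hex by user-space
 * readers.
 */
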
1713 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1714 {
1715         return single_open(file, ip_vs_stats_show, NULL);
1716 }
1717
1718 static struct file_operations ip_vs_stats_fops = {
1719         .owner = THIS_MODULE,
1720         .open = ip_vs_stats_seq_open,
1721         .read = seq_read,
1722         .llseek = seq_lseek,
1723         .release = single_release,
1724 };
1725
1726 #endif
1727
1728 /*
1729  *      Set timeout values for tcp, tcpfin and udp in the timeout_table.
1730  */
1731 static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
1732 {
1733         IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1734                   u->tcp_timeout,
1735                   u->tcp_fin_timeout,
1736                   u->udp_timeout);
1737
1738 #ifdef CONFIG_IP_VS_PROTO_TCP
1739         if (u->tcp_timeout) {
1740                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
1741                         = u->tcp_timeout * HZ;
1742         }
1743
1744         if (u->tcp_fin_timeout) {
1745                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
1746                         = u->tcp_fin_timeout * HZ;
1747         }
1748 #endif
1749
1750 #ifdef CONFIG_IP_VS_PROTO_UDP
1751         if (u->udp_timeout) {
1752                 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
1753                         = u->udp_timeout * HZ;
1754         }
1755 #endif
1756         return 0;
1757 }
1758
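/*
 * Illustrative sketch (not part of this module): how a user-space tool in
 * the spirit of ipvsadm could reach ip_vs_set_timeout() above through the
 * netfilter sockopt interface registered at the bottom of this file.  The
 * raw socket and the concrete values are assumptions made for the example
 * only; the caller needs CAP_NET_ADMIN, a zero field leaves the
 * corresponding timeout unchanged, and the values are given in seconds
 * (the kernel multiplies by HZ).
 *
 *	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
 *	struct ip_vs_timeout_user to = {
 *		.tcp_timeout     = 900,
 *		.tcp_fin_timeout = 120,
 *		.udp_timeout     = 300,
 *	};
 *	setsockopt(fd, IPPROTO_IP, IP_VS_SO_SET_TIMEOUT, &to, sizeof(to));
 */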
1759
1760 #define SET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
1761 #define SERVICE_ARG_LEN         (sizeof(struct ip_vs_service_user))
1762 #define SVCDEST_ARG_LEN         (sizeof(struct ip_vs_service_user) +    \
1763                                  sizeof(struct ip_vs_dest_user))
1764 #define TIMEOUT_ARG_LEN         (sizeof(struct ip_vs_timeout_user))
1765 #define DAEMON_ARG_LEN          (sizeof(struct ip_vs_daemon_user))
1766 #define MAX_ARG_LEN             SVCDEST_ARG_LEN
1767
1768 static unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1769         [SET_CMDID(IP_VS_SO_SET_ADD)]           = SERVICE_ARG_LEN,
1770         [SET_CMDID(IP_VS_SO_SET_EDIT)]          = SERVICE_ARG_LEN,
1771         [SET_CMDID(IP_VS_SO_SET_DEL)]           = SERVICE_ARG_LEN,
1772         [SET_CMDID(IP_VS_SO_SET_FLUSH)]         = 0,
1773         [SET_CMDID(IP_VS_SO_SET_ADDDEST)]       = SVCDEST_ARG_LEN,
1774         [SET_CMDID(IP_VS_SO_SET_DELDEST)]       = SVCDEST_ARG_LEN,
1775         [SET_CMDID(IP_VS_SO_SET_EDITDEST)]      = SVCDEST_ARG_LEN,
1776         [SET_CMDID(IP_VS_SO_SET_TIMEOUT)]       = TIMEOUT_ARG_LEN,
1777         [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]   = DAEMON_ARG_LEN,
1778         [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]    = DAEMON_ARG_LEN,
1779         [SET_CMDID(IP_VS_SO_SET_ZERO)]          = SERVICE_ARG_LEN,
1780 };
1781
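/*
 * Illustrative sketch (not part of this module) of the argument layout the
 * *DEST commands expect: a struct ip_vs_service_user selecting the virtual
 * service, immediately followed by a struct ip_vs_dest_user describing the
 * real server, in one flat buffer of SVCDEST_ARG_LEN bytes -- exactly how
 * do_ip_vs_set_ctl() below splits it into usvc and udest.  The addresses,
 * port and weight are made up for the example, addresses and ports are in
 * network byte order, and the service itself would have been created
 * beforehand with IP_VS_SO_SET_ADD and a SERVICE_ARG_LEN buffer.
 *
 *	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
 *	struct ip_vs_service_user svc = {
 *		.protocol   = IPPROTO_TCP,
 *		.addr       = htonl(0xC0A80001),
 *		.port       = htons(80),
 *		.sched_name = "wlc",
 *	};
 *	struct ip_vs_dest_user dst = {
 *		.addr       = htonl(0x0A000001),
 *		.port       = htons(80),
 *		.conn_flags = IP_VS_CONN_F_MASQ,
 *		.weight     = 1,
 *	};
 *	unsigned char buf[sizeof(svc) + sizeof(dst)];
 *	memcpy(buf, &svc, sizeof(svc));
 *	memcpy(buf + sizeof(svc), &dst, sizeof(dst));
 *	setsockopt(fd, IPPROTO_IP, IP_VS_SO_SET_ADDDEST, buf, sizeof(buf));
 */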
1782 static int
1783 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1784 {
1785         int ret;
1786         unsigned char arg[MAX_ARG_LEN];
1787         struct ip_vs_service_user *usvc;
1788         struct ip_vs_service *svc;
1789         struct ip_vs_dest_user *udest;
1790
1791         if (!capable(CAP_NET_ADMIN))
1792                 return -EPERM;
1793
1794         if (len != set_arglen[SET_CMDID(cmd)]) {
1795                 IP_VS_ERR("set_ctl: len %u != %u\n",
1796                           len, set_arglen[SET_CMDID(cmd)]);
1797                 return -EINVAL;
1798         }
1799
1800         if (copy_from_user(arg, user, len) != 0)
1801                 return -EFAULT;
1802
1803         /* increase the module use count */
1804         ip_vs_use_count_inc();
1805
1806         if (down_interruptible(&__ip_vs_mutex)) {
1807                 ret = -ERESTARTSYS;
1808                 goto out_dec;
1809         }
1810
1811         if (cmd == IP_VS_SO_SET_FLUSH) {
1812                 /* Flush the virtual service */
1813                 ret = ip_vs_flush();
1814                 goto out_unlock;
1815         } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
1816                 /* Set timeout values for (tcp tcpfin udp) */
1817                 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
1818                 goto out_unlock;
1819         } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
1820                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
1821                 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
1822                 goto out_unlock;
1823         } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
1824                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
1825                 ret = stop_sync_thread(dm->state);
1826                 goto out_unlock;
1827         }
1828
1829         usvc = (struct ip_vs_service_user *)arg;
1830         udest = (struct ip_vs_dest_user *)(usvc + 1);
1831
1832         if (cmd == IP_VS_SO_SET_ZERO) {
1833                 /* if no service address is set, zero the counters in all services */
1834                 if (!usvc->fwmark && !usvc->addr && !usvc->port) {
1835                         ret = ip_vs_zero_all();
1836                         goto out_unlock;
1837                 }
1838         }
1839
1840         /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
1841         if (usvc->protocol!=IPPROTO_TCP && usvc->protocol!=IPPROTO_UDP) {
1842                 IP_VS_INFO("vs_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
1843                            usvc->protocol, NIPQUAD(usvc->addr),
1844                            ntohs(usvc->port), usvc->sched_name);
1845                 ret = -EFAULT;
1846                 goto out_unlock;
1847         }
1848
1849         /* Lookup the exact service by <protocol, addr, port> or fwmark */
1850         if (usvc->fwmark == 0)
1851                 svc = __ip_vs_service_get(usvc->protocol,
1852                                           usvc->addr, usvc->port);
1853         else
1854                 svc = __ip_vs_svc_fwm_get(usvc->fwmark);
1855
1856         if (cmd != IP_VS_SO_SET_ADD
1857             && (svc == NULL || svc->protocol != usvc->protocol)) {
1858                 ret = -ESRCH;
1859                 goto out_unlock;
1860         }
1861
1862         switch (cmd) {
1863         case IP_VS_SO_SET_ADD:
1864                 if (svc != NULL)
1865                         ret = -EEXIST;
1866                 else
1867                         ret = ip_vs_add_service(usvc, &svc);
1868                 break;
1869         case IP_VS_SO_SET_EDIT:
1870                 ret = ip_vs_edit_service(svc, usvc);
1871                 break;
1872         case IP_VS_SO_SET_DEL:
1873                 ret = ip_vs_del_service(svc);
1874                 if (!ret)
1875                         goto out_unlock;
1876                 break;
1877         case IP_VS_SO_SET_ZERO:
1878                 ret = ip_vs_zero_service(svc);
1879                 break;
1880         case IP_VS_SO_SET_ADDDEST:
1881                 ret = ip_vs_add_dest(svc, udest);
1882                 break;
1883         case IP_VS_SO_SET_EDITDEST:
1884                 ret = ip_vs_edit_dest(svc, udest);
1885                 break;
1886         case IP_VS_SO_SET_DELDEST:
1887                 ret = ip_vs_del_dest(svc, udest);
1888                 break;
1889         default:
1890                 ret = -EINVAL;
1891         }
1892
1893         if (svc)
1894                 ip_vs_service_put(svc);
1895
1896   out_unlock:
1897         up(&__ip_vs_mutex);
1898   out_dec:
1899         /* decrease the module use count */
1900         ip_vs_use_count_dec();
1901
1902         return ret;
1903 }
1904
1905
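/*
 * Snapshot the counters of *src into the user-visible structure.  This
 * relies on struct ip_vs_stats keeping all of its counters in front of
 * the 'lock' member: everything up to (but not including) the lock is
 * copied in one memcpy while the lock is held.
 */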
1906 static void
1907 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
1908 {
1909         spin_lock_bh(&src->lock);
1910         memcpy(dst, src, (char*)&src->lock - (char*)src);
1911         spin_unlock_bh(&src->lock);
1912 }
1913
1914 static void
1915 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
1916 {
1917         dst->protocol = src->protocol;
1918         dst->addr = src->addr;
1919         dst->port = src->port;
1920         dst->fwmark = src->fwmark;
1921         strcpy(dst->sched_name, src->scheduler->name);
1922         dst->flags = src->flags;
1923         dst->timeout = src->timeout / HZ;
1924         dst->netmask = src->netmask;
1925         dst->num_dests = src->num_dests;
1926         ip_vs_copy_stats(&dst->stats, &src->stats);
1927 }
1928
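/*
 * Copy at most get->num_services entries to user space, walking first the
 * <protocol, addr, port>-hashed service table and then the fwmark-hashed
 * table.  The caller (do_ip_vs_get_ctl) has already checked that the
 * reported buffer length matches get->num_services entries.
 */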
1929 static inline int
1930 __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
1931                             struct ip_vs_get_services __user *uptr)
1932 {
1933         int idx, count=0;
1934         struct ip_vs_service *svc;
1935         struct ip_vs_service_entry entry;
1936         int ret = 0;
1937
1938         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1939                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1940                         if (count >= get->num_services)
1941                                 goto out;
1942                         ip_vs_copy_service(&entry, svc);
1943                         if (copy_to_user(&uptr->entrytable[count],
1944                                          &entry, sizeof(entry))) {
1945                                 ret = -EFAULT;
1946                                 goto out;
1947                         }
1948                         count++;
1949                 }
1950         }
1951
1952         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1953                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1954                         if (count >= get->num_services)
1955                                 goto out;
1956                         ip_vs_copy_service(&entry, svc);
1957                         if (copy_to_user(&uptr->entrytable[count],
1958                                          &entry, sizeof(entry))) {
1959                                 ret = -EFAULT;
1960                                 goto out;
1961                         }
1962                         count++;
1963                 }
1964         }
1965   out:
1966         return ret;
1967 }
1968
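/*
 * Same pattern as above, but for the real servers of one virtual service:
 * the service is looked up by fwmark or by <protocol, addr, port>, and at
 * most get->num_dests entries are copied to user space (the user-space
 * caller sizes the buffer the same way as for IP_VS_SO_GET_SERVICES).
 */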
1969 static inline int
1970 __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
1971                          struct ip_vs_get_dests __user *uptr)
1972 {
1973         struct ip_vs_service *svc;
1974         int ret = 0;
1975
1976         if (get->fwmark)
1977                 svc = __ip_vs_svc_fwm_get(get->fwmark);
1978         else
1979                 svc = __ip_vs_service_get(get->protocol,
1980                                           get->addr, get->port);
1981         if (svc) {
1982                 int count = 0;
1983                 struct ip_vs_dest *dest;
1984                 struct ip_vs_dest_entry entry;
1985
1986                 list_for_each_entry(dest, &svc->destinations, n_list) {
1987                         if (count >= get->num_dests)
1988                                 break;
1989
1990                         entry.addr = dest->addr;
1991                         entry.port = dest->port;
1992                         entry.conn_flags = atomic_read(&dest->conn_flags);
1993                         entry.weight = atomic_read(&dest->weight);
1994                         entry.u_threshold = dest->u_threshold;
1995                         entry.l_threshold = dest->l_threshold;
1996                         entry.activeconns = atomic_read(&dest->activeconns);
1997                         entry.inactconns = atomic_read(&dest->inactconns);
1998                         entry.persistconns = atomic_read(&dest->persistconns);
1999                         ip_vs_copy_stats(&entry.stats, &dest->stats);
2000                         if (copy_to_user(&uptr->entrytable[count],
2001                                          &entry, sizeof(entry))) {
2002                                 ret = -EFAULT;
2003                                 break;
2004                         }
2005                         count++;
2006                 }
2007                 ip_vs_service_put(svc);
2008         } else
2009                 ret = -ESRCH;
2010         return ret;
2011 }
2012
2013 static inline void
2014 __ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2015 {
2016 #ifdef CONFIG_IP_VS_PROTO_TCP
2017         u->tcp_timeout =
2018                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2019         u->tcp_fin_timeout =
2020                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2021 #endif
2022 #ifdef CONFIG_IP_VS_PROTO_UDP
2023         u->udp_timeout =
2024                 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2025 #endif
2026 }
2027
2028
2029 #define GET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
2030 #define GET_INFO_ARG_LEN        (sizeof(struct ip_vs_getinfo))
2031 #define GET_SERVICES_ARG_LEN    (sizeof(struct ip_vs_get_services))
2032 #define GET_SERVICE_ARG_LEN     (sizeof(struct ip_vs_service_entry))
2033 #define GET_DESTS_ARG_LEN       (sizeof(struct ip_vs_get_dests))
2034 #define GET_TIMEOUT_ARG_LEN     (sizeof(struct ip_vs_timeout_user))
2035 #define GET_DAEMON_ARG_LEN      (sizeof(struct ip_vs_daemon_user) * 2)
2036
2037 static unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2038         [GET_CMDID(IP_VS_SO_GET_VERSION)]       = 64,
2039         [GET_CMDID(IP_VS_SO_GET_INFO)]          = GET_INFO_ARG_LEN,
2040         [GET_CMDID(IP_VS_SO_GET_SERVICES)]      = GET_SERVICES_ARG_LEN,
2041         [GET_CMDID(IP_VS_SO_GET_SERVICE)]       = GET_SERVICE_ARG_LEN,
2042         [GET_CMDID(IP_VS_SO_GET_DESTS)]         = GET_DESTS_ARG_LEN,
2043         [GET_CMDID(IP_VS_SO_GET_TIMEOUT)]       = GET_TIMEOUT_ARG_LEN,
2044         [GET_CMDID(IP_VS_SO_GET_DAEMON)]        = GET_DAEMON_ARG_LEN,
2045 };
2046
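/*
 * Illustrative sketch (not part of this module) of the two-step protocol a
 * user-space reader follows: query IP_VS_SO_GET_INFO first to learn how
 * many services exist, then size the IP_VS_SO_GET_SERVICES buffer exactly
 * as do_ip_vs_get_ctl() below expects (sizeof(struct ip_vs_get_services)
 * plus one struct ip_vs_service_entry per service).  The raw socket and
 * the lack of error handling are assumptions for the example only.
 *
 *	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
 *	struct ip_vs_getinfo info;
 *	socklen_t len = sizeof(info);
 *	getsockopt(fd, IPPROTO_IP, IP_VS_SO_GET_INFO, &info, &len);
 *
 *	len = sizeof(struct ip_vs_get_services) +
 *	      info.num_services * sizeof(struct ip_vs_service_entry);
 *	struct ip_vs_get_services *get = calloc(1, len);
 *	get->num_services = info.num_services;
 *	getsockopt(fd, IPPROTO_IP, IP_VS_SO_GET_SERVICES, get, &len);
 */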
2047 static int
2048 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2049 {
2050         unsigned char arg[128];
2051         int ret = 0;
2052
2053         if (!capable(CAP_NET_ADMIN))
2054                 return -EPERM;
2055
2056         if (*len < get_arglen[GET_CMDID(cmd)]) {
2057                 IP_VS_ERR("get_ctl: len %u < %u\n",
2058                           *len, get_arglen[GET_CMDID(cmd)]);
2059                 return -EINVAL;
2060         }
2061
2062         if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
2063                 return -EFAULT;
2064
2065         if (down_interruptible(&__ip_vs_mutex))
2066                 return -ERESTARTSYS;
2067
2068         switch (cmd) {
2069         case IP_VS_SO_GET_VERSION:
2070         {
2071                 char buf[64];
2072
2073                 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2074                         NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
2075                 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2076                         ret = -EFAULT;
2077                         goto out;
2078                 }
2079                 *len = strlen(buf)+1;
2080         }
2081         break;
2082
2083         case IP_VS_SO_GET_INFO:
2084         {
2085                 struct ip_vs_getinfo info;
2086                 info.version = IP_VS_VERSION_CODE;
2087                 info.size = IP_VS_CONN_TAB_SIZE;
2088                 info.num_services = ip_vs_num_services;
2089                 if (copy_to_user(user, &info, sizeof(info)) != 0)
2090                         ret = -EFAULT;
2091         }
2092         break;
2093
2094         case IP_VS_SO_GET_SERVICES:
2095         {
2096                 struct ip_vs_get_services *get;
2097                 int size;
2098
2099                 get = (struct ip_vs_get_services *)arg;
2100                 size = sizeof(*get) +
2101                         sizeof(struct ip_vs_service_entry) * get->num_services;
2102                 if (*len != size) {
2103                         IP_VS_ERR("length: %u != %u\n", *len, size);
2104                         ret = -EINVAL;
2105                         goto out;
2106                 }
2107                 ret = __ip_vs_get_service_entries(get, user);
2108         }
2109         break;
2110
2111         case IP_VS_SO_GET_SERVICE:
2112         {
2113                 struct ip_vs_service_entry *entry;
2114                 struct ip_vs_service *svc;
2115
2116                 entry = (struct ip_vs_service_entry *)arg;
2117                 if (entry->fwmark)
2118                         svc = __ip_vs_svc_fwm_get(entry->fwmark);
2119                 else
2120                         svc = __ip_vs_service_get(entry->protocol,
2121                                                   entry->addr, entry->port);
2122                 if (svc) {
2123                         ip_vs_copy_service(entry, svc);
2124                         if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2125                                 ret = -EFAULT;
2126                         ip_vs_service_put(svc);
2127                 } else
2128                         ret = -ESRCH;
2129         }
2130         break;
2131
2132         case IP_VS_SO_GET_DESTS:
2133         {
2134                 struct ip_vs_get_dests *get;
2135                 int size;
2136
2137                 get = (struct ip_vs_get_dests *)arg;
2138                 size = sizeof(*get) +
2139                         sizeof(struct ip_vs_dest_entry) * get->num_dests;
2140                 if (*len != size) {
2141                         IP_VS_ERR("length: %u != %u\n", *len, size);
2142                         ret = -EINVAL;
2143                         goto out;
2144                 }
2145                 ret = __ip_vs_get_dest_entries(get, user);
2146         }
2147         break;
2148
2149         case IP_VS_SO_GET_TIMEOUT:
2150         {
2151                 struct ip_vs_timeout_user t;
2152
2153                 __ip_vs_get_timeouts(&t);
2154                 if (copy_to_user(user, &t, sizeof(t)) != 0)
2155                         ret = -EFAULT;
2156         }
2157         break;
2158
2159         case IP_VS_SO_GET_DAEMON:
2160         {
2161                 struct ip_vs_daemon_user d[2];
2162
2163                 memset(&d, 0, sizeof(d));
2164                 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2165                         d[0].state = IP_VS_STATE_MASTER;
2166                         strcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn);
2167                         d[0].syncid = ip_vs_master_syncid;
2168                 }
2169                 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2170                         d[1].state = IP_VS_STATE_BACKUP;
2171                         strcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn);
2172                         d[1].syncid = ip_vs_backup_syncid;
2173                 }
2174                 if (copy_to_user(user, &d, sizeof(d)) != 0)
2175                         ret = -EFAULT;
2176         }
2177         break;
2178
2179         default:
2180                 ret = -EINVAL;
2181         }
2182
2183   out:
2184         up(&__ip_vs_mutex);
2185         return ret;
2186 }
2187
2188
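/*
 * The positional initializer below fills in, in order: the list head used
 * by nf_register_sockopt(), the protocol family, then the [optmin, optmax)
 * ranges and handlers for set and get.  Any setsockopt()/getsockopt() call
 * on a PF_INET socket with an option in [IP_VS_BASE_CTL, IP_VS_SO_SET_MAX]
 * resp. [IP_VS_BASE_CTL, IP_VS_SO_GET_MAX] is routed by the netfilter
 * sockopt glue to do_ip_vs_set_ctl() resp. do_ip_vs_get_ctl().
 */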
2189 static struct nf_sockopt_ops ip_vs_sockopts = {
2190         { NULL, NULL }, PF_INET,
2191         IP_VS_BASE_CTL, IP_VS_SO_SET_MAX+1, do_ip_vs_set_ctl,
2192         IP_VS_BASE_CTL, IP_VS_SO_GET_MAX+1, do_ip_vs_get_ctl
2193 };
2194
2195
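/*
 * Control-plane initialization: register the sockopt interface, create the
 * /proc/net/ip_vs and /proc/net/ip_vs_stats entries, hook up the sysctl
 * table, initialize the service and real-server hash tables, zero the
 * global stats and start their rate estimator, and finally arm the
 * periodic defense timer.  ip_vs_control_cleanup() below tears these down
 * again in roughly reverse order.
 */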
2196 int ip_vs_control_init(void)
2197 {
2198         int ret;
2199         int idx;
2200
2201         EnterFunction(2);
2202
2203         ret = nf_register_sockopt(&ip_vs_sockopts);
2204         if (ret) {
2205                 IP_VS_ERR("cannot register sockopt.\n");
2206                 return ret;
2207         }
2208
2209         proc_net_fops_create("ip_vs", 0, &ip_vs_info_fops);
2210         proc_net_fops_create("ip_vs_stats", 0, &ip_vs_stats_fops);
2211
2212         ipv4_vs_table.sysctl_header =
2213                 register_sysctl_table(ipv4_vs_table.root_dir, 0);
2214
2215         /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
2216         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2217                 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
2218                 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
2219         }
2220         for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
2221                 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
2222         }
2223
2224         memset(&ip_vs_stats, 0, sizeof(ip_vs_stats));
2225         spin_lock_init(&ip_vs_stats.lock);
2226         ip_vs_new_estimator(&ip_vs_stats);
2227
2228         /* Hook the defense timer */
2229         init_timer(&defense_timer);
2230         defense_timer.function = defense_timer_handler;
2231         defense_timer.expires = jiffies + DEFENSE_TIMER_PERIOD;
2232         add_timer(&defense_timer);
2233
2234         LeaveFunction(2);
2235         return 0;
2236 }
2237
2238
2239 void ip_vs_control_cleanup(void)
2240 {
2241         EnterFunction(2);
2242         ip_vs_trash_cleanup();
2243         del_timer_sync(&defense_timer);
2244         ip_vs_kill_estimator(&ip_vs_stats);
2245         unregister_sysctl_table(ipv4_vs_table.sysctl_header);
2246         proc_net_remove("ip_vs_stats");
2247         proc_net_remove("ip_vs");
2248         nf_unregister_sockopt(&ip_vs_sockopts);
2249         LeaveFunction(2);
2250 }