e5a78d2d7dc2e4c22feab8bfcf0bf194bc0618ea
[linux-flexiantxendom0-3.2.10.git] / net / ipv4 / ipvs / ip_vs_proto_tcp.c
1 /*
2  * ip_vs_proto_tcp.c:   TCP load balancing support for IPVS
3  *
4  * Version:     $Id: ip_vs_proto_tcp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $
5  *
6  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
7  *              Julian Anastasov <ja@ssi.bg>
8  *
9  *              This program is free software; you can redistribute it and/or
10  *              modify it under the terms of the GNU General Public License
11  *              as published by the Free Software Foundation; either version
12  *              2 of the License, or (at your option) any later version.
13  *
14  * Changes:
15  *
16  */
17
18 #include <linux/config.h>
19 #include <linux/compiler.h>
20 #include <linux/ip.h>
21 #include <linux/tcp.h>                  /* for tcphdr */
22 #include <net/ip.h>
23 #include <net/tcp.h>                    /* for csum_tcpudp_magic */
24 #include <linux/netfilter_ipv4.h>
25
26 #include <net/ip_vs.h>
27
28
29 static struct ip_vs_conn *
30 tcp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
31                 const struct iphdr *iph, unsigned int proto_off, int inverse)
32 {
33         __u16 ports[2];
34
35         if (skb_copy_bits(skb, proto_off, ports, sizeof(ports)) < 0)
36                 return NULL;
37
38         if (likely(!inverse)) {
39                 return ip_vs_conn_in_get(iph->protocol,
40                                          iph->saddr, ports[0],
41                                          iph->daddr, ports[1]);
42         } else {
43                 return ip_vs_conn_in_get(iph->protocol,
44                                          iph->daddr, ports[1],
45                                          iph->saddr, ports[0]);
46         }
47 }
48
49 static struct ip_vs_conn *
50 tcp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
51                  const struct iphdr *iph, unsigned int proto_off, int inverse)
52 {
53         __u16 ports[2];
54
55         if (skb_copy_bits(skb, proto_off, ports, sizeof(ports)) < 0)
56                 return NULL;
57
58         if (likely(!inverse)) {
59                 return ip_vs_conn_out_get(iph->protocol,
60                                           iph->saddr, ports[0],
61                                           iph->daddr, ports[1]);
62         } else {
63                 return ip_vs_conn_out_get(iph->protocol,
64                                           iph->daddr, ports[1],
65                                           iph->saddr, ports[0]);
66         }
67 }
68
69
70 static int
71 tcp_conn_schedule(struct sk_buff *skb,
72                   struct ip_vs_protocol *pp,
73                   int *verdict, struct ip_vs_conn **cpp)
74 {
75         struct ip_vs_service *svc;
76         struct tcphdr tcph;
77
78         if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &tcph, sizeof(tcph)) < 0) {
79                 *verdict = NF_DROP;
80                 return 0;
81         }
82
83         if (tcph.syn &&
84             (svc = ip_vs_service_get(skb->nfmark, skb->nh.iph->protocol,
85                                      skb->nh.iph->daddr, tcph.dest))) {
86                 if (ip_vs_todrop()) {
87                         /*
88                          * It seems that we are very loaded.
89                          * We have to drop this packet :(
90                          */
91                         ip_vs_service_put(svc);
92                         *verdict = NF_DROP;
93                         return 0;
94                 }
95
96                 /*
97                  * Let the virtual server select a real server for the
98                  * incoming connection, and create a connection entry.
99                  */
100                 *cpp = ip_vs_schedule(svc, skb);
101                 if (!*cpp) {
102                         *verdict = ip_vs_leave(svc, skb, pp);
103                         return 0;
104                 }
105                 ip_vs_service_put(svc);
106         }
107         return 1;
108 }
109
110
111 static inline void
112 tcp_fast_csum_update(struct tcphdr *tcph, u32 oldip, u32 newip,
113                      u16 oldport, u16 newport)
114 {
115         tcph->check =
116                 ip_vs_check_diff(~oldip, newip,
117                                  ip_vs_check_diff(oldport ^ 0xFFFF,
118                                                   newport, tcph->check));
119 }
120
121
122 static int
123 tcp_snat_handler(struct sk_buff **pskb,
124                  struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
125 {
126         struct tcphdr *tcph;
127         unsigned int tcphoff = (*pskb)->nh.iph->ihl * 4;
128
129         /* csum_check requires unshared skb */
130         if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
131                 return 0;
132
133         if (unlikely(cp->app != NULL)) {
134                 /* Some checks before mangling */
135                 if (pp->csum_check && !pp->csum_check(*pskb, pp))
136                         return 0;
137
138                 /* Call application helper if needed */
139                 if (!ip_vs_app_pkt_out(cp, pskb))
140                         return 0;
141         }
142
143         tcph = (void *)(*pskb)->nh.iph + tcphoff;
144         tcph->source = cp->vport;
145
146         /* Adjust TCP checksums */
147         if (!cp->app) {
148                 /* Only port and addr are changed, do fast csum update */
149                 tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr,
150                                      cp->dport, cp->vport);
151                 if ((*pskb)->ip_summed == CHECKSUM_HW)
152                         (*pskb)->ip_summed = CHECKSUM_NONE;
153         } else {
154                 /* full checksum calculation */
155                 tcph->check = 0;
156                 (*pskb)->csum = skb_checksum(*pskb, tcphoff,
157                                              (*pskb)->len - tcphoff, 0);
158                 tcph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
159                                                 (*pskb)->len - tcphoff,
160                                                 cp->protocol,
161                                                 (*pskb)->csum);
162                 IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%d)\n",
163                           pp->name, tcph->check,
164                           (char*)&(tcph->check) - (char*)tcph);
165         }
166         return 1;
167 }
168
169
170 static int
171 tcp_dnat_handler(struct sk_buff **pskb,
172                  struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
173 {
174         struct tcphdr *tcph;
175         unsigned int tcphoff = (*pskb)->nh.iph->ihl * 4;
176
177         /* csum_check requires unshared skb */
178         if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
179                 return 0;
180
181         if (unlikely(cp->app != NULL)) {
182                 /* Some checks before mangling */
183                 if (pp->csum_check && !pp->csum_check(*pskb, pp))
184                         return 0;
185
186                 /*
187                  *      Attempt ip_vs_app call.
188                  *      It will fix ip_vs_conn and iph ack_seq stuff
189                  */
190                 if (!ip_vs_app_pkt_in(cp, pskb))
191                         return 0;
192         }
193
194         tcph = (void *)(*pskb)->nh.iph + tcphoff;
195         tcph->dest = cp->dport;
196
197         /*
198          *      Adjust TCP checksums
199          */
200         if (!cp->app) {
201                 /* Only port and addr are changed, do fast csum update */
202                 tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr,
203                                      cp->vport, cp->dport);
204                 if ((*pskb)->ip_summed == CHECKSUM_HW)
205                         (*pskb)->ip_summed = CHECKSUM_NONE;
206         } else {
207                 /* full checksum calculation */
208                 tcph->check = 0;
209                 (*pskb)->csum = skb_checksum(*pskb, tcphoff,
210                                              (*pskb)->len - tcphoff, 0);
211                 tcph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
212                                                 (*pskb)->len - tcphoff,
213                                                 cp->protocol,
214                                                 (*pskb)->csum);
215                 (*pskb)->ip_summed = CHECKSUM_UNNECESSARY;
216         }
217         return 1;
218 }
219
220
221 static int
222 tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
223 {
224         unsigned int tcphoff = skb->nh.iph->ihl*4;
225
226         switch (skb->ip_summed) {
227         case CHECKSUM_NONE:
228                 skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
229         case CHECKSUM_HW:
230                 if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr,
231                                       skb->len - tcphoff,
232                                       skb->nh.iph->protocol, skb->csum)) {
233                         IP_VS_DBG_RL_PKT(0, pp, skb, 0,
234                                          "Failed checksum for");
235                         return 0;
236                 }
237                 break;
238         default:
239                 /* CHECKSUM_UNNECESSARY */
240                 break;
241         }
242
243         return 1;
244 }
245
246
247 #define TCP_DIR_INPUT           0
248 #define TCP_DIR_OUTPUT          4
249 #define TCP_DIR_INPUT_ONLY      8
250
251 static int tcp_state_off[IP_VS_DIR_LAST] = {
252         [IP_VS_DIR_INPUT]               =       TCP_DIR_INPUT,
253         [IP_VS_DIR_OUTPUT]              =       TCP_DIR_OUTPUT,
254         [IP_VS_DIR_INPUT_ONLY]          =       TCP_DIR_INPUT_ONLY,
255 };
256
257 /*
258  *      Timeout table[state]
259  */
260 static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
261         [IP_VS_TCP_S_NONE]              =       2*HZ,
262         [IP_VS_TCP_S_ESTABLISHED]       =       15*60*HZ,
263         [IP_VS_TCP_S_SYN_SENT]          =       2*60*HZ,
264         [IP_VS_TCP_S_SYN_RECV]          =       1*60*HZ,
265         [IP_VS_TCP_S_FIN_WAIT]          =       2*60*HZ,
266         [IP_VS_TCP_S_TIME_WAIT]         =       2*60*HZ,
267         [IP_VS_TCP_S_CLOSE]             =       10*HZ,
268         [IP_VS_TCP_S_CLOSE_WAIT]        =       60*HZ,
269         [IP_VS_TCP_S_LAST_ACK]          =       30*HZ,
270         [IP_VS_TCP_S_LISTEN]            =       2*60*HZ,
271         [IP_VS_TCP_S_SYNACK]            =       120*HZ,
272         [IP_VS_TCP_S_LAST]              =       2*HZ,
273 };
274
275
#if 0

/*
 * FIXME: This is going to die
 *
 * Alternative per-state timeout table.  It is compiled out by the
 * surrounding #if 0 and nothing in this file refers to it.
 */

static int tcp_timeouts_dos[IP_VS_TCP_S_LAST+1] = {
        [IP_VS_TCP_S_NONE]              =       2*HZ,
        [IP_VS_TCP_S_ESTABLISHED]       =       8*60*HZ,
        [IP_VS_TCP_S_SYN_SENT]          =       60*HZ,
        [IP_VS_TCP_S_SYN_RECV]          =       10*HZ,
        [IP_VS_TCP_S_FIN_WAIT]          =       60*HZ,
        [IP_VS_TCP_S_TIME_WAIT]         =       60*HZ,
        [IP_VS_TCP_S_CLOSE]             =       10*HZ,
        [IP_VS_TCP_S_CLOSE_WAIT]        =       60*HZ,
        [IP_VS_TCP_S_LAST_ACK]          =       30*HZ,
        [IP_VS_TCP_S_LISTEN]            =       2*60*HZ,
        [IP_VS_TCP_S_SYNACK]            =       100*HZ,
        [IP_VS_TCP_S_LAST]              =       2*HZ,
};

#endif
296
297 static char * tcp_state_name_table[IP_VS_TCP_S_LAST+1] = {
298         [IP_VS_TCP_S_NONE]              =       "NONE",
299         [IP_VS_TCP_S_ESTABLISHED]       =       "ESTABLISHED",
300         [IP_VS_TCP_S_SYN_SENT]          =       "SYN_SENT",
301         [IP_VS_TCP_S_SYN_RECV]          =       "SYN_RECV",
302         [IP_VS_TCP_S_FIN_WAIT]          =       "FIN_WAIT",
303         [IP_VS_TCP_S_TIME_WAIT]         =       "TIME_WAIT",
304         [IP_VS_TCP_S_CLOSE]             =       "CLOSE",
305         [IP_VS_TCP_S_CLOSE_WAIT]        =       "CLOSE_WAIT",
306         [IP_VS_TCP_S_LAST_ACK]          =       "LAST_ACK",
307         [IP_VS_TCP_S_LISTEN]            =       "LISTEN",
308         [IP_VS_TCP_S_SYNACK]            =       "SYNACK",
309         [IP_VS_TCP_S_LAST]              =       "BUG!",
310 };
311
312 #define sNO IP_VS_TCP_S_NONE
313 #define sES IP_VS_TCP_S_ESTABLISHED
314 #define sSS IP_VS_TCP_S_SYN_SENT
315 #define sSR IP_VS_TCP_S_SYN_RECV
316 #define sFW IP_VS_TCP_S_FIN_WAIT
317 #define sTW IP_VS_TCP_S_TIME_WAIT
318 #define sCL IP_VS_TCP_S_CLOSE
319 #define sCW IP_VS_TCP_S_CLOSE_WAIT
320 #define sLA IP_VS_TCP_S_LAST_ACK
321 #define sLI IP_VS_TCP_S_LISTEN
322 #define sSA IP_VS_TCP_S_SYNACK
323
324 struct tcp_states_t {
325         int next_state[IP_VS_TCP_S_LAST];
326 };
327
328 static const char * tcp_state_name(int state)
329 {
330         if (state >= IP_VS_TCP_S_LAST)
331                 return "ERR!";
332         return tcp_state_name_table[state] ? tcp_state_name_table[state] : "?";
333 }
334
/*
 *	Default TCP state transition table.
 *
 *	set_tcp_state() picks a row with "direction offset + event index"
 *	(offsets are the TCP_DIR_* values; tcp_state_idx() gives syn=0,
 *	fin=1, ack=2, rst=3) and then a column with the connection's
 *	current state.  The entry found is the new state.
 */
static struct tcp_states_t tcp_states [] = {
/*      INPUT */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }},
/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }},

/*      OUTPUT */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
/*syn*/ {{sSS, sES, sSS, sSR, sSS, sSS, sSS, sSS, sSS, sLI, sSR }},
/*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
/*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},

/*      INPUT-ONLY */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
};
357
/*
 *	Alternative TCP state transition table, with the same layout as
 *	tcp_states (row = direction offset + event index, column = current
 *	state).  tcp_timeout_change() switches to it when bit 0 of its
 *	flags argument (secure_tcp) is set.
 */
static struct tcp_states_t tcp_states_dos [] = {
/*      INPUT */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }},
/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }},
/*ack*/ {{sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }},
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},

/*      OUTPUT */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
/*syn*/ {{sSS, sES, sSS, sSA, sSS, sSS, sSS, sSS, sSS, sLI, sSA }},
/*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
/*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},

/*      INPUT-ONLY */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
/*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }},
/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
};
380
381 static struct tcp_states_t *tcp_state_table = tcp_states;
382
383
384 static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
385 {
386         int on = (flags & 1);           /* secure_tcp */
387
388         /*
389         ** FIXME: change secure_tcp to independent sysctl var
390         ** or make it per-service or per-app because it is valid
391         ** for most if not for all of the applications. Something
392         ** like "capabilities" (flags) for each object.
393         */
394         tcp_state_table = (on? tcp_states_dos : tcp_states);
395 }
396
397 static int
398 tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
399 {
400         return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST,
401                                        tcp_state_name_table, sname, to);
402 }
403
404 static inline int tcp_state_idx(struct tcphdr *th)
405 {
406         if (th->rst)
407                 return 3;
408         if (th->syn)
409                 return 0;
410         if (th->fin)
411                 return 1;
412         if (th->ack)
413                 return 2;
414         return -1;
415 }
416
417 static inline void
418 set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
419               int direction, struct tcphdr *th)
420 {
421         int state_idx;
422         int new_state = IP_VS_TCP_S_CLOSE;
423         int state_off = tcp_state_off[direction];
424
425         /*
426          *    Update state offset to INPUT_ONLY if necessary
427          *    or delete NO_OUTPUT flag if output packet detected
428          */
429         if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
430                 if (state_off == TCP_DIR_OUTPUT)
431                         cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
432                 else
433                         state_off = TCP_DIR_INPUT_ONLY;
434         }
435
436         if ((state_idx = tcp_state_idx(th)) < 0) {
437                 IP_VS_DBG(8, "tcp_state_idx=%d!!!\n", state_idx);
438                 goto tcp_state_out;
439         }
440
441         new_state = tcp_state_table[state_off+state_idx].next_state[cp->state];
442
443   tcp_state_out:
444         if (new_state != cp->state) {
445                 struct ip_vs_dest *dest = cp->dest;
446
447                 IP_VS_DBG(8, "%s %s [%c%c%c%c] %u.%u.%u.%u:%d->"
448                           "%u.%u.%u.%u:%d state: %s->%s cnt:%d\n",
449                           pp->name,
450                           (state_off==TCP_DIR_OUTPUT)?"output ":"input ",
451                           th->syn? 'S' : '.',
452                           th->fin? 'F' : '.',
453                           th->ack? 'A' : '.',
454                           th->rst? 'R' : '.',
455                           NIPQUAD(cp->daddr), ntohs(cp->dport),
456                           NIPQUAD(cp->caddr), ntohs(cp->cport),
457                           tcp_state_name(cp->state),
458                           tcp_state_name(new_state),
459                           atomic_read(&cp->refcnt));
460                 if (dest) {
461                         if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
462                             (new_state != IP_VS_TCP_S_ESTABLISHED)) {
463                                 atomic_dec(&dest->activeconns);
464                                 atomic_inc(&dest->inactconns);
465                                 cp->flags |= IP_VS_CONN_F_INACTIVE;
466                         } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
467                                    (new_state == IP_VS_TCP_S_ESTABLISHED)) {
468                                 atomic_inc(&dest->activeconns);
469                                 atomic_dec(&dest->inactconns);
470                                 cp->flags &= ~IP_VS_CONN_F_INACTIVE;
471                         }
472                 }
473         }
474
475         cp->timeout = pp->timeout_table[cp->state = new_state];
476 }
477
478
479 /*
480  *      Handle state transitions
481  */
482 static int
483 tcp_state_transition(struct ip_vs_conn *cp, int direction,
484                      const struct sk_buff *skb,
485                      struct ip_vs_protocol *pp)
486 {
487         struct tcphdr tcph;
488
489         if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &tcph, sizeof(tcph)) < 0)
490                 return 0;
491
492         spin_lock(&cp->lock);
493         set_tcp_state(pp, cp, direction, &tcph);
494         spin_unlock(&cp->lock);
495
496         return 1;
497 }
498
499
500 /*
501  *      Hash table for TCP application incarnations
502  */
503 #define TCP_APP_TAB_BITS        4
504 #define TCP_APP_TAB_SIZE        (1 << TCP_APP_TAB_BITS)
505 #define TCP_APP_TAB_MASK        (TCP_APP_TAB_SIZE - 1)
506
507 static struct list_head tcp_apps[TCP_APP_TAB_SIZE];
508 static spinlock_t tcp_app_lock = SPIN_LOCK_UNLOCKED;
509
510 static inline __u16 tcp_app_hashkey(__u16 port)
511 {
512         return ((port >> TCP_APP_TAB_BITS) ^ port) & TCP_APP_TAB_MASK;
513 }
514
515
516 static int tcp_register_app(struct ip_vs_app *inc)
517 {
518         struct ip_vs_app *i;
519         __u16 hash, port = inc->port;
520         int ret = 0;
521
522         hash = tcp_app_hashkey(port);
523
524         spin_lock_bh(&tcp_app_lock);
525         list_for_each_entry(i, &tcp_apps[hash], p_list) {
526                 if (i->port == port) {
527                         ret = -EEXIST;
528                         goto out;
529                 }
530         }
531         list_add(&inc->p_list, &tcp_apps[hash]);
532         atomic_inc(&ip_vs_protocol_tcp.appcnt);
533
534   out:
535         spin_unlock_bh(&tcp_app_lock);
536         return ret;
537 }
538
539
540 static void
541 tcp_unregister_app(struct ip_vs_app *inc)
542 {
543         spin_lock_bh(&tcp_app_lock);
544         atomic_dec(&ip_vs_protocol_tcp.appcnt);
545         list_del(&inc->p_list);
546         spin_unlock_bh(&tcp_app_lock);
547 }
548
549
550 static int
551 tcp_app_conn_bind(struct ip_vs_conn *cp)
552 {
553         int hash;
554         struct ip_vs_app *inc;
555         int result = 0;
556
557         /* Default binding: bind app only for NAT */
558         if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
559                 return 0;
560
561         /* Lookup application incarnations and bind the right one */
562         hash = tcp_app_hashkey(cp->vport);
563
564         spin_lock(&tcp_app_lock);
565         list_for_each_entry(inc, &tcp_apps[hash], p_list) {
566                 if (inc->port == cp->vport) {
567                         if (unlikely(!ip_vs_app_inc_get(inc)))
568                                 break;
569                         spin_unlock(&tcp_app_lock);
570
571                         IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
572                                   "%u.%u.%u.%u:%u to app %s on port %u\n",
573                                   __FUNCTION__,
574                                   NIPQUAD(cp->caddr), ntohs(cp->cport),
575                                   NIPQUAD(cp->vaddr), ntohs(cp->vport),
576                                   inc->name, ntohs(inc->port));
577                         cp->app = inc;
578                         if (inc->init_conn)
579                                 result = inc->init_conn(inc, cp);
580                         goto out;
581                 }
582         }
583         spin_unlock(&tcp_app_lock);
584
585   out:
586         return result;
587 }
588
589
590 /*
591  *      Set LISTEN timeout. (ip_vs_conn_put will setup timer)
592  */
593 void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
594 {
595         spin_lock(&cp->lock);
596         cp->state = IP_VS_TCP_S_LISTEN;
597         cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN];
598         spin_unlock(&cp->lock);
599 }
600
601
602 static void tcp_init(struct ip_vs_protocol *pp)
603 {
604         IP_VS_INIT_HASH_TABLE(tcp_apps);
605         pp->timeout_table = tcp_timeouts;
606 }
607
608
/*
 *	Protocol exit hook: there is nothing to release for TCP.
 */
static void tcp_exit(struct ip_vs_protocol *pp)
{
	/* intentionally empty */
}
612
613
614 struct ip_vs_protocol ip_vs_protocol_tcp = {
615         .name =                 "TCP",
616         .protocol =             IPPROTO_TCP,
617         .dont_defrag =          0,
618         .appcnt =               ATOMIC_INIT(0),
619         .init =                 tcp_init,
620         .exit =                 tcp_exit,
621         .register_app =         tcp_register_app,
622         .unregister_app =       tcp_unregister_app,
623         .conn_schedule =        tcp_conn_schedule,
624         .conn_in_get =          tcp_conn_in_get,
625         .conn_out_get =         tcp_conn_out_get,
626         .snat_handler =         tcp_snat_handler,
627         .dnat_handler =         tcp_dnat_handler,
628         .csum_check =           tcp_csum_check,
629         .state_name =           tcp_state_name,
630         .state_transition =     tcp_state_transition,
631         .app_conn_bind =        tcp_app_conn_bind,
632         .debug_packet =         ip_vs_tcpudp_debug_packet,
633         .timeout_change =       tcp_timeout_change,
634         .set_state_timeout =    tcp_set_state_timeout,
635 };