Linux-2.6.12-rc2
[linux-flexiantxendom0-natty.git] / net / ipv4 / ipvs / ip_vs_proto_tcp.c
1 /*
2  * ip_vs_proto_tcp.c:   TCP load balancing support for IPVS
3  *
4  * Version:     $Id: ip_vs_proto_tcp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $
5  *
6  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
7  *              Julian Anastasov <ja@ssi.bg>
8  *
9  *              This program is free software; you can redistribute it and/or
10  *              modify it under the terms of the GNU General Public License
11  *              as published by the Free Software Foundation; either version
12  *              2 of the License, or (at your option) any later version.
13  *
14  * Changes:
15  *
16  */
17
18 #include <linux/kernel.h>
19 #include <linux/ip.h>
20 #include <linux/tcp.h>                  /* for tcphdr */
21 #include <net/ip.h>
22 #include <net/tcp.h>                    /* for csum_tcpudp_magic */
23 #include <linux/netfilter_ipv4.h>
24
25 #include <net/ip_vs.h>
26
27
28 static struct ip_vs_conn *
29 tcp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
30                 const struct iphdr *iph, unsigned int proto_off, int inverse)
31 {
32         __u16 _ports[2], *pptr;
33
34         pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
35         if (pptr == NULL)
36                 return NULL;
37
38         if (likely(!inverse)) {
39                 return ip_vs_conn_in_get(iph->protocol,
40                                          iph->saddr, pptr[0],
41                                          iph->daddr, pptr[1]);
42         } else {
43                 return ip_vs_conn_in_get(iph->protocol,
44                                          iph->daddr, pptr[1],
45                                          iph->saddr, pptr[0]);
46         }
47 }
48
49 static struct ip_vs_conn *
50 tcp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
51                  const struct iphdr *iph, unsigned int proto_off, int inverse)
52 {
53         __u16 _ports[2], *pptr;
54
55         pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
56         if (pptr == NULL)
57                 return NULL;
58
59         if (likely(!inverse)) {
60                 return ip_vs_conn_out_get(iph->protocol,
61                                           iph->saddr, pptr[0],
62                                           iph->daddr, pptr[1]);
63         } else {
64                 return ip_vs_conn_out_get(iph->protocol,
65                                           iph->daddr, pptr[1],
66                                           iph->saddr, pptr[0]);
67         }
68 }
69
70
71 static int
72 tcp_conn_schedule(struct sk_buff *skb,
73                   struct ip_vs_protocol *pp,
74                   int *verdict, struct ip_vs_conn **cpp)
75 {
76         struct ip_vs_service *svc;
77         struct tcphdr _tcph, *th;
78
79         th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
80                                 sizeof(_tcph), &_tcph);
81         if (th == NULL) {
82                 *verdict = NF_DROP;
83                 return 0;
84         }
85
86         if (th->syn &&
87             (svc = ip_vs_service_get(skb->nfmark, skb->nh.iph->protocol,
88                                      skb->nh.iph->daddr, th->dest))) {
89                 if (ip_vs_todrop()) {
90                         /*
91                          * It seems that we are very loaded.
92                          * We have to drop this packet :(
93                          */
94                         ip_vs_service_put(svc);
95                         *verdict = NF_DROP;
96                         return 0;
97                 }
98
99                 /*
100                  * Let the virtual server select a real server for the
101                  * incoming connection, and create a connection entry.
102                  */
103                 *cpp = ip_vs_schedule(svc, skb);
104                 if (!*cpp) {
105                         *verdict = ip_vs_leave(svc, skb, pp);
106                         return 0;
107                 }
108                 ip_vs_service_put(svc);
109         }
110         return 1;
111 }
112
113
114 static inline void
115 tcp_fast_csum_update(struct tcphdr *tcph, u32 oldip, u32 newip,
116                      u16 oldport, u16 newport)
117 {
118         tcph->check =
119                 ip_vs_check_diff(~oldip, newip,
120                                  ip_vs_check_diff(oldport ^ 0xFFFF,
121                                                   newport, tcph->check));
122 }
123
124
/*
 *      Rewrite the TCP source port of a packet to cp->vport and fix the
 *      TCP checksum accordingly.
 *
 *      pskb: pointer to the packet pointer; it is passed by reference to
 *            the helpers below, so *pskb is re-read after each call
 *      pp:   protocol descriptor (supplies csum_check and name)
 *      cp:   connection entry supplying addresses, ports and app binding
 *
 *      Returns 1 on success, 0 if the packet could not be made writable,
 *      failed the checksum check, or was rejected by the app helper.
 */
static int
tcp_snat_handler(struct sk_buff **pskb,
                 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
        struct tcphdr *tcph;
        unsigned int tcphoff = (*pskb)->nh.iph->ihl * 4;

        /* csum_check requires unshared skb */
        if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
                return 0;

        if (unlikely(cp->app != NULL)) {
                /* Some checks before mangling */
                if (pp->csum_check && !pp->csum_check(*pskb, pp))
                        return 0;

                /* Call application helper if needed */
                if (!ip_vs_app_pkt_out(cp, pskb))
                        return 0;
        }

        /* Locate the header only now, after the calls that take pskb */
        tcph = (void *)(*pskb)->nh.iph + tcphoff;
        tcph->source = cp->vport;

        /* Adjust TCP checksums */
        if (!cp->app) {
                /* Only port and addr are changed, do fast csum update */
                tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr,
                                     cp->dport, cp->vport);
                /* A CHECKSUM_HW marking is downgraded to CHECKSUM_NONE */
                if ((*pskb)->ip_summed == CHECKSUM_HW)
                        (*pskb)->ip_summed = CHECKSUM_NONE;
        } else {
                /* full checksum calculation */
                tcph->check = 0;
                (*pskb)->csum = skb_checksum(*pskb, tcphoff,
                                             (*pskb)->len - tcphoff, 0);
                /* Final checksum is computed with cp->vaddr and cp->caddr */
                tcph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
                                                (*pskb)->len - tcphoff,
                                                cp->protocol,
                                                (*pskb)->csum);
                IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
                          pp->name, tcph->check,
                          (char*)&(tcph->check) - (char*)tcph);
        }
        return 1;
}
171
172
/*
 *      Rewrite the TCP destination port of a packet to cp->dport and fix
 *      the TCP checksum accordingly.
 *
 *      pskb: pointer to the packet pointer; it is passed by reference to
 *            the helpers below, so *pskb is re-read after each call
 *      pp:   protocol descriptor (supplies csum_check)
 *      cp:   connection entry supplying addresses, ports and app binding
 *
 *      Returns 1 on success, 0 if the packet could not be made writable,
 *      failed the checksum check, or was rejected by the app helper.
 */
static int
tcp_dnat_handler(struct sk_buff **pskb,
                 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
        struct tcphdr *tcph;
        unsigned int tcphoff = (*pskb)->nh.iph->ihl * 4;

        /* csum_check requires unshared skb */
        if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
                return 0;

        if (unlikely(cp->app != NULL)) {
                /* Some checks before mangling */
                if (pp->csum_check && !pp->csum_check(*pskb, pp))
                        return 0;

                /*
                 *      Attempt ip_vs_app call.
                 *      It will fix ip_vs_conn and iph ack_seq stuff
                 */
                if (!ip_vs_app_pkt_in(cp, pskb))
                        return 0;
        }

        /* Locate the header only now, after the calls that take pskb */
        tcph = (void *)(*pskb)->nh.iph + tcphoff;
        tcph->dest = cp->dport;

        /*
         *      Adjust TCP checksums
         */
        if (!cp->app) {
                /* Only port and addr are changed, do fast csum update */
                tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr,
                                     cp->vport, cp->dport);
                /* A CHECKSUM_HW marking is downgraded to CHECKSUM_NONE */
                if ((*pskb)->ip_summed == CHECKSUM_HW)
                        (*pskb)->ip_summed = CHECKSUM_NONE;
        } else {
                /* full checksum calculation */
                tcph->check = 0;
                (*pskb)->csum = skb_checksum(*pskb, tcphoff,
                                             (*pskb)->len - tcphoff, 0);
                /* Final checksum is computed with cp->caddr and cp->daddr */
                tcph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
                                                (*pskb)->len - tcphoff,
                                                cp->protocol,
                                                (*pskb)->csum);
                /* Checksum was just computed here; mark it as such */
                (*pskb)->ip_summed = CHECKSUM_UNNECESSARY;
        }
        return 1;
}
222
223
224 static int
225 tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
226 {
227         unsigned int tcphoff = skb->nh.iph->ihl*4;
228
229         switch (skb->ip_summed) {
230         case CHECKSUM_NONE:
231                 skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
232         case CHECKSUM_HW:
233                 if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr,
234                                       skb->len - tcphoff,
235                                       skb->nh.iph->protocol, skb->csum)) {
236                         IP_VS_DBG_RL_PKT(0, pp, skb, 0,
237                                          "Failed checksum for");
238                         return 0;
239                 }
240                 break;
241         default:
242                 /* CHECKSUM_UNNECESSARY */
243                 break;
244         }
245
246         return 1;
247 }
248
249
/*
 *      Row offsets into the state transition tables.  Each direction owns
 *      four consecutive rows (syn, fin, ack, rst), hence the steps of 4.
 */
#define TCP_DIR_INPUT           0
#define TCP_DIR_OUTPUT          4
#define TCP_DIR_INPUT_ONLY      8

/*
 *      Maps an IP_VS_DIR_* direction to the matching row offset above.
 */
static int tcp_state_off[IP_VS_DIR_LAST] = {
        [IP_VS_DIR_INPUT]               =       TCP_DIR_INPUT,
        [IP_VS_DIR_OUTPUT]              =       TCP_DIR_OUTPUT,
        [IP_VS_DIR_INPUT_ONLY]          =       TCP_DIR_INPUT_ONLY,
};
259
/*
 *      Timeout table[state]
 *
 *      Indexed by IP_VS_TCP_S_* state; every value is a multiple of HZ.
 *      tcp_init() installs this array as the protocol's timeout_table.
 */
static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
        [IP_VS_TCP_S_NONE]              =       2*HZ,
        [IP_VS_TCP_S_ESTABLISHED]       =       15*60*HZ,
        [IP_VS_TCP_S_SYN_SENT]          =       2*60*HZ,
        [IP_VS_TCP_S_SYN_RECV]          =       1*60*HZ,
        [IP_VS_TCP_S_FIN_WAIT]          =       2*60*HZ,
        [IP_VS_TCP_S_TIME_WAIT]         =       2*60*HZ,
        [IP_VS_TCP_S_CLOSE]             =       10*HZ,
        [IP_VS_TCP_S_CLOSE_WAIT]        =       60*HZ,
        [IP_VS_TCP_S_LAST_ACK]          =       30*HZ,
        [IP_VS_TCP_S_LISTEN]            =       2*60*HZ,
        [IP_VS_TCP_S_SYNACK]            =       120*HZ,
        [IP_VS_TCP_S_LAST]              =       2*HZ,
};
277
278
#if 0

/* FIXME: This is going to die */

/*
 *      Compiled-out alternative timeout table with the same layout as
 *      tcp_timeouts; several entries are shorter than their counterparts
 *      there.  Nothing in this file refers to it.
 */
static int tcp_timeouts_dos[IP_VS_TCP_S_LAST+1] = {
        [IP_VS_TCP_S_NONE]              =       2*HZ,
        [IP_VS_TCP_S_ESTABLISHED]       =       8*60*HZ,
        [IP_VS_TCP_S_SYN_SENT]          =       60*HZ,
        [IP_VS_TCP_S_SYN_RECV]          =       10*HZ,
        [IP_VS_TCP_S_FIN_WAIT]          =       60*HZ,
        [IP_VS_TCP_S_TIME_WAIT]         =       60*HZ,
        [IP_VS_TCP_S_CLOSE]             =       10*HZ,
        [IP_VS_TCP_S_CLOSE_WAIT]        =       60*HZ,
        [IP_VS_TCP_S_LAST_ACK]          =       30*HZ,
        [IP_VS_TCP_S_LISTEN]            =       2*60*HZ,
        [IP_VS_TCP_S_SYNACK]            =       100*HZ,
        [IP_VS_TCP_S_LAST]              =       2*HZ,
};

#endif
299
/*
 *      Printable name for each IP_VS_TCP_S_* state.  Read by
 *      tcp_state_name() and handed to ip_vs_set_state_timeout() by
 *      tcp_set_state_timeout().
 */
static char * tcp_state_name_table[IP_VS_TCP_S_LAST+1] = {
        [IP_VS_TCP_S_NONE]              =       "NONE",
        [IP_VS_TCP_S_ESTABLISHED]       =       "ESTABLISHED",
        [IP_VS_TCP_S_SYN_SENT]          =       "SYN_SENT",
        [IP_VS_TCP_S_SYN_RECV]          =       "SYN_RECV",
        [IP_VS_TCP_S_FIN_WAIT]          =       "FIN_WAIT",
        [IP_VS_TCP_S_TIME_WAIT]         =       "TIME_WAIT",
        [IP_VS_TCP_S_CLOSE]             =       "CLOSE",
        [IP_VS_TCP_S_CLOSE_WAIT]        =       "CLOSE_WAIT",
        [IP_VS_TCP_S_LAST_ACK]          =       "LAST_ACK",
        [IP_VS_TCP_S_LISTEN]            =       "LISTEN",
        [IP_VS_TCP_S_SYNACK]            =       "SYNACK",
        [IP_VS_TCP_S_LAST]              =       "BUG!",
};
314
/*
 *      Three-letter abbreviations of the IP_VS_TCP_S_* states, used to
 *      keep the transition tables below readable.
 */
#define sNO IP_VS_TCP_S_NONE
#define sES IP_VS_TCP_S_ESTABLISHED
#define sSS IP_VS_TCP_S_SYN_SENT
#define sSR IP_VS_TCP_S_SYN_RECV
#define sFW IP_VS_TCP_S_FIN_WAIT
#define sTW IP_VS_TCP_S_TIME_WAIT
#define sCL IP_VS_TCP_S_CLOSE
#define sCW IP_VS_TCP_S_CLOSE_WAIT
#define sLA IP_VS_TCP_S_LAST_ACK
#define sLI IP_VS_TCP_S_LISTEN
#define sSA IP_VS_TCP_S_SYNACK

/*
 *      One row of a transition table: next_state[] is indexed by the
 *      connection's current state and yields the state to move to.
 */
struct tcp_states_t {
        int next_state[IP_VS_TCP_S_LAST];
};
330
331 static const char * tcp_state_name(int state)
332 {
333         if (state >= IP_VS_TCP_S_LAST)
334                 return "ERR!";
335         return tcp_state_name_table[state] ? tcp_state_name_table[state] : "?";
336 }
337
/*
 *      Default state transition table.
 *
 *      Rows come in three groups of four, in the order INPUT, OUTPUT,
 *      INPUT-ONLY (row offsets TCP_DIR_INPUT, TCP_DIR_OUTPUT and
 *      TCP_DIR_INPUT_ONLY).  Inside a group the rows are syn, fin, ack,
 *      rst, matching the indexes returned by tcp_state_idx().  Columns
 *      are the current state; each cell is the next state.
 */
static struct tcp_states_t tcp_states [] = {
/*      INPUT */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }},
/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }},

/*      OUTPUT */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
/*syn*/ {{sSS, sES, sSS, sSR, sSS, sSS, sSS, sSS, sSS, sLI, sSR }},
/*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
/*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},

/*      INPUT-ONLY */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
};
360
/*
 *      Alternative state transition table, selected by
 *      tcp_timeout_change() when bit 0 of its flags argument
 *      ("secure_tcp") is set.  Same layout as tcp_states: three groups
 *      (INPUT, OUTPUT, INPUT-ONLY) of four rows (syn, fin, ack, rst),
 *      columns indexed by the current state.
 */
static struct tcp_states_t tcp_states_dos [] = {
/*      INPUT */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }},
/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }},
/*ack*/ {{sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }},
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},

/*      OUTPUT */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
/*syn*/ {{sSS, sES, sSS, sSA, sSS, sSS, sSS, sSS, sSS, sLI, sSA }},
/*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
/*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},

/*      INPUT-ONLY */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
/*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }},
/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
};
383
/*
 *      Transition table currently in effect; starts as tcp_states and is
 *      switched by tcp_timeout_change().
 */
static struct tcp_states_t *tcp_state_table = tcp_states;
385
386
387 static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
388 {
389         int on = (flags & 1);           /* secure_tcp */
390
391         /*
392         ** FIXME: change secure_tcp to independent sysctl var
393         ** or make it per-service or per-app because it is valid
394         ** for most if not for all of the applications. Something
395         ** like "capabilities" (flags) for each object.
396         */
397         tcp_state_table = (on? tcp_states_dos : tcp_states);
398 }
399
400 static int
401 tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
402 {
403         return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST,
404                                        tcp_state_name_table, sname, to);
405 }
406
407 static inline int tcp_state_idx(struct tcphdr *th)
408 {
409         if (th->rst)
410                 return 3;
411         if (th->syn)
412                 return 0;
413         if (th->fin)
414                 return 1;
415         if (th->ack)
416                 return 2;
417         return -1;
418 }
419
/*
 *      Advance the TCP state of a connection according to one packet.
 *
 *      pp:        protocol descriptor (name for logging, timeout table)
 *      cp:        connection entry to update; tcp_state_transition()
 *                 calls this with cp->lock held
 *      direction: IP_VS_DIR_* value, used as index into tcp_state_off[]
 *      th:        TCP header of the packet
 *
 *      Looks up the next state in the active transition table, keeps the
 *      destination's active/inactive connection counters in step with
 *      the change, then stores the new state and its timeout in cp.
 */
static inline void
set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
              int direction, struct tcphdr *th)
{
        int state_idx;
        /* Fallback used when the packet carries none of rst/syn/fin/ack */
        int new_state = IP_VS_TCP_S_CLOSE;
        int state_off = tcp_state_off[direction];

        /*
         *    Update state offset to INPUT_ONLY if necessary
         *    or delete NO_OUTPUT flag if output packet detected
         */
        if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
                if (state_off == TCP_DIR_OUTPUT)
                        cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
                else
                        state_off = TCP_DIR_INPUT_ONLY;
        }

        if ((state_idx = tcp_state_idx(th)) < 0) {
                IP_VS_DBG(8, "tcp_state_idx=%d!!!\n", state_idx);
                goto tcp_state_out;
        }

        /* Row = direction group + flag row, column = current state */
        new_state = tcp_state_table[state_off+state_idx].next_state[cp->state];

  tcp_state_out:
        if (new_state != cp->state) {
                struct ip_vs_dest *dest = cp->dest;

                IP_VS_DBG(8, "%s %s [%c%c%c%c] %u.%u.%u.%u:%d->"
                          "%u.%u.%u.%u:%d state: %s->%s cnt:%d\n",
                          pp->name,
                          (state_off==TCP_DIR_OUTPUT)?"output ":"input ",
                          th->syn? 'S' : '.',
                          th->fin? 'F' : '.',
                          th->ack? 'A' : '.',
                          th->rst? 'R' : '.',
                          NIPQUAD(cp->daddr), ntohs(cp->dport),
                          NIPQUAD(cp->caddr), ntohs(cp->cport),
                          tcp_state_name(cp->state),
                          tcp_state_name(new_state),
                          atomic_read(&cp->refcnt));
                if (dest) {
                        /*
                         * The connection counts as active only while it
                         * is ESTABLISHED; IP_VS_CONN_F_INACTIVE records
                         * which of the two counters currently holds it.
                         */
                        if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
                            (new_state != IP_VS_TCP_S_ESTABLISHED)) {
                                atomic_dec(&dest->activeconns);
                                atomic_inc(&dest->inactconns);
                                cp->flags |= IP_VS_CONN_F_INACTIVE;
                        } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
                                   (new_state == IP_VS_TCP_S_ESTABLISHED)) {
                                atomic_inc(&dest->activeconns);
                                atomic_dec(&dest->inactconns);
                                cp->flags &= ~IP_VS_CONN_F_INACTIVE;
                        }
                }
        }

        /* Store the new state and load its timeout in one statement */
        cp->timeout = pp->timeout_table[cp->state = new_state];
}
480
481
482 /*
483  *      Handle state transitions
484  */
485 static int
486 tcp_state_transition(struct ip_vs_conn *cp, int direction,
487                      const struct sk_buff *skb,
488                      struct ip_vs_protocol *pp)
489 {
490         struct tcphdr _tcph, *th;
491
492         th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
493                                 sizeof(_tcph), &_tcph);
494         if (th == NULL)
495                 return 0;
496
497         spin_lock(&cp->lock);
498         set_tcp_state(pp, cp, direction, th);
499         spin_unlock(&cp->lock);
500
501         return 1;
502 }
503
504
/*
 *      Hash table for TCP application incarnations
 *
 *      TCP_APP_TAB_BITS fixes the table at 1 << 4 = 16 buckets;
 *      TCP_APP_TAB_MASK reduces a hash value to a bucket index.
 */
#define TCP_APP_TAB_BITS        4
#define TCP_APP_TAB_SIZE        (1 << TCP_APP_TAB_BITS)
#define TCP_APP_TAB_MASK        (TCP_APP_TAB_SIZE - 1)

/* Buckets, set up by tcp_init(); tcp_app_lock is taken around their use */
static struct list_head tcp_apps[TCP_APP_TAB_SIZE];
static DEFINE_SPINLOCK(tcp_app_lock);
514
515 static inline __u16 tcp_app_hashkey(__u16 port)
516 {
517         return ((port >> TCP_APP_TAB_BITS) ^ port) & TCP_APP_TAB_MASK;
518 }
519
520
521 static int tcp_register_app(struct ip_vs_app *inc)
522 {
523         struct ip_vs_app *i;
524         __u16 hash, port = inc->port;
525         int ret = 0;
526
527         hash = tcp_app_hashkey(port);
528
529         spin_lock_bh(&tcp_app_lock);
530         list_for_each_entry(i, &tcp_apps[hash], p_list) {
531                 if (i->port == port) {
532                         ret = -EEXIST;
533                         goto out;
534                 }
535         }
536         list_add(&inc->p_list, &tcp_apps[hash]);
537         atomic_inc(&ip_vs_protocol_tcp.appcnt);
538
539   out:
540         spin_unlock_bh(&tcp_app_lock);
541         return ret;
542 }
543
544
545 static void
546 tcp_unregister_app(struct ip_vs_app *inc)
547 {
548         spin_lock_bh(&tcp_app_lock);
549         atomic_dec(&ip_vs_protocol_tcp.appcnt);
550         list_del(&inc->p_list);
551         spin_unlock_bh(&tcp_app_lock);
552 }
553
554
555 static int
556 tcp_app_conn_bind(struct ip_vs_conn *cp)
557 {
558         int hash;
559         struct ip_vs_app *inc;
560         int result = 0;
561
562         /* Default binding: bind app only for NAT */
563         if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
564                 return 0;
565
566         /* Lookup application incarnations and bind the right one */
567         hash = tcp_app_hashkey(cp->vport);
568
569         spin_lock(&tcp_app_lock);
570         list_for_each_entry(inc, &tcp_apps[hash], p_list) {
571                 if (inc->port == cp->vport) {
572                         if (unlikely(!ip_vs_app_inc_get(inc)))
573                                 break;
574                         spin_unlock(&tcp_app_lock);
575
576                         IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
577                                   "%u.%u.%u.%u:%u to app %s on port %u\n",
578                                   __FUNCTION__,
579                                   NIPQUAD(cp->caddr), ntohs(cp->cport),
580                                   NIPQUAD(cp->vaddr), ntohs(cp->vport),
581                                   inc->name, ntohs(inc->port));
582                         cp->app = inc;
583                         if (inc->init_conn)
584                                 result = inc->init_conn(inc, cp);
585                         goto out;
586                 }
587         }
588         spin_unlock(&tcp_app_lock);
589
590   out:
591         return result;
592 }
593
594
595 /*
596  *      Set LISTEN timeout. (ip_vs_conn_put will setup timer)
597  */
598 void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
599 {
600         spin_lock(&cp->lock);
601         cp->state = IP_VS_TCP_S_LISTEN;
602         cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN];
603         spin_unlock(&cp->lock);
604 }
605
606
/*
 *      Protocol init hook: initialises the buckets of the TCP app hash
 *      table and installs tcp_timeouts as pp's timeout table.
 */
static void tcp_init(struct ip_vs_protocol *pp)
{
        IP_VS_INIT_HASH_TABLE(tcp_apps);
        pp->timeout_table = tcp_timeouts;
}
612
613
/*
 *      Protocol exit hook: intentionally empty, there is nothing to undo.
 */
static void tcp_exit(struct ip_vs_protocol *pp)
{
}
617
618
/*
 *      TCP protocol descriptor for IPVS.  Wires the handlers defined in
 *      this file into the generic ip_vs_protocol interface; debug_packet
 *      is the only hook that comes from outside this file.
 */
struct ip_vs_protocol ip_vs_protocol_tcp = {
        .name =                 "TCP",
        .protocol =             IPPROTO_TCP,
        .dont_defrag =          0,
        .appcnt =               ATOMIC_INIT(0),
        .init =                 tcp_init,
        .exit =                 tcp_exit,
        .register_app =         tcp_register_app,
        .unregister_app =       tcp_unregister_app,
        .conn_schedule =        tcp_conn_schedule,
        .conn_in_get =          tcp_conn_in_get,
        .conn_out_get =         tcp_conn_out_get,
        .snat_handler =         tcp_snat_handler,
        .dnat_handler =         tcp_dnat_handler,
        .csum_check =           tcp_csum_check,
        .state_name =           tcp_state_name,
        .state_transition =     tcp_state_transition,
        .app_conn_bind =        tcp_app_conn_bind,
        .debug_packet =         ip_vs_tcpudp_debug_packet,
        .timeout_change =       tcp_timeout_change,
        .set_state_timeout =    tcp_set_state_timeout,
};