223b5018c7dc658d876f4b3c749d1377b57931b4
[linux-flexiantxendom0-natty.git] / net / netfilter / ipvs / ip_vs_xmit.c
1 /*
2  * ip_vs_xmit.c: various packet transmitters for IPVS
3  *
4  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
5  *              Julian Anastasov <ja@ssi.bg>
6  *
7  *              This program is free software; you can redistribute it and/or
8  *              modify it under the terms of the GNU General Public License
9  *              as published by the Free Software Foundation; either version
10  *              2 of the License, or (at your option) any later version.
11  *
12  * Changes:
13  *
14  */
15
16 #define KMSG_COMPONENT "IPVS"
17 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
18
19 #include <linux/kernel.h>
20 #include <linux/tcp.h>                  /* for tcphdr */
21 #include <net/ip.h>
22 #include <net/tcp.h>                    /* for csum_tcpudp_magic */
23 #include <net/udp.h>
24 #include <net/icmp.h>                   /* for icmp_send */
25 #include <net/route.h>                  /* for ip_route_output */
26 #include <net/ipv6.h>
27 #include <net/ip6_route.h>
28 #include <linux/icmpv6.h>
29 #include <linux/netfilter.h>
30 #include <linux/netfilter_ipv4.h>
31
32 #include <net/ip_vs.h>
33
34
35 /*
36  *      Destination cache to speed up outgoing route lookup
37  */
38 static inline void
39 __ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst)
40 {
41         struct dst_entry *old_dst;
42
43         old_dst = dest->dst_cache;
44         dest->dst_cache = dst;
45         dest->dst_rtos = rtos;
46         dst_release(old_dst);
47 }
48
49 static inline struct dst_entry *
50 __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie)
51 {
52         struct dst_entry *dst = dest->dst_cache;
53
54         if (!dst)
55                 return NULL;
56         if ((dst->obsolete
57              || (dest->af == AF_INET && rtos != dest->dst_rtos)) &&
58             dst->ops->check(dst, cookie) == NULL) {
59                 dest->dst_cache = NULL;
60                 dst_release(dst);
61                 return NULL;
62         }
63         dst_hold(dst);
64         return dst;
65 }
66
67 static struct rtable *
68 __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
69 {
70         struct rtable *rt;                      /* Route to the other host */
71         struct ip_vs_dest *dest = cp->dest;
72
73         if (dest) {
74                 spin_lock(&dest->dst_lock);
75                 if (!(rt = (struct rtable *)
76                       __ip_vs_dst_check(dest, rtos, 0))) {
77                         struct flowi fl = {
78                                 .oif = 0,
79                                 .nl_u = {
80                                         .ip4_u = {
81                                                 .daddr = dest->addr.ip,
82                                                 .saddr = 0,
83                                                 .tos = rtos, } },
84                         };
85
86                         if (ip_route_output_key(&init_net, &rt, &fl)) {
87                                 spin_unlock(&dest->dst_lock);
88                                 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
89                                              &dest->addr.ip);
90                                 return NULL;
91                         }
92                         __ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst));
93                         IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n",
94                                   &dest->addr.ip,
95                                   atomic_read(&rt->u.dst.__refcnt), rtos);
96                 }
97                 spin_unlock(&dest->dst_lock);
98         } else {
99                 struct flowi fl = {
100                         .oif = 0,
101                         .nl_u = {
102                                 .ip4_u = {
103                                         .daddr = cp->daddr.ip,
104                                         .saddr = 0,
105                                         .tos = rtos, } },
106                 };
107
108                 if (ip_route_output_key(&init_net, &rt, &fl)) {
109                         IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
110                                      &cp->daddr.ip);
111                         return NULL;
112                 }
113         }
114
115         return rt;
116 }
117
118 #ifdef CONFIG_IP_VS_IPV6
119 static struct rt6_info *
120 __ip_vs_get_out_rt_v6(struct ip_vs_conn *cp)
121 {
122         struct rt6_info *rt;                    /* Route to the other host */
123         struct ip_vs_dest *dest = cp->dest;
124
125         if (dest) {
126                 spin_lock(&dest->dst_lock);
127                 rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0, 0);
128                 if (!rt) {
129                         struct flowi fl = {
130                                 .oif = 0,
131                                 .nl_u = {
132                                         .ip6_u = {
133                                                 .daddr = dest->addr.in6,
134                                                 .saddr = {
135                                                         .s6_addr32 =
136                                                                 { 0, 0, 0, 0 },
137                                                 },
138                                         },
139                                 },
140                         };
141
142                         rt = (struct rt6_info *)ip6_route_output(&init_net,
143                                                                  NULL, &fl);
144                         if (!rt) {
145                                 spin_unlock(&dest->dst_lock);
146                                 IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n",
147                                              &dest->addr.in6);
148                                 return NULL;
149                         }
150                         __ip_vs_dst_set(dest, 0, dst_clone(&rt->u.dst));
151                         IP_VS_DBG(10, "new dst %pI6, refcnt=%d\n",
152                                   &dest->addr.in6,
153                                   atomic_read(&rt->u.dst.__refcnt));
154                 }
155                 spin_unlock(&dest->dst_lock);
156         } else {
157                 struct flowi fl = {
158                         .oif = 0,
159                         .nl_u = {
160                                 .ip6_u = {
161                                         .daddr = cp->daddr.in6,
162                                         .saddr = {
163                                                 .s6_addr32 = { 0, 0, 0, 0 },
164                                         },
165                                 },
166                         },
167                 };
168
169                 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
170                 if (!rt) {
171                         IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n",
172                                      &cp->daddr.in6);
173                         return NULL;
174                 }
175         }
176
177         return rt;
178 }
179 #endif
180
181
182 /*
183  *      Release dest->dst_cache before a dest is removed
184  */
185 void
186 ip_vs_dst_reset(struct ip_vs_dest *dest)
187 {
188         struct dst_entry *old_dst;
189
190         old_dst = dest->dst_cache;
191         dest->dst_cache = NULL;
192         dst_release(old_dst);
193 }
194
/*
 * Final transmit step shared by the transmitters below: set
 * ipvs_property on the skb, call skb_forward_csum() on it and pass it
 * through the NF_INET_LOCAL_OUT hook of protocol family @pf towards the
 * device of route @rt, with dst_output as the continuation.
 */
#define IP_VS_XMIT(pf, skb, rt)				\
do {							\
	(skb)->ipvs_property = 1;			\
	skb_forward_csum(skb);				\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,	\
		(rt)->u.dst.dev, dst_output);		\
} while (0)
202
203
204 /*
205  *      NULL transmitter (do nothing except return NF_ACCEPT)
206  */
207 int
208 ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
209                 struct ip_vs_protocol *pp)
210 {
211         /* we do not touch skb and do not need pskb ptr */
212         return NF_ACCEPT;
213 }
214
215
216 /*
217  *      Bypass transmitter
218  *      Let packets bypass the destination when the destination is not
219  *      available, it may be only used in transparent cache cluster.
220  */
221 int
222 ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
223                   struct ip_vs_protocol *pp)
224 {
225         struct rtable *rt;                      /* Route to the other host */
226         struct iphdr  *iph = ip_hdr(skb);
227         u8     tos = iph->tos;
228         int    mtu;
229         struct flowi fl = {
230                 .oif = 0,
231                 .nl_u = {
232                         .ip4_u = {
233                                 .daddr = iph->daddr,
234                                 .saddr = 0,
235                                 .tos = RT_TOS(tos), } },
236         };
237
238         EnterFunction(10);
239
240         if (ip_route_output_key(&init_net, &rt, &fl)) {
241                 IP_VS_DBG_RL("%s(): ip_route_output error, dest: %pI4\n",
242                              __func__, &iph->daddr);
243                 goto tx_error_icmp;
244         }
245
246         /* MTU checking */
247         mtu = dst_mtu(&rt->u.dst);
248         if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
249                 ip_rt_put(rt);
250                 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
251                 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
252                 goto tx_error;
253         }
254
255         /*
256          * Call ip_send_check because we are not sure it is called
257          * after ip_defrag. Is copy-on-write needed?
258          */
259         if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
260                 ip_rt_put(rt);
261                 return NF_STOLEN;
262         }
263         ip_send_check(ip_hdr(skb));
264
265         /* drop old route */
266         skb_dst_drop(skb);
267         skb_dst_set(skb, &rt->u.dst);
268
269         /* Another hack: avoid icmp_send in ip_fragment */
270         skb->local_df = 1;
271
272         IP_VS_XMIT(PF_INET, skb, rt);
273
274         LeaveFunction(10);
275         return NF_STOLEN;
276
277  tx_error_icmp:
278         dst_link_failure(skb);
279  tx_error:
280         kfree_skb(skb);
281         LeaveFunction(10);
282         return NF_STOLEN;
283 }
284
285 #ifdef CONFIG_IP_VS_IPV6
286 int
287 ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
288                      struct ip_vs_protocol *pp)
289 {
290         struct rt6_info *rt;                    /* Route to the other host */
291         struct ipv6hdr  *iph = ipv6_hdr(skb);
292         int    mtu;
293         struct flowi fl = {
294                 .oif = 0,
295                 .nl_u = {
296                         .ip6_u = {
297                                 .daddr = iph->daddr,
298                                 .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
299         };
300
301         EnterFunction(10);
302
303         rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
304         if (!rt) {
305                 IP_VS_DBG_RL("%s(): ip6_route_output error, dest: %pI6\n",
306                              __func__, &iph->daddr);
307                 goto tx_error_icmp;
308         }
309
310         /* MTU checking */
311         mtu = dst_mtu(&rt->u.dst);
312         if (skb->len > mtu) {
313                 dst_release(&rt->u.dst);
314                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
315                 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
316                 goto tx_error;
317         }
318
319         /*
320          * Call ip_send_check because we are not sure it is called
321          * after ip_defrag. Is copy-on-write needed?
322          */
323         skb = skb_share_check(skb, GFP_ATOMIC);
324         if (unlikely(skb == NULL)) {
325                 dst_release(&rt->u.dst);
326                 return NF_STOLEN;
327         }
328
329         /* drop old route */
330         skb_dst_drop(skb);
331         skb_dst_set(skb, &rt->u.dst);
332
333         /* Another hack: avoid icmp_send in ip_fragment */
334         skb->local_df = 1;
335
336         IP_VS_XMIT(PF_INET6, skb, rt);
337
338         LeaveFunction(10);
339         return NF_STOLEN;
340
341  tx_error_icmp:
342         dst_link_failure(skb);
343  tx_error:
344         kfree_skb(skb);
345         LeaveFunction(10);
346         return NF_STOLEN;
347 }
348 #endif
349
350 /*
351  *      NAT transmitter (only for outside-to-inside nat forwarding)
352  *      Not used for related ICMP
353  */
354 int
355 ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
356                struct ip_vs_protocol *pp)
357 {
358         struct rtable *rt;              /* Route to the other host */
359         int mtu;
360         struct iphdr *iph = ip_hdr(skb);
361
362         EnterFunction(10);
363
364         /* check if it is a connection of no-client-port */
365         if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
366                 __be16 _pt, *p;
367                 p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
368                 if (p == NULL)
369                         goto tx_error;
370                 ip_vs_conn_fill_cport(cp, *p);
371                 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
372         }
373
374         if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
375                 goto tx_error_icmp;
376
377         /* MTU checking */
378         mtu = dst_mtu(&rt->u.dst);
379         if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
380                 ip_rt_put(rt);
381                 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
382                 IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for");
383                 goto tx_error;
384         }
385
386         /* copy-on-write the packet before mangling it */
387         if (!skb_make_writable(skb, sizeof(struct iphdr)))
388                 goto tx_error_put;
389
390         if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
391                 goto tx_error_put;
392
393         /* drop old route */
394         skb_dst_drop(skb);
395         skb_dst_set(skb, &rt->u.dst);
396
397         /* mangle the packet */
398         if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
399                 goto tx_error;
400         ip_hdr(skb)->daddr = cp->daddr.ip;
401         ip_send_check(ip_hdr(skb));
402
403         IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
404
405         /* FIXME: when application helper enlarges the packet and the length
406            is larger than the MTU of outgoing device, there will be still
407            MTU problem. */
408
409         /* Another hack: avoid icmp_send in ip_fragment */
410         skb->local_df = 1;
411
412         IP_VS_XMIT(PF_INET, skb, rt);
413
414         LeaveFunction(10);
415         return NF_STOLEN;
416
417   tx_error_icmp:
418         dst_link_failure(skb);
419   tx_error:
420         LeaveFunction(10);
421         kfree_skb(skb);
422         return NF_STOLEN;
423   tx_error_put:
424         ip_rt_put(rt);
425         goto tx_error;
426 }
427
428 #ifdef CONFIG_IP_VS_IPV6
429 int
430 ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
431                   struct ip_vs_protocol *pp)
432 {
433         struct rt6_info *rt;            /* Route to the other host */
434         int mtu;
435
436         EnterFunction(10);
437
438         /* check if it is a connection of no-client-port */
439         if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
440                 __be16 _pt, *p;
441                 p = skb_header_pointer(skb, sizeof(struct ipv6hdr),
442                                        sizeof(_pt), &_pt);
443                 if (p == NULL)
444                         goto tx_error;
445                 ip_vs_conn_fill_cport(cp, *p);
446                 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
447         }
448
449         rt = __ip_vs_get_out_rt_v6(cp);
450         if (!rt)
451                 goto tx_error_icmp;
452
453         /* MTU checking */
454         mtu = dst_mtu(&rt->u.dst);
455         if (skb->len > mtu) {
456                 dst_release(&rt->u.dst);
457                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
458                 IP_VS_DBG_RL_PKT(0, pp, skb, 0,
459                                  "ip_vs_nat_xmit_v6(): frag needed for");
460                 goto tx_error;
461         }
462
463         /* copy-on-write the packet before mangling it */
464         if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
465                 goto tx_error_put;
466
467         if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
468                 goto tx_error_put;
469
470         /* drop old route */
471         skb_dst_drop(skb);
472         skb_dst_set(skb, &rt->u.dst);
473
474         /* mangle the packet */
475         if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
476                 goto tx_error;
477         ipv6_hdr(skb)->daddr = cp->daddr.in6;
478
479         IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
480
481         /* FIXME: when application helper enlarges the packet and the length
482            is larger than the MTU of outgoing device, there will be still
483            MTU problem. */
484
485         /* Another hack: avoid icmp_send in ip_fragment */
486         skb->local_df = 1;
487
488         IP_VS_XMIT(PF_INET6, skb, rt);
489
490         LeaveFunction(10);
491         return NF_STOLEN;
492
493 tx_error_icmp:
494         dst_link_failure(skb);
495 tx_error:
496         LeaveFunction(10);
497         kfree_skb(skb);
498         return NF_STOLEN;
499 tx_error_put:
500         dst_release(&rt->u.dst);
501         goto tx_error;
502 }
503 #endif
504
505
506 /*
507  *   IP Tunneling transmitter
508  *
509  *   This function encapsulates the packet in a new IP packet, its
510  *   destination will be set to cp->daddr. Most code of this function
511  *   is taken from ipip.c.
512  *
513  *   It is used in VS/TUN cluster. The load balancer selects a real
514  *   server from a cluster based on a scheduling algorithm,
515  *   encapsulates the request packet and forwards it to the selected
516  *   server. For example, all real servers are configured with
517  *   "ifconfig tunl0 <Virtual IP Address> up". When the server receives
518  *   the encapsulated packet, it will decapsulate the packet, processe
519  *   the request and return the response packets directly to the client
520  *   without passing the load balancer. This can greatly increase the
521  *   scalability of virtual server.
522  *
523  *   Used for ANY protocol
524  */
525 int
526 ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
527                   struct ip_vs_protocol *pp)
528 {
529         struct rtable *rt;                      /* Route to the other host */
530         struct net_device *tdev;                /* Device to other host */
531         struct iphdr  *old_iph = ip_hdr(skb);
532         u8     tos = old_iph->tos;
533         __be16 df = old_iph->frag_off;
534         sk_buff_data_t old_transport_header = skb->transport_header;
535         struct iphdr  *iph;                     /* Our new IP header */
536         unsigned int max_headroom;              /* The extra header space needed */
537         int    mtu;
538
539         EnterFunction(10);
540
541         if (skb->protocol != htons(ETH_P_IP)) {
542                 IP_VS_DBG_RL("%s(): protocol error, "
543                              "ETH_P_IP: %d, skb protocol: %d\n",
544                              __func__, htons(ETH_P_IP), skb->protocol);
545                 goto tx_error;
546         }
547
548         if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos))))
549                 goto tx_error_icmp;
550
551         tdev = rt->u.dst.dev;
552
553         mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
554         if (mtu < 68) {
555                 ip_rt_put(rt);
556                 IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
557                 goto tx_error;
558         }
559         if (skb_dst(skb))
560                 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
561
562         df |= (old_iph->frag_off & htons(IP_DF));
563
564         if ((old_iph->frag_off & htons(IP_DF))
565             && mtu < ntohs(old_iph->tot_len)) {
566                 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
567                 ip_rt_put(rt);
568                 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
569                 goto tx_error;
570         }
571
572         /*
573          * Okay, now see if we can stuff it in the buffer as-is.
574          */
575         max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
576
577         if (skb_headroom(skb) < max_headroom
578             || skb_cloned(skb) || skb_shared(skb)) {
579                 struct sk_buff *new_skb =
580                         skb_realloc_headroom(skb, max_headroom);
581                 if (!new_skb) {
582                         ip_rt_put(rt);
583                         kfree_skb(skb);
584                         IP_VS_ERR_RL("%s(): no memory\n", __func__);
585                         return NF_STOLEN;
586                 }
587                 kfree_skb(skb);
588                 skb = new_skb;
589                 old_iph = ip_hdr(skb);
590         }
591
592         skb->transport_header = old_transport_header;
593
594         /* fix old IP header checksum */
595         ip_send_check(old_iph);
596
597         skb_push(skb, sizeof(struct iphdr));
598         skb_reset_network_header(skb);
599         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
600
601         /* drop old route */
602         skb_dst_drop(skb);
603         skb_dst_set(skb, &rt->u.dst);
604
605         /*
606          *      Push down and install the IPIP header.
607          */
608         iph                     =       ip_hdr(skb);
609         iph->version            =       4;
610         iph->ihl                =       sizeof(struct iphdr)>>2;
611         iph->frag_off           =       df;
612         iph->protocol           =       IPPROTO_IPIP;
613         iph->tos                =       tos;
614         iph->daddr              =       rt->rt_dst;
615         iph->saddr              =       rt->rt_src;
616         iph->ttl                =       old_iph->ttl;
617         ip_select_ident(iph, &rt->u.dst, NULL);
618
619         /* Another hack: avoid icmp_send in ip_fragment */
620         skb->local_df = 1;
621
622         ip_local_out(skb);
623
624         LeaveFunction(10);
625
626         return NF_STOLEN;
627
628   tx_error_icmp:
629         dst_link_failure(skb);
630   tx_error:
631         kfree_skb(skb);
632         LeaveFunction(10);
633         return NF_STOLEN;
634 }
635
636 #ifdef CONFIG_IP_VS_IPV6
637 int
638 ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
639                      struct ip_vs_protocol *pp)
640 {
641         struct rt6_info *rt;            /* Route to the other host */
642         struct net_device *tdev;        /* Device to other host */
643         struct ipv6hdr  *old_iph = ipv6_hdr(skb);
644         sk_buff_data_t old_transport_header = skb->transport_header;
645         struct ipv6hdr  *iph;           /* Our new IP header */
646         unsigned int max_headroom;      /* The extra header space needed */
647         int    mtu;
648
649         EnterFunction(10);
650
651         if (skb->protocol != htons(ETH_P_IPV6)) {
652                 IP_VS_DBG_RL("%s(): protocol error, "
653                              "ETH_P_IPV6: %d, skb protocol: %d\n",
654                              __func__, htons(ETH_P_IPV6), skb->protocol);
655                 goto tx_error;
656         }
657
658         rt = __ip_vs_get_out_rt_v6(cp);
659         if (!rt)
660                 goto tx_error_icmp;
661
662         tdev = rt->u.dst.dev;
663
664         mtu = dst_mtu(&rt->u.dst) - sizeof(struct ipv6hdr);
665         /* TODO IPv6: do we need this check in IPv6? */
666         if (mtu < 1280) {
667                 dst_release(&rt->u.dst);
668                 IP_VS_DBG_RL("%s(): mtu less than 1280\n", __func__);
669                 goto tx_error;
670         }
671         if (skb_dst(skb))
672                 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
673
674         if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
675                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
676                 dst_release(&rt->u.dst);
677                 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
678                 goto tx_error;
679         }
680
681         /*
682          * Okay, now see if we can stuff it in the buffer as-is.
683          */
684         max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
685
686         if (skb_headroom(skb) < max_headroom
687             || skb_cloned(skb) || skb_shared(skb)) {
688                 struct sk_buff *new_skb =
689                         skb_realloc_headroom(skb, max_headroom);
690                 if (!new_skb) {
691                         dst_release(&rt->u.dst);
692                         kfree_skb(skb);
693                         IP_VS_ERR_RL("%s(): no memory\n", __func__);
694                         return NF_STOLEN;
695                 }
696                 kfree_skb(skb);
697                 skb = new_skb;
698                 old_iph = ipv6_hdr(skb);
699         }
700
701         skb->transport_header = old_transport_header;
702
703         skb_push(skb, sizeof(struct ipv6hdr));
704         skb_reset_network_header(skb);
705         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
706
707         /* drop old route */
708         skb_dst_drop(skb);
709         skb_dst_set(skb, &rt->u.dst);
710
711         /*
712          *      Push down and install the IPIP header.
713          */
714         iph                     =       ipv6_hdr(skb);
715         iph->version            =       6;
716         iph->nexthdr            =       IPPROTO_IPV6;
717         iph->payload_len        =       old_iph->payload_len;
718         be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
719         iph->priority           =       old_iph->priority;
720         memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
721         iph->daddr              =       rt->rt6i_dst.addr;
722         iph->saddr              =       cp->vaddr.in6; /* rt->rt6i_src.addr; */
723         iph->hop_limit          =       old_iph->hop_limit;
724
725         /* Another hack: avoid icmp_send in ip_fragment */
726         skb->local_df = 1;
727
728         ip6_local_out(skb);
729
730         LeaveFunction(10);
731
732         return NF_STOLEN;
733
734 tx_error_icmp:
735         dst_link_failure(skb);
736 tx_error:
737         kfree_skb(skb);
738         LeaveFunction(10);
739         return NF_STOLEN;
740 }
741 #endif
742
743
744 /*
745  *      Direct Routing transmitter
746  *      Used for ANY protocol
747  */
748 int
749 ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
750               struct ip_vs_protocol *pp)
751 {
752         struct rtable *rt;                      /* Route to the other host */
753         struct iphdr  *iph = ip_hdr(skb);
754         int    mtu;
755
756         EnterFunction(10);
757
758         if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
759                 goto tx_error_icmp;
760
761         /* MTU checking */
762         mtu = dst_mtu(&rt->u.dst);
763         if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
764                 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
765                 ip_rt_put(rt);
766                 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
767                 goto tx_error;
768         }
769
770         /*
771          * Call ip_send_check because we are not sure it is called
772          * after ip_defrag. Is copy-on-write needed?
773          */
774         if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
775                 ip_rt_put(rt);
776                 return NF_STOLEN;
777         }
778         ip_send_check(ip_hdr(skb));
779
780         /* drop old route */
781         skb_dst_drop(skb);
782         skb_dst_set(skb, &rt->u.dst);
783
784         /* Another hack: avoid icmp_send in ip_fragment */
785         skb->local_df = 1;
786
787         IP_VS_XMIT(PF_INET, skb, rt);
788
789         LeaveFunction(10);
790         return NF_STOLEN;
791
792   tx_error_icmp:
793         dst_link_failure(skb);
794   tx_error:
795         kfree_skb(skb);
796         LeaveFunction(10);
797         return NF_STOLEN;
798 }
799
800 #ifdef CONFIG_IP_VS_IPV6
801 int
802 ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
803                  struct ip_vs_protocol *pp)
804 {
805         struct rt6_info *rt;                    /* Route to the other host */
806         int    mtu;
807
808         EnterFunction(10);
809
810         rt = __ip_vs_get_out_rt_v6(cp);
811         if (!rt)
812                 goto tx_error_icmp;
813
814         /* MTU checking */
815         mtu = dst_mtu(&rt->u.dst);
816         if (skb->len > mtu) {
817                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
818                 dst_release(&rt->u.dst);
819                 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
820                 goto tx_error;
821         }
822
823         /*
824          * Call ip_send_check because we are not sure it is called
825          * after ip_defrag. Is copy-on-write needed?
826          */
827         skb = skb_share_check(skb, GFP_ATOMIC);
828         if (unlikely(skb == NULL)) {
829                 dst_release(&rt->u.dst);
830                 return NF_STOLEN;
831         }
832
833         /* drop old route */
834         skb_dst_drop(skb);
835         skb_dst_set(skb, &rt->u.dst);
836
837         /* Another hack: avoid icmp_send in ip_fragment */
838         skb->local_df = 1;
839
840         IP_VS_XMIT(PF_INET6, skb, rt);
841
842         LeaveFunction(10);
843         return NF_STOLEN;
844
845 tx_error_icmp:
846         dst_link_failure(skb);
847 tx_error:
848         kfree_skb(skb);
849         LeaveFunction(10);
850         return NF_STOLEN;
851 }
852 #endif
853
854
855 /*
856  *      ICMP packet transmitter
857  *      called by the ip_vs_in_icmp
858  */
859 int
860 ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
861                 struct ip_vs_protocol *pp, int offset)
862 {
863         struct rtable   *rt;    /* Route to the other host */
864         int mtu;
865         int rc;
866
867         EnterFunction(10);
868
869         /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
870            forwarded directly here, because there is no need to
871            translate address/port back */
872         if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
873                 if (cp->packet_xmit)
874                         rc = cp->packet_xmit(skb, cp, pp);
875                 else
876                         rc = NF_ACCEPT;
877                 /* do not touch skb anymore */
878                 atomic_inc(&cp->in_pkts);
879                 goto out;
880         }
881
882         /*
883          * mangle and send the packet here (only for VS/NAT)
884          */
885
886         if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(ip_hdr(skb)->tos))))
887                 goto tx_error_icmp;
888
889         /* MTU checking */
890         mtu = dst_mtu(&rt->u.dst);
891         if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
892                 ip_rt_put(rt);
893                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
894                 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
895                 goto tx_error;
896         }
897
898         /* copy-on-write the packet before mangling it */
899         if (!skb_make_writable(skb, offset))
900                 goto tx_error_put;
901
902         if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
903                 goto tx_error_put;
904
905         /* drop the old route when skb is not shared */
906         skb_dst_drop(skb);
907         skb_dst_set(skb, &rt->u.dst);
908
909         ip_vs_nat_icmp(skb, pp, cp, 0);
910
911         /* Another hack: avoid icmp_send in ip_fragment */
912         skb->local_df = 1;
913
914         IP_VS_XMIT(PF_INET, skb, rt);
915
916         rc = NF_STOLEN;
917         goto out;
918
919   tx_error_icmp:
920         dst_link_failure(skb);
921   tx_error:
922         dev_kfree_skb(skb);
923         rc = NF_STOLEN;
924   out:
925         LeaveFunction(10);
926         return rc;
927   tx_error_put:
928         ip_rt_put(rt);
929         goto tx_error;
930 }
931
932 #ifdef CONFIG_IP_VS_IPV6
933 int
934 ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
935                 struct ip_vs_protocol *pp, int offset)
936 {
937         struct rt6_info *rt;    /* Route to the other host */
938         int mtu;
939         int rc;
940
941         EnterFunction(10);
942
943         /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
944            forwarded directly here, because there is no need to
945            translate address/port back */
946         if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
947                 if (cp->packet_xmit)
948                         rc = cp->packet_xmit(skb, cp, pp);
949                 else
950                         rc = NF_ACCEPT;
951                 /* do not touch skb anymore */
952                 atomic_inc(&cp->in_pkts);
953                 goto out;
954         }
955
956         /*
957          * mangle and send the packet here (only for VS/NAT)
958          */
959
960         rt = __ip_vs_get_out_rt_v6(cp);
961         if (!rt)
962                 goto tx_error_icmp;
963
964         /* MTU checking */
965         mtu = dst_mtu(&rt->u.dst);
966         if (skb->len > mtu) {
967                 dst_release(&rt->u.dst);
968                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
969                 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
970                 goto tx_error;
971         }
972
973         /* copy-on-write the packet before mangling it */
974         if (!skb_make_writable(skb, offset))
975                 goto tx_error_put;
976
977         if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
978                 goto tx_error_put;
979
980         /* drop the old route when skb is not shared */
981         skb_dst_drop(skb);
982         skb_dst_set(skb, &rt->u.dst);
983
984         ip_vs_nat_icmp_v6(skb, pp, cp, 0);
985
986         /* Another hack: avoid icmp_send in ip_fragment */
987         skb->local_df = 1;
988
989         IP_VS_XMIT(PF_INET6, skb, rt);
990
991         rc = NF_STOLEN;
992         goto out;
993
994 tx_error_icmp:
995         dst_link_failure(skb);
996 tx_error:
997         dev_kfree_skb(skb);
998         rc = NF_STOLEN;
999 out:
1000         LeaveFunction(10);
1001         return rc;
1002 tx_error_put:
1003         dst_release(&rt->u.dst);
1004         goto tx_error;
1005 }
1006 #endif