netfilter: xtables: inclusion of xt_TEE
[linux-flexiantxendom0-natty.git] / net / ipv6 / ip6_output.c
1 /*
2  *      IPv6 output functions
3  *      Linux INET6 implementation
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      Based on linux/net/ipv4/ip_output.c
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  *
15  *      Changes:
16  *      A.N.Kuznetsov   :       arithmetics in fragmentation.
17  *                              extension headers are implemented.
18  *                              route changes now work.
19  *                              ip6_forward does not confuse sniffers.
20  *                              etc.
21  *
22  *      H. von Brand    :       Added missing #include <linux/string.h>
23  *      Imran Patel     :       frag id should be in NBO
24  *      Kazunori MIYAZAWA @USAGI
25  *                      :       add ip6_append_data and related functions
26  *                              for datagram xmit
27  */
28
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40
41 #include <linux/netfilter.h>
42 #include <linux/netfilter_ipv6.h>
43
44 #include <net/sock.h>
45 #include <net/snmp.h>
46
47 #include <net/ipv6.h>
48 #include <net/ndisc.h>
49 #include <net/protocol.h>
50 #include <net/ip6_route.h>
51 #include <net/addrconf.h>
52 #include <net/rawv6.h>
53 #include <net/icmp.h>
54 #include <net/xfrm.h>
55 #include <net/checksum.h>
56 #include <linux/mroute6.h>
57
58 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
59
60 int __ip6_local_out(struct sk_buff *skb)
61 {
62         int len;
63
64         len = skb->len - sizeof(struct ipv6hdr);
65         if (len > IPV6_MAXPLEN)
66                 len = 0;
67         ipv6_hdr(skb)->payload_len = htons(len);
68
69         return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
70                        skb_dst(skb)->dev, dst_output);
71 }
72
/*
 *	Send a locally generated packet.
 *
 *	Runs __ip6_local_out(); when that returns 1 the packet is handed to
 *	dst_output() here, otherwise the value is returned to the caller as is.
 */
int ip6_local_out(struct sk_buff *skb)
{
	int err = __ip6_local_out(skb);

	if (unlikely(err != 1))
		return err;

	return dst_output(skb);
}
EXPORT_SYMBOL_GPL(ip6_local_out);
84
85 /* dev_loopback_xmit for use with netfilter. */
86 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
87 {
88         skb_reset_mac_header(newskb);
89         __skb_pull(newskb, skb_network_offset(newskb));
90         newskb->pkt_type = PACKET_LOOPBACK;
91         newskb->ip_summed = CHECKSUM_UNNECESSARY;
92         WARN_ON(!skb_dst(newskb));
93
94         netif_rx(newskb);
95         return 0;
96 }
97
98 static int ip6_finish_output2(struct sk_buff *skb)
99 {
100         struct dst_entry *dst = skb_dst(skb);
101         struct net_device *dev = dst->dev;
102
103         skb->protocol = htons(ETH_P_IPV6);
104         skb->dev = dev;
105
106         if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
107                 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
108
109                 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
110                     ((mroute6_socket(dev_net(dev)) &&
111                      !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
112                      ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
113                                          &ipv6_hdr(skb)->saddr))) {
114                         struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
115
116                         /* Do not check for IFF_ALLMULTI; multicast routing
117                            is not supported in any case.
118                          */
119                         if (newskb)
120                                 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
121                                         newskb, NULL, newskb->dev,
122                                         ip6_dev_loopback_xmit);
123
124                         if (ipv6_hdr(skb)->hop_limit == 0) {
125                                 IP6_INC_STATS(dev_net(dev), idev,
126                                               IPSTATS_MIB_OUTDISCARDS);
127                                 kfree_skb(skb);
128                                 return 0;
129                         }
130                 }
131
132                 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
133                                 skb->len);
134         }
135
136         if (dst->hh)
137                 return neigh_hh_output(dst->hh, skb);
138         else if (dst->neighbour)
139                 return dst->neighbour->output(skb);
140
141         IP6_INC_STATS_BH(dev_net(dst->dev),
142                          ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
143         kfree_skb(skb);
144         return -EINVAL;
145 }
146
147 static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
148 {
149         struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
150
151         return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
152                skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
153 }
154
155 static int ip6_finish_output(struct sk_buff *skb)
156 {
157         if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
158             dst_allfrag(skb_dst(skb)))
159                 return ip6_fragment(skb, ip6_finish_output2);
160         else
161                 return ip6_finish_output2(skb);
162 }
163
164 int ip6_output(struct sk_buff *skb)
165 {
166         struct net_device *dev = skb_dst(skb)->dev;
167         struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
168         if (unlikely(idev->cnf.disable_ipv6)) {
169                 IP6_INC_STATS(dev_net(dev), idev,
170                               IPSTATS_MIB_OUTDISCARDS);
171                 kfree_skb(skb);
172                 return 0;
173         }
174
175         return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
176                             ip6_finish_output,
177                             !(IP6CB(skb)->flags & IP6SKB_REROUTED));
178 }
179 EXPORT_SYMBOL_GPL(ip6_output);
180
181 /*
182  *      xmit an sk_buff (used by TCP)
183  */
184
185 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
186              struct ipv6_txoptions *opt, int ipfragok)
187 {
188         struct net *net = sock_net(sk);
189         struct ipv6_pinfo *np = inet6_sk(sk);
190         struct in6_addr *first_hop = &fl->fl6_dst;
191         struct dst_entry *dst = skb_dst(skb);
192         struct ipv6hdr *hdr;
193         u8  proto = fl->proto;
194         int seg_len = skb->len;
195         int hlimit = -1;
196         int tclass = 0;
197         u32 mtu;
198
199         if (opt) {
200                 unsigned int head_room;
201
202                 /* First: exthdrs may take lots of space (~8K for now)
203                    MAX_HEADER is not enough.
204                  */
205                 head_room = opt->opt_nflen + opt->opt_flen;
206                 seg_len += head_room;
207                 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
208
209                 if (skb_headroom(skb) < head_room) {
210                         struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
211                         if (skb2 == NULL) {
212                                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
213                                               IPSTATS_MIB_OUTDISCARDS);
214                                 kfree_skb(skb);
215                                 return -ENOBUFS;
216                         }
217                         kfree_skb(skb);
218                         skb = skb2;
219                         if (sk)
220                                 skb_set_owner_w(skb, sk);
221                 }
222                 if (opt->opt_flen)
223                         ipv6_push_frag_opts(skb, opt, &proto);
224                 if (opt->opt_nflen)
225                         ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
226         }
227
228         skb_push(skb, sizeof(struct ipv6hdr));
229         skb_reset_network_header(skb);
230         hdr = ipv6_hdr(skb);
231
232         /* Allow local fragmentation. */
233         if (ipfragok)
234                 skb->local_df = 1;
235
236         /*
237          *      Fill in the IPv6 header
238          */
239         if (np) {
240                 tclass = np->tclass;
241                 hlimit = np->hop_limit;
242         }
243         if (hlimit < 0)
244                 hlimit = ip6_dst_hoplimit(dst);
245
246         *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
247
248         hdr->payload_len = htons(seg_len);
249         hdr->nexthdr = proto;
250         hdr->hop_limit = hlimit;
251
252         ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
253         ipv6_addr_copy(&hdr->daddr, first_hop);
254
255         skb->priority = sk->sk_priority;
256         skb->mark = sk->sk_mark;
257
258         mtu = dst_mtu(dst);
259         if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
260                 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
261                               IPSTATS_MIB_OUT, skb->len);
262                 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
263                                dst->dev, dst_output);
264         }
265
266         if (net_ratelimit())
267                 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
268         skb->dev = dst->dev;
269         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
270         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
271         kfree_skb(skb);
272         return -EMSGSIZE;
273 }
274
275 EXPORT_SYMBOL(ip6_xmit);
276
277 /*
278  *      To avoid extra problems ND packets are sent through this
279  *      routine. It's code duplication but I really want to avoid
280  *      extra checks since ipv6_build_header is used by TCP (which
281  *      is for us performance critical)
282  */
283
284 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
285                const struct in6_addr *saddr, const struct in6_addr *daddr,
286                int proto, int len)
287 {
288         struct ipv6_pinfo *np = inet6_sk(sk);
289         struct ipv6hdr *hdr;
290         int totlen;
291
292         skb->protocol = htons(ETH_P_IPV6);
293         skb->dev = dev;
294
295         totlen = len + sizeof(struct ipv6hdr);
296
297         skb_reset_network_header(skb);
298         skb_put(skb, sizeof(struct ipv6hdr));
299         hdr = ipv6_hdr(skb);
300
301         *(__be32*)hdr = htonl(0x60000000);
302
303         hdr->payload_len = htons(len);
304         hdr->nexthdr = proto;
305         hdr->hop_limit = np->hop_limit;
306
307         ipv6_addr_copy(&hdr->saddr, saddr);
308         ipv6_addr_copy(&hdr->daddr, daddr);
309
310         return 0;
311 }
312
313 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
314 {
315         struct ip6_ra_chain *ra;
316         struct sock *last = NULL;
317
318         read_lock(&ip6_ra_lock);
319         for (ra = ip6_ra_chain; ra; ra = ra->next) {
320                 struct sock *sk = ra->sk;
321                 if (sk && ra->sel == sel &&
322                     (!sk->sk_bound_dev_if ||
323                      sk->sk_bound_dev_if == skb->dev->ifindex)) {
324                         if (last) {
325                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
326                                 if (skb2)
327                                         rawv6_rcv(last, skb2);
328                         }
329                         last = sk;
330                 }
331         }
332
333         if (last) {
334                 rawv6_rcv(last, skb);
335                 read_unlock(&ip6_ra_lock);
336                 return 1;
337         }
338         read_unlock(&ip6_ra_lock);
339         return 0;
340 }
341
342 static int ip6_forward_proxy_check(struct sk_buff *skb)
343 {
344         struct ipv6hdr *hdr = ipv6_hdr(skb);
345         u8 nexthdr = hdr->nexthdr;
346         int offset;
347
348         if (ipv6_ext_hdr(nexthdr)) {
349                 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
350                 if (offset < 0)
351                         return 0;
352         } else
353                 offset = sizeof(struct ipv6hdr);
354
355         if (nexthdr == IPPROTO_ICMPV6) {
356                 struct icmp6hdr *icmp6;
357
358                 if (!pskb_may_pull(skb, (skb_network_header(skb) +
359                                          offset + 1 - skb->data)))
360                         return 0;
361
362                 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
363
364                 switch (icmp6->icmp6_type) {
365                 case NDISC_ROUTER_SOLICITATION:
366                 case NDISC_ROUTER_ADVERTISEMENT:
367                 case NDISC_NEIGHBOUR_SOLICITATION:
368                 case NDISC_NEIGHBOUR_ADVERTISEMENT:
369                 case NDISC_REDIRECT:
370                         /* For reaction involving unicast neighbor discovery
371                          * message destined to the proxied address, pass it to
372                          * input function.
373                          */
374                         return 1;
375                 default:
376                         break;
377                 }
378         }
379
380         /*
381          * The proxying router can't forward traffic sent to a link-local
382          * address, so signal the sender and discard the packet. This
383          * behavior is clarified by the MIPv6 specification.
384          */
385         if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
386                 dst_link_failure(skb);
387                 return -1;
388         }
389
390         return 0;
391 }
392
/* Continuation of the NF_INET_FORWARD hook: send the packet out via its route. */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	int rc = dst_output(skb);

	return rc;
}
397
398 int ip6_forward(struct sk_buff *skb)
399 {
400         struct dst_entry *dst = skb_dst(skb);
401         struct ipv6hdr *hdr = ipv6_hdr(skb);
402         struct inet6_skb_parm *opt = IP6CB(skb);
403         struct net *net = dev_net(dst->dev);
404         u32 mtu;
405
406         if (net->ipv6.devconf_all->forwarding == 0)
407                 goto error;
408
409         if (skb_warn_if_lro(skb))
410                 goto drop;
411
412         if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
413                 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
414                 goto drop;
415         }
416
417         skb_forward_csum(skb);
418
419         /*
420          *      We DO NOT make any processing on
421          *      RA packets, pushing them to user level AS IS
422          *      without ane WARRANTY that application will be able
423          *      to interpret them. The reason is that we
424          *      cannot make anything clever here.
425          *
426          *      We are not end-node, so that if packet contains
427          *      AH/ESP, we cannot make anything.
428          *      Defragmentation also would be mistake, RA packets
429          *      cannot be fragmented, because there is no warranty
430          *      that different fragments will go along one path. --ANK
431          */
432         if (opt->ra) {
433                 u8 *ptr = skb_network_header(skb) + opt->ra;
434                 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
435                         return 0;
436         }
437
438         /*
439          *      check and decrement ttl
440          */
441         if (hdr->hop_limit <= 1) {
442                 /* Force OUTPUT device used as source address */
443                 skb->dev = dst->dev;
444                 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
445                 IP6_INC_STATS_BH(net,
446                                  ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
447
448                 kfree_skb(skb);
449                 return -ETIMEDOUT;
450         }
451
452         /* XXX: idev->cnf.proxy_ndp? */
453         if (net->ipv6.devconf_all->proxy_ndp &&
454             pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
455                 int proxied = ip6_forward_proxy_check(skb);
456                 if (proxied > 0)
457                         return ip6_input(skb);
458                 else if (proxied < 0) {
459                         IP6_INC_STATS(net, ip6_dst_idev(dst),
460                                       IPSTATS_MIB_INDISCARDS);
461                         goto drop;
462                 }
463         }
464
465         if (!xfrm6_route_forward(skb)) {
466                 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
467                 goto drop;
468         }
469         dst = skb_dst(skb);
470
471         /* IPv6 specs say nothing about it, but it is clear that we cannot
472            send redirects to source routed frames.
473            We don't send redirects to frames decapsulated from IPsec.
474          */
475         if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
476             !skb_sec_path(skb)) {
477                 struct in6_addr *target = NULL;
478                 struct rt6_info *rt;
479                 struct neighbour *n = dst->neighbour;
480
481                 /*
482                  *      incoming and outgoing devices are the same
483                  *      send a redirect.
484                  */
485
486                 rt = (struct rt6_info *) dst;
487                 if ((rt->rt6i_flags & RTF_GATEWAY))
488                         target = (struct in6_addr*)&n->primary_key;
489                 else
490                         target = &hdr->daddr;
491
492                 /* Limit redirects both by destination (here)
493                    and by source (inside ndisc_send_redirect)
494                  */
495                 if (xrlim_allow(dst, 1*HZ))
496                         ndisc_send_redirect(skb, n, target);
497         } else {
498                 int addrtype = ipv6_addr_type(&hdr->saddr);
499
500                 /* This check is security critical. */
501                 if (addrtype == IPV6_ADDR_ANY ||
502                     addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
503                         goto error;
504                 if (addrtype & IPV6_ADDR_LINKLOCAL) {
505                         icmpv6_send(skb, ICMPV6_DEST_UNREACH,
506                                     ICMPV6_NOT_NEIGHBOUR, 0);
507                         goto error;
508                 }
509         }
510
511         mtu = dst_mtu(dst);
512         if (mtu < IPV6_MIN_MTU)
513                 mtu = IPV6_MIN_MTU;
514
515         if (skb->len > mtu) {
516                 /* Again, force OUTPUT device used as source address */
517                 skb->dev = dst->dev;
518                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
519                 IP6_INC_STATS_BH(net,
520                                  ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
521                 IP6_INC_STATS_BH(net,
522                                  ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
523                 kfree_skb(skb);
524                 return -EMSGSIZE;
525         }
526
527         if (skb_cow(skb, dst->dev->hard_header_len)) {
528                 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
529                 goto drop;
530         }
531
532         hdr = ipv6_hdr(skb);
533
534         /* Mangling hops number delayed to point after skb COW */
535
536         hdr->hop_limit--;
537
538         IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
539         return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
540                        ip6_forward_finish);
541
542 error:
543         IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
544 drop:
545         kfree_skb(skb);
546         return -EINVAL;
547 }
548
549 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
550 {
551         to->pkt_type = from->pkt_type;
552         to->priority = from->priority;
553         to->protocol = from->protocol;
554         skb_dst_drop(to);
555         skb_dst_set(to, dst_clone(skb_dst(from)));
556         to->dev = from->dev;
557         to->mark = from->mark;
558
559 #ifdef CONFIG_NET_SCHED
560         to->tc_index = from->tc_index;
561 #endif
562         nf_copy(to, from);
563 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
564     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
565         to->nf_trace = from->nf_trace;
566 #endif
567         skb_copy_secmark(to, from);
568 }
569
570 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
571 {
572         u16 offset = sizeof(struct ipv6hdr);
573         struct ipv6_opt_hdr *exthdr =
574                                 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
575         unsigned int packet_len = skb->tail - skb->network_header;
576         int found_rhdr = 0;
577         *nexthdr = &ipv6_hdr(skb)->nexthdr;
578
579         while (offset + 1 <= packet_len) {
580
581                 switch (**nexthdr) {
582
583                 case NEXTHDR_HOP:
584                         break;
585                 case NEXTHDR_ROUTING:
586                         found_rhdr = 1;
587                         break;
588                 case NEXTHDR_DEST:
589 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
590                         if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
591                                 break;
592 #endif
593                         if (found_rhdr)
594                                 return offset;
595                         break;
596                 default :
597                         return offset;
598                 }
599
600                 offset += ipv6_optlen(exthdr);
601                 *nexthdr = &exthdr->nexthdr;
602                 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
603                                                  offset);
604         }
605
606         return offset;
607 }
608
609 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
610 {
611         struct sk_buff *frag;
612         struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
613         struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
614         struct ipv6hdr *tmp_hdr;
615         struct frag_hdr *fh;
616         unsigned int mtu, hlen, left, len;
617         __be32 frag_id = 0;
618         int ptr, offset = 0, err=0;
619         u8 *prevhdr, nexthdr = 0;
620         struct net *net = dev_net(skb_dst(skb)->dev);
621
622         hlen = ip6_find_1stfragopt(skb, &prevhdr);
623         nexthdr = *prevhdr;
624
625         mtu = ip6_skb_dst_mtu(skb);
626
627         /* We must not fragment if the socket is set to force MTU discovery
628          * or if the skb it not generated by a local socket.
629          */
630         if (!skb->local_df) {
631                 skb->dev = skb_dst(skb)->dev;
632                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
633                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
634                               IPSTATS_MIB_FRAGFAILS);
635                 kfree_skb(skb);
636                 return -EMSGSIZE;
637         }
638
639         if (np && np->frag_size < mtu) {
640                 if (np->frag_size)
641                         mtu = np->frag_size;
642         }
643         mtu -= hlen + sizeof(struct frag_hdr);
644
645         if (skb_has_frags(skb)) {
646                 int first_len = skb_pagelen(skb);
647                 int truesizes = 0;
648
649                 if (first_len - hlen > mtu ||
650                     ((first_len - hlen) & 7) ||
651                     skb_cloned(skb))
652                         goto slow_path;
653
654                 skb_walk_frags(skb, frag) {
655                         /* Correct geometry. */
656                         if (frag->len > mtu ||
657                             ((frag->len & 7) && frag->next) ||
658                             skb_headroom(frag) < hlen)
659                             goto slow_path;
660
661                         /* Partially cloned skb? */
662                         if (skb_shared(frag))
663                                 goto slow_path;
664
665                         BUG_ON(frag->sk);
666                         if (skb->sk) {
667                                 frag->sk = skb->sk;
668                                 frag->destructor = sock_wfree;
669                                 truesizes += frag->truesize;
670                         }
671                 }
672
673                 err = 0;
674                 offset = 0;
675                 frag = skb_shinfo(skb)->frag_list;
676                 skb_frag_list_init(skb);
677                 /* BUILD HEADER */
678
679                 *prevhdr = NEXTHDR_FRAGMENT;
680                 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
681                 if (!tmp_hdr) {
682                         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
683                                       IPSTATS_MIB_FRAGFAILS);
684                         return -ENOMEM;
685                 }
686
687                 __skb_pull(skb, hlen);
688                 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
689                 __skb_push(skb, hlen);
690                 skb_reset_network_header(skb);
691                 memcpy(skb_network_header(skb), tmp_hdr, hlen);
692
693                 ipv6_select_ident(fh);
694                 fh->nexthdr = nexthdr;
695                 fh->reserved = 0;
696                 fh->frag_off = htons(IP6_MF);
697                 frag_id = fh->identification;
698
699                 first_len = skb_pagelen(skb);
700                 skb->data_len = first_len - skb_headlen(skb);
701                 skb->truesize -= truesizes;
702                 skb->len = first_len;
703                 ipv6_hdr(skb)->payload_len = htons(first_len -
704                                                    sizeof(struct ipv6hdr));
705
706                 dst_hold(&rt->u.dst);
707
708                 for (;;) {
709                         /* Prepare header of the next frame,
710                          * before previous one went down. */
711                         if (frag) {
712                                 frag->ip_summed = CHECKSUM_NONE;
713                                 skb_reset_transport_header(frag);
714                                 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
715                                 __skb_push(frag, hlen);
716                                 skb_reset_network_header(frag);
717                                 memcpy(skb_network_header(frag), tmp_hdr,
718                                        hlen);
719                                 offset += skb->len - hlen - sizeof(struct frag_hdr);
720                                 fh->nexthdr = nexthdr;
721                                 fh->reserved = 0;
722                                 fh->frag_off = htons(offset);
723                                 if (frag->next != NULL)
724                                         fh->frag_off |= htons(IP6_MF);
725                                 fh->identification = frag_id;
726                                 ipv6_hdr(frag)->payload_len =
727                                                 htons(frag->len -
728                                                       sizeof(struct ipv6hdr));
729                                 ip6_copy_metadata(frag, skb);
730                         }
731
732                         err = output(skb);
733                         if(!err)
734                                 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
735                                               IPSTATS_MIB_FRAGCREATES);
736
737                         if (err || !frag)
738                                 break;
739
740                         skb = frag;
741                         frag = skb->next;
742                         skb->next = NULL;
743                 }
744
745                 kfree(tmp_hdr);
746
747                 if (err == 0) {
748                         IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
749                                       IPSTATS_MIB_FRAGOKS);
750                         dst_release(&rt->u.dst);
751                         return 0;
752                 }
753
754                 while (frag) {
755                         skb = frag->next;
756                         kfree_skb(frag);
757                         frag = skb;
758                 }
759
760                 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
761                               IPSTATS_MIB_FRAGFAILS);
762                 dst_release(&rt->u.dst);
763                 return err;
764         }
765
766 slow_path:
767         left = skb->len - hlen;         /* Space per frame */
768         ptr = hlen;                     /* Where to start from */
769
770         /*
771          *      Fragment the datagram.
772          */
773
774         *prevhdr = NEXTHDR_FRAGMENT;
775
776         /*
777          *      Keep copying data until we run out.
778          */
779         while(left > 0) {
780                 len = left;
781                 /* IF: it doesn't fit, use 'mtu' - the data space left */
782                 if (len > mtu)
783                         len = mtu;
784                 /* IF: we are not sending upto and including the packet end
785                    then align the next start on an eight byte boundary */
786                 if (len < left) {
787                         len &= ~7;
788                 }
789                 /*
790                  *      Allocate buffer.
791                  */
792
793                 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
794                         NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
795                         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
796                                       IPSTATS_MIB_FRAGFAILS);
797                         err = -ENOMEM;
798                         goto fail;
799                 }
800
801                 /*
802                  *      Set up data on packet
803                  */
804
805                 ip6_copy_metadata(frag, skb);
806                 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
807                 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
808                 skb_reset_network_header(frag);
809                 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
810                 frag->transport_header = (frag->network_header + hlen +
811                                           sizeof(struct frag_hdr));
812
813                 /*
814                  *      Charge the memory for the fragment to any owner
815                  *      it might possess
816                  */
817                 if (skb->sk)
818                         skb_set_owner_w(frag, skb->sk);
819
820                 /*
821                  *      Copy the packet header into the new buffer.
822                  */
823                 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
824
825                 /*
826                  *      Build fragment header.
827                  */
828                 fh->nexthdr = nexthdr;
829                 fh->reserved = 0;
830                 if (!frag_id) {
831                         ipv6_select_ident(fh);
832                         frag_id = fh->identification;
833                 } else
834                         fh->identification = frag_id;
835
836                 /*
837                  *      Copy a block of the IP datagram.
838                  */
839                 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
840                         BUG();
841                 left -= len;
842
843                 fh->frag_off = htons(offset);
844                 if (left > 0)
845                         fh->frag_off |= htons(IP6_MF);
846                 ipv6_hdr(frag)->payload_len = htons(frag->len -
847                                                     sizeof(struct ipv6hdr));
848
849                 ptr += len;
850                 offset += len;
851
852                 /*
853                  *      Put this fragment into the sending queue.
854                  */
855                 err = output(frag);
856                 if (err)
857                         goto fail;
858
859                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
860                               IPSTATS_MIB_FRAGCREATES);
861         }
862         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
863                       IPSTATS_MIB_FRAGOKS);
864         kfree_skb(skb);
865         return err;
866
867 fail:
868         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
869                       IPSTATS_MIB_FRAGFAILS);
870         kfree_skb(skb);
871         return err;
872 }
873
874 static inline int ip6_rt_check(struct rt6key *rt_key,
875                                struct in6_addr *fl_addr,
876                                struct in6_addr *addr_cache)
877 {
878         return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
879                 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
880 }
881
882 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
883                                           struct dst_entry *dst,
884                                           struct flowi *fl)
885 {
886         struct ipv6_pinfo *np = inet6_sk(sk);
887         struct rt6_info *rt = (struct rt6_info *)dst;
888
889         if (!dst)
890                 goto out;
891
892         /* Yes, checking route validity in not connected
893          * case is not very simple. Take into account,
894          * that we do not support routing by source, TOS,
895          * and MSG_DONTROUTE            --ANK (980726)
896          *
897          * 1. ip6_rt_check(): If route was host route,
898          *    check that cached destination is current.
899          *    If it is network route, we still may
900          *    check its validity using saved pointer
901          *    to the last used address: daddr_cache.
902          *    We do not want to save whole address now,
903          *    (because main consumer of this service
904          *    is tcp, which has not this problem),
905          *    so that the last trick works only on connected
906          *    sockets.
907          * 2. oif also should be the same.
908          */
909         if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
910 #ifdef CONFIG_IPV6_SUBTREES
911             ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
912 #endif
913             (fl->oif && fl->oif != dst->dev->ifindex)) {
914                 dst_release(dst);
915                 dst = NULL;
916         }
917
918 out:
919         return dst;
920 }
921
922 static int ip6_dst_lookup_tail(struct sock *sk,
923                                struct dst_entry **dst, struct flowi *fl)
924 {
925         int err;
926         struct net *net = sock_net(sk);
927
928         if (*dst == NULL)
929                 *dst = ip6_route_output(net, sk, fl);
930
931         if ((err = (*dst)->error))
932                 goto out_err_release;
933
934         if (ipv6_addr_any(&fl->fl6_src)) {
935                 err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
936                                          &fl->fl6_dst,
937                                          sk ? inet6_sk(sk)->srcprefs : 0,
938                                          &fl->fl6_src);
939                 if (err)
940                         goto out_err_release;
941         }
942
943 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
944         /*
945          * Here if the dst entry we've looked up
946          * has a neighbour entry that is in the INCOMPLETE
947          * state and the src address from the flow is
948          * marked as OPTIMISTIC, we release the found
949          * dst entry and replace it instead with the
950          * dst entry of the nexthop router
951          */
952         if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
953                 struct inet6_ifaddr *ifp;
954                 struct flowi fl_gw;
955                 int redirect;
956
957                 ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
958                                       (*dst)->dev, 1);
959
960                 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
961                 if (ifp)
962                         in6_ifa_put(ifp);
963
964                 if (redirect) {
965                         /*
966                          * We need to get the dst entry for the
967                          * default router instead
968                          */
969                         dst_release(*dst);
970                         memcpy(&fl_gw, fl, sizeof(struct flowi));
971                         memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
972                         *dst = ip6_route_output(net, sk, &fl_gw);
973                         if ((err = (*dst)->error))
974                                 goto out_err_release;
975                 }
976         }
977 #endif
978
979         return 0;
980
981 out_err_release:
982         if (err == -ENETUNREACH)
983                 IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
984         dst_release(*dst);
985         *dst = NULL;
986         return err;
987 }
988
989 /**
990  *      ip6_dst_lookup - perform route lookup on flow
991  *      @sk: socket which provides route info
992  *      @dst: pointer to dst_entry * for result
993  *      @fl: flow to lookup
994  *
995  *      This function performs a route lookup on the given flow.
996  *
997  *      It returns zero on success, or a standard errno code on error.
998  */
999 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1000 {
1001         *dst = NULL;
1002         return ip6_dst_lookup_tail(sk, dst, fl);
1003 }
1004 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1005
1006 /**
1007  *      ip6_sk_dst_lookup - perform socket cached route lookup on flow
1008  *      @sk: socket which provides the dst cache and route info
1009  *      @dst: pointer to dst_entry * for result
1010  *      @fl: flow to lookup
1011  *
1012  *      This function performs a route lookup on the given flow with the
1013  *      possibility of using the cached route in the socket if it is valid.
1014  *      It will take the socket dst lock when operating on the dst cache.
1015  *      As a result, this function can only be used in process context.
1016  *
1017  *      It returns zero on success, or a standard errno code on error.
1018  */
1019 int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1020 {
1021         *dst = NULL;
1022         if (sk) {
1023                 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1024                 *dst = ip6_sk_dst_check(sk, *dst, fl);
1025         }
1026
1027         return ip6_dst_lookup_tail(sk, dst, fl);
1028 }
1029 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
1030
1031 static inline int ip6_ufo_append_data(struct sock *sk,
1032                         int getfrag(void *from, char *to, int offset, int len,
1033                         int odd, struct sk_buff *skb),
1034                         void *from, int length, int hh_len, int fragheaderlen,
1035                         int transhdrlen, int mtu,unsigned int flags)
1036
1037 {
1038         struct sk_buff *skb;
1039         int err;
1040
1041         /* There is support for UDP large send offload by network
1042          * device, so create one single skb packet containing complete
1043          * udp datagram
1044          */
1045         if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1046                 skb = sock_alloc_send_skb(sk,
1047                         hh_len + fragheaderlen + transhdrlen + 20,
1048                         (flags & MSG_DONTWAIT), &err);
1049                 if (skb == NULL)
1050                         return -ENOMEM;
1051
1052                 /* reserve space for Hardware header */
1053                 skb_reserve(skb, hh_len);
1054
1055                 /* create space for UDP/IP header */
1056                 skb_put(skb,fragheaderlen + transhdrlen);
1057
1058                 /* initialize network header pointer */
1059                 skb_reset_network_header(skb);
1060
1061                 /* initialize protocol header pointer */
1062                 skb->transport_header = skb->network_header + fragheaderlen;
1063
1064                 skb->ip_summed = CHECKSUM_PARTIAL;
1065                 skb->csum = 0;
1066                 sk->sk_sndmsg_off = 0;
1067         }
1068
1069         err = skb_append_datato_frags(sk,skb, getfrag, from,
1070                                       (length - transhdrlen));
1071         if (!err) {
1072                 struct frag_hdr fhdr;
1073
1074                 /* Specify the length of each IPv6 datagram fragment.
1075                  * It has to be a multiple of 8.
1076                  */
1077                 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1078                                              sizeof(struct frag_hdr)) & ~7;
1079                 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1080                 ipv6_select_ident(&fhdr);
1081                 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1082                 __skb_queue_tail(&sk->sk_write_queue, skb);
1083
1084                 return 0;
1085         }
1086         /* There is not enough support do UPD LSO,
1087          * so follow normal path
1088          */
1089         kfree_skb(skb);
1090
1091         return err;
1092 }
1093
1094 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1095                                                gfp_t gfp)
1096 {
1097         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1098 }
1099
1100 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1101                                                 gfp_t gfp)
1102 {
1103         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1104 }
1105
1106 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1107         int offset, int len, int odd, struct sk_buff *skb),
1108         void *from, int length, int transhdrlen,
1109         int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
1110         struct rt6_info *rt, unsigned int flags)
1111 {
1112         struct inet_sock *inet = inet_sk(sk);
1113         struct ipv6_pinfo *np = inet6_sk(sk);
1114         struct sk_buff *skb;
1115         unsigned int maxfraglen, fragheaderlen;
1116         int exthdrlen;
1117         int hh_len;
1118         int mtu;
1119         int copy;
1120         int err;
1121         int offset = 0;
1122         int csummode = CHECKSUM_NONE;
1123
1124         if (flags&MSG_PROBE)
1125                 return 0;
1126         if (skb_queue_empty(&sk->sk_write_queue)) {
1127                 /*
1128                  * setup for corking
1129                  */
1130                 if (opt) {
1131                         if (WARN_ON(np->cork.opt))
1132                                 return -EINVAL;
1133
1134                         np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
1135                         if (unlikely(np->cork.opt == NULL))
1136                                 return -ENOBUFS;
1137
1138                         np->cork.opt->tot_len = opt->tot_len;
1139                         np->cork.opt->opt_flen = opt->opt_flen;
1140                         np->cork.opt->opt_nflen = opt->opt_nflen;
1141
1142                         np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1143                                                             sk->sk_allocation);
1144                         if (opt->dst0opt && !np->cork.opt->dst0opt)
1145                                 return -ENOBUFS;
1146
1147                         np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1148                                                             sk->sk_allocation);
1149                         if (opt->dst1opt && !np->cork.opt->dst1opt)
1150                                 return -ENOBUFS;
1151
1152                         np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
1153                                                            sk->sk_allocation);
1154                         if (opt->hopopt && !np->cork.opt->hopopt)
1155                                 return -ENOBUFS;
1156
1157                         np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1158                                                             sk->sk_allocation);
1159                         if (opt->srcrt && !np->cork.opt->srcrt)
1160                                 return -ENOBUFS;
1161
1162                         /* need source address above miyazawa*/
1163                 }
1164                 dst_hold(&rt->u.dst);
1165                 inet->cork.dst = &rt->u.dst;
1166                 inet->cork.fl = *fl;
1167                 np->cork.hop_limit = hlimit;
1168                 np->cork.tclass = tclass;
1169                 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1170                       rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
1171                 if (np->frag_size < mtu) {
1172                         if (np->frag_size)
1173                                 mtu = np->frag_size;
1174                 }
1175                 inet->cork.fragsize = mtu;
1176                 if (dst_allfrag(rt->u.dst.path))
1177                         inet->cork.flags |= IPCORK_ALLFRAG;
1178                 inet->cork.length = 0;
1179                 sk->sk_sndmsg_page = NULL;
1180                 sk->sk_sndmsg_off = 0;
1181                 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
1182                             rt->rt6i_nfheader_len;
1183                 length += exthdrlen;
1184                 transhdrlen += exthdrlen;
1185         } else {
1186                 rt = (struct rt6_info *)inet->cork.dst;
1187                 fl = &inet->cork.fl;
1188                 opt = np->cork.opt;
1189                 transhdrlen = 0;
1190                 exthdrlen = 0;
1191                 mtu = inet->cork.fragsize;
1192         }
1193
1194         hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1195
1196         fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1197                         (opt ? opt->opt_nflen : 0);
1198         maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1199
1200         if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1201                 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1202                         ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
1203                         return -EMSGSIZE;
1204                 }
1205         }
1206
1207         /*
1208          * Let's try using as much space as possible.
1209          * Use MTU if total length of the message fits into the MTU.
1210          * Otherwise, we need to reserve fragment header and
1211          * fragment alignment (= 8-15 octects, in total).
1212          *
1213          * Note that we may need to "move" the data from the tail of
1214          * of the buffer to the new fragment when we split
1215          * the message.
1216          *
1217          * FIXME: It may be fragmented into multiple chunks
1218          *        at once if non-fragmentable extension headers
1219          *        are too large.
1220          * --yoshfuji
1221          */
1222
1223         inet->cork.length += length;
1224         if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
1225             (rt->u.dst.dev->features & NETIF_F_UFO)) {
1226
1227                 err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
1228                                           fragheaderlen, transhdrlen, mtu,
1229                                           flags);
1230                 if (err)
1231                         goto error;
1232                 return 0;
1233         }
1234
1235         if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1236                 goto alloc_new_skb;
1237
1238         while (length > 0) {
1239                 /* Check if the remaining data fits into current packet. */
1240                 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1241                 if (copy < length)
1242                         copy = maxfraglen - skb->len;
1243
1244                 if (copy <= 0) {
1245                         char *data;
1246                         unsigned int datalen;
1247                         unsigned int fraglen;
1248                         unsigned int fraggap;
1249                         unsigned int alloclen;
1250                         struct sk_buff *skb_prev;
1251 alloc_new_skb:
1252                         skb_prev = skb;
1253
1254                         /* There's no room in the current skb */
1255                         if (skb_prev)
1256                                 fraggap = skb_prev->len - maxfraglen;
1257                         else
1258                                 fraggap = 0;
1259
1260                         /*
1261                          * If remaining data exceeds the mtu,
1262                          * we know we need more fragment(s).
1263                          */
1264                         datalen = length + fraggap;
1265                         if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1266                                 datalen = maxfraglen - fragheaderlen;
1267
1268                         fraglen = datalen + fragheaderlen;
1269                         if ((flags & MSG_MORE) &&
1270                             !(rt->u.dst.dev->features&NETIF_F_SG))
1271                                 alloclen = mtu;
1272                         else
1273                                 alloclen = datalen + fragheaderlen;
1274
1275                         /*
1276                          * The last fragment gets additional space at tail.
1277                          * Note: we overallocate on fragments with MSG_MODE
1278                          * because we have no idea if we're the last one.
1279                          */
1280                         if (datalen == length + fraggap)
1281                                 alloclen += rt->u.dst.trailer_len;
1282
1283                         /*
1284                          * We just reserve space for fragment header.
1285                          * Note: this may be overallocation if the message
1286                          * (without MSG_MORE) fits into the MTU.
1287                          */
1288                         alloclen += sizeof(struct frag_hdr);
1289
1290                         if (transhdrlen) {
1291                                 skb = sock_alloc_send_skb(sk,
1292                                                 alloclen + hh_len,
1293                                                 (flags & MSG_DONTWAIT), &err);
1294                         } else {
1295                                 skb = NULL;
1296                                 if (atomic_read(&sk->sk_wmem_alloc) <=
1297                                     2 * sk->sk_sndbuf)
1298                                         skb = sock_wmalloc(sk,
1299                                                            alloclen + hh_len, 1,
1300                                                            sk->sk_allocation);
1301                                 if (unlikely(skb == NULL))
1302                                         err = -ENOBUFS;
1303                         }
1304                         if (skb == NULL)
1305                                 goto error;
1306                         /*
1307                          *      Fill in the control structures
1308                          */
1309                         skb->ip_summed = csummode;
1310                         skb->csum = 0;
1311                         /* reserve for fragmentation */
1312                         skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1313
1314                         /*
1315                          *      Find where to start putting bytes
1316                          */
1317                         data = skb_put(skb, fraglen);
1318                         skb_set_network_header(skb, exthdrlen);
1319                         data += fragheaderlen;
1320                         skb->transport_header = (skb->network_header +
1321                                                  fragheaderlen);
1322                         if (fraggap) {
1323                                 skb->csum = skb_copy_and_csum_bits(
1324                                         skb_prev, maxfraglen,
1325                                         data + transhdrlen, fraggap, 0);
1326                                 skb_prev->csum = csum_sub(skb_prev->csum,
1327                                                           skb->csum);
1328                                 data += fraggap;
1329                                 pskb_trim_unique(skb_prev, maxfraglen);
1330                         }
1331                         copy = datalen - transhdrlen - fraggap;
1332                         if (copy < 0) {
1333                                 err = -EINVAL;
1334                                 kfree_skb(skb);
1335                                 goto error;
1336                         } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1337                                 err = -EFAULT;
1338                                 kfree_skb(skb);
1339                                 goto error;
1340                         }
1341
1342                         offset += copy;
1343                         length -= datalen - fraggap;
1344                         transhdrlen = 0;
1345                         exthdrlen = 0;
1346                         csummode = CHECKSUM_NONE;
1347
1348                         /*
1349                          * Put the packet on the pending queue
1350                          */
1351                         __skb_queue_tail(&sk->sk_write_queue, skb);
1352                         continue;
1353                 }
1354
1355                 if (copy > length)
1356                         copy = length;
1357
1358                 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1359                         unsigned int off;
1360
1361                         off = skb->len;
1362                         if (getfrag(from, skb_put(skb, copy),
1363                                                 offset, copy, off, skb) < 0) {
1364                                 __skb_trim(skb, off);
1365                                 err = -EFAULT;
1366                                 goto error;
1367                         }
1368                 } else {
1369                         int i = skb_shinfo(skb)->nr_frags;
1370                         skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1371                         struct page *page = sk->sk_sndmsg_page;
1372                         int off = sk->sk_sndmsg_off;
1373                         unsigned int left;
1374
1375                         if (page && (left = PAGE_SIZE - off) > 0) {
1376                                 if (copy >= left)
1377                                         copy = left;
1378                                 if (page != frag->page) {
1379                                         if (i == MAX_SKB_FRAGS) {
1380                                                 err = -EMSGSIZE;
1381                                                 goto error;
1382                                         }
1383                                         get_page(page);
1384                                         skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1385                                         frag = &skb_shinfo(skb)->frags[i];
1386                                 }
1387                         } else if(i < MAX_SKB_FRAGS) {
1388                                 if (copy > PAGE_SIZE)
1389                                         copy = PAGE_SIZE;
1390                                 page = alloc_pages(sk->sk_allocation, 0);
1391                                 if (page == NULL) {
1392                                         err = -ENOMEM;
1393                                         goto error;
1394                                 }
1395                                 sk->sk_sndmsg_page = page;
1396                                 sk->sk_sndmsg_off = 0;
1397
1398                                 skb_fill_page_desc(skb, i, page, 0, 0);
1399                                 frag = &skb_shinfo(skb)->frags[i];
1400                         } else {
1401                                 err = -EMSGSIZE;
1402                                 goto error;
1403                         }
1404                         if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1405                                 err = -EFAULT;
1406                                 goto error;
1407                         }
1408                         sk->sk_sndmsg_off += copy;
1409                         frag->size += copy;
1410                         skb->len += copy;
1411                         skb->data_len += copy;
1412                         skb->truesize += copy;
1413                         atomic_add(copy, &sk->sk_wmem_alloc);
1414                 }
1415                 offset += copy;
1416                 length -= copy;
1417         }
1418         return 0;
1419 error:
1420         inet->cork.length -= length;
1421         IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1422         return err;
1423 }
1424
1425 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1426 {
1427         if (np->cork.opt) {
1428                 kfree(np->cork.opt->dst0opt);
1429                 kfree(np->cork.opt->dst1opt);
1430                 kfree(np->cork.opt->hopopt);
1431                 kfree(np->cork.opt->srcrt);
1432                 kfree(np->cork.opt);
1433                 np->cork.opt = NULL;
1434         }
1435
1436         if (inet->cork.dst) {
1437                 dst_release(inet->cork.dst);
1438                 inet->cork.dst = NULL;
1439                 inet->cork.flags &= ~IPCORK_ALLFRAG;
1440         }
1441         memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1442 }
1443
1444 int ip6_push_pending_frames(struct sock *sk)
1445 {
1446         struct sk_buff *skb, *tmp_skb;
1447         struct sk_buff **tail_skb;
1448         struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1449         struct inet_sock *inet = inet_sk(sk);
1450         struct ipv6_pinfo *np = inet6_sk(sk);
1451         struct net *net = sock_net(sk);
1452         struct ipv6hdr *hdr;
1453         struct ipv6_txoptions *opt = np->cork.opt;
1454         struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
1455         struct flowi *fl = &inet->cork.fl;
1456         unsigned char proto = fl->proto;
1457         int err = 0;
1458
1459         if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1460                 goto out;
1461         tail_skb = &(skb_shinfo(skb)->frag_list);
1462
1463         /* move skb->data to ip header from ext header */
1464         if (skb->data < skb_network_header(skb))
1465                 __skb_pull(skb, skb_network_offset(skb));
1466         while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1467                 __skb_pull(tmp_skb, skb_network_header_len(skb));
1468                 *tail_skb = tmp_skb;
1469                 tail_skb = &(tmp_skb->next);
1470                 skb->len += tmp_skb->len;
1471                 skb->data_len += tmp_skb->len;
1472                 skb->truesize += tmp_skb->truesize;
1473                 tmp_skb->destructor = NULL;
1474                 tmp_skb->sk = NULL;
1475         }
1476
1477         /* Allow local fragmentation. */
1478         if (np->pmtudisc < IPV6_PMTUDISC_DO)
1479                 skb->local_df = 1;
1480
1481         ipv6_addr_copy(final_dst, &fl->fl6_dst);
1482         __skb_pull(skb, skb_network_header_len(skb));
1483         if (opt && opt->opt_flen)
1484                 ipv6_push_frag_opts(skb, opt, &proto);
1485         if (opt && opt->opt_nflen)
1486                 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1487
1488         skb_push(skb, sizeof(struct ipv6hdr));
1489         skb_reset_network_header(skb);
1490         hdr = ipv6_hdr(skb);
1491
1492         *(__be32*)hdr = fl->fl6_flowlabel |
1493                      htonl(0x60000000 | ((int)np->cork.tclass << 20));
1494
1495         hdr->hop_limit = np->cork.hop_limit;
1496         hdr->nexthdr = proto;
1497         ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1498         ipv6_addr_copy(&hdr->daddr, final_dst);
1499
1500         skb->priority = sk->sk_priority;
1501         skb->mark = sk->sk_mark;
1502
1503         skb_dst_set(skb, dst_clone(&rt->u.dst));
1504         IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1505         if (proto == IPPROTO_ICMPV6) {
1506                 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1507
1508                 ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
1509                 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
1510         }
1511
1512         err = ip6_local_out(skb);
1513         if (err) {
1514                 if (err > 0)
1515                         err = net_xmit_errno(err);
1516                 if (err)
1517                         goto error;
1518         }
1519
1520 out:
1521         ip6_cork_release(inet, np);
1522         return err;
1523 error:
1524         IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1525         goto out;
1526 }
1527
1528 void ip6_flush_pending_frames(struct sock *sk)
1529 {
1530         struct sk_buff *skb;
1531
1532         while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1533                 if (skb_dst(skb))
1534                         IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1535                                       IPSTATS_MIB_OUTDISCARDS);
1536                 kfree_skb(skb);
1537         }
1538
1539         ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1540 }