2 * Extension Header handling for IPv6
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
7 * Andi Kleen <ak@muc.de>
8 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
10 * $Id: exthdrs.c,v 1.13 2001/06/19 15:58:56 davem Exp $
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
19 * yoshfuji : ensure not to overrun while parsing
21 * Mitsuru KANDA @USAGI and: Remove ipv6_parse_exthdrs().
22 * YOSHIFUJI Hideaki @USAGI Register inbound extension header
23 * handlers as inet6_protocol{}.
26 #include <linux/errno.h>
27 #include <linux/types.h>
28 #include <linux/socket.h>
29 #include <linux/sockios.h>
30 #include <linux/sched.h>
31 #include <linux/net.h>
32 #include <linux/netdevice.h>
33 #include <linux/in6.h>
34 #include <linux/icmpv6.h>
40 #include <net/protocol.h>
41 #include <net/transp_v6.h>
42 #include <net/rawv6.h>
43 #include <net/ndisc.h>
44 #include <net/ip6_route.h>
45 #include <net/addrconf.h>
47 #include <asm/uaccess.h>
50 * Parsing tlv encoded headers.
52 * Parsing function "func" returns 1 if parsing succeeded
53 * and 0 if it failed.
54 * It MUST NOT touch skb->h.
59 int (*func)(struct sk_buff *skb, int offset);
62 /*********************
64 *********************/
66 /* An unknown option is detected, decide what to do */
68 static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
70 switch ((skb->nh.raw[optoff] & 0xC0) >> 6) {
74 case 1: /* drop packet */
77 case 3: /* Send ICMP if not a multicast address and drop packet */
78 /* Actually, this is a redundant check: icmp_send
79 will recheck in any case.
81 if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr))
83 case 2: /* send ICMP PARM PROB regardless and drop packet */
84 icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff);
92 /* Parse tlv encoded option header (hop-by-hop or destination) */
94 static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb)
96 struct tlvtype_proc *curr;
97 int off = skb->h.raw - skb->nh.raw;
98 int len = ((skb->h.raw[1]+1)<<3);
100 if ((skb->h.raw + len) - skb->data > skb_headlen(skb))
107 int optlen = skb->nh.raw[off+1]+2;
109 switch (skb->nh.raw[off]) {
117 default: /* Other TLV code so scan list */
120 for (curr=procs; curr->type >= 0; curr++) {
121 if (curr->type == skb->nh.raw[off]) {
122 /* type specific length/alignment
123 checks will be performed in the
125 if (curr->func(skb, off) == 0)
130 if (curr->type < 0) {
131 if (ip6_tlvopt_unknown(skb, off) == 0)
146 /*****************************
147 Destination options header.
148 *****************************/
150 static struct tlvtype_proc tlvprocdestopt_lst[] = {
151 /* No destination options are defined now */
155 static int ipv6_destopt_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
157 struct sk_buff *skb = *skbp;
158 struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;
160 if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) ||
161 !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) {
166 opt->dst1 = skb->h.raw - skb->nh.raw;
168 if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
169 skb->h.raw += ((skb->h.raw[1]+1)<<3);
177 static struct inet6_protocol destopt_protocol = {
178 .handler = ipv6_destopt_rcv,
179 .flags = INET6_PROTO_NOPOLICY,
182 void __init ipv6_destopt_init(void)
184 if (inet6_add_protocol(&destopt_protocol, IPPROTO_DSTOPTS) < 0)
185 printk(KERN_ERR "ipv6_destopt_init: Could not register protocol\n");
188 /********************************
189 NONE header. No data in packet.
190 ********************************/
192 static int ipv6_nodata_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
194 struct sk_buff *skb = *skbp;
200 static struct inet6_protocol nodata_protocol = {
201 .handler = ipv6_nodata_rcv,
202 .flags = INET6_PROTO_NOPOLICY,
205 void __init ipv6_nodata_init(void)
207 if (inet6_add_protocol(&nodata_protocol, IPPROTO_NONE) < 0)
208 printk(KERN_ERR "ipv6_nodata_init: Could not register protocol\n");
211 /********************************
213 ********************************/
215 static int ipv6_rthdr_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
217 struct sk_buff *skb = *skbp;
218 struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;
219 struct in6_addr *addr;
220 struct in6_addr daddr;
223 struct ipv6_rt_hdr *hdr;
224 struct rt0_hdr *rthdr;
226 if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) ||
227 !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) {
228 IP6_INC_STATS_BH(Ip6InHdrErrors);
233 hdr = (struct ipv6_rt_hdr *) skb->h.raw;
235 if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr) ||
236 skb->pkt_type != PACKET_HOST) {
242 if (hdr->segments_left == 0) {
243 opt->srcrt = skb->h.raw - skb->nh.raw;
244 skb->h.raw += (hdr->hdrlen + 1) << 3;
245 opt->dst0 = opt->dst1;
247 *nhoffp = (&hdr->nexthdr) - skb->nh.raw;
251 if (hdr->type != IPV6_SRCRT_TYPE_0) {
252 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb->nh.raw);
256 if (hdr->hdrlen & 0x01) {
257 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->hdrlen) - skb->nh.raw);
262 * This is the routing header forwarding algorithm from
266 n = hdr->hdrlen >> 1;
268 if (hdr->segments_left > n) {
269 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->segments_left) - skb->nh.raw);
273 /* We are about to mangle packet header. Be careful!
274 Do not damage packets queued somewhere.
276 if (skb_cloned(skb)) {
277 struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
282 opt = (struct inet6_skb_parm *)skb2->cb;
283 hdr = (struct ipv6_rt_hdr *) skb2->h.raw;
286 if (skb->ip_summed == CHECKSUM_HW)
287 skb->ip_summed = CHECKSUM_NONE;
289 i = n - --hdr->segments_left;
291 rthdr = (struct rt0_hdr *) hdr;
295 if (ipv6_addr_is_multicast(addr)) {
300 ipv6_addr_copy(&daddr, addr);
301 ipv6_addr_copy(addr, &skb->nh.ipv6h->daddr);
302 ipv6_addr_copy(&skb->nh.ipv6h->daddr, &daddr);
304 dst_release(xchg(&skb->dst, NULL));
305 ip6_route_input(skb);
306 if (skb->dst->error) {
310 if (skb->dst->dev->flags&IFF_LOOPBACK) {
311 if (skb->nh.ipv6h->hop_limit <= 1) {
312 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
317 skb->nh.ipv6h->hop_limit--;
325 static struct inet6_protocol rthdr_protocol = {
326 .handler = ipv6_rthdr_rcv,
327 .flags = INET6_PROTO_NOPOLICY,
330 void __init ipv6_rthdr_init(void)
332 if (inet6_add_protocol(&rthdr_protocol, IPPROTO_ROUTING) < 0)
333 printk(KERN_ERR "ipv6_rthdr_init: Could not register protocol\n");
337 This function inverts received rthdr.
338 NOTE: the specs allow doing this automatically only if
339 the packet is authenticated.
341 I will not discuss it here (though, I am really pissed off at
342 this stupid requirement making rthdr idea useless)
344 Actually, it creates severe problems for us.
345 Embryonic requests have no associated sockets,
346 so the user has no control over them and
347 can neither set reply options nor
348 even know that someone wants to connect
351 For now we need to test the engine, so I created
352 a temporary (or permanent) backdoor.
353 If the listening socket sets IPV6_RTHDR to 2, then we invert the header.
357 struct ipv6_txoptions *
358 ipv6_invert_rthdr(struct sock *sk, struct ipv6_rt_hdr *hdr)
362 [ H1 -> H2 -> ... H_prev ] daddr=ME
365 [ H_prev -> ... -> H1 ] daddr =sender
367 Note that the IP output engine will rewrite this rthdr
368 by rotating it left by one addr.
372 struct rt0_hdr *rthdr = (struct rt0_hdr*)hdr;
373 struct rt0_hdr *irthdr;
374 struct ipv6_txoptions *opt;
375 int hdrlen = ipv6_optlen(hdr);
377 if (hdr->segments_left ||
378 hdr->type != IPV6_SRCRT_TYPE_0 ||
382 n = hdr->hdrlen >> 1;
383 opt = sock_kmalloc(sk, sizeof(*opt) + hdrlen, GFP_ATOMIC);
386 memset(opt, 0, sizeof(*opt));
387 opt->tot_len = sizeof(*opt) + hdrlen;
388 opt->srcrt = (void*)(opt+1);
389 opt->opt_nflen = hdrlen;
391 memcpy(opt->srcrt, hdr, sizeof(*hdr));
392 irthdr = (struct rt0_hdr*)opt->srcrt;
393 /* Obsolete field, MBZ, when originated by us */
395 opt->srcrt->segments_left = n;
397 memcpy(irthdr->addr+i, rthdr->addr+(n-1-i), 16);
401 /**********************************
403 **********************************/
405 /* Router Alert as of RFC 2711 */
407 static int ipv6_hop_ra(struct sk_buff *skb, int optoff)
409 if (skb->nh.raw[optoff+1] == 2) {
410 ((struct inet6_skb_parm*)skb->cb)->ra = optoff;
414 printk(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", skb->nh.raw[optoff+1]);
421 static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
425 if (skb->nh.raw[optoff+1] != 4 || (optoff&3) != 2) {
427 printk(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", skb->nh.raw[optoff+1]);
431 pkt_len = ntohl(*(u32*)(skb->nh.raw+optoff+2));
432 if (pkt_len <= IPV6_MAXPLEN) {
433 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2);
436 if (skb->nh.ipv6h->payload_len) {
437 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff);
441 if (pkt_len > skb->len - sizeof(struct ipv6hdr)) {
442 IP6_INC_STATS_BH(Ip6InTruncatedPkts);
445 if (pkt_len + sizeof(struct ipv6hdr) < skb->len) {
446 __pskb_trim(skb, pkt_len + sizeof(struct ipv6hdr));
447 if (skb->ip_summed == CHECKSUM_HW)
448 skb->ip_summed = CHECKSUM_NONE;
457 static struct tlvtype_proc tlvprochopopt_lst[] = {
459 .type = IPV6_TLV_ROUTERALERT,
463 .type = IPV6_TLV_JUMBO,
464 .func = ipv6_hop_jumbo,
469 int ipv6_parse_hopopts(struct sk_buff *skb, int nhoff)
471 ((struct inet6_skb_parm*)skb->cb)->hop = sizeof(struct ipv6hdr);
472 if (ip6_parse_tlv(tlvprochopopt_lst, skb))
473 return sizeof(struct ipv6hdr);
478 * Creating outbound headers.
480 * "build" functions work when skb is filled from head to tail (datagram)
481 * "push" functions work when headers are added from tail to head (tcp)
483 * In both cases we assume that the caller reserved enough room
487 static u8 *ipv6_build_rthdr(struct sk_buff *skb, u8 *prev_hdr,
488 struct ipv6_rt_hdr *opt, struct in6_addr *addr)
490 struct rt0_hdr *phdr, *ihdr;
493 ihdr = (struct rt0_hdr *) opt;
495 phdr = (struct rt0_hdr *) skb_put(skb, (ihdr->rt_hdr.hdrlen + 1) << 3);
496 memcpy(phdr, ihdr, sizeof(struct rt0_hdr));
498 hops = ihdr->rt_hdr.hdrlen >> 1;
501 memcpy(phdr->addr, ihdr->addr + 1,
502 (hops - 1) * sizeof(struct in6_addr));
504 ipv6_addr_copy(phdr->addr + (hops - 1), addr);
506 phdr->rt_hdr.nexthdr = *prev_hdr;
507 *prev_hdr = NEXTHDR_ROUTING;
508 return &phdr->rt_hdr.nexthdr;
511 static u8 *ipv6_build_exthdr(struct sk_buff *skb, u8 *prev_hdr, u8 type, struct ipv6_opt_hdr *opt)
513 struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb_put(skb, ipv6_optlen(opt));
515 memcpy(h, opt, ipv6_optlen(opt));
516 h->nexthdr = *prev_hdr;
521 u8 *ipv6_build_nfrag_opts(struct sk_buff *skb, u8 *prev_hdr, struct ipv6_txoptions *opt,
522 struct in6_addr *daddr, u32 jumbolen)
524 struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb->data;
526 if (opt && opt->hopopt)
527 prev_hdr = ipv6_build_exthdr(skb, prev_hdr, NEXTHDR_HOP, opt->hopopt);
530 u8 *jumboopt = (u8 *)skb_put(skb, 8);
532 if (opt && opt->hopopt) {
533 *jumboopt++ = IPV6_TLV_PADN;
537 h = (struct ipv6_opt_hdr *)jumboopt;
538 h->nexthdr = *prev_hdr;
541 *prev_hdr = NEXTHDR_HOP;
542 prev_hdr = &h->nexthdr;
544 jumboopt[0] = IPV6_TLV_JUMBO;
546 *(u32*)(jumboopt+2) = htonl(jumbolen);
550 prev_hdr = ipv6_build_exthdr(skb, prev_hdr, NEXTHDR_DEST, opt->dst0opt);
552 prev_hdr = ipv6_build_rthdr(skb, prev_hdr, opt->srcrt, daddr);
557 u8 *ipv6_build_frag_opts(struct sk_buff *skb, u8 *prev_hdr, struct ipv6_txoptions *opt)
560 prev_hdr = ipv6_build_exthdr(skb, prev_hdr, NEXTHDR_DEST, opt->dst1opt);
564 static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto,
565 struct ipv6_rt_hdr *opt,
566 struct in6_addr **addr_p)
568 struct rt0_hdr *phdr, *ihdr;
571 ihdr = (struct rt0_hdr *) opt;
573 phdr = (struct rt0_hdr *) skb_push(skb, (ihdr->rt_hdr.hdrlen + 1) << 3);
574 memcpy(phdr, ihdr, sizeof(struct rt0_hdr));
576 hops = ihdr->rt_hdr.hdrlen >> 1;
579 memcpy(phdr->addr, ihdr->addr + 1,
580 (hops - 1) * sizeof(struct in6_addr));
582 ipv6_addr_copy(phdr->addr + (hops - 1), *addr_p);
583 *addr_p = ihdr->addr;
585 phdr->rt_hdr.nexthdr = *proto;
586 *proto = NEXTHDR_ROUTING;
589 static void ipv6_push_exthdr(struct sk_buff *skb, u8 *proto, u8 type, struct ipv6_opt_hdr *opt)
591 struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb_push(skb, ipv6_optlen(opt));
593 memcpy(h, opt, ipv6_optlen(opt));
598 void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
600 struct in6_addr **daddr)
603 ipv6_push_rthdr(skb, proto, opt->srcrt, daddr);
605 ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst0opt);
607 ipv6_push_exthdr(skb, proto, NEXTHDR_HOP, opt->hopopt);
610 void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto)
613 ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst1opt);
616 struct ipv6_txoptions *
617 ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt)
619 struct ipv6_txoptions *opt2;
621 opt2 = sock_kmalloc(sk, opt->tot_len, GFP_ATOMIC);
623 long dif = (char*)opt2 - (char*)opt;
624 memcpy(opt2, opt, opt->tot_len);
626 *((char**)&opt2->hopopt) += dif;
628 *((char**)&opt2->dst0opt) += dif;
630 *((char**)&opt2->dst1opt) += dif;
632 *((char**)&opt2->srcrt) += dif;
639 * find out if nexthdr is a well-known extension header or a protocol
642 int ipv6_ext_hdr(u8 nexthdr)
645 * find out if nexthdr is an extension header or a protocol
647 return ( (nexthdr == NEXTHDR_HOP) ||
648 (nexthdr == NEXTHDR_ROUTING) ||
649 (nexthdr == NEXTHDR_FRAGMENT) ||
650 (nexthdr == NEXTHDR_AUTH) ||
651 (nexthdr == NEXTHDR_NONE) ||
652 (nexthdr == NEXTHDR_DEST) );
656 * Skip any extension headers. This is used by the ICMP module.
658 * Note that strictly speaking this conflicts with RFC 2460 4.0:
659 * ...The contents and semantics of each extension header determine whether
660 * or not to proceed to the next header. Therefore, extension headers must
661 * be processed strictly in the order they appear in the packet; a
662 * receiver must not, for example, scan through a packet looking for a
663 * particular kind of extension header and process that header prior to
664 * processing all preceding ones.
666 * We do exactly this. This is a protocol bug. We can't decide after
667 * seeing an unknown discard-with-error flavour TLV option if it's an
668 * ICMP error message or not (errors should never be sent in reply to
669 * ICMP error messages).
671 * But I see no other way to do this. This might need to be reexamined
672 * when Linux implements ESP (and maybe AUTH) headers.
675 * This function parses (probably truncated) exthdr set "hdr"
676 * of length "len". "nexthdrp" initially points to some place,
677 * where type of the first header can be found.
679 * It skips all well-known exthdrs, and returns pointer to the start
680 * of unparsable area i.e. the first header with unknown type.
681 * If it is not NULL, *nexthdr is updated with the type/protocol of this header.
683 * NOTES: - if packet terminated with NEXTHDR_NONE it returns NULL.
684 * - it may return pointer pointing beyond end of packet,
685 * if the last recognized header is truncated in the middle.
686 * - if packet is truncated, so that all parsed headers are skipped,
688 * - First fragment header is skipped, not-first ones
689 * are considered as unparsable.
690 * - ESP is unparsable for now and considered like
691 * normal payload protocol.
692 * - Note also special handling of AUTH header. Thanks to IPsec wizards.
697 int ipv6_skip_exthdr(struct sk_buff *skb, int start, u8 *nexthdrp, int len)
699 u8 nexthdr = *nexthdrp;
701 while (ipv6_ext_hdr(nexthdr)) {
702 struct ipv6_opt_hdr hdr;
705 if (len < (int)sizeof(struct ipv6_opt_hdr))
707 if (nexthdr == NEXTHDR_NONE)
709 if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
711 if (nexthdr == NEXTHDR_FRAGMENT) {
712 struct frag_hdr *fhdr = (struct frag_hdr *) &hdr;
713 if (ntohs(fhdr->frag_off) & ~0x7)
716 } else if (nexthdr == NEXTHDR_AUTH)
717 hdrlen = (hdr.hdrlen+2)<<2;
719 hdrlen = ipv6_optlen(&hdr);
721 nexthdr = hdr.nexthdr;