c1147531acd2eb0be2bb8d09e970b318478e0e59
[linux-flexiantxendom0-3.2.10.git] / net / ipv4 / netfilter / ipt_REJECT.c
1 /*
2  * This is a module which is used for rejecting packets.
3  * Added support for customized reject packets (Jozsef Kadlecsik).
4  * Added support for ICMP type-3-code-13 (Maciej Soltysiak). [RFC 1812]
5  */
6 #include <linux/config.h>
7 #include <linux/module.h>
8 #include <linux/skbuff.h>
9 #include <linux/ip.h>
10 #include <linux/udp.h>
11 #include <linux/icmp.h>
12 #include <net/icmp.h>
13 #include <net/ip.h>
14 #include <net/tcp.h>
15 #include <net/route.h>
16 #include <linux/netfilter_ipv4/ip_tables.h>
17 #include <linux/netfilter_ipv4/ipt_REJECT.h>
18
19 #if 0
20 #define DEBUGP printk
21 #else
22 #define DEBUGP(format, args...)
23 #endif
24
25 /* If the original packet is part of a connection, but the connection
26    is not confirmed, our manufactured reply will not be associated
27    with it, so we need to do this manually. */
28 static void connection_attach(struct sk_buff *new_skb, struct nf_ct_info *nfct)
29 {
30         void (*attach)(struct sk_buff *, struct nf_ct_info *);
31
32         /* Avoid module unload race with ip_ct_attach being NULLed out */
33         if (nfct && (attach = ip_ct_attach) != NULL) {
34                 mb(); /* Just to be sure: must be read before executing this */
35                 attach(new_skb, nfct);
36         }
37 }
38
39 static inline struct rtable *route_reverse(struct sk_buff *skb, int local)
40 {
41         struct iphdr *iph = skb->nh.iph;
42         struct dst_entry *odst;
43         struct flowi fl = {};
44         struct rtable *rt;
45
46         if (local) {
47                 fl.nl_u.ip4_u.daddr = iph->saddr;
48                 fl.nl_u.ip4_u.saddr = iph->daddr;
49                 fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
50
51                 if (ip_route_output_key(&rt, &fl) != 0)
52                         return NULL;
53         } else {
54                 /* non-local src, find valid iif to satisfy
55                  * rp-filter when calling ip_route_input. */
56                 fl.nl_u.ip4_u.daddr = iph->daddr;
57                 if (ip_route_output_key(&rt, &fl) != 0)
58                         return NULL;
59
60                 odst = skb->dst;
61                 if (ip_route_input(skb, iph->saddr, iph->daddr,
62                                    RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
63                         dst_release(&rt->u.dst);
64                         return NULL;
65                 }
66                 dst_release(&rt->u.dst);
67                 rt = (struct rtable *)skb->dst;
68                 skb->dst = odst;
69         }
70
71         if (rt->u.dst.error) {
72                 dst_release(&rt->u.dst);
73                 rt = NULL;
74         }
75
76         return rt;
77 }
78
79 /* Send RST reply */
80 static void send_reset(struct sk_buff *oldskb, int local)
81 {
82         struct sk_buff *nskb;
83         struct tcphdr *otcph, *tcph;
84         struct rtable *rt;
85         unsigned int otcplen;
86         u_int16_t tmp_port;
87         u_int32_t tmp_addr;
88         int needs_ack;
89         int hh_len;
90
91         /* IP header checks: fragment, too short. */
92         if (oldskb->nh.iph->frag_off & htons(IP_OFFSET)
93             || oldskb->len < (oldskb->nh.iph->ihl<<2) + sizeof(struct tcphdr))
94                 return;
95
96         otcph = (struct tcphdr *)((u_int32_t*)oldskb->nh.iph + oldskb->nh.iph->ihl);
97         otcplen = oldskb->len - oldskb->nh.iph->ihl*4;
98
99         if (skb_copy_bits(oldskb, oldskb->nh.iph->ihl*4,
100                           otcph, sizeof(*otcph)) < 0)
101                 return;
102
103         /* No RST for RST. */
104         if (otcph->rst)
105                 return;
106
107         /* Check checksum. */
108         if (tcp_v4_check(otcph, otcplen, oldskb->nh.iph->saddr,
109                          oldskb->nh.iph->daddr,
110                          csum_partial((char *)otcph, otcplen, 0)) != 0)
111                 return;
112
113         if ((rt = route_reverse(oldskb, local)) == NULL)
114                 return;
115
116         hh_len = (rt->u.dst.dev->hard_header_len + 15)&~15;
117
118         /* Copy skb (even if skb is about to be dropped, we can't just
119            clone it because there may be other things, such as tcpdump,
120            interested in it). We also need to expand headroom in case
121            hh_len of incoming interface < hh_len of outgoing interface */
122         nskb = skb_copy_expand(oldskb, hh_len, skb_tailroom(oldskb),
123                                GFP_ATOMIC);
124         if (!nskb) {
125                 dst_release(&rt->u.dst);
126                 return;
127         }
128
129         dst_release(nskb->dst);
130         nskb->dst = &rt->u.dst;
131
132         /* This packet will not be the same as the other: clear nf fields */
133         nf_conntrack_put(nskb->nfct);
134         nskb->nfct = NULL;
135         nskb->nfcache = 0;
136 #ifdef CONFIG_NETFILTER_DEBUG
137         nskb->nf_debug = 0;
138 #endif
139         nskb->nfmark = 0;
140
141         tcph = (struct tcphdr *)((u_int32_t*)nskb->nh.iph + nskb->nh.iph->ihl);
142
143         /* Swap source and dest */
144         tmp_addr = nskb->nh.iph->saddr;
145         nskb->nh.iph->saddr = nskb->nh.iph->daddr;
146         nskb->nh.iph->daddr = tmp_addr;
147         tmp_port = tcph->source;
148         tcph->source = tcph->dest;
149         tcph->dest = tmp_port;
150
151         /* Truncate to length (no data) */
152         tcph->doff = sizeof(struct tcphdr)/4;
153         skb_trim(nskb, nskb->nh.iph->ihl*4 + sizeof(struct tcphdr));
154         nskb->nh.iph->tot_len = htons(nskb->len);
155
156         if (tcph->ack) {
157                 needs_ack = 0;
158                 tcph->seq = otcph->ack_seq;
159                 tcph->ack_seq = 0;
160         } else {
161                 needs_ack = 1;
162                 tcph->ack_seq = htonl(ntohl(otcph->seq) + otcph->syn + otcph->fin
163                                       + otcplen - (otcph->doff<<2));
164                 tcph->seq = 0;
165         }
166
167         /* Reset flags */
168         ((u_int8_t *)tcph)[13] = 0;
169         tcph->rst = 1;
170         tcph->ack = needs_ack;
171
172         tcph->window = 0;
173         tcph->urg_ptr = 0;
174
175         /* Adjust TCP checksum */
176         tcph->check = 0;
177         tcph->check = tcp_v4_check(tcph, sizeof(struct tcphdr),
178                                    nskb->nh.iph->saddr,
179                                    nskb->nh.iph->daddr,
180                                    csum_partial((char *)tcph,
181                                                 sizeof(struct tcphdr), 0));
182
183         /* Adjust IP TTL, DF */
184         nskb->nh.iph->ttl = MAXTTL;
185         /* Set DF, id = 0 */
186         nskb->nh.iph->frag_off = htons(IP_DF);
187         nskb->nh.iph->id = 0;
188
189         /* Adjust IP checksum */
190         nskb->nh.iph->check = 0;
191         nskb->nh.iph->check = ip_fast_csum((unsigned char *)nskb->nh.iph, 
192                                            nskb->nh.iph->ihl);
193
194         /* "Never happens" */
195         if (nskb->len > dst_pmtu(nskb->dst))
196                 goto free_nskb;
197
198         connection_attach(nskb, oldskb->nfct);
199
200         NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, nskb, NULL, nskb->dst->dev,
201                 ip_finish_output);
202         return;
203
204  free_nskb:
205         kfree_skb(nskb);
206 }
207
208 static void send_unreach(struct sk_buff *skb_in, int code)
209 {
210         struct iphdr *iph;
211         struct udphdr *udph;
212         struct icmphdr *icmph;
213         struct sk_buff *nskb;
214         u32 saddr;
215         u8 tos;
216         int hh_len, length;
217         struct rtable *rt = (struct rtable*)skb_in->dst;
218         unsigned char *data;
219
220         if (!rt)
221                 return;
222
223         /* FIXME: Use sysctl number. --RR */
224         if (!xrlim_allow(&rt->u.dst, 1*HZ))
225                 return;
226
227         iph = skb_in->nh.iph;
228
229         /* No replies to physical multicast/broadcast */
230         if (skb_in->pkt_type!=PACKET_HOST)
231                 return;
232
233         /* Now check at the protocol level */
234         if (rt->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST))
235                 return;
236
237         /* Only reply to fragment 0. */
238         if (iph->frag_off&htons(IP_OFFSET))
239                 return;
240
241         /* Ensure we have at least 8 bytes of proto header. */
242         if (skb_in->len < skb_in->nh.iph->ihl*4 + 8)
243                 return;
244
245         /* if UDP checksum is set, verify it's correct */
246         if (iph->protocol == IPPROTO_UDP
247             && skb_in->tail-(u8*)iph >= sizeof(struct udphdr)) {
248                 int datalen = skb_in->len - (iph->ihl<<2);
249                 udph = (struct udphdr *)((char *)iph + (iph->ihl<<2));
250                 if (udph->check
251                     && csum_tcpudp_magic(iph->saddr, iph->daddr,
252                                          datalen, IPPROTO_UDP,
253                                          csum_partial((char *)udph, datalen,
254                                                       0)) != 0)
255                         return;
256         }
257
258         /* If we send an ICMP error to an ICMP error a mess would result.. */
259         if (iph->protocol == IPPROTO_ICMP
260             && skb_in->tail-(u8*)iph >= sizeof(struct icmphdr)) {
261                 icmph = (struct icmphdr *)((char *)iph + (iph->ihl<<2));
262
263                 if (skb_copy_bits(skb_in, skb_in->nh.iph->ihl*4,
264                                   icmph, sizeof(*icmph)) < 0)
265                         return;
266
267                 /* Between echo-reply (0) and timestamp (13),
268                    everything except echo-request (8) is an error.
269                    Also, anything greater than NR_ICMP_TYPES is
270                    unknown, and hence should be treated as an error... */
271                 if ((icmph->type < ICMP_TIMESTAMP
272                      && icmph->type != ICMP_ECHOREPLY
273                      && icmph->type != ICMP_ECHO)
274                     || icmph->type > NR_ICMP_TYPES)
275                         return;
276         }
277
278         saddr = iph->daddr;
279         if (!(rt->rt_flags & RTCF_LOCAL))
280                 saddr = 0;
281
282         tos = (iph->tos & IPTOS_TOS_MASK) | IPTOS_PREC_INTERNETCONTROL;
283
284         {
285                 struct flowi fl = { .nl_u = { .ip4_u =
286                                               { .daddr = skb_in->nh.iph->saddr,
287                                                 .saddr = saddr,
288                                                 .tos = RT_TOS(tos) } } };
289                 if (ip_route_output_key(&rt, &fl))
290                         return;
291         }
292         /* RFC says return as much as we can without exceeding 576 bytes. */
293         length = skb_in->len + sizeof(struct iphdr) + sizeof(struct icmphdr);
294
295         if (length > dst_pmtu(&rt->u.dst))
296                 length = dst_pmtu(&rt->u.dst);
297         if (length > 576)
298                 length = 576;
299
300         hh_len = (rt->u.dst.dev->hard_header_len + 15)&~15;
301
302         nskb = alloc_skb(hh_len+15+length, GFP_ATOMIC);
303         if (!nskb) {
304                 ip_rt_put(rt);
305                 return;
306         }
307
308         nskb->priority = 0;
309         nskb->dst = &rt->u.dst;
310         skb_reserve(nskb, hh_len);
311
312         /* Set up IP header */
313         iph = nskb->nh.iph
314                 = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr));
315         iph->version=4;
316         iph->ihl=5;
317         iph->tos=tos;
318         iph->tot_len = htons(length);
319
320         /* PMTU discovery never applies to ICMP packets. */
321         iph->frag_off = 0;
322
323         iph->ttl = MAXTTL;
324         ip_select_ident(iph, &rt->u.dst, NULL);
325         iph->protocol=IPPROTO_ICMP;
326         iph->saddr=rt->rt_src;
327         iph->daddr=rt->rt_dst;
328         iph->check=0;
329         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
330
331         /* Set up ICMP header. */
332         icmph = nskb->h.icmph
333                 = (struct icmphdr *)skb_put(nskb, sizeof(struct icmphdr));
334         icmph->type = ICMP_DEST_UNREACH;
335         icmph->code = code;     
336         icmph->un.gateway = 0;
337         icmph->checksum = 0;
338         
339         /* Copy as much of original packet as will fit */
340         data = skb_put(nskb,
341                        length - sizeof(struct iphdr) - sizeof(struct icmphdr));
342
343         skb_copy_bits(skb_in, 0, data,
344                       length - sizeof(struct iphdr) - sizeof(struct icmphdr));
345
346         icmph->checksum = ip_compute_csum((unsigned char *)icmph,
347                                           length - sizeof(struct iphdr));
348
349         connection_attach(nskb, skb_in->nfct);
350
351         NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, nskb, NULL, nskb->dst->dev,
352                 ip_finish_output);
353 }       
354
355 static unsigned int reject(struct sk_buff **pskb,
356                            const struct net_device *in,
357                            const struct net_device *out,
358                            unsigned int hooknum,
359                            const void *targinfo,
360                            void *userinfo)
361 {
362         const struct ipt_reject_info *reject = targinfo;
363
364         /* Our naive response construction doesn't deal with IP
365            options, and probably shouldn't try. */
366         if ((*pskb)->nh.iph->ihl<<2 != sizeof(struct iphdr))
367                 return NF_DROP;
368
369         /* WARNING: This code causes reentry within iptables.
370            This means that the iptables jump stack is now crap.  We
371            must return an absolute verdict. --RR */
372         switch (reject->with) {
373         case IPT_ICMP_NET_UNREACHABLE:
374                 send_unreach(*pskb, ICMP_NET_UNREACH);
375                 break;
376         case IPT_ICMP_HOST_UNREACHABLE:
377                 send_unreach(*pskb, ICMP_HOST_UNREACH);
378                 break;
379         case IPT_ICMP_PROT_UNREACHABLE:
380                 send_unreach(*pskb, ICMP_PROT_UNREACH);
381                 break;
382         case IPT_ICMP_PORT_UNREACHABLE:
383                 send_unreach(*pskb, ICMP_PORT_UNREACH);
384                 break;
385         case IPT_ICMP_NET_PROHIBITED:
386                 send_unreach(*pskb, ICMP_NET_ANO);
387                 break;
388         case IPT_ICMP_HOST_PROHIBITED:
389                 send_unreach(*pskb, ICMP_HOST_ANO);
390                 break;
391         case IPT_ICMP_ADMIN_PROHIBITED:
392                 send_unreach(*pskb, ICMP_PKT_FILTERED);
393                 break;
394         case IPT_TCP_RESET:
395                 send_reset(*pskb, hooknum == NF_IP_LOCAL_IN);
396         case IPT_ICMP_ECHOREPLY:
397                 /* Doesn't happen. */
398                 break;
399         }
400
401         return NF_DROP;
402 }
403
404 static int check(const char *tablename,
405                  const struct ipt_entry *e,
406                  void *targinfo,
407                  unsigned int targinfosize,
408                  unsigned int hook_mask)
409 {
410         const struct ipt_reject_info *rejinfo = targinfo;
411
412         if (targinfosize != IPT_ALIGN(sizeof(struct ipt_reject_info))) {
413                 DEBUGP("REJECT: targinfosize %u != 0\n", targinfosize);
414                 return 0;
415         }
416
417         /* Only allow these for packet filtering. */
418         if (strcmp(tablename, "filter") != 0) {
419                 DEBUGP("REJECT: bad table `%s'.\n", tablename);
420                 return 0;
421         }
422         if ((hook_mask & ~((1 << NF_IP_LOCAL_IN)
423                            | (1 << NF_IP_FORWARD)
424                            | (1 << NF_IP_LOCAL_OUT))) != 0) {
425                 DEBUGP("REJECT: bad hook mask %X\n", hook_mask);
426                 return 0;
427         }
428
429         if (rejinfo->with == IPT_ICMP_ECHOREPLY) {
430                 printk("REJECT: ECHOREPLY no longer supported.\n");
431                 return 0;
432         } else if (rejinfo->with == IPT_TCP_RESET) {
433                 /* Must specify that it's a TCP packet */
434                 if (e->ip.proto != IPPROTO_TCP
435                     || (e->ip.invflags & IPT_INV_PROTO)) {
436                         DEBUGP("REJECT: TCP_RESET illegal for non-tcp\n");
437                         return 0;
438                 }
439         }
440
441         return 1;
442 }
443
444 static struct ipt_target ipt_reject_reg = {
445         .name           = "REJECT",
446         .target         = reject,
447         .checkentry     = check,
448         .me             = THIS_MODULE,
449 };
450
451 static int __init init(void)
452 {
453         if (ipt_register_target(&ipt_reject_reg))
454                 return -EINVAL;
455         return 0;
456 }
457
458 static void __exit fini(void)
459 {
460         ipt_unregister_target(&ipt_reject_reg);
461 }
462
463 module_init(init);
464 module_exit(fini);
465 MODULE_LICENSE("GPL");