/* netfilter.c: look after the filters for various protocols.
 * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
 *
 * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
 * way.
 *
 * Rusty Russell (C)2000 -- This code is GPL.
 *
 * February 2000: Modified by James Morris to have 1 queue per protocol.
 * 15-Mar-2000: Added NF_REPEAT --RR.
 */
#include <linux/config.h>
#include <linux/netfilter.h>
#include <net/protocol.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/wait.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/if.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/icmp.h>
#include <net/sock.h>
#include <net/route.h>
#include <linux/ip.h>

#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
/* In this code, we can be waiting indefinitely for userspace to
 * service a packet if a hook returns NF_QUEUE.  We could keep a count
 * of skbuffs queued for userspace, and not deregister a hook unless
 * this is zero, but that sucks.  Now, we simply check when the
 * packets come back: if the hook is gone, the packet is discarded. */
#ifdef CONFIG_NETFILTER_DEBUG
#define NFDEBUG(format, args...)  printk(format , ## args)
#else
#define NFDEBUG(format, args...)
#endif
/* Sockopts only registered and called from user context, so
   net locking would be overkill.  Also, [gs]etsockopt calls may
   sleep. */
static DECLARE_MUTEX(nf_sockopt_mutex);

struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
static LIST_HEAD(nf_sockopts);
static spinlock_t nf_hook_lock = SPIN_LOCK_UNLOCKED;
/*
 * A queue handler may be registered for each protocol.  Each is protected by
 * a long term mutex.  The handler must provide an outfn() to accept packets
 * for queueing and must reinject all packets it receives, no matter what.
 */
static struct nf_queue_handler_t {
	nf_queue_outfn_t outfn;
	void *data;
} queue_handler[NPROTO];
static rwlock_t queue_handler_lock = RW_LOCK_UNLOCKED;
int nf_register_hook(struct nf_hook_ops *reg)
{
	struct list_head *i;

	spin_lock_bh(&nf_hook_lock);
	list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) {
		if (reg->priority < ((struct nf_hook_ops *)i)->priority)
			break;
	}
	list_add_rcu(&reg->list, i->prev);
	spin_unlock_bh(&nf_hook_lock);

	synchronize_net();
	return 0;
}
void nf_unregister_hook(struct nf_hook_ops *reg)
{
	spin_lock_bh(&nf_hook_lock);
	list_del_rcu(&reg->list);
	spin_unlock_bh(&nf_hook_lock);

	synchronize_net();
}
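/*
 * Registration sketch (illustrative, not part of the original file;
 * the example_* names are hypothetical).  A module supplies an
 * nf_hookfn and an nf_hook_ops:
 *
 *	static unsigned int example_hook(unsigned int hooknum,
 *					 struct sk_buff **pskb,
 *					 const struct net_device *in,
 *					 const struct net_device *out,
 *					 int (*okfn)(struct sk_buff *))
 *	{
 *		return NF_ACCEPT;
 *	}
 *
 *	static struct nf_hook_ops example_ops = {
 *		.hook		= example_hook,
 *		.owner		= THIS_MODULE,
 *		.pf		= PF_INET,
 *		.hooknum	= NF_IP_PRE_ROUTING,
 *		.priority	= NF_IP_PRI_FIRST,
 *	};
 *
 *	nf_register_hook(&example_ops);
 *	...
 *	nf_unregister_hook(&example_ops);
 *
 * Hooks on the same (pf, hooknum) list run in ascending priority order;
 * the insertion loop above keeps the list sorted.
 */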
/* Do exclusive ranges overlap? */
static inline int overlap(int min1, int max1, int min2, int max2)
{
	return max1 > min2 && min1 < max2;
}
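/*
 * Example: these are half-open ranges [min, max), so overlap(64, 69, 69, 74)
 * is 0 (adjacent ranges are fine), while overlap(64, 69, 68, 74) is 1.
 */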
/* Functions to register sockopt ranges (exclusive). */
int nf_register_sockopt(struct nf_sockopt_ops *reg)
{
	struct list_head *i;
	int ret = 0;

	if (down_interruptible(&nf_sockopt_mutex) != 0)
		return -EINTR;

	for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) {
		struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
		if (ops->pf == reg->pf
		    && (overlap(ops->set_optmin, ops->set_optmax,
				reg->set_optmin, reg->set_optmax)
			|| overlap(ops->get_optmin, ops->get_optmax,
				   reg->get_optmin, reg->get_optmax))) {
			NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
				ops->set_optmin, ops->set_optmax,
				ops->get_optmin, ops->get_optmax,
				reg->set_optmin, reg->set_optmax,
				reg->get_optmin, reg->get_optmax);
			ret = -EBUSY;
			goto out;
		}
	}

	list_add(&reg->list, &nf_sockopts);
out:
	up(&nf_sockopt_mutex);
	return ret;
}
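/*
 * Registration sketch (illustrative; the values and example_* names are
 * hypothetical).  A module claims contiguous optname ranges, upper
 * bounds exclusive:
 *
 *	static struct nf_sockopt_ops example_sockopts = {
 *		.pf		= PF_INET,
 *		.set_optmin	= 96,
 *		.set_optmax	= 96 + 2,
 *		.set		= example_set_fn,
 *		.get_optmin	= 96,
 *		.get_optmax	= 96 + 2,
 *		.get		= example_get_fn,
 *	};
 *
 *	nf_register_sockopt(&example_sockopts);
 *
 * Registration fails with -EBUSY if either range collides with one
 * already registered for the same protocol family.
 */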
void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
{
	/* No point being interruptible: we're probably in cleanup_module() */
restart:
	down(&nf_sockopt_mutex);
	if (reg->use != 0) {
		/* To be woken by nf_sockopt call... */
		/* FIXME: Stuart Young's name appears gratuitously. */
		set_current_state(TASK_UNINTERRUPTIBLE);
		reg->cleanup_task = current;
		up(&nf_sockopt_mutex);
		schedule();
		goto restart;
	}
	list_del(&reg->list);
	up(&nf_sockopt_mutex);
}
#ifdef CONFIG_NETFILTER_DEBUG

#include <net/ip.h>
#include <net/tcp.h>
#include <linux/netfilter_ipv4.h>
static void debug_print_hooks_ip(unsigned int nf_debug)
{
	if (nf_debug & (1 << NF_IP_PRE_ROUTING)) {
		printk("PRE_ROUTING ");
		nf_debug ^= (1 << NF_IP_PRE_ROUTING);
	}
	if (nf_debug & (1 << NF_IP_LOCAL_IN)) {
		printk("LOCAL_IN ");
		nf_debug ^= (1 << NF_IP_LOCAL_IN);
	}
	if (nf_debug & (1 << NF_IP_FORWARD)) {
		printk("FORWARD ");
		nf_debug ^= (1 << NF_IP_FORWARD);
	}
	if (nf_debug & (1 << NF_IP_LOCAL_OUT)) {
		printk("LOCAL_OUT ");
		nf_debug ^= (1 << NF_IP_LOCAL_OUT);
	}
	if (nf_debug & (1 << NF_IP_POST_ROUTING)) {
		printk("POST_ROUTING ");
		nf_debug ^= (1 << NF_IP_POST_ROUTING);
	}
	if (nf_debug)
		printk("Crap bits: 0x%04X", nf_debug);
	printk("\n");
}
void nf_dump_skb(int pf, struct sk_buff *skb)
{
	printk("skb: pf=%i %s dev=%s len=%u\n",
	       pf,
	       skb->sk ? "(owned)" : "(unowned)",
	       skb->dev ? skb->dev->name : "(no dev)",
	       skb->len);
	switch (pf) {
	case PF_INET: {
		const struct iphdr *ip = skb->nh.iph;
		__u32 *opt = (__u32 *) (ip + 1);
		int opti;
		__u16 src_port = 0, dst_port = 0;

		if (ip->protocol == IPPROTO_TCP
		    || ip->protocol == IPPROTO_UDP) {
			struct tcphdr *tcp = (struct tcphdr *)((__u32 *)ip + ip->ihl);
			src_port = ntohs(tcp->source);
			dst_port = ntohs(tcp->dest);
		}

		printk("PROTO=%d %u.%u.%u.%u:%hu %u.%u.%u.%u:%hu"
		       " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu",
		       ip->protocol, NIPQUAD(ip->saddr),
		       src_port, NIPQUAD(ip->daddr),
		       dst_port,
		       ntohs(ip->tot_len), ip->tos, ntohs(ip->id),
		       ntohs(ip->frag_off), ip->ttl);

		for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++)
			printk(" O=0x%8.8X", *opt++);
		printk("\n");
	}
	}
}
void nf_debug_ip_local_deliver(struct sk_buff *skb)
{
	/* If it's a loopback packet, it must have come through
	 * NF_IP_LOCAL_OUT, NF_IP_RAW_INPUT, NF_IP_PRE_ROUTING and
	 * NF_IP_LOCAL_IN.  Otherwise, it must have gone through
	 * NF_IP_RAW_INPUT and NF_IP_PRE_ROUTING.  */
	if (!skb->dev)
		printk("ip_local_deliver: skb->dev is NULL.\n");
	else if (strcmp(skb->dev->name, "lo") == 0) {
		if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
				      | (1 << NF_IP_POST_ROUTING)
				      | (1 << NF_IP_PRE_ROUTING)
				      | (1 << NF_IP_LOCAL_IN))) {
			printk("ip_local_deliver: bad loopback skb: ");
			debug_print_hooks_ip(skb->nf_debug);
			nf_dump_skb(PF_INET, skb);
		}
	} else {
		if (skb->nf_debug != ((1 << NF_IP_PRE_ROUTING)
				      | (1 << NF_IP_LOCAL_IN))) {
			printk("ip_local_deliver: bad non-lo skb: ");
			debug_print_hooks_ip(skb->nf_debug);
			nf_dump_skb(PF_INET, skb);
		}
	}
}
void nf_debug_ip_loopback_xmit(struct sk_buff *newskb)
{
	if (newskb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
				 | (1 << NF_IP_POST_ROUTING))) {
		printk("ip_dev_loopback_xmit: bad owned skb = %p: ",
		       newskb);
		debug_print_hooks_ip(newskb->nf_debug);
		nf_dump_skb(PF_INET, newskb);
	}
	/* Clear to avoid confusing input check */
	newskb->nf_debug = 0;
}
void nf_debug_ip_finish_output2(struct sk_buff *skb)
{
	/* If it's owned, it must have gone through
	 * NF_IP_LOCAL_OUT and NF_IP_POST_ROUTING.
	 * Otherwise, it must have gone through
	 * NF_IP_PRE_ROUTING, NF_IP_FORWARD and NF_IP_POST_ROUTING.
	 */
	if (skb->sk) {
		if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
				      | (1 << NF_IP_POST_ROUTING))) {
			printk("ip_finish_output: bad owned skb = %p: ", skb);
			debug_print_hooks_ip(skb->nf_debug);
			nf_dump_skb(PF_INET, skb);
		}
	} else {
		if (skb->nf_debug != ((1 << NF_IP_PRE_ROUTING)
				      | (1 << NF_IP_FORWARD)
				      | (1 << NF_IP_POST_ROUTING))) {
			/* Fragments, entunnelled packets, TCP RSTs
			   generated by ipt_REJECT will have no
			   owners, but still may be local */
			if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
					      | (1 << NF_IP_POST_ROUTING))) {
				printk("ip_finish_output:"
				       " bad unowned skb = %p: ", skb);
				debug_print_hooks_ip(skb->nf_debug);
				nf_dump_skb(PF_INET, skb);
			}
		}
	}
}
#endif /*CONFIG_NETFILTER_DEBUG*/
/* Call get/setsockopt() */
static int nf_sockopt(struct sock *sk, int pf, int val,
		      char *opt, int *len, int get)
{
	struct list_head *i;
	struct nf_sockopt_ops *ops;
	int ret;

	if (down_interruptible(&nf_sockopt_mutex) != 0)
		return -EINTR;

	for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) {
		ops = (struct nf_sockopt_ops *)i;
		if (ops->pf != pf)
			continue;
		if (get) {
			if (val >= ops->get_optmin
			    && val < ops->get_optmax) {
				ops->use++;
				up(&nf_sockopt_mutex);
				ret = ops->get(sk, val, opt, len);
				goto out;
			}
		} else {
			if (val >= ops->set_optmin
			    && val < ops->set_optmax) {
				ops->use++;
				up(&nf_sockopt_mutex);
				ret = ops->set(sk, val, opt, *len);
				goto out;
			}
		}
	}
	up(&nf_sockopt_mutex);
	return -ENOPROTOOPT;

out:
	down(&nf_sockopt_mutex);
	ops->use--;
	if (ops->cleanup_task)
		wake_up_process(ops->cleanup_task);
	up(&nf_sockopt_mutex);
	return ret;
}

int nf_setsockopt(struct sock *sk, int pf, int val, char *opt,
		  int len)
{
	return nf_sockopt(sk, pf, val, opt, &len, 0);
}

int nf_getsockopt(struct sock *sk, int pf, int val, char *opt, int *len)
{
	return nf_sockopt(sk, pf, val, opt, len, 1);
}
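/*
 * Dispatch sketch: the protocol's [gs]etsockopt code falls through to
 * nf_getsockopt()/nf_setsockopt() for optnames it does not handle
 * itself, so a userspace call with an optname inside a registered
 * [get_optmin, get_optmax) range lands in that handler's get().
 * EXAMPLE_OPT below is a hypothetical optname:
 *
 *	getsockopt(fd, IPPROTO_IP, EXAMPLE_OPT, &buf, &len);
 */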
static unsigned int nf_iterate(struct list_head *head,
			       struct sk_buff **skb,
			       int hook,
			       const struct net_device *indev,
			       const struct net_device *outdev,
			       struct list_head **i,
			       int (*okfn)(struct sk_buff *),
			       int hook_thresh)
{
	/*
	 * The caller must not block between calls to this
	 * function because of risk of continuing from deleted element.
	 */
	list_for_each_continue_rcu(*i, head) {
		struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;

		if (hook_thresh > elem->priority)
			continue;

		/* Optimization: we don't need to hold module
		   reference here, since function can't sleep. --RR */
		switch (elem->hook(hook, skb, indev, outdev, okfn)) {
		case NF_QUEUE:
			return NF_QUEUE;
		case NF_STOLEN:
			return NF_STOLEN;
		case NF_DROP:
			return NF_DROP;
		case NF_REPEAT:
			*i = (*i)->prev;
			break;
#ifdef CONFIG_NETFILTER_DEBUG
		case NF_ACCEPT:
			break;
		default:
			NFDEBUG("Evil return from %p(%u).\n",
				elem->hook, hook);
#endif
		}
	}
	return NF_ACCEPT;
}
int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data)
{
	int ret;

	write_lock_bh(&queue_handler_lock);
	if (queue_handler[pf].outfn)
		ret = -EBUSY;
	else {
		queue_handler[pf].outfn = outfn;
		queue_handler[pf].data = data;
		ret = 0;
	}
	write_unlock_bh(&queue_handler_lock);

	return ret;
}

/* The caller must flush their queue before this */
int nf_unregister_queue_handler(int pf)
{
	write_lock_bh(&queue_handler_lock);
	queue_handler[pf].outfn = NULL;
	queue_handler[pf].data = NULL;
	write_unlock_bh(&queue_handler_lock);

	return 0;
}
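/*
 * Handler sketch (illustrative; example_outfn is hypothetical): a queue
 * handler such as ip_queue registers roughly like this.  Every packet
 * its outfn() accepts must eventually be handed back to nf_reinject(),
 * even if only to be dropped:
 *
 *	static int example_outfn(struct sk_buff *skb, struct nf_info *info,
 *				 void *data)
 *	{
 *		... pass skb to userspace; return negative on failure
 *		    so nf_queue() frees the packet ...
 *	}
 *
 *	nf_register_queue_handler(PF_INET, example_outfn, NULL);
 *
 *	... later, once userspace delivers a verdict for a packet:
 *	nf_reinject(skb, info, NF_ACCEPT);
 */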
/*
 * Any packet that leaves via this function must come back
 * through nf_reinject().
 */
static int nf_queue(struct sk_buff *skb,
		    struct list_head *elem,
		    int pf, unsigned int hook,
		    struct net_device *indev,
		    struct net_device *outdev,
		    int (*okfn)(struct sk_buff *))
{
	int status;
	struct nf_info *info;
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
	struct net_device *physindev = NULL;
	struct net_device *physoutdev = NULL;
#endif

	/* QUEUE == DROP if no one is waiting, to be safe. */
	read_lock(&queue_handler_lock);
	if (!queue_handler[pf].outfn) {
		read_unlock(&queue_handler_lock);
		kfree_skb(skb);
		return 1;
	}

	info = kmalloc(sizeof(*info), GFP_ATOMIC);
	if (!info) {
		if (net_ratelimit())
			printk(KERN_ERR "OOM queueing packet %p\n",
			       skb);
		read_unlock(&queue_handler_lock);
		kfree_skb(skb);
		return 1;
	}

	*info = (struct nf_info) {
		(struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };

	/* If it's going away, ignore hook. */
	if (!try_module_get(info->elem->owner)) {
		read_unlock(&queue_handler_lock);
		kfree(info);
		return 0;
	}

	/* Bump dev refs so they don't vanish while packet is out */
	if (indev) dev_hold(indev);
	if (outdev) dev_hold(outdev);

#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
	if (skb->nf_bridge) {
		physindev = skb->nf_bridge->physindev;
		if (physindev) dev_hold(physindev);
		physoutdev = skb->nf_bridge->physoutdev;
		if (physoutdev) dev_hold(physoutdev);
	}
#endif

	status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data);
	read_unlock(&queue_handler_lock);

	if (status < 0) {
		/* Handler refused the packet: drop our refs and the skb. */
		if (indev) dev_put(indev);
		if (outdev) dev_put(outdev);
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
		if (physindev) dev_put(physindev);
		if (physoutdev) dev_put(physoutdev);
#endif
		module_put(info->elem->owner);
		kfree(info);
		kfree_skb(skb);
		return 1;
	}

	return 1;
}
/* Returns -EPERM if the packet was dropped; otherwise the return value
 * of okfn() for NF_ACCEPT, or 0 if the packet was queued or stolen. */
int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
		 struct net_device *indev,
		 struct net_device *outdev,
		 int (*okfn)(struct sk_buff *),
		 int hook_thresh)
{
	struct list_head *elem;
	unsigned int verdict;
	int ret = 0;

	/* Hooks may mangle the packet, so resolve any pending hardware
	 * checksum state before they see it. */
	if (skb->ip_summed == CHECKSUM_HW) {
		if (outdev == NULL) {
			skb->ip_summed = CHECKSUM_NONE;
		} else {
			skb_checksum_help(skb);
		}
	}

	/* We may already have this, but read-locks nest anyway */
	rcu_read_lock();

#ifdef CONFIG_NETFILTER_DEBUG
	if (skb->nf_debug & (1 << hook)) {
		printk("nf_hook: hook %i already set.\n", hook);
		nf_dump_skb(pf, skb);
	}
	skb->nf_debug |= (1 << hook);
#endif

	elem = &nf_hooks[pf][hook];
next_hook:
	verdict = nf_iterate(&nf_hooks[pf][hook], &skb, hook, indev,
			     outdev, &elem, okfn, hook_thresh);
	if (verdict == NF_QUEUE) {
		NFDEBUG("nf_hook: Verdict = QUEUE.\n");
		if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn))
			goto next_hook;
	}

	switch (verdict) {
	case NF_ACCEPT:
		ret = okfn(skb);
		break;

	case NF_DROP:
		kfree_skb(skb);
		ret = -EPERM;
		break;
	}

	rcu_read_unlock();
	return ret;
}
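/*
 * Call-site sketch: protocol code normally goes through the NF_HOOK()
 * macro from linux/netfilter.h rather than calling nf_hook_slow()
 * directly, e.g. IPv4 input (as in net/ipv4/ip_input.c):
 *
 *	return NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL,
 *		       ip_rcv_finish);
 *
 * If every hook answers NF_ACCEPT, the okfn (ip_rcv_finish here) runs;
 * NF_DROP frees the skb and the caller sees -EPERM.
 */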
void nf_reinject(struct sk_buff *skb, struct nf_info *info,
		 unsigned int verdict)
{
	struct list_head *elem = &info->elem->list;
	struct list_head *i;

	rcu_read_lock();

	/* Drop reference to owner of hook which queued us. */
	module_put(info->elem->owner);

	/* Check the hook is still on the list: if not, drop the packet. */
	list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) {
		if (i == elem)
			break;
	}

	if (elem == &nf_hooks[info->pf][info->hook]) {
		/* The module which sent it to userspace is gone. */
		NFDEBUG("%s: module disappeared, dropping packet.\n",
			__FUNCTION__);
		verdict = NF_DROP;
	}

	/* Continue traversal iff userspace said ok... */
	if (verdict == NF_REPEAT) {
		elem = elem->prev;
		verdict = NF_ACCEPT;
	}

	if (verdict == NF_ACCEPT) {
	next_hook:
		verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
				     &skb, info->hook,
				     info->indev, info->outdev, &elem,
				     info->okfn, INT_MIN);
	}

	switch (verdict) {
	case NF_ACCEPT:
		info->okfn(skb);
		break;

	case NF_QUEUE:
		if (!nf_queue(skb, elem, info->pf, info->hook,
			      info->indev, info->outdev, info->okfn))
			goto next_hook;
		break;
	}

	rcu_read_unlock();

	/* Release those devices we held, or Alexey will kill me. */
	if (info->indev) dev_put(info->indev);
	if (info->outdev) dev_put(info->outdev);
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
	if (skb->nf_bridge) {
		if (skb->nf_bridge->physindev)
			dev_put(skb->nf_bridge->physindev);
		if (skb->nf_bridge->physoutdev)
			dev_put(skb->nf_bridge->physoutdev);
	}
#endif

	if (verdict == NF_DROP)
		kfree_skb(skb);

	kfree(info);
}
#ifdef CONFIG_INET
/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
int ip_route_me_harder(struct sk_buff **pskb)
{
	struct iphdr *iph = (*pskb)->nh.iph;
	struct rtable *rt;
	struct flowi fl = {};
	struct dst_entry *odst;
	unsigned int hh_len;

	/* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
	 * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook.
	 */
	if (inet_addr_type(iph->saddr) == RTN_LOCAL) {
		fl.nl_u.ip4_u.daddr = iph->daddr;
		fl.nl_u.ip4_u.saddr = iph->saddr;
		fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
		fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0;
#ifdef CONFIG_IP_ROUTE_FWMARK
		fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark;
#endif
		if (ip_route_output_key(&rt, &fl) != 0)
			return -1;

		/* Drop old route. */
		dst_release((*pskb)->dst);
		(*pskb)->dst = &rt->u.dst;
	} else {
		/* non-local src, find valid iif to satisfy
		 * rp-filter when calling ip_route_input. */
		fl.nl_u.ip4_u.daddr = iph->saddr;
		if (ip_route_output_key(&rt, &fl) != 0)
			return -1;

		odst = (*pskb)->dst;
		if (ip_route_input(*pskb, iph->daddr, iph->saddr,
				   RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
			dst_release(&rt->u.dst);
			return -1;
		}
		dst_release(&rt->u.dst);
		dst_release(odst);
	}

	if ((*pskb)->dst->error)
		return -1;

	/* Change in oif may mean change in hh_len. */
	hh_len = (*pskb)->dst->dev->hard_header_len;
	if (skb_headroom(*pskb) < hh_len) {
		struct sk_buff *nskb;

		nskb = skb_realloc_headroom(*pskb, hh_len);
		if (!nskb)
			return -1;
		if ((*pskb)->sk)
			skb_set_owner_w(nskb, (*pskb)->sk);
		kfree_skb(*pskb);
		*pskb = nskb;
	}

	return 0;
}
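/*
 * Usage sketch: callers such as iptable_mangle re-route a packet whose
 * addresses or mark they have rewritten, dropping it if no route exists:
 *
 *	if (ip_route_me_harder(pskb) != 0)
 *		return NF_DROP;
 */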
int skb_ip_make_writable(struct sk_buff **pskb, unsigned int writable_len)
{
	struct sk_buff *nskb;
	unsigned int iplen;

	if (writable_len > (*pskb)->len)
		return 0;

	/* Not exclusive use of packet?  Must copy. */
	if (skb_shared(*pskb) || skb_cloned(*pskb))
		goto copy_skb;

	/* Alexey says IP hdr is always modifiable and linear, so ok. */
	if (writable_len <= (*pskb)->nh.iph->ihl*4)
		return 1;

	iplen = writable_len - (*pskb)->nh.iph->ihl*4;

	/* DaveM says protocol headers are also modifiable. */
	switch ((*pskb)->nh.iph->protocol) {
	case IPPROTO_TCP: {
		struct tcphdr hdr;
		if (skb_copy_bits(*pskb, (*pskb)->nh.iph->ihl*4,
				  &hdr, sizeof(hdr)) != 0)
			goto copy_skb;
		if (writable_len <= (*pskb)->nh.iph->ihl*4 + hdr.doff*4)
			goto pull_skb;
		goto copy_skb;
	}
	case IPPROTO_UDP:
		if (writable_len <= (*pskb)->nh.iph->ihl*4
				    + sizeof(struct udphdr))
			goto pull_skb;
		goto copy_skb;
	case IPPROTO_ICMP:
		if (writable_len
		    <= (*pskb)->nh.iph->ihl*4 + sizeof(struct icmphdr))
			goto pull_skb;
		goto copy_skb;
	/* Insert other cases here as desired */
	}

copy_skb:
	nskb = skb_copy(*pskb, GFP_ATOMIC);
	if (!nskb)
		return 0;
	BUG_ON(skb_is_nonlinear(nskb));

	/* Rest of kernel will get very unhappy if we pass it a
	   suddenly-orphaned skbuff */
	if ((*pskb)->sk)
		skb_set_owner_w(nskb, (*pskb)->sk);
	kfree_skb(*pskb);
	*pskb = nskb;
	return 1;

pull_skb:
	return pskb_may_pull(*pskb, writable_len);
}
EXPORT_SYMBOL(skb_ip_make_writable);
#endif /*CONFIG_INET*/
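/*
 * Usage sketch: code that rewrites headers must first make the bytes it
 * will touch writable; note that *pskb may be replaced by a private copy:
 *
 *	if (!skb_ip_make_writable(pskb, (*pskb)->nh.iph->ihl*4
 *				  + sizeof(struct tcphdr)))
 *		return NF_DROP;
 */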
/* This does not belong here, but ipt_REJECT needs it if connection
   tracking is in use: without this, the connection may not be in the
   hash table, and hence manufactured ICMP or RST packets will not be
   associated with it. */
void (*ip_ct_attach)(struct sk_buff *, struct nf_ct_info *);

void __init netfilter_init(void)
{
	int i, h;

	for (i = 0; i < NPROTO; i++) {
		for (h = 0; h < NF_MAX_HOOKS; h++)
			INIT_LIST_HEAD(&nf_hooks[i][h]);
	}
}