980cd4514d6946c46712cb01e7b4c65930f863e9
[linux-flexiantxendom0-3.2.10.git] / net / core / netfilter.c
1 /* netfilter.c: look after the filters for various protocols. 
2  * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
3  *
4  * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
5  * way.
6  *
7  * Rusty Russell (C)2000 -- This code is GPL.
8  *
9  * February 2000: Modified by James Morris to have 1 queue per protocol.
10  * 15-Mar-2000:   Added NF_REPEAT --RR.
11  */
12 #include <linux/config.h>
13 #include <linux/netfilter.h>
14 #include <net/protocol.h>
15 #include <linux/init.h>
16 #include <linux/skbuff.h>
17 #include <linux/wait.h>
18 #include <linux/module.h>
19 #include <linux/interrupt.h>
20 #include <linux/if.h>
21 #include <linux/netdevice.h>
22 #include <linux/inetdevice.h>
23 #include <linux/tcp.h>
24 #include <linux/udp.h>
25 #include <linux/icmp.h>
26 #include <net/sock.h>
27 #include <net/route.h>
28 #include <linux/ip.h>
29
30 #define __KERNEL_SYSCALLS__
31 #include <linux/unistd.h>
32
33 /* In this code, we can be waiting indefinitely for userspace to
34  * service a packet if a hook returns NF_QUEUE.  We could keep a count
35  * of skbuffs queued for userspace, and not deregister a hook unless
36  * this is zero, but that sucks.  Now, we simply check when the
37  * packets come back: if the hook is gone, the packet is discarded. */
/* Debug trace helper: forwards to printk() when CONFIG_NETFILTER_DEBUG is
 * set and expands to nothing otherwise, so arguments are not evaluated in
 * non-debug builds. */
#ifdef CONFIG_NETFILTER_DEBUG
#define NFDEBUG(format, args...)  printk(format , ## args)
#else
#define NFDEBUG(format, args...)
#endif
43
/* Sockopts only registered and called from user context, so
   net locking would be overkill.  Also, [gs]etsockopt calls may
   sleep. */
static DECLARE_MUTEX(nf_sockopt_mutex);

/* One hook list per (protocol family, hook number); each list head is
   initialised in netfilter_init(). */
struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
/* Registered sockopt ranges; walked and modified under nf_sockopt_mutex. */
static LIST_HEAD(nf_sockopts);
/* Taken by nf_register_hook()/nf_unregister_hook() around list updates. */
static spinlock_t nf_hook_lock = SPIN_LOCK_UNLOCKED;
52
/*
 * A queue handler may be registered for each protocol.  The table is
 * protected by queue_handler_lock (a reader/writer lock, declared below).
 * The handler must provide an outfn() to accept packets for queueing and
 * must reinject all packets it receives, no matter what.
 */
static struct nf_queue_handler_t {
        /* Callback handed each queued packet; NULL means no handler. */
        nf_queue_outfn_t outfn;
        /* Opaque pointer passed back to outfn() as its last argument. */
        void *data;
} queue_handler[NPROTO];
static rwlock_t queue_handler_lock = RW_LOCK_UNLOCKED;
63
64 int nf_register_hook(struct nf_hook_ops *reg)
65 {
66         struct list_head *i;
67
68         spin_lock_bh(&nf_hook_lock);
69         list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) {
70                 if (reg->priority < ((struct nf_hook_ops *)i)->priority)
71                         break;
72         }
73         list_add_rcu(&reg->list, i->prev);
74         spin_unlock_bh(&nf_hook_lock);
75
76         synchronize_net();
77         return 0;
78 }
79
/*
 * Remove a previously registered hook from its list.
 *
 * The entry is unlinked under nf_hook_lock, then synchronize_net() is
 * called before returning to the caller.
 */
void nf_unregister_hook(struct nf_hook_ops *reg)
{
        spin_lock_bh(&nf_hook_lock);
        list_del_rcu(&reg->list);
        spin_unlock_bh(&nf_hook_lock);

        synchronize_net();
}
88
/*
 * Return 1 if the half-open ranges [min1, max1) and [min2, max2) share at
 * least one value, 0 otherwise.  Ranges that merely touch do not overlap.
 */
static inline int overlap(int min1, int max1, int min2, int max2)
{
	/* Disjoint exactly when one range ends at or before the other starts. */
	return !(max1 <= min2 || max2 <= min1);
}
94
95 /* Functions to register sockopt ranges (exclusive). */
96 int nf_register_sockopt(struct nf_sockopt_ops *reg)
97 {
98         struct list_head *i;
99         int ret = 0;
100
101         if (down_interruptible(&nf_sockopt_mutex) != 0)
102                 return -EINTR;
103
104         for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) {
105                 struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
106                 if (ops->pf == reg->pf
107                     && (overlap(ops->set_optmin, ops->set_optmax, 
108                                 reg->set_optmin, reg->set_optmax)
109                         || overlap(ops->get_optmin, ops->get_optmax, 
110                                    reg->get_optmin, reg->get_optmax))) {
111                         NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
112                                 ops->set_optmin, ops->set_optmax, 
113                                 ops->get_optmin, ops->get_optmax, 
114                                 reg->set_optmin, reg->set_optmax,
115                                 reg->get_optmin, reg->get_optmax);
116                         ret = -EBUSY;
117                         goto out;
118                 }
119         }
120
121         list_add(&reg->list, &nf_sockopts);
122 out:
123         up(&nf_sockopt_mutex);
124         return ret;
125 }
126
127 void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
128 {
129         /* No point being interruptible: we're probably in cleanup_module() */
130  restart:
131         down(&nf_sockopt_mutex);
132         if (reg->use != 0) {
133                 /* To be woken by nf_sockopt call... */
134                 /* FIXME: Stuart Young's name appears gratuitously. */
135                 set_current_state(TASK_UNINTERRUPTIBLE);
136                 reg->cleanup_task = current;
137                 up(&nf_sockopt_mutex);
138                 schedule();
139                 goto restart;
140         }
141         list_del(&reg->list);
142         up(&nf_sockopt_mutex);
143 }
144
145 #ifdef CONFIG_NETFILTER_DEBUG
146 #include <net/ip.h>
147 #include <net/tcp.h>
148 #include <linux/netfilter_ipv4.h>
149
150 static void debug_print_hooks_ip(unsigned int nf_debug)
151 {
152         if (nf_debug & (1 << NF_IP_PRE_ROUTING)) {
153                 printk("PRE_ROUTING ");
154                 nf_debug ^= (1 << NF_IP_PRE_ROUTING);
155         }
156         if (nf_debug & (1 << NF_IP_LOCAL_IN)) {
157                 printk("LOCAL_IN ");
158                 nf_debug ^= (1 << NF_IP_LOCAL_IN);
159         }
160         if (nf_debug & (1 << NF_IP_FORWARD)) {
161                 printk("FORWARD ");
162                 nf_debug ^= (1 << NF_IP_FORWARD);
163         }
164         if (nf_debug & (1 << NF_IP_LOCAL_OUT)) {
165                 printk("LOCAL_OUT ");
166                 nf_debug ^= (1 << NF_IP_LOCAL_OUT);
167         }
168         if (nf_debug & (1 << NF_IP_POST_ROUTING)) {
169                 printk("POST_ROUTING ");
170                 nf_debug ^= (1 << NF_IP_POST_ROUTING);
171         }
172         if (nf_debug)
173                 printk("Crap bits: 0x%04X", nf_debug);
174         printk("\n");
175 }
176
177 void nf_dump_skb(int pf, struct sk_buff *skb)
178 {
179         printk("skb: pf=%i %s dev=%s len=%u\n", 
180                pf,
181                skb->sk ? "(owned)" : "(unowned)",
182                skb->dev ? skb->dev->name : "(no dev)",
183                skb->len);
184         switch (pf) {
185         case PF_INET: {
186                 const struct iphdr *ip = skb->nh.iph;
187                 __u32 *opt = (__u32 *) (ip + 1);
188                 int opti;
189                 __u16 src_port = 0, dst_port = 0;
190
191                 if (ip->protocol == IPPROTO_TCP
192                     || ip->protocol == IPPROTO_UDP) {
193                         struct tcphdr *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl);
194                         src_port = ntohs(tcp->source);
195                         dst_port = ntohs(tcp->dest);
196                 }
197         
198                 printk("PROTO=%d %u.%u.%u.%u:%hu %u.%u.%u.%u:%hu"
199                        " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu",
200                        ip->protocol, NIPQUAD(ip->saddr),
201                        src_port, NIPQUAD(ip->daddr),
202                        dst_port,
203                        ntohs(ip->tot_len), ip->tos, ntohs(ip->id),
204                        ntohs(ip->frag_off), ip->ttl);
205
206                 for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++)
207                         printk(" O=0x%8.8X", *opt++);
208                 printk("\n");
209         }
210         }
211 }
212
213 void nf_debug_ip_local_deliver(struct sk_buff *skb)
214 {
215         /* If it's a loopback packet, it must have come through
216          * NF_IP_LOCAL_OUT, NF_IP_RAW_INPUT, NF_IP_PRE_ROUTING and
217          * NF_IP_LOCAL_IN.  Otherwise, must have gone through
218          * NF_IP_RAW_INPUT and NF_IP_PRE_ROUTING.  */
219         if (!skb->dev) {
220                 printk("ip_local_deliver: skb->dev is NULL.\n");
221         }
222         else if (strcmp(skb->dev->name, "lo") == 0) {
223                 if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
224                                       | (1 << NF_IP_POST_ROUTING)
225                                       | (1 << NF_IP_PRE_ROUTING)
226                                       | (1 << NF_IP_LOCAL_IN))) {
227                         printk("ip_local_deliver: bad loopback skb: ");
228                         debug_print_hooks_ip(skb->nf_debug);
229                         nf_dump_skb(PF_INET, skb);
230                 }
231         }
232         else {
233                 if (skb->nf_debug != ((1<<NF_IP_PRE_ROUTING)
234                                       | (1<<NF_IP_LOCAL_IN))) {
235                         printk("ip_local_deliver: bad non-lo skb: ");
236                         debug_print_hooks_ip(skb->nf_debug);
237                         nf_dump_skb(PF_INET, skb);
238                 }
239         }
240 }
241
242 void nf_debug_ip_loopback_xmit(struct sk_buff *newskb)
243 {
244         if (newskb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
245                                  | (1 << NF_IP_POST_ROUTING))) {
246                 printk("ip_dev_loopback_xmit: bad owned skb = %p: ", 
247                        newskb);
248                 debug_print_hooks_ip(newskb->nf_debug);
249                 nf_dump_skb(PF_INET, newskb);
250         }
251         /* Clear to avoid confusing input check */
252         newskb->nf_debug = 0;
253 }
254
255 void nf_debug_ip_finish_output2(struct sk_buff *skb)
256 {
257         /* If it's owned, it must have gone through the
258          * NF_IP_LOCAL_OUT and NF_IP_POST_ROUTING.
259          * Otherwise, must have gone through
260          * NF_IP_PRE_ROUTING, NF_IP_FORWARD and NF_IP_POST_ROUTING.
261          */
262         if (skb->sk) {
263                 if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
264                                       | (1 << NF_IP_POST_ROUTING))) {
265                         printk("ip_finish_output: bad owned skb = %p: ", skb);
266                         debug_print_hooks_ip(skb->nf_debug);
267                         nf_dump_skb(PF_INET, skb);
268                 }
269         } else {
270                 if (skb->nf_debug != ((1 << NF_IP_PRE_ROUTING)
271                                       | (1 << NF_IP_FORWARD)
272                                       | (1 << NF_IP_POST_ROUTING))) {
273                         /* Fragments, entunnelled packets, TCP RSTs
274                            generated by ipt_REJECT will have no
275                            owners, but still may be local */
276                         if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
277                                               | (1 << NF_IP_POST_ROUTING))){
278                                 printk("ip_finish_output:"
279                                        " bad unowned skb = %p: ",skb);
280                                 debug_print_hooks_ip(skb->nf_debug);
281                                 nf_dump_skb(PF_INET, skb);
282                         }
283                 }
284         }
285 }
286 #endif /*CONFIG_NETFILTER_DEBUG*/
287
288 /* Call get/setsockopt() */
289 static int nf_sockopt(struct sock *sk, int pf, int val, 
290                       char *opt, int *len, int get)
291 {
292         struct list_head *i;
293         struct nf_sockopt_ops *ops;
294         int ret;
295
296         if (down_interruptible(&nf_sockopt_mutex) != 0)
297                 return -EINTR;
298
299         for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) {
300                 ops = (struct nf_sockopt_ops *)i;
301                 if (ops->pf == pf) {
302                         if (get) {
303                                 if (val >= ops->get_optmin
304                                     && val < ops->get_optmax) {
305                                         ops->use++;
306                                         up(&nf_sockopt_mutex);
307                                         ret = ops->get(sk, val, opt, len);
308                                         goto out;
309                                 }
310                         } else {
311                                 if (val >= ops->set_optmin
312                                     && val < ops->set_optmax) {
313                                         ops->use++;
314                                         up(&nf_sockopt_mutex);
315                                         ret = ops->set(sk, val, opt, *len);
316                                         goto out;
317                                 }
318                         }
319                 }
320         }
321         up(&nf_sockopt_mutex);
322         return -ENOPROTOOPT;
323         
324  out:
325         down(&nf_sockopt_mutex);
326         ops->use--;
327         if (ops->cleanup_task)
328                 wake_up_process(ops->cleanup_task);
329         up(&nf_sockopt_mutex);
330         return ret;
331 }
332
/*
 * setsockopt entry point: forwards to nf_sockopt() with get == 0.
 * len is passed by address only to fit nf_sockopt()'s signature; the set
 * handler receives its value.
 */
int nf_setsockopt(struct sock *sk, int pf, int val, char *opt,
                  int len)
{
        return nf_sockopt(sk, pf, val, opt, &len, 0);
}
338
/*
 * getsockopt entry point: forwards to nf_sockopt() with get == 1, handing
 * the caller's len pointer through to the get handler.
 */
int nf_getsockopt(struct sock *sk, int pf, int val, char *opt, int *len)
{
        return nf_sockopt(sk, pf, val, opt, len, 1);
}
343
344 static unsigned int nf_iterate(struct list_head *head,
345                                struct sk_buff **skb,
346                                int hook,
347                                const struct net_device *indev,
348                                const struct net_device *outdev,
349                                struct list_head **i,
350                                int (*okfn)(struct sk_buff *),
351                                int hook_thresh)
352 {
353         /*
354          * The caller must not block between calls to this
355          * function because of risk of continuing from deleted element.
356          */
357         list_for_each_continue_rcu(*i, head) {
358                 struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
359
360                 if (hook_thresh > elem->priority)
361                         continue;
362
363                 /* Optimization: we don't need to hold module
364                    reference here, since function can't sleep. --RR */
365                 switch (elem->hook(hook, skb, indev, outdev, okfn)) {
366                 case NF_QUEUE:
367                         return NF_QUEUE;
368
369                 case NF_STOLEN:
370                         return NF_STOLEN;
371
372                 case NF_DROP:
373                         return NF_DROP;
374
375                 case NF_REPEAT:
376                         *i = (*i)->prev;
377                         break;
378
379 #ifdef CONFIG_NETFILTER_DEBUG
380                 case NF_ACCEPT:
381                         break;
382
383                 default:
384                         NFDEBUG("Evil return from %p(%u).\n", 
385                                 elem->hook, hook);
386 #endif
387                 }
388         }
389         return NF_ACCEPT;
390 }
391
392 int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data)
393 {      
394         int ret;
395
396         write_lock_bh(&queue_handler_lock);
397         if (queue_handler[pf].outfn)
398                 ret = -EBUSY;
399         else {
400                 queue_handler[pf].outfn = outfn;
401                 queue_handler[pf].data = data;
402                 ret = 0;
403         }
404         write_unlock_bh(&queue_handler_lock);
405
406         return ret;
407 }
408
409 /* The caller must flush their queue before this */
410 int nf_unregister_queue_handler(int pf)
411 {
412         write_lock_bh(&queue_handler_lock);
413         queue_handler[pf].outfn = NULL;
414         queue_handler[pf].data = NULL;
415         write_unlock_bh(&queue_handler_lock);
416         
417         return 0;
418 }
419
420 /* 
421  * Any packet that leaves via this function must come back 
422  * through nf_reinject().
423  */
424 static int nf_queue(struct sk_buff *skb, 
425                     struct list_head *elem, 
426                     int pf, unsigned int hook,
427                     struct net_device *indev,
428                     struct net_device *outdev,
429                     int (*okfn)(struct sk_buff *))
430 {
431         int status;
432         struct nf_info *info;
433 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
434         struct net_device *physindev = NULL;
435         struct net_device *physoutdev = NULL;
436 #endif
437
438         /* QUEUE == DROP if noone is waiting, to be safe. */
439         read_lock(&queue_handler_lock);
440         if (!queue_handler[pf].outfn) {
441                 read_unlock(&queue_handler_lock);
442                 kfree_skb(skb);
443                 return 1;
444         }
445
446         info = kmalloc(sizeof(*info), GFP_ATOMIC);
447         if (!info) {
448                 if (net_ratelimit())
449                         printk(KERN_ERR "OOM queueing packet %p\n",
450                                skb);
451                 read_unlock(&queue_handler_lock);
452                 kfree_skb(skb);
453                 return 1;
454         }
455
456         *info = (struct nf_info) { 
457                 (struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };
458
459         /* If it's going away, ignore hook. */
460         if (!try_module_get(info->elem->owner)) {
461                 read_unlock(&queue_handler_lock);
462                 kfree(info);
463                 return 0;
464         }
465
466         /* Bump dev refs so they don't vanish while packet is out */
467         if (indev) dev_hold(indev);
468         if (outdev) dev_hold(outdev);
469
470 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
471         if (skb->nf_bridge) {
472                 physindev = skb->nf_bridge->physindev;
473                 if (physindev) dev_hold(physindev);
474                 physoutdev = skb->nf_bridge->physoutdev;
475                 if (physoutdev) dev_hold(physoutdev);
476         }
477 #endif
478
479         status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data);
480         read_unlock(&queue_handler_lock);
481
482         if (status < 0) {
483                 /* James M doesn't say fuck enough. */
484                 if (indev) dev_put(indev);
485                 if (outdev) dev_put(outdev);
486 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
487                 if (physindev) dev_put(physindev);
488                 if (physoutdev) dev_put(physoutdev);
489 #endif
490                 module_put(info->elem->owner);
491                 kfree(info);
492                 kfree_skb(skb);
493                 return 1;
494         }
495         return 1;
496 }
497
498 int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
499                  struct net_device *indev,
500                  struct net_device *outdev,
501                  int (*okfn)(struct sk_buff *),
502                  int hook_thresh)
503 {
504         struct list_head *elem;
505         unsigned int verdict;
506         int ret = 0;
507
508         if (skb->ip_summed == CHECKSUM_HW) {
509                 if (outdev == NULL) {
510                         skb->ip_summed = CHECKSUM_NONE;
511                 } else {
512                         skb_checksum_help(skb);
513                 }
514         }
515
516         /* We may already have this, but read-locks nest anyway */
517         rcu_read_lock();
518
519 #ifdef CONFIG_NETFILTER_DEBUG
520         if (skb->nf_debug & (1 << hook)) {
521                 printk("nf_hook: hook %i already set.\n", hook);
522                 nf_dump_skb(pf, skb);
523         }
524         skb->nf_debug |= (1 << hook);
525 #endif
526
527         elem = &nf_hooks[pf][hook];
528  next_hook:
529         verdict = nf_iterate(&nf_hooks[pf][hook], &skb, hook, indev,
530                              outdev, &elem, okfn, hook_thresh);
531         if (verdict == NF_QUEUE) {
532                 NFDEBUG("nf_hook: Verdict = QUEUE.\n");
533                 if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn))
534                         goto next_hook;
535         }
536
537         switch (verdict) {
538         case NF_ACCEPT:
539                 ret = okfn(skb);
540                 break;
541
542         case NF_DROP:
543                 kfree_skb(skb);
544                 ret = -EPERM;
545                 break;
546         }
547
548         rcu_read_unlock();
549         return ret;
550 }
551
552 void nf_reinject(struct sk_buff *skb, struct nf_info *info,
553                  unsigned int verdict)
554 {
555         struct list_head *elem = &info->elem->list;
556         struct list_head *i;
557
558         rcu_read_lock();
559
560         /* Drop reference to owner of hook which queued us. */
561         module_put(info->elem->owner);
562
563         list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) {
564                 if (i == elem) 
565                         break;
566         }
567   
568         if (elem == &nf_hooks[info->pf][info->hook]) {
569                 /* The module which sent it to userspace is gone. */
570                 NFDEBUG("%s: module disappeared, dropping packet.\n",
571                         __FUNCTION__);
572                 verdict = NF_DROP;
573         }
574
575         /* Continue traversal iff userspace said ok... */
576         if (verdict == NF_REPEAT) {
577                 elem = elem->prev;
578                 verdict = NF_ACCEPT;
579         }
580
581         if (verdict == NF_ACCEPT) {
582         next_hook:
583                 verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
584                                      &skb, info->hook, 
585                                      info->indev, info->outdev, &elem,
586                                      info->okfn, INT_MIN);
587         }
588
589         switch (verdict) {
590         case NF_ACCEPT:
591                 info->okfn(skb);
592                 break;
593
594         case NF_QUEUE:
595                 if (!nf_queue(skb, elem, info->pf, info->hook, 
596                               info->indev, info->outdev, info->okfn))
597                         goto next_hook;
598                 break;
599         }
600         rcu_read_unlock();
601
602         /* Release those devices we held, or Alexey will kill me. */
603         if (info->indev) dev_put(info->indev);
604         if (info->outdev) dev_put(info->outdev);
605 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
606         if (skb->nf_bridge) {
607                 if (skb->nf_bridge->physindev)
608                         dev_put(skb->nf_bridge->physindev);
609                 if (skb->nf_bridge->physoutdev)
610                         dev_put(skb->nf_bridge->physoutdev);
611         }
612 #endif
613
614
615         if (verdict == NF_DROP)
616                 kfree_skb(skb);
617
618         kfree(info);
619         return;
620 }
621
622 #ifdef CONFIG_INET
623 /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
624 int ip_route_me_harder(struct sk_buff **pskb)
625 {
626         struct iphdr *iph = (*pskb)->nh.iph;
627         struct rtable *rt;
628         struct flowi fl = {};
629         struct dst_entry *odst;
630         unsigned int hh_len;
631
632         /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
633          * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook.
634          */
635         if (inet_addr_type(iph->saddr) == RTN_LOCAL) {
636                 fl.nl_u.ip4_u.daddr = iph->daddr;
637                 fl.nl_u.ip4_u.saddr = iph->saddr;
638                 fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
639                 fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0;
640 #ifdef CONFIG_IP_ROUTE_FWMARK
641                 fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark;
642 #endif
643                 if (ip_route_output_key(&rt, &fl) != 0)
644                         return -1;
645
646                 /* Drop old route. */
647                 dst_release((*pskb)->dst);
648                 (*pskb)->dst = &rt->u.dst;
649         } else {
650                 /* non-local src, find valid iif to satisfy
651                  * rp-filter when calling ip_route_input. */
652                 fl.nl_u.ip4_u.daddr = iph->saddr;
653                 if (ip_route_output_key(&rt, &fl) != 0)
654                         return -1;
655
656                 odst = (*pskb)->dst;
657                 if (ip_route_input(*pskb, iph->daddr, iph->saddr,
658                                    RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
659                         dst_release(&rt->u.dst);
660                         return -1;
661                 }
662                 dst_release(&rt->u.dst);
663                 dst_release(odst);
664         }
665         
666         if ((*pskb)->dst->error)
667                 return -1;
668
669         /* Change in oif may mean change in hh_len. */
670         hh_len = (*pskb)->dst->dev->hard_header_len;
671         if (skb_headroom(*pskb) < hh_len) {
672                 struct sk_buff *nskb;
673
674                 nskb = skb_realloc_headroom(*pskb, hh_len);
675                 if (!nskb) 
676                         return -1;
677                 if ((*pskb)->sk)
678                         skb_set_owner_w(nskb, (*pskb)->sk);
679                 kfree_skb(*pskb);
680                 *pskb = nskb;
681         }
682
683         return 0;
684 }
685
686 int skb_ip_make_writable(struct sk_buff **pskb, unsigned int writable_len)
687 {
688         struct sk_buff *nskb;
689         unsigned int iplen;
690
691         if (writable_len > (*pskb)->len)
692                 return 0;
693
694         /* Not exclusive use of packet?  Must copy. */
695         if (skb_shared(*pskb) || skb_cloned(*pskb))
696                 goto copy_skb;
697
698         /* Alexey says IP hdr is always modifiable and linear, so ok. */
699         if (writable_len <= (*pskb)->nh.iph->ihl*4)
700                 return 1;
701
702         iplen = writable_len - (*pskb)->nh.iph->ihl*4;
703
704         /* DaveM says protocol headers are also modifiable. */
705         switch ((*pskb)->nh.iph->protocol) {
706         case IPPROTO_TCP: {
707                 struct tcphdr hdr;
708                 if (skb_copy_bits(*pskb, (*pskb)->nh.iph->ihl*4,
709                                   &hdr, sizeof(hdr)) != 0)
710                         goto copy_skb;
711                 if (writable_len <= (*pskb)->nh.iph->ihl*4 + hdr.doff*4)
712                         goto pull_skb;
713                 goto copy_skb;
714         }
715         case IPPROTO_UDP:
716                 if (writable_len<=(*pskb)->nh.iph->ihl*4+sizeof(struct udphdr))
717                         goto pull_skb;
718                 goto copy_skb;
719         case IPPROTO_ICMP:
720                 if (writable_len
721                     <= (*pskb)->nh.iph->ihl*4 + sizeof(struct icmphdr))
722                         goto pull_skb;
723                 goto copy_skb;
724         /* Insert other cases here as desired */
725         }
726
727 copy_skb:
728         nskb = skb_copy(*pskb, GFP_ATOMIC);
729         if (!nskb)
730                 return 0;
731         BUG_ON(skb_is_nonlinear(nskb));
732
733         /* Rest of kernel will get very unhappy if we pass it a
734            suddenly-orphaned skbuff */
735         if ((*pskb)->sk)
736                 skb_set_owner_w(nskb, (*pskb)->sk);
737         kfree_skb(*pskb);
738         *pskb = nskb;
739         return 1;
740
741 pull_skb:
742         return pskb_may_pull(*pskb, writable_len);
743 }
744 EXPORT_SYMBOL(skb_ip_make_writable);
745 #endif /*CONFIG_INET*/
746
747
748 /* This does not belong here, but ipt_REJECT needs it if connection
749    tracking in use: without this, connection may not be in hash table,
750    and hence manufactured ICMP or RST packets will not be associated
751    with it. */
752 void (*ip_ct_attach)(struct sk_buff *, struct nf_ct_info *);
753
754 void __init netfilter_init(void)
755 {
756         int i, h;
757
758         for (i = 0; i < NPROTO; i++) {
759                 for (h = 0; h < NF_MAX_HOOKS; h++)
760                         INIT_LIST_HEAD(&nf_hooks[i][h]);
761         }
762 }