update to 2.6.9-rc1
[linux-flexiantxendom0-3.2.10.git] / net / ipv4 / netfilter / ip_tables.c
1 /*
2  * Packet matching code.
3  *
4  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5  * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  *
11  * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
12  *      - increase module usage count as soon as we have rules inside
13  *        a table
14  */
15 #include <linux/config.h>
16 #include <linux/cache.h>
17 #include <linux/skbuff.h>
18 #include <linux/kmod.h>
19 #include <linux/vmalloc.h>
20 #include <linux/netdevice.h>
21 #include <linux/module.h>
22 #include <linux/tcp.h>
23 #include <linux/udp.h>
24 #include <linux/icmp.h>
25 #include <net/ip.h>
26 #include <asm/uaccess.h>
27 #include <asm/semaphore.h>
28 #include <linux/proc_fs.h>
29
30 #include <linux/netfilter_ipv4/ip_tables.h>
31
/* Module metadata: license, author and one-line description. */
32 MODULE_LICENSE("GPL");
33 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
34 MODULE_DESCRIPTION("IPv4 packet filter");
35
/*
 * Compile-time debug switches.  All three are commented out here, so
 * none of them is defined by this file.
 *   DEBUG_IP_FIREWALL      - makes dprintf() below expand to printk().
 *   DEBUG_ALLOW_ALL        - makes ipt_do_table() return NF_ACCEPT
 *                            whatever verdict it computed.
 *   DEBUG_IP_FIREWALL_USER - makes duprintf() below expand to printk().
 */
36 /*#define DEBUG_IP_FIREWALL*/
37 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
38 /*#define DEBUG_IP_FIREWALL_USER*/
39
/* dprintf(): debug output used by the packet-matching code
 * (ip_packet_match).  Expands to nothing unless DEBUG_IP_FIREWALL is
 * defined, in which case its arguments are not evaluated at all. */
40 #ifdef DEBUG_IP_FIREWALL
41 #define dprintf(format, args...)  printk(format , ## args)
42 #else
43 #define dprintf(format, args...)
44 #endif
45
/* duprintf(): debug output used by the table lookup/validation code.
 * Expands to nothing unless DEBUG_IP_FIREWALL_USER is defined. */
46 #ifdef DEBUG_IP_FIREWALL_USER
47 #define duprintf(format, args...) printk(format , ## args)
48 #else
49 #define duprintf(format, args...)
50 #endif
51
/* IP_NF_ASSERT(x): with CONFIG_NETFILTER_DEBUG, print the enclosing
 * function, file and line when x is false; execution then continues.
 * Without CONFIG_NETFILTER_DEBUG it expands to nothing, so x is never
 * evaluated -- do not rely on side effects of the asserted expression. */
52 #ifdef CONFIG_NETFILTER_DEBUG
53 #define IP_NF_ASSERT(x)                                         \
54 do {                                                            \
55         if (!(x))                                               \
56                 printk("IP_NF_ASSERT: %s:%s:%u\n",              \
57                        __FUNCTION__, __FILE__, __LINE__);       \
58 } while(0)
59 #else
60 #define IP_NF_ASSERT(x)
61 #endif
/* SMP_ALIGN(x): round x up to a multiple of SMP_CACHE_BYTES.  The mask
 * arithmetic is only correct when SMP_CACHE_BYTES is a power of two. */
62 #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
63
/* Semaphore taken by the find_*_lock() helpers below around lookups in
 * the ipt_target / ipt_match / ipt_tables lists. */
64 static DECLARE_MUTEX(ipt_mutex);
65
66 /* Must have mutex */
/* Both macros ignore their argument and assert that ipt_mutex cannot be
 * taken right now (down_trylock() returns non-zero).
 * NOTE(review): presumably these are consumed by the lockhelp.h /
 * listhelp.h headers included right after -- confirm.
 * NOTE(review): if the assertion is evaluated while the mutex is free,
 * down_trylock() would presumably acquire it and nothing here releases
 * it again -- confirm before enabling CONFIG_NETFILTER_DEBUG. */
67 #define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
68 #define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
69 #include <linux/netfilter_ipv4/lockhelp.h>
70 #include <linux/netfilter_ipv4/listhelp.h>
71
/* Disabled debugging aid: when the `#if 0' is flipped, `static' and
 * `inline' are defined away for the rest of this file. */
72 #if 0
73 /* All the better to debug you with... */
74 #define static
75 #define inline
76 #endif
77
78 /*
79    We keep a set of rules for each CPU, so we can avoid write-locking
80    them in the softirq when updating the counters and therefore
81    only need to read-lock in the softirq; doing a write_lock_bh() in user
82    context stops packets coming through and allows user context to read
83    the counters or update the rules.
84
85    To be cache friendly on SMP, we arrange them like so:
86    [ n-entries ]
87    ... cache-align padding ...
88    [ n-entries ]
89
90    Hence the start of each CPU's copy is given by TABLE_OFFSET() below.  */
91
92 /* The table itself: bookkeeping header followed by the rule blob(s). */
93 struct ipt_table_info
94 {
95         /* Size in bytes of one copy of the rule blob (translate_table()
            * stores its `size' argument here).  On SMP, TABLE_OFFSET() uses
            * SMP_ALIGN(size) as the distance between per-CPU copies. */
96         unsigned int size;
97         /* Number of entries: FIXME. --RR */
98         unsigned int number;
99         /* Initial number of entries. Needed for module usage count */
100         unsigned int initial_entries;
101
102         /* Entry points and underflows, indexed by hook number.  Each value
            * is a byte offset from the start of a copy of the rule blob
            * (they are passed to get_entry() together with the table base).
            * translate_table() presets them to 0xFFFFFFFF, meaning "unset". */
103         unsigned int hook_entry[NF_IP_NUMHOOKS];
104         unsigned int underflow[NF_IP_NUMHOOKS];
105
106         /* ipt_entry tables: one per CPU.  Zero-length trailing array,
            * cacheline aligned; the actual storage follows the header. */
107         char entries[0] ____cacheline_aligned;
108 };
109
/* Lists searched by name through find_inlist_lock(): targets, matches
 * and tables respectively. */
110 static LIST_HEAD(ipt_target);
111 static LIST_HEAD(ipt_match);
112 static LIST_HEAD(ipt_tables);
/* ADD_COUNTER(c,b,p): add b to c.bcnt and p to c.pcnt. */
113 #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
114
/* TABLE_OFFSET(t,p): byte offset, within t->entries, of the copy of the
 * rule blob belonging to CPU p.  Copies are SMP_ALIGN(t->size) bytes
 * apart on SMP; without CONFIG_SMP the offset is always 0. */
115 #ifdef CONFIG_SMP
116 #define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
117 #else
118 #define TABLE_OFFSET(t,p) 0
119 #endif
120
/* Disabled debugging aid: when the `#if 0' is flipped, down(),
 * down_interruptible() and up() are wrapped so that every call prints
 * its line number and argument text; the down_interruptible() wrapper
 * additionally reports a non-zero return. */
121 #if 0
122 #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
123 #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
124 #define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
125 #endif
126
127 /* Returns whether matches rule or not. */
128 static inline int
129 ip_packet_match(const struct iphdr *ip,
130                 const char *indev,
131                 const char *outdev,
132                 const struct ipt_ip *ipinfo,
133                 int isfrag)
134 {
135         size_t i;
136         unsigned long ret;
137
138 #define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))
139
140         if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
141                   IPT_INV_SRCIP)
142             || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
143                      IPT_INV_DSTIP)) {
144                 dprintf("Source or dest mismatch.\n");
145
146                 dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
147                         NIPQUAD(ip->saddr),
148                         NIPQUAD(ipinfo->smsk.s_addr),
149                         NIPQUAD(ipinfo->src.s_addr),
150                         ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
151                 dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
152                         NIPQUAD(ip->daddr),
153                         NIPQUAD(ipinfo->dmsk.s_addr),
154                         NIPQUAD(ipinfo->dst.s_addr),
155                         ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
156                 return 0;
157         }
158
159         /* Look for ifname matches; this should unroll nicely. */
160         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
161                 ret |= (((const unsigned long *)indev)[i]
162                         ^ ((const unsigned long *)ipinfo->iniface)[i])
163                         & ((const unsigned long *)ipinfo->iniface_mask)[i];
164         }
165
166         if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
167                 dprintf("VIA in mismatch (%s vs %s).%s\n",
168                         indev, ipinfo->iniface,
169                         ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
170                 return 0;
171         }
172
173         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
174                 ret |= (((const unsigned long *)outdev)[i]
175                         ^ ((const unsigned long *)ipinfo->outiface)[i])
176                         & ((const unsigned long *)ipinfo->outiface_mask)[i];
177         }
178
179         if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
180                 dprintf("VIA out mismatch (%s vs %s).%s\n",
181                         outdev, ipinfo->outiface,
182                         ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
183                 return 0;
184         }
185
186         /* Check specific protocol */
187         if (ipinfo->proto
188             && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
189                 dprintf("Packet protocol %hi does not match %hi.%s\n",
190                         ip->protocol, ipinfo->proto,
191                         ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
192                 return 0;
193         }
194
195         /* If we have a fragment rule but the packet is not a fragment
196          * then we return zero */
197         if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
198                 dprintf("Fragment rule but not fragment.%s\n",
199                         ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
200                 return 0;
201         }
202
203         return 1;
204 }
205
206 static inline int
207 ip_checkentry(const struct ipt_ip *ip)
208 {
209         if (ip->flags & ~IPT_F_MASK) {
210                 duprintf("Unknown flag bits set: %08X\n",
211                          ip->flags & ~IPT_F_MASK);
212                 return 0;
213         }
214         if (ip->invflags & ~IPT_INV_MASK) {
215                 duprintf("Unknown invflag bits set: %08X\n",
216                          ip->invflags & ~IPT_INV_MASK);
217                 return 0;
218         }
219         return 1;
220 }
221
222 static unsigned int
223 ipt_error(struct sk_buff **pskb,
224           const struct net_device *in,
225           const struct net_device *out,
226           unsigned int hooknum,
227           const void *targinfo,
228           void *userinfo)
229 {
230         if (net_ratelimit())
231                 printk("ip_tables: error: `%s'\n", (char *)targinfo);
232
233         return NF_DROP;
234 }
235
236 static inline
237 int do_match(struct ipt_entry_match *m,
238              const struct sk_buff *skb,
239              const struct net_device *in,
240              const struct net_device *out,
241              int offset,
242              int *hotdrop)
243 {
244         /* Stop iteration if it doesn't match */
245         if (!m->u.kernel.match->match(skb, in, out, m->data, offset, hotdrop))
246                 return 1;
247         else
248                 return 0;
249 }
250
/* Return the entry located `offset' bytes past `base'. */
static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
        char *start = base;

        return (struct ipt_entry *)(start + offset);
}
256
257 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
258 unsigned int
259 ipt_do_table(struct sk_buff **pskb,
260              unsigned int hook,
261              const struct net_device *in,
262              const struct net_device *out,
263              struct ipt_table *table,
264              void *userdata)
265 {
266         static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
267         u_int16_t offset;
268         struct iphdr *ip;
269         u_int16_t datalen;
270         int hotdrop = 0;
271         /* Initializing verdict to NF_DROP keeps gcc happy. */
272         unsigned int verdict = NF_DROP;
273         const char *indev, *outdev;
274         void *table_base;
275         struct ipt_entry *e, *back;
276
277         /* Initialization */
278         ip = (*pskb)->nh.iph;
279         datalen = (*pskb)->len - ip->ihl * 4;
280         indev = in ? in->name : nulldevname;
281         outdev = out ? out->name : nulldevname;
282         /* We handle fragments by dealing with the first fragment as
283          * if it was a normal packet.  All other fragments are treated
284          * normally, except that they will NEVER match rules that ask
285          * things we don't know, ie. tcp syn flag or ports).  If the
286          * rule is also a fragment-specific rule, non-fragments won't
287          * match it. */
288         offset = ntohs(ip->frag_off) & IP_OFFSET;
289
290         read_lock_bh(&table->lock);
291         IP_NF_ASSERT(table->valid_hooks & (1 << hook));
292         table_base = (void *)table->private->entries
293                 + TABLE_OFFSET(table->private, smp_processor_id());
294         e = get_entry(table_base, table->private->hook_entry[hook]);
295
296 #ifdef CONFIG_NETFILTER_DEBUG
297         /* Check noone else using our table */
298         if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
299             && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
300                 printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
301                        smp_processor_id(),
302                        table->name,
303                        &((struct ipt_entry *)table_base)->comefrom,
304                        ((struct ipt_entry *)table_base)->comefrom);
305         }
306         ((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
307 #endif
308
309         /* For return from builtin chain */
310         back = get_entry(table_base, table->private->underflow[hook]);
311
312         do {
313                 IP_NF_ASSERT(e);
314                 IP_NF_ASSERT(back);
315                 (*pskb)->nfcache |= e->nfcache;
316                 if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
317                         struct ipt_entry_target *t;
318
319                         if (IPT_MATCH_ITERATE(e, do_match,
320                                               *pskb, in, out,
321                                               offset, &hotdrop) != 0)
322                                 goto no_match;
323
324                         ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
325
326                         t = ipt_get_target(e);
327                         IP_NF_ASSERT(t->u.kernel.target);
328                         /* Standard target? */
329                         if (!t->u.kernel.target->target) {
330                                 int v;
331
332                                 v = ((struct ipt_standard_target *)t)->verdict;
333                                 if (v < 0) {
334                                         /* Pop from stack? */
335                                         if (v != IPT_RETURN) {
336                                                 verdict = (unsigned)(-v) - 1;
337                                                 break;
338                                         }
339                                         e = back;
340                                         back = get_entry(table_base,
341                                                          back->comefrom);
342                                         continue;
343                                 }
344                                 if (table_base + v
345                                     != (void *)e + e->next_offset) {
346                                         /* Save old back ptr in next entry */
347                                         struct ipt_entry *next
348                                                 = (void *)e + e->next_offset;
349                                         next->comefrom
350                                                 = (void *)back - table_base;
351                                         /* set back pointer to next entry */
352                                         back = next;
353                                 }
354
355                                 e = get_entry(table_base, v);
356                         } else {
357                                 /* Targets which reenter must return
358                                    abs. verdicts */
359 #ifdef CONFIG_NETFILTER_DEBUG
360                                 ((struct ipt_entry *)table_base)->comefrom
361                                         = 0xeeeeeeec;
362 #endif
363                                 verdict = t->u.kernel.target->target(pskb,
364                                                                      in, out,
365                                                                      hook,
366                                                                      t->data,
367                                                                      userdata);
368
369 #ifdef CONFIG_NETFILTER_DEBUG
370                                 if (((struct ipt_entry *)table_base)->comefrom
371                                     != 0xeeeeeeec
372                                     && verdict == IPT_CONTINUE) {
373                                         printk("Target %s reentered!\n",
374                                                t->u.kernel.target->name);
375                                         verdict = NF_DROP;
376                                 }
377                                 ((struct ipt_entry *)table_base)->comefrom
378                                         = 0x57acc001;
379 #endif
380                                 /* Target might have changed stuff. */
381                                 ip = (*pskb)->nh.iph;
382                                 datalen = (*pskb)->len - ip->ihl * 4;
383
384                                 if (verdict == IPT_CONTINUE)
385                                         e = (void *)e + e->next_offset;
386                                 else
387                                         /* Verdict */
388                                         break;
389                         }
390                 } else {
391
392                 no_match:
393                         e = (void *)e + e->next_offset;
394                 }
395         } while (!hotdrop);
396
397 #ifdef CONFIG_NETFILTER_DEBUG
398         ((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
399 #endif
400         read_unlock_bh(&table->lock);
401
402 #ifdef DEBUG_ALLOW_ALL
403         return NF_ACCEPT;
404 #else
405         if (hotdrop)
406                 return NF_DROP;
407         else return verdict;
408 #endif
409 }
410
411 /* If it succeeds, returns element and locks mutex */
412 static inline void *
413 find_inlist_lock_noload(struct list_head *head,
414                         const char *name,
415                         int *error,
416                         struct semaphore *mutex)
417 {
418         void *ret;
419
420 #if 0 
421         duprintf("find_inlist: searching for `%s' in %s.\n",
422                  name, head == &ipt_target ? "ipt_target"
423                  : head == &ipt_match ? "ipt_match"
424                  : head == &ipt_tables ? "ipt_tables" : "UNKNOWN");
425 #endif
426
427         *error = down_interruptible(mutex);
428         if (*error != 0)
429                 return NULL;
430
431         ret = list_named_find(head, name);
432         if (!ret) {
433                 *error = -ENOENT;
434                 up(mutex);
435         }
436         return ret;
437 }
438
/*
 * find_inlist_lock(): like find_inlist_lock_noload(), but with
 * CONFIG_KMOD a failed first lookup triggers
 * request_module("<prefix><name>") followed by one more lookup.
 * Without CONFIG_KMOD the prefix is ignored and no module is requested.
 * Locking and *error behave as in find_inlist_lock_noload().
 */
#ifndef CONFIG_KMOD
#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m))
#else
static void *
find_inlist_lock(struct list_head *head,
                 const char *name,
                 const char *prefix,
                 int *error,
                 struct semaphore *mutex)
{
        void *found = find_inlist_lock_noload(head, name, error, mutex);

        if (found)
                return found;

        /* First lookup failed: ask for the module, then look again. */
        duprintf("find_inlist: loading `%s%s'.\n", prefix, name);
        request_module("%s%s", prefix, name);
        return find_inlist_lock_noload(head, name, error, mutex);
}
#endif
461
462 static inline struct ipt_table *
463 ipt_find_table_lock(const char *name, int *error, struct semaphore *mutex)
464 {
465         return find_inlist_lock(&ipt_tables, name, "iptable_", error, mutex);
466 }
467
468 static inline struct ipt_match *
469 find_match_lock(const char *name, int *error, struct semaphore *mutex)
470 {
471         return find_inlist_lock(&ipt_match, name, "ipt_", error, mutex);
472 }
473
474 struct ipt_target *
475 ipt_find_target_lock(const char *name, int *error, struct semaphore *mutex)
476 {
477         return find_inlist_lock(&ipt_target, name, "ipt_", error, mutex);
478 }
479
480 /* All zeroes == unconditional rule. */
481 static inline int
482 unconditional(const struct ipt_ip *ip)
483 {
484         unsigned int i;
485
486         for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
487                 if (((__u32 *)ip)[i])
488                         return 0;
489
490         return 1;
491 }
492
493 /* Figures out from what hook each rule can be called: returns 0 if
494    there are loops.  Puts hook bitmask in comefrom. */
495 static int
496 mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
497 {
498         unsigned int hook;
499
500         /* No recursion; use packet counter to save back ptrs (reset
501            to 0 as we leave), and comefrom to save source hook bitmask */
502         for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
503                 unsigned int pos = newinfo->hook_entry[hook];
504                 struct ipt_entry *e
505                         = (struct ipt_entry *)(newinfo->entries + pos);
506
507                 if (!(valid_hooks & (1 << hook)))
508                         continue;
509
510                 /* Set initial back pointer. */
511                 e->counters.pcnt = pos;
512
513                 for (;;) {
514                         struct ipt_standard_target *t
515                                 = (void *)ipt_get_target(e);
516
517                         if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
518                                 printk("iptables: loop hook %u pos %u %08X.\n",
519                                        hook, pos, e->comefrom);
520                                 return 0;
521                         }
522                         e->comefrom
523                                 |= ((1 << hook) | (1 << NF_IP_NUMHOOKS));
524
525                         /* Unconditional return/END. */
526                         if (e->target_offset == sizeof(struct ipt_entry)
527                             && (strcmp(t->target.u.user.name,
528                                        IPT_STANDARD_TARGET) == 0)
529                             && t->verdict < 0
530                             && unconditional(&e->ip)) {
531                                 unsigned int oldpos, size;
532
533                                 /* Return: backtrack through the last
534                                    big jump. */
535                                 do {
536                                         e->comefrom ^= (1<<NF_IP_NUMHOOKS);
537 #ifdef DEBUG_IP_FIREWALL_USER
538                                         if (e->comefrom
539                                             & (1 << NF_IP_NUMHOOKS)) {
540                                                 duprintf("Back unset "
541                                                          "on hook %u "
542                                                          "rule %u\n",
543                                                          hook, pos);
544                                         }
545 #endif
546                                         oldpos = pos;
547                                         pos = e->counters.pcnt;
548                                         e->counters.pcnt = 0;
549
550                                         /* We're at the start. */
551                                         if (pos == oldpos)
552                                                 goto next;
553
554                                         e = (struct ipt_entry *)
555                                                 (newinfo->entries + pos);
556                                 } while (oldpos == pos + e->next_offset);
557
558                                 /* Move along one */
559                                 size = e->next_offset;
560                                 e = (struct ipt_entry *)
561                                         (newinfo->entries + pos + size);
562                                 e->counters.pcnt = pos;
563                                 pos += size;
564                         } else {
565                                 int newpos = t->verdict;
566
567                                 if (strcmp(t->target.u.user.name,
568                                            IPT_STANDARD_TARGET) == 0
569                                     && newpos >= 0) {
570                                         /* This a jump; chase it. */
571                                         duprintf("Jump rule %u -> %u\n",
572                                                  pos, newpos);
573                                 } else {
574                                         /* ... this is a fallthru */
575                                         newpos = pos + e->next_offset;
576                                 }
577                                 e = (struct ipt_entry *)
578                                         (newinfo->entries + newpos);
579                                 e->counters.pcnt = pos;
580                                 pos = newpos;
581                         }
582                 }
583                 next:
584                 duprintf("Finished chain %u\n", hook);
585         }
586         return 1;
587 }
588
589 static inline int
590 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
591 {
592         if (i && (*i)-- == 0)
593                 return 1;
594
595         if (m->u.kernel.match->destroy)
596                 m->u.kernel.match->destroy(m->data,
597                                            m->u.match_size - sizeof(*m));
598         module_put(m->u.kernel.match->me);
599         return 0;
600 }
601
602 static inline int
603 standard_check(const struct ipt_entry_target *t,
604                unsigned int max_offset)
605 {
606         struct ipt_standard_target *targ = (void *)t;
607
608         /* Check standard info. */
609         if (t->u.target_size
610             != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
611                 duprintf("standard_check: target size %u != %u\n",
612                          t->u.target_size,
613                          IPT_ALIGN(sizeof(struct ipt_standard_target)));
614                 return 0;
615         }
616
617         if (targ->verdict >= 0
618             && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
619                 duprintf("ipt_standard_check: bad verdict (%i)\n",
620                          targ->verdict);
621                 return 0;
622         }
623
624         if (targ->verdict < -NF_MAX_VERDICT - 1) {
625                 duprintf("ipt_standard_check: bad negative verdict (%i)\n",
626                          targ->verdict);
627                 return 0;
628         }
629         return 1;
630 }
631
632 static inline int
633 check_match(struct ipt_entry_match *m,
634             const char *name,
635             const struct ipt_ip *ip,
636             unsigned int hookmask,
637             unsigned int *i)
638 {
639         int ret;
640         struct ipt_match *match;
641
642         match = find_match_lock(m->u.user.name, &ret, &ipt_mutex);
643         if (!match) {
644                 duprintf("check_match: `%s' not found\n", m->u.user.name);
645                 return ret;
646         }
647         if (!try_module_get(match->me)) {
648                 up(&ipt_mutex);
649                 return -ENOENT;
650         }
651         m->u.kernel.match = match;
652         up(&ipt_mutex);
653
654         if (m->u.kernel.match->checkentry
655             && !m->u.kernel.match->checkentry(name, ip, m->data,
656                                               m->u.match_size - sizeof(*m),
657                                               hookmask)) {
658                 module_put(m->u.kernel.match->me);
659                 duprintf("ip_tables: check failed for `%s'.\n",
660                          m->u.kernel.match->name);
661                 return -EINVAL;
662         }
663
664         (*i)++;
665         return 0;
666 }
667
668 static struct ipt_target ipt_standard_target;
669
670 static inline int
671 check_entry(struct ipt_entry *e, const char *name, unsigned int size,
672             unsigned int *i)
673 {
674         struct ipt_entry_target *t;
675         struct ipt_target *target;
676         int ret;
677         unsigned int j;
678
679         if (!ip_checkentry(&e->ip)) {
680                 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
681                 return -EINVAL;
682         }
683
684         j = 0;
685         ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
686         if (ret != 0)
687                 goto cleanup_matches;
688
689         t = ipt_get_target(e);
690         target = ipt_find_target_lock(t->u.user.name, &ret, &ipt_mutex);
691         if (!target) {
692                 duprintf("check_entry: `%s' not found\n", t->u.user.name);
693                 goto cleanup_matches;
694         }
695         if (!try_module_get(target->me)) {
696                 up(&ipt_mutex);
697                 ret = -ENOENT;
698                 goto cleanup_matches;
699         }
700         t->u.kernel.target = target;
701         up(&ipt_mutex);
702
703         if (t->u.kernel.target == &ipt_standard_target) {
704                 if (!standard_check(t, size)) {
705                         ret = -EINVAL;
706                         goto cleanup_matches;
707                 }
708         } else if (t->u.kernel.target->checkentry
709                    && !t->u.kernel.target->checkentry(name, e, t->data,
710                                                       t->u.target_size
711                                                       - sizeof(*t),
712                                                       e->comefrom)) {
713                 module_put(t->u.kernel.target->me);
714                 duprintf("ip_tables: check failed for `%s'.\n",
715                          t->u.kernel.target->name);
716                 ret = -EINVAL;
717                 goto cleanup_matches;
718         }
719
720         (*i)++;
721         return 0;
722
723  cleanup_matches:
724         IPT_MATCH_ITERATE(e, cleanup_match, &j);
725         return ret;
726 }
727
728 static inline int
729 check_entry_size_and_hooks(struct ipt_entry *e,
730                            struct ipt_table_info *newinfo,
731                            unsigned char *base,
732                            unsigned char *limit,
733                            const unsigned int *hook_entries,
734                            const unsigned int *underflows,
735                            unsigned int *i)
736 {
737         unsigned int h;
738
739         if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
740             || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
741                 duprintf("Bad offset %p\n", e);
742                 return -EINVAL;
743         }
744
745         if (e->next_offset
746             < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
747                 duprintf("checking: element %p size %u\n",
748                          e, e->next_offset);
749                 return -EINVAL;
750         }
751
752         /* Check hooks & underflows */
753         for (h = 0; h < NF_IP_NUMHOOKS; h++) {
754                 if ((unsigned char *)e - base == hook_entries[h])
755                         newinfo->hook_entry[h] = hook_entries[h];
756                 if ((unsigned char *)e - base == underflows[h])
757                         newinfo->underflow[h] = underflows[h];
758         }
759
760         /* FIXME: underflows must be unconditional, standard verdicts
761            < 0 (not IPT_RETURN). --RR */
762
763         /* Clear counters and comefrom */
764         e->counters = ((struct ipt_counters) { 0, 0 });
765         e->comefrom = 0;
766
767         (*i)++;
768         return 0;
769 }
770
771 static inline int
772 cleanup_entry(struct ipt_entry *e, unsigned int *i)
773 {
774         struct ipt_entry_target *t;
775
776         if (i && (*i)-- == 0)
777                 return 1;
778
779         /* Cleanup all matches */
780         IPT_MATCH_ITERATE(e, cleanup_match, NULL);
781         t = ipt_get_target(e);
782         if (t->u.kernel.target->destroy)
783                 t->u.kernel.target->destroy(t->data,
784                                             t->u.target_size - sizeof(*t));
785         module_put(t->u.kernel.target->me);
786         return 0;
787 }
788
789 /* Checks and translates the user-supplied table segment (held in
790    newinfo) */
791 static int
792 translate_table(const char *name,
793                 unsigned int valid_hooks,
794                 struct ipt_table_info *newinfo,
795                 unsigned int size,
796                 unsigned int number,
797                 const unsigned int *hook_entries,
798                 const unsigned int *underflows)
799 {
800         unsigned int i;
801         int ret;
802
803         newinfo->size = size;
804         newinfo->number = number;
805
806         /* Init all hooks to impossible value. */
807         for (i = 0; i < NF_IP_NUMHOOKS; i++) {
808                 newinfo->hook_entry[i] = 0xFFFFFFFF;
809                 newinfo->underflow[i] = 0xFFFFFFFF;
810         }
811
812         duprintf("translate_table: size %u\n", newinfo->size);
813         i = 0;
814         /* Walk through entries, checking offsets. */
815         ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
816                                 check_entry_size_and_hooks,
817                                 newinfo,
818                                 newinfo->entries,
819                                 newinfo->entries + size,
820                                 hook_entries, underflows, &i);
821         if (ret != 0)
822                 return ret;
823
824         if (i != number) {
825                 duprintf("translate_table: %u not %u entries\n",
826                          i, number);
827                 return -EINVAL;
828         }
829
830         /* Check hooks all assigned */
831         for (i = 0; i < NF_IP_NUMHOOKS; i++) {
832                 /* Only hooks which are valid */
833                 if (!(valid_hooks & (1 << i)))
834                         continue;
835                 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
836                         duprintf("Invalid hook entry %u %u\n",
837                                  i, hook_entries[i]);
838                         return -EINVAL;
839                 }
840                 if (newinfo->underflow[i] == 0xFFFFFFFF) {
841                         duprintf("Invalid underflow %u %u\n",
842                                  i, underflows[i]);
843                         return -EINVAL;
844                 }
845         }
846
847         if (!mark_source_chains(newinfo, valid_hooks))
848                 return -ELOOP;
849
850         /* Finally, each sanity check must pass */
851         i = 0;
852         ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
853                                 check_entry, name, size, &i);
854
855         if (ret != 0) {
856                 IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
857                                   cleanup_entry, &i);
858                 return ret;
859         }
860
861         /* And one copy for every other CPU */
862         for (i = 1; i < NR_CPUS; i++) {
863                 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
864                        newinfo->entries,
865                        SMP_ALIGN(newinfo->size));
866         }
867
868         return ret;
869 }
870
871 static struct ipt_table_info *
872 replace_table(struct ipt_table *table,
873               unsigned int num_counters,
874               struct ipt_table_info *newinfo,
875               int *error)
876 {
877         struct ipt_table_info *oldinfo;
878
879 #ifdef CONFIG_NETFILTER_DEBUG
880         {
881                 struct ipt_entry *table_base;
882                 unsigned int i;
883
884                 for (i = 0; i < NR_CPUS; i++) {
885                         table_base =
886                                 (void *)newinfo->entries
887                                 + TABLE_OFFSET(newinfo, i);
888
889                         table_base->comefrom = 0xdead57ac;
890                 }
891         }
892 #endif
893
894         /* Do the substitution. */
895         write_lock_bh(&table->lock);
896         /* Check inside lock: is the old number correct? */
897         if (num_counters != table->private->number) {
898                 duprintf("num_counters != table->private->number (%u/%u)\n",
899                          num_counters, table->private->number);
900                 write_unlock_bh(&table->lock);
901                 *error = -EAGAIN;
902                 return NULL;
903         }
904         oldinfo = table->private;
905         table->private = newinfo;
906         newinfo->initial_entries = oldinfo->initial_entries;
907         write_unlock_bh(&table->lock);
908
909         return oldinfo;
910 }
911
912 /* Gets counters. */
913 static inline int
914 add_entry_to_counter(const struct ipt_entry *e,
915                      struct ipt_counters total[],
916                      unsigned int *i)
917 {
918         ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
919
920         (*i)++;
921         return 0;
922 }
923
924 static void
925 get_counters(const struct ipt_table_info *t,
926              struct ipt_counters counters[])
927 {
928         unsigned int cpu;
929         unsigned int i;
930
931         for (cpu = 0; cpu < NR_CPUS; cpu++) {
932                 i = 0;
933                 IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
934                                   t->size,
935                                   add_entry_to_counter,
936                                   counters,
937                                   &i);
938         }
939 }
940
941 static int
942 copy_entries_to_user(unsigned int total_size,
943                      struct ipt_table *table,
944                      void __user *userptr)
945 {
946         unsigned int off, num, countersize;
947         struct ipt_entry *e;
948         struct ipt_counters *counters;
949         int ret = 0;
950
951         /* We need atomic snapshot of counters: rest doesn't change
952            (other than comefrom, which userspace doesn't care
953            about). */
954         countersize = sizeof(struct ipt_counters) * table->private->number;
955         counters = vmalloc(countersize);
956
957         if (counters == NULL)
958                 return -ENOMEM;
959
960         /* First, sum counters... */
961         memset(counters, 0, countersize);
962         write_lock_bh(&table->lock);
963         get_counters(table->private, counters);
964         write_unlock_bh(&table->lock);
965
966         /* ... then copy entire thing from CPU 0... */
967         if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
968                 ret = -EFAULT;
969                 goto free_counters;
970         }
971
972         /* FIXME: use iterator macros --RR */
973         /* ... then go back and fix counters and names */
974         for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
975                 unsigned int i;
976                 struct ipt_entry_match *m;
977                 struct ipt_entry_target *t;
978
979                 e = (struct ipt_entry *)(table->private->entries + off);
980                 if (copy_to_user(userptr + off
981                                  + offsetof(struct ipt_entry, counters),
982                                  &counters[num],
983                                  sizeof(counters[num])) != 0) {
984                         ret = -EFAULT;
985                         goto free_counters;
986                 }
987
988                 for (i = sizeof(struct ipt_entry);
989                      i < e->target_offset;
990                      i += m->u.match_size) {
991                         m = (void *)e + i;
992
993                         if (copy_to_user(userptr + off + i
994                                          + offsetof(struct ipt_entry_match,
995                                                     u.user.name),
996                                          m->u.kernel.match->name,
997                                          strlen(m->u.kernel.match->name)+1)
998                             != 0) {
999                                 ret = -EFAULT;
1000                                 goto free_counters;
1001                         }
1002                 }
1003
1004                 t = ipt_get_target(e);
1005                 if (copy_to_user(userptr + off + e->target_offset
1006                                  + offsetof(struct ipt_entry_target,
1007                                             u.user.name),
1008                                  t->u.kernel.target->name,
1009                                  strlen(t->u.kernel.target->name)+1) != 0) {
1010                         ret = -EFAULT;
1011                         goto free_counters;
1012                 }
1013         }
1014
1015  free_counters:
1016         vfree(counters);
1017         return ret;
1018 }
1019
1020 static int
1021 get_entries(const struct ipt_get_entries *entries,
1022             struct ipt_get_entries __user *uptr)
1023 {
1024         int ret;
1025         struct ipt_table *t;
1026
1027         t = ipt_find_table_lock(entries->name, &ret, &ipt_mutex);
1028         if (t) {
1029                 duprintf("t->private->number = %u\n",
1030                          t->private->number);
1031                 if (entries->size == t->private->size)
1032                         ret = copy_entries_to_user(t->private->size,
1033                                                    t, uptr->entrytable);
1034                 else {
1035                         duprintf("get_entries: I've got %u not %u!\n",
1036                                  t->private->size,
1037                                  entries->size);
1038                         ret = -EINVAL;
1039                 }
1040                 up(&ipt_mutex);
1041         } else
1042                 duprintf("get_entries: Can't find %s!\n",
1043                          entries->name);
1044
1045         return ret;
1046 }
1047
1048 static int
1049 do_replace(void __user *user, unsigned int len)
1050 {
1051         int ret;
1052         struct ipt_replace tmp;
1053         struct ipt_table *t;
1054         struct ipt_table_info *newinfo, *oldinfo;
1055         struct ipt_counters *counters;
1056
1057         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1058                 return -EFAULT;
1059
1060         /* Hack: Causes ipchains to give correct error msg --RR */
1061         if (len != sizeof(tmp) + tmp.size)
1062                 return -ENOPROTOOPT;
1063
1064         /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
1065         if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
1066                 return -ENOMEM;
1067
1068         newinfo = vmalloc(sizeof(struct ipt_table_info)
1069                           + SMP_ALIGN(tmp.size) * NR_CPUS);
1070         if (!newinfo)
1071                 return -ENOMEM;
1072
1073         if (copy_from_user(newinfo->entries, user + sizeof(tmp),
1074                            tmp.size) != 0) {
1075                 ret = -EFAULT;
1076                 goto free_newinfo;
1077         }
1078
1079         counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
1080         if (!counters) {
1081                 ret = -ENOMEM;
1082                 goto free_newinfo;
1083         }
1084         memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));
1085
1086         ret = translate_table(tmp.name, tmp.valid_hooks,
1087                               newinfo, tmp.size, tmp.num_entries,
1088                               tmp.hook_entry, tmp.underflow);
1089         if (ret != 0)
1090                 goto free_newinfo_counters;
1091
1092         duprintf("ip_tables: Translated table\n");
1093
1094         t = ipt_find_table_lock(tmp.name, &ret, &ipt_mutex);
1095         if (!t)
1096                 goto free_newinfo_counters_untrans;
1097
1098         /* You lied! */
1099         if (tmp.valid_hooks != t->valid_hooks) {
1100                 duprintf("Valid hook crap: %08X vs %08X\n",
1101                          tmp.valid_hooks, t->valid_hooks);
1102                 ret = -EINVAL;
1103                 goto free_newinfo_counters_untrans_unlock;
1104         }
1105
1106         /* Get a reference in advance, we're not allowed fail later */
1107         if (!try_module_get(t->me)) {
1108                 ret = -EBUSY;
1109                 goto free_newinfo_counters_untrans_unlock;
1110         }
1111
1112
1113         oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
1114         if (!oldinfo)
1115                 goto put_module;
1116
1117         /* Update module usage count based on number of rules */
1118         duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
1119                 oldinfo->number, oldinfo->initial_entries, newinfo->number);
1120         if ((oldinfo->number > oldinfo->initial_entries) || 
1121             (newinfo->number <= oldinfo->initial_entries)) 
1122                 module_put(t->me);
1123         if ((oldinfo->number > oldinfo->initial_entries) &&
1124             (newinfo->number <= oldinfo->initial_entries))
1125                 module_put(t->me);
1126
1127         /* Get the old counters. */
1128         get_counters(oldinfo, counters);
1129         /* Decrease module usage counts and free resource */
1130         IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
1131         vfree(oldinfo);
1132         /* Silent error: too late now. */
1133         copy_to_user(tmp.counters, counters,
1134                      sizeof(struct ipt_counters) * tmp.num_counters);
1135         vfree(counters);
1136         up(&ipt_mutex);
1137         return 0;
1138
1139  put_module:
1140         module_put(t->me);
1141  free_newinfo_counters_untrans_unlock:
1142         up(&ipt_mutex);
1143  free_newinfo_counters_untrans:
1144         IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
1145  free_newinfo_counters:
1146         vfree(counters);
1147  free_newinfo:
1148         vfree(newinfo);
1149         return ret;
1150 }
1151
1152 /* We're lazy, and add to the first CPU; overflow works its fey magic
1153  * and everything is OK. */
1154 static inline int
1155 add_counter_to_entry(struct ipt_entry *e,
1156                      const struct ipt_counters addme[],
1157                      unsigned int *i)
1158 {
1159 #if 0
1160         duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1161                  *i,
1162                  (long unsigned int)e->counters.pcnt,
1163                  (long unsigned int)e->counters.bcnt,
1164                  (long unsigned int)addme[*i].pcnt,
1165                  (long unsigned int)addme[*i].bcnt);
1166 #endif
1167
1168         ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1169
1170         (*i)++;
1171         return 0;
1172 }
1173
1174 static int
1175 do_add_counters(void __user *user, unsigned int len)
1176 {
1177         unsigned int i;
1178         struct ipt_counters_info tmp, *paddc;
1179         struct ipt_table *t;
1180         int ret;
1181
1182         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1183                 return -EFAULT;
1184
1185         if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
1186                 return -EINVAL;
1187
1188         paddc = vmalloc(len);
1189         if (!paddc)
1190                 return -ENOMEM;
1191
1192         if (copy_from_user(paddc, user, len) != 0) {
1193                 ret = -EFAULT;
1194                 goto free;
1195         }
1196
1197         t = ipt_find_table_lock(tmp.name, &ret, &ipt_mutex);
1198         if (!t)
1199                 goto free;
1200
1201         write_lock_bh(&t->lock);
1202         if (t->private->number != paddc->num_counters) {
1203                 ret = -EINVAL;
1204                 goto unlock_up_free;
1205         }
1206
1207         i = 0;
1208         IPT_ENTRY_ITERATE(t->private->entries,
1209                           t->private->size,
1210                           add_counter_to_entry,
1211                           paddc->counters,
1212                           &i);
1213  unlock_up_free:
1214         write_unlock_bh(&t->lock);
1215         up(&ipt_mutex);
1216  free:
1217         vfree(paddc);
1218
1219         return ret;
1220 }
1221
1222 static int
1223 do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1224 {
1225         int ret;
1226
1227         if (!capable(CAP_NET_ADMIN))
1228                 return -EPERM;
1229
1230         switch (cmd) {
1231         case IPT_SO_SET_REPLACE:
1232                 ret = do_replace(user, len);
1233                 break;
1234
1235         case IPT_SO_SET_ADD_COUNTERS:
1236                 ret = do_add_counters(user, len);
1237                 break;
1238
1239         default:
1240                 duprintf("do_ipt_set_ctl:  unknown request %i\n", cmd);
1241                 ret = -EINVAL;
1242         }
1243
1244         return ret;
1245 }
1246
1247 static int
1248 do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1249 {
1250         int ret;
1251
1252         if (!capable(CAP_NET_ADMIN))
1253                 return -EPERM;
1254
1255         switch (cmd) {
1256         case IPT_SO_GET_INFO: {
1257                 char name[IPT_TABLE_MAXNAMELEN];
1258                 struct ipt_table *t;
1259
1260                 if (*len != sizeof(struct ipt_getinfo)) {
1261                         duprintf("length %u != %u\n", *len,
1262                                  sizeof(struct ipt_getinfo));
1263                         ret = -EINVAL;
1264                         break;
1265                 }
1266
1267                 if (copy_from_user(name, user, sizeof(name)) != 0) {
1268                         ret = -EFAULT;
1269                         break;
1270                 }
1271                 name[IPT_TABLE_MAXNAMELEN-1] = '\0';
1272                 t = ipt_find_table_lock(name, &ret, &ipt_mutex);
1273                 if (t) {
1274                         struct ipt_getinfo info;
1275
1276                         info.valid_hooks = t->valid_hooks;
1277                         memcpy(info.hook_entry, t->private->hook_entry,
1278                                sizeof(info.hook_entry));
1279                         memcpy(info.underflow, t->private->underflow,
1280                                sizeof(info.underflow));
1281                         info.num_entries = t->private->number;
1282                         info.size = t->private->size;
1283                         strcpy(info.name, name);
1284
1285                         if (copy_to_user(user, &info, *len) != 0)
1286                                 ret = -EFAULT;
1287                         else
1288                                 ret = 0;
1289
1290                         up(&ipt_mutex);
1291                 }
1292         }
1293         break;
1294
1295         case IPT_SO_GET_ENTRIES: {
1296                 struct ipt_get_entries get;
1297
1298                 if (*len < sizeof(get)) {
1299                         duprintf("get_entries: %u < %u\n", *len, sizeof(get));
1300                         ret = -EINVAL;
1301                 } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
1302                         ret = -EFAULT;
1303                 } else if (*len != sizeof(struct ipt_get_entries) + get.size) {
1304                         duprintf("get_entries: %u != %u\n", *len,
1305                                  sizeof(struct ipt_get_entries) + get.size);
1306                         ret = -EINVAL;
1307                 } else
1308                         ret = get_entries(&get, user);
1309                 break;
1310         }
1311
1312         default:
1313                 duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
1314                 ret = -EINVAL;
1315         }
1316
1317         return ret;
1318 }
1319
1320 /* Registration hooks for targets. */
1321 int
1322 ipt_register_target(struct ipt_target *target)
1323 {
1324         int ret;
1325
1326         ret = down_interruptible(&ipt_mutex);
1327         if (ret != 0)
1328                 return ret;
1329
1330         if (!list_named_insert(&ipt_target, target)) {
1331                 duprintf("ipt_register_target: `%s' already in list!\n",
1332                          target->name);
1333                 ret = -EINVAL;
1334         }
1335         up(&ipt_mutex);
1336         return ret;
1337 }
1338
1339 void
1340 ipt_unregister_target(struct ipt_target *target)
1341 {
1342         down(&ipt_mutex);
1343         LIST_DELETE(&ipt_target, target);
1344         up(&ipt_mutex);
1345 }
1346
1347 int
1348 ipt_register_match(struct ipt_match *match)
1349 {
1350         int ret;
1351
1352         ret = down_interruptible(&ipt_mutex);
1353         if (ret != 0)
1354                 return ret;
1355
1356         if (!list_named_insert(&ipt_match, match)) {
1357                 duprintf("ipt_register_match: `%s' already in list!\n",
1358                          match->name);
1359                 ret = -EINVAL;
1360         }
1361         up(&ipt_mutex);
1362
1363         return ret;
1364 }
1365
1366 void
1367 ipt_unregister_match(struct ipt_match *match)
1368 {
1369         down(&ipt_mutex);
1370         LIST_DELETE(&ipt_match, match);
1371         up(&ipt_mutex);
1372 }
1373
1374 int ipt_register_table(struct ipt_table *table)
1375 {
1376         int ret;
1377         struct ipt_table_info *newinfo;
1378         static struct ipt_table_info bootstrap
1379                 = { 0, 0, 0, { 0 }, { 0 }, { } };
1380
1381         newinfo = vmalloc(sizeof(struct ipt_table_info)
1382                           + SMP_ALIGN(table->table->size) * NR_CPUS);
1383         if (!newinfo)
1384                 return -ENOMEM;
1385
1386         memcpy(newinfo->entries, table->table->entries, table->table->size);
1387
1388         ret = translate_table(table->name, table->valid_hooks,
1389                               newinfo, table->table->size,
1390                               table->table->num_entries,
1391                               table->table->hook_entry,
1392                               table->table->underflow);
1393         if (ret != 0) {
1394                 vfree(newinfo);
1395                 return ret;
1396         }
1397
1398         ret = down_interruptible(&ipt_mutex);
1399         if (ret != 0) {
1400                 vfree(newinfo);
1401                 return ret;
1402         }
1403
1404         /* Don't autoload: we'd eat our tail... */
1405         if (list_named_find(&ipt_tables, table->name)) {
1406                 ret = -EEXIST;
1407                 goto free_unlock;
1408         }
1409
1410         /* Simplifies replace_table code. */
1411         table->private = &bootstrap;
1412         if (!replace_table(table, 0, newinfo, &ret))
1413                 goto free_unlock;
1414
1415         duprintf("table->private->number = %u\n",
1416                  table->private->number);
1417         
1418         /* save number of initial entries */
1419         table->private->initial_entries = table->private->number;
1420
1421         table->lock = RW_LOCK_UNLOCKED;
1422         list_prepend(&ipt_tables, table);
1423
1424  unlock:
1425         up(&ipt_mutex);
1426         return ret;
1427
1428  free_unlock:
1429         vfree(newinfo);
1430         goto unlock;
1431 }
1432
1433 void ipt_unregister_table(struct ipt_table *table)
1434 {
1435         down(&ipt_mutex);
1436         LIST_DELETE(&ipt_tables, table);
1437         up(&ipt_mutex);
1438
1439         /* Decrease module usage counts and free resources */
1440         IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
1441                           cleanup_entry, NULL);
1442         vfree(table->private);
1443 }
1444
/* Range test for a port: yields (min <= port <= max) XOR invert, i.e. 1
 * for a port inside the inclusive range when invert is 0, and the
 * opposite answer when invert is 1.
 */
static inline int
port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
{
	int in_range = (min <= port && port <= max);

	return in_range ^ invert;
}
1454
1455 static int
1456 tcp_find_option(u_int8_t option,
1457                 const struct sk_buff *skb,
1458                 unsigned int optlen,
1459                 int invert,
1460                 int *hotdrop)
1461 {
1462         /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
1463         u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
1464         unsigned int i;
1465
1466         duprintf("tcp_match: finding option\n");
1467         /* If we don't have the whole header, drop packet. */
1468         BUG_ON(!optlen);
1469         op = skb_header_pointer(skb,
1470                                 skb->nh.iph->ihl*4 + sizeof(struct tcphdr),
1471                                 optlen, _opt);
1472         if (op == NULL) {
1473                 *hotdrop = 1;
1474                 return 0;
1475         }
1476
1477         for (i = 0; i < optlen; ) {
1478                 if (op[i] == option) return !invert;
1479                 if (op[i] < 2) i++;
1480                 else i += op[i+1]?:1;
1481         }
1482
1483         return invert;
1484 }
1485
1486 static int
1487 tcp_match(const struct sk_buff *skb,
1488           const struct net_device *in,
1489           const struct net_device *out,
1490           const void *matchinfo,
1491           int offset,
1492           int *hotdrop)
1493 {
1494         struct tcphdr _tcph, *th;
1495         const struct ipt_tcp *tcpinfo = matchinfo;
1496
1497         if (offset) {
1498                 /* To quote Alan:
1499
1500                    Don't allow a fragment of TCP 8 bytes in. Nobody normal
1501                    causes this. Its a cracker trying to break in by doing a
1502                    flag overwrite to pass the direction checks.
1503                 */
1504                 if (offset == 1) {
1505                         duprintf("Dropping evil TCP offset=1 frag.\n");
1506                         *hotdrop = 1;
1507                 }
1508                 /* Must not be a fragment. */
1509                 return 0;
1510         }
1511
1512 #define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
1513
1514         th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1515                                 sizeof(_tcph), &_tcph);
1516         if (th == NULL) {
1517                 /* We've been asked to examine this packet, and we
1518                    can't.  Hence, no choice but to drop. */
1519                 duprintf("Dropping evil TCP offset=0 tinygram.\n");
1520                 *hotdrop = 1;
1521                 return 0;
1522         }
1523
1524         if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
1525                         ntohs(th->source),
1526                         !!(tcpinfo->invflags & IPT_TCP_INV_SRCPT)))
1527                 return 0;
1528         if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
1529                         ntohs(th->dest),
1530                         !!(tcpinfo->invflags & IPT_TCP_INV_DSTPT)))
1531                 return 0;
1532         if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
1533                       == tcpinfo->flg_cmp,
1534                       IPT_TCP_INV_FLAGS))
1535                 return 0;
1536         if (tcpinfo->option) {
1537                 if (th->doff * 4 < sizeof(_tcph)) {
1538                         *hotdrop = 1;
1539                         return 0;
1540                 }
1541                 if (!tcp_find_option(tcpinfo->option, skb,
1542                                      th->doff*4 - sizeof(_tcph),
1543                                      tcpinfo->invflags & IPT_TCP_INV_OPTION,
1544                                      hotdrop))
1545                         return 0;
1546         }
1547         return 1;
1548 }
1549
1550 /* Called when user tries to insert an entry of this type. */
1551 static int
1552 tcp_checkentry(const char *tablename,
1553                const struct ipt_ip *ip,
1554                void *matchinfo,
1555                unsigned int matchsize,
1556                unsigned int hook_mask)
1557 {
1558         const struct ipt_tcp *tcpinfo = matchinfo;
1559
1560         /* Must specify proto == TCP, and no unknown invflags */
1561         return ip->proto == IPPROTO_TCP
1562                 && !(ip->invflags & IPT_INV_PROTO)
1563                 && matchsize == IPT_ALIGN(sizeof(struct ipt_tcp))
1564                 && !(tcpinfo->invflags & ~IPT_TCP_INV_MASK);
1565 }
1566
1567 static int
1568 udp_match(const struct sk_buff *skb,
1569           const struct net_device *in,
1570           const struct net_device *out,
1571           const void *matchinfo,
1572           int offset,
1573           int *hotdrop)
1574 {
1575         struct udphdr _udph, *uh;
1576         const struct ipt_udp *udpinfo = matchinfo;
1577
1578         /* Must not be a fragment. */
1579         if (offset)
1580                 return 0;
1581
1582         uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1583                                 sizeof(_udph), &_udph);
1584         if (uh == NULL) {
1585                 /* We've been asked to examine this packet, and we
1586                    can't.  Hence, no choice but to drop. */
1587                 duprintf("Dropping evil UDP tinygram.\n");
1588                 *hotdrop = 1;
1589                 return 0;
1590         }
1591
1592         return port_match(udpinfo->spts[0], udpinfo->spts[1],
1593                           ntohs(uh->source),
1594                           !!(udpinfo->invflags & IPT_UDP_INV_SRCPT))
1595                 && port_match(udpinfo->dpts[0], udpinfo->dpts[1],
1596                               ntohs(uh->dest),
1597                               !!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
1598 }
1599
1600 /* Called when user tries to insert an entry of this type. */
1601 static int
1602 udp_checkentry(const char *tablename,
1603                const struct ipt_ip *ip,
1604                void *matchinfo,
1605                unsigned int matchinfosize,
1606                unsigned int hook_mask)
1607 {
1608         const struct ipt_udp *udpinfo = matchinfo;
1609
1610         /* Must specify proto == UDP, and no unknown invflags */
1611         if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) {
1612                 duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
1613                          IPPROTO_UDP);
1614                 return 0;
1615         }
1616         if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
1617                 duprintf("ipt_udp: matchsize %u != %u\n",
1618                          matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
1619                 return 0;
1620         }
1621         if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
1622                 duprintf("ipt_udp: unknown flags %X\n",
1623                          udpinfo->invflags);
1624                 return 0;
1625         }
1626
1627         return 1;
1628 }
1629
/* ICMP type/code test.  A test_type of 0xFF accepts every packet;
 * otherwise the packet's type must equal test_type and its code must lie
 * in [min_code, max_code].  The outcome is XORed with invert.
 */
static inline int
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
		     u_int8_t type, u_int8_t code,
		     int invert)
{
	int hit;

	if (test_type == 0xFF)
		hit = 1;
	else
		hit = (type == test_type
		       && min_code <= code && code <= max_code);

	return hit ^ invert;
}
1639
1640 static int
1641 icmp_match(const struct sk_buff *skb,
1642            const struct net_device *in,
1643            const struct net_device *out,
1644            const void *matchinfo,
1645            int offset,
1646            int *hotdrop)
1647 {
1648         struct icmphdr _icmph, *ic;
1649         const struct ipt_icmp *icmpinfo = matchinfo;
1650
1651         /* Must not be a fragment. */
1652         if (offset)
1653                 return 0;
1654
1655         ic = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1656                                 sizeof(_icmph), &_icmph);
1657         if (ic == NULL) {
1658                 /* We've been asked to examine this packet, and we
1659                  * can't.  Hence, no choice but to drop.
1660                  */
1661                 duprintf("Dropping evil ICMP tinygram.\n");
1662                 *hotdrop = 1;
1663                 return 0;
1664         }
1665
1666         return icmp_type_code_match(icmpinfo->type,
1667                                     icmpinfo->code[0],
1668                                     icmpinfo->code[1],
1669                                     ic->type, ic->code,
1670                                     !!(icmpinfo->invflags&IPT_ICMP_INV));
1671 }
1672
1673 /* Called when user tries to insert an entry of this type. */
1674 static int
1675 icmp_checkentry(const char *tablename,
1676            const struct ipt_ip *ip,
1677            void *matchinfo,
1678            unsigned int matchsize,
1679            unsigned int hook_mask)
1680 {
1681         const struct ipt_icmp *icmpinfo = matchinfo;
1682
1683         /* Must specify proto == ICMP, and no unknown invflags */
1684         return ip->proto == IPPROTO_ICMP
1685                 && !(ip->invflags & IPT_INV_PROTO)
1686                 && matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
1687                 && !(icmpinfo->invflags & ~IPT_ICMP_INV);
1688 }
1689
1690 /* The built-in targets: standard (NULL) and error. */
1691 static struct ipt_target ipt_standard_target = {
1692         .name           = IPT_STANDARD_TARGET,
1693 };
1694
1695 static struct ipt_target ipt_error_target = {
1696         .name           = IPT_ERROR_TARGET,
1697         .target         = ipt_error,
1698 };
1699
1700 static struct nf_sockopt_ops ipt_sockopts = {
1701         .pf             = PF_INET,
1702         .set_optmin     = IPT_BASE_CTL,
1703         .set_optmax     = IPT_SO_SET_MAX+1,
1704         .set            = do_ipt_set_ctl,
1705         .get_optmin     = IPT_BASE_CTL,
1706         .get_optmax     = IPT_SO_GET_MAX+1,
1707         .get            = do_ipt_get_ctl,
1708 };
1709
1710 static struct ipt_match tcp_matchstruct = {
1711         .name           = "tcp",
1712         .match          = &tcp_match,
1713         .checkentry     = &tcp_checkentry,
1714 };
1715
1716 static struct ipt_match udp_matchstruct = {
1717         .name           = "udp",
1718         .match          = &udp_match,
1719         .checkentry     = &udp_checkentry,
1720 };
1721
1722 static struct ipt_match icmp_matchstruct = {
1723         .name           = "icmp",
1724         .match          = &icmp_match,
1725         .checkentry     = &icmp_checkentry,
1726 };
1727
1728 #ifdef CONFIG_PROC_FS
1729 static inline int print_name(const char *i,
1730                              off_t start_offset, char *buffer, int length,
1731                              off_t *pos, unsigned int *count)
1732 {
1733         if ((*count)++ >= start_offset) {
1734                 unsigned int namelen;
1735
1736                 namelen = sprintf(buffer + *pos, "%s\n",
1737                                   i + sizeof(struct list_head));
1738                 if (*pos + namelen > length) {
1739                         /* Stop iterating */
1740                         return 1;
1741                 }
1742                 *pos += namelen;
1743         }
1744         return 0;
1745 }
1746
1747 static inline int print_target(const struct ipt_target *t,
1748                                off_t start_offset, char *buffer, int length,
1749                                off_t *pos, unsigned int *count)
1750 {
1751         if (t == &ipt_standard_target || t == &ipt_error_target)
1752                 return 0;
1753         return print_name((char *)t, start_offset, buffer, length, pos, count);
1754 }
1755
1756 static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
1757 {
1758         off_t pos = 0;
1759         unsigned int count = 0;
1760
1761         if (down_interruptible(&ipt_mutex) != 0)
1762                 return 0;
1763
1764         LIST_FIND(&ipt_tables, print_name, void *,
1765                   offset, buffer, length, &pos, &count);
1766
1767         up(&ipt_mutex);
1768
1769         /* `start' hack - see fs/proc/generic.c line ~105 */
1770         *start=(char *)((unsigned long)count-offset);
1771         return pos;
1772 }
1773
1774 static int ipt_get_targets(char *buffer, char **start, off_t offset, int length)
1775 {
1776         off_t pos = 0;
1777         unsigned int count = 0;
1778
1779         if (down_interruptible(&ipt_mutex) != 0)
1780                 return 0;
1781
1782         LIST_FIND(&ipt_target, print_target, struct ipt_target *,
1783                   offset, buffer, length, &pos, &count);
1784         
1785         up(&ipt_mutex);
1786
1787         *start = (char *)((unsigned long)count - offset);
1788         return pos;
1789 }
1790
1791 static int ipt_get_matches(char *buffer, char **start, off_t offset, int length)
1792 {
1793         off_t pos = 0;
1794         unsigned int count = 0;
1795
1796         if (down_interruptible(&ipt_mutex) != 0)
1797                 return 0;
1798         
1799         LIST_FIND(&ipt_match, print_name, void *,
1800                   offset, buffer, length, &pos, &count);
1801
1802         up(&ipt_mutex);
1803
1804         *start = (char *)((unsigned long)count - offset);
1805         return pos;
1806 }
1807
1808 static struct { char *name; get_info_t *get_info; } ipt_proc_entry[] =
1809 { { "ip_tables_names", ipt_get_tables },
1810   { "ip_tables_targets", ipt_get_targets },
1811   { "ip_tables_matches", ipt_get_matches },
1812   { NULL, NULL} };
1813 #endif /*CONFIG_PROC_FS*/
1814
1815 static int __init init(void)
1816 {
1817         int ret;
1818
1819         /* Noone else will be downing sem now, so we won't sleep */
1820         down(&ipt_mutex);
1821         list_append(&ipt_target, &ipt_standard_target);
1822         list_append(&ipt_target, &ipt_error_target);
1823         list_append(&ipt_match, &tcp_matchstruct);
1824         list_append(&ipt_match, &udp_matchstruct);
1825         list_append(&ipt_match, &icmp_matchstruct);
1826         up(&ipt_mutex);
1827
1828         /* Register setsockopt */
1829         ret = nf_register_sockopt(&ipt_sockopts);
1830         if (ret < 0) {
1831                 duprintf("Unable to register sockopts.\n");
1832                 return ret;
1833         }
1834
1835 #ifdef CONFIG_PROC_FS
1836         {
1837         struct proc_dir_entry *proc;
1838         int i;
1839
1840         for (i = 0; ipt_proc_entry[i].name; i++) {
1841                 proc = proc_net_create(ipt_proc_entry[i].name, 0,
1842                                        ipt_proc_entry[i].get_info);
1843                 if (!proc) {
1844                         while (--i >= 0)
1845                                 proc_net_remove(ipt_proc_entry[i].name);
1846                         nf_unregister_sockopt(&ipt_sockopts);
1847                         return -ENOMEM;
1848                 }
1849                 proc->owner = THIS_MODULE;
1850         }
1851         }
1852 #endif
1853
1854         printk("ip_tables: (C) 2000-2002 Netfilter core team\n");
1855         return 0;
1856 }
1857
1858 static void __exit fini(void)
1859 {
1860         nf_unregister_sockopt(&ipt_sockopts);
1861 #ifdef CONFIG_PROC_FS
1862         {
1863         int i;
1864         for (i = 0; ipt_proc_entry[i].name; i++)
1865                 proc_net_remove(ipt_proc_entry[i].name);
1866         }
1867 #endif
1868 }
1869
/* Symbols exported to other modules, grouped by what they operate on. */

/* Tables */
EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);
EXPORT_SYMBOL(ipt_do_table);

/* Matches */
EXPORT_SYMBOL(ipt_register_match);
EXPORT_SYMBOL(ipt_unregister_match);

/* Targets */
EXPORT_SYMBOL(ipt_register_target);
EXPORT_SYMBOL(ipt_unregister_target);
EXPORT_SYMBOL(ipt_find_target_lock);

/* Module entry and exit points. */
module_init(init);
module_exit(fini);