Import changeset
[linux-flexiantxendom0-3.2.10.git] / net / ipv4 / netfilter / ip_tables.c
1 /*
2  * Packet matching code.
3  *
4  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5  */
6 #include <linux/config.h>
7 #include <linux/skbuff.h>
8 #include <linux/kmod.h>
9 #include <linux/vmalloc.h>
10 #include <linux/netdevice.h>
11 #include <linux/module.h>
12 #include <linux/tcp.h>
13 #include <linux/udp.h>
14 #include <linux/icmp.h>
15 #include <net/ip.h>
16 #include <asm/uaccess.h>
17 #include <asm/semaphore.h>
18 #include <linux/proc_fs.h>
19
20 #include <linux/netfilter_ipv4/ip_tables.h>
21
/*
 * Compile-time debug switches.  Uncomment one to enable it:
 *
 *   DEBUG_IP_FIREWALL       turns dprintf() into printk().
 *   DEBUG_ALLOW_ALL         makes ipt_do_table() return NF_ACCEPT
 *                           unconditionally (useful for remote debugging).
 *   DEBUG_IP_FIREWALL_USER  turns duprintf() into printk().
 */
/*#define DEBUG_IP_FIREWALL*/
/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
/*#define DEBUG_IP_FIREWALL_USER*/

/* dprintf(): packet-path tracing; expands to nothing unless
   DEBUG_IP_FIREWALL is defined, so its arguments are not evaluated. */
#ifdef DEBUG_IP_FIREWALL
#define dprintf(format, args...)  printk(format , ## args)
#else
#define dprintf(format, args...)
#endif

/* duprintf(): tracing for the table/rule checking code; expands to
   nothing unless DEBUG_IP_FIREWALL_USER is defined. */
#ifdef DEBUG_IP_FIREWALL_USER
#define duprintf(format, args...) printk(format , ## args)
#else
#define duprintf(format, args...)
#endif

/* IP_NF_ASSERT(x): with CONFIG_NETFILTER_DEBUG, prints function, file
   and line when x is false (it only reports; execution continues).
   Without it the macro is empty and x is never evaluated, so x must
   not be relied on for side effects. */
#ifdef CONFIG_NETFILTER_DEBUG
#define IP_NF_ASSERT(x)                                         \
do {                                                            \
        if (!(x))                                               \
                printk("IP_NF_ASSERT: %s:%s:%u\n",              \
                       __FUNCTION__, __FILE__, __LINE__);       \
} while(0)
#else
#define IP_NF_ASSERT(x)
#endif
/* Round x up to a multiple of SMP_CACHE_BYTES (the mask arithmetic
   assumes SMP_CACHE_BYTES is a power of two). */
#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
49
/* Mutex protects lists (only traversed in user context). */
static DECLARE_MUTEX(ipt_mutex);

/* Must have mutex */
/* Both macros ignore their argument and assert that ipt_mutex cannot be
   taken, i.e. that somebody already holds it.  They must be defined
   before the helper headers below are included.
   NOTE(review): when CONFIG_NETFILTER_DEBUG is on and the assertion
   fails, down_trylock() has presumably just acquired the mutex and
   nothing here releases it -- confirm against the semaphore API. */
#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
#define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
#include <linux/netfilter_ipv4/lockhelp.h>
#include <linux/netfilter_ipv4/listhelp.h>

/* Disabled debugging aid: enabling it erases the `static' and `inline'
   keywords for the rest of the file. */
#if 0
/* All the better to debug you with... */
#define static
#define inline
#endif
64
65 /* Locking is simple: we assume at worst case there will be one packet
66    in user context and one from bottom halves (or soft irq if Alexey's
67    softnet patch was applied).
68
69    We keep a set of rules for each CPU, so we can avoid write-locking
70    them; doing a readlock_bh() stops packets coming through if we're
71    in user context.
72
73    To be cache friendly on SMP, we arrange them like so:
74    [ n-entries ]
75    ... cache-align padding ...
76    [ n-entries ]
77
78    Hence the start of any table is given by get_table() below.  */
79
/* The table itself: a header followed by the rule blob(s).  On SMP
   builds TABLE_OFFSET() selects a copy at SMP_ALIGN(size) * n bytes
   into `entries'; on other builds there is a single copy at offset 0. */
struct ipt_table_info
{
        /* Size per table: byte length of one copy of the rule blob */
        unsigned int size;
        /* Number of entries: FIXME. --RR */
        unsigned int number;

        /* Entry points and underflows: byte offsets from the start of a
           rule blob, indexed by hook number.  ipt_do_table() starts at
           hook_entry[hook] and uses underflow[hook] as its initial
           return point. */
        unsigned int hook_entry[NF_IP_NUMHOOKS];
        unsigned int underflow[NF_IP_NUMHOOKS];

        /* ipt_entry tables: one per CPU */
        /* Zero-length trailing array, aligned to a cache line; the
           actual storage is allocated past the end of the struct. */
        char entries[0] __attribute__((aligned(SMP_CACHE_BYTES)));
};
95
/* Lists of targets, matches and tables; searched by name through
   find_inlist_lock_noload() with ipt_mutex held. */
static LIST_HEAD(ipt_target);
static LIST_HEAD(ipt_match);
static LIST_HEAD(ipt_tables);
/* Add b bytes and p packets to counter structure c. */
#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)

/* Byte offset, inside t->entries, of copy number p of the rule blob.
   Without CONFIG_SMP there is only one copy and both arguments are
   ignored. */
#ifdef CONFIG_SMP
#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
#else
#define TABLE_OFFSET(t,p) 0
#endif

/* Disabled debugging aid: wraps down()/down_interruptible()/up() so
   every semaphore operation is logged with its source line. */
#if 0
#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
#define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
#endif
112
113 /* Returns whether matches rule or not. */
114 static inline int
115 ip_packet_match(const struct iphdr *ip,
116                 const char *indev,
117                 const char *outdev,
118                 const struct ipt_ip *ipinfo,
119                 int isfrag)
120 {
121         size_t i;
122         unsigned long ret;
123
124 #define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))
125
126         if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
127                   IPT_INV_SRCIP)
128             || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
129                      IPT_INV_DSTIP)) {
130                 dprintf("Source or dest mismatch.\n");
131
132                 dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
133                         NIPQUAD(ip->saddr),
134                         NIPQUAD(ipinfo->smsk.s_addr),
135                         NIPQUAD(ipinfo->src.s_addr),
136                         ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
137                 dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
138                         NIPQUAD(ip->daddr),
139                         NIPQUAD(ipinfo->dmsk.s_addr),
140                         NIPQUAD(ipinfo->dst.s_addr),
141                         ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
142                 return 0;
143         }
144
145         /* Look for ifname matches; this should unroll nicely. */
146         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
147                 ret |= (((const unsigned long *)indev)[i]
148                         ^ ((const unsigned long *)ipinfo->iniface)[i])
149                         & ((const unsigned long *)ipinfo->iniface_mask)[i];
150         }
151
152         if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
153                 dprintf("VIA in mismatch (%s vs %s).%s\n",
154                         indev, ipinfo->iniface,
155                         ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
156                 return 0;
157         }
158
159         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
160                 ret |= (((const unsigned long *)outdev)[i]
161                         ^ ((const unsigned long *)ipinfo->outiface)[i])
162                         & ((const unsigned long *)ipinfo->outiface_mask)[i];
163         }
164
165         if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
166                 dprintf("VIA out mismatch (%s vs %s).%s\n",
167                         outdev, ipinfo->outiface,
168                         ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
169                 return 0;
170         }
171
172         /* Check specific protocol */
173         if (ipinfo->proto
174             && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
175                 dprintf("Packet protocol %hi does not match %hi.%s\n",
176                         ip->protocol, ipinfo->proto,
177                         ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
178                 return 0;
179         }
180
181         /* If we have a fragment rule but the packet is not a fragment
182          * then we return zero */
183         if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
184                 dprintf("Fragment rule but not fragment.%s\n",
185                         ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
186                 return 0;
187         }
188
189         return 1;
190 }
191
192 static inline int
193 ip_checkentry(const struct ipt_ip *ip)
194 {
195         if (ip->flags & ~IPT_F_MASK) {
196                 duprintf("Unknown flag bits set: %08X\n",
197                          ip->flags & ~IPT_F_MASK);
198                 return 0;
199         }
200         if (ip->invflags & ~IPT_INV_MASK) {
201                 duprintf("Unknown invflag bits set: %08X\n",
202                          ip->invflags & ~IPT_INV_MASK);
203                 return 0;
204         }
205         return 1;
206 }
207
208 static unsigned int
209 ipt_error(struct sk_buff **pskb,
210           unsigned int hooknum,
211           const struct net_device *in,
212           const struct net_device *out,
213           const void *targinfo,
214           void *userinfo)
215 {
216         if (net_ratelimit())
217                 printk("ip_tables: error: `%s'\n", (char *)targinfo);
218
219         return NF_DROP;
220 }
221
222 static inline
223 int do_match(struct ipt_entry_match *m,
224              const struct sk_buff *skb,
225              const struct net_device *in,
226              const struct net_device *out,
227              int offset,
228              const void *hdr,
229              u_int16_t datalen,
230              int *hotdrop)
231 {
232         /* Stop iteration if it doesn't match */
233         if (!m->u.kernel.match->match(skb, in, out, m->data,
234                                       offset, hdr, datalen, hotdrop))
235                 return 1;
236         else
237                 return 0;
238 }
239
/* Return the rule located `offset' bytes past `base'. */
static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
        char *where = (char *)base + offset;

        return (struct ipt_entry *)where;
}
245
/* Returns one of the generic firewall policies, like NF_ACCEPT. */
/*
 * Run the packet in *pskb through the rules that `table' holds for
 * `hook' and return the resulting verdict.
 *
 * pskb      packet; handed to target functions, which may change it
 *           (the header pointers are reloaded after each target call).
 * hook      hook number, used to index hook_entry[] and underflow[].
 * in, out   devices; either may be NULL, in which case an all-zero
 *           name is used for interface matching.
 * table     table to traverse; table->lock is held for reading, with
 *           bottom halves disabled, for the whole walk.
 * userdata  passed through untouched to target functions.
 *
 * The walk stops when a rule yields a verdict or a match sets *hotdrop;
 * in the latter case NF_DROP is returned.  With DEBUG_ALLOW_ALL the
 * result is always NF_ACCEPT.
 */
unsigned int
ipt_do_table(struct sk_buff **pskb,
             unsigned int hook,
             const struct net_device *in,
             const struct net_device *out,
             struct ipt_table *table,
             void *userdata)
{
        static const char nulldevname[IFNAMSIZ] = { 0 };
        u_int16_t offset;
        struct iphdr *ip;
        void *protohdr;
        u_int16_t datalen;
        int hotdrop = 0;
        /* Initializing verdict to NF_DROP keeps gcc happy. */
        unsigned int verdict = NF_DROP;
        const char *indev, *outdev;
        void *table_base;
        struct ipt_entry *e, *back;

        /* Initialization */
        ip = (*pskb)->nh.iph;
        /* ihl counts 32-bit words, hence the u_int32_t pointer step
           and the "* 4" below. */
        protohdr = (u_int32_t *)ip + ip->ihl;
        datalen = (*pskb)->len - ip->ihl * 4;
        indev = in ? in->name : nulldevname;
        outdev = out ? out->name : nulldevname;
        /* We handle fragments by dealing with the first fragment as
         * if it was a normal packet.  All other fragments are treated
         * normally, except that they will NEVER match rules that ask
         * things we don't know, ie. tcp syn flag or ports).  If the
         * rule is also a fragment-specific rule, non-fragments won't
         * match it. */
        offset = ntohs(ip->frag_off) & IP_OFFSET;

        read_lock_bh(&table->lock);
        IP_NF_ASSERT(table->valid_hooks & (1 << hook));
        /* Pick this CPU's copy of the rules. */
        table_base = (void *)table->private->entries
                + TABLE_OFFSET(table->private,
                               cpu_number_map(smp_processor_id()));
        e = get_entry(table_base, table->private->hook_entry[hook]);

#ifdef CONFIG_NETFILTER_DEBUG
        /* Check noone else using our table */
        /* The first entry's comefrom field doubles as an "in use"
           marker: 0xdead57ac is written when a walk finishes,
           0xeeeeeeec while a target function runs, and 0x57acc001
           while this function is walking the rules. */
        if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
            && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
                printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
                       smp_processor_id(),
                       table->name,
                       &((struct ipt_entry *)table_base)->comefrom,
                       ((struct ipt_entry *)table_base)->comefrom);
        }
        ((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
#endif

        /* For return from builtin chain */
        back = get_entry(table_base, table->private->underflow[hook]);

        do {
                IP_NF_ASSERT(e);
                IP_NF_ASSERT(back);
                (*pskb)->nfcache |= e->nfcache;
                if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
                        struct ipt_entry_target *t;

                        /* do_match() returns nonzero for the first
                           match extension that fails. */
                        if (IPT_MATCH_ITERATE(e, do_match,
                                              *pskb, in, out,
                                              offset, protohdr,
                                              datalen, &hotdrop) != 0)
                                goto no_match;

                        /* Rule matched: account bytes and one packet. */
                        ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);

                        t = ipt_get_target(e);
                        IP_NF_ASSERT(t->u.kernel.target);
                        /* Standard target? */
                        /* (recognised by having no target function) */
                        if (!t->u.kernel.target->target) {
                                int v;

                                v = ((struct ipt_standard_target *)t)->verdict;
                                if (v < 0) {
                                        /* Pop from stack? */
                                        if (v != IPT_RETURN) {
                                                /* Negative values other
                                                   than IPT_RETURN encode
                                                   a final verdict as
                                                   -v - 1. */
                                                verdict = (unsigned)(-v) - 1;
                                                break;
                                        }
                                        /* Return: resume at the saved
                                           entry and restore the back
                                           pointer that was stored in
                                           its comefrom field. */
                                        e = back;
                                        back = get_entry(table_base,
                                                         back->comefrom);
                                        continue;
                                }
                                /* v >= 0 is a byte offset to continue
                                   at.  If it is not simply the next
                                   entry, this is a jump, so remember
                                   where to come back to. */
                                if (table_base + v
                                    != (void *)e + e->next_offset) {
                                        /* Save old back ptr in next entry */
                                        struct ipt_entry *next
                                                = (void *)e + e->next_offset;
                                        next->comefrom
                                                = (void *)back - table_base;
                                        /* set back pointer to next entry */
                                        back = next;
                                }

                                e = get_entry(table_base, v);
                        } else {
                                /* Targets which reenter must return
                                   abs. verdicts */
#ifdef CONFIG_NETFILTER_DEBUG
                                ((struct ipt_entry *)table_base)->comefrom
                                        = 0xeeeeeeec;
#endif
                                verdict = t->u.kernel.target->target(pskb,
                                                                     hook,
                                                                     in, out,
                                                                     t->data,
                                                                     userdata);

#ifdef CONFIG_NETFILTER_DEBUG
                                /* If the marker changed, the table was
                                   walked again during the target call;
                                   IPT_CONTINUE is then turned into
                                   NF_DROP. */
                                if (((struct ipt_entry *)table_base)->comefrom
                                    != 0xeeeeeeec
                                    && verdict == IPT_CONTINUE) {
                                        printk("Target %s reentered!\n",
                                               t->u.kernel.target->name);
                                        verdict = NF_DROP;
                                }
                                ((struct ipt_entry *)table_base)->comefrom
                                        = 0x57acc001;
#endif
                                /* Target might have changed stuff. */
                                ip = (*pskb)->nh.iph;
                                protohdr = (u_int32_t *)ip + ip->ihl;
                                datalen = (*pskb)->len - ip->ihl * 4;

                                if (verdict == IPT_CONTINUE)
                                        e = (void *)e + e->next_offset;
                                else
                                        /* Verdict */
                                        break;
                        }
                } else {

                no_match:
                        /* Fall through to the following rule. */
                        e = (void *)e + e->next_offset;
                }
        } while (!hotdrop);

#ifdef CONFIG_NETFILTER_DEBUG
        ((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
#endif
        read_unlock_bh(&table->lock);

#ifdef DEBUG_ALLOW_ALL
        return NF_ACCEPT;
#else
        /* A match that set hotdrop overrides whatever verdict holds. */
        if (hotdrop)
                return NF_DROP;
        else return verdict;
#endif
}
404
405 /* If it succeeds, returns element and locks mutex */
406 static inline void *
407 find_inlist_lock_noload(struct list_head *head,
408                         const char *name,
409                         int *error,
410                         struct semaphore *mutex)
411 {
412         void *ret;
413
414 #if 0
415         duprintf("find_inlist: searching for `%s' in %s.\n",
416                  name, head == &ipt_target ? "ipt_target"
417                  : head == &ipt_match ? "ipt_match"
418                  : head == &ipt_tables ? "ipt_tables" : "UNKNOWN");
419 #endif
420
421         *error = down_interruptible(mutex);
422         if (*error != 0)
423                 return NULL;
424
425         ret = list_named_find(head, name);
426         if (!ret) {
427                 *error = -ENOENT;
428                 up(mutex);
429         }
430         return ret;
431 }
432
/*
 * find_inlist_lock(): like find_inlist_lock_noload(), but when module
 * loading is available (CONFIG_KMOD) a failed lookup triggers
 * request_module("<prefix><name>") followed by one more lookup.
 *
 * Returns the element with `mutex' held, or NULL with the mutex
 * released and *error set by the last lookup attempt.
 */
#ifndef CONFIG_KMOD
#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m))
#else
static void *
find_inlist_lock(struct list_head *head,
                 const char *name,
                 const char *prefix,
                 int *error,
                 struct semaphore *mutex)
{
        void *ret;

        ret = find_inlist_lock_noload(head, name, error, mutex);
        if (!ret) {
                char modulename[IPT_FUNCTION_MAXNAMELEN + strlen(prefix) + 1];

                strcpy(modulename, prefix);
                /* `name' comes from a fixed-size field that may lack a
                   terminating NUL.  Append at most
                   IPT_FUNCTION_MAXNAMELEN characters so the copy can
                   never run past the end of modulename (strncat adds
                   the terminator itself, which the "+ 1" covers). */
                strncat(modulename, name, IPT_FUNCTION_MAXNAMELEN);
                duprintf("find_inlist: loading `%s'.\n", modulename);
                request_module(modulename);
                ret = find_inlist_lock_noload(head, name, error, mutex);
        }

        return ret;
}
#endif
458
459 static inline struct ipt_table *
460 find_table_lock(const char *name, int *error, struct semaphore *mutex)
461 {
462         return find_inlist_lock(&ipt_tables, name, "iptable_", error, mutex);
463 }
464
465 static inline struct ipt_match *
466 find_match_lock(const char *name, int *error, struct semaphore *mutex)
467 {
468         return find_inlist_lock(&ipt_match, name, "ipt_", error, mutex);
469 }
470
471 static inline struct ipt_target *
472 find_target_lock(const char *name, int *error, struct semaphore *mutex)
473 {
474         return find_inlist_lock(&ipt_target, name, "ipt_", error, mutex);
475 }
476
477 /* All zeroes == unconditional rule. */
478 static inline int
479 unconditional(const struct ipt_ip *ip)
480 {
481         unsigned int i;
482
483         for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
484                 if (((__u32 *)ip)[i])
485                         return 0;
486
487         return 1;
488 }
489
/* Figures out from what hook each rule can be called: returns 0 if
   there are loops.  Puts hook bitmask in comefrom. */
/*
 * newinfo      table whose entries are walked and annotated in place.
 * valid_hooks  bitmask of hooks to process; others are skipped.
 *
 * The walk is iterative.  Two fields of each entry serve as scratch
 * space while it runs:
 *   - counters.pcnt holds the offset of the entry we arrived from (the
 *     "back pointer"); it is reset to 0 when the walk backs out of the
 *     entry.
 *   - bit NF_IP_NUMHOOKS of comefrom marks an entry that is on the path
 *     currently being followed.  Reaching an entry that already has the
 *     bit set means the rules loop.
 * Bits 0..NF_IP_NUMHOOKS-1 of comefrom accumulate the hooks from which
 * the entry was reached.
 */
static int
mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
{
        unsigned int hook;

        /* No recursion; use packet counter to save back ptrs (reset
           to 0 as we leave), and comefrom to save source hook bitmask */
        for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
                unsigned int pos = newinfo->hook_entry[hook];
                struct ipt_entry *e
                        = (struct ipt_entry *)(newinfo->entries + pos);

                if (!(valid_hooks & (1 << hook)))
                        continue;

                /* Set initial back pointer. */
                /* It points at the entry itself; the backtracking loop
                   below recognises "pos == oldpos" as the start. */
                e->counters.pcnt = pos;

                for (;;) {
                        struct ipt_standard_target *t
                                = (void *)ipt_get_target(e);

                        /* Already on the current path: a loop. */
                        if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
                                printk("iptables: loop hook %u pos %u %08X.\n",
                                       hook, pos, e->comefrom);
                                return 0;
                        }
                        /* Record the hook and mark as on-path. */
                        e->comefrom
                                |= ((1 << hook) | (1 << NF_IP_NUMHOOKS));

                        /* Unconditional return/END. */
                        /* i.e. no match extensions (the target follows
                           the entry header directly), the standard
                           target with a negative verdict, and an
                           all-zero IP part. */
                        if (e->target_offset == sizeof(struct ipt_entry)
                            && (strcmp(t->target.u.user.name,
                                       IPT_STANDARD_TARGET) == 0)
                            && t->verdict < 0
                            && unconditional(&e->ip)) {
                                unsigned int oldpos, size;

                                /* Return: backtrack through the last
                                   big jump. */
                                do {
                                        /* Leave the entry: clear its
                                           on-path bit. */
                                        e->comefrom ^= (1<<NF_IP_NUMHOOKS);
#ifdef DEBUG_IP_FIREWALL_USER
                                        if (e->comefrom
                                            & (1 << NF_IP_NUMHOOKS)) {
                                                duprintf("Back unset "
                                                         "on hook %u "
                                                         "rule %u\n",
                                                         hook, pos);
                                        }
#endif
                                        /* Pop the back pointer and
                                           restore the counter. */
                                        oldpos = pos;
                                        pos = e->counters.pcnt;
                                        e->counters.pcnt = 0;

                                        /* We're at the start. */
                                        if (pos == oldpos)
                                                goto next;

                                        e = (struct ipt_entry *)
                                                (newinfo->entries + pos);
                                /* Keep unwinding while the entry we
                                   left was simply the one following
                                   its predecessor (a fallthrough);
                                   stop at the entry that jumped. */
                                } while (oldpos == pos + e->next_offset);

                                /* Move along one */
                                /* Continue with the entry after the
                                   jump, giving it the jump entry as
                                   back pointer. */
                                size = e->next_offset;
                                e = (struct ipt_entry *)
                                        (newinfo->entries + pos + size);
                                e->counters.pcnt = pos;
                                pos += size;
                        } else {
                                int newpos = t->verdict;

                                if (strcmp(t->target.u.user.name,
                                           IPT_STANDARD_TARGET) == 0
                                    && newpos >= 0) {
                                        /* This a jump; chase it. */
                                        duprintf("Jump rule %u -> %u\n",
                                                 pos, newpos);
                                } else {
                                        /* ... this is a fallthru */
                                        newpos = pos + e->next_offset;
                                }
                                /* Step to the chosen entry, recording
                                   where we came from. */
                                e = (struct ipt_entry *)
                                        (newinfo->entries + newpos);
                                e->counters.pcnt = pos;
                                pos = newpos;
                        }
                }
                next:
                duprintf("Finished chain %u\n", hook);
        }
        return 1;
}
585
586 static inline int
587 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
588 {
589         if (i && (*i)-- == 0)
590                 return 1;
591
592         if (m->u.kernel.match->destroy)
593                 m->u.kernel.match->destroy(m->data,
594                                            m->u.match_size - sizeof(*m));
595
596         if (m->u.kernel.match->me)
597                 __MOD_DEC_USE_COUNT(m->u.kernel.match->me);
598
599         return 0;
600 }
601
602 static inline int
603 standard_check(const struct ipt_entry_target *t,
604                unsigned int max_offset)
605 {
606         struct ipt_standard_target *targ = (void *)t;
607
608         /* Check standard info. */
609         if (t->u.target_size
610             != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
611                 duprintf("standard_check: target size %u != %u\n",
612                          t->u.target_size,
613                          IPT_ALIGN(sizeof(struct ipt_standard_target)));
614                 return 0;
615         }
616
617         if (targ->verdict >= 0
618             && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
619                 duprintf("ipt_standard_check: bad verdict (%i)\n",
620                          targ->verdict);
621                 return 0;
622         }
623
624         if (targ->verdict < -NF_MAX_VERDICT - 1) {
625                 duprintf("ipt_standard_check: bad negative verdict (%i)\n",
626                          targ->verdict);
627                 return 0;
628         }
629         return 1;
630 }
631
632 static inline int
633 check_match(struct ipt_entry_match *m,
634             const char *name,
635             const struct ipt_ip *ip,
636             unsigned int hookmask,
637             unsigned int *i)
638 {
639         int ret;
640         struct ipt_match *match;
641
642         match = find_match_lock(m->u.user.name, &ret, &ipt_mutex);
643         if (!match) {
644                 duprintf("check_match: `%s' not found\n", m->u.user.name);
645                 return ret;
646         }
647         if (match->me)
648                 __MOD_INC_USE_COUNT(match->me);
649         m->u.kernel.match = match;
650         up(&ipt_mutex);
651
652         if (m->u.kernel.match->checkentry
653             && !m->u.kernel.match->checkentry(name, ip, m->data,
654                                               m->u.match_size - sizeof(*m),
655                                               hookmask)) {
656                 if (m->u.kernel.match->me)
657                         __MOD_DEC_USE_COUNT(m->u.kernel.match->me);
658                 duprintf("ip_tables: check failed for `%s'.\n",
659                          m->u.kernel.match->name);
660                 return -EINVAL;
661         }
662
663         (*i)++;
664         return 0;
665 }
666
/* Declared ahead of its use in check_entry(), which compares a rule's
   target against this object to decide whether to run
   standard_check(). */
static struct ipt_target ipt_standard_target;
668
669 static inline int
670 check_entry(struct ipt_entry *e, const char *name, unsigned int size,
671             unsigned int *i)
672 {
673         struct ipt_entry_target *t;
674         struct ipt_target *target;
675         int ret;
676         unsigned int j;
677
678         if (!ip_checkentry(&e->ip)) {
679                 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
680                 return -EINVAL;
681         }
682
683         j = 0;
684         ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
685         if (ret != 0)
686                 goto cleanup_matches;
687
688         t = ipt_get_target(e);
689         target = find_target_lock(t->u.user.name, &ret, &ipt_mutex);
690         if (!target) {
691                 duprintf("check_entry: `%s' not found\n", t->u.user.name);
692                 goto cleanup_matches;
693         }
694         if (target->me)
695                 __MOD_INC_USE_COUNT(target->me);
696         t->u.kernel.target = target;
697         up(&ipt_mutex);
698
699         if (t->u.kernel.target == &ipt_standard_target) {
700                 if (!standard_check(t, size)) {
701                         ret = -EINVAL;
702                         goto cleanup_matches;
703                 }
704         } else if (t->u.kernel.target->checkentry
705                    && !t->u.kernel.target->checkentry(name, e, t->data,
706                                                       t->u.target_size
707                                                       - sizeof(*t),
708                                                       e->comefrom)) {
709                 if (t->u.kernel.target->me)
710                         __MOD_DEC_USE_COUNT(t->u.kernel.target->me);
711                 duprintf("ip_tables: check failed for `%s'.\n",
712                          t->u.kernel.target->name);
713                 ret = -EINVAL;
714                 goto cleanup_matches;
715         }
716
717         (*i)++;
718         return 0;
719
720  cleanup_matches:
721         IPT_MATCH_ITERATE(e, cleanup_match, &j);
722         return ret;
723 }
724
725 static inline int
726 check_entry_size_and_hooks(struct ipt_entry *e,
727                            struct ipt_table_info *newinfo,
728                            unsigned char *base,
729                            unsigned char *limit,
730                            const unsigned int *hook_entries,
731                            const unsigned int *underflows,
732                            unsigned int *i)
733 {
734         unsigned int h;
735
736         if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
737             || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
738                 duprintf("Bad offset %p\n", e);
739                 return -EINVAL;
740         }
741
742         if (e->next_offset
743             < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
744                 duprintf("checking: element %p size %u\n",
745                          e, e->next_offset);
746                 return -EINVAL;
747         }
748
749         /* Check hooks & underflows */
750         for (h = 0; h < NF_IP_NUMHOOKS; h++) {
751                 if ((unsigned char *)e - base == hook_entries[h])
752                         newinfo->hook_entry[h] = hook_entries[h];
753                 if ((unsigned char *)e - base == underflows[h])
754                         newinfo->underflow[h] = underflows[h];
755         }
756
757         /* FIXME: underflows must be unconditional, standard verdicts
758            < 0 (not IPT_RETURN). --RR */
759
760         /* Clear counters and comefrom */
761         e->counters = ((struct ipt_counters) { 0, 0 });
762         e->comefrom = 0;
763
764         (*i)++;
765         return 0;
766 }
767
768 static inline int
769 cleanup_entry(struct ipt_entry *e, unsigned int *i)
770 {
771         struct ipt_entry_target *t;
772
773         if (i && (*i)-- == 0)
774                 return 1;
775
776         /* Cleanup all matches */
777         IPT_MATCH_ITERATE(e, cleanup_match, NULL);
778         t = ipt_get_target(e);
779         if (t->u.kernel.target->destroy)
780                 t->u.kernel.target->destroy(t->data,
781                                             t->u.target_size - sizeof(*t));
782         if (t->u.kernel.target->me)
783                 __MOD_DEC_USE_COUNT(t->u.kernel.target->me);
784
785         return 0;
786 }
787
788 /* Checks and translates the user-supplied table segment (held in
789    newinfo) */
790 static int
791 translate_table(const char *name,
792                 unsigned int valid_hooks,
793                 struct ipt_table_info *newinfo,
794                 unsigned int size,
795                 unsigned int number,
796                 const unsigned int *hook_entries,
797                 const unsigned int *underflows)
798 {
799         unsigned int i;
800         int ret;
801
802         newinfo->size = size;
803         newinfo->number = number;
804
805         /* Init all hooks to impossible value. */
806         for (i = 0; i < NF_IP_NUMHOOKS; i++) {
807                 newinfo->hook_entry[i] = 0xFFFFFFFF;
808                 newinfo->underflow[i] = 0xFFFFFFFF;
809         }
810
811         duprintf("translate_table: size %u\n", newinfo->size);
812         i = 0;
813         /* Walk through entries, checking offsets. */
814         ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
815                                 check_entry_size_and_hooks,
816                                 newinfo,
817                                 newinfo->entries,
818                                 newinfo->entries + size,
819                                 hook_entries, underflows, &i);
820         if (ret != 0)
821                 return ret;
822
823         if (i != number) {
824                 duprintf("translate_table: %u not %u entries\n",
825                          i, number);
826                 return -EINVAL;
827         }
828
829         /* Check hooks all assigned */
830         for (i = 0; i < NF_IP_NUMHOOKS; i++) {
831                 /* Only hooks which are valid */
832                 if (!(valid_hooks & (1 << i)))
833                         continue;
834                 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
835                         duprintf("Invalid hook entry %u %u\n",
836                                  i, hook_entries[i]);
837                         return -EINVAL;
838                 }
839                 if (newinfo->underflow[i] == 0xFFFFFFFF) {
840                         duprintf("Invalid underflow %u %u\n",
841                                  i, underflows[i]);
842                         return -EINVAL;
843                 }
844         }
845
846         if (!mark_source_chains(newinfo, valid_hooks))
847                 return -ELOOP;
848
849         /* Finally, each sanity check must pass */
850         i = 0;
851         ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
852                                 check_entry, name, size, &i);
853
854         if (ret != 0) {
855                 IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
856                                   cleanup_entry, &i);
857                 return ret;
858         }
859
860         /* And one copy for every other CPU */
861         for (i = 1; i < smp_num_cpus; i++) {
862                 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
863                        newinfo->entries,
864                        SMP_ALIGN(newinfo->size));
865         }
866
867         return ret;
868 }
869
870 static struct ipt_table_info *
871 replace_table(struct ipt_table *table,
872               unsigned int num_counters,
873               struct ipt_table_info *newinfo,
874               int *error)
875 {
876         struct ipt_table_info *oldinfo;
877
878 #ifdef CONFIG_NETFILTER_DEBUG
879         {
880                 struct ipt_entry *table_base;
881                 unsigned int i;
882
883                 for (i = 0; i < smp_num_cpus; i++) {
884                         table_base =
885                                 (void *)newinfo->entries
886                                 + TABLE_OFFSET(newinfo, i);
887
888                         table_base->comefrom = 0xdead57ac;
889                 }
890         }
891 #endif
892
893         /* Do the substitution. */
894         write_lock_bh(&table->lock);
895         /* Check inside lock: is the old number correct? */
896         if (num_counters != table->private->number) {
897                 duprintf("num_counters != table->private->number (%u/%u)\n",
898                          num_counters, table->private->number);
899                 write_unlock_bh(&table->lock);
900                 *error = -EAGAIN;
901                 return NULL;
902         }
903         oldinfo = table->private;
904         table->private = newinfo;
905         write_unlock_bh(&table->lock);
906
907         return oldinfo;
908 }
909
910 /* Gets counters. */
911 static inline int
912 add_entry_to_counter(const struct ipt_entry *e,
913                      struct ipt_counters total[],
914                      unsigned int *i)
915 {
916         ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
917
918         (*i)++;
919         return 0;
920 }
921
922 static void
923 get_counters(const struct ipt_table_info *t,
924              struct ipt_counters counters[])
925 {
926         unsigned int cpu;
927         unsigned int i;
928
929         for (cpu = 0; cpu < smp_num_cpus; cpu++) {
930                 i = 0;
931                 IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
932                                   t->size,
933                                   add_entry_to_counter,
934                                   counters,
935                                   &i);
936         }
937 }
938
939 static int
940 copy_entries_to_user(unsigned int total_size,
941                      struct ipt_table *table,
942                      void *userptr)
943 {
944         unsigned int off, num, countersize;
945         struct ipt_entry *e;
946         struct ipt_counters *counters;
947         int ret = 0;
948
949         /* We need atomic snapshot of counters: rest doesn't change
950            (other than comefrom, which userspace doesn't care
951            about). */
952         countersize = sizeof(struct ipt_counters) * table->private->number;
953         counters = vmalloc(countersize);
954
955         if (counters == NULL)
956                 return -ENOMEM;
957
958         /* First, sum counters... */
959         memset(counters, 0, countersize);
960         write_lock_bh(&table->lock);
961         get_counters(table->private, counters);
962         write_unlock_bh(&table->lock);
963
964         /* ... then copy entire thing from CPU 0... */
965         if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
966                 ret = -EFAULT;
967                 goto free_counters;
968         }
969
970         /* FIXME: use iterator macros --RR */
971         /* ... then go back and fix counters and names */
972         for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
973                 unsigned int i;
974                 struct ipt_entry_match *m;
975                 struct ipt_entry_target *t;
976
977                 e = (struct ipt_entry *)(table->private->entries + off);
978                 if (copy_to_user(userptr + off
979                                  + offsetof(struct ipt_entry, counters),
980                                  &counters[num],
981                                  sizeof(counters[num])) != 0) {
982                         ret = -EFAULT;
983                         goto free_counters;
984                 }
985
986                 for (i = sizeof(struct ipt_entry);
987                      i < e->target_offset;
988                      i += m->u.match_size) {
989                         m = (void *)e + i;
990
991                         if (copy_to_user(userptr + off + i
992                                          + offsetof(struct ipt_entry_match,
993                                                     u.user.name),
994                                          m->u.kernel.match->name,
995                                          strlen(m->u.kernel.match->name)+1)
996                             != 0) {
997                                 ret = -EFAULT;
998                                 goto free_counters;
999                         }
1000                 }
1001
1002                 t = ipt_get_target(e);
1003                 if (copy_to_user(userptr + off + e->target_offset
1004                                  + offsetof(struct ipt_entry_target,
1005                                             u.user.name),
1006                                  t->u.kernel.target->name,
1007                                  strlen(t->u.kernel.target->name)+1) != 0) {
1008                         ret = -EFAULT;
1009                         goto free_counters;
1010                 }
1011         }
1012
1013  free_counters:
1014         vfree(counters);
1015         return ret;
1016 }
1017
1018 static int
1019 get_entries(const struct ipt_get_entries *entries,
1020             struct ipt_get_entries *uptr)
1021 {
1022         int ret;
1023         struct ipt_table *t;
1024
1025         t = find_table_lock(entries->name, &ret, &ipt_mutex);
1026         if (t) {
1027                 duprintf("t->private->number = %u\n",
1028                          t->private->number);
1029                 if (entries->size == t->private->size)
1030                         ret = copy_entries_to_user(t->private->size,
1031                                                    t, uptr->entrytable);
1032                 else {
1033                         duprintf("get_entries: I've got %u not %u!\n",
1034                                  t->private->size,
1035                                  entries->size);
1036                         ret = -EINVAL;
1037                 }
1038                 up(&ipt_mutex);
1039         } else
1040                 duprintf("get_entries: Can't find %s!\n",
1041                          entries->name);
1042
1043         return ret;
1044 }
1045
1046 static int
1047 do_replace(void *user, unsigned int len)
1048 {
1049         int ret;
1050         struct ipt_replace tmp;
1051         struct ipt_table *t;
1052         struct ipt_table_info *newinfo, *oldinfo;
1053         struct ipt_counters *counters;
1054
1055         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1056                 return -EFAULT;
1057
1058         /* Hack: Causes ipchains to give correct error msg --RR */
1059         if (len != sizeof(tmp) + tmp.size)
1060                 return -ENOPROTOOPT;
1061
1062         newinfo = vmalloc(sizeof(struct ipt_table_info)
1063                           + SMP_ALIGN(tmp.size) * smp_num_cpus);
1064         if (!newinfo)
1065                 return -ENOMEM;
1066
1067         if (copy_from_user(newinfo->entries, user + sizeof(tmp),
1068                            tmp.size) != 0) {
1069                 ret = -EFAULT;
1070                 goto free_newinfo;
1071         }
1072
1073         counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
1074         if (!counters) {
1075                 ret = -ENOMEM;
1076                 goto free_newinfo;
1077         }
1078         memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));
1079
1080         ret = translate_table(tmp.name, tmp.valid_hooks,
1081                               newinfo, tmp.size, tmp.num_entries,
1082                               tmp.hook_entry, tmp.underflow);
1083         if (ret != 0)
1084                 goto free_newinfo_counters;
1085
1086         duprintf("ip_tables: Translated table\n");
1087
1088         t = find_table_lock(tmp.name, &ret, &ipt_mutex);
1089         if (!t)
1090                 goto free_newinfo_counters_untrans;
1091
1092         /* You lied! */
1093         if (tmp.valid_hooks != t->valid_hooks) {
1094                 duprintf("Valid hook crap: %08X vs %08X\n",
1095                          tmp.valid_hooks, t->valid_hooks);
1096                 ret = -EINVAL;
1097                 goto free_newinfo_counters_untrans_unlock;
1098         }
1099
1100         oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
1101         if (!oldinfo)
1102                 goto free_newinfo_counters_untrans_unlock;
1103
1104         /* Get the old counters. */
1105         get_counters(oldinfo, counters);
1106         /* Decrease module usage counts and free resource */
1107         IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
1108         vfree(oldinfo);
1109         /* Silent error: too late now. */
1110         copy_to_user(tmp.counters, counters,
1111                      sizeof(struct ipt_counters) * tmp.num_counters);
1112         vfree(counters);
1113         up(&ipt_mutex);
1114         return 0;
1115
1116  free_newinfo_counters_untrans_unlock:
1117         up(&ipt_mutex);
1118  free_newinfo_counters_untrans:
1119         IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
1120  free_newinfo_counters:
1121         vfree(counters);
1122  free_newinfo:
1123         vfree(newinfo);
1124         return ret;
1125 }
1126
1127 /* We're lazy, and add to the first CPU; overflow works its fey magic
1128  * and everything is OK. */
1129 static inline int
1130 add_counter_to_entry(struct ipt_entry *e,
1131                      const struct ipt_counters addme[],
1132                      unsigned int *i)
1133 {
1134 #if 0
1135         duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1136                  *i,
1137                  (long unsigned int)e->counters.pcnt,
1138                  (long unsigned int)e->counters.bcnt,
1139                  (long unsigned int)addme[*i].pcnt,
1140                  (long unsigned int)addme[*i].bcnt);
1141 #endif
1142
1143         ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1144
1145         (*i)++;
1146         return 0;
1147 }
1148
1149 static int
1150 do_add_counters(void *user, unsigned int len)
1151 {
1152         unsigned int i;
1153         struct ipt_counters_info tmp, *paddc;
1154         struct ipt_table *t;
1155         int ret;
1156
1157         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1158                 return -EFAULT;
1159
1160         if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
1161                 return -EINVAL;
1162
1163         paddc = vmalloc(len);
1164         if (!paddc)
1165                 return -ENOMEM;
1166
1167         if (copy_from_user(paddc, user, len) != 0) {
1168                 ret = -EFAULT;
1169                 goto free;
1170         }
1171
1172         t = find_table_lock(tmp.name, &ret, &ipt_mutex);
1173         if (!t)
1174                 goto free;
1175
1176         write_lock_bh(&t->lock);
1177         if (t->private->number != paddc->num_counters) {
1178                 ret = -EINVAL;
1179                 goto unlock_up_free;
1180         }
1181
1182         i = 0;
1183         IPT_ENTRY_ITERATE(t->private->entries,
1184                           t->private->size,
1185                           add_counter_to_entry,
1186                           paddc->counters,
1187                           &i);
1188  unlock_up_free:
1189         write_unlock_bh(&t->lock);
1190         up(&ipt_mutex);
1191  free:
1192         vfree(paddc);
1193
1194         return ret;
1195 }
1196
1197 static int
1198 do_ipt_set_ctl(struct sock *sk, int cmd, void *user, unsigned int len)
1199 {
1200         int ret;
1201
1202         if (!capable(CAP_NET_ADMIN))
1203                 return -EPERM;
1204
1205         switch (cmd) {
1206         case IPT_SO_SET_REPLACE:
1207                 ret = do_replace(user, len);
1208                 break;
1209
1210         case IPT_SO_SET_ADD_COUNTERS:
1211                 ret = do_add_counters(user, len);
1212                 break;
1213
1214         default:
1215                 duprintf("do_ipt_set_ctl:  unknown request %i\n", cmd);
1216                 ret = -EINVAL;
1217         }
1218
1219         return ret;
1220 }
1221
1222 static int
1223 do_ipt_get_ctl(struct sock *sk, int cmd, void *user, int *len)
1224 {
1225         int ret;
1226
1227         if (!capable(CAP_NET_ADMIN))
1228                 return -EPERM;
1229
1230         switch (cmd) {
1231         case IPT_SO_GET_INFO: {
1232                 char name[IPT_TABLE_MAXNAMELEN];
1233                 struct ipt_table *t;
1234
1235                 if (*len != sizeof(struct ipt_getinfo)) {
1236                         duprintf("length %u != %u\n", *len,
1237                                  sizeof(struct ipt_getinfo));
1238                         ret = -EINVAL;
1239                         break;
1240                 }
1241
1242                 if (copy_from_user(name, user, sizeof(name)) != 0) {
1243                         ret = -EFAULT;
1244                         break;
1245                 }
1246                 t = find_table_lock(name, &ret, &ipt_mutex);
1247                 if (t) {
1248                         struct ipt_getinfo info;
1249
1250                         info.valid_hooks = t->valid_hooks;
1251                         memcpy(info.hook_entry, t->private->hook_entry,
1252                                sizeof(info.hook_entry));
1253                         memcpy(info.underflow, t->private->underflow,
1254                                sizeof(info.underflow));
1255                         info.num_entries = t->private->number;
1256                         info.size = t->private->size;
1257                         strcpy(info.name, name);
1258
1259                         if (copy_to_user(user, &info, *len) != 0)
1260                                 ret = -EFAULT;
1261                         else
1262                                 ret = 0;
1263
1264                         up(&ipt_mutex);
1265                 }
1266         }
1267         break;
1268
1269         case IPT_SO_GET_ENTRIES: {
1270                 struct ipt_get_entries get;
1271
1272                 if (*len < sizeof(get)) {
1273                         duprintf("get_entries: %u < %u\n", *len, sizeof(get));
1274                         ret = -EINVAL;
1275                 } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
1276                         ret = -EFAULT;
1277                 } else if (*len != sizeof(struct ipt_get_entries) + get.size) {
1278                         duprintf("get_entries: %u != %u\n", *len,
1279                                  sizeof(struct ipt_get_entries) + get.size);
1280                         ret = -EINVAL;
1281                 } else
1282                         ret = get_entries(&get, user);
1283                 break;
1284         }
1285
1286         default:
1287                 duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
1288                 ret = -EINVAL;
1289         }
1290
1291         return ret;
1292 }
1293
1294 /* Registration hooks for targets. */
1295 int
1296 ipt_register_target(struct ipt_target *target)
1297 {
1298         int ret;
1299
1300         MOD_INC_USE_COUNT;
1301         ret = down_interruptible(&ipt_mutex);
1302         if (ret != 0) {
1303                 MOD_DEC_USE_COUNT;
1304                 return ret;
1305         }
1306         if (!list_named_insert(&ipt_target, target)) {
1307                 duprintf("ipt_register_target: `%s' already in list!\n",
1308                          target->name);
1309                 ret = -EINVAL;
1310                 MOD_DEC_USE_COUNT;
1311         }
1312         up(&ipt_mutex);
1313         return ret;
1314 }
1315
1316 void
1317 ipt_unregister_target(struct ipt_target *target)
1318 {
1319         down(&ipt_mutex);
1320         LIST_DELETE(&ipt_target, target);
1321         up(&ipt_mutex);
1322         MOD_DEC_USE_COUNT;
1323 }
1324
1325 int
1326 ipt_register_match(struct ipt_match *match)
1327 {
1328         int ret;
1329
1330         MOD_INC_USE_COUNT;
1331         ret = down_interruptible(&ipt_mutex);
1332         if (ret != 0) {
1333                 MOD_DEC_USE_COUNT;
1334                 return ret;
1335         }
1336         if (!list_named_insert(&ipt_match, match)) {
1337                 duprintf("ipt_register_match: `%s' already in list!\n",
1338                          match->name);
1339                 MOD_DEC_USE_COUNT;
1340                 ret = -EINVAL;
1341         }
1342         up(&ipt_mutex);
1343
1344         return ret;
1345 }
1346
1347 void
1348 ipt_unregister_match(struct ipt_match *match)
1349 {
1350         down(&ipt_mutex);
1351         LIST_DELETE(&ipt_match, match);
1352         up(&ipt_mutex);
1353         MOD_DEC_USE_COUNT;
1354 }
1355
1356 int ipt_register_table(struct ipt_table *table)
1357 {
1358         int ret;
1359         struct ipt_table_info *newinfo;
1360         static struct ipt_table_info bootstrap
1361                 = { 0, 0, { 0 }, { 0 }, { } };
1362
1363         MOD_INC_USE_COUNT;
1364         newinfo = vmalloc(sizeof(struct ipt_table_info)
1365                           + SMP_ALIGN(table->table->size) * smp_num_cpus);
1366         if (!newinfo) {
1367                 ret = -ENOMEM;
1368                 MOD_DEC_USE_COUNT;
1369                 return ret;
1370         }
1371         memcpy(newinfo->entries, table->table->entries, table->table->size);
1372
1373         ret = translate_table(table->name, table->valid_hooks,
1374                               newinfo, table->table->size,
1375                               table->table->num_entries,
1376                               table->table->hook_entry,
1377                               table->table->underflow);
1378         if (ret != 0) {
1379                 vfree(newinfo);
1380                 MOD_DEC_USE_COUNT;
1381                 return ret;
1382         }
1383
1384         ret = down_interruptible(&ipt_mutex);
1385         if (ret != 0) {
1386                 vfree(newinfo);
1387                 MOD_DEC_USE_COUNT;
1388                 return ret;
1389         }
1390
1391         /* Don't autoload: we'd eat our tail... */
1392         if (list_named_find(&ipt_tables, table->name)) {
1393                 ret = -EEXIST;
1394                 goto free_unlock;
1395         }
1396
1397         /* Simplifies replace_table code. */
1398         table->private = &bootstrap;
1399         if (!replace_table(table, 0, newinfo, &ret))
1400                 goto free_unlock;
1401
1402         duprintf("table->private->number = %u\n",
1403                  table->private->number);
1404
1405         table->lock = RW_LOCK_UNLOCKED;
1406         list_prepend(&ipt_tables, table);
1407
1408  unlock:
1409         up(&ipt_mutex);
1410         return ret;
1411
1412  free_unlock:
1413         vfree(newinfo);
1414         MOD_DEC_USE_COUNT;
1415         goto unlock;
1416 }
1417
1418 void ipt_unregister_table(struct ipt_table *table)
1419 {
1420         down(&ipt_mutex);
1421         LIST_DELETE(&ipt_tables, table);
1422         up(&ipt_mutex);
1423
1424         /* Decrease module usage counts and free resources */
1425         IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
1426                           cleanup_entry, NULL);
1427         vfree(table->private);
1428         MOD_DEC_USE_COUNT;
1429 }
1430
/* Tests whether `port' lies in the inclusive range [min, max] and XORs
   the 0/1 outcome with `invert'.  With invert being 0 or 1 the result is
   1 for a match and 0 otherwise. */
static inline int
port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
{
        int in_range = (port >= min) && (port <= max);

        return in_range ^ invert;
}
1440
1441 static int
1442 tcp_find_option(u_int8_t option,
1443                 const struct tcphdr *tcp,
1444                 u_int16_t datalen,
1445                 int invert,
1446                 int *hotdrop)
1447 {
1448         unsigned int i = sizeof(struct tcphdr);
1449         const u_int8_t *opt = (u_int8_t *)tcp;
1450
1451         duprintf("tcp_match: finding option\n");
1452         /* If we don't have the whole header, drop packet. */
1453         if (tcp->doff * 4 > datalen) {
1454                 *hotdrop = 1;
1455                 return 0;
1456         }
1457
1458         while (i < tcp->doff * 4) {
1459                 if (opt[i] == option) return !invert;
1460                 if (opt[i] < 2) i++;
1461                 else i += opt[i+1]?:1;
1462         }
1463
1464         return invert;
1465 }
1466
1467 static int
1468 tcp_match(const struct sk_buff *skb,
1469           const struct net_device *in,
1470           const struct net_device *out,
1471           const void *matchinfo,
1472           int offset,
1473           const void *hdr,
1474           u_int16_t datalen,
1475           int *hotdrop)
1476 {
1477         const struct tcphdr *tcp = hdr;
1478         const struct ipt_tcp *tcpinfo = matchinfo;
1479
1480         /* To quote Alan:
1481
1482            Don't allow a fragment of TCP 8 bytes in. Nobody normal
1483            causes this. Its a cracker trying to break in by doing a
1484            flag overwrite to pass the direction checks.
1485         */
1486
1487         if (offset == 1) {
1488                 duprintf("Dropping evil TCP offset=1 frag.\n");
1489                 *hotdrop = 1;
1490                 return 0;
1491         } else if (offset == 0 && datalen < sizeof(struct tcphdr)) {
1492                 /* We've been asked to examine this packet, and we
1493                    can't.  Hence, no choice but to drop. */
1494                 duprintf("Dropping evil TCP offset=0 tinygram.\n");
1495                 *hotdrop = 1;
1496                 return 0;
1497         }
1498
1499         /* FIXME: Try tcp doff >> packet len against various stacks --RR */
1500
1501 #define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
1502
1503         /* Must not be a fragment. */
1504         return !offset
1505                 && port_match(tcpinfo->spts[0], tcpinfo->spts[1],
1506                               ntohs(tcp->source),
1507                               !!(tcpinfo->invflags & IPT_TCP_INV_SRCPT))
1508                 && port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
1509                               ntohs(tcp->dest),
1510                               !!(tcpinfo->invflags & IPT_TCP_INV_DSTPT))
1511                 && FWINVTCP((((unsigned char *)tcp)[13]
1512                              & tcpinfo->flg_mask)
1513                             == tcpinfo->flg_cmp,
1514                             IPT_TCP_INV_FLAGS)
1515                 && (!tcpinfo->option
1516                     || tcp_find_option(tcpinfo->option, tcp, datalen,
1517                                        tcpinfo->invflags
1518                                        & IPT_TCP_INV_OPTION,
1519                                        hotdrop));
1520 }
1521
1522 /* Called when user tries to insert an entry of this type. */
1523 static int
1524 tcp_checkentry(const char *tablename,
1525                const struct ipt_ip *ip,
1526                void *matchinfo,
1527                unsigned int matchsize,
1528                unsigned int hook_mask)
1529 {
1530         const struct ipt_tcp *tcpinfo = matchinfo;
1531
1532         /* Must specify proto == TCP, and no unknown invflags */
1533         return ip->proto == IPPROTO_TCP
1534                 && !(ip->invflags & IPT_INV_PROTO)
1535                 && matchsize == IPT_ALIGN(sizeof(struct ipt_tcp))
1536                 && !(tcpinfo->invflags & ~IPT_TCP_INV_MASK);
1537 }
1538
1539 static int
1540 udp_match(const struct sk_buff *skb,
1541           const struct net_device *in,
1542           const struct net_device *out,
1543           const void *matchinfo,
1544           int offset,
1545           const void *hdr,
1546           u_int16_t datalen,
1547           int *hotdrop)
1548 {
1549         const struct udphdr *udp = hdr;
1550         const struct ipt_udp *udpinfo = matchinfo;
1551
1552         if (offset == 0 && datalen < sizeof(struct udphdr)) {
1553                 /* We've been asked to examine this packet, and we
1554                    can't.  Hence, no choice but to drop. */
1555                 duprintf("Dropping evil UDP tinygram.\n");
1556                 *hotdrop = 1;
1557                 return 0;
1558         }
1559
1560         /* Must not be a fragment. */
1561         return !offset
1562                 && port_match(udpinfo->spts[0], udpinfo->spts[1],
1563                               ntohs(udp->source),
1564                               !!(udpinfo->invflags & IPT_UDP_INV_SRCPT))
1565                 && port_match(udpinfo->dpts[0], udpinfo->dpts[1],
1566                               ntohs(udp->dest),
1567                               !!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
1568 }
1569
1570 /* Called when user tries to insert an entry of this type. */
1571 static int
1572 udp_checkentry(const char *tablename,
1573                const struct ipt_ip *ip,
1574                void *matchinfo,
1575                unsigned int matchinfosize,
1576                unsigned int hook_mask)
1577 {
1578         const struct ipt_udp *udpinfo = matchinfo;
1579
1580         /* Must specify proto == UDP, and no unknown invflags */
1581         if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) {
1582                 duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
1583                          IPPROTO_UDP);
1584                 return 0;
1585         }
1586         if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
1587                 duprintf("ipt_udp: matchsize %u != %u\n",
1588                          matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
1589                 return 0;
1590         }
1591         if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
1592                 duprintf("ipt_udp: unknown flags %X\n",
1593                          udpinfo->invflags);
1594                 return 0;
1595         }
1596
1597         return 1;
1598 }
1599
/* Tests whether `type' equals test_type and `code' lies in the inclusive
   range [min_code, max_code], then XORs the 0/1 outcome with `invert'.
   With invert being 0 or 1 the result is 1 for a match, 0 otherwise. */
static inline int
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
                     u_int8_t type, u_int8_t code,
                     int invert)
{
        int hit = 0;

        if (type == test_type && code >= min_code && code <= max_code)
                hit = 1;

        return hit ^ invert;
}
1609
1610 static int
1611 icmp_match(const struct sk_buff *skb,
1612            const struct net_device *in,
1613            const struct net_device *out,
1614            const void *matchinfo,
1615            int offset,
1616            const void *hdr,
1617            u_int16_t datalen,
1618            int *hotdrop)
1619 {
1620         const struct icmphdr *icmp = hdr;
1621         const struct ipt_icmp *icmpinfo = matchinfo;
1622
1623         if (offset == 0 && datalen < 2) {
1624                 /* We've been asked to examine this packet, and we
1625                    can't.  Hence, no choice but to drop. */
1626                 duprintf("Dropping evil ICMP tinygram.\n");
1627                 *hotdrop = 1;
1628                 return 0;
1629         }
1630
1631         /* Must not be a fragment. */
1632         return !offset
1633                 && icmp_type_code_match(icmpinfo->type,
1634                                         icmpinfo->code[0],
1635                                         icmpinfo->code[1],
1636                                         icmp->type, icmp->code,
1637                                         !!(icmpinfo->invflags&IPT_ICMP_INV));
1638 }
1639
1640 /* Called when user tries to insert an entry of this type. */
1641 static int
1642 icmp_checkentry(const char *tablename,
1643            const struct ipt_ip *ip,
1644            void *matchinfo,
1645            unsigned int matchsize,
1646            unsigned int hook_mask)
1647 {
1648         const struct ipt_icmp *icmpinfo = matchinfo;
1649
1650         /* Must specify proto == ICMP, and no unknown invflags */
1651         return ip->proto == IPPROTO_ICMP
1652                 && !(ip->invflags & IPT_INV_PROTO)
1653                 && matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
1654                 && !(icmpinfo->invflags & ~IPT_ICMP_INV);
1655 }
1656
1657 /* The built-in targets: standard (NULL) and error. */
1658 static struct ipt_target ipt_standard_target
1659 = { { NULL, NULL }, IPT_STANDARD_TARGET, NULL, NULL, NULL };
1660 static struct ipt_target ipt_error_target
1661 = { { NULL, NULL }, IPT_ERROR_TARGET, ipt_error, NULL, NULL };
1662
1663 static struct nf_sockopt_ops ipt_sockopts
1664 = { { NULL, NULL }, PF_INET, IPT_BASE_CTL, IPT_SO_SET_MAX+1, do_ipt_set_ctl,
1665     IPT_BASE_CTL, IPT_SO_GET_MAX+1, do_ipt_get_ctl, 0, NULL  };
1666
1667 static struct ipt_match tcp_matchstruct
1668 = { { NULL, NULL }, "tcp", &tcp_match, &tcp_checkentry, NULL };
1669 static struct ipt_match udp_matchstruct
1670 = { { NULL, NULL }, "udp", &udp_match, &udp_checkentry, NULL };
1671 static struct ipt_match icmp_matchstruct
1672 = { { NULL, NULL }, "icmp", &icmp_match, &icmp_checkentry, NULL };
1673
1674 #ifdef CONFIG_PROC_FS
1675 static inline int print_name(const struct ipt_table *t,
1676                              off_t start_offset, char *buffer, int length,
1677                              off_t *pos, unsigned int *count)
1678 {
1679         if ((*count)++ >= start_offset) {
1680                 unsigned int namelen;
1681
1682                 namelen = sprintf(buffer + *pos, "%s\n", t->name);
1683                 if (*pos + namelen > length) {
1684                         /* Stop iterating */
1685                         return 1;
1686                 }
1687                 *pos += namelen;
1688         }
1689         return 0;
1690 }
1691
1692 static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
1693 {
1694         off_t pos = 0;
1695         unsigned int count = 0;
1696
1697         if (down_interruptible(&ipt_mutex) != 0)
1698                 return 0;
1699
1700         LIST_FIND(&ipt_tables, print_name, struct ipt_table *,
1701                   offset, buffer, length, &pos, &count);
1702
1703         up(&ipt_mutex);
1704
1705         /* `start' hack - see fs/proc/generic.c line ~105 */
1706         *start=(char *)((unsigned long)count-offset);
1707         return pos;
1708 }
1709 #endif /*CONFIG_PROC_FS*/
1710
1711 static int __init init(void)
1712 {
1713         int ret;
1714
1715         /* Noone else will be downing sem now, so we won't sleep */
1716         down(&ipt_mutex);
1717         list_append(&ipt_target, &ipt_standard_target);
1718         list_append(&ipt_target, &ipt_error_target);
1719         list_append(&ipt_match, &tcp_matchstruct);
1720         list_append(&ipt_match, &udp_matchstruct);
1721         list_append(&ipt_match, &icmp_matchstruct);
1722         up(&ipt_mutex);
1723
1724         /* Register setsockopt */
1725         ret = nf_register_sockopt(&ipt_sockopts);
1726         if (ret < 0) {
1727                 duprintf("Unable to register sockopts.\n");
1728                 return ret;
1729         }
1730
1731 #ifdef CONFIG_PROC_FS
1732         if (!proc_net_create("ip_tables_names", 0, ipt_get_tables)) {
1733                 nf_unregister_sockopt(&ipt_sockopts);
1734                 return -ENOMEM;
1735         }
1736 #endif
1737
1738         printk("ip_tables: (c)2000 Netfilter core team\n");
1739         return 0;
1740 }
1741
1742 static void __exit fini(void)
1743 {
1744         nf_unregister_sockopt(&ipt_sockopts);
1745 #ifdef CONFIG_PROC_FS
1746         proc_net_remove("ip_tables_names");
1747 #endif
1748 }
1749
/* Symbols exported to other modules, grouped by what they operate on. */

/* Tables. */
EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);
EXPORT_SYMBOL(ipt_do_table);

/* Matches. */
EXPORT_SYMBOL(ipt_register_match);
EXPORT_SYMBOL(ipt_unregister_match);

/* Targets. */
EXPORT_SYMBOL(ipt_register_target);
EXPORT_SYMBOL(ipt_unregister_target);

/* Use init() and fini() as the module's load and unload routines. */
module_init(init);
module_exit(fini);