2 * Packet matching code.
4 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
6 #include <linux/config.h>
7 #include <linux/skbuff.h>
8 #include <linux/kmod.h>
9 #include <linux/vmalloc.h>
10 #include <linux/netdevice.h>
11 #include <linux/module.h>
12 #include <linux/tcp.h>
13 #include <linux/udp.h>
14 #include <linux/icmp.h>
16 #include <asm/uaccess.h>
17 #include <asm/semaphore.h>
18 #include <linux/proc_fs.h>
20 #include <linux/netfilter_ipv4/ip_tables.h>
/* Compile-time debug switches: uncomment to enable packet-path tracing
   (dprintf), userspace-interface tracing (duprintf), or remote debugging
   (DEBUG_ALLOW_ALL makes the table always accept). */
22 /*#define DEBUG_IP_FIREWALL*/
23 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
24 /*#define DEBUG_IP_FIREWALL_USER*/
/* NOTE(review): extraction dropped the #else/#endif lines of the
   conditionals below; code text is preserved verbatim as found. */
26 #ifdef DEBUG_IP_FIREWALL
27 #define dprintf(format, args...) printk(format , ## args)
/* Fallback: dprintf compiles to nothing when tracing is off. */
29 #define dprintf(format, args...)
32 #ifdef DEBUG_IP_FIREWALL_USER
33 #define duprintf(format, args...) printk(format , ## args)
/* Fallback: duprintf compiles to nothing when tracing is off. */
35 #define duprintf(format, args...)
38 #ifdef CONFIG_NETFILTER_DEBUG
/* Assertion macro: logs function/file/line when the condition fails. */
39 #define IP_NF_ASSERT(x) \
42 printk("IP_NF_ASSERT: %s:%s:%u\n", \
43 __FUNCTION__, __FILE__, __LINE__); \
/* Non-debug build: assertions compile to nothing. */
46 #define IP_NF_ASSERT(x)
/* Round x up to the next SMP cache-line boundary. */
48 #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
50 /* Mutex protects lists (only traversed in user context). */
51 static DECLARE_MUTEX(ipt_mutex);
/* listhelp.h assertions map onto holding ipt_mutex (down_trylock fails
   iff the semaphore is already held). */
54 #define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
55 #define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
56 #include <linux/netfilter_ipv4/lockhelp.h>
57 #include <linux/netfilter_ipv4/listhelp.h>
37 /* All the better to debug you with... */
65 /* Locking is simple: we assume at worst case there will be one packet
66 in user context and one from bottom halves (or soft irq if Alexey's
67 softnet patch was applied).
69 We keep a set of rules for each CPU, so we can avoid write-locking
70 them; doing a readlock_bh() stops packets coming through if we're
73 To be cache friendly on SMP, we arrange them like so:
75 ... cache-align padding ...
78 Hence the start of any table is given by get_table() below. */
80 /* The table itself */
/* NOTE(review): the "struct ipt_table_info {" opening and its first
   fields were dropped by extraction; the members below belong to it. */
85 /* Number of entries: FIXME. --RR */
88 /* Entry points and underflows */
89 unsigned int hook_entry[NF_IP_NUMHOOKS];
90 unsigned int underflow[NF_IP_NUMHOOKS];
92 /* ipt_entry tables: one per CPU */
93 char entries[0] __attribute__((aligned(SMP_CACHE_BYTES)));
/* Global registries of targets, matches and tables; protected by ipt_mutex. */
96 static LIST_HEAD(ipt_target);
97 static LIST_HEAD(ipt_match);
98 static LIST_HEAD(ipt_tables);
/* Accumulate byte (b) and packet (p) deltas into an ipt_counters c. */
99 #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
/* Byte offset of CPU p's private copy of table t's rules (SMP build)... */
102 #define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
/* ...and the UP build, where there is only one copy. */
104 #define TABLE_OFFSET(t,p) 0
/* Debug wrappers that log every semaphore operation with its call site. */
108 #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
109 #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
110 #define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
/* Decide whether a packet's IP header, in/out interface names and
   fragment status satisfy a rule's ipt_ip specification.  FWINV xors
   each test with the corresponding rule invert flag.
   NOTE(review): extraction dropped the function prologue, returns and
   closing braces; the surviving lines are preserved verbatim. */
62 /* Returns whether matches rule or not. */
115 ip_packet_match(const struct iphdr *ip,
118 const struct ipt_ip *ipinfo,
124 #define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))
/* Source/destination address test under the rule's netmasks. */
126 if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
128 || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
130 dprintf("Source or dest mismatch.\n");
132 dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
134 NIPQUAD(ipinfo->smsk.s_addr),
135 NIPQUAD(ipinfo->src.s_addr),
136 ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
137 dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
139 NIPQUAD(ipinfo->dmsk.s_addr),
140 NIPQUAD(ipinfo->dst.s_addr),
141 ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
/* Interface names are compared word-at-a-time against a mask, so a
   trailing '+' wildcard in the rule is honoured via iniface_mask. */
77 /* Look for ifname matches; this should unroll nicely. */
146 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
147 ret |= (((const unsigned long *)indev)[i]
148 ^ ((const unsigned long *)ipinfo->iniface)[i])
149 & ((const unsigned long *)ipinfo->iniface_mask)[i];
152 if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
153 dprintf("VIA in mismatch (%s vs %s).%s\n",
154 indev, ipinfo->iniface,
155 ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
/* Same word-wise comparison for the output interface. */
159 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
160 ret |= (((const unsigned long *)outdev)[i]
161 ^ ((const unsigned long *)ipinfo->outiface)[i])
162 & ((const unsigned long *)ipinfo->outiface_mask)[i];
165 if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
166 dprintf("VIA out mismatch (%s vs %s).%s\n",
167 outdev, ipinfo->outiface,
168 ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
94 /* Check specific protocol */
174 && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
175 dprintf("Packet protocol %hi does not match %hi.%s\n",
176 ip->protocol, ipinfo->proto,
177 ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
99 /* If we have a fragment rule but the packet is not a fragment
100 * then we return zero */
183 if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
184 dprintf("Fragment rule but not fragment.%s\n",
185 ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
/* Sanity-check a rule's ipt_ip part from userspace: reject any flag or
   invert-flag bits outside the masks the kernel understands.
   NOTE(review): prologue/returns dropped by extraction; kept verbatim. */
104 ip_checkentry(const struct ipt_ip *ip)
195 if (ip->flags & ~IPT_F_MASK) {
196 duprintf("Unknown flag bits set: %08X\n",
197 ip->flags & ~IPT_F_MASK);
200 if (ip->invflags & ~IPT_INV_MASK) {
201 duprintf("Unknown invflag bits set: %08X\n",
202 ip->invflags & ~IPT_INV_MASK);
/* Target handler for the built-in ERROR target: hitting it at runtime
   means the table was corrupt, so log the embedded message and (in the
   dropped tail of this function) return NF_DROP. */
111 ipt_error(struct sk_buff **pskb,
210 unsigned int hooknum,
211 const struct net_device *in,
212 const struct net_device *out,
213 const void *targinfo,
217 printk("ip_tables: error: `%s'\n", (char *)targinfo);
/* IPT_MATCH_ITERATE callback: run one match's kernel hook against the
   packet.  A nonzero return from here stops iteration (i.e. the rule
   does NOT match); hotdrop lets a match demand the packet be dropped. */
117 int do_match(struct ipt_entry_match *m,
224 const struct sk_buff *skb,
225 const struct net_device *in,
226 const struct net_device *out,
121 /* Stop iteration if it doesn't match */
233 if (!m->u.kernel.match->match(skb, in, out, m->data,
234 offset, hdr, datalen, hotdrop))
/* Translate a byte offset within a rule blob into an entry pointer.
   All jump/return verdicts in a table are stored as byte offsets from
   the start of the per-CPU copy, so this is the universal decoder. */
static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
	return (struct ipt_entry *)((char *)base + offset);
}
/* The packet-filter engine: walk this CPU's copy of the table from the
   hook's entry point, matching each rule and executing its target,
   handling user-chain jumps with a back-pointer stack threaded through
   the entries themselves.  Runs under read_lock_bh(&table->lock).
   NOTE(review): extraction dropped many interior lines (loop header,
   several branches, returns); surviving text is preserved verbatim. */
127 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
248 ipt_do_table(struct sk_buff **pskb,
250 const struct net_device *in,
251 const struct net_device *out,
252 struct ipt_table *table,
255 static const char nulldevname[IFNAMSIZ] = { 0 };
133 /* Initializing verdict to NF_DROP keeps gcc happy. */
262 unsigned int verdict = NF_DROP;
263 const char *indev, *outdev;
265 struct ipt_entry *e, *back;
/* Cache header pointer, protocol payload and its length up front;
   they are recomputed after any target runs (it may reallocate skb). */
268 ip = (*pskb)->nh.iph;
269 protohdr = (u_int32_t *)ip + ip->ihl;
270 datalen = (*pskb)->len - ip->ihl * 4;
271 indev = in ? in->name : nulldevname;
272 outdev = out ? out->name : nulldevname;
142 /* We handle fragments by dealing with the first fragment as
143 * if it was a normal packet. All other fragments are treated
144 * normally, except that they will NEVER match rules that ask
145 * things we don't know, ie. tcp syn flag or ports). If the
146 * rule is also a fragment-specific rule, non-fragments won't
279 offset = ntohs(ip->frag_off) & IP_OFFSET;
281 read_lock_bh(&table->lock);
282 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
/* Select this CPU's private copy of the rules. */
283 table_base = (void *)table->private->entries
284 + TABLE_OFFSET(table->private,
285 cpu_number_map(smp_processor_id()));
286 e = get_entry(table_base, table->private->hook_entry[hook]);
154 #ifdef CONFIG_NETFILTER_DEBUG
155 /* Check noone else using our table */
290 if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
291 && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
292 printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
295 &((struct ipt_entry *)table_base)->comefrom,
296 ((struct ipt_entry *)table_base)->comefrom);
/* Magic sentinel: marks the table as in use by this CPU. */
298 ((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
162 /* For return from builtin chain */
302 back = get_entry(table_base, table->private->underflow[hook]);
164 (*pskb)->nfcache |= e->nfcache;
308 if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
309 struct ipt_entry_target *t;
/* All matches must agree before the target fires. */
311 if (IPT_MATCH_ITERATE(e, do_match,
314 datalen, &hotdrop) != 0)
317 ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
319 t = ipt_get_target(e);
320 IP_NF_ASSERT(t->u.kernel.target);
172 /* Standard target? */
322 if (!t->u.kernel.target->target) {
325 v = ((struct ipt_standard_target *)t)->verdict;
175 /* Pop from stack? */
328 if (v != IPT_RETURN) {
/* Negative verdicts encode NF_* values as -(NF_x)-1. */
329 verdict = (unsigned)(-v) - 1;
333 back = get_entry(table_base,
338 != (void *)e + e->next_offset) {
180 /* Save old back ptr in next entry */
340 struct ipt_entry *next
341 = (void *)e + e->next_offset;
343 = (void *)back - table_base;
184 /* set back pointer to next entry */
348 e = get_entry(table_base, v);
186 /* Targets which reenter must return
352 #ifdef CONFIG_NETFILTER_DEBUG
353 ((struct ipt_entry *)table_base)->comefrom
/* Non-standard target: invoke its kernel handler. */
356 verdict = t->u.kernel.target->target(pskb,
190 #ifdef CONFIG_NETFILTER_DEBUG
363 if (((struct ipt_entry *)table_base)->comefrom
365 && verdict == IPT_CONTINUE) {
366 printk("Target %s reentered!\n",
367 t->u.kernel.target->name);
370 ((struct ipt_entry *)table_base)->comefrom
196 /* Target might have changed stuff. */
374 ip = (*pskb)->nh.iph;
375 protohdr = (u_int32_t *)ip + ip->ihl;
376 datalen = (*pskb)->len - ip->ihl * 4;
378 if (verdict == IPT_CONTINUE)
379 e = (void *)e + e->next_offset;
/* No match: fall through to the next rule. */
387 e = (void *)e + e->next_offset;
203 #ifdef CONFIG_NETFILTER_DEBUG
/* Restore the "table free" sentinel before unlocking. */
392 ((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
394 read_unlock_bh(&table->lock);
206 #ifdef DEBUG_ALLOW_ALL
/* Look up a named object (table/match/target) in one of the global
   registries.  On success the mutex is left held for the caller to
   release; *error carries the down_interruptible result. */
207 /* If it succeeds, returns element and locks mutex */
407 find_inlist_lock_noload(struct list_head *head,
410 struct semaphore *mutex)
415 duprintf("find_inlist: searching for `%s' in %s.\n",
416 name, head == &ipt_target ? "ipt_target"
417 : head == &ipt_match ? "ipt_match"
418 : head == &ipt_tables ? "ipt_tables" : "UNKNOWN");
421 *error = down_interruptible(mutex);
425 ret = list_named_find(head, name);
/* Without CONFIG_KMOD, autoloading is impossible: alias straight to
   the no-load lookup (the prefix argument p is ignored). */
216 #define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m))
/* With CONFIG_KMOD: try the lookup, and on failure request_module()
   "<prefix><name>" (e.g. "ipt_tcp") and retry once. */
217 find_inlist_lock(struct list_head *head,
441 struct semaphore *mutex)
445 ret = find_inlist_lock_noload(head, name, error, mutex);
447 char modulename[IPT_FUNCTION_MAXNAMELEN + strlen(prefix) + 1];
448 strcpy(modulename, prefix);
449 strcat(modulename, name);
450 duprintf("find_inlist: loading `%s'.\n", modulename);
451 request_module(modulename);
452 ret = find_inlist_lock_noload(head, name, error, mutex);
/* Convenience wrappers: look up a table, match or target by name with
   the appropriate module-autoload prefix, returning with ipt_mutex
   held on success. */
226 static inline struct ipt_table *
460 find_table_lock(const char *name, int *error, struct semaphore *mutex)
462 return find_inlist_lock(&ipt_tables, name, "iptable_", error, mutex);
229 static inline struct ipt_match *
466 find_match_lock(const char *name, int *error, struct semaphore *mutex)
468 return find_inlist_lock(&ipt_match, name, "ipt_", error, mutex);
232 static inline struct ipt_target *
472 find_target_lock(const char *name, int *error, struct semaphore *mutex)
474 return find_inlist_lock(&ipt_target, name, "ipt_", error, mutex);
/* A rule whose entire ipt_ip part is zero matches everything; used by
   the loop checker to recognise chain-terminating policies.
   NOTE(review): the word-scan's return statements were dropped by
   extraction; kept verbatim. */
235 /* All zeroes == unconditional rule. */
479 unconditional(const struct ipt_ip *ip)
483 for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
484 if (((__u32 *)ip)[i])
/* Depth-first walk of the new ruleset from each built-in hook, with no
   recursion: back-pointers are parked in counters.pcnt and the visited
   hook bitmask in comefrom.  Detects loops (returns 0) and records
   which hooks can reach every rule.
   NOTE(review): several interior lines were dropped by extraction. */
239 /* Figures out from what hook each rule can be called: returns 0 if
240 there are loops. Puts hook bitmask in comefrom. */
493 mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
242 /* No recursion; use packet counter to save back ptrs (reset
243 to 0 as we leave), and comefrom to save source hook bitmask */
499 for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
500 unsigned int pos = newinfo->hook_entry[hook];
502 = (struct ipt_entry *)(newinfo->entries + pos);
504 if (!(valid_hooks & (1 << hook)))
248 /* Set initial back pointer. */
508 e->counters.pcnt = pos;
511 struct ipt_standard_target *t
512 = (void *)ipt_get_target(e);
/* Bit NF_IP_NUMHOOKS marks "on current path": seeing it again
   means we walked a cycle. */
514 if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
515 printk("iptables: loop hook %u pos %u %08X.\n",
516 hook, pos, e->comefrom);
520 |= ((1 << hook) | (1 << NF_IP_NUMHOOKS));
256 /* Unconditional return/END. */
523 if (e->target_offset == sizeof(struct ipt_entry)
524 && (strcmp(t->target.u.user.name,
525 IPT_STANDARD_TARGET) == 0)
527 && unconditional(&e->ip)) {
528 unsigned int oldpos, size;
262 /* Return: backtrack through the last
/* Leaving this path: clear the on-path bit. */
533 e->comefrom ^= (1<<NF_IP_NUMHOOKS);
264 #ifdef DEBUG_IP_FIREWALL_USER
536 & (1 << NF_IP_NUMHOOKS)) {
537 duprintf("Back unset "
/* Pop the saved back-pointer and reset the scratch counter. */
544 pos = e->counters.pcnt;
545 e->counters.pcnt = 0;
269 /* We're at the start. */
551 e = (struct ipt_entry *)
552 (newinfo->entries + pos);
553 } while (oldpos == pos + e->next_offset);
/* Continue with the rule after the one we returned to. */
556 size = e->next_offset;
557 e = (struct ipt_entry *)
558 (newinfo->entries + pos + size);
559 e->counters.pcnt = pos;
562 int newpos = t->verdict;
564 if (strcmp(t->target.u.user.name,
565 IPT_STANDARD_TARGET) == 0
280 /* This a jump; chase it. */
568 duprintf("Jump rule %u -> %u\n",
282 /* ... this is a fallthru */
572 newpos = pos + e->next_offset;
574 e = (struct ipt_entry *)
575 (newinfo->entries + newpos);
576 e->counters.pcnt = pos;
581 duprintf("Finished chain %u\n", hook);
/* IPT_MATCH_ITERATE callback used on teardown/error paths: call the
   match's destroy hook and drop its module refcount.  When i is given,
   stop after *i matches (used to unwind exactly the ones set up). */
288 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
589 if (i && (*i)-- == 0)
592 if (m->u.kernel.match->destroy)
593 m->u.kernel.match->destroy(m->data,
594 m->u.match_size - sizeof(*m));
596 if (m->u.kernel.match->me)
597 __MOD_DEC_USE_COUNT(m->u.kernel.match->me);
/* Validate a standard-target rule from userspace: correct target size,
   and a verdict that is either a sane in-table offset (positive) or a
   recognised special value (negative, bounded by NF_MAX_VERDICT). */
295 standard_check(const struct ipt_entry_target *t,
604 unsigned int max_offset)
606 struct ipt_standard_target *targ = (void *)t;
298 /* Check standard info. */
610 != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
611 duprintf("standard_check: target size %u != %u\n",
613 IPT_ALIGN(sizeof(struct ipt_standard_target)));
/* Positive verdicts are jump offsets and must stay inside the blob. */
617 if (targ->verdict >= 0
618 && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
619 duprintf("ipt_standard_check: bad verdict (%i)\n",
/* Negative verdicts encode NF_* results; reject out-of-range codes. */
624 if (targ->verdict < -NF_MAX_VERDICT - 1) {
625 duprintf("ipt_standard_check: bad negative verdict (%i)\n",
/* Resolve one rule match by name (autoloading its module if needed),
   bump the module refcount, then run the match's own checkentry hook;
   on checkentry failure the refcount is dropped again. */
307 check_match(struct ipt_entry_match *m,
635 const struct ipt_ip *ip,
636 unsigned int hookmask,
640 struct ipt_match *match;
/* Returns with ipt_mutex held on success; released in the dropped tail. */
642 match = find_match_lock(m->u.user.name, &ret, &ipt_mutex);
644 duprintf("check_match: `%s' not found\n", m->u.user.name);
648 __MOD_INC_USE_COUNT(match->me);
649 m->u.kernel.match = match;
652 if (m->u.kernel.match->checkentry
653 && !m->u.kernel.match->checkentry(name, ip, m->data,
654 m->u.match_size - sizeof(*m),
656 if (m->u.kernel.match->me)
657 __MOD_DEC_USE_COUNT(m->u.kernel.match->me);
658 duprintf("ip_tables: check failed for `%s'.\n",
659 m->u.kernel.match->name);
/* Forward declaration so check_entry can recognise the built-in
   standard target (defined near the bottom of the file). */
322 static struct ipt_target ipt_standard_target;
/* Fully validate one rule: its ipt_ip part, every match, and its
   target (resolved by name with module autoload).  On any failure,
   unwind the j matches already set up via cleanup_match. */
323 check_entry(struct ipt_entry *e, const char *name, unsigned int size,
673 struct ipt_entry_target *t;
674 struct ipt_target *target;
678 if (!ip_checkentry(&e->ip)) {
679 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
/* j counts successfully-checked matches for precise unwinding. */
684 ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
686 goto cleanup_matches;
688 t = ipt_get_target(e);
689 target = find_target_lock(t->u.user.name, &ret, &ipt_mutex);
691 duprintf("check_entry: `%s' not found\n", t->u.user.name);
692 goto cleanup_matches;
695 __MOD_INC_USE_COUNT(target->me);
696 t->u.kernel.target = target;
/* Standard target gets the built-in verdict check; others get their
   own checkentry hook. */
699 if (t->u.kernel.target == &ipt_standard_target) {
700 if (!standard_check(t, size)) {
702 goto cleanup_matches;
704 } else if (t->u.kernel.target->checkentry
705 && !t->u.kernel.target->checkentry(name, e, t->data,
709 if (t->u.kernel.target->me)
710 __MOD_DEC_USE_COUNT(t->u.kernel.target->me);
711 duprintf("ip_tables: check failed for `%s'.\n",
712 t->u.kernel.target->name);
714 goto cleanup_matches;
721 IPT_MATCH_ITERATE(e, cleanup_match, &j);
/* First-pass structural check of one rule in the userspace blob:
   alignment, minimum size, and recording which offsets correspond to
   hook entry points and underflows.  Also zeroes the counters/comefrom
   scratch fields before the deeper passes reuse them. */
347 check_entry_size_and_hooks(struct ipt_entry *e,
727 struct ipt_table_info *newinfo,
729 unsigned char *limit,
730 const unsigned int *hook_entries,
731 const unsigned int *underflows,
736 if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
737 || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
738 duprintf("Bad offset %p\n", e);
743 < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
744 duprintf("checking: element %p size %u\n",
357 /* Check hooks & underflows */
750 for (h = 0; h < NF_IP_NUMHOOKS; h++) {
751 if ((unsigned char *)e - base == hook_entries[h])
752 newinfo->hook_entry[h] = hook_entries[h];
753 if ((unsigned char *)e - base == underflows[h])
754 newinfo->underflow[h] = underflows[h];
363 /* FIXME: underflows must be unconditional, standard verdicts
364 < 0 (not IPT_RETURN). --RR */
365 /* Clear counters and comefrom */
761 e->counters = ((struct ipt_counters) { 0, 0 });
/* Teardown for one fully-initialised rule: destroy all matches, then
   the target, dropping each module refcount.  With i, stop after *i
   entries (partial unwind on translate_table failure). */
367 cleanup_entry(struct ipt_entry *e, unsigned int *i)
771 struct ipt_entry_target *t;
773 if (i && (*i)-- == 0)
370 /* Cleanup all matches */
777 IPT_MATCH_ITERATE(e, cleanup_match, NULL);
778 t = ipt_get_target(e);
779 if (t->u.kernel.target->destroy)
780 t->u.kernel.target->destroy(t->data,
781 t->u.target_size - sizeof(*t));
782 if (t->u.kernel.target->me)
783 __MOD_DEC_USE_COUNT(t->u.kernel.target->me);
/* Master validation of a userspace ruleset already copied into
   newinfo->entries: structural pass, hook assignment check, loop
   detection (mark_source_chains), per-rule semantic check, and finally
   replication of the checked CPU-0 copy to every other CPU.
   NOTE(review): error-path lines were dropped by extraction. */
378 /* Checks and translates the user-supplied table segment (held in
379 translate_table(const char *name,
792 unsigned int valid_hooks,
793 struct ipt_table_info *newinfo,
796 const unsigned int *hook_entries,
797 const unsigned int *underflows)
802 newinfo->size = size;
803 newinfo->number = number;
386 /* Init all hooks to impossible value. */
806 for (i = 0; i < NF_IP_NUMHOOKS; i++) {
807 newinfo->hook_entry[i] = 0xFFFFFFFF;
808 newinfo->underflow[i] = 0xFFFFFFFF;
811 duprintf("translate_table: size %u\n", newinfo->size);
391 /* Walk through entries, checking offsets. */
814 ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
815 check_entry_size_and_hooks,
818 newinfo->entries + size,
819 hook_entries, underflows, &i);
/* Entry count from userspace must match what the walk found. */
824 duprintf("translate_table: %u not %u entries\n",
397 /* Check hooks all assigned */
830 for (i = 0; i < NF_IP_NUMHOOKS; i++) {
399 /* Only hooks which are valid */
832 if (!(valid_hooks & (1 << i)))
834 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
835 duprintf("Invalid hook entry %u %u\n",
839 if (newinfo->underflow[i] == 0xFFFFFFFF) {
840 duprintf("Invalid underflow %u %u\n",
846 if (!mark_source_chains(newinfo, valid_hooks))
406 /* Finally, each sanity check must pass */
851 ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
852 check_entry, name, size, &i);
/* On failure, unwind exactly the i entries that passed check_entry. */
855 IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
410 /* And one copy for every other CPU */
861 for (i = 1; i < smp_num_cpus; i++) {
862 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
864 SMP_ALIGN(newinfo->size));
/* Atomically swap a table's private ruleset for newinfo under the
   table write lock, after verifying the caller's counter count still
   matches.  Returns the old info (for counter harvesting and cleanup)
   or NULL with *error set. */
414 static struct ipt_table_info *
871 replace_table(struct ipt_table *table,
872 unsigned int num_counters,
873 struct ipt_table_info *newinfo,
876 struct ipt_table_info *oldinfo;
419 #ifdef CONFIG_NETFILTER_DEBUG
880 struct ipt_entry *table_base;
/* Mark every CPU's first entry "free" for the comefrom sanity check. */
883 for (i = 0; i < smp_num_cpus; i++) {
885 (void *)newinfo->entries
886 + TABLE_OFFSET(newinfo, i);
888 table_base->comefrom = 0xdead57ac;
425 /* Do the substitution. */
894 write_lock_bh(&table->lock);
427 /* Check inside lock: is the old number correct? */
896 if (num_counters != table->private->number) {
897 duprintf("num_counters != table->private->number (%u/%u)\n",
898 num_counters, table->private->number);
899 write_unlock_bh(&table->lock);
903 oldinfo = table->private;
904 table->private = newinfo;
905 write_unlock_bh(&table->lock);
/* IPT_ENTRY_ITERATE callback: fold one entry's byte/packet counters
   into the running totals array at index *i (incremented in the
   dropped tail). */
435 add_entry_to_counter(const struct ipt_entry *e,
913 struct ipt_counters total[],
916 ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
/* Sum per-rule counters across every CPU's private copy of the table
   into the caller's counters[] array (caller holds the table lock). */
438 get_counters(const struct ipt_table_info *t,
924 struct ipt_counters counters[])
929 for (cpu = 0; cpu < smp_num_cpus; cpu++) {
931 IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
933 add_entry_to_counter,
/* Export the table to userspace: snapshot the summed counters under
   the write lock, bulk-copy CPU 0's rules, then patch each exported
   entry's counters and replace kernel match/target pointers with their
   user-visible names.
   NOTE(review): error-path/loop-bound lines dropped by extraction. */
443 copy_entries_to_user(unsigned int total_size,
941 struct ipt_table *table,
944 unsigned int off, num, countersize;
946 struct ipt_counters *counters;
447 /* We need atomic snapshot of counters: rest doesn't change
448 (other than comefrom, which userspace doesn't care
952 countersize = sizeof(struct ipt_counters) * table->private->number;
953 counters = vmalloc(countersize);
955 if (counters == NULL)
452 /* First, sum counters... */
959 memset(counters, 0, countersize);
960 write_lock_bh(&table->lock);
961 get_counters(table->private, counters);
962 write_unlock_bh(&table->lock);
457 /* ... then copy entire thing from CPU 0... */
965 if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
459 /* FIXME: use iterator macros --RR */
460 /* ... then go back and fix counters and names */
972 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
974 struct ipt_entry_match *m;
975 struct ipt_entry_target *t;
977 e = (struct ipt_entry *)(table->private->entries + off);
978 if (copy_to_user(userptr + off
979 + offsetof(struct ipt_entry, counters),
981 sizeof(counters[num])) != 0) {
/* Walk this entry's matches, writing each match's name out. */
986 for (i = sizeof(struct ipt_entry);
987 i < e->target_offset;
988 i += m->u.match_size) {
991 if (copy_to_user(userptr + off + i
992 + offsetof(struct ipt_entry_match,
994 m->u.kernel.match->name,
995 strlen(m->u.kernel.match->name)+1)
/* And the target's name. */
1002 t = ipt_get_target(e);
1003 if (copy_to_user(userptr + off + e->target_offset
1004 + offsetof(struct ipt_entry_target,
1006 t->u.kernel.target->name,
1007 strlen(t->u.kernel.target->name)+1) != 0) {
/* IPT_SO_GET_ENTRIES backend: find the table by name (with ipt_mutex
   held on success), verify the caller's size expectation, and export
   via copy_entries_to_user. */
480 get_entries(const struct ipt_get_entries *entries,
1020 struct ipt_get_entries *uptr)
1023 struct ipt_table *t;
1025 t = find_table_lock(entries->name, &ret, &ipt_mutex);
1027 duprintf("t->private->number = %u\n",
1028 t->private->number);
1029 if (entries->size == t->private->size)
1030 ret = copy_entries_to_user(t->private->size,
1031 t, uptr->entrytable);
1033 duprintf("get_entries: I've got %u not %u!\n",
1040 duprintf("get_entries: Can't find %s!\n",
/* IPT_SO_SET_REPLACE backend: copy the userspace replacement blob in,
   validate it with translate_table, swap it into the named table, then
   harvest the old ruleset's counters back out to the caller.
   NOTE(review): error-path lines dropped by extraction.  Also note two
   hardening gaps in this vintage of the code: tmp.size/tmp.num_counters
   arithmetic feeding vmalloc() is not checked for integer overflow, and
   some vmalloc() results are only visible as checked in dropped lines —
   verify against a complete copy of the file. */
491 do_replace(void *user, unsigned int len)
1050 struct ipt_replace tmp;
1051 struct ipt_table *t;
1052 struct ipt_table_info *newinfo, *oldinfo;
1053 struct ipt_counters *counters;
1055 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
497 /* Hack: Causes ipchains to give correct error msg --RR */
1059 if (len != sizeof(tmp) + tmp.size)
1060 return -ENOPROTOOPT;
/* One aligned copy of the rules per CPU. */
1062 newinfo = vmalloc(sizeof(struct ipt_table_info)
1063 + SMP_ALIGN(tmp.size) * smp_num_cpus)
1067 if (copy_from_user(newinfo->entries, user + sizeof(tmp),
1073 counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
1078 memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));
1080 ret = translate_table(tmp.name, tmp.valid_hooks,
1081 newinfo, tmp.size, tmp.num_entries,
1082 tmp.hook_entry, tmp.underflow);
1084 goto free_newinfo_counters;
1086 duprintf("ip_tables: Translated table\n");
1088 t = find_table_lock(tmp.name, &ret, &ipt_mutex);
1090 goto free_newinfo_counters_untrans;
/* The replacement must target the same set of hooks. */
1093 if (tmp.valid_hooks != t->valid_hooks) {
1094 duprintf("Valid hook crap: %08X vs %08X\n",
1095 tmp.valid_hooks, t->valid_hooks);
1097 goto free_newinfo_counters_untrans_unlock;
1100 oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
1102 goto free_newinfo_counters_untrans_unlock;
518 /* Get the old counters. */
1105 get_counters(oldinfo, counters);
520 /* Decrease module usage counts and free resource */
1107 IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
522 /* Silent error: too late now. */
1110 copy_to_user(tmp.counters, counters,
1111 sizeof(struct ipt_counters) * tmp.num_counters);
/* Stacked error-unwind labels: each frees what was built above it. */
525 free_newinfo_counters_untrans_unlock:
526 free_newinfo_counters_untrans:
1119 IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
528 free_newinfo_counters:
/* IPT_ENTRY_ITERATE callback: add the userspace-supplied deltas at
   index *i onto this entry's counters (CPU 0's copy only; see the
   comment about overflow doing the rest). */
529 /* We're lazy, and add to the first CPU; overflow works its fey magic
530 * and everything is OK. */
1130 add_counter_to_entry(struct ipt_entry *e,
1131 const struct ipt_counters addme[],
1135 duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1137 (long unsigned int)e->counters.pcnt,
1138 (long unsigned int)e->counters.bcnt,
1139 (long unsigned int)addme[*i].pcnt,
1140 (long unsigned int)addme[*i].bcnt);
1143 ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
/* IPT_SO_SET_ADD_COUNTERS backend: copy the counter-delta blob in and
   apply it to the named table under the write lock.
   NOTE(review): this vintage validates len against
   tmp.num_counters*sizeof(struct ipt_counters) without an overflow
   check on the multiplication — later kernels hardened this. */
539 do_add_counters(void *user, unsigned int len)
1153 struct ipt_counters_info tmp, *paddc;
1154 struct ipt_table *t;
1157 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1160 if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
1163 paddc = vmalloc(len);
1167 if (copy_from_user(paddc, user, len) != 0) {
1172 t = find_table_lock(tmp.name, &ret, &ipt_mutex);
1176 write_lock_bh(&t->lock);
/* Delta array must be exactly one slot per rule. */
1177 if (t->private->number != paddc->num_counters) {
1179 goto unlock_up_free;
1183 IPT_ENTRY_ITERATE(t->private->entries,
1185 add_counter_to_entry,
1189 write_unlock_bh(&t->lock);
/* setsockopt() dispatcher for iptables: requires CAP_NET_ADMIN, then
   routes to table replacement or counter addition. */
553 do_ipt_set_ctl(struct sock *sk, int cmd, void *user, unsigned int len)
1202 if (!capable(CAP_NET_ADMIN))
1206 case IPT_SO_SET_REPLACE:
1207 ret = do_replace(user, len);
1210 case IPT_SO_SET_ADD_COUNTERS:
1211 ret = do_add_counters(user, len);
1215 duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
/* getsockopt() dispatcher: CAP_NET_ADMIN required.  GET_INFO returns a
   table's hook offsets/sizes; GET_ENTRIES exports the full ruleset. */
560 do_ipt_get_ctl(struct sock *sk, int cmd, void *user, int *len)
1227 if (!capable(CAP_NET_ADMIN))
1231 case IPT_SO_GET_INFO: {
1232 char name[IPT_TABLE_MAXNAMELEN];
1233 struct ipt_table *t;
1235 if (*len != sizeof(struct ipt_getinfo)) {
1236 duprintf("length %u != %u\n", *len,
1237 sizeof(struct ipt_getinfo));
/* Caller passes the table name in the same buffer it reads back. */
1242 if (copy_from_user(name, user, sizeof(name)) != 0) {
1246 t = find_table_lock(name, &ret, &ipt_mutex);
1248 struct ipt_getinfo info;
1250 info.valid_hooks = t->valid_hooks;
1251 memcpy(info.hook_entry, t->private->hook_entry,
1252 sizeof(info.hook_entry));
1253 memcpy(info.underflow, t->private->underflow,
1254 sizeof(info.underflow));
1255 info.num_entries = t->private->number;
1256 info.size = t->private->size;
1257 strcpy(info.name, name);
1259 if (copy_to_user(user, &info, *len) != 0)
580 case IPT_SO_GET_ENTRIES: {
1270 struct ipt_get_entries get;
1272 if (*len < sizeof(get)) {
1273 duprintf("get_entries: %u < %u\n", *len, sizeof(get));
1275 } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
1277 } else if (*len != sizeof(struct ipt_get_entries) + get.size) {
1278 duprintf("get_entries: %u != %u\n", *len,
1279 sizeof(struct ipt_get_entries) + get.size);
1282 ret = get_entries(&get, user);
1287 duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
/* Public registration API for extension modules: insert/remove targets
   and matches in the global named lists under ipt_mutex.  Insertion
   fails if the name is already taken. */
590 /* Registration hooks for targets. */
1296 ipt_register_target(struct ipt_target *target)
1301 ret = down_interruptible(&ipt_mutex);
1306 if (!list_named_insert(&ipt_target, target)) {
1307 duprintf("ipt_register_target: `%s' already in list!\n",
595 ipt_unregister_target(struct ipt_target *target)
1320 LIST_DELETE(&ipt_target, target);
597 ipt_register_match(struct ipt_match *match)
1331 ret = down_interruptible(&ipt_mutex);
1336 if (!list_named_insert(&ipt_match, match)) {
1337 duprintf("ipt_register_match: `%s' already in list!\n",
601 ipt_unregister_match(struct ipt_match *match)
1351 LIST_DELETE(&ipt_match, match);
/* Register a built-in table (filter/nat/mangle): allocate per-CPU rule
   copies, validate the initial ruleset, then install it via
   replace_table using a static zero "bootstrap" info so the swap code
   has something to replace. */
603 int ipt_register_table(struct ipt_table *table)
1359 struct ipt_table_info *newinfo;
1360 static struct ipt_table_info bootstrap
1361 = { 0, 0, { 0 }, { 0 }, { } };
1364 newinfo = vmalloc(sizeof(struct ipt_table_info)
1365 + SMP_ALIGN(table->table->size) * smp_num_cpus);
1371 memcpy(newinfo->entries, table->table->entries, table->table->size);
1373 ret = translate_table(table->name, table->valid_hooks,
1374 newinfo, table->table->size,
1375 table->table->num_entries,
1376 table->table->hook_entry,
1377 table->table->underflow);
1384 ret = down_interruptible(&ipt_mutex);
616 /* Don't autoload: we'd eat our tail... */
1392 if (list_named_find(&ipt_tables, table->name)) {
618 /* Simplifies replace_table code. */
1398 table->private = &bootstrap;
1399 if (!replace_table(table, 0, newinfo, &ret))
1402 duprintf("table->private->number = %u\n",
1403 table->private->number);
1405 table->lock = RW_LOCK_UNLOCKED;
1406 list_prepend(&ipt_tables, table);
/* Unregister a table: unlink it from the global list, tear down every
   rule (dropping module refcounts) and free the per-CPU rule storage. */
625 void ipt_unregister_table(struct ipt_table *table)
1421 LIST_DELETE(&ipt_tables, table);
627 /* Decrease module usage counts and free resources */
1425 IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
1426 cleanup_entry, NULL);
1427 vfree(table->private);
/* Returns 1 if the port is matched by the range, 0 otherwise;
   invert flips the result (rule negation). */
static inline int
port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
{
	int in_range = (port >= min && port <= max);

	return in_range ^ invert;
}
/* Scan the TCP option area for a given option kind, honouring the
   option length encoding (kind 0/1 are single bytes).  Sets *hotdrop
   (in a dropped line) when doff claims more header than datalen holds.
   Result is inverted when the rule asks for option absence. */
634 tcp_find_option(u_int8_t option,
1443 const struct tcphdr *tcp,
639 /* Skip the fixed header; options start right after it. */
1448 unsigned int i = sizeof(struct tcphdr);
1449 const u_int8_t *opt = (u_int8_t *)tcp;
1451 duprintf("tcp_match: finding option\n");
639 /* If we don't have the whole header, drop packet. */
1453 if (tcp->doff * 4 > datalen) {
1458 while (i < tcp->doff * 4) {
1459 if (opt[i] == option) return !invert;
1460 if (opt[i] < 2) i++;
1461 else i += opt[i+1]?:1;
/* Match hook for -p tcp rules: checks source/destination port ranges,
   TCP flag mask/compare, and optionally a TCP option, with defenses
   against malicious fragmentation.
   NOTE(review): fragment-offset test lines dropped by extraction. */
645 tcp_match(const struct sk_buff *skb,
1469 const struct net_device *in,
1470 const struct net_device *out,
1471 const void *matchinfo,
1477 const struct tcphdr *tcp = hdr;
1478 const struct ipt_tcp *tcpinfo = matchinfo;
651 /* Don't allow a fragment of TCP 8 bytes in. Nobody normal
652 causes this. Its a cracker trying to break in by doing a
653 flag overwrite to pass the direction checks.
654 */
1488 duprintf("Dropping evil TCP offset=1 frag.\n");
1491 } else if (offset == 0 && datalen < sizeof(struct tcphdr)) {
656 /* We've been asked to examine this packet, and we
657 can't. Hence, no choice but to drop. */
1494 duprintf("Dropping evil TCP offset=0 tinygram.\n");
659 /* FIXME: Try tcp doff >> packet len against various stacks --RR */
1501 #define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
661 /* Must not be a fragment. */
1505 && port_match(tcpinfo->spts[0], tcpinfo->spts[1],
1507 !!(tcpinfo->invflags & IPT_TCP_INV_SRCPT))
1508 && port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
1510 !!(tcpinfo->invflags & IPT_TCP_INV_DSTPT))
/* Byte 13 of the TCP header holds the flag bits. */
1511 && FWINVTCP((((unsigned char *)tcp)[13]
1512 & tcpinfo->flg_mask)
1513 == tcpinfo->flg_cmp,
1515 && (!tcpinfo->option
1516 || tcp_find_option(tcpinfo->option, tcp, datalen,
1518 & IPT_TCP_INV_OPTION,
/* Userspace-insert validation for the tcp match: the rule must pin
   proto to TCP non-inverted, carry exactly an ipt_tcp payload, and use
   only known invert flags. */
672 /* Called when user tries to insert an entry of this type. */
1524 tcp_checkentry(const char *tablename,
1525 const struct ipt_ip *ip,
1527 unsigned int matchsize,
1528 unsigned int hook_mask)
1530 const struct ipt_tcp *tcpinfo = matchinfo;
678 /* Must specify proto == TCP, and no unknown invflags */
1533 return ip->proto == IPPROTO_TCP
1534 && !(ip->invflags & IPT_INV_PROTO)
1535 && matchsize == IPT_ALIGN(sizeof(struct ipt_tcp))
1536 && !(tcpinfo->invflags & ~IPT_TCP_INV_MASK);
/* Match hook for -p udp rules: source/destination port range checks,
   dropping truncated first-fragments that are too small to carry a
   full UDP header. */
683 udp_match(const struct sk_buff *skb,
1541 const struct net_device *in,
1542 const struct net_device *out,
1543 const void *matchinfo,
1549 const struct udphdr *udp = hdr;
1550 const struct ipt_udp *udpinfo = matchinfo;
1552 if (offset == 0 && datalen < sizeof(struct udphdr)) {
690 /* We've been asked to examine this packet, and we
691 can't. Hence, no choice but to drop. */
1555 duprintf("Dropping evil UDP tinygram.\n");
693 /* Must not be a fragment. */
1562 && port_match(udpinfo->spts[0], udpinfo->spts[1],
1564 !!(udpinfo->invflags & IPT_UDP_INV_SRCPT))
1565 && port_match(udpinfo->dpts[0], udpinfo->dpts[1],
1567 !!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
/* Userspace-insert validation for the udp match, mirroring
   tcp_checkentry but with per-failure diagnostics. */
698 /* Called when user tries to insert an entry of this type. */
1572 udp_checkentry(const char *tablename,
1573 const struct ipt_ip *ip,
1575 unsigned int matchinfosize,
1576 unsigned int hook_mask)
1578 const struct ipt_udp *udpinfo = matchinfo;
704 /* Must specify proto == UDP, and no unknown invflags */
1581 if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) {
1582 duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
1586 if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
1587 duprintf("ipt_udp: matchsize %u != %u\n",
1588 matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
1591 if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
1592 duprintf("ipt_udp: unknown flags %X\n",
/* Returns 1 if the type and code is matched by the range, 0 otherwise;
   invert flips the result.  A match requires the exact ICMP type plus a
   code inside [min_code, max_code]. */
static inline int
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
		     u_int8_t type, u_int8_t code,
		     int invert)
{
	int hit = (type == test_type
		   && code >= min_code
		   && code <= max_code);

	return hit ^ invert;
}
/* Match hook for -p icmp rules: delegates to icmp_type_code_match,
   dropping first-fragments too short to carry even type+code bytes. */
716 icmp_match(const struct sk_buff *skb,
1612 const struct net_device *in,
1613 const struct net_device *out,
1614 const void *matchinfo,
1620 const struct icmphdr *icmp = hdr;
1621 const struct ipt_icmp *icmpinfo = matchinfo;
1623 if (offset == 0 && datalen < 2) {
723 /* We've been asked to examine this packet, and we
724 can't. Hence, no choice but to drop. */
1626 duprintf("Dropping evil ICMP tinygram.\n");
726 /* Must not be a fragment. */
1633 && icmp_type_code_match(icmpinfo->type,
1636 icmp->type, icmp->code,
1637 !!(icmpinfo->invflags&IPT_ICMP_INV));
/* Userspace-insert validation for the icmp match: proto must be ICMP
   non-inverted, payload exactly ipt_icmp, invert flags known. */
730 /* Called when user tries to insert an entry of this type. */
1642 icmp_checkentry(const char *tablename,
1643 const struct ipt_ip *ip,
1645 unsigned int matchsize,
1646 unsigned int hook_mask)
1648 const struct ipt_icmp *icmpinfo = matchinfo;
736 /* Must specify proto == ICMP, and no unknown invflags */
1651 return ip->proto == IPPROTO_ICMP
1652 && !(ip->invflags & IPT_INV_PROTO)
1653 && matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
1654 && !(icmpinfo->invflags & ~IPT_ICMP_INV);
741 /* The built-in targets: standard (NULL) and error. */
/* standard has no target function: ipt_do_table interprets its verdict
   inline.  error's handler logs and drops (ipt_error above). */
742 static struct ipt_target ipt_standard_target
1659 = { { NULL, NULL }, IPT_STANDARD_TARGET, NULL, NULL, NULL };
744 static struct ipt_target ipt_error_target
1661 = { { NULL, NULL }, IPT_ERROR_TARGET, ipt_error, NULL, NULL };
/* sockopt registration: SET range handled by do_ipt_set_ctl, GET range
   by do_ipt_get_ctl, both based at IPT_BASE_CTL. */
746 static struct nf_sockopt_ops ipt_sockopts
1664 = { { NULL, NULL }, PF_INET, IPT_BASE_CTL, IPT_SO_SET_MAX+1, do_ipt_set_ctl,
1665 IPT_BASE_CTL, IPT_SO_GET_MAX+1, do_ipt_get_ctl, 0, NULL };
/* Built-in protocol matches, registered unconditionally at init. */
749 static struct ipt_match tcp_matchstruct
1668 = { { NULL, NULL }, "tcp", &tcp_match, &tcp_checkentry, NULL };
751 static struct ipt_match udp_matchstruct
1670 = { { NULL, NULL }, "udp", &udp_match, &udp_checkentry, NULL };
753 static struct ipt_match icmp_matchstruct
1672 = { { NULL, NULL }, "icmp", &icmp_match, &icmp_checkentry, NULL };
755 #ifdef CONFIG_PROC_FS
/* LIST_FIND callback: append one table name per line into the procfs
   read buffer, skipping entries before start_offset and stopping when
   the buffer would overflow. */
756 static inline int print_name(const struct ipt_table *t,
1676 off_t start_offset, char *buffer, int length,
1677 off_t *pos, unsigned int *count)
1679 if ((*count)++ >= start_offset) {
1680 unsigned int namelen;
1682 namelen = sprintf(buffer + *pos, "%s\n", t->name);
1683 if (*pos + namelen > length) {
763 /* Stop iterating */
/* read handler for /proc/net/ip_tables_names: list registered tables
   under ipt_mutex. */
764 static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
1695 unsigned int count = 0;
1697 if (down_interruptible(&ipt_mutex) != 0)
1700 LIST_FIND(&ipt_tables, print_name, struct ipt_table *,
1701 offset, buffer, length, &pos, &count);
769 /* `start' hack - see fs/proc/generic.c line ~105 */
1706 *start=(char *)((unsigned long)count-offset);
771 #endif /*CONFIG_PROC_FS*/
/* Module init: seed the registries with the built-in targets and
   matches (no locking needed: nothing else runs yet), register the
   sockopt interface, and create the /proc listing. */
772 static int __init init(void)
773 /* Noone else will be downing sem now, so we won't sleep */
1717 list_append(&ipt_target, &ipt_standard_target);
1718 list_append(&ipt_target, &ipt_error_target);
1719 list_append(&ipt_match, &tcp_matchstruct);
1720 list_append(&ipt_match, &udp_matchstruct);
1721 list_append(&ipt_match, &icmp_matchstruct);
779 /* Register setsockopt */
1725 ret = nf_register_sockopt(&ipt_sockopts);
1727 duprintf("Unable to register sockopts.\n");
782 #ifdef CONFIG_PROC_FS
/* Roll back the sockopt registration if procfs creation fails. */
1732 if (!proc_net_create("ip_tables_names", 0, ipt_get_tables)) {
1733 nf_unregister_sockopt(&ipt_sockopts);
1738 printk("ip_tables: (c)2000 Netfilter core team\n");
/* Module exit: undo init's registrations (sockopt interface and the
   /proc entry). */
786 static void __exit fini(void)
1744 nf_unregister_sockopt(&ipt_sockopts);
788 #ifdef CONFIG_PROC_FS
1746 proc_net_remove("ip_tables_names");
/* Public symbols for extension modules (ipt_*.o) and table modules
   (iptable_*.o): table/match/target registration plus the traversal
   engine itself. */
790 EXPORT_SYMBOL(ipt_register_table);
1751 EXPORT_SYMBOL(ipt_unregister_table);
1752 EXPORT_SYMBOL(ipt_register_match);
1753 EXPORT_SYMBOL(ipt_unregister_match);
1754 EXPORT_SYMBOL(ipt_do_table);
1755 EXPORT_SYMBOL(ipt_register_target);
1756 EXPORT_SYMBOL(ipt_unregister_target);