#include <linux/jhash.h>
#include <linux/netfilter/ipset/ip_set_timeout.h>
+#define CONCAT(a, b, c) a##b##c
+#define TOKEN(a, b, c) CONCAT(a, b, c)
+
+#define type_pf_next TOKEN(TYPE, PF, _elem)
+
/* Hashing which uses arrays to resolve clashing. The hash table is resized
* (doubled) when searching becomes too long.
* Internally jhash is used with the assumption that the size of the
/* Number of elements to store in an initial array block */
#define AHASH_INIT_SIZE 4
/* Max number of elements to store in an array block */
-#define AHASH_MAX_SIZE (3*4)
+#define AHASH_MAX_SIZE (3*AHASH_INIT_SIZE)
+
+/* Max number of elements can be tuned */
+#ifdef IP_SET_HASH_WITH_MULTI
+#define AHASH_MAX(h) ((h)->ahash_max)
+
+static inline u8
+tune_ahash_max(u8 curr, u32 multi)
+{
+ u32 n;
+
+ if (multi < curr)
+ return curr;
+
+ n = curr + AHASH_INIT_SIZE;
+ /* Currently, at listing one hash bucket must fit into a message.
+ * Therefore we have a hard limit here.
+ */
+ return n > curr && n <= 64 ? n : curr;
+}
+#define TUNE_AHASH_MAX(h, multi) \
+ ((h)->ahash_max = tune_ahash_max((h)->ahash_max, multi))
+#else
+#define AHASH_MAX(h) AHASH_MAX_SIZE
+#define TUNE_AHASH_MAX(h, multi)
+#endif
/* A hash bucket */
struct hbucket {
struct hbucket bucket[0]; /* hashtable buckets */
};
-#define hbucket(h, i) &((h)->bucket[i])
+#define hbucket(h, i) (&((h)->bucket[i]))
/* Book-keeping of the prefixes added to the set */
struct ip_set_hash_nets {
u32 initval; /* random jhash init value */
u32 timeout; /* timeout value, if enabled */
struct timer_list gc; /* garbage collection when timeout enabled */
+ struct type_pf_next next; /* temporary storage for uadd */
+#ifdef IP_SET_HASH_WITH_MULTI
+ u8 ahash_max; /* max elements in an array block */
+#endif
#ifdef IP_SET_HASH_WITH_NETMASK
u8 netmask; /* netmask value for subnets to store */
#endif
+#ifdef IP_SET_HASH_WITH_RBTREE
+ struct rb_root rbtree;
+#endif
#ifdef IP_SET_HASH_WITH_NETS
struct ip_set_hash_nets nets[0]; /* book-keeping of prefixes */
#endif
};
+static size_t
+htable_size(u8 hbits)
+{
+ size_t hsize;
+
+ /* We must fit both into u32 in jhash and size_t */
+ if (hbits > 31)
+ return 0;
+ hsize = jhash_size(hbits);
+ if ((((size_t)-1) - sizeof(struct htable))/sizeof(struct hbucket)
+ < hsize)
+ return 0;
+
+ return hsize * sizeof(struct hbucket) + sizeof(struct htable);
+}
+
/* Compute htable_bits from the user input parameter hashsize */
static u8
htable_bits(u32 hashsize)
}
#ifdef IP_SET_HASH_WITH_NETS
+#ifdef IP_SET_HASH_WITH_NETS_PACKED
+/* When cidr is packed with nomatch, cidr - 1 is stored in the entry */
+#define CIDR(cidr) (cidr + 1)
+#else
+#define CIDR(cidr) (cidr)
+#endif
#define SET_HOST_MASK(family) (family == AF_INET ? 32 : 128)
del_timer_sync(&h->gc);
ahash_destroy(h->table);
+#ifdef IP_SET_HASH_WITH_RBTREE
+ rbtree_destroy(&h->rbtree);
+#endif
kfree(h);
set->data = NULL;
}
-#define HKEY(data, initval, htable_bits) \
-(jhash2((u32 *)(data), sizeof(struct type_pf_elem)/sizeof(u32), initval) \
- & jhash_mask(htable_bits))
-
#endif /* _IP_SET_AHASH_H */
+#ifndef HKEY_DATALEN
+#define HKEY_DATALEN sizeof(struct type_pf_elem)
+#endif
+
+#define HKEY(data, initval, htable_bits) \
+(jhash2((u32 *)(data), HKEY_DATALEN/sizeof(u32), initval) \
+ & jhash_mask(htable_bits))
+
#define CONCAT(a, b, c) a##b##c
#define TOKEN(a, b, c) CONCAT(a, b, c)
#define type_pf_data_netmask TOKEN(TYPE, PF, _data_netmask)
#define type_pf_data_list TOKEN(TYPE, PF, _data_list)
#define type_pf_data_tlist TOKEN(TYPE, PF, _data_tlist)
+#define type_pf_data_next TOKEN(TYPE, PF, _data_next)
+#define type_pf_data_flags TOKEN(TYPE, PF, _data_flags)
+#ifdef IP_SET_HASH_WITH_NETS
+#define type_pf_data_match TOKEN(TYPE, PF, _data_match)
+#else
+#define type_pf_data_match(d) 1
+#endif
#define type_pf_elem TOKEN(TYPE, PF, _elem)
#define type_pf_telem TOKEN(TYPE, PF, _telem)
/* Add an element to the hash table when resizing the set:
* we spare the maintenance of the internal counters. */
static int
-type_pf_elem_add(struct hbucket *n, const struct type_pf_elem *value)
+type_pf_elem_add(struct hbucket *n, const struct type_pf_elem *value,
+ u8 ahash_max, u32 cadt_flags)
{
+ struct type_pf_elem *data;
+
if (n->pos >= n->size) {
void *tmp;
- if (n->size >= AHASH_MAX_SIZE)
+ if (n->size >= ahash_max)
/* Trigger rehashing */
return -EAGAIN;
n->value = tmp;
n->size += AHASH_INIT_SIZE;
}
- type_pf_data_copy(ahash_data(n, n->pos++), value);
+ data = ahash_data(n, n->pos++);
+ type_pf_data_copy(data, value);
+#ifdef IP_SET_HASH_WITH_NETS
+ /* Resizing won't overwrite stored flags */
+ if (cadt_flags)
+ type_pf_data_flags(data, cadt_flags);
+#endif
return 0;
}
htable_bits++;
pr_debug("attempt to resize set %s from %u to %u, t %p\n",
set->name, orig->htable_bits, htable_bits, orig);
- if (!htable_bits)
+ if (!htable_bits) {
/* In case we have plenty of memory :-) */
+ pr_warning("Cannot increase the hashsize of set %s further\n",
+ set->name);
return -IPSET_ERR_HASH_FULL;
+ }
t = ip_set_alloc(sizeof(*t)
+ jhash_size(htable_bits) * sizeof(struct hbucket));
if (!t)
for (j = 0; j < n->pos; j++) {
data = ahash_data(n, j);
m = hbucket(t, HKEY(data, h->initval, htable_bits));
- ret = type_pf_elem_add(m, data);
+ ret = type_pf_elem_add(m, data, AHASH_MAX(h), 0);
if (ret < 0) {
read_unlock_bh(&set->lock);
ahash_destroy(t);
return 0;
}
+static inline void
+type_pf_data_next(struct ip_set_hash *h, const struct type_pf_elem *d);
+
/* Add an element to a hash and update the internal counters when succeeded,
* otherwise report the proper error code. */
static int
-type_pf_add(struct ip_set *set, void *value, u32 timeout)
+type_pf_add(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
struct ip_set_hash *h = set->data;
struct htable *t;
const struct type_pf_elem *d = value;
struct hbucket *n;
int i, ret = 0;
- u32 key;
+ u32 key, multi = 0;
+ u32 cadt_flags = flags >> 16;
- if (h->elements >= h->maxelem)
+ if (h->elements >= h->maxelem) {
+ if (net_ratelimit())
+ pr_warning("Set %s is full, maxelem %u reached\n",
+ set->name, h->maxelem);
return -IPSET_ERR_HASH_FULL;
+ }
rcu_read_lock_bh();
t = rcu_dereference_bh(h->table);
key = HKEY(value, h->initval, t->htable_bits);
n = hbucket(t, key);
for (i = 0; i < n->pos; i++)
- if (type_pf_data_equal(ahash_data(n, i), d)) {
+ if (type_pf_data_equal(ahash_data(n, i), d, &multi)) {
+#ifdef IP_SET_HASH_WITH_NETS
+ if (flags & IPSET_FLAG_EXIST)
+ /* Support overwriting just the flags */
+ type_pf_data_flags(ahash_data(n, i),
+ cadt_flags);
+#endif
ret = -IPSET_ERR_EXIST;
goto out;
}
-
- ret = type_pf_elem_add(n, value);
- if (ret != 0)
+ TUNE_AHASH_MAX(h, multi);
+ ret = type_pf_elem_add(n, value, AHASH_MAX(h), cadt_flags);
+ if (ret != 0) {
+ if (ret == -EAGAIN)
+ type_pf_data_next(h, d);
goto out;
+ }
#ifdef IP_SET_HASH_WITH_NETS
- add_cidr(h, d->cidr, HOST_MASK);
+ add_cidr(h, CIDR(d->cidr), HOST_MASK);
#endif
h->elements++;
out:
* and free up space if possible.
*/
static int
-type_pf_del(struct ip_set *set, void *value, u32 timeout)
+type_pf_del(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
struct ip_set_hash *h = set->data;
struct htable *t = h->table;
struct hbucket *n;
int i;
struct type_pf_elem *data;
- u32 key;
+ u32 key, multi = 0;
key = HKEY(value, h->initval, t->htable_bits);
n = hbucket(t, key);
for (i = 0; i < n->pos; i++) {
data = ahash_data(n, i);
- if (!type_pf_data_equal(data, d))
+ if (!type_pf_data_equal(data, d, &multi))
continue;
if (i != n->pos - 1)
/* Not last one */
n->pos--;
h->elements--;
#ifdef IP_SET_HASH_WITH_NETS
- del_cidr(h, d->cidr, HOST_MASK);
+ del_cidr(h, CIDR(d->cidr), HOST_MASK);
#endif
if (n->pos + AHASH_INIT_SIZE < n->size) {
void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
struct hbucket *n;
const struct type_pf_elem *data;
int i, j = 0;
- u32 key;
+ u32 key, multi = 0;
u8 host_mask = SET_HOST_MASK(set->family);
pr_debug("test by nets\n");
- for (; j < host_mask && h->nets[j].cidr; j++) {
+ for (; j < host_mask && h->nets[j].cidr && !multi; j++) {
type_pf_data_netmask(d, h->nets[j].cidr);
key = HKEY(d, h->initval, t->htable_bits);
n = hbucket(t, key);
for (i = 0; i < n->pos; i++) {
data = ahash_data(n, i);
- if (type_pf_data_equal(data, d))
- return 1;
+ if (type_pf_data_equal(data, d, &multi))
+ return type_pf_data_match(data);
}
}
return 0;
/* Test whether the element is added to the set */
static int
-type_pf_test(struct ip_set *set, void *value, u32 timeout)
+type_pf_test(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
struct ip_set_hash *h = set->data;
struct htable *t = h->table;
struct hbucket *n;
const struct type_pf_elem *data;
int i;
- u32 key;
+ u32 key, multi = 0;
#ifdef IP_SET_HASH_WITH_NETS
/* If we test an IP address and not a network address,
* try all possible network sizes */
- if (d->cidr == SET_HOST_MASK(set->family))
+ if (CIDR(d->cidr) == SET_HOST_MASK(set->family))
return type_pf_test_cidrs(set, d, timeout);
#endif
n = hbucket(t, key);
for (i = 0; i < n->pos; i++) {
data = ahash_data(n, i);
- if (type_pf_data_equal(data, d))
- return 1;
+ if (type_pf_data_equal(data, d, &multi))
+ return type_pf_data_match(data);
}
return 0;
}
static int
type_pf_kadt(struct ip_set *set, const struct sk_buff * skb,
- enum ipset_adt adt, u8 pf, u8 dim, u8 flags);
+ const struct xt_action_param *par,
+ enum ipset_adt adt, const struct ip_set_adt_opt *opt);
static int
type_pf_uadt(struct ip_set *set, struct nlattr *tb[],
- enum ipset_adt adt, u32 *lineno, u32 flags);
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried);
static const struct ip_set_type_variant type_pf_variant = {
.kadt = type_pf_kadt,
static int
type_pf_elem_tadd(struct hbucket *n, const struct type_pf_elem *value,
- u32 timeout)
+ u8 ahash_max, u32 cadt_flags, u32 timeout)
{
struct type_pf_elem *data;
if (n->pos >= n->size) {
void *tmp;
- if (n->size >= AHASH_MAX_SIZE)
+ if (n->size >= ahash_max)
/* Trigger rehashing */
return -EAGAIN;
data = ahash_tdata(n, n->pos++);
type_pf_data_copy(data, value);
type_pf_data_timeout_set(data, timeout);
+#ifdef IP_SET_HASH_WITH_NETS
+ /* Resizing won't overwrite stored flags */
+ if (cadt_flags)
+ type_pf_data_flags(data, cadt_flags);
+#endif
return 0;
}
if (type_pf_data_expired(data)) {
pr_debug("expired %u/%u\n", i, j);
#ifdef IP_SET_HASH_WITH_NETS
- del_cidr(h, data->cidr, HOST_MASK);
+ del_cidr(h, CIDR(data->cidr), HOST_MASK);
#endif
if (j != n->pos - 1)
/* Not last one */
retry:
ret = 0;
htable_bits++;
- if (!htable_bits)
+ if (!htable_bits) {
/* In case we have plenty of memory :-) */
+ pr_warning("Cannot increase the hashsize of set %s further\n",
+ set->name);
return -IPSET_ERR_HASH_FULL;
+ }
t = ip_set_alloc(sizeof(*t)
+ jhash_size(htable_bits) * sizeof(struct hbucket));
if (!t)
for (j = 0; j < n->pos; j++) {
data = ahash_tdata(n, j);
m = hbucket(t, HKEY(data, h->initval, htable_bits));
- ret = type_pf_elem_tadd(m, data,
+ ret = type_pf_elem_tadd(m, data, AHASH_MAX(h), 0,
type_pf_data_timeout(data));
if (ret < 0) {
read_unlock_bh(&set->lock);
}
static int
-type_pf_tadd(struct ip_set *set, void *value, u32 timeout)
+type_pf_tadd(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
struct ip_set_hash *h = set->data;
struct htable *t = h->table;
const struct type_pf_elem *d = value;
struct hbucket *n;
struct type_pf_elem *data;
- int ret = 0, i, j = AHASH_MAX_SIZE + 1;
- u32 key;
+ int ret = 0, i, j = AHASH_MAX(h) + 1;
+ bool flag_exist = flags & IPSET_FLAG_EXIST;
+ u32 key, multi = 0;
+ u32 cadt_flags = flags >> 16;
if (h->elements >= h->maxelem)
/* FIXME: when set is full, we slow down here */
type_pf_expire(h);
- if (h->elements >= h->maxelem)
+ if (h->elements >= h->maxelem) {
+ if (net_ratelimit())
+ pr_warning("Set %s is full, maxelem %u reached\n",
+ set->name, h->maxelem);
return -IPSET_ERR_HASH_FULL;
+ }
rcu_read_lock_bh();
t = rcu_dereference_bh(h->table);
n = hbucket(t, key);
for (i = 0; i < n->pos; i++) {
data = ahash_tdata(n, i);
- if (type_pf_data_equal(data, d)) {
- if (type_pf_data_expired(data))
+ if (type_pf_data_equal(data, d, &multi)) {
+ if (type_pf_data_expired(data) || flag_exist)
+ /* Just timeout value may be updated */
j = i;
else {
ret = -IPSET_ERR_EXIST;
goto out;
}
- } else if (j == AHASH_MAX_SIZE + 1 &&
+ } else if (j == AHASH_MAX(h) + 1 &&
type_pf_data_expired(data))
j = i;
}
- if (j != AHASH_MAX_SIZE + 1) {
+ if (j != AHASH_MAX(h) + 1) {
data = ahash_tdata(n, j);
#ifdef IP_SET_HASH_WITH_NETS
- del_cidr(h, data->cidr, HOST_MASK);
- add_cidr(h, d->cidr, HOST_MASK);
+ del_cidr(h, CIDR(data->cidr), HOST_MASK);
+ add_cidr(h, CIDR(d->cidr), HOST_MASK);
#endif
type_pf_data_copy(data, d);
type_pf_data_timeout_set(data, timeout);
+#ifdef IP_SET_HASH_WITH_NETS
+ type_pf_data_flags(data, cadt_flags);
+#endif
goto out;
}
- ret = type_pf_elem_tadd(n, d, timeout);
- if (ret != 0)
+ TUNE_AHASH_MAX(h, multi);
+ ret = type_pf_elem_tadd(n, d, AHASH_MAX(h), cadt_flags, timeout);
+ if (ret != 0) {
+ if (ret == -EAGAIN)
+ type_pf_data_next(h, d);
goto out;
+ }
#ifdef IP_SET_HASH_WITH_NETS
- add_cidr(h, d->cidr, HOST_MASK);
+ add_cidr(h, CIDR(d->cidr), HOST_MASK);
#endif
h->elements++;
out:
}
static int
-type_pf_tdel(struct ip_set *set, void *value, u32 timeout)
+type_pf_tdel(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
struct ip_set_hash *h = set->data;
struct htable *t = h->table;
struct hbucket *n;
int i;
struct type_pf_elem *data;
- u32 key;
+ u32 key, multi = 0;
key = HKEY(value, h->initval, t->htable_bits);
n = hbucket(t, key);
for (i = 0; i < n->pos; i++) {
data = ahash_tdata(n, i);
- if (!type_pf_data_equal(data, d))
+ if (!type_pf_data_equal(data, d, &multi))
continue;
if (type_pf_data_expired(data))
return -IPSET_ERR_EXIST;
n->pos--;
h->elements--;
#ifdef IP_SET_HASH_WITH_NETS
- del_cidr(h, d->cidr, HOST_MASK);
+ del_cidr(h, CIDR(d->cidr), HOST_MASK);
#endif
if (n->pos + AHASH_INIT_SIZE < n->size) {
void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
struct type_pf_elem *data;
struct hbucket *n;
int i, j = 0;
- u32 key;
+ u32 key, multi = 0;
u8 host_mask = SET_HOST_MASK(set->family);
- for (; j < host_mask && h->nets[j].cidr; j++) {
+ for (; j < host_mask && h->nets[j].cidr && !multi; j++) {
type_pf_data_netmask(d, h->nets[j].cidr);
key = HKEY(d, h->initval, t->htable_bits);
n = hbucket(t, key);
for (i = 0; i < n->pos; i++) {
data = ahash_tdata(n, i);
- if (type_pf_data_equal(data, d))
- return !type_pf_data_expired(data);
+#ifdef IP_SET_HASH_WITH_MULTI
+ if (type_pf_data_equal(data, d, &multi)) {
+ if (!type_pf_data_expired(data))
+ return type_pf_data_match(data);
+ multi = 0;
+ }
+#else
+ if (type_pf_data_equal(data, d, &multi) &&
+ !type_pf_data_expired(data))
+ return type_pf_data_match(data);
+#endif
}
}
return 0;
#endif
static int
-type_pf_ttest(struct ip_set *set, void *value, u32 timeout)
+type_pf_ttest(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
struct ip_set_hash *h = set->data;
struct htable *t = h->table;
struct type_pf_elem *data, *d = value;
struct hbucket *n;
int i;
- u32 key;
+ u32 key, multi = 0;
#ifdef IP_SET_HASH_WITH_NETS
- if (d->cidr == SET_HOST_MASK(set->family))
+ if (CIDR(d->cidr) == SET_HOST_MASK(set->family))
return type_pf_ttest_cidrs(set, d, timeout);
#endif
key = HKEY(d, h->initval, t->htable_bits);
n = hbucket(t, key);
for (i = 0; i < n->pos; i++) {
data = ahash_tdata(n, i);
- if (type_pf_data_equal(data, d))
- return !type_pf_data_expired(data);
+ if (type_pf_data_equal(data, d, &multi) &&
+ !type_pf_data_expired(data))
+ return type_pf_data_match(data);
}
return 0;
}
IPSET_GC_PERIOD(h->timeout));
}
+#undef HKEY_DATALEN
+#undef HKEY
#undef type_pf_data_equal
#undef type_pf_data_isnull
#undef type_pf_data_copy
#undef type_pf_data_zero_out
+#undef type_pf_data_netmask
#undef type_pf_data_list
#undef type_pf_data_tlist
+#undef type_pf_data_next
+#undef type_pf_data_flags
+#undef type_pf_data_match
#undef type_pf_elem
#undef type_pf_telem
#undef type_pf_data_timeout
#undef type_pf_data_expired
-#undef type_pf_data_netmask
#undef type_pf_data_timeout_set
#undef type_pf_elem_add
#undef type_pf_test
#undef type_pf_elem_tadd
+#undef type_pf_del_telem
#undef type_pf_expire
#undef type_pf_tadd
#undef type_pf_tdel