netfilter: ipset: adding ranges to hash types with timeout could still fail, fixed
include/linux/netfilter/ipset/ip_set_ahash.h
#ifndef _IP_SET_AHASH_H
#define _IP_SET_AHASH_H

#include <linux/rcupdate.h>
#include <linux/jhash.h>
#include <linux/netfilter/ipset/ip_set_timeout.h>

#define CONCAT(a, b, c)         a##b##c
#define TOKEN(a, b, c)          CONCAT(a, b, c)

#define type_pf_next            TOKEN(TYPE, PF, _elem)

/* Hashing which uses arrays to resolve clashes. The hash table is resized
 * (doubled) when searching becomes too long.
 * Internally jhash is used with the assumption that the size of the
 * stored data is a multiple of sizeof(u32). If storage supports timeout,
 * the timeout field must be the last one in the data structure - that field
 * is ignored when computing the hash key.
 *
 * Readers and resizing
 *
 * Resizing can be triggered by userspace commands only, and those
 * are serialized by the nfnl mutex. During resizing the set is
 * read-locked, so the only possible concurrent operations are
 * the kernel side readers. Those must be protected by proper RCU locking.
 */

/* Number of elements to store in an initial array block */
#define AHASH_INIT_SIZE                 4
/* Max number of elements to store in an array block */
#define AHASH_MAX_SIZE                  (3*4)
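
/*
 * Worked example of the sizing above: a bucket array is first
 * allocated with AHASH_INIT_SIZE (4) slots and is grown in steps of
 * AHASH_INIT_SIZE up to AHASH_MAX_SIZE (12) slots. When a 13th
 * element would have to be stored, the add functions return -EAGAIN
 * instead and the set is rehashed into a table twice as big.
 */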

/* A hash bucket */
struct hbucket {
        void *value;            /* the array of the values */
        u8 size;                /* size of the array */
        u8 pos;                 /* position of the first free entry */
};

/* The hash table: the table size is stored here to make resizing easy */
struct htable {
        u8 htable_bits;         /* size of hash table == 2^htable_bits */
        struct hbucket bucket[0]; /* hashtable buckets */
};

#define hbucket(h, i)           &((h)->bucket[i])

/* Book-keeping of the prefixes added to the set */
struct ip_set_hash_nets {
        u8 cidr;                /* the different cidr values in the set */
        u32 nets;               /* number of elements per cidr */
};

/* The generic ip_set hash structure */
struct ip_set_hash {
        struct htable *table;   /* the hash table */
        u32 maxelem;            /* max elements in the hash */
        u32 elements;           /* current number of elements */
        u32 initval;            /* random jhash init value */
        u32 timeout;            /* timeout value, if enabled */
        struct timer_list gc;   /* garbage collection when timeout enabled */
        struct type_pf_next next; /* temporary storage for uadd */
#ifdef IP_SET_HASH_WITH_NETMASK
        u8 netmask;             /* netmask value for subnets to store */
#endif
#ifdef IP_SET_HASH_WITH_NETS
        struct ip_set_hash_nets nets[0]; /* book-keeping of prefixes */
#endif
};

/* Compute htable_bits from the user input parameter hashsize */
static u8
htable_bits(u32 hashsize)
{
        /* Assume that hashsize == 2^htable_bits */
        u8 bits = fls(hashsize - 1);

        if (jhash_size(bits) != hashsize)
                /* Round up to the first 2^n value */
                bits = fls(hashsize);

        return bits;
}
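
/*
 * For example: htable_bits(1024) == 10, as fls(1023) == 10 and
 * jhash_size(10) == 1024 == hashsize. Inputs that are not a power
 * of two are rounded up: htable_bits(1000) == 10 (table size 1024)
 * and htable_bits(1025) == 11 (table size 2048).
 */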

#ifdef IP_SET_HASH_WITH_NETS

#define SET_HOST_MASK(family)   (family == AF_INET ? 32 : 128)

/* Network cidr size book-keeping when the hash stores
 * different-sized networks */
static void
add_cidr(struct ip_set_hash *h, u8 cidr, u8 host_mask)
{
        u8 i;

        ++h->nets[cidr-1].nets;

        pr_debug("add_cidr added %u: %u\n", cidr, h->nets[cidr-1].nets);

        if (h->nets[cidr-1].nets > 1)
                return;

        /* New cidr size */
        for (i = 0; i < host_mask && h->nets[i].cidr; i++) {
                /* Keep the array sorted by decreasing cidr value, so the
                 * larger (more specific) cidr values come first */
                if (h->nets[i].cidr < cidr)
                        swap(h->nets[i].cidr, cidr);
        }
        if (i < host_mask)
                h->nets[i].cidr = cidr;
}
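
/*
 * The nets[] array is thus kept sorted by decreasing cidr value,
 * most specific prefix first. For example, adding the first /24, /16
 * and /28 entries (in that order) to an empty IPv4 set leaves the
 * cidr values as 28, 24, 16, so the cidr-aware test functions below
 * try the longest prefixes first.
 */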

static void
del_cidr(struct ip_set_hash *h, u8 cidr, u8 host_mask)
{
        u8 i;

        --h->nets[cidr-1].nets;

        pr_debug("del_cidr deleted %u: %u\n", cidr, h->nets[cidr-1].nets);

        if (h->nets[cidr-1].nets != 0)
                return;

        /* All entries with this cidr size deleted, so clean up h->nets[] */
        for (i = 0; i < host_mask - 1 && h->nets[i].cidr; i++) {
                if (h->nets[i].cidr == cidr)
                        h->nets[i].cidr = cidr = h->nets[i+1].cidr;
        }
        h->nets[i - 1].cidr = 0;
}
#endif

/* Destroy the hashtable part of the set */
static void
ahash_destroy(struct htable *t)
{
        struct hbucket *n;
        u32 i;

        for (i = 0; i < jhash_size(t->htable_bits); i++) {
                n = hbucket(t, i);
                if (n->size)
                        /* FIXME: use slab cache */
                        kfree(n->value);
        }

        ip_set_free(t);
}

/* Calculate the actual memory size of the set data */
static size_t
ahash_memsize(const struct ip_set_hash *h, size_t dsize, u8 host_mask)
{
        u32 i;
        struct htable *t = h->table;
        size_t memsize = sizeof(*h)
                         + sizeof(*t)
#ifdef IP_SET_HASH_WITH_NETS
                         + sizeof(struct ip_set_hash_nets) * host_mask
#endif
                         + jhash_size(t->htable_bits) * sizeof(struct hbucket);

        for (i = 0; i < jhash_size(t->htable_bits); i++)
                memsize += t->bucket[i].size * dsize;

        return memsize;
}

/* Flush a hash type of set: destroy all elements */
static void
ip_set_hash_flush(struct ip_set *set)
{
        struct ip_set_hash *h = set->data;
        struct htable *t = h->table;
        struct hbucket *n;
        u32 i;

        for (i = 0; i < jhash_size(t->htable_bits); i++) {
                n = hbucket(t, i);
                if (n->size) {
                        n->size = n->pos = 0;
                        /* FIXME: use slab cache */
                        kfree(n->value);
                }
        }
#ifdef IP_SET_HASH_WITH_NETS
        memset(h->nets, 0, sizeof(struct ip_set_hash_nets)
                           * SET_HOST_MASK(set->family));
#endif
        h->elements = 0;
}

/* Destroy a hash type of set */
static void
ip_set_hash_destroy(struct ip_set *set)
{
        struct ip_set_hash *h = set->data;

        if (with_timeout(h->timeout))
                del_timer_sync(&h->gc);

        ahash_destroy(h->table);
        kfree(h);

        set->data = NULL;
}

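/*
 * Hash key of an element: jhash2() runs over the element as an array
 * of u32 words and the result is masked to the table size. Note that
 * sizeof(struct type_pf_elem) covers the timeout-less part of the
 * element only, so the timeout field (stored last) never influences
 * the key.
 */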
#define HKEY(data, initval, htable_bits)                                 \
(jhash2((u32 *)(data), sizeof(struct type_pf_elem)/sizeof(u32), initval) \
        & jhash_mask(htable_bits))

#endif /* _IP_SET_AHASH_H */

#define CONCAT(a, b, c)         a##b##c
#define TOKEN(a, b, c)          CONCAT(a, b, c)

/* Type/family dependent function prototypes */

#define type_pf_data_equal      TOKEN(TYPE, PF, _data_equal)
#define type_pf_data_isnull     TOKEN(TYPE, PF, _data_isnull)
#define type_pf_data_copy       TOKEN(TYPE, PF, _data_copy)
#define type_pf_data_zero_out   TOKEN(TYPE, PF, _data_zero_out)
#define type_pf_data_netmask    TOKEN(TYPE, PF, _data_netmask)
#define type_pf_data_list       TOKEN(TYPE, PF, _data_list)
#define type_pf_data_tlist      TOKEN(TYPE, PF, _data_tlist)
#define type_pf_data_next       TOKEN(TYPE, PF, _data_next)

#define type_pf_elem            TOKEN(TYPE, PF, _elem)
#define type_pf_telem           TOKEN(TYPE, PF, _telem)
#define type_pf_data_timeout    TOKEN(TYPE, PF, _data_timeout)
#define type_pf_data_expired    TOKEN(TYPE, PF, _data_expired)
#define type_pf_data_timeout_set TOKEN(TYPE, PF, _data_timeout_set)

#define type_pf_elem_add        TOKEN(TYPE, PF, _elem_add)
#define type_pf_add             TOKEN(TYPE, PF, _add)
#define type_pf_del             TOKEN(TYPE, PF, _del)
#define type_pf_test_cidrs      TOKEN(TYPE, PF, _test_cidrs)
#define type_pf_test            TOKEN(TYPE, PF, _test)

#define type_pf_elem_tadd       TOKEN(TYPE, PF, _elem_tadd)
#define type_pf_del_telem       TOKEN(TYPE, PF, _ahash_del_telem)
#define type_pf_expire          TOKEN(TYPE, PF, _expire)
#define type_pf_tadd            TOKEN(TYPE, PF, _tadd)
#define type_pf_tdel            TOKEN(TYPE, PF, _tdel)
#define type_pf_ttest_cidrs     TOKEN(TYPE, PF, _ahash_ttest_cidrs)
#define type_pf_ttest           TOKEN(TYPE, PF, _ahash_ttest)

#define type_pf_resize          TOKEN(TYPE, PF, _resize)
#define type_pf_tresize         TOKEN(TYPE, PF, _tresize)
#define type_pf_flush           ip_set_hash_flush
#define type_pf_destroy         ip_set_hash_destroy
#define type_pf_head            TOKEN(TYPE, PF, _head)
#define type_pf_list            TOKEN(TYPE, PF, _list)
#define type_pf_tlist           TOKEN(TYPE, PF, _tlist)
#define type_pf_same_set        TOKEN(TYPE, PF, _same_set)
#define type_pf_kadt            TOKEN(TYPE, PF, _kadt)
#define type_pf_uadt            TOKEN(TYPE, PF, _uadt)
#define type_pf_gc              TOKEN(TYPE, PF, _gc)
#define type_pf_gc_init         TOKEN(TYPE, PF, _gc_init)
#define type_pf_variant         TOKEN(TYPE, PF, _variant)
#define type_pf_tvariant        TOKEN(TYPE, PF, _tvariant)

/* Flavour without timeout */

/* Get the ith element from the array block n */
#define ahash_data(n, i)        \
        ((struct type_pf_elem *)((n)->value) + (i))

/* Add an element to the hash table when resizing the set:
 * we do not need to maintain the internal counters here. */
static int
type_pf_elem_add(struct hbucket *n, const struct type_pf_elem *value)
{
        if (n->pos >= n->size) {
                void *tmp;

                if (n->size >= AHASH_MAX_SIZE)
                        /* Trigger rehashing */
                        return -EAGAIN;

                tmp = kzalloc((n->size + AHASH_INIT_SIZE)
                              * sizeof(struct type_pf_elem),
                              GFP_ATOMIC);
                if (!tmp)
                        return -ENOMEM;
                if (n->size) {
                        memcpy(tmp, n->value,
                               sizeof(struct type_pf_elem) * n->size);
                        kfree(n->value);
                }
                n->value = tmp;
                n->size += AHASH_INIT_SIZE;
        }
        type_pf_data_copy(ahash_data(n, n->pos++), value);
        return 0;
}
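
/*
 * A full bucket (AHASH_MAX_SIZE elements) makes the element adders
 * return -EAGAIN. Within type_pf_resize()/type_pf_tresize() below this
 * restarts the rehashing with htable_bits + 1, i.e. a table twice as
 * big; from type_pf_add()/type_pf_tadd() the error propagates to the
 * ipset core, which resizes the set and then retries the add.
 */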

/* Resize a hash: create a new hash table with double the hash size
 * and insert the elements into it. Repeat until we succeed or
 * fail due to memory pressure. */
static int
type_pf_resize(struct ip_set *set, bool retried)
{
        struct ip_set_hash *h = set->data;
        struct htable *t, *orig = h->table;
        u8 htable_bits = orig->htable_bits;
        const struct type_pf_elem *data;
        struct hbucket *n, *m;
        u32 i, j;
        int ret;

retry:
        ret = 0;
        htable_bits++;
        pr_debug("attempt to resize set %s from %u to %u, t %p\n",
                 set->name, orig->htable_bits, htable_bits, orig);
        if (!htable_bits)
                /* In case we have plenty of memory :-) */
                return -IPSET_ERR_HASH_FULL;
        t = ip_set_alloc(sizeof(*t)
                         + jhash_size(htable_bits) * sizeof(struct hbucket));
        if (!t)
                return -ENOMEM;
        t->htable_bits = htable_bits;

        read_lock_bh(&set->lock);
        for (i = 0; i < jhash_size(orig->htable_bits); i++) {
                n = hbucket(orig, i);
                for (j = 0; j < n->pos; j++) {
                        data = ahash_data(n, j);
                        m = hbucket(t, HKEY(data, h->initval, htable_bits));
                        ret = type_pf_elem_add(m, data);
                        if (ret < 0) {
                                read_unlock_bh(&set->lock);
                                ahash_destroy(t);
                                if (ret == -EAGAIN)
                                        goto retry;
                                return ret;
                        }
                }
        }

        rcu_assign_pointer(h->table, t);
        read_unlock_bh(&set->lock);

        /* Give time to other readers of the set */
        synchronize_rcu_bh();

        pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
                 orig->htable_bits, orig, t->htable_bits, t);
        ahash_destroy(orig);

        return 0;
}

static void
type_pf_data_next(struct ip_set_hash *h, const struct type_pf_elem *d);

/* Add an element to a hash and update the internal counters when it
 * succeeds; otherwise report the proper error code. */
static int
type_pf_add(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
        struct ip_set_hash *h = set->data;
        struct htable *t;
        const struct type_pf_elem *d = value;
        struct hbucket *n;
        int i, ret = 0;
        u32 key;

        if (h->elements >= h->maxelem)
                return -IPSET_ERR_HASH_FULL;

        rcu_read_lock_bh();
        t = rcu_dereference_bh(h->table);
        key = HKEY(value, h->initval, t->htable_bits);
        n = hbucket(t, key);
        for (i = 0; i < n->pos; i++)
                if (type_pf_data_equal(ahash_data(n, i), d)) {
                        ret = -IPSET_ERR_EXIST;
                        goto out;
                }

        ret = type_pf_elem_add(n, value);
        if (ret != 0) {
                if (ret == -EAGAIN)
                        type_pf_data_next(h, d);
                goto out;
        }

#ifdef IP_SET_HASH_WITH_NETS
        add_cidr(h, d->cidr, HOST_MASK);
#endif
        h->elements++;
out:
        rcu_read_unlock_bh();
        return ret;
}

/* Delete an element from the hash: swap it with the last element
 * and free up space if possible.
 */
static int
type_pf_del(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
        struct ip_set_hash *h = set->data;
        struct htable *t = h->table;
        const struct type_pf_elem *d = value;
        struct hbucket *n;
        int i;
        struct type_pf_elem *data;
        u32 key;

        key = HKEY(value, h->initval, t->htable_bits);
        n = hbucket(t, key);
        for (i = 0; i < n->pos; i++) {
                data = ahash_data(n, i);
                if (!type_pf_data_equal(data, d))
                        continue;
                if (i != n->pos - 1)
                        /* Not last one */
                        type_pf_data_copy(data, ahash_data(n, n->pos - 1));

                n->pos--;
                h->elements--;
#ifdef IP_SET_HASH_WITH_NETS
                del_cidr(h, d->cidr, HOST_MASK);
#endif
                if (n->pos + AHASH_INIT_SIZE < n->size) {
                        void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
                                            * sizeof(struct type_pf_elem),
                                            GFP_ATOMIC);
                        if (!tmp)
                                return 0;
                        n->size -= AHASH_INIT_SIZE;
                        memcpy(tmp, n->value,
                               n->size * sizeof(struct type_pf_elem));
                        kfree(n->value);
                        n->value = tmp;
                }
                return 0;
        }

        return -IPSET_ERR_EXIST;
}
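
/*
 * Shrinking example: after a deletion the bucket array is reallocated
 * only when a whole AHASH_INIT_SIZE block can be given back, i.e. when
 * n->pos + AHASH_INIT_SIZE < n->size. With size == 12, deleting down
 * to pos == 7 satisfies 7 + 4 < 12 and the array shrinks to 8 slots;
 * at pos == 8 it is left alone. A failed kzalloc() is not an error
 * here: the element is already gone, only the spare room remains.
 */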

#ifdef IP_SET_HASH_WITH_NETS

/* Special test function which takes into account the different network
 * sizes added to the set */
static int
type_pf_test_cidrs(struct ip_set *set, struct type_pf_elem *d, u32 timeout)
{
        struct ip_set_hash *h = set->data;
        struct htable *t = h->table;
        struct hbucket *n;
        const struct type_pf_elem *data;
        int i, j = 0;
        u32 key;
        u8 host_mask = SET_HOST_MASK(set->family);

        pr_debug("test by nets\n");
        for (; j < host_mask && h->nets[j].cidr; j++) {
                type_pf_data_netmask(d, h->nets[j].cidr);
                key = HKEY(d, h->initval, t->htable_bits);
                n = hbucket(t, key);
                for (i = 0; i < n->pos; i++) {
                        data = ahash_data(n, i);
                        if (type_pf_data_equal(data, d))
                                return 1;
                }
        }
        return 0;
}
#endif

/* Test whether the element is in the set */
static int
type_pf_test(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
        struct ip_set_hash *h = set->data;
        struct htable *t = h->table;
        struct type_pf_elem *d = value;
        struct hbucket *n;
        const struct type_pf_elem *data;
        int i;
        u32 key;

#ifdef IP_SET_HASH_WITH_NETS
        /* If we test an IP address and not a network address,
         * try all possible network sizes */
        if (d->cidr == SET_HOST_MASK(set->family))
                return type_pf_test_cidrs(set, d, timeout);
#endif

        key = HKEY(d, h->initval, t->htable_bits);
        n = hbucket(t, key);
        for (i = 0; i < n->pos; i++) {
                data = ahash_data(n, i);
                if (type_pf_data_equal(data, d))
                        return 1;
        }
        return 0;
}

/* Reply to a HEADER request: fill out the header part of the set */
static int
type_pf_head(struct ip_set *set, struct sk_buff *skb)
{
        const struct ip_set_hash *h = set->data;
        struct nlattr *nested;
        size_t memsize;

        read_lock_bh(&set->lock);
        memsize = ahash_memsize(h, with_timeout(h->timeout)
                                        ? sizeof(struct type_pf_telem)
                                        : sizeof(struct type_pf_elem),
                                set->family == AF_INET ? 32 : 128);
        read_unlock_bh(&set->lock);

        nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
        if (!nested)
                goto nla_put_failure;
        NLA_PUT_NET32(skb, IPSET_ATTR_HASHSIZE,
                      htonl(jhash_size(h->table->htable_bits)));
        NLA_PUT_NET32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem));
#ifdef IP_SET_HASH_WITH_NETMASK
        if (h->netmask != HOST_MASK)
                NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, h->netmask);
#endif
        NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1));
        NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize));
        if (with_timeout(h->timeout))
                NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(h->timeout));
        ipset_nest_end(skb, nested);

        return 0;
nla_put_failure:
        return -EMSGSIZE;
}

/* Reply to a LIST/SAVE request: dump the elements of the specified set */
static int
type_pf_list(const struct ip_set *set,
             struct sk_buff *skb, struct netlink_callback *cb)
{
        const struct ip_set_hash *h = set->data;
        const struct htable *t = h->table;
        struct nlattr *atd, *nested;
        const struct hbucket *n;
        const struct type_pf_elem *data;
        u32 first = cb->args[2];
        /* We assume that one hash bucket fits into one page */
        void *incomplete;
        int i;

        atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
        if (!atd)
                return -EMSGSIZE;
        pr_debug("list hash set %s\n", set->name);
        for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) {
                incomplete = skb_tail_pointer(skb);
                n = hbucket(t, cb->args[2]);
                pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n);
                for (i = 0; i < n->pos; i++) {
                        data = ahash_data(n, i);
                        pr_debug("list hash %lu hbucket %p i %u, data %p\n",
                                 cb->args[2], n, i, data);
                        nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
                        if (!nested) {
                                if (cb->args[2] == first) {
                                        nla_nest_cancel(skb, atd);
                                        return -EMSGSIZE;
                                } else
                                        goto nla_put_failure;
                        }
                        if (type_pf_data_list(skb, data))
                                goto nla_put_failure;
                        ipset_nest_end(skb, nested);
                }
        }
        ipset_nest_end(skb, atd);
        /* Set listing finished */
        cb->args[2] = 0;

        return 0;

nla_put_failure:
        nlmsg_trim(skb, incomplete);
        ipset_nest_end(skb, atd);
        if (unlikely(first == cb->args[2])) {
                pr_warning("Can't list set %s: one bucket does not fit into "
                           "a message. Please report it!\n", set->name);
                cb->args[2] = 0;
                return -EMSGSIZE;
        }
        return 0;
}

static int
type_pf_kadt(struct ip_set *set, const struct sk_buff *skb,
             enum ipset_adt adt, const struct ip_set_adt_opt *opt);
static int
type_pf_uadt(struct ip_set *set, struct nlattr *tb[],
             enum ipset_adt adt, u32 *lineno, u32 flags, bool retried);

static const struct ip_set_type_variant type_pf_variant = {
        .kadt   = type_pf_kadt,
        .uadt   = type_pf_uadt,
        .adt    = {
                [IPSET_ADD] = type_pf_add,
                [IPSET_DEL] = type_pf_del,
                [IPSET_TEST] = type_pf_test,
        },
        .destroy = type_pf_destroy,
        .flush  = type_pf_flush,
        .head   = type_pf_head,
        .list   = type_pf_list,
        .resize = type_pf_resize,
        .same_set = type_pf_same_set,
};

/* Flavour with timeout support */

#define ahash_tdata(n, i) \
        (struct type_pf_elem *)((struct type_pf_telem *)((n)->value) + (i))

static inline u32
type_pf_data_timeout(const struct type_pf_elem *data)
{
        const struct type_pf_telem *tdata =
                (const struct type_pf_telem *) data;

        return tdata->timeout;
}

static inline bool
type_pf_data_expired(const struct type_pf_elem *data)
{
        const struct type_pf_telem *tdata =
                (const struct type_pf_telem *) data;

        return ip_set_timeout_expired(tdata->timeout);
}

static inline void
type_pf_data_timeout_set(struct type_pf_elem *data, u32 timeout)
{
        struct type_pf_telem *tdata = (struct type_pf_telem *) data;

        tdata->timeout = ip_set_timeout_set(timeout);
}

static int
type_pf_elem_tadd(struct hbucket *n, const struct type_pf_elem *value,
                  u32 timeout)
{
        struct type_pf_elem *data;

        if (n->pos >= n->size) {
                void *tmp;

                if (n->size >= AHASH_MAX_SIZE)
                        /* Trigger rehashing */
                        return -EAGAIN;

                tmp = kzalloc((n->size + AHASH_INIT_SIZE)
                              * sizeof(struct type_pf_telem),
                              GFP_ATOMIC);
                if (!tmp)
                        return -ENOMEM;
                if (n->size) {
                        memcpy(tmp, n->value,
                               sizeof(struct type_pf_telem) * n->size);
                        kfree(n->value);
                }
                n->value = tmp;
                n->size += AHASH_INIT_SIZE;
        }
        data = ahash_tdata(n, n->pos++);
        type_pf_data_copy(data, value);
        type_pf_data_timeout_set(data, timeout);
        return 0;
}

/* Delete expired elements from the hashtable */
static void
type_pf_expire(struct ip_set_hash *h)
{
        struct htable *t = h->table;
        struct hbucket *n;
        struct type_pf_elem *data;
        u32 i;
        int j;

        for (i = 0; i < jhash_size(t->htable_bits); i++) {
                n = hbucket(t, i);
                for (j = 0; j < n->pos; j++) {
                        data = ahash_tdata(n, j);
                        if (type_pf_data_expired(data)) {
                                pr_debug("expired %u/%u\n", i, j);
#ifdef IP_SET_HASH_WITH_NETS
                                del_cidr(h, data->cidr, HOST_MASK);
#endif
                                if (j != n->pos - 1)
                                        /* Not last one */
                                        type_pf_data_copy(data,
                                                ahash_tdata(n, n->pos - 1));
                                n->pos--;
                                h->elements--;
                        }
                }
                if (n->pos + AHASH_INIT_SIZE < n->size) {
                        void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
                                            * sizeof(struct type_pf_telem),
                                            GFP_ATOMIC);
                        if (!tmp)
                                /* Still try to delete expired elements */
                                continue;
                        n->size -= AHASH_INIT_SIZE;
                        memcpy(tmp, n->value,
                               n->size * sizeof(struct type_pf_telem));
                        kfree(n->value);
                        n->value = tmp;
                }
        }
}

static int
type_pf_tresize(struct ip_set *set, bool retried)
{
        struct ip_set_hash *h = set->data;
        struct htable *t, *orig = h->table;
        u8 htable_bits = orig->htable_bits;
        const struct type_pf_elem *data;
        struct hbucket *n, *m;
        u32 i, j;
        int ret;

        /* Try to clean up expired elements once */
        if (!retried) {
                i = h->elements;
                write_lock_bh(&set->lock);
                type_pf_expire(set->data);
                write_unlock_bh(&set->lock);
                if (h->elements < i)
                        return 0;
        }

retry:
        ret = 0;
        htable_bits++;
        if (!htable_bits)
                /* In case we have plenty of memory :-) */
                return -IPSET_ERR_HASH_FULL;
        t = ip_set_alloc(sizeof(*t)
                         + jhash_size(htable_bits) * sizeof(struct hbucket));
        if (!t)
                return -ENOMEM;
        t->htable_bits = htable_bits;

        read_lock_bh(&set->lock);
        for (i = 0; i < jhash_size(orig->htable_bits); i++) {
                n = hbucket(orig, i);
                for (j = 0; j < n->pos; j++) {
                        data = ahash_tdata(n, j);
                        m = hbucket(t, HKEY(data, h->initval, htable_bits));
                        ret = type_pf_elem_tadd(m, data,
                                                type_pf_data_timeout(data));
                        if (ret < 0) {
                                read_unlock_bh(&set->lock);
                                ahash_destroy(t);
                                if (ret == -EAGAIN)
                                        goto retry;
                                return ret;
                        }
                }
        }

        rcu_assign_pointer(h->table, t);
        read_unlock_bh(&set->lock);

        /* Give time to other readers of the set */
        synchronize_rcu_bh();

        ahash_destroy(orig);

        return 0;
}

static int
type_pf_tadd(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
        struct ip_set_hash *h = set->data;
        struct htable *t = h->table;
        const struct type_pf_elem *d = value;
        struct hbucket *n;
        struct type_pf_elem *data;
        int ret = 0, i, j = AHASH_MAX_SIZE + 1;
        bool flag_exist = flags & IPSET_FLAG_EXIST;
        u32 key;

        if (h->elements >= h->maxelem)
                /* FIXME: when set is full, we slow down here */
                type_pf_expire(h);
        if (h->elements >= h->maxelem)
                return -IPSET_ERR_HASH_FULL;

        rcu_read_lock_bh();
        t = rcu_dereference_bh(h->table);
        key = HKEY(d, h->initval, t->htable_bits);
        n = hbucket(t, key);
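        /* Reuse the slot of an expired element if there is one: j keeps
         * the first such position, while j == AHASH_MAX_SIZE + 1 (an
         * impossible index) means no reusable slot was found. With
         * IPSET_FLAG_EXIST a matching live element is overwritten the
         * same way. */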
        for (i = 0; i < n->pos; i++) {
                data = ahash_tdata(n, i);
                if (type_pf_data_equal(data, d)) {
                        if (type_pf_data_expired(data) || flag_exist)
                                j = i;
                        else {
                                ret = -IPSET_ERR_EXIST;
                                goto out;
                        }
                } else if (j == AHASH_MAX_SIZE + 1 &&
                           type_pf_data_expired(data))
                        j = i;
        }
        if (j != AHASH_MAX_SIZE + 1) {
                data = ahash_tdata(n, j);
#ifdef IP_SET_HASH_WITH_NETS
                del_cidr(h, data->cidr, HOST_MASK);
                add_cidr(h, d->cidr, HOST_MASK);
#endif
                type_pf_data_copy(data, d);
                type_pf_data_timeout_set(data, timeout);
                goto out;
        }
        ret = type_pf_elem_tadd(n, d, timeout);
        if (ret != 0) {
                if (ret == -EAGAIN)
                        type_pf_data_next(h, d);
                goto out;
        }

#ifdef IP_SET_HASH_WITH_NETS
        add_cidr(h, d->cidr, HOST_MASK);
#endif
        h->elements++;
out:
        rcu_read_unlock_bh();
        return ret;
}

static int
type_pf_tdel(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
        struct ip_set_hash *h = set->data;
        struct htable *t = h->table;
        const struct type_pf_elem *d = value;
        struct hbucket *n;
        int i;
        struct type_pf_elem *data;
        u32 key;

        key = HKEY(value, h->initval, t->htable_bits);
        n = hbucket(t, key);
        for (i = 0; i < n->pos; i++) {
                data = ahash_tdata(n, i);
                if (!type_pf_data_equal(data, d))
                        continue;
                if (type_pf_data_expired(data))
                        return -IPSET_ERR_EXIST;
                if (i != n->pos - 1)
                        /* Not last one */
                        type_pf_data_copy(data, ahash_tdata(n, n->pos - 1));

                n->pos--;
                h->elements--;
#ifdef IP_SET_HASH_WITH_NETS
                del_cidr(h, d->cidr, HOST_MASK);
#endif
                if (n->pos + AHASH_INIT_SIZE < n->size) {
                        void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
                                            * sizeof(struct type_pf_telem),
                                            GFP_ATOMIC);
                        if (!tmp)
                                return 0;
                        n->size -= AHASH_INIT_SIZE;
                        memcpy(tmp, n->value,
                               n->size * sizeof(struct type_pf_telem));
                        kfree(n->value);
                        n->value = tmp;
                }
                return 0;
        }

        return -IPSET_ERR_EXIST;
}

#ifdef IP_SET_HASH_WITH_NETS
static int
type_pf_ttest_cidrs(struct ip_set *set, struct type_pf_elem *d, u32 timeout)
{
        struct ip_set_hash *h = set->data;
        struct htable *t = h->table;
        struct type_pf_elem *data;
        struct hbucket *n;
        int i, j = 0;
        u32 key;
        u8 host_mask = SET_HOST_MASK(set->family);

        for (; j < host_mask && h->nets[j].cidr; j++) {
                type_pf_data_netmask(d, h->nets[j].cidr);
                key = HKEY(d, h->initval, t->htable_bits);
                n = hbucket(t, key);
                for (i = 0; i < n->pos; i++) {
                        data = ahash_tdata(n, i);
                        if (type_pf_data_equal(data, d))
                                return !type_pf_data_expired(data);
                }
        }
        return 0;
}
#endif

static int
type_pf_ttest(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
        struct ip_set_hash *h = set->data;
        struct htable *t = h->table;
        struct type_pf_elem *data, *d = value;
        struct hbucket *n;
        int i;
        u32 key;

#ifdef IP_SET_HASH_WITH_NETS
        if (d->cidr == SET_HOST_MASK(set->family))
                return type_pf_ttest_cidrs(set, d, timeout);
#endif
        key = HKEY(d, h->initval, t->htable_bits);
        n = hbucket(t, key);
        for (i = 0; i < n->pos; i++) {
                data = ahash_tdata(n, i);
                if (type_pf_data_equal(data, d))
                        return !type_pf_data_expired(data);
        }
        return 0;
}

static int
type_pf_tlist(const struct ip_set *set,
              struct sk_buff *skb, struct netlink_callback *cb)
{
        const struct ip_set_hash *h = set->data;
        const struct htable *t = h->table;
        struct nlattr *atd, *nested;
        const struct hbucket *n;
        const struct type_pf_elem *data;
        u32 first = cb->args[2];
        /* We assume that one hash bucket fits into one page */
        void *incomplete;
        int i;

        atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
        if (!atd)
                return -EMSGSIZE;
        for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) {
                incomplete = skb_tail_pointer(skb);
                n = hbucket(t, cb->args[2]);
                for (i = 0; i < n->pos; i++) {
                        data = ahash_tdata(n, i);
                        pr_debug("list %p %u\n", n, i);
                        if (type_pf_data_expired(data))
                                continue;
                        pr_debug("do list %p %u\n", n, i);
                        nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
                        if (!nested) {
                                if (cb->args[2] == first) {
                                        nla_nest_cancel(skb, atd);
                                        return -EMSGSIZE;
                                } else
                                        goto nla_put_failure;
                        }
                        if (type_pf_data_tlist(skb, data))
                                goto nla_put_failure;
                        ipset_nest_end(skb, nested);
                }
        }
        ipset_nest_end(skb, atd);
        /* Set listing finished */
        cb->args[2] = 0;

        return 0;

nla_put_failure:
        nlmsg_trim(skb, incomplete);
        ipset_nest_end(skb, atd);
        if (unlikely(first == cb->args[2])) {
                pr_warning("Can't list set %s: one bucket does not fit into "
                           "a message. Please report it!\n", set->name);
                cb->args[2] = 0;
                return -EMSGSIZE;
        }
        return 0;
}

static const struct ip_set_type_variant type_pf_tvariant = {
        .kadt   = type_pf_kadt,
        .uadt   = type_pf_uadt,
        .adt    = {
                [IPSET_ADD] = type_pf_tadd,
                [IPSET_DEL] = type_pf_tdel,
                [IPSET_TEST] = type_pf_ttest,
        },
        .destroy = type_pf_destroy,
        .flush  = type_pf_flush,
        .head   = type_pf_head,
        .list   = type_pf_tlist,
        .resize = type_pf_tresize,
        .same_set = type_pf_same_set,
};

static void
type_pf_gc(unsigned long ul_set)
{
        struct ip_set *set = (struct ip_set *) ul_set;
        struct ip_set_hash *h = set->data;

        pr_debug("called\n");
        write_lock_bh(&set->lock);
        type_pf_expire(h);
        write_unlock_bh(&set->lock);

        h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
        add_timer(&h->gc);
}

static void
type_pf_gc_init(struct ip_set *set)
{
        struct ip_set_hash *h = set->data;

        init_timer(&h->gc);
        h->gc.data = (unsigned long) set;
        h->gc.function = type_pf_gc;
        h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
        add_timer(&h->gc);
        pr_debug("gc initialized, run in every %u\n",
                 IPSET_GC_PERIOD(h->timeout));
}

#undef type_pf_data_equal
#undef type_pf_data_isnull
#undef type_pf_data_copy
#undef type_pf_data_zero_out
#undef type_pf_data_list
#undef type_pf_data_tlist

#undef type_pf_elem
#undef type_pf_telem
#undef type_pf_data_timeout
#undef type_pf_data_expired
#undef type_pf_data_netmask
#undef type_pf_data_timeout_set

#undef type_pf_elem_add
#undef type_pf_add
#undef type_pf_del
#undef type_pf_test_cidrs
#undef type_pf_test

#undef type_pf_elem_tadd
#undef type_pf_expire
#undef type_pf_tadd
#undef type_pf_tdel
#undef type_pf_ttest_cidrs
#undef type_pf_ttest

#undef type_pf_resize
#undef type_pf_tresize
#undef type_pf_flush
#undef type_pf_destroy
#undef type_pf_head
#undef type_pf_list
#undef type_pf_tlist
#undef type_pf_same_set
#undef type_pf_kadt
#undef type_pf_uadt
#undef type_pf_gc
#undef type_pf_gc_init
#undef type_pf_variant
#undef type_pf_tvariant