/* netfilter: ipset: whitespace and coding fixes detected by checkpatch.pl
 * Source: [linux-flexiantxendom0-3.2.10.git] include/linux/netfilter/ipset/ip_set_ahash.h
 */
#ifndef _IP_SET_AHASH_H
#define _IP_SET_AHASH_H

#include <linux/rcupdate.h>
#include <linux/jhash.h>
#include <linux/netfilter/ipset/ip_set_timeout.h>

/* Token pasting helpers: the including set type defines TYPE and PF,
 * from which all the type_pf_* identifiers below are generated. */
#define CONCAT(a, b, c)         a##b##c
#define TOKEN(a, b, c)          CONCAT(a, b, c)

/* The element type of the set; used for the "next" member of
 * struct ip_set_hash below. */
#define type_pf_next            TOKEN(TYPE, PF, _elem)

/* Hashing which uses arrays to resolve clashing. The hash table is resized
 * (doubled) when searching becomes too long.
 * Internally jhash is used with the assumption that the size of the
 * stored data is a multiple of sizeof(u32). If storage supports timeout,
 * the timeout field must be the last one in the data structure - that field
 * is ignored when computing the hash key.
 *
 * Readers and resizing
 *
 * Resizing can be triggered by userspace command only, and those
 * are serialized by the nfnl mutex. During resizing the set is
 * read-locked, so the only possible concurrent operations are
 * the kernel side readers. Those must be protected by proper RCU locking.
 */
27
/* Number of elements to store in an initial array block */
#define AHASH_INIT_SIZE                 4
/* Max number of elements to store in an array block
 * before -EAGAIN triggers a table resize */
#define AHASH_MAX_SIZE                  (3*4)

/* A hash bucket: clashing elements are stored in a growable array */
struct hbucket {
        void *value;            /* the array of the values */
        u8 size;                /* size of the array */
        u8 pos;                 /* position of the first free entry */
};

/* The hash table: the table size stored here in order to make resizing easy */
struct htable {
        u8 htable_bits;         /* size of hash table == 2^htable_bits */
        struct hbucket bucket[0]; /* hashtable buckets */
};

/* Get the ith bucket of the hash table h */
#define hbucket(h, i)           (&((h)->bucket[i]))
47
/* Book-keeping of the prefixes added to the set: one slot per cidr
 * value in use; add_cidr() keeps the slots ordered by decreasing cidr. */
struct ip_set_hash_nets {
        u8 cidr;                /* the different cidr values in the set */
        u32 nets;               /* number of elements per cidr */
};
53
/* The generic ip_set hash structure, shared by all hash:* set types */
struct ip_set_hash {
        struct htable *table;   /* the hash table (RCU protected pointer) */
        u32 maxelem;            /* max elements in the hash */
        u32 elements;           /* current element (vs timeout) */
        u32 initval;            /* random jhash init value */
        u32 timeout;            /* timeout value, if enabled */
        struct timer_list gc;   /* garbage collection when timeout enabled */
        struct type_pf_next next; /* temporary storage for uadd */
#ifdef IP_SET_HASH_WITH_NETMASK
        u8 netmask;             /* netmask value for subnets to store */
#endif
#ifdef IP_SET_HASH_WITH_RBTREE
        struct rb_root rbtree;
#endif
#ifdef IP_SET_HASH_WITH_NETS
        struct ip_set_hash_nets nets[0]; /* book-keeping of prefixes */
#endif
};
73
74 /* Compute htable_bits from the user input parameter hashsize */
75 static u8
76 htable_bits(u32 hashsize)
77 {
78         /* Assume that hashsize == 2^htable_bits */
79         u8 bits = fls(hashsize - 1);
80         if (jhash_size(bits) != hashsize)
81                 /* Round up to the first 2^n value */
82                 bits = fls(hashsize);
83
84         return bits;
85 }
86
#ifdef IP_SET_HASH_WITH_NETS

/* Width of a host address for the given family */
#define SET_HOST_MASK(family)   (family == AF_INET ? 32 : 128)

/* Network cidr size book keeping when the hash stores different
 * sized networks: h->nets[cidr-1].nets counts the elements with the
 * given prefix length, while the h->nets[].cidr values are kept
 * sorted in decreasing order. */
static void
add_cidr(struct ip_set_hash *h, u8 cidr, u8 host_mask)
{
        u8 i;

        ++h->nets[cidr-1].nets;

        pr_debug("add_cidr added %u: %u\n", cidr, h->nets[cidr-1].nets);

        if (h->nets[cidr-1].nets > 1)
                /* The cidr value is already recorded in h->nets[].cidr */
                return;

        /* New cidr size: insert it keeping the decreasing order;
         * swap() pushes the smaller values one slot to the right. */
        for (i = 0; i < host_mask && h->nets[i].cidr; i++) {
                /* Add in increasing prefix order, so larger cidr first */
                if (h->nets[i].cidr < cidr)
                        swap(h->nets[i].cidr, cidr);
        }
        if (i < host_mask)
                h->nets[i].cidr = cidr;
}
114
/* Drop a reference of the given cidr size; when the last element with
 * this prefix length is gone, remove the value from the ordered
 * h->nets[].cidr array by shifting the following entries down. */
static void
del_cidr(struct ip_set_hash *h, u8 cidr, u8 host_mask)
{
        u8 i;

        --h->nets[cidr-1].nets;

        pr_debug("del_cidr deleted %u: %u\n", cidr, h->nets[cidr-1].nets);

        if (h->nets[cidr-1].nets != 0)
                /* Other elements still use this cidr size */
                return;

        /* All entries with this cidr size deleted, so cleanup h->cidr[] */
        for (i = 0; i < host_mask - 1 && h->nets[i].cidr; i++) {
                if (h->nets[i].cidr == cidr)
                        h->nets[i].cidr = cidr = h->nets[i+1].cidr;
        }
        /* NOTE(review): assumes the deleted cidr was present in the
         * array so i > 0 here - holds as long as add_cidr/del_cidr
         * calls are balanced; confirm there is no other writer. */
        h->nets[i - 1].cidr = 0;
}
#endif
135
136 /* Destroy the hashtable part of the set */
137 static void
138 ahash_destroy(struct htable *t)
139 {
140         struct hbucket *n;
141         u32 i;
142
143         for (i = 0; i < jhash_size(t->htable_bits); i++) {
144                 n = hbucket(t, i);
145                 if (n->size)
146                         /* FIXME: use slab cache */
147                         kfree(n->value);
148         }
149
150         ip_set_free(t);
151 }
152
153 /* Calculate the actual memory size of the set data */
154 static size_t
155 ahash_memsize(const struct ip_set_hash *h, size_t dsize, u8 host_mask)
156 {
157         u32 i;
158         struct htable *t = h->table;
159         size_t memsize = sizeof(*h)
160                          + sizeof(*t)
161 #ifdef IP_SET_HASH_WITH_NETS
162                          + sizeof(struct ip_set_hash_nets) * host_mask
163 #endif
164                          + jhash_size(t->htable_bits) * sizeof(struct hbucket);
165
166         for (i = 0; i < jhash_size(t->htable_bits); i++)
167                         memsize += t->bucket[i].size * dsize;
168
169         return memsize;
170 }
171
172 /* Flush a hash type of set: destroy all elements */
173 static void
174 ip_set_hash_flush(struct ip_set *set)
175 {
176         struct ip_set_hash *h = set->data;
177         struct htable *t = h->table;
178         struct hbucket *n;
179         u32 i;
180
181         for (i = 0; i < jhash_size(t->htable_bits); i++) {
182                 n = hbucket(t, i);
183                 if (n->size) {
184                         n->size = n->pos = 0;
185                         /* FIXME: use slab cache */
186                         kfree(n->value);
187                 }
188         }
189 #ifdef IP_SET_HASH_WITH_NETS
190         memset(h->nets, 0, sizeof(struct ip_set_hash_nets)
191                            * SET_HOST_MASK(set->family));
192 #endif
193         h->elements = 0;
194 }
195
196 /* Destroy a hash type of set */
197 static void
198 ip_set_hash_destroy(struct ip_set *set)
199 {
200         struct ip_set_hash *h = set->data;
201
202         if (with_timeout(h->timeout))
203                 del_timer_sync(&h->gc);
204
205         ahash_destroy(h->table);
206 #ifdef IP_SET_HASH_WITH_RBTREE
207         rbtree_destroy(&h->rbtree);
208 #endif
209         kfree(h);
210
211         set->data = NULL;
212 }
213
/* Compute the bucket index of an element: jhash over the element
 * words (a trailing timeout field is excluded, as sizeof(struct
 * type_pf_elem) does not cover it - see the header notes), masked
 * down to the table size. */
#define HKEY(data, initval, htable_bits)                                 \
(jhash2((u32 *)(data), sizeof(struct type_pf_elem)/sizeof(u32), initval) \
        & jhash_mask(htable_bits))

#endif /* _IP_SET_AHASH_H */
219
#define CONCAT(a, b, c)         a##b##c
#define TOKEN(a, b, c)          CONCAT(a, b, c)

/* Type/family dependent function prototypes */

/* Element helpers, implemented by the concrete set type */
#define type_pf_data_equal      TOKEN(TYPE, PF, _data_equal)
#define type_pf_data_isnull     TOKEN(TYPE, PF, _data_isnull)
#define type_pf_data_copy       TOKEN(TYPE, PF, _data_copy)
#define type_pf_data_zero_out   TOKEN(TYPE, PF, _data_zero_out)
#define type_pf_data_netmask    TOKEN(TYPE, PF, _data_netmask)
#define type_pf_data_list       TOKEN(TYPE, PF, _data_list)
#define type_pf_data_tlist      TOKEN(TYPE, PF, _data_tlist)
#define type_pf_data_next       TOKEN(TYPE, PF, _data_next)

/* Element structures and timeout accessors */
#define type_pf_elem            TOKEN(TYPE, PF, _elem)
#define type_pf_telem           TOKEN(TYPE, PF, _telem)
#define type_pf_data_timeout    TOKEN(TYPE, PF, _data_timeout)
#define type_pf_data_expired    TOKEN(TYPE, PF, _data_expired)
#define type_pf_data_timeout_set TOKEN(TYPE, PF, _data_timeout_set)

/* Flavour without timeout support */
#define type_pf_elem_add        TOKEN(TYPE, PF, _elem_add)
#define type_pf_add             TOKEN(TYPE, PF, _add)
#define type_pf_del             TOKEN(TYPE, PF, _del)
#define type_pf_test_cidrs      TOKEN(TYPE, PF, _test_cidrs)
#define type_pf_test            TOKEN(TYPE, PF, _test)

/* Flavour with timeout support */
#define type_pf_elem_tadd       TOKEN(TYPE, PF, _elem_tadd)
#define type_pf_del_telem       TOKEN(TYPE, PF, _ahash_del_telem)
#define type_pf_expire          TOKEN(TYPE, PF, _expire)
#define type_pf_tadd            TOKEN(TYPE, PF, _tadd)
#define type_pf_tdel            TOKEN(TYPE, PF, _tdel)
#define type_pf_ttest_cidrs     TOKEN(TYPE, PF, _ahash_ttest_cidrs)
#define type_pf_ttest           TOKEN(TYPE, PF, _ahash_ttest)

/* Common set operations; flush/destroy map to the shared
 * implementations defined above. */
#define type_pf_resize          TOKEN(TYPE, PF, _resize)
#define type_pf_tresize         TOKEN(TYPE, PF, _tresize)
#define type_pf_flush           ip_set_hash_flush
#define type_pf_destroy         ip_set_hash_destroy
#define type_pf_head            TOKEN(TYPE, PF, _head)
#define type_pf_list            TOKEN(TYPE, PF, _list)
#define type_pf_tlist           TOKEN(TYPE, PF, _tlist)
#define type_pf_same_set        TOKEN(TYPE, PF, _same_set)
#define type_pf_kadt            TOKEN(TYPE, PF, _kadt)
#define type_pf_uadt            TOKEN(TYPE, PF, _uadt)
#define type_pf_gc              TOKEN(TYPE, PF, _gc)
#define type_pf_gc_init         TOKEN(TYPE, PF, _gc_init)
#define type_pf_variant         TOKEN(TYPE, PF, _variant)
#define type_pf_tvariant        TOKEN(TYPE, PF, _tvariant)
268
/* Flavour without timeout */

/* Get the ith element from the array block n */
#define ahash_data(n, i)        \
        ((struct type_pf_elem *)((n)->value) + (i))
274
275 /* Add an element to the hash table when resizing the set:
276  * we spare the maintenance of the internal counters. */
277 static int
278 type_pf_elem_add(struct hbucket *n, const struct type_pf_elem *value)
279 {
280         if (n->pos >= n->size) {
281                 void *tmp;
282
283                 if (n->size >= AHASH_MAX_SIZE)
284                         /* Trigger rehashing */
285                         return -EAGAIN;
286
287                 tmp = kzalloc((n->size + AHASH_INIT_SIZE)
288                               * sizeof(struct type_pf_elem),
289                               GFP_ATOMIC);
290                 if (!tmp)
291                         return -ENOMEM;
292                 if (n->size) {
293                         memcpy(tmp, n->value,
294                                sizeof(struct type_pf_elem) * n->size);
295                         kfree(n->value);
296                 }
297                 n->value = tmp;
298                 n->size += AHASH_INIT_SIZE;
299         }
300         type_pf_data_copy(ahash_data(n, n->pos++), value);
301         return 0;
302 }
303
/* Resize a hash: create a new hash table with doubling the hashsize
 * and inserting the elements to it. Repeat until we succeed or
 * fail due to memory pressures. */
static int
type_pf_resize(struct ip_set *set, bool retried)
{
        struct ip_set_hash *h = set->data;
        struct htable *t, *orig = h->table;
        u8 htable_bits = orig->htable_bits;
        const struct type_pf_elem *data;
        struct hbucket *n, *m;
        u32 i, j;
        int ret;

retry:
        ret = 0;
        htable_bits++;
        pr_debug("attempt to resize set %s from %u to %u, t %p\n",
                 set->name, orig->htable_bits, htable_bits, orig);
        if (!htable_bits)
                /* In case we have plenty of memory :-) */
                return -IPSET_ERR_HASH_FULL;
        t = ip_set_alloc(sizeof(*t)
                         + jhash_size(htable_bits) * sizeof(struct hbucket));
        if (!t)
                return -ENOMEM;
        t->htable_bits = htable_bits;

        /* Per the header notes, only RCU readers can run concurrently
         * while the set is read-locked, so copying is safe here. */
        read_lock_bh(&set->lock);
        for (i = 0; i < jhash_size(orig->htable_bits); i++) {
                n = hbucket(orig, i);
                for (j = 0; j < n->pos; j++) {
                        data = ahash_data(n, j);
                        /* Rehash into the larger table */
                        m = hbucket(t, HKEY(data, h->initval, htable_bits));
                        ret = type_pf_elem_add(m, data);
                        if (ret < 0) {
                                read_unlock_bh(&set->lock);
                                ahash_destroy(t);
                                if (ret == -EAGAIN)
                                        /* A bucket overflowed even in the
                                         * bigger table: double again */
                                        goto retry;
                                return ret;
                        }
                }
        }

        /* Publish the new table; concurrent RCU readers see either
         * the old or the new one. */
        rcu_assign_pointer(h->table, t);
        read_unlock_bh(&set->lock);

        /* Give time to other readers of the set before freeing
         * the old table */
        synchronize_rcu_bh();

        pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
                 orig->htable_bits, orig, t->htable_bits, t);
        ahash_destroy(orig);

        return 0;
}
361
/* Implemented by the concrete set type: stores d as the point to
 * continue from when the add is retried after a resize - presumably
 * via the "temporary storage for uadd" member of struct ip_set_hash
 * (TODO confirm against a set type implementation). */
static void
type_pf_data_next(struct ip_set_hash *h, const struct type_pf_elem *d);

/* Add an element to a hash and update the internal counters when succeeded,
 * otherwise report the proper error code. */
static int
type_pf_add(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
        struct ip_set_hash *h = set->data;
        struct htable *t;
        const struct type_pf_elem *d = value;
        struct hbucket *n;
        int i, ret = 0;
        u32 key;

        if (h->elements >= h->maxelem)
                return -IPSET_ERR_HASH_FULL;

        /* The table pointer may be replaced by a concurrent resize:
         * dereference it under RCU protection. */
        rcu_read_lock_bh();
        t = rcu_dereference_bh(h->table);
        key = HKEY(value, h->initval, t->htable_bits);
        n = hbucket(t, key);
        /* Reject duplicates */
        for (i = 0; i < n->pos; i++)
                if (type_pf_data_equal(ahash_data(n, i), d)) {
                        ret = -IPSET_ERR_EXIST;
                        goto out;
                }

        ret = type_pf_elem_add(n, value);
        if (ret != 0) {
                if (ret == -EAGAIN)
                        /* Bucket full: record the restart point, the
                         * caller is expected to resize and retry */
                        type_pf_data_next(h, d);
                goto out;
        }

#ifdef IP_SET_HASH_WITH_NETS
        add_cidr(h, d->cidr, HOST_MASK);
#endif
        h->elements++;
out:
        rcu_read_unlock_bh();
        return ret;
}
405
/* Delete an element from the hash: swap it with the last element
 * in its bucket and free up space if possible.
 */
static int
type_pf_del(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
        struct ip_set_hash *h = set->data;
        struct htable *t = h->table;
        const struct type_pf_elem *d = value;
        struct hbucket *n;
        int i;
        struct type_pf_elem *data;
        u32 key;

        key = HKEY(value, h->initval, t->htable_bits);
        n = hbucket(t, key);
        for (i = 0; i < n->pos; i++) {
                data = ahash_data(n, i);
                if (!type_pf_data_equal(data, d))
                        continue;
                if (i != n->pos - 1)
                        /* Not last one: overwrite with the last element */
                        type_pf_data_copy(data, ahash_data(n, n->pos - 1));

                n->pos--;
                h->elements--;
#ifdef IP_SET_HASH_WITH_NETS
                del_cidr(h, d->cidr, HOST_MASK);
#endif
                /* Shrink the bucket array when enough free room piled
                 * up; an allocation failure is not an error, we simply
                 * keep the oversized array. */
                if (n->pos + AHASH_INIT_SIZE < n->size) {
                        void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
                                            * sizeof(struct type_pf_elem),
                                            GFP_ATOMIC);
                        if (!tmp)
                                return 0;
                        n->size -= AHASH_INIT_SIZE;
                        memcpy(tmp, n->value,
                               n->size * sizeof(struct type_pf_elem));
                        kfree(n->value);
                        n->value = tmp;
                }
                return 0;
        }

        return -IPSET_ERR_EXIST;
}
452
#ifdef IP_SET_HASH_WITH_NETS

/* Special test function which takes into account the different network
 * sizes added to the set: probe the hash once per cidr value recorded
 * in h->nets[], masking the element accordingly before hashing. */
static int
type_pf_test_cidrs(struct ip_set *set, struct type_pf_elem *d, u32 timeout)
{
        struct ip_set_hash *h = set->data;
        struct htable *t = h->table;
        struct hbucket *n;
        const struct type_pf_elem *data;
        int i, j = 0;
        u32 key;
        u8 host_mask = SET_HOST_MASK(set->family);

        pr_debug("test by nets\n");
        for (; j < host_mask && h->nets[j].cidr; j++) {
                /* Mask d to the current prefix length before hashing */
                type_pf_data_netmask(d, h->nets[j].cidr);
                key = HKEY(d, h->initval, t->htable_bits);
                n = hbucket(t, key);
                for (i = 0; i < n->pos; i++) {
                        data = ahash_data(n, i);
                        if (type_pf_data_equal(data, d))
                                return 1;
                }
        }
        return 0;
}
#endif
482
/* Test whether the element is added to the set.
 * Returns 1 when found, 0 otherwise. */
static int
type_pf_test(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
        struct ip_set_hash *h = set->data;
        struct htable *t = h->table;
        struct type_pf_elem *d = value;
        struct hbucket *n;
        const struct type_pf_elem *data;
        int i;
        u32 key;

#ifdef IP_SET_HASH_WITH_NETS
        /* If we test an IP address and not a network address,
         * try all possible network sizes */
        if (d->cidr == SET_HOST_MASK(set->family))
                return type_pf_test_cidrs(set, d, timeout);
#endif

        key = HKEY(d, h->initval, t->htable_bits);
        n = hbucket(t, key);
        /* Linear scan of the clashing elements in the bucket */
        for (i = 0; i < n->pos; i++) {
                data = ahash_data(n, i);
                if (type_pf_data_equal(data, d))
                        return 1;
        }
        return 0;
}
511
/* Reply a HEADER request: fill out the header part of the set.
 * The NLA_PUT_* macros jump to the nla_put_failure label when the
 * message buffer runs out of room. */
static int
type_pf_head(struct ip_set *set, struct sk_buff *skb)
{
        const struct ip_set_hash *h = set->data;
        struct nlattr *nested;
        size_t memsize;

        /* Element size differs when timeouts are supported */
        read_lock_bh(&set->lock);
        memsize = ahash_memsize(h, with_timeout(h->timeout)
                                        ? sizeof(struct type_pf_telem)
                                        : sizeof(struct type_pf_elem),
                                set->family == AF_INET ? 32 : 128);
        read_unlock_bh(&set->lock);

        nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
        if (!nested)
                goto nla_put_failure;
        NLA_PUT_NET32(skb, IPSET_ATTR_HASHSIZE,
                      htonl(jhash_size(h->table->htable_bits)));
        NLA_PUT_NET32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem));
#ifdef IP_SET_HASH_WITH_NETMASK
        if (h->netmask != HOST_MASK)
                NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, h->netmask);
#endif
        /* NOTE(review): ref - 1 presumably excludes the reference held
         * for this request itself - confirm against the ipset core. */
        NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1));
        NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize));
        if (with_timeout(h->timeout))
                NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(h->timeout));
        ipset_nest_end(skb, nested);

        return 0;
nla_put_failure:
        return -EMSGSIZE;
}
547
/* Reply a LIST/SAVE request: dump the elements of the specified set.
 * cb->args[2] keeps the bucket index to continue from between
 * netlink dump callbacks. */
static int
type_pf_list(const struct ip_set *set,
             struct sk_buff *skb, struct netlink_callback *cb)
{
        const struct ip_set_hash *h = set->data;
        const struct htable *t = h->table;
        struct nlattr *atd, *nested;
        const struct hbucket *n;
        const struct type_pf_elem *data;
        u32 first = cb->args[2];
        /* We assume that one hash bucket fills into one page */
        void *incomplete;
        int i;

        atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
        if (!atd)
                return -EMSGSIZE;
        pr_debug("list hash set %s\n", set->name);
        for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) {
                /* Rollback point: on overflow the partially added
                 * bucket is trimmed and the dump continues here */
                incomplete = skb_tail_pointer(skb);
                n = hbucket(t, cb->args[2]);
                pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n);
                for (i = 0; i < n->pos; i++) {
                        data = ahash_data(n, i);
                        pr_debug("list hash %lu hbucket %p i %u, data %p\n",
                                 cb->args[2], n, i, data);
                        nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
                        if (!nested) {
                                if (cb->args[2] == first) {
                                        /* Not even one bucket dumped yet:
                                         * report the overflow */
                                        nla_nest_cancel(skb, atd);
                                        return -EMSGSIZE;
                                } else
                                        goto nla_put_failure;
                        }
                        if (type_pf_data_list(skb, data))
                                goto nla_put_failure;
                        ipset_nest_end(skb, nested);
                }
        }
        ipset_nest_end(skb, atd);
        /* Set listing finished */
        cb->args[2] = 0;

        return 0;

nla_put_failure:
        /* Drop the half-dumped bucket; it is re-dumped in the next call */
        nlmsg_trim(skb, incomplete);
        ipset_nest_end(skb, atd);
        if (unlikely(first == cb->args[2])) {
                pr_warning("Can't list set %s: one bucket does not fit into "
                           "a message. Please report it!\n", set->name);
                cb->args[2] = 0;
                return -EMSGSIZE;
        }
        return 0;
}
605
/* Forward declarations: the packet path (kadt) and userspace (uadt)
 * entry points are implemented by the set type including this file. */
static int
type_pf_kadt(struct ip_set *set, const struct sk_buff * skb,
             const struct xt_action_param *par,
             enum ipset_adt adt, const struct ip_set_adt_opt *opt);
static int
type_pf_uadt(struct ip_set *set, struct nlattr *tb[],
             enum ipset_adt adt, u32 *lineno, u32 flags, bool retried);

/* Method table of the flavour without timeout support */
static const struct ip_set_type_variant type_pf_variant = {
        .kadt   = type_pf_kadt,
        .uadt   = type_pf_uadt,
        .adt    = {
                [IPSET_ADD] = type_pf_add,
                [IPSET_DEL] = type_pf_del,
                [IPSET_TEST] = type_pf_test,
        },
        .destroy = type_pf_destroy,
        .flush  = type_pf_flush,
        .head   = type_pf_head,
        .list   = type_pf_list,
        .resize = type_pf_resize,
        .same_set = type_pf_same_set,
};
629
/* Flavour with timeout support */

/* Get the ith element from the array block n, using the larger,
 * timeout-aware element layout for the indexing */
#define ahash_tdata(n, i) \
        (struct type_pf_elem *)((struct type_pf_telem *)((n)->value) + (i))
634
635 static inline u32
636 type_pf_data_timeout(const struct type_pf_elem *data)
637 {
638         const struct type_pf_telem *tdata =
639                 (const struct type_pf_telem *) data;
640
641         return tdata->timeout;
642 }
643
644 static inline bool
645 type_pf_data_expired(const struct type_pf_elem *data)
646 {
647         const struct type_pf_telem *tdata =
648                 (const struct type_pf_telem *) data;
649
650         return ip_set_timeout_expired(tdata->timeout);
651 }
652
653 static inline void
654 type_pf_data_timeout_set(struct type_pf_elem *data, u32 timeout)
655 {
656         struct type_pf_telem *tdata = (struct type_pf_telem *) data;
657
658         tdata->timeout = ip_set_timeout_set(timeout);
659 }
660
/* Timeout-aware counterpart of type_pf_elem_add: append value to the
 * array block n (telem-sized slots) and stamp it with the timeout.
 * Returns -EAGAIN when the block is full (triggers rehashing) and
 * -ENOMEM when the enlarged array cannot be allocated. */
static int
type_pf_elem_tadd(struct hbucket *n, const struct type_pf_elem *value,
                  u32 timeout)
{
        struct type_pf_elem *data;

        if (n->pos >= n->size) {
                void *tmp;

                if (n->size >= AHASH_MAX_SIZE)
                        /* Trigger rehashing */
                        return -EAGAIN;

                /* Grow the array by AHASH_INIT_SIZE telem slots */
                tmp = kzalloc((n->size + AHASH_INIT_SIZE)
                              * sizeof(struct type_pf_telem),
                              GFP_ATOMIC);
                if (!tmp)
                        return -ENOMEM;
                if (n->size) {
                        memcpy(tmp, n->value,
                               sizeof(struct type_pf_telem) * n->size);
                        kfree(n->value);
                }
                n->value = tmp;
                n->size += AHASH_INIT_SIZE;
        }
        data = ahash_tdata(n, n->pos++);
        type_pf_data_copy(data, value);
        type_pf_data_timeout_set(data, timeout);
        return 0;
}
692
/* Delete expired elements from the hashtable: every bucket is walked,
 * expired elements are replaced by the bucket's last element
 * (swap-delete) and arrays with enough free room are shrunk.
 * NOTE(review): no locking visible here - presumably the callers
 * (gc timer, tresize, tadd) provide it; confirm at the call sites. */
static void
type_pf_expire(struct ip_set_hash *h)
{
        struct htable *t = h->table;
        struct hbucket *n;
        struct type_pf_elem *data;
        u32 i;
        int j;

        for (i = 0; i < jhash_size(t->htable_bits); i++) {
                n = hbucket(t, i);
                for (j = 0; j < n->pos; j++) {
                        data = ahash_tdata(n, j);
                        if (type_pf_data_expired(data)) {
                                pr_debug("expired %u/%u\n", i, j);
#ifdef IP_SET_HASH_WITH_NETS
                                del_cidr(h, data->cidr, HOST_MASK);
#endif
                                if (j != n->pos - 1)
                                        /* Not last one */
                                        type_pf_data_copy(data,
                                                ahash_tdata(n, n->pos - 1));
                                n->pos--;
                                h->elements--;
                        }
                }
                /* Opportunistic shrinking of the bucket array */
                if (n->pos + AHASH_INIT_SIZE < n->size) {
                        void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
                                            * sizeof(struct type_pf_telem),
                                            GFP_ATOMIC);
                        if (!tmp)
                                /* Still try to delete expired elements */
                                continue;
                        n->size -= AHASH_INIT_SIZE;
                        memcpy(tmp, n->value,
                               n->size * sizeof(struct type_pf_telem));
                        kfree(n->value);
                        n->value = tmp;
                }
        }
}
735
/* Resize the timeout-aware flavour of the hash: like type_pf_resize,
 * but first a garbage collection pass is attempted, which may free
 * enough room to make the resize unnecessary. */
static int
type_pf_tresize(struct ip_set *set, bool retried)
{
        struct ip_set_hash *h = set->data;
        struct htable *t, *orig = h->table;
        u8 htable_bits = orig->htable_bits;
        const struct type_pf_elem *data;
        struct hbucket *n, *m;
        u32 i, j;
        int ret;

        /* Try to cleanup once: if expiring deleted anything,
         * report success without growing the table */
        if (!retried) {
                i = h->elements;
                write_lock_bh(&set->lock);
                type_pf_expire(set->data);
                write_unlock_bh(&set->lock);
                if (h->elements <  i)
                        return 0;
        }

retry:
        ret = 0;
        htable_bits++;
        if (!htable_bits)
                /* In case we have plenty of memory :-) */
                return -IPSET_ERR_HASH_FULL;
        t = ip_set_alloc(sizeof(*t)
                         + jhash_size(htable_bits) * sizeof(struct hbucket));
        if (!t)
                return -ENOMEM;
        t->htable_bits = htable_bits;

        /* Copy the elements over, preserving their timeout stamps */
        read_lock_bh(&set->lock);
        for (i = 0; i < jhash_size(orig->htable_bits); i++) {
                n = hbucket(orig, i);
                for (j = 0; j < n->pos; j++) {
                        data = ahash_tdata(n, j);
                        m = hbucket(t, HKEY(data, h->initval, htable_bits));
                        ret = type_pf_elem_tadd(m, data,
                                                type_pf_data_timeout(data));
                        if (ret < 0) {
                                read_unlock_bh(&set->lock);
                                ahash_destroy(t);
                                if (ret == -EAGAIN)
                                        /* Overflow again: double once more */
                                        goto retry;
                                return ret;
                        }
                }
        }

        /* Publish the new table for the RCU readers */
        rcu_assign_pointer(h->table, t);
        read_unlock_bh(&set->lock);

        /* Give time to other readers of the set */
        synchronize_rcu_bh();

        ahash_destroy(orig);

        return 0;
}
797
/* Add an element to the timeout-aware hash: an expired element (or,
 * with IPSET_FLAG_EXIST, a matching one) may be overwritten in place
 * instead of growing the bucket. */
static int
type_pf_tadd(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
        struct ip_set_hash *h = set->data;
        struct htable *t = h->table;
        const struct type_pf_elem *d = value;
        struct hbucket *n;
        struct type_pf_elem *data;
        /* j: index of a reusable slot; AHASH_MAX_SIZE + 1 == none found */
        int ret = 0, i, j = AHASH_MAX_SIZE + 1;
        bool flag_exist = flags & IPSET_FLAG_EXIST;
        u32 key;

        if (h->elements >= h->maxelem)
                /* FIXME: when set is full, we slow down here */
                type_pf_expire(h);
        if (h->elements >= h->maxelem)
                return -IPSET_ERR_HASH_FULL;

        /* The table pointer may be replaced by a resize: dereference
         * it under RCU protection. */
        rcu_read_lock_bh();
        t = rcu_dereference_bh(h->table);
        key = HKEY(d, h->initval, t->htable_bits);
        n = hbucket(t, key);
        for (i = 0; i < n->pos; i++) {
                data = ahash_tdata(n, i);
                if (type_pf_data_equal(data, d)) {
                        if (type_pf_data_expired(data) || flag_exist)
                                /* Overwrite the matching slot */
                                j = i;
                        else {
                                ret = -IPSET_ERR_EXIST;
                                goto out;
                        }
                } else if (j == AHASH_MAX_SIZE + 1 &&
                           type_pf_data_expired(data))
                        /* Remember the first expired slot as fallback */
                        j = i;
        }
        if (j != AHASH_MAX_SIZE + 1) {
                /* Reuse the slot instead of growing the bucket */
                data = ahash_tdata(n, j);
#ifdef IP_SET_HASH_WITH_NETS
                del_cidr(h, data->cidr, HOST_MASK);
                add_cidr(h, d->cidr, HOST_MASK);
#endif
                type_pf_data_copy(data, d);
                type_pf_data_timeout_set(data, timeout);
                goto out;
        }
        ret = type_pf_elem_tadd(n, d, timeout);
        if (ret != 0) {
                if (ret == -EAGAIN)
                        /* Bucket full: record the restart point for the
                         * retry after resizing */
                        type_pf_data_next(h, d);
                goto out;
        }

#ifdef IP_SET_HASH_WITH_NETS
        add_cidr(h, d->cidr, HOST_MASK);
#endif
        h->elements++;
out:
        rcu_read_unlock_bh();
        return ret;
}
858
/* Delete an element from the set with timeout support.
 * Called for IPSET_DEL; timeout and flags are unused here but the
 * signature must match the adt callback prototype.
 * Returns 0 on success, -IPSET_ERR_EXIST if the element is not
 * present (or already expired).
 */
static int
type_pf_tdel(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
	struct ip_set_hash *h = set->data;
	struct htable *t = h->table;
	const struct type_pf_elem *d = value;
	struct hbucket *n;
	int i;
	struct type_pf_elem *data;
	u32 key;

	key = HKEY(value, h->initval, t->htable_bits);
	n = hbucket(t, key);
	for (i = 0; i < n->pos; i++) {
		data = ahash_tdata(n, i);
		if (!type_pf_data_equal(data, d))
			continue;
		/* An expired entry counts as absent */
		if (type_pf_data_expired(data))
			return -IPSET_ERR_EXIST;
		/* Delete by moving the last bucket entry into this slot */
		if (i != n->pos - 1)
			/* Not last one */
			type_pf_data_copy(data, ahash_tdata(n, n->pos - 1));

		n->pos--;
		h->elements--;
#ifdef IP_SET_HASH_WITH_NETS
		del_cidr(h, d->cidr, HOST_MASK);
#endif
		/* Shrink the bucket array when it got sparse enough;
		 * allocation failure is harmless — keep the larger array */
		if (n->pos + AHASH_INIT_SIZE < n->size) {
			void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
					    * sizeof(struct type_pf_telem),
					    GFP_ATOMIC);
			if (!tmp)
				return 0;
			n->size -= AHASH_INIT_SIZE;
			memcpy(tmp, n->value,
			       n->size * sizeof(struct type_pf_telem));
			kfree(n->value);
			n->value = tmp;
		}
		return 0;
	}

	return -IPSET_ERR_EXIST;
}
904
#ifdef IP_SET_HASH_WITH_NETS
/* Test a host element against every prefix length stored in the set:
 * mask the element down to each cidr in use, then probe the matching
 * hash bucket. Returns 1 on a live match, 0 otherwise.
 */
static int
type_pf_ttest_cidrs(struct ip_set *set, struct type_pf_elem *d, u32 timeout)
{
	struct ip_set_hash *h = set->data;
	struct htable *t = h->table;
	struct type_pf_elem *elem;
	struct hbucket *bucket;
	int slot, net;
	u32 hkey;
	u8 host_mask = SET_HOST_MASK(set->family);

	for (net = 0; net < host_mask && h->nets[net].cidr; net++) {
		type_pf_data_netmask(d, h->nets[net].cidr);
		hkey = HKEY(d, h->initval, t->htable_bits);
		bucket = hbucket(t, hkey);
		for (slot = 0; slot < bucket->pos; slot++) {
			elem = ahash_tdata(bucket, slot);
			if (type_pf_data_equal(elem, d))
				/* Found: live only if not yet expired */
				return !type_pf_data_expired(elem);
		}
	}
	return 0;
}
#endif
930
931 static int
932 type_pf_ttest(struct ip_set *set, void *value, u32 timeout, u32 flags)
933 {
934         struct ip_set_hash *h = set->data;
935         struct htable *t = h->table;
936         struct type_pf_elem *data, *d = value;
937         struct hbucket *n;
938         int i;
939         u32 key;
940
941 #ifdef IP_SET_HASH_WITH_NETS
942         if (d->cidr == SET_HOST_MASK(set->family))
943                 return type_pf_ttest_cidrs(set, d, timeout);
944 #endif
945         key = HKEY(d, h->initval, t->htable_bits);
946         n = hbucket(t, key);
947         for (i = 0; i < n->pos; i++) {
948                 data = ahash_tdata(n, i);
949                 if (type_pf_data_equal(data, d))
950                         return !type_pf_data_expired(data);
951         }
952         return 0;
953 }
954
955 static int
956 type_pf_tlist(const struct ip_set *set,
957               struct sk_buff *skb, struct netlink_callback *cb)
958 {
959         const struct ip_set_hash *h = set->data;
960         const struct htable *t = h->table;
961         struct nlattr *atd, *nested;
962         const struct hbucket *n;
963         const struct type_pf_elem *data;
964         u32 first = cb->args[2];
965         /* We assume that one hash bucket fills into one page */
966         void *incomplete;
967         int i;
968
969         atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
970         if (!atd)
971                 return -EMSGSIZE;
972         for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) {
973                 incomplete = skb_tail_pointer(skb);
974                 n = hbucket(t, cb->args[2]);
975                 for (i = 0; i < n->pos; i++) {
976                         data = ahash_tdata(n, i);
977                         pr_debug("list %p %u\n", n, i);
978                         if (type_pf_data_expired(data))
979                                 continue;
980                         pr_debug("do list %p %u\n", n, i);
981                         nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
982                         if (!nested) {
983                                 if (cb->args[2] == first) {
984                                         nla_nest_cancel(skb, atd);
985                                         return -EMSGSIZE;
986                                 } else
987                                         goto nla_put_failure;
988                         }
989                         if (type_pf_data_tlist(skb, data))
990                                 goto nla_put_failure;
991                         ipset_nest_end(skb, nested);
992                 }
993         }
994         ipset_nest_end(skb, atd);
995         /* Set listing finished */
996         cb->args[2] = 0;
997
998         return 0;
999
1000 nla_put_failure:
1001         nlmsg_trim(skb, incomplete);
1002         ipset_nest_end(skb, atd);
1003         if (unlikely(first == cb->args[2])) {
1004                 pr_warning("Can't list set %s: one bucket does not fit into "
1005                            "a message. Please report it!\n", set->name);
1006                 cb->args[2] = 0;
1007                 return -EMSGSIZE;
1008         }
1009         return 0;
1010 }
1011
/* Method table for sets created with timeout support: the add/del/test
 * callbacks and the list/resize handlers are the timeout-aware
 * (t-prefixed) variants; kadt/uadt/flush/destroy/head/same_set are
 * shared with the plain variant. */
static const struct ip_set_type_variant type_pf_tvariant = {
	.kadt	= type_pf_kadt,
	.uadt	= type_pf_uadt,
	.adt	= {
		[IPSET_ADD] = type_pf_tadd,
		[IPSET_DEL] = type_pf_tdel,
		[IPSET_TEST] = type_pf_ttest,
	},
	.destroy = type_pf_destroy,
	.flush	= type_pf_flush,
	.head	= type_pf_head,
	.list	= type_pf_tlist,
	.resize = type_pf_tresize,
	.same_set = type_pf_same_set,
};
1027
1028 static void
1029 type_pf_gc(unsigned long ul_set)
1030 {
1031         struct ip_set *set = (struct ip_set *) ul_set;
1032         struct ip_set_hash *h = set->data;
1033
1034         pr_debug("called\n");
1035         write_lock_bh(&set->lock);
1036         type_pf_expire(h);
1037         write_unlock_bh(&set->lock);
1038
1039         h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
1040         add_timer(&h->gc);
1041 }
1042
1043 static void
1044 type_pf_gc_init(struct ip_set *set)
1045 {
1046         struct ip_set_hash *h = set->data;
1047
1048         init_timer(&h->gc);
1049         h->gc.data = (unsigned long) set;
1050         h->gc.function = type_pf_gc;
1051         h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
1052         add_timer(&h->gc);
1053         pr_debug("gc initialized, run in every %u\n",
1054                  IPSET_GC_PERIOD(h->timeout));
1055 }
1056
1057 #undef type_pf_data_equal
1058 #undef type_pf_data_isnull
1059 #undef type_pf_data_copy
1060 #undef type_pf_data_zero_out
1061 #undef type_pf_data_list
1062 #undef type_pf_data_tlist
1063
1064 #undef type_pf_elem
1065 #undef type_pf_telem
1066 #undef type_pf_data_timeout
1067 #undef type_pf_data_expired
1068 #undef type_pf_data_netmask
1069 #undef type_pf_data_timeout_set
1070
1071 #undef type_pf_elem_add
1072 #undef type_pf_add
1073 #undef type_pf_del
1074 #undef type_pf_test_cidrs
1075 #undef type_pf_test
1076
1077 #undef type_pf_elem_tadd
1078 #undef type_pf_expire
1079 #undef type_pf_tadd
1080 #undef type_pf_tdel
1081 #undef type_pf_ttest_cidrs
1082 #undef type_pf_ttest
1083
1084 #undef type_pf_resize
1085 #undef type_pf_tresize
1086 #undef type_pf_flush
1087 #undef type_pf_destroy
1088 #undef type_pf_head
1089 #undef type_pf_list
1090 #undef type_pf_tlist
1091 #undef type_pf_same_set
1092 #undef type_pf_kadt
1093 #undef type_pf_uadt
1094 #undef type_pf_gc
1095 #undef type_pf_gc_init
1096 #undef type_pf_variant
1097 #undef type_pf_tvariant