#include <linux/mempool.h>
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/device.h>
#include <linux/scatterlist.h>

#include "blktap.h" /* struct blktap, BLKIF_MAX_SEGMENTS_PER_REQUEST, MMAP_PAGES, BLK_RING_SIZE */

/* max pages per shared pool. just to prevent accidental dos. */
#define POOL_MAX_PAGES           (256 * BLKIF_MAX_SEGMENTS_PER_REQUEST)

/* default page pool size. when considering to shrink a shared pool,
 * note that paused tapdisks may grab a whole lot of pages for a long
 * time. */
#define POOL_DEFAULT_PAGES       (2 * MMAP_PAGES)

/* max number of pages allocatable per request. */
#define POOL_MAX_REQUEST_PAGES   BLKIF_MAX_SEGMENTS_PER_REQUEST

/* min number of request structs per pool. these grow dynamically. */
#define POOL_MIN_REQS            BLK_RING_SIZE

static struct kset *pool_set;

#define kobj_to_pool(_kobj) \
	container_of(_kobj, struct blktap_page_pool, kobj)

static struct kmem_cache *request_cache;
static mempool_t *request_pool;
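
/*
 * Wake pool waiters once a full segment set's worth of pages is back
 * in the pool, so the next blocked tapdisk can make progress.
 */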
static void
__page_pool_wake(struct blktap_page_pool *pool)
{
	mempool_t *mem = pool->bufs;

	/*
	 * NB. slightly wasteful to always wait for a full segment
	 * set, but this ensures the next disk makes progress.
	 * presently, the repeated request struct alloc/release
	 * cycles would otherwise keep everyone spinning.
	 */
	if (mem->curr_nr >= POOL_MAX_REQUEST_PAGES)
		wake_up(&pool->wait);
}
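
/*
 * Allocate @nr_pages pool pages into @request. Returns 0 on success,
 * -ENOMEM if the pool is currently drained. Callers are expected to
 * back off and retry; a minimal sketch of the intended pattern
 * (hypothetical caller, assuming process context):
 *
 *	while (blktap_request_get_pages(tap, request, nr_pages))
 *		if (wait_event_interruptible(tap->pool->wait,
 *				tap->pool->bufs->curr_nr >= nr_pages))
 *			return -ERESTARTSYS;
 */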
int
blktap_request_get_pages(struct blktap *tap,
			 struct blktap_request *request, int nr_pages)
{
	struct blktap_page_pool *pool = tap->pool;
	mempool_t *mem = pool->bufs;
	struct page *page;

	BUG_ON(request->nr_pages != 0);
	BUG_ON(nr_pages > POOL_MAX_REQUEST_PAGES);

	if (mem->curr_nr < nr_pages)
		return -ENOMEM;

	/* NB. avoid thundering herds of tapdisks colliding. */
	spin_lock(&pool->lock);

	if (mem->curr_nr < nr_pages) {
		spin_unlock(&pool->lock);
		return -ENOMEM;
	}

	while (request->nr_pages < nr_pages) {
		page = mempool_alloc(mem, GFP_NOWAIT);
		BUG_ON(!page); /* curr_nr >= nr_pages, checked under the lock */
		request->pages[request->nr_pages++] = page;
	}

	spin_unlock(&pool->lock);

	return 0;
}
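
/*
 * Return all pages held by @request to its tap's shared pool.
 */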
static void
blktap_request_put_pages(struct blktap *tap,
			 struct blktap_request *request)
{
	struct blktap_page_pool *pool = tap->pool;
	struct page *page;

	while (request->nr_pages) {
		page = request->pages[--request->nr_pages];
		mempool_free(page, pool->bufs);
	}
}
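
/*
 * Format a one-line pool summary (name, reserved and free page counts)
 * into @buf; returns the number of bytes written.
 */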
size_t
blktap_request_debug(struct blktap *tap, char *buf, size_t size)
{
	struct blktap_page_pool *pool = tap->pool;
	mempool_t *mem = pool->bufs;
	char *s = buf, *end = buf + size;

	s += snprintf(s, end - s,
		      "pool:%s pages:%d free:%d\n",
		      kobject_name(&pool->kobj),
		      mem->min_nr, mem->curr_nr);

	return s - buf;
}
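
/*
 * Allocate a request struct from the slab-backed mempool. May return
 * NULL under memory pressure; allocation does not sleep.
 */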
struct blktap_request*
blktap_request_alloc(struct blktap *tap)
{
	struct blktap_request *request;

	request = mempool_alloc(request_pool, GFP_NOWAIT);
	if (request)
		request->tap = tap;

	return request;
}

void
blktap_request_free(struct blktap *tap,
		    struct blktap_request *request)
{
	blktap_request_put_pages(tap, request);

	mempool_free(request, request_pool);

	__page_pool_wake(tap->pool);
}
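
/*
 * Copy segment @seg between its scatterlist buffer and the request's
 * pool page: into the pool page for writes, out of it for reads.
 */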
void
blktap_request_bounce(struct blktap *tap,
		      struct blktap_request *request,
		      int seg, int write)
{
	struct scatterlist *sg = &request->sg_table[seg];
	void *s, *p;

	BUG_ON(seg >= request->nr_pages);

	s = sg_virt(sg);
	p = page_address(request->pages[seg]) + sg->offset;

	if (write)
		memcpy(p, s, sg->length);
	else
		memcpy(s, p, sg->length);
}

static void
blktap_request_ctor(void *obj)
{
	struct blktap_request *request = obj;

	memset(request, 0, sizeof(*request));
	sg_init_table(request->sg_table, ARRAY_SIZE(request->sg_table));
}
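
/*
 * Grow or shrink the pool's page reserve to @target pages, then wake
 * any waiters the new reserve can satisfy.
 */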
static int
blktap_page_pool_resize(struct blktap_page_pool *pool, int target)
{
	mempool_t *bufs = pool->bufs;
	int err;

	/* NB. mempool asserts min_nr >= 1 */
	target = max(1, target);

	err = mempool_resize(bufs, target, GFP_KERNEL);
	if (err)
		return err;

	__page_pool_wake(pool);

	return 0;
}

struct pool_attribute {
	struct attribute attr;

	ssize_t (*show)(struct blktap_page_pool *pool,
			char *buf);

	ssize_t (*store)(struct blktap_page_pool *pool,
			 const char *buf, size_t count);
};

#define kattr_to_pool_attr(_kattr) \
	container_of(_kattr, struct pool_attribute, attr)

static ssize_t
blktap_page_pool_show_size(struct blktap_page_pool *pool,
			   char *buf)
{
	mempool_t *mem = pool->bufs;

	return sprintf(buf, "%d", mem->min_nr);
}

static ssize_t
blktap_page_pool_store_size(struct blktap_page_pool *pool,
			    const char *buf, size_t size)
{
	int target;

	/*
	 * NB. target fixup to avoid undesired results. less than a
	 * full segment set can wedge the disk. much more than a
	 * couple times the physical queue depth is rarely useful.
	 */
	target = simple_strtoul(buf, NULL, 0);
	target = max(POOL_MAX_REQUEST_PAGES, target);
	target = min(target, POOL_MAX_PAGES);

	return blktap_page_pool_resize(pool, target) ? : size;
}

static struct pool_attribute blktap_page_pool_attr_size =
	__ATTR(size, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH,
	       blktap_page_pool_show_size,
	       blktap_page_pool_store_size);
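
/*
 * These attributes surface pool state through the "pools" kset
 * registered in blktap_page_pool_init(). Assuming that kset ends up
 * under /sys/class/blktap2 (a hypothetical path; it depends on the
 * parent kobject the caller passes in), a pool would be inspected and
 * resized from userspace with:
 *
 *	cat /sys/class/blktap2/pools/<name>/size
 *	echo 512 > /sys/class/blktap2/pools/<name>/size
 */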

static ssize_t
blktap_page_pool_show_free(struct blktap_page_pool *pool,
			   char *buf)
{
	mempool_t *mem = pool->bufs;

	return sprintf(buf, "%d", mem->curr_nr);
}

static struct pool_attribute blktap_page_pool_attr_free =
	__ATTR(free, S_IRUSR|S_IRGRP|S_IROTH,
	       blktap_page_pool_show_free,
	       NULL);

static struct attribute *blktap_page_pool_attrs[] = {
	&blktap_page_pool_attr_size.attr,
	&blktap_page_pool_attr_free.attr,
	NULL,
};
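
/*
 * Look up a kobject in @kset by name, taking a reference on it if
 * found; returns NULL otherwise.
 */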
static inline struct kobject*
__blktap_kset_find_obj(struct kset *kset, const char *name)
{
	struct kobject *k;
	struct kobject *ret = NULL;

	spin_lock(&kset->list_lock);
	list_for_each_entry(k, &kset->list, entry) {
		if (kobject_name(k) && !strcmp(kobject_name(k), name)) {
			ret = kobject_get(k);
			break;
		}
	}
	spin_unlock(&kset->list_lock);

	return ret;
}

static ssize_t
blktap_page_pool_show_attr(struct kobject *kobj, struct attribute *kattr,
			   char *buf)
{
	struct blktap_page_pool *pool = kobj_to_pool(kobj);
	struct pool_attribute *attr = kattr_to_pool_attr(kattr);

	if (attr->show)
		return attr->show(pool, buf);

	return -EIO;
}

static ssize_t
blktap_page_pool_store_attr(struct kobject *kobj, struct attribute *kattr,
			    const char *buf, size_t size)
{
	struct blktap_page_pool *pool = kobj_to_pool(kobj);
	struct pool_attribute *attr = kattr_to_pool_attr(kattr);

	if (attr->store)
		return attr->store(pool, buf, size);

	return -EIO;
}

static struct sysfs_ops blktap_page_pool_sysfs_ops = {
	.show  = blktap_page_pool_show_attr,
	.store = blktap_page_pool_store_attr,
};

static void
blktap_page_pool_release(struct kobject *kobj)
{
	struct blktap_page_pool *pool = kobj_to_pool(kobj);

	mempool_destroy(pool->bufs);
	kfree(pool);
}

struct kobj_type blktap_page_pool_ktype = {
	.release       = blktap_page_pool_release,
	.sysfs_ops     = &blktap_page_pool_sysfs_ops,
	.default_attrs = blktap_page_pool_attrs,
};
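
/*
 * mempool element callbacks. Without __GFP_WAIT the allocator refuses
 * outright, so non-sleeping allocations are served exclusively from
 * the pre-allocated reserve and mem->curr_nr stays authoritative.
 * Pool pages are flagged PG_reserved for their lifetime in the pool.
 */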
static void*
__mempool_page_alloc(gfp_t gfp_mask, void *pool_data)
{
	struct page *page;

	if (!(gfp_mask & __GFP_WAIT))
		return NULL;

	page = alloc_page(gfp_mask);
	if (page)
		SetPageReserved(page);

	return page;
}

static void
__mempool_page_free(void *element, void *pool_data)
{
	struct page *page = element;

	ClearPageReserved(page);
	put_page(page);
}
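
/*
 * Create a named page pool of @nr_pages and register it in the pools
 * kset; returns the pool's kobject, or NULL on failure.
 */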
static struct kobject*
blktap_page_pool_create(const char *name, int nr_pages)
{
	struct blktap_page_pool *pool;
	int err;

	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
	if (!pool)
		goto fail;

	spin_lock_init(&pool->lock);
	init_waitqueue_head(&pool->wait);

	pool->bufs = mempool_create(nr_pages,
				    __mempool_page_alloc, __mempool_page_free,
				    pool);
	if (!pool->bufs)
		goto fail_pool;

	kobject_init(&pool->kobj, &blktap_page_pool_ktype);
	pool->kobj.kset = pool_set;
	err = kobject_add(&pool->kobj, &pool_set->kobj, "%s", name);
	if (err)
		goto fail_bufs;

	return &pool->kobj;

fail_bufs:
	mempool_destroy(pool->bufs);
fail_pool:
	kfree(pool);
fail:
	return NULL;
}
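
/*
 * Find a pool by name, creating it with the default size if it does
 * not exist yet. Returns the pool with a kobject reference held, or
 * ERR_PTR(-ENOMEM).
 */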
struct blktap_page_pool*
blktap_page_pool_get(const char *name)
{
	struct kobject *kobj;

	kobj = __blktap_kset_find_obj(pool_set, name);
	if (!kobj)
		kobj = blktap_page_pool_create(name,
					       POOL_DEFAULT_PAGES);
	if (!kobj)
		return ERR_PTR(-ENOMEM);

	return kobj_to_pool(kobj);
}
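
/*
 * Module setup: create the request slab cache and mempool, plus the
 * "pools" kset under @parent. Returns 0 or -ENOMEM.
 */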
int
blktap_page_pool_init(struct kobject *parent)
{
	request_cache =
		kmem_cache_create("blktap-request",
				  sizeof(struct blktap_request), 0,
				  0, blktap_request_ctor);
	if (!request_cache)
		return -ENOMEM;

	request_pool =
		mempool_create_slab_pool(POOL_MIN_REQS, request_cache);
	if (!request_pool)
		return -ENOMEM;

	pool_set = kset_create_and_add("pools", NULL, parent);
	if (!pool_set)
		return -ENOMEM;

	return 0;
}

void
blktap_page_pool_exit(void)
{
	if (pool_set) {
		BUG_ON(!list_empty(&pool_set->list));
		kset_unregister(pool_set);
		pool_set = NULL;
	}

	if (request_pool) {
		mempool_destroy(request_pool);
		request_pool = NULL;
	}

	if (request_cache) {
		kmem_cache_destroy(request_cache);
		request_cache = NULL;
	}
}