- Update Xen patches to 3.3-rc5 and c/s 1157.
linux-flexiantxendom0-3.2.10.git: drivers/xen/blktap2-new/request.c
#include <linux/mempool.h>
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/device.h>

#include "blktap.h"

/* max pages per shared pool. just to prevent accidental DoS. */
#define POOL_MAX_PAGES           (256*BLKIF_MAX_SEGMENTS_PER_REQUEST)

/* default page pool size. when deciding whether to shrink a shared
 * pool, note that paused tapdisks may hold a whole lot of pages for a
 * long time. */
#define POOL_DEFAULT_PAGES       (2 * MMAP_PAGES)

/* max number of pages allocatable per request. */
#define POOL_MAX_REQUEST_PAGES   BLKIF_MAX_SEGMENTS_PER_REQUEST

/* min number of request structs per pool. these grow dynamically. */
#define POOL_MIN_REQS            BLK_RING_SIZE
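
/*
 * NB. the sizes above depend on values from blktap.h: with the classic
 * 11-segment blkif requests (an assumption, not checked here) the
 * shared-pool cap works out to 256 * 11 = 2816 pages, i.e. roughly
 * 11 MB with 4k pages.
 */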

static struct kset *pool_set;

#define kobj_to_pool(_kobj) \
        container_of(_kobj, struct blktap_page_pool, kobj)

static struct kmem_cache *request_cache;
static mempool_t *request_pool;

static void
__page_pool_wake(struct blktap_page_pool *pool)
{
        mempool_t *mem = pool->bufs;

        /*
         * NB. slightly wasteful to always wait for a full segment set,
         * but this ensures the next disk makes progress. Without it,
         * the repeated request struct alloc/release cycles would keep
         * everyone spinning.
         */

        if (mem->curr_nr >= POOL_MAX_REQUEST_PAGES)
                wake_up(&pool->wait);
}

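/*
 * Grab nr_pages bounce pages from the tap's shared pool into @request.
 * Returns 0, or -ENOMEM when the pool is currently too small; the
 * pool's wait queue exists so callers can sleep and retry (the actual
 * waiters live elsewhere in the driver). The BUG_ON(!page) in the loop
 * relies on the recheck under pool->lock: with GFP_NOWAIT,
 * __mempool_page_alloc below returns NULL, so mempool_alloc can only
 * hand out elements from the reserve just counted.
 */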
int
blktap_request_get_pages(struct blktap *tap,
                         struct blktap_request *request, int nr_pages)
{
        struct blktap_page_pool *pool = tap->pool;
        mempool_t *mem = pool->bufs;
        struct page *page;

        BUG_ON(request->nr_pages != 0);
        BUG_ON(nr_pages > POOL_MAX_REQUEST_PAGES);

        if (mem->curr_nr < nr_pages)
                return -ENOMEM;

        /* NB. avoid thundering herds of tapdisks colliding. */
        spin_lock(&pool->lock);

        if (mem->curr_nr < nr_pages) {
                spin_unlock(&pool->lock);
                return -ENOMEM;
        }

        while (request->nr_pages < nr_pages) {
                page = mempool_alloc(mem, GFP_NOWAIT);
                BUG_ON(!page);
                request->pages[request->nr_pages++] = page;
        }

        spin_unlock(&pool->lock);

        return 0;
}

static void
blktap_request_put_pages(struct blktap *tap,
                         struct blktap_request *request)
{
        struct blktap_page_pool *pool = tap->pool;
        struct page *page;

        while (request->nr_pages) {
                page = request->pages[--request->nr_pages];
                mempool_free(page, pool->bufs);
        }
}

size_t
blktap_request_debug(struct blktap *tap, char *buf, size_t size)
{
        struct blktap_page_pool *pool = tap->pool;
        mempool_t *mem = pool->bufs;
        char *s = buf, *end = buf + size;

        s += snprintf(s, end - s,
                      "pool:%s pages:%d free:%d\n",
                      kobject_name(&pool->kobj),
                      mem->min_nr, mem->curr_nr);

        return s - buf;
}

struct blktap_request*
blktap_request_alloc(struct blktap *tap)
{
        struct blktap_request *request;

        request = mempool_alloc(request_pool, GFP_NOWAIT);
        if (request)
                request->tap = tap;

        return request;
}

void
blktap_request_free(struct blktap *tap,
                    struct blktap_request *request)
{
        blktap_request_put_pages(tap, request);

        mempool_free(request, request_pool);

        __page_pool_wake(tap->pool);
}
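
/*
 * Typical pairing of the helpers above (illustrative sketch only; the
 * real callers live elsewhere in the blktap driver):
 *
 *	struct blktap_request *request;
 *
 *	request = blktap_request_alloc(tap);
 *	if (!request)
 *		return -EBUSY;			-- back off and retry later
 *
 *	if (blktap_request_get_pages(tap, request, nr_segments)) {
 *		blktap_request_free(tap, request);  -- nr_pages == 0, just recycles
 *		return -EBUSY;
 *	}
 *	...
 *	blktap_request_free(tap, request);	-- drops pages, wakes pool waiters
 */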

void
blktap_request_bounce(struct blktap *tap,
                      struct blktap_request *request,
                      int seg, int write)
{
        struct scatterlist *sg = &request->sg_table[seg];
        void *s, *p;

        BUG_ON(seg >= request->nr_pages);

        s = sg_virt(sg);
        p = page_address(request->pages[seg]) + sg->offset;

        if (write)
                memcpy(p, s, sg->length);
        else
                memcpy(s, p, sg->length);
}
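
/*
 * NB. the sg entries point at the original bio data, while
 * request->pages are the pool pages handed to tapdisk (they get mapped
 * into its address space elsewhere in the driver). So a write stages
 * the guest data into the pool page before tapdisk runs, and a read
 * copies the completed data back out of it.
 */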

static void
blktap_request_ctor(void *obj)
{
        struct blktap_request *request = obj;

        memset(request, 0, sizeof(*request));
        sg_init_table(request->sg_table, ARRAY_SIZE(request->sg_table));
}
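
/*
 * NB. a slab constructor runs once when an object first enters the
 * cache, not on every mempool_alloc, so the sg_table above stays
 * initialized across request reuse.
 */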

static int
blktap_page_pool_resize(struct blktap_page_pool *pool, int target)
{
        mempool_t *bufs = pool->bufs;
        int err;

        /* NB. mempool asserts min_nr >= 1 */
        target = max(1, target);

        err = mempool_resize(bufs, target, GFP_KERNEL);
        if (err)
                return err;

        __page_pool_wake(pool);

        return 0;
}

struct pool_attribute {
        struct attribute attr;

        ssize_t (*show)(struct blktap_page_pool *pool,
                        char *buf);

        ssize_t (*store)(struct blktap_page_pool *pool,
                         const char *buf, size_t count);
};

#define kattr_to_pool_attr(_kattr) \
        container_of(_kattr, struct pool_attribute, attr)

static ssize_t
blktap_page_pool_show_size(struct blktap_page_pool *pool,
                           char *buf)
{
        mempool_t *mem = pool->bufs;
        return sprintf(buf, "%d", mem->min_nr);
}

static ssize_t
blktap_page_pool_store_size(struct blktap_page_pool *pool,
                            const char *buf, size_t size)
{
        int target;

        /*
         * NB. target fixup to avoid undesired results. less than a
         * full segment set can wedge the disk. much more than a
         * couple times the physical queue depth is rarely useful.
         */

        target = simple_strtoul(buf, NULL, 0);
        target = max(POOL_MAX_REQUEST_PAGES, target);
        target = min(target, POOL_MAX_PAGES);

        return blktap_page_pool_resize(pool, target) ? : size;
}

static struct pool_attribute blktap_page_pool_attr_size =
        __ATTR(size, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH,
               blktap_page_pool_show_size,
               blktap_page_pool_store_size);
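
/*
 * The size attribute is root-writable; writes are clamped to
 * [POOL_MAX_REQUEST_PAGES, POOL_MAX_PAGES] by the store handler above.
 * E.g. (the exact sysfs path depends on the parent kobject passed to
 * blktap_page_pool_init):
 *
 *	echo <pages> > /sys/.../pools/<name>/size
 */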

static ssize_t
blktap_page_pool_show_free(struct blktap_page_pool *pool,
                           char *buf)
{
        mempool_t *mem = pool->bufs;
        return sprintf(buf, "%d", mem->curr_nr);
}

static struct pool_attribute blktap_page_pool_attr_free =
        __ATTR(free, S_IRUSR|S_IRGRP|S_IROTH,
               blktap_page_pool_show_free,
               NULL);

static struct attribute *blktap_page_pool_attrs[] = {
        &blktap_page_pool_attr_size.attr,
        &blktap_page_pool_attr_free.attr,
        NULL,
};
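
/*
 * Local equivalent of kset_find_obj(); presumably open-coded because
 * that helper was not exported to modules in this kernel series.
 * Returns a new reference, or NULL if no pool by that name exists.
 */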

static inline struct kobject*
__blktap_kset_find_obj(struct kset *kset, const char *name)
{
        struct kobject *k;
        struct kobject *ret = NULL;

        spin_lock(&kset->list_lock);
        list_for_each_entry(k, &kset->list, entry) {
                if (kobject_name(k) && !strcmp(kobject_name(k), name)) {
                        ret = kobject_get(k);
                        break;
                }
        }
        spin_unlock(&kset->list_lock);
        return ret;
}

static ssize_t
blktap_page_pool_show_attr(struct kobject *kobj, struct attribute *kattr,
                           char *buf)
{
        struct blktap_page_pool *pool = kobj_to_pool(kobj);
        struct pool_attribute *attr = kattr_to_pool_attr(kattr);

        if (attr->show)
                return attr->show(pool, buf);

        return -EIO;
}

static ssize_t
blktap_page_pool_store_attr(struct kobject *kobj, struct attribute *kattr,
                            const char *buf, size_t size)
{
        struct blktap_page_pool *pool = kobj_to_pool(kobj);
        struct pool_attribute *attr = kattr_to_pool_attr(kattr);

        if (attr->store)
                return attr->store(pool, buf, size);

        return -EIO;
}

static struct sysfs_ops blktap_page_pool_sysfs_ops = {
        .show           = blktap_page_pool_show_attr,
        .store          = blktap_page_pool_store_attr,
};

static void
blktap_page_pool_release(struct kobject *kobj)
{
        struct blktap_page_pool *pool = kobj_to_pool(kobj);
        mempool_destroy(pool->bufs);
        kfree(pool);
}

struct kobj_type blktap_page_pool_ktype = {
        .release       = blktap_page_pool_release,
        .sysfs_ops     = &blktap_page_pool_sysfs_ops,
        .default_attrs = blktap_page_pool_attrs,
};
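
/*
 * Pool element allocators. Refusing atomic (!__GFP_WAIT) allocations
 * means a GFP_NOWAIT mempool_alloc never dips into the page allocator
 * and only hands out the preallocated reserve, which is what makes the
 * curr_nr checks in blktap_request_get_pages meaningful. The pages are
 * marked reserved, presumably because they are later mapped into the
 * tapdisk process.
 */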

static void*
__mempool_page_alloc(gfp_t gfp_mask, void *pool_data)
{
        struct page *page;

        if (!(gfp_mask & __GFP_WAIT))
                return NULL;

        page = alloc_page(gfp_mask);
        if (page)
                SetPageReserved(page);

        return page;
}

static void
__mempool_page_free(void *element, void *pool_data)
{
        struct page *page = element;

        ClearPageReserved(page);
        put_page(page);
}

static struct kobject*
blktap_page_pool_create(const char *name, int nr_pages)
{
        struct blktap_page_pool *pool;
        int err;

        pool = kzalloc(sizeof(*pool), GFP_KERNEL);
        if (!pool)
                goto fail;

        spin_lock_init(&pool->lock);
        init_waitqueue_head(&pool->wait);

        pool->bufs = mempool_create(nr_pages,
                                    __mempool_page_alloc, __mempool_page_free,
                                    pool);
        if (!pool->bufs)
                goto fail_pool;

        kobject_init(&pool->kobj, &blktap_page_pool_ktype);
        pool->kobj.kset = pool_set;
        err = kobject_add(&pool->kobj, &pool_set->kobj, "%s", name);
        if (err)
                goto fail_bufs;

        return &pool->kobj;

fail_bufs:
        mempool_destroy(pool->bufs);
fail_pool:
        kfree(pool);
fail:
        return NULL;
}

struct blktap_page_pool*
blktap_page_pool_get(const char *name)
{
        struct kobject *kobj;

        kobj = __blktap_kset_find_obj(pool_set, name);
        if (!kobj)
                kobj = blktap_page_pool_create(name,
                                               POOL_DEFAULT_PAGES);
        if (!kobj)
                return ERR_PTR(-ENOMEM);

        return kobj_to_pool(kobj);
}
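
/*
 * NB. two tapdisks racing to create the same pool will both miss the
 * lookup; the loser's kobject_add() then fails on the duplicate name
 * and it gets ERR_PTR(-ENOMEM) instead of the winner's pool.
 */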

int __init
blktap_page_pool_init(struct kobject *parent)
{
        request_cache =
                kmem_cache_create("blktap-request",
                                  sizeof(struct blktap_request), 0,
                                  0, blktap_request_ctor);
        if (!request_cache)
                return -ENOMEM;

        request_pool =
                mempool_create_slab_pool(POOL_MIN_REQS, request_cache);
        if (!request_pool)
                return -ENOMEM;

        pool_set = kset_create_and_add("pools", NULL, parent);
        if (!pool_set)
                return -ENOMEM;

        return 0;
}
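
/*
 * NB. no unwinding on partial failure above; the expectation seems to
 * be that the caller falls back to blktap_page_pool_exit(), which
 * checks each pointer before tearing it down.
 */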

void
blktap_page_pool_exit(void)
{
        if (pool_set) {
                BUG_ON(!list_empty(&pool_set->list));
                kset_unregister(pool_set);
                pool_set = NULL;
        }

        if (request_pool) {
                mempool_destroy(request_pool);
                request_pool = NULL;
        }

        if (request_cache) {
                kmem_cache_destroy(request_cache);
                request_cache = NULL;
        }
}