drivers/md/dm-mpath.c
1 /*
2  * Copyright (C) 2003 Sistina Software Limited.
3  * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4  *
5  * This file is released under the GPL.
6  */
7
8 #include "dm.h"
9 #include "dm-path-selector.h"
10 #include "dm-uevent.h"
11
12 #include <linux/ctype.h>
13 #include <linux/init.h>
14 #include <linux/mempool.h>
15 #include <linux/module.h>
16 #include <linux/pagemap.h>
17 #include <linux/slab.h>
18 #include <linux/time.h>
19 #include <linux/workqueue.h>
20 #include <scsi/scsi_dh.h>
21 #include <asm/atomic.h>
22
23 #define DM_MSG_PREFIX "multipath"
24 #define MESG_STR(x) x, sizeof(x)
25
26 /* Path properties */
27 struct pgpath {
28         struct list_head list;
29
30         struct priority_group *pg;      /* Owning PG */
31         unsigned is_active;             /* Path status */
32         unsigned fail_count;            /* Cumulative failure count */
33
34         struct dm_path path;
35         struct work_struct deactivate_path;
36         struct work_struct activate_path;
37 };
38
39 #define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)
40
41 /*
42  * Paths are grouped into Priority Groups and numbered from 1 upwards.
43  * Each has a path selector which controls which path gets used.
44  */
45 struct priority_group {
46         struct list_head list;
47
48         struct multipath *m;            /* Owning multipath instance */
49         struct path_selector ps;
50
51         unsigned pg_num;                /* Reference number */
52         unsigned bypassed;              /* Temporarily bypass this PG? */
53
54         unsigned nr_pgpaths;            /* Number of paths in PG */
55         struct list_head pgpaths;
56 };
57
58 /* Multipath context */
59 struct multipath {
60         struct list_head list;
61         struct dm_target *ti;
62
63         spinlock_t lock;
64
65         const char *hw_handler_name;
66         unsigned nr_priority_groups;
67         struct list_head priority_groups;
68         unsigned pg_init_required;      /* pg_init needs calling? */
69         unsigned pg_init_in_progress;   /* Only one pg_init allowed at once */
70
71         unsigned nr_valid_paths;        /* Total number of usable paths */
72         struct pgpath *current_pgpath;
73         struct priority_group *current_pg;
74         struct priority_group *next_pg; /* Switch to this PG if set */
75         unsigned repeat_count;          /* I/Os left before calling PS again */
76
77         unsigned queue_io;              /* Must we queue all I/O? */
78         unsigned queue_if_no_path;      /* Queue I/O if last path fails? */
79         unsigned saved_queue_if_no_path;/* Saved state during suspension */
80         unsigned pg_init_retries;       /* Number of times to retry pg_init */
81         unsigned pg_init_count;         /* Number of times pg_init called */
82
83         struct work_struct process_queued_ios;
84         struct list_head queued_ios;
85         unsigned queue_size;
86
87         struct work_struct trigger_event;
88
89         /*
90          * We must use a mempool of dm_mpath_io structs so that we
91          * can resubmit bios on error.
92          */
93         mempool_t *mpio_pool;
94 };
95
96 /*
97  * Context information attached to each bio we process.
98  */
99 struct dm_mpath_io {
100         struct pgpath *pgpath;
101         size_t nr_bytes;
102 };
103
104 typedef int (*action_fn) (struct pgpath *pgpath);
105
106 #define MIN_IOS 256     /* Mempool size */
107
108 static struct kmem_cache *_mpio_cache;
109
110 static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
111 static void process_queued_ios(struct work_struct *work);
112 static void trigger_event(struct work_struct *work);
113 static void activate_path(struct work_struct *work);
114 static void deactivate_path(struct work_struct *work);
115
116
117 /*-----------------------------------------------
118  * Allocation routines
119  *-----------------------------------------------*/
120
121 static struct pgpath *alloc_pgpath(void)
122 {
123         struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL);
124
125         if (pgpath) {
126                 pgpath->is_active = 1;
127                 INIT_WORK(&pgpath->deactivate_path, deactivate_path);
128                 INIT_WORK(&pgpath->activate_path, activate_path);
129         }
130
131         return pgpath;
132 }
133
134 static void free_pgpath(struct pgpath *pgpath)
135 {
136         kfree(pgpath);
137 }
138
139 static void deactivate_path(struct work_struct *work)
140 {
141         struct pgpath *pgpath =
142                 container_of(work, struct pgpath, deactivate_path);
143
144         if (pgpath->path.dev)
145                 blk_abort_queue(pgpath->path.dev->bdev->bd_disk->queue);
146 }
147
148 static struct priority_group *alloc_priority_group(void)
149 {
150         struct priority_group *pg;
151
152         pg = kzalloc(sizeof(*pg), GFP_KERNEL);
153
154         if (pg)
155                 INIT_LIST_HEAD(&pg->pgpaths);
156
157         return pg;
158 }
159
160 static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
161 {
162         struct pgpath *pgpath, *tmp;
163
164         list_for_each_entry_safe(pgpath, tmp, pgpaths, list) {
165                 list_del(&pgpath->list);
166                 /* Disabled paths have no underlying device attached */
167                 if (pgpath->path.dev)
168                         dm_put_device(ti, pgpath->path.dev);
169                 free_pgpath(pgpath);
170         }
171 }
172
173 static void free_priority_group(struct priority_group *pg,
174                                 struct dm_target *ti)
175 {
176         struct path_selector *ps = &pg->ps;
177
178         if (ps->type) {
179                 ps->type->destroy(ps);
180                 dm_put_path_selector(ps->type);
181         }
182
183         free_pgpaths(&pg->pgpaths, ti);
184         kfree(pg);
185 }
186
187 static struct multipath *alloc_multipath(struct dm_target *ti)
188 {
189         struct multipath *m;
190
191         m = kzalloc(sizeof(*m), GFP_KERNEL);
192         if (m) {
193                 INIT_LIST_HEAD(&m->priority_groups);
194                 INIT_LIST_HEAD(&m->queued_ios);
195                 spin_lock_init(&m->lock);
196                 m->queue_io = 1;
197                 INIT_WORK(&m->process_queued_ios, process_queued_ios);
198                 INIT_WORK(&m->trigger_event, trigger_event);
199                 m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
200                 if (!m->mpio_pool) {
201                         kfree(m);
202                         return NULL;
203                 }
204                 m->ti = ti;
205                 ti->private = m;
206         }
207
208         return m;
209 }
210
211 static void free_multipath(struct multipath *m)
212 {
213         struct priority_group *pg, *tmp;
214
215         list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) {
216                 list_del(&pg->list);
217                 free_priority_group(pg, m->ti);
218         }
219
220         kfree(m->hw_handler_name);
221         mempool_destroy(m->mpio_pool);
222         kfree(m);
223 }
224
225
226 /*-----------------------------------------------
227  * Path selection
228  *-----------------------------------------------*/
229
230 static void __switch_pg(struct multipath *m, struct pgpath *pgpath)
231 {
232         m->current_pg = pgpath->pg;
233
234         /* Must we initialise the PG first, and queue I/O till it's ready? */
235         if (m->hw_handler_name) {
236                 m->pg_init_required = 1;
237                 m->queue_io = 1;
238         } else {
239                 m->pg_init_required = 0;
240                 m->queue_io = 0;
241         }
242
243         m->pg_init_count = 0;
244 }
245
246 static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg,
247                                size_t nr_bytes)
248 {
249         struct dm_path *path;
250
251         path = pg->ps.type->select_path(&pg->ps, &m->repeat_count, nr_bytes);
252         if (!path)
253                 return -ENXIO;
254
255         m->current_pgpath = path_to_pgpath(path);
256
257         if (!m->current_pgpath->path.dev) {
258                 m->current_pgpath = NULL;
259                 return -ENODEV;
260         }
261
262         if (m->current_pg != pg)
263                 __switch_pg(m, m->current_pgpath);
264
265         return 0;
266 }
267
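/*
 * Pick a new current path, honouring a pending PG switch request and
 * trying bypassed priority groups only once everything else has failed.
 */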
268 static void __choose_pgpath(struct multipath *m, size_t nr_bytes)
269 {
270         struct priority_group *pg;
271         unsigned bypassed = 1;
272
273         if (!m->nr_valid_paths)
274                 goto failed;
275
276         /* Were we instructed to switch PG? */
277         if (m->next_pg) {
278                 pg = m->next_pg;
279                 m->next_pg = NULL;
280                 if (!__choose_path_in_pg(m, pg, nr_bytes))
281                         return;
282         }
283
284         /* Don't change PG until it has no remaining paths */
285         if (m->current_pg && !__choose_path_in_pg(m, m->current_pg, nr_bytes))
286                 return;
287
288         /*
289          * Loop through priority groups until we find a valid path.
290          * First time we skip PGs marked 'bypassed'.
291          * Second time we only try the ones we skipped.
292          */
293         do {
294                 list_for_each_entry(pg, &m->priority_groups, list) {
295                         if (pg->bypassed == bypassed)
296                                 continue;
297                         if (!__choose_path_in_pg(m, pg, nr_bytes))
298                                 return;
299                 }
300         } while (bypassed--);
301
302 failed:
303         m->current_pgpath = NULL;
304         m->current_pg = NULL;
305 }
306
307 /*
308  * Check whether bios must be queued in the device-mapper core rather
309  * than here in the target.
310  *
311  * m->lock must be held on entry.
312  *
313  * If m->queue_if_no_path and m->saved_queue_if_no_path hold the
314  * same value then we are not between multipath_presuspend()
315  * and multipath_resume() calls and we have no need to check
316  * for the DMF_NOFLUSH_SUSPENDING flag.
317  */
318 static int __must_push_back(struct multipath *m)
319 {
320         return (m->queue_if_no_path != m->saved_queue_if_no_path &&
321                 dm_noflush_suspending(m->ti));
322 }
323
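/*
 * Map a cloned request onto the current path.  The request is queued
 * for the daemon while pg_init is outstanding, or while no path is
 * available and queue_if_no_path is set; otherwise it is remapped to
 * the selected underlying device.
 */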
324 static int map_io(struct multipath *m, struct request *clone,
325                   struct dm_mpath_io *mpio, unsigned was_queued)
326 {
327         int r = DM_MAPIO_REMAPPED;
328         size_t nr_bytes = blk_rq_bytes(clone);
329         unsigned long flags;
330         struct pgpath *pgpath;
331         struct block_device *bdev;
332
333         spin_lock_irqsave(&m->lock, flags);
334
335         /* Do we need to select a new pgpath? */
336         if (!m->current_pgpath ||
337             (!m->queue_io && (m->repeat_count && --m->repeat_count == 0)))
338                 __choose_pgpath(m, nr_bytes);
339
340         pgpath = m->current_pgpath;
341
342         if (was_queued)
343                 m->queue_size--;
344
345         if ((pgpath && m->queue_io) ||
346             (!pgpath && m->queue_if_no_path)) {
347                 /* Queue for the daemon to resubmit */
348                 list_add_tail(&clone->queuelist, &m->queued_ios);
349                 m->queue_size++;
350                 if ((m->pg_init_required && !m->pg_init_in_progress) ||
351                     !m->queue_io)
352                         queue_work(kmultipathd, &m->process_queued_ios);
353                 pgpath = NULL;
354                 r = DM_MAPIO_SUBMITTED;
355         } else if (pgpath) {
356                 bdev = pgpath->path.dev->bdev;
357                 clone->q = bdev_get_queue(bdev);
358                 clone->rq_disk = bdev->bd_disk;
359         } else if (__must_push_back(m))
360                 r = DM_MAPIO_REQUEUE;
361         else
362                 r = -EIO;       /* Failed */
363
364         mpio->pgpath = pgpath;
365         mpio->nr_bytes = nr_bytes;
366
367         if (r == DM_MAPIO_REMAPPED && pgpath->pg->ps.type->start_io)
368                 pgpath->pg->ps.type->start_io(&pgpath->pg->ps, &pgpath->path,
369                                               nr_bytes);
370
371         spin_unlock_irqrestore(&m->lock, flags);
372
373         return r;
374 }
375
376 /*
377  * If we run out of usable paths, should we queue I/O or error it?
378  */
379 static int queue_if_no_path(struct multipath *m, unsigned queue_if_no_path,
380                             unsigned save_old_value)
381 {
382         unsigned long flags;
383
384         spin_lock_irqsave(&m->lock, flags);
385
386         if (save_old_value)
387                 m->saved_queue_if_no_path = m->queue_if_no_path;
388         else
389                 m->saved_queue_if_no_path = queue_if_no_path;
390         m->queue_if_no_path = queue_if_no_path;
391         if (!m->queue_if_no_path && m->queue_size)
392                 queue_work(kmultipathd, &m->process_queued_ios);
393
394         spin_unlock_irqrestore(&m->lock, flags);
395
396         return 0;
397 }
398
399 /*-----------------------------------------------------------------
400  * The multipath daemon is responsible for resubmitting queued ios.
401  *---------------------------------------------------------------*/
402
403 static void dispatch_queued_ios(struct multipath *m)
404 {
405         int r;
406         unsigned long flags;
407         struct dm_mpath_io *mpio;
408         union map_info *info;
409         struct request *clone, *n;
410         LIST_HEAD(cl);
411
412         spin_lock_irqsave(&m->lock, flags);
413         list_splice_init(&m->queued_ios, &cl);
414         spin_unlock_irqrestore(&m->lock, flags);
415
416         list_for_each_entry_safe(clone, n, &cl, queuelist) {
417                 list_del_init(&clone->queuelist);
418
419                 info = dm_get_rq_mapinfo(clone);
420                 mpio = info->ptr;
421
422                 r = map_io(m, clone, mpio, 1);
423                 if (r < 0) {
424                         mempool_free(mpio, m->mpio_pool);
425                         dm_kill_request(clone, r);
426                 } else if (r == DM_MAPIO_REMAPPED)
427                         dm_dispatch_request(clone);
428                 else if (r == DM_MAPIO_REQUEUE) {
429                         mempool_free(mpio, m->mpio_pool);
430                         dm_requeue_request(clone);
431                 }
432         }
433 }
434
435 static void process_queued_ios(struct work_struct *work)
436 {
437         struct multipath *m =
438                 container_of(work, struct multipath, process_queued_ios);
439         struct pgpath *pgpath = NULL, *tmp;
440         unsigned must_queue = 1;
441         unsigned long flags;
442
443         spin_lock_irqsave(&m->lock, flags);
444
445         if (!m->queue_size)
446                 goto out;
447
448         if (!m->current_pgpath)
449                 __choose_pgpath(m, 1 << 19); /* Assume 512 KB */
450
451         pgpath = m->current_pgpath;
452
453         if ((pgpath && !m->queue_io) ||
454             (!pgpath && !m->queue_if_no_path))
455                 must_queue = 0;
456
457         if (m->pg_init_required && !m->pg_init_in_progress && pgpath) {
458                 m->pg_init_count++;
459                 m->pg_init_required = 0;
460                 list_for_each_entry(tmp, &pgpath->pg->pgpaths, list) {
461                         queue_work(kmpath_handlerd, &tmp->activate_path);
462                         m->pg_init_in_progress++;
463                 }
464         }
465 out:
466         spin_unlock_irqrestore(&m->lock, flags);
467         if (!must_queue)
468                 dispatch_queued_ios(m);
469 }
470
471 /*
472  * An event is triggered whenever a path is taken out of use.
473  * Includes path failure and PG bypass.
474  */
475 static void trigger_event(struct work_struct *work)
476 {
477         struct multipath *m =
478                 container_of(work, struct multipath, trigger_event);
479
480         dm_table_event(m->ti->table);
481 }
482
483 /*-----------------------------------------------------------------
484  * Constructor/argument parsing:
485  * <#multipath feature args> [<arg>]*
486  * <#hw_handler args> [hw_handler [<arg>]*]
487  * <#priority groups>
488  * <initial priority group>
489  *     [<selector> <#selector args> [<arg>]*
490  *      <#paths> <#per-path selector args>
491  *         [<path> [<arg>]* ]+ ]+
492  *---------------------------------------------------------------*/
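/*
 * A purely illustrative example of the above (hypothetical device
 * numbers, round-robin path selector):
 *
 *     0 0 1 1 round-robin 0 2 1 8:16 1000 8:32 1000
 *
 * i.e. no feature args, no hardware handler, one priority group which
 * is also the initial group, the round-robin selector with no selector
 * args, and two paths each taking one per-path selector arg (the
 * repeat count).
 */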
493 struct param {
494         unsigned min;
495         unsigned max;
496         char *error;
497 };
498
499 static int read_param(struct param *param, char *str, unsigned *v, char **error)
500 {
501         if (!str ||
502             (sscanf(str, "%u", v) != 1) ||
503             (*v < param->min) ||
504             (*v > param->max)) {
505                 *error = param->error;
506                 return -EINVAL;
507         }
508
509         return 0;
510 }
511
512 struct arg_set {
513         unsigned argc;
514         char **argv;
515 };
516
517 static char *shift(struct arg_set *as)
518 {
519         char *r;
520
521         if (as->argc) {
522                 as->argc--;
523                 r = *as->argv;
524                 as->argv++;
525                 return r;
526         }
527
528         return NULL;
529 }
530
531 static void consume(struct arg_set *as, unsigned n)
532 {
533         BUG_ON(as->argc < n);
534         as->argc -= n;
535         as->argv += n;
536 }
537
538 static int parse_path_selector(struct arg_set *as, struct priority_group *pg,
539                                struct dm_target *ti)
540 {
541         int r;
542         struct path_selector_type *pst;
543         unsigned ps_argc;
544
545         static struct param _params[] = {
546                 {0, 1024, "invalid number of path selector args"},
547         };
548
549         pst = dm_get_path_selector(shift(as));
550         if (!pst) {
551                 ti->error = "unknown path selector type";
552                 return -EINVAL;
553         }
554
555         r = read_param(_params, shift(as), &ps_argc, &ti->error);
556         if (r) {
557                 dm_put_path_selector(pst);
558                 return -EINVAL;
559         }
560
561         r = pst->create(&pg->ps, ps_argc, as->argv);
562         if (r) {
563                 dm_put_path_selector(pst);
564                 ti->error = "path selector constructor failed";
565                 return r;
566         }
567
568         pg->ps.type = pst;
569         consume(as, ps_argc);
570
571         return 0;
572 }
573
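/*
 * Parse one path argument.  If the device cannot be opened but the
 * argument is a valid <major>:<minor> pair, the path is added in a
 * disabled state instead of failing the table load.
 */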
574 static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
575                                struct dm_target *ti)
576 {
577         int r;
578         struct pgpath *p;
579         char *path;
580         struct multipath *m = ti->private;
581
582         /* we need at least a path arg */
583         if (as->argc < 1) {
584                 ti->error = "no device given";
585                 return NULL;
586         }
587
588         p = alloc_pgpath();
589         if (!p)
590                 return NULL;
591
592         path = shift(as);
593         r = dm_get_device(ti, path, ti->begin, ti->len,
594                           dm_table_get_mode(ti->table), &p->path.dev);
595         if (r) {
596                 unsigned major, minor;
597
598                 /* Try to add a failed device */
599                 if (r == -ENXIO && sscanf(path, "%u:%u", &major, &minor) == 2) {
600                         dev_t dev;
601
602                         /* Extract the major/minor numbers */
603                         dev = MKDEV(major, minor);
604                         if (MAJOR(dev) != major || MINOR(dev) != minor) {
605                                 /* Nice try, didn't work */
606                                 DMWARN("Invalid device path %s", path);
607                                 ti->error = "error converting devnum";
608                                 goto bad;
609                         }
610                         DMWARN("adding disabled device %u:%u", major, minor);
611                         p->path.dev = NULL;
612                         format_dev_t(p->path.pdev, dev);
613                         p->is_active = 0;
614                 } else {
615                         ti->error = "error getting device";
616                         goto bad;
617                 }
618         } else {
619                 memcpy(p->path.pdev, p->path.dev->name, 16);
620         }
621
622         if (m->hw_handler_name && p->path.dev) {
623                 struct request_queue *q = bdev_get_queue(p->path.dev->bdev);
624
625                 r = scsi_dh_attach(q, m->hw_handler_name);
626                 if (r == -EBUSY) {
627                         /*
628                          * Already attached to different hw_handler,
629                          * try to reattach with correct one.
630                          */
631                         scsi_dh_detach(q);
632                         r = scsi_dh_attach(q, m->hw_handler_name);
633                 }
634                 if (r < 0) {
635                         ti->error = "error attaching hardware handler";
636                         dm_put_device(ti, p->path.dev);
637                         goto bad;
638                 }
639         }
640
641         r = ps->type->add_path(ps, &p->path, as->argc, as->argv, &ti->error);
642         if (r) {
643                 if (p->path.dev)
644                         dm_put_device(ti, p->path.dev);
645                 goto bad;
646         }
647         return p;
648
649  bad:
650         free_pgpath(p);
651         return NULL;
652 }
653
654 static struct priority_group *parse_priority_group(struct arg_set *as,
655                                                    struct multipath *m)
656 {
657         static struct param _params[] = {
658                 {1, 1024, "invalid number of paths"},
659                 {0, 1024, "invalid number of selector args"}
660         };
661
662         int r;
663         unsigned i, nr_selector_args, nr_params;
664         struct priority_group *pg;
665         struct dm_target *ti = m->ti;
666
667         if (as->argc < 2) {
668                 as->argc = 0;
669                 ti->error = "not enough priority group arguments";
670                 return NULL;
671         }
672
673         pg = alloc_priority_group();
674         if (!pg) {
675                 ti->error = "couldn't allocate priority group";
676                 return NULL;
677         }
678         pg->m = m;
679
680         r = parse_path_selector(as, pg, ti);
681         if (r)
682                 goto bad;
683
684         /*
685          * read the paths
686          */
687         r = read_param(_params, shift(as), &pg->nr_pgpaths, &ti->error);
688         if (r)
689                 goto bad;
690
691         r = read_param(_params + 1, shift(as), &nr_selector_args, &ti->error);
692         if (r)
693                 goto bad;
694
695         nr_params = 1 + nr_selector_args;
696         for (i = 0; i < pg->nr_pgpaths; i++) {
697                 struct pgpath *pgpath;
698                 struct arg_set path_args;
699
700                 if (as->argc < nr_params) {
701                         ti->error = "not enough path parameters";
702                         goto bad;
703                 }
704
705                 path_args.argc = nr_params;
706                 path_args.argv = as->argv;
707
708                 pgpath = parse_path(&path_args, &pg->ps, ti);
709                 if (!pgpath)
710                         goto bad;
711
712                 pgpath->pg = pg;
713                 list_add_tail(&pgpath->list, &pg->pgpaths);
714                 consume(as, nr_params);
715         }
716
717         return pg;
718
719  bad:
720         free_priority_group(pg, ti);
721         return NULL;
722 }
723
724 static int parse_hw_handler(struct arg_set *as, struct multipath *m)
725 {
726         unsigned hw_argc;
727         struct dm_target *ti = m->ti;
728
729         static struct param _params[] = {
730                 {0, 1024, "invalid number of hardware handler args"},
731         };
732
733         if (read_param(_params, shift(as), &hw_argc, &ti->error))
734                 return -EINVAL;
735
736         if (!hw_argc)
737                 return 0;
738
739         m->hw_handler_name = kstrdup(shift(as), GFP_KERNEL);
740         request_module("scsi_dh_%s", m->hw_handler_name);
741         if (scsi_dh_handler_exist(m->hw_handler_name) == 0) {
742                 ti->error = "unknown hardware handler type";
743                 kfree(m->hw_handler_name);
744                 m->hw_handler_name = NULL;
745                 return -EINVAL;
746         }
747         consume(as, hw_argc - 1);
748
749         return 0;
750 }
751
752 static int parse_features(struct arg_set *as, struct multipath *m)
753 {
754         int r;
755         unsigned argc;
756         struct dm_target *ti = m->ti;
757         const char *param_name;
758
759         static struct param _params[] = {
760                 {0, 3, "invalid number of feature args"},
761                 {1, 50, "pg_init_retries must be between 1 and 50"},
762         };
763
764         r = read_param(_params, shift(as), &argc, &ti->error);
765         if (r)
766                 return -EINVAL;
767
768         if (!argc)
769                 return 0;
770
771         do {
772                 param_name = shift(as);
773                 argc--;
774
775                 if (!strnicmp(param_name, MESG_STR("queue_if_no_path"))) {
776                         r = queue_if_no_path(m, 1, 0);
777                         continue;
778                 }
779
780                 if (!strnicmp(param_name, MESG_STR("pg_init_retries")) &&
781                     (argc >= 1)) {
782                         r = read_param(_params + 1, shift(as),
783                                        &m->pg_init_retries, &ti->error);
784                         argc--;
785                         continue;
786                 }
787
788                 ti->error = "Unrecognised multipath feature request";
789                 r = -EINVAL;
790         } while (argc && !r);
791
792         return r;
793 }
794
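/*
 * Construct a multipath target from a table line in the format
 * documented above.
 */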
795 static int multipath_ctr(struct dm_target *ti, unsigned int argc,
796                          char **argv)
797 {
798         /* target parameters */
799         static struct param _params[] = {
800                 {0, 1024, "invalid number of priority groups"},
801                 {0, 1024, "invalid initial priority group number"},
802         };
803
804         int r;
805         struct multipath *m;
806         struct arg_set as;
807         unsigned pg_count = 0;
808         unsigned next_pg_num;
809
810         as.argc = argc;
811         as.argv = argv;
812
813         m = alloc_multipath(ti);
814         if (!m) {
815                 ti->error = "can't allocate multipath";
816                 return -EINVAL;
817         }
818
819         r = parse_features(&as, m);
820         if (r)
821                 goto bad;
822
823         r = parse_hw_handler(&as, m);
824         if (r)
825                 goto bad;
826
827         r = read_param(_params, shift(&as), &m->nr_priority_groups, &ti->error);
828         if (r)
829                 goto bad;
830
831         r = read_param(_params + 1, shift(&as), &next_pg_num, &ti->error);
832         if (r)
833                 goto bad;
834
835         /* parse the priority groups */
836         while (as.argc) {
837                 struct priority_group *pg;
838
839                 pg = parse_priority_group(&as, m);
840                 if (!pg) {
841                         r = -EINVAL;
842                         goto bad;
843                 }
844
845                 m->nr_valid_paths += pg->nr_pgpaths;
846                 list_add_tail(&pg->list, &m->priority_groups);
847                 pg_count++;
848                 pg->pg_num = pg_count;
849                 if (!--next_pg_num)
850                         m->next_pg = pg;
851         }
852
853         if (pg_count != m->nr_priority_groups) {
854                 ti->error = "priority group count mismatch";
855                 r = -EINVAL;
856                 goto bad;
857         }
858
859         return 0;
860
861  bad:
862         free_multipath(m);
863         return r;
864 }
865
866 static void multipath_dtr(struct dm_target *ti)
867 {
868         struct multipath *m = (struct multipath *) ti->private;
869
870         flush_workqueue(kmpath_handlerd);
871         flush_workqueue(kmultipathd);
872         free_multipath(m);
873 }
874
875 /*
876  * Map cloned requests
877  */
878 static int multipath_map(struct dm_target *ti, struct request *clone,
879                          union map_info *map_context)
880 {
881         int r;
882         struct dm_mpath_io *mpio;
883         struct multipath *m = (struct multipath *) ti->private;
884
885         mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC);
886         if (!mpio)
887                 /* ENOMEM, requeue */
888                 return DM_MAPIO_REQUEUE;
889         memset(mpio, 0, sizeof(*mpio));
890
891         map_context->ptr = mpio;
892         clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
893         r = map_io(m, clone, mpio, 0);
894         if (r < 0 || r == DM_MAPIO_REQUEUE)
895                 mempool_free(mpio, m->mpio_pool);
896
897         return r;
898 }
899
900 /*
901  * Take a path out of use.
902  */
903 static int fail_path(struct pgpath *pgpath)
904 {
905         unsigned long flags;
906         struct multipath *m = pgpath->pg->m;
907
908         spin_lock_irqsave(&m->lock, flags);
909
910         if (!pgpath->is_active)
911                 goto out;
912
913         DMWARN("Failing path %s.", pgpath->path.pdev);
914
915         pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path);
916         pgpath->is_active = 0;
917         pgpath->fail_count++;
918
919         m->nr_valid_paths--;
920
921         if (pgpath == m->current_pgpath)
922                 m->current_pgpath = NULL;
923
924         dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
925                        pgpath->path.pdev, m->nr_valid_paths);
926
927         queue_work(kmultipathd, &m->trigger_event);
928         queue_work(kmultipathd, &pgpath->deactivate_path);
929
930 out:
931         spin_unlock_irqrestore(&m->lock, flags);
932
933         return 0;
934 }
935
936 /*
937  * Reinstate a previously-failed path
938  */
939 static int reinstate_path(struct pgpath *pgpath)
940 {
941         int r = 0;
942         unsigned long flags;
943         struct multipath *m = pgpath->pg->m;
944
945         spin_lock_irqsave(&m->lock, flags);
946
947         if (pgpath->is_active)
948                 goto out;
949
950         if (!pgpath->path.dev) {
951                 DMWARN("Cannot reinstate disabled path %s", pgpath->path.pdev);
952                 r = -ENODEV;
953                 goto out;
954         }
955
956         if (!pgpath->pg->ps.type->reinstate_path) {
957                 DMWARN("Reinstate path not supported by path selector %s",
958                        pgpath->pg->ps.type->name);
959                 r = -EINVAL;
960                 goto out;
961         }
962
963         r = pgpath->pg->ps.type->reinstate_path(&pgpath->pg->ps, &pgpath->path);
964         if (r)
965                 goto out;
966
967         pgpath->is_active = 1;
968
969         m->current_pgpath = NULL;
970         if (!m->nr_valid_paths++ && m->queue_size)
971                 queue_work(kmultipathd, &m->process_queued_ios);
972
973         dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
974                        pgpath->path.pdev, m->nr_valid_paths);
975
976         queue_work(kmultipathd, &m->trigger_event);
977
978 out:
979         spin_unlock_irqrestore(&m->lock, flags);
980
981         return r;
982 }
983
984 /*
985  * Fail or reinstate all paths that match the provided struct dm_dev.
986  */
987 static int action_dev(struct multipath *m, struct dm_dev *dev,
988                       action_fn action)
989 {
990         int r = 0;
991         struct pgpath *pgpath;
992         struct priority_group *pg;
993
994         if (!dev)
995                 return 0;
996
997         list_for_each_entry(pg, &m->priority_groups, list) {
998                 list_for_each_entry(pgpath, &pg->pgpaths, list) {
999                         if (pgpath->path.dev == dev)
1000                                 r = action(pgpath);
1001                 }
1002         }
1003
1004         return r;
1005 }
1006
1007 /*
1008  * Temporarily try to avoid having to use the specified PG
1009  */
1010 static void bypass_pg(struct multipath *m, struct priority_group *pg,
1011                       int bypassed)
1012 {
1013         unsigned long flags;
1014
1015         spin_lock_irqsave(&m->lock, flags);
1016
1017         pg->bypassed = bypassed;
1018         m->current_pgpath = NULL;
1019         m->current_pg = NULL;
1020
1021         spin_unlock_irqrestore(&m->lock, flags);
1022
1023         queue_work(kmultipathd, &m->trigger_event);
1024 }
1025
1026 /*
1027  * Switch to using the specified PG from the next I/O that gets mapped
1028  */
1029 static int switch_pg_num(struct multipath *m, const char *pgstr)
1030 {
1031         struct priority_group *pg;
1032         unsigned pgnum;
1033         unsigned long flags;
1034
1035         if (!pgstr || (sscanf(pgstr, "%u", &pgnum) != 1) || !pgnum ||
1036             (pgnum > m->nr_priority_groups)) {
1037                 DMWARN("invalid PG number supplied to switch_pg_num");
1038                 return -EINVAL;
1039         }
1040
1041         spin_lock_irqsave(&m->lock, flags);
1042         list_for_each_entry(pg, &m->priority_groups, list) {
1043                 pg->bypassed = 0;
1044                 if (--pgnum)
1045                         continue;
1046
1047                 m->current_pgpath = NULL;
1048                 m->current_pg = NULL;
1049                 m->next_pg = pg;
1050         }
1051         spin_unlock_irqrestore(&m->lock, flags);
1052
1053         queue_work(kmultipathd, &m->trigger_event);
1054         return 0;
1055 }
1056
1057 /*
1058  * Set/clear bypassed status of a PG.
1059  * PGs are numbered upwards from 1 in the order they were declared.
1060  */
1061 static int bypass_pg_num(struct multipath *m, const char *pgstr, int bypassed)
1062 {
1063         struct priority_group *pg;
1064         unsigned pgnum;
1065
1066         if (!pgstr || (sscanf(pgstr, "%u", &pgnum) != 1) || !pgnum ||
1067             (pgnum > m->nr_priority_groups)) {
1068                 DMWARN("invalid PG number supplied to bypass_pg");
1069                 return -EINVAL;
1070         }
1071
1072         list_for_each_entry(pg, &m->priority_groups, list) {
1073                 if (!--pgnum)
1074                         break;
1075         }
1076
1077         bypass_pg(m, pg, bypassed);
1078         return 0;
1079 }
1080
1081 /*
1082  * Should we retry pg_init immediately?
1083  */
1084 static int pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath)
1085 {
1086         unsigned long flags;
1087         int limit_reached = 0;
1088
1089         spin_lock_irqsave(&m->lock, flags);
1090
1091         if (m->pg_init_count <= m->pg_init_retries)
1092                 m->pg_init_required = 1;
1093         else
1094                 limit_reached = 1;
1095
1096         spin_unlock_irqrestore(&m->lock, flags);
1097
1098         return limit_reached;
1099 }
1100
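/*
 * Called from activate_path() with the result of a hardware handler
 * activation attempt: retry, bypass the PG or fail the path as
 * appropriate before letting queued I/O proceed.
 */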
1101 static void pg_init_done(struct dm_path *path, int errors)
1102 {
1103         struct pgpath *pgpath = path_to_pgpath(path);
1104         struct priority_group *pg = pgpath->pg;
1105         struct multipath *m = pg->m;
1106         unsigned long flags;
1107
1108         /* device or driver problems */
1109         switch (errors) {
1110         case SCSI_DH_OK:
1111                 break;
1112         case SCSI_DH_NOSYS:
1113                 if (!m->hw_handler_name) {
1114                         errors = 0;
1115                         break;
1116                 }
1117                 DMERR("Cannot failover device because scsi_dh_%s was not "
1118                       "loaded.", m->hw_handler_name);
1119                 /*
1120                  * Fail path for now, so we do not ping pong
1121                  */
1122                 fail_path(pgpath);
1123                 break;
1124         case SCSI_DH_DEV_TEMP_BUSY:
1125                 /*
1126                  * Probably doing something like FW upgrade on the
1127                  * controller so try the other pg.
1128                  */
1129                 bypass_pg(m, pg, 1);
1130                 break;
1131         /* TODO: For SCSI_DH_RETRY we should wait a couple seconds */
1132         case SCSI_DH_RETRY:
1133         case SCSI_DH_IMM_RETRY:
1134         case SCSI_DH_RES_TEMP_UNAVAIL:
1135                 if (pg_init_limit_reached(m, pgpath))
1136                         fail_path(pgpath);
1137                 errors = 0;
1138                 break;
1139         default:
1140                 /*
1141                  * We probably do not want to fail the path for a device
1142                  * error, but this is what the old dm did. In future
1143                  * patches we can do more advanced handling.
1144                  */
1145                 fail_path(pgpath);
1146         }
1147
1148         spin_lock_irqsave(&m->lock, flags);
1149         if (errors) {
1150                 DMERR("Could not failover device. Error %d.", errors);
1151                 m->current_pgpath = NULL;
1152                 m->current_pg = NULL;
1153         } else if (!m->pg_init_required) {
1154                 m->queue_io = 0;
1155                 pg->bypassed = 0;
1156         }
1157
1158         m->pg_init_in_progress--;
1159         if (!m->pg_init_in_progress)
1160                 queue_work(kmultipathd, &m->process_queued_ios);
1161         spin_unlock_irqrestore(&m->lock, flags);
1162 }
1163
1164 static void activate_path(struct work_struct *work)
1165 {
1166         int ret = SCSI_DH_DEV_OFFLINED;
1167         struct pgpath *pgpath =
1168                 container_of(work, struct pgpath, activate_path);
1169
1170         if (pgpath->path.dev)
1171                 ret = scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev));
1172         pg_init_done(&pgpath->path, ret);
1173 }
1174
1175 /*
1176  * end_io handling
1177  */
1178 static int do_end_io(struct multipath *m, struct request *clone,
1179                      int error, struct dm_mpath_io *mpio)
1180 {
1181         /*
1182          * We don't queue any clone request inside the multipath target
1183          * during end I/O handling, since those clone requests don't have
1184          * bio clones.  If we queue them inside the multipath target,
1185          * we need to make bio clones, that requires memory allocation.
1186          * (See drivers/md/dm.c:end_clone_bio() about why the clone requests
1187          *  don't have bio clones.)
1188          * Instead of queueing the clone request here, we queue the original
1189          * request into dm core, which will remake a clone request and
1190          * clone bios for it and resubmit it later.
1191          */
1192         int r = DM_ENDIO_REQUEUE;
1193         unsigned long flags;
1194
1195         if (!error && !clone->errors)
1196                 return 0;       /* I/O complete */
1197
1198         if (error == -EOPNOTSUPP)
1199                 return error;
1200
1201         if (mpio->pgpath)
1202                 fail_path(mpio->pgpath);
1203
1204         spin_lock_irqsave(&m->lock, flags);
1205         if (!m->nr_valid_paths && !m->queue_if_no_path && !__must_push_back(m))
1206                 r = -EIO;
1207         spin_unlock_irqrestore(&m->lock, flags);
1208
1209         return r;
1210 }
1211
1212 static int multipath_end_io(struct dm_target *ti, struct request *clone,
1213                             int error, union map_info *map_context)
1214 {
1215         struct multipath *m = ti->private;
1216         struct dm_mpath_io *mpio = map_context->ptr;
1217         struct pgpath *pgpath = mpio->pgpath;
1218         struct path_selector *ps;
1219         int r;
1220
1221         r  = do_end_io(m, clone, error, mpio);
1222         if (pgpath) {
1223                 ps = &pgpath->pg->ps;
1224                 if (ps->type->end_io)
1225                         ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
1226         }
1227         mempool_free(mpio, m->mpio_pool);
1228
1229         return r;
1230 }
1231
1232 /*
1233  * Suspend can't complete until all the I/O is processed so if
1234  * the last path fails we must error any remaining I/O.
1235  * Note that if the freeze_bdev fails while suspending, the
1236  * queue_if_no_path state is lost - userspace should reset it.
1237  */
1238 static void multipath_presuspend(struct dm_target *ti)
1239 {
1240         struct multipath *m = (struct multipath *) ti->private;
1241
1242         queue_if_no_path(m, 0, 1);
1243 }
1244
1245 /*
1246  * Restore the queue_if_no_path setting.
1247  */
1248 static void multipath_resume(struct dm_target *ti)
1249 {
1250         struct multipath *m = (struct multipath *) ti->private;
1251         unsigned long flags;
1252
1253         spin_lock_irqsave(&m->lock, flags);
1254         m->queue_if_no_path = m->saved_queue_if_no_path;
1255         spin_unlock_irqrestore(&m->lock, flags);
1256 }
1257
1258 /*
1259  * Info output has the following format:
1260  * num_multipath_feature_args [multipath_feature_args]*
1261  * num_handler_status_args [handler_status_args]*
1262  * num_groups init_group_number
1263  *            [A|D|E num_ps_status_args [ps_status_args]*
1264  *             num_paths num_selector_args
1265  *             [path_dev A|F fail_count [selector_args]* ]+ ]+
1266  *
1267  * Table output has the following format (identical to the constructor string):
1268  * num_feature_args [features_args]*
1269  * num_handler_args hw_handler [hw_handler_args]*
1270  * num_groups init_group_number
1271  *     [priority selector-name num_ps_args [ps_args]*
1272  *      num_paths num_selector_args [path_dev [selector_args]* ]+ ]+
1273  */
1274 static int multipath_status(struct dm_target *ti, status_type_t type,
1275                             char *result, unsigned int maxlen)
1276 {
1277         int sz = 0;
1278         unsigned long flags;
1279         struct multipath *m = (struct multipath *) ti->private;
1280         struct priority_group *pg;
1281         struct pgpath *p;
1282         unsigned pg_num;
1283         char state;
1284
1285         spin_lock_irqsave(&m->lock, flags);
1286
1287         /* Features */
1288         if (type == STATUSTYPE_INFO)
1289                 DMEMIT("2 %u %u ", m->queue_size, m->pg_init_count);
1290         else {
1291                 DMEMIT("%u ", m->queue_if_no_path +
1292                               (m->pg_init_retries > 0) * 2);
1293                 if (m->queue_if_no_path)
1294                         DMEMIT("queue_if_no_path ");
1295                 if (m->pg_init_retries)
1296                         DMEMIT("pg_init_retries %u ", m->pg_init_retries);
1297         }
1298
1299         if (!m->hw_handler_name || type == STATUSTYPE_INFO)
1300                 DMEMIT("0 ");
1301         else
1302                 DMEMIT("1 %s ", m->hw_handler_name);
1303
1304         DMEMIT("%u ", m->nr_priority_groups);
1305
1306         if (m->next_pg)
1307                 pg_num = m->next_pg->pg_num;
1308         else if (m->current_pg)
1309                 pg_num = m->current_pg->pg_num;
1310         else
1311                 pg_num = 1;
1312
1313         DMEMIT("%u ", pg_num);
1314
1315         switch (type) {
1316         case STATUSTYPE_INFO:
1317                 list_for_each_entry(pg, &m->priority_groups, list) {
1318                         if (pg->bypassed)
1319                                 state = 'D';    /* Disabled */
1320                         else if (pg == m->current_pg)
1321                                 state = 'A';    /* Currently Active */
1322                         else
1323                                 state = 'E';    /* Enabled */
1324
1325                         DMEMIT("%c ", state);
1326
1327                         if (pg->ps.type->status)
1328                                 sz += pg->ps.type->status(&pg->ps, NULL, type,
1329                                                           result + sz,
1330                                                           maxlen - sz);
1331                         else
1332                                 DMEMIT("0 ");
1333
1334                         DMEMIT("%u %u ", pg->nr_pgpaths,
1335                                pg->ps.type->info_args);
1336
1337                         list_for_each_entry(p, &pg->pgpaths, list) {
1338                                 DMEMIT("%s %s %u ", p->path.pdev,
1339                                        p->is_active ? "A" : "F",
1340                                        p->fail_count);
1341                                 if (pg->ps.type->status)
1342                                         sz += pg->ps.type->status(&pg->ps,
1343                                               &p->path, type, result + sz,
1344                                               maxlen - sz);
1345                         }
1346                 }
1347                 break;
1348
1349         case STATUSTYPE_TABLE:
1350                 list_for_each_entry(pg, &m->priority_groups, list) {
1351                         DMEMIT("%s ", pg->ps.type->name);
1352
1353                         if (pg->ps.type->status)
1354                                 sz += pg->ps.type->status(&pg->ps, NULL, type,
1355                                                           result + sz,
1356                                                           maxlen - sz);
1357                         else
1358                                 DMEMIT("0 ");
1359
1360                         DMEMIT("%u %u ", pg->nr_pgpaths,
1361                                pg->ps.type->table_args);
1362
1363                         list_for_each_entry(p, &pg->pgpaths, list) {
1364                                 DMEMIT("%s ", p->path.pdev);
1365                                 if (pg->ps.type->status)
1366                                         sz += pg->ps.type->status(&pg->ps,
1367                                               &p->path, type, result + sz,
1368                                               maxlen - sz);
1369                         }
1370                 }
1371                 break;
1372         }
1373
1374         spin_unlock_irqrestore(&m->lock, flags);
1375
1376         return 0;
1377 }
1378
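/*
 * Handle messages sent through the device-mapper message interface,
 * e.g. "fail_path <device>", "reinstate_path <device>",
 * "queue_if_no_path" or "switch_group <#>".
 */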
1379 static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
1380 {
1381         int r;
1382         struct dm_dev *dev;
1383         struct multipath *m = (struct multipath *) ti->private;
1384         action_fn action;
1385
1386         if (argc == 1) {
1387                 if (!strnicmp(argv[0], MESG_STR("queue_if_no_path")))
1388                         return queue_if_no_path(m, 1, 0);
1389                 else if (!strnicmp(argv[0], MESG_STR("fail_if_no_path")))
1390                         return queue_if_no_path(m, 0, 0);
1391         }
1392
1393         if (argc != 2)
1394                 goto error;
1395
1396         if (!strnicmp(argv[0], MESG_STR("disable_group")))
1397                 return bypass_pg_num(m, argv[1], 1);
1398         else if (!strnicmp(argv[0], MESG_STR("enable_group")))
1399                 return bypass_pg_num(m, argv[1], 0);
1400         else if (!strnicmp(argv[0], MESG_STR("switch_group")))
1401                 return switch_pg_num(m, argv[1]);
1402         else if (!strnicmp(argv[0], MESG_STR("reinstate_path")))
1403                 action = reinstate_path;
1404         else if (!strnicmp(argv[0], MESG_STR("fail_path")))
1405                 action = fail_path;
1406         else
1407                 goto error;
1408
1409         r = dm_get_device(ti, argv[1], ti->begin, ti->len,
1410                           dm_table_get_mode(ti->table), &dev);
1411         if (r) {
1412                 DMWARN("message: error getting device %s",
1413                        argv[1]);
1414                 return -EINVAL;
1415         }
1416
1417         r = action_dev(m, dev, action);
1418
1419         dm_put_device(ti, dev);
1420
1421         return r;
1422
1423 error:
1424         DMWARN("Unrecognised multipath message received.");
1425         return -EINVAL;
1426 }
1427
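/*
 * Pass ioctls through to the block device of the currently selected
 * path, choosing one first if necessary.
 */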
1428 static int multipath_ioctl(struct dm_target *ti, struct inode *inode,
1429                            struct file *filp, unsigned int cmd,
1430                            unsigned long arg)
1431 {
1432         struct multipath *m = (struct multipath *) ti->private;
1433         struct block_device *bdev = NULL;
1434         unsigned long flags;
1435         struct file fake_file = {};
1436         struct dentry fake_dentry = {};
1437         int r = 0;
1438
1439         fake_file.f_path.dentry = &fake_dentry;
1440
1441         spin_lock_irqsave(&m->lock, flags);
1442
1443         if (!m->current_pgpath)
1444                 __choose_pgpath(m, 1 << 19); /* Assume 512KB */
1445
1446         if (m->current_pgpath && m->current_pgpath->path.dev) {
1447                 bdev = m->current_pgpath->path.dev->bdev;
1448                 fake_dentry.d_inode = bdev->bd_inode;
1449                 fake_file.f_mode = m->current_pgpath->path.dev->mode;
1450         }
1451
1452         if (m->queue_io)
1453                 r = -EAGAIN;
1454         else if (!bdev)
1455                 r = -EIO;
1456
1457         spin_unlock_irqrestore(&m->lock, flags);
1458
1459         return r ? : blkdev_driver_ioctl(bdev->bd_inode, &fake_file,
1460                                          bdev->bd_disk, cmd, arg);
1461 }
1462
1463 static int __pgpath_busy(struct pgpath *pgpath)
1464 {
1465         struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);
1466
1467         return dm_underlying_device_busy(q);
1468 }
1469
1470 /*
1471  * We return "busy" only when we can map I/Os but underlying devices
1472  * are busy (so even if we map I/Os now, the I/Os will wait on
1473  * the underlying queue).
1474  * In other words, if we want to kill I/Os or queue them inside us
1475  * due to map unavailability, we don't return "busy".  Otherwise,
1476  * dm core won't give us the I/Os and we can't do what we want.
1477  */
1478 static int multipath_busy(struct dm_target *ti)
1479 {
1480         int busy = 0, has_active = 0;
1481         struct multipath *m = (struct multipath *) ti->private;
1482         struct priority_group *pg;
1483         struct pgpath *pgpath;
1484         unsigned long flags;
1485
1486         spin_lock_irqsave(&m->lock, flags);
1487
1488         /* Guess which priority_group will be used at next mapping time */
1489         if (unlikely(!m->current_pgpath && m->next_pg))
1490                 pg = m->next_pg;
1491         else if (likely(m->current_pg))
1492                 pg = m->current_pg;
1493         else
1494                 /*
1495                  * We don't know which pg will be used at next mapping time.
1496                  * We don't call __choose_pgpath() here to avoid triggering
1497                  * pg_init just by busy checking.
1498                  * So we don't know whether underlying devices we will be using
1499                  * at next mapping time are busy or not. Just try mapping.
1500                  */
1501                 goto out;
1502
1503         /*
1504          * If there is at least one non-busy active path, the path selector
1505          * will be able to select it. So we consider such a pg as not busy.
1506          */
1507         busy = 1;
1508         list_for_each_entry(pgpath, &pg->pgpaths, list)
1509                 if (pgpath->is_active) {
1510                         has_active = 1;
1511
1512                         if (!__pgpath_busy(pgpath)) {
1513                                 busy = 0;
1514                                 break;
1515                         }
1516                 }
1517
1518         if (!has_active)
1519                 /*
1520                  * No active path in this pg, so this pg won't be used and
1521                  * the current_pg will be changed at next mapping time.
1522                  * We need to try mapping to determine it.
1523                  */
1524                 busy = 0;
1525
1526 out:
1527         spin_unlock_irqrestore(&m->lock, flags);
1528
1529         return busy;
1530 }
1531
1532 /*-----------------------------------------------------------------
1533  * Module setup
1534  *---------------------------------------------------------------*/
1535 static struct target_type multipath_target = {
1536         .name = "multipath",
1537         .version = {1, 0, 5},
1538         .module = THIS_MODULE,
1539         .ctr = multipath_ctr,
1540         .dtr = multipath_dtr,
1541         .map_rq = multipath_map,
1542         .rq_end_io = multipath_end_io,
1543         .presuspend = multipath_presuspend,
1544         .resume = multipath_resume,
1545         .status = multipath_status,
1546         .message = multipath_message,
1547         .ioctl  = multipath_ioctl,
1548         .busy = multipath_busy,
1549 };
1550
1551 static int __init dm_multipath_init(void)
1552 {
1553         int r;
1554
1555         /* allocate a slab for the dm_ios */
1556         _mpio_cache = KMEM_CACHE(dm_mpath_io, 0);
1557         if (!_mpio_cache)
1558                 return -ENOMEM;
1559
1560         r = dm_register_target(&multipath_target);
1561         if (r < 0) {
1562                 DMERR("register failed %d", r);
1563                 kmem_cache_destroy(_mpio_cache);
1564                 return -EINVAL;
1565         }
1566
1567         kmultipathd = create_workqueue("kmpathd");
1568         if (!kmultipathd) {
1569                 DMERR("failed to create workqueue kmpathd");
1570                 dm_unregister_target(&multipath_target);
1571                 kmem_cache_destroy(_mpio_cache);
1572                 return -ENOMEM;
1573         }
1574
1575         /*
1576          * A separate workqueue is used to handle the device handlers
1577          * to avoid overloading the existing workqueue. Overloading the
1578          * old workqueue would also create a bottleneck in the
1579          * path of the storage hardware device activation.
1580          */
1581         kmpath_handlerd = create_singlethread_workqueue("kmpath_handlerd");
1582         if (!kmpath_handlerd) {
1583                 DMERR("failed to create workqueue kmpath_handlerd");
1584                 destroy_workqueue(kmultipathd);
1585                 dm_unregister_target(&multipath_target);
1586                 kmem_cache_destroy(_mpio_cache);
1587                 return -ENOMEM;
1588         }
1589
1590         DMINFO("version %u.%u.%u loaded",
1591                multipath_target.version[0], multipath_target.version[1],
1592                multipath_target.version[2]);
1593
1594         return r;
1595 }
1596
1597 static void __exit dm_multipath_exit(void)
1598 {
1599         int r;
1600
1601         destroy_workqueue(kmpath_handlerd);
1602         destroy_workqueue(kmultipathd);
1603
1604         r = dm_unregister_target(&multipath_target);
1605         if (r < 0)
1606                 DMERR("target unregister failed %d", r);
1607         kmem_cache_destroy(_mpio_cache);
1608 }
1609
1610 module_init(dm_multipath_init);
1611 module_exit(dm_multipath_exit);
1612
1613 MODULE_DESCRIPTION(DM_NAME " multipath target");
1614 MODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>");
1615 MODULE_LICENSE("GPL");