dm mpath: add mutex to synchronize adding and flushing work
drivers/md/dm-mpath.c
1 /*
2  * Copyright (C) 2003 Sistina Software Limited.
3  * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4  *
5  * This file is released under the GPL.
6  */
7
8 #include <linux/device-mapper.h>
9
10 #include "dm-path-selector.h"
11 #include "dm-uevent.h"
12
13 #include <linux/ctype.h>
14 #include <linux/init.h>
15 #include <linux/mempool.h>
16 #include <linux/module.h>
17 #include <linux/pagemap.h>
18 #include <linux/slab.h>
19 #include <linux/time.h>
20 #include <linux/workqueue.h>
21 #include <scsi/scsi_dh.h>
22 #include <asm/atomic.h>
23
24 #define DM_MSG_PREFIX "multipath"
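/*
 * MESG_STR expands to the literal string plus its size (which includes the
 * trailing NUL), so the strnicmp() calls in parse_features() and
 * multipath_message() only accept an exact, case-insensitive match of the
 * keyword.
 */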
25 #define MESG_STR(x) x, sizeof(x)
26
27 /* Path properties */
28 struct pgpath {
29         struct list_head list;
30
31         struct priority_group *pg;      /* Owning PG */
32         unsigned is_active;             /* Path status */
33         unsigned fail_count;            /* Cumulative failure count */
34
35         struct dm_path path;
36         struct work_struct deactivate_path;
37         struct work_struct activate_path;
38 };
39
40 #define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)
41
42 /*
43  * Paths are grouped into Priority Groups and numbered from 1 upwards.
44  * Each has a path selector which controls which path gets used.
45  */
46 struct priority_group {
47         struct list_head list;
48
49         struct multipath *m;            /* Owning multipath instance */
50         struct path_selector ps;
51
52         unsigned pg_num;                /* Reference number */
53         unsigned bypassed;              /* Temporarily bypass this PG? */
54
55         unsigned nr_pgpaths;            /* Number of paths in PG */
56         struct list_head pgpaths;
57 };
58
59 /* Multipath context */
60 struct multipath {
61         struct list_head list;
62         struct dm_target *ti;
63
64         spinlock_t lock;
65
66         const char *hw_handler_name;
67         char *hw_handler_params;
68         unsigned nr_priority_groups;
69         struct list_head priority_groups;
70         unsigned pg_init_required;      /* pg_init needs calling? */
71         unsigned pg_init_in_progress;   /* Only one pg_init allowed at once */
72
73         unsigned nr_valid_paths;        /* Total number of usable paths */
74         struct pgpath *current_pgpath;
75         struct priority_group *current_pg;
76         struct priority_group *next_pg; /* Switch to this PG if set */
77         unsigned repeat_count;          /* I/Os left before calling PS again */
78
79         unsigned queue_io;              /* Must we queue all I/O? */
80         unsigned queue_if_no_path;      /* Queue I/O if last path fails? */
81         unsigned saved_queue_if_no_path;/* Saved state during suspension */
82         unsigned pg_init_retries;       /* Number of times to retry pg_init */
83         unsigned pg_init_count;         /* Number of times pg_init called */
84
85         struct work_struct process_queued_ios;
86         struct list_head queued_ios;
87         unsigned queue_size;
88
89         struct work_struct trigger_event;
90
91         /*
92          * We must use a mempool of dm_mpath_io structs so that we
93          * can resubmit bios on error.
94          */
95         mempool_t *mpio_pool;
96
97         struct mutex work_mutex;
98 };
99
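/*
 * Locking:
 *
 * m->lock (an IRQ-safe spinlock) protects the path-selection and queueing
 * state above: current_pgpath/current_pg/next_pg, nr_valid_paths,
 * queued_ios/queue_size and the queue_io/queue_if_no_path flags.
 *
 * m->work_mutex serializes multipath_message(), which may queue new work,
 * against multipath_postsuspend(), which flushes the workqueues, so that
 * no new work can be added while a flush is draining them.
 */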
100 /*
101  * Context information attached to each bio we process.
102  */
103 struct dm_mpath_io {
104         struct pgpath *pgpath;
105         size_t nr_bytes;
106 };
107
108 typedef int (*action_fn) (struct pgpath *pgpath);
109
110 #define MIN_IOS 256     /* Mempool size */
111
112 static struct kmem_cache *_mpio_cache;
113
114 static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
115 static void process_queued_ios(struct work_struct *work);
116 static void trigger_event(struct work_struct *work);
117 static void activate_path(struct work_struct *work);
118 static void deactivate_path(struct work_struct *work);
119
120
121 /*-----------------------------------------------
122  * Allocation routines
123  *-----------------------------------------------*/
124
125 static struct pgpath *alloc_pgpath(void)
126 {
127         struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL);
128
129         if (pgpath) {
130                 pgpath->is_active = 1;
131                 INIT_WORK(&pgpath->deactivate_path, deactivate_path);
132                 INIT_WORK(&pgpath->activate_path, activate_path);
133         }
134
135         return pgpath;
136 }
137
138 static void free_pgpath(struct pgpath *pgpath)
139 {
140         kfree(pgpath);
141 }
142
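/*
 * Queued on kmultipathd by fail_path().  Aborting the underlying request
 * queue forces requests already outstanding on the failed path to be timed
 * out immediately instead of waiting for the normal block-layer timeout.
 */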
143 static void deactivate_path(struct work_struct *work)
144 {
145         struct pgpath *pgpath =
146                 container_of(work, struct pgpath, deactivate_path);
147
148         blk_abort_queue(pgpath->path.dev->bdev->bd_disk->queue);
149 }
150
151 static struct priority_group *alloc_priority_group(void)
152 {
153         struct priority_group *pg;
154
155         pg = kzalloc(sizeof(*pg), GFP_KERNEL);
156
157         if (pg)
158                 INIT_LIST_HEAD(&pg->pgpaths);
159
160         return pg;
161 }
162
163 static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
164 {
165         struct pgpath *pgpath, *tmp;
166         struct multipath *m = ti->private;
167
168         list_for_each_entry_safe(pgpath, tmp, pgpaths, list) {
169                 list_del(&pgpath->list);
170                 if (m->hw_handler_name)
171                         scsi_dh_detach(bdev_get_queue(pgpath->path.dev->bdev));
172                 dm_put_device(ti, pgpath->path.dev);
173                 free_pgpath(pgpath);
174         }
175 }
176
177 static void free_priority_group(struct priority_group *pg,
178                                 struct dm_target *ti)
179 {
180         struct path_selector *ps = &pg->ps;
181
182         if (ps->type) {
183                 ps->type->destroy(ps);
184                 dm_put_path_selector(ps->type);
185         }
186
187         free_pgpaths(&pg->pgpaths, ti);
188         kfree(pg);
189 }
190
191 static struct multipath *alloc_multipath(struct dm_target *ti)
192 {
193         struct multipath *m;
194
195         m = kzalloc(sizeof(*m), GFP_KERNEL);
196         if (m) {
197                 INIT_LIST_HEAD(&m->priority_groups);
198                 INIT_LIST_HEAD(&m->queued_ios);
199                 spin_lock_init(&m->lock);
200                 m->queue_io = 1;
201                 INIT_WORK(&m->process_queued_ios, process_queued_ios);
202                 INIT_WORK(&m->trigger_event, trigger_event);
203                 mutex_init(&m->work_mutex);
204                 m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
205                 if (!m->mpio_pool) {
206                         kfree(m);
207                         return NULL;
208                 }
209                 m->ti = ti;
210                 ti->private = m;
211         }
212
213         return m;
214 }
215
216 static void free_multipath(struct multipath *m)
217 {
218         struct priority_group *pg, *tmp;
219
220         list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) {
221                 list_del(&pg->list);
222                 free_priority_group(pg, m->ti);
223         }
224
225         kfree(m->hw_handler_name);
226         kfree(m->hw_handler_params);
227         mempool_destroy(m->mpio_pool);
228         kfree(m);
229 }
230
231
232 /*-----------------------------------------------
233  * Path selection
234  *-----------------------------------------------*/
235
236 static void __switch_pg(struct multipath *m, struct pgpath *pgpath)
237 {
238         m->current_pg = pgpath->pg;
239
240         /* Must we initialise the PG first, and queue I/O till it's ready? */
241         if (m->hw_handler_name) {
242                 m->pg_init_required = 1;
243                 m->queue_io = 1;
244         } else {
245                 m->pg_init_required = 0;
246                 m->queue_io = 0;
247         }
248
249         m->pg_init_count = 0;
250 }
251
252 static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg,
253                                size_t nr_bytes)
254 {
255         struct dm_path *path;
256
257         path = pg->ps.type->select_path(&pg->ps, &m->repeat_count, nr_bytes);
258         if (!path)
259                 return -ENXIO;
260
261         m->current_pgpath = path_to_pgpath(path);
262
263         if (m->current_pg != pg)
264                 __switch_pg(m, m->current_pgpath);
265
266         return 0;
267 }
268
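/*
 * Select a new current path.  Order of preference:
 *   1. an explicitly requested PG (m->next_pg, set by the constructor or
 *      the switch_group message),
 *   2. the PG already in use, as long as it still has a usable path,
 *   3. any other PG, trying non-bypassed groups first and bypassed groups
 *      only as a last resort (the two passes of the loop below).
 * On failure, current_pgpath and current_pg are both cleared.
 */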
269 static void __choose_pgpath(struct multipath *m, size_t nr_bytes)
270 {
271         struct priority_group *pg;
272         unsigned bypassed = 1;
273
274         if (!m->nr_valid_paths)
275                 goto failed;
276
277         /* Were we instructed to switch PG? */
278         if (m->next_pg) {
279                 pg = m->next_pg;
280                 m->next_pg = NULL;
281                 if (!__choose_path_in_pg(m, pg, nr_bytes))
282                         return;
283         }
284
285         /* Don't change PG until it has no remaining paths */
286         if (m->current_pg && !__choose_path_in_pg(m, m->current_pg, nr_bytes))
287                 return;
288
289         /*
290          * Loop through priority groups until we find a valid path.
291          * First time we skip PGs marked 'bypassed'.
292          * Second time we only try the ones we skipped.
293          */
294         do {
295                 list_for_each_entry(pg, &m->priority_groups, list) {
296                         if (pg->bypassed == bypassed)
297                                 continue;
298                         if (!__choose_path_in_pg(m, pg, nr_bytes))
299                                 return;
300                 }
301         } while (bypassed--);
302
303 failed:
304         m->current_pgpath = NULL;
305         m->current_pg = NULL;
306 }
307
308 /*
309  * Check whether bios must be queued in the device-mapper core rather
310  * than here in the target.
311  *
312  * m->lock must be held on entry.
313  *
314  * If m->queue_if_no_path and m->saved_queue_if_no_path hold the
315  * same value then we are not between multipath_presuspend()
316  * and multipath_resume() calls and we have no need to check
317  * for the DMF_NOFLUSH_SUSPENDING flag.
318  */
319 static int __must_push_back(struct multipath *m)
320 {
321         return (m->queue_if_no_path != m->saved_queue_if_no_path &&
322                 dm_noflush_suspending(m->ti));
323 }
324
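/*
 * Map one cloned request to a path.  Return values:
 *   DM_MAPIO_SUBMITTED - the clone was added to m->queued_ios and will be
 *                        resubmitted later by process_queued_ios(),
 *   DM_MAPIO_REMAPPED  - clone->q/rq_disk now point at the chosen path,
 *   DM_MAPIO_REQUEUE   - no path, but a noflush suspend is in progress so
 *                        dm core should requeue the original request,
 *   -EIO               - no path and queue_if_no_path is not set.
 */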
325 static int map_io(struct multipath *m, struct request *clone,
326                   struct dm_mpath_io *mpio, unsigned was_queued)
327 {
328         int r = DM_MAPIO_REMAPPED;
329         size_t nr_bytes = blk_rq_bytes(clone);
330         unsigned long flags;
331         struct pgpath *pgpath;
332         struct block_device *bdev;
333
334         spin_lock_irqsave(&m->lock, flags);
335
336         /* Do we need to select a new pgpath? */
337         if (!m->current_pgpath ||
338             (!m->queue_io && (m->repeat_count && --m->repeat_count == 0)))
339                 __choose_pgpath(m, nr_bytes);
340
341         pgpath = m->current_pgpath;
342
343         if (was_queued)
344                 m->queue_size--;
345
346         if ((pgpath && m->queue_io) ||
347             (!pgpath && m->queue_if_no_path)) {
348                 /* Queue for the daemon to resubmit */
349                 list_add_tail(&clone->queuelist, &m->queued_ios);
350                 m->queue_size++;
351                 if ((m->pg_init_required && !m->pg_init_in_progress) ||
352                     !m->queue_io)
353                         queue_work(kmultipathd, &m->process_queued_ios);
354                 pgpath = NULL;
355                 r = DM_MAPIO_SUBMITTED;
356         } else if (pgpath) {
357                 bdev = pgpath->path.dev->bdev;
358                 clone->q = bdev_get_queue(bdev);
359                 clone->rq_disk = bdev->bd_disk;
360         } else if (__must_push_back(m))
361                 r = DM_MAPIO_REQUEUE;
362         else
363                 r = -EIO;       /* Failed */
364
365         mpio->pgpath = pgpath;
366         mpio->nr_bytes = nr_bytes;
367
368         if (r == DM_MAPIO_REMAPPED && pgpath->pg->ps.type->start_io)
369                 pgpath->pg->ps.type->start_io(&pgpath->pg->ps, &pgpath->path,
370                                               nr_bytes);
371
372         spin_unlock_irqrestore(&m->lock, flags);
373
374         return r;
375 }
376
377 /*
378  * If we run out of usable paths, should we queue I/O or error it?
379  */
380 static int queue_if_no_path(struct multipath *m, unsigned queue_if_no_path,
381                             unsigned save_old_value)
382 {
383         unsigned long flags;
384
385         spin_lock_irqsave(&m->lock, flags);
386
387         if (save_old_value)
388                 m->saved_queue_if_no_path = m->queue_if_no_path;
389         else
390                 m->saved_queue_if_no_path = queue_if_no_path;
391         m->queue_if_no_path = queue_if_no_path;
392         if (!m->queue_if_no_path && m->queue_size)
393                 queue_work(kmultipathd, &m->process_queued_ios);
394
395         spin_unlock_irqrestore(&m->lock, flags);
396
397         return 0;
398 }
399
400 /*-----------------------------------------------------------------
401  * The multipath daemon is responsible for resubmitting queued ios.
402  *---------------------------------------------------------------*/
403
404 static void dispatch_queued_ios(struct multipath *m)
405 {
406         int r;
407         unsigned long flags;
408         struct dm_mpath_io *mpio;
409         union map_info *info;
410         struct request *clone, *n;
411         LIST_HEAD(cl);
412
413         spin_lock_irqsave(&m->lock, flags);
414         list_splice_init(&m->queued_ios, &cl);
415         spin_unlock_irqrestore(&m->lock, flags);
416
417         list_for_each_entry_safe(clone, n, &cl, queuelist) {
418                 list_del_init(&clone->queuelist);
419
420                 info = dm_get_rq_mapinfo(clone);
421                 mpio = info->ptr;
422
423                 r = map_io(m, clone, mpio, 1);
424                 if (r < 0) {
425                         mempool_free(mpio, m->mpio_pool);
426                         dm_kill_unmapped_request(clone, r);
427                 } else if (r == DM_MAPIO_REMAPPED)
428                         dm_dispatch_request(clone);
429                 else if (r == DM_MAPIO_REQUEUE) {
430                         mempool_free(mpio, m->mpio_pool);
431                         dm_requeue_unmapped_request(clone);
432                 }
433         }
434 }
435
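/*
 * Work function for m->process_queued_ios, run on kmultipathd.  If the
 * current PG still needs pg_init, activate_path work is queued for every
 * path in that PG; once queueing is no longer required the queued clones
 * are dispatched.  dispatch_queued_ios() is called after dropping m->lock
 * because map_io() takes the lock itself.
 */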
436 static void process_queued_ios(struct work_struct *work)
437 {
438         struct multipath *m =
439                 container_of(work, struct multipath, process_queued_ios);
440         struct pgpath *pgpath = NULL, *tmp;
441         unsigned must_queue = 1;
442         unsigned long flags;
443
444         spin_lock_irqsave(&m->lock, flags);
445
446         if (!m->queue_size)
447                 goto out;
448
449         if (!m->current_pgpath)
450                 __choose_pgpath(m, 0);
451
452         pgpath = m->current_pgpath;
453
454         if ((pgpath && !m->queue_io) ||
455             (!pgpath && !m->queue_if_no_path))
456                 must_queue = 0;
457
458         if (m->pg_init_required && !m->pg_init_in_progress && pgpath) {
459                 m->pg_init_count++;
460                 m->pg_init_required = 0;
461                 list_for_each_entry(tmp, &pgpath->pg->pgpaths, list) {
462                         if (queue_work(kmpath_handlerd, &tmp->activate_path))
463                                 m->pg_init_in_progress++;
464                 }
465         }
466 out:
467         spin_unlock_irqrestore(&m->lock, flags);
468         if (!must_queue)
469                 dispatch_queued_ios(m);
470 }
471
472 /*
473  * An event is triggered whenever a path is taken out of use.
474  * Includes path failure and PG bypass.
475  */
476 static void trigger_event(struct work_struct *work)
477 {
478         struct multipath *m =
479                 container_of(work, struct multipath, trigger_event);
480
481         dm_table_event(m->ti->table);
482 }
483
484 /*-----------------------------------------------------------------
485  * Constructor/argument parsing:
486  * <#multipath feature args> [<arg>]*
487  * <#hw_handler args> [hw_handler [<arg>]*]
488  * <#priority groups>
489  * <initial priority group>
490  *     [<selector> <#selector args> [<arg>]*
491  *      <#paths> <#per-path selector args>
492  *         [<path> [<arg>]* ]+ ]+
493  *---------------------------------------------------------------*/
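/*
 * Example table line (hypothetical 8:16 and 8:32 path devices, round-robin
 * selector, no features and no hardware handler):
 *
 *   0 1024 multipath 0 0 1 1 round-robin 0 2 1 8:16 1000 8:32 1000
 *
 * i.e. one priority group of two paths, each with a round-robin repeat
 * count of 1000.  The leading "0 1024 multipath" is the generic dm
 * start/length/target prefix consumed by dm core, not by this constructor.
 */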
494 struct param {
495         unsigned min;
496         unsigned max;
497         char *error;
498 };
499
500 static int read_param(struct param *param, char *str, unsigned *v, char **error)
501 {
502         if (!str ||
503             (sscanf(str, "%u", v) != 1) ||
504             (*v < param->min) ||
505             (*v > param->max)) {
506                 *error = param->error;
507                 return -EINVAL;
508         }
509
510         return 0;
511 }
512
513 struct arg_set {
514         unsigned argc;
515         char **argv;
516 };
517
518 static char *shift(struct arg_set *as)
519 {
520         char *r;
521
522         if (as->argc) {
523                 as->argc--;
524                 r = *as->argv;
525                 as->argv++;
526                 return r;
527         }
528
529         return NULL;
530 }
531
532 static void consume(struct arg_set *as, unsigned n)
533 {
534         BUG_ON(as->argc < n);
535         as->argc -= n;
536         as->argv += n;
537 }
538
539 static int parse_path_selector(struct arg_set *as, struct priority_group *pg,
540                                struct dm_target *ti)
541 {
542         int r;
543         struct path_selector_type *pst;
544         unsigned ps_argc;
545
546         static struct param _params[] = {
547                 {0, 1024, "invalid number of path selector args"},
548         };
549
550         pst = dm_get_path_selector(shift(as));
551         if (!pst) {
552                 ti->error = "unknown path selector type";
553                 return -EINVAL;
554         }
555
556         r = read_param(_params, shift(as), &ps_argc, &ti->error);
557         if (r) {
558                 dm_put_path_selector(pst);
559                 return -EINVAL;
560         }
561
562         if (ps_argc > as->argc) {
563                 dm_put_path_selector(pst);
564                 ti->error = "not enough arguments for path selector";
565                 return -EINVAL;
566         }
567
568         r = pst->create(&pg->ps, ps_argc, as->argv);
569         if (r) {
570                 dm_put_path_selector(pst);
571                 ti->error = "path selector constructor failed";
572                 return r;
573         }
574
575         pg->ps.type = pst;
576         consume(as, ps_argc);
577
578         return 0;
579 }
580
581 static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
582                                struct dm_target *ti)
583 {
584         int r;
585         struct pgpath *p;
586         struct multipath *m = ti->private;
587
588         /* we need at least a path arg */
589         if (as->argc < 1) {
590                 ti->error = "no device given";
591                 return ERR_PTR(-EINVAL);
592         }
593
594         p = alloc_pgpath();
595         if (!p)
596                 return ERR_PTR(-ENOMEM);
597
598         r = dm_get_device(ti, shift(as), ti->begin, ti->len,
599                           dm_table_get_mode(ti->table), &p->path.dev);
600         if (r) {
601                 ti->error = "error getting device";
602                 goto bad;
603         }
604
605         if (m->hw_handler_name) {
606                 struct request_queue *q = bdev_get_queue(p->path.dev->bdev);
607
608                 r = scsi_dh_attach(q, m->hw_handler_name);
609                 if (r == -EBUSY) {
610                         /*
611                          * Already attached to different hw_handler,
612                          * try to reattach with correct one.
613                          */
614                         scsi_dh_detach(q);
615                         r = scsi_dh_attach(q, m->hw_handler_name);
616                 }
617
618                 if (r < 0) {
619                         ti->error = "error attaching hardware handler";
620                         dm_put_device(ti, p->path.dev);
621                         goto bad;
622                 }
623
624                 if (m->hw_handler_params) {
625                         r = scsi_dh_set_params(q, m->hw_handler_params);
626                         if (r < 0) {
627                                 ti->error = "unable to set hardware "
628                                                         "handler parameters";
629                                 scsi_dh_detach(q);
630                                 dm_put_device(ti, p->path.dev);
631                                 goto bad;
632                         }
633                 }
634         }
635
636         r = ps->type->add_path(ps, &p->path, as->argc, as->argv, &ti->error);
637         if (r) {
638                 dm_put_device(ti, p->path.dev);
639                 goto bad;
640         }
641
642         return p;
643
644  bad:
645         free_pgpath(p);
646         return ERR_PTR(r);
647 }
648
649 static struct priority_group *parse_priority_group(struct arg_set *as,
650                                                    struct multipath *m)
651 {
652         static struct param _params[] = {
653                 {1, 1024, "invalid number of paths"},
654                 {0, 1024, "invalid number of selector args"}
655         };
656
657         int r;
658         unsigned i, nr_selector_args, nr_params;
659         struct priority_group *pg;
660         struct dm_target *ti = m->ti;
661
662         if (as->argc < 2) {
663                 as->argc = 0;
664                 ti->error = "not enough priority group arguments";
665                 return ERR_PTR(-EINVAL);
666         }
667
668         pg = alloc_priority_group();
669         if (!pg) {
670                 ti->error = "couldn't allocate priority group";
671                 return ERR_PTR(-ENOMEM);
672         }
673         pg->m = m;
674
675         r = parse_path_selector(as, pg, ti);
676         if (r)
677                 goto bad;
678
679         /*
680          * read the paths
681          */
682         r = read_param(_params, shift(as), &pg->nr_pgpaths, &ti->error);
683         if (r)
684                 goto bad;
685
686         r = read_param(_params + 1, shift(as), &nr_selector_args, &ti->error);
687         if (r)
688                 goto bad;
689
690         nr_params = 1 + nr_selector_args;
691         for (i = 0; i < pg->nr_pgpaths; i++) {
692                 struct pgpath *pgpath;
693                 struct arg_set path_args;
694
695                 if (as->argc < nr_params) {
696                         ti->error = "not enough path parameters";
697                         goto bad;
698                 }
699
700                 path_args.argc = nr_params;
701                 path_args.argv = as->argv;
702
703                 pgpath = parse_path(&path_args, &pg->ps, ti);
704                 if (IS_ERR(pgpath)) {
705                         r = PTR_ERR(pgpath);
706                         goto bad;
707                 }
708
709                 pgpath->pg = pg;
710                 list_add_tail(&pgpath->list, &pg->pgpaths);
711                 consume(as, nr_params);
712         }
713
714         return pg;
715
716  bad:
717         free_priority_group(pg, ti);
718         return ERR_PTR(r);
719 }
720
721 static int parse_hw_handler(struct arg_set *as, struct multipath *m)
722 {
723         unsigned hw_argc;
724         int ret;
725         struct dm_target *ti = m->ti;
726
727         static struct param _params[] = {
728                 {0, 1024, "invalid number of hardware handler args"},
729         };
730
731         if (read_param(_params, shift(as), &hw_argc, &ti->error))
732                 return -EINVAL;
733
734         if (!hw_argc)
735                 return 0;
736
737         if (hw_argc > as->argc) {
738                 ti->error = "not enough arguments for hardware handler";
739                 return -EINVAL;
740         }
741
742         m->hw_handler_name = kstrdup(shift(as), GFP_KERNEL);
743         request_module("scsi_dh_%s", m->hw_handler_name);
744         if (scsi_dh_handler_exist(m->hw_handler_name) == 0) {
745                 ti->error = "unknown hardware handler type";
746                 ret = -EINVAL;
747                 goto fail;
748         }
749
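        /*
         * Pack the remaining handler arguments into hw_handler_params as
         * NUL-separated strings: first the argument count in decimal, then
         * each argument in turn (sprintf() writes the terminating NUL and p
         * is advanced past it on every iteration).  The buffer is later
         * passed verbatim to scsi_dh_set_params() in parse_path().
         */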
750         if (hw_argc > 1) {
751                 char *p;
752                 int i, j, len = 4;
753
754                 for (i = 0; i <= hw_argc - 2; i++)
755                         len += strlen(as->argv[i]) + 1;
756                 p = m->hw_handler_params = kzalloc(len, GFP_KERNEL);
757                 if (!p) {
758                         ti->error = "memory allocation failed";
759                         ret = -ENOMEM;
760                         goto fail;
761                 }
762                 j = sprintf(p, "%d", hw_argc - 1);
763                 for (i = 0, p+=j+1; i <= hw_argc - 2; i++, p+=j+1)
764                         j = sprintf(p, "%s", as->argv[i]);
765         }
766         consume(as, hw_argc - 1);
767
768         return 0;
769 fail:
770         kfree(m->hw_handler_name);
771         m->hw_handler_name = NULL;
772         return ret;
773 }
774
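/*
 * Parse the optional feature arguments.  Currently understood:
 *   queue_if_no_path     - queue I/O instead of failing it when no path
 *                          is available,
 *   pg_init_retries <n>  - retry a failing pg_init up to <n> times (1-50).
 * For example, the feature block "3 queue_if_no_path pg_init_retries 5".
 */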
775 static int parse_features(struct arg_set *as, struct multipath *m)
776 {
777         int r;
778         unsigned argc;
779         struct dm_target *ti = m->ti;
780         const char *param_name;
781
782         static struct param _params[] = {
783                 {0, 3, "invalid number of feature args"},
784                 {1, 50, "pg_init_retries must be between 1 and 50"},
785         };
786
787         r = read_param(_params, shift(as), &argc, &ti->error);
788         if (r)
789                 return -EINVAL;
790
791         if (!argc)
792                 return 0;
793
794         do {
795                 param_name = shift(as);
796                 argc--;
797
798                 if (!strnicmp(param_name, MESG_STR("queue_if_no_path"))) {
799                         r = queue_if_no_path(m, 1, 0);
800                         continue;
801                 }
802
803                 if (!strnicmp(param_name, MESG_STR("pg_init_retries")) &&
804                     (argc >= 1)) {
805                         r = read_param(_params + 1, shift(as),
806                                        &m->pg_init_retries, &ti->error);
807                         argc--;
808                         continue;
809                 }
810
811                 ti->error = "Unrecognised multipath feature request";
812                 r = -EINVAL;
813         } while (argc && !r);
814
815         return r;
816 }
817
818 static int multipath_ctr(struct dm_target *ti, unsigned int argc,
819                          char **argv)
820 {
821         /* target parameters */
822         static struct param _params[] = {
823                 {1, 1024, "invalid number of priority groups"},
824                 {1, 1024, "invalid initial priority group number"},
825         };
826
827         int r;
828         struct multipath *m;
829         struct arg_set as;
830         unsigned pg_count = 0;
831         unsigned next_pg_num;
832
833         as.argc = argc;
834         as.argv = argv;
835
836         m = alloc_multipath(ti);
837         if (!m) {
838                 ti->error = "can't allocate multipath";
839                 return -EINVAL;
840         }
841
842         r = parse_features(&as, m);
843         if (r)
844                 goto bad;
845
846         r = parse_hw_handler(&as, m);
847         if (r)
848                 goto bad;
849
850         r = read_param(_params, shift(&as), &m->nr_priority_groups, &ti->error);
851         if (r)
852                 goto bad;
853
854         r = read_param(_params + 1, shift(&as), &next_pg_num, &ti->error);
855         if (r)
856                 goto bad;
857
858         /* parse the priority groups */
859         while (as.argc) {
860                 struct priority_group *pg;
861
862                 pg = parse_priority_group(&as, m);
863                 if (IS_ERR(pg)) {
864                         r = PTR_ERR(pg);
865                         goto bad;
866                 }
867
868                 m->nr_valid_paths += pg->nr_pgpaths;
869                 list_add_tail(&pg->list, &m->priority_groups);
870                 pg_count++;
871                 pg->pg_num = pg_count;
872                 if (!--next_pg_num)
873                         m->next_pg = pg;
874         }
875
876         if (pg_count != m->nr_priority_groups) {
877                 ti->error = "priority group count mismatch";
878                 r = -EINVAL;
879                 goto bad;
880         }
881
882         ti->num_flush_requests = 1;
883
884         return 0;
885
886  bad:
887         free_multipath(m);
888         return r;
889 }
890
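/*
 * Wait for all asynchronous multipath work to finish: path activation on
 * kmpath_handlerd, queued-I/O processing and path deactivation on
 * kmultipathd, and trigger_event work scheduled on the shared kernel
 * workqueue.
 */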
891 static void flush_multipath_work(void)
892 {
893         flush_workqueue(kmpath_handlerd);
894         flush_workqueue(kmultipathd);
895         flush_scheduled_work();
896 }
897
898 static void multipath_dtr(struct dm_target *ti)
899 {
900         struct multipath *m = ti->private;
901
902         flush_multipath_work();
903         free_multipath(m);
904 }
905
906 /*
907  * Map cloned requests
908  */
909 static int multipath_map(struct dm_target *ti, struct request *clone,
910                          union map_info *map_context)
911 {
912         int r;
913         struct dm_mpath_io *mpio;
914         struct multipath *m = (struct multipath *) ti->private;
915
916         mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC);
917         if (!mpio)
918                 /* ENOMEM, requeue */
919                 return DM_MAPIO_REQUEUE;
920         memset(mpio, 0, sizeof(*mpio));
921
922         map_context->ptr = mpio;
923         clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
924         r = map_io(m, clone, mpio, 0);
925         if (r < 0 || r == DM_MAPIO_REQUEUE)
926                 mempool_free(mpio, m->mpio_pool);
927
928         return r;
929 }
930
931 /*
932  * Take a path out of use.
933  */
934 static int fail_path(struct pgpath *pgpath)
935 {
936         unsigned long flags;
937         struct multipath *m = pgpath->pg->m;
938
939         spin_lock_irqsave(&m->lock, flags);
940
941         if (!pgpath->is_active)
942                 goto out;
943
944         DMWARN("Failing path %s.", pgpath->path.dev->name);
945
946         pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path);
947         pgpath->is_active = 0;
948         pgpath->fail_count++;
949
950         m->nr_valid_paths--;
951
952         if (pgpath == m->current_pgpath)
953                 m->current_pgpath = NULL;
954
955         dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
956                       pgpath->path.dev->name, m->nr_valid_paths);
957
958         schedule_work(&m->trigger_event);
959         queue_work(kmultipathd, &pgpath->deactivate_path);
960
961 out:
962         spin_unlock_irqrestore(&m->lock, flags);
963
964         return 0;
965 }
966
967 /*
968  * Reinstate a previously-failed path
969  */
970 static int reinstate_path(struct pgpath *pgpath)
971 {
972         int r = 0;
973         unsigned long flags;
974         struct multipath *m = pgpath->pg->m;
975
976         spin_lock_irqsave(&m->lock, flags);
977
978         if (pgpath->is_active)
979                 goto out;
980
981         if (!pgpath->pg->ps.type->reinstate_path) {
982                 DMWARN("Reinstate path not supported by path selector %s",
983                        pgpath->pg->ps.type->name);
984                 r = -EINVAL;
985                 goto out;
986         }
987
988         r = pgpath->pg->ps.type->reinstate_path(&pgpath->pg->ps, &pgpath->path);
989         if (r)
990                 goto out;
991
992         pgpath->is_active = 1;
993
994         if (!m->nr_valid_paths++ && m->queue_size) {
995                 m->current_pgpath = NULL;
996                 queue_work(kmultipathd, &m->process_queued_ios);
997         } else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) {
998                 if (queue_work(kmpath_handlerd, &pgpath->activate_path))
999                         m->pg_init_in_progress++;
1000         }
1001
1002         dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
1003                       pgpath->path.dev->name, m->nr_valid_paths);
1004
1005         schedule_work(&m->trigger_event);
1006
1007 out:
1008         spin_unlock_irqrestore(&m->lock, flags);
1009
1010         return r;
1011 }
1012
1013 /*
1014  * Fail or reinstate all paths that match the provided struct dm_dev.
1015  */
1016 static int action_dev(struct multipath *m, struct dm_dev *dev,
1017                       action_fn action)
1018 {
1019         int r = 0;
1020         struct pgpath *pgpath;
1021         struct priority_group *pg;
1022
1023         list_for_each_entry(pg, &m->priority_groups, list) {
1024                 list_for_each_entry(pgpath, &pg->pgpaths, list) {
1025                         if (pgpath->path.dev == dev)
1026                                 r = action(pgpath);
1027                 }
1028         }
1029
1030         return r;
1031 }
1032
1033 /*
1034  * Temporarily try to avoid having to use the specified PG
1035  */
1036 static void bypass_pg(struct multipath *m, struct priority_group *pg,
1037                       int bypassed)
1038 {
1039         unsigned long flags;
1040
1041         spin_lock_irqsave(&m->lock, flags);
1042
1043         pg->bypassed = bypassed;
1044         m->current_pgpath = NULL;
1045         m->current_pg = NULL;
1046
1047         spin_unlock_irqrestore(&m->lock, flags);
1048
1049         schedule_work(&m->trigger_event);
1050 }
1051
1052 /*
1053  * Switch to using the specified PG from the next I/O that gets mapped
1054  */
1055 static int switch_pg_num(struct multipath *m, const char *pgstr)
1056 {
1057         struct priority_group *pg;
1058         unsigned pgnum;
1059         unsigned long flags;
1060
1061         if (!pgstr || (sscanf(pgstr, "%u", &pgnum) != 1) || !pgnum ||
1062             (pgnum > m->nr_priority_groups)) {
1063                 DMWARN("invalid PG number supplied to switch_pg_num");
1064                 return -EINVAL;
1065         }
1066
1067         spin_lock_irqsave(&m->lock, flags);
1068         list_for_each_entry(pg, &m->priority_groups, list) {
1069                 pg->bypassed = 0;
1070                 if (--pgnum)
1071                         continue;
1072
1073                 m->current_pgpath = NULL;
1074                 m->current_pg = NULL;
1075                 m->next_pg = pg;
1076         }
1077         spin_unlock_irqrestore(&m->lock, flags);
1078
1079         schedule_work(&m->trigger_event);
1080         return 0;
1081 }
1082
1083 /*
1084  * Set/clear bypassed status of a PG.
1085  * PGs are numbered upwards from 1 in the order they were declared.
1086  */
1087 static int bypass_pg_num(struct multipath *m, const char *pgstr, int bypassed)
1088 {
1089         struct priority_group *pg;
1090         unsigned pgnum;
1091
1092         if (!pgstr || (sscanf(pgstr, "%u", &pgnum) != 1) || !pgnum ||
1093             (pgnum > m->nr_priority_groups)) {
1094                 DMWARN("invalid PG number supplied to bypass_pg");
1095                 return -EINVAL;
1096         }
1097
1098         list_for_each_entry(pg, &m->priority_groups, list) {
1099                 if (!--pgnum)
1100                         break;
1101         }
1102
1103         bypass_pg(m, pg, bypassed);
1104         return 0;
1105 }
1106
1107 /*
1108  * Should we retry pg_init immediately?
1109  */
1110 static int pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath)
1111 {
1112         unsigned long flags;
1113         int limit_reached = 0;
1114
1115         spin_lock_irqsave(&m->lock, flags);
1116
1117         if (m->pg_init_count <= m->pg_init_retries)
1118                 m->pg_init_required = 1;
1119         else
1120                 limit_reached = 1;
1121
1122         spin_unlock_irqrestore(&m->lock, flags);
1123
1124         return limit_reached;
1125 }
1126
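/*
 * Completion callback passed to scsi_dh_activate() by activate_path().
 * Runs once for each path whose activation was queued; when the last one
 * finishes (pg_init_in_progress reaches zero) process_queued_ios is
 * rescheduled so the queued I/O can be dispatched down the initialized
 * paths.
 */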
1127 static void pg_init_done(void *data, int errors)
1128 {
1129         struct dm_path *path = data;
1130         struct pgpath *pgpath = path_to_pgpath(path);
1131         struct priority_group *pg = pgpath->pg;
1132         struct multipath *m = pg->m;
1133         unsigned long flags;
1134
1135         /* device or driver problems */
1136         switch (errors) {
1137         case SCSI_DH_OK:
1138                 break;
1139         case SCSI_DH_NOSYS:
1140                 if (!m->hw_handler_name) {
1141                         errors = 0;
1142                         break;
1143                 }
1144                 DMERR("Cannot failover device because scsi_dh_%s was not "
1145                       "loaded.", m->hw_handler_name);
1146                 /*
1147                  * Fail path for now, so we do not ping pong
1148                  */
1149                 fail_path(pgpath);
1150                 break;
1151         case SCSI_DH_DEV_TEMP_BUSY:
1152                 /*
1153                  * Probably doing something like FW upgrade on the
1154                  * controller so try the other pg.
1155                  */
1156                 bypass_pg(m, pg, 1);
1157                 break;
1158         /* TODO: For SCSI_DH_RETRY we should wait a couple seconds */
1159         case SCSI_DH_RETRY:
1160         case SCSI_DH_IMM_RETRY:
1161         case SCSI_DH_RES_TEMP_UNAVAIL:
1162                 if (pg_init_limit_reached(m, pgpath))
1163                         fail_path(pgpath);
1164                 errors = 0;
1165                 break;
1166         default:
1167                 /*
1168                  * We probably do not want to fail the path for a device
1169                  * error, but this is what the old dm did. In future
1170                  * patches we can do more advanced handling.
1171                  */
1172                 fail_path(pgpath);
1173         }
1174
1175         spin_lock_irqsave(&m->lock, flags);
1176         if (errors) {
1177                 if (pgpath == m->current_pgpath) {
1178                         DMERR("Could not failover device. Error %d.", errors);
1179                         m->current_pgpath = NULL;
1180                         m->current_pg = NULL;
1181                 }
1182         } else if (!m->pg_init_required) {
1183                 m->queue_io = 0;
1184                 pg->bypassed = 0;
1185         }
1186
1187         m->pg_init_in_progress--;
1188         if (!m->pg_init_in_progress)
1189                 queue_work(kmultipathd, &m->process_queued_ios);
1190         spin_unlock_irqrestore(&m->lock, flags);
1191 }
1192
1193 static void activate_path(struct work_struct *work)
1194 {
1195         struct pgpath *pgpath =
1196                 container_of(work, struct pgpath, activate_path);
1197
1198         scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev),
1199                                 pg_init_done, &pgpath->path);
1200 }
1201
1202 /*
1203  * end_io handling
1204  */
1205 static int do_end_io(struct multipath *m, struct request *clone,
1206                      int error, struct dm_mpath_io *mpio)
1207 {
1208         /*
1209          * We don't queue any clone request inside the multipath target
1210          * during end I/O handling, since those clone requests don't have
1211          * bio clones.  If we queue them inside the multipath target,
1212          * we need to make bio clones, that requires memory allocation.
1213          * (See drivers/md/dm.c:end_clone_bio() about why the clone requests
1214          *  don't have bio clones.)
1215          * Instead of queueing the clone request here, we queue the original
1216          * request into dm core, which will remake a clone request and
1217          * clone bios for it and resubmit it later.
1218          */
1219         int r = DM_ENDIO_REQUEUE;
1220         unsigned long flags;
1221
1222         if (!error && !clone->errors)
1223                 return 0;       /* I/O complete */
1224
1225         if (error == -EOPNOTSUPP)
1226                 return error;
1227
1228         if (mpio->pgpath)
1229                 fail_path(mpio->pgpath);
1230
1231         spin_lock_irqsave(&m->lock, flags);
1232         if (!m->nr_valid_paths && !m->queue_if_no_path && !__must_push_back(m))
1233                 r = -EIO;
1234         spin_unlock_irqrestore(&m->lock, flags);
1235
1236         return r;
1237 }
1238
1239 static int multipath_end_io(struct dm_target *ti, struct request *clone,
1240                             int error, union map_info *map_context)
1241 {
1242         struct multipath *m = ti->private;
1243         struct dm_mpath_io *mpio = map_context->ptr;
1244         struct pgpath *pgpath = mpio->pgpath;
1245         struct path_selector *ps;
1246         int r;
1247
1248         r  = do_end_io(m, clone, error, mpio);
1249         if (pgpath) {
1250                 ps = &pgpath->pg->ps;
1251                 if (ps->type->end_io)
1252                         ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
1253         }
1254         mempool_free(mpio, m->mpio_pool);
1255
1256         return r;
1257 }
1258
1259 /*
1260  * Suspend can't complete until all the I/O is processed so if
1261  * the last path fails we must error any remaining I/O.
1262  * Note that if the freeze_bdev fails while suspending, the
1263  * queue_if_no_path state is lost - userspace should reset it.
1264  */
1265 static void multipath_presuspend(struct dm_target *ti)
1266 {
1267         struct multipath *m = (struct multipath *) ti->private;
1268
1269         queue_if_no_path(m, 0, 1);
1270 }
1271
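/*
 * Flush all outstanding multipath work before the device is suspended.
 * Holding work_mutex here means a concurrent multipath_message() cannot
 * queue new work while the flush is draining the workqueues.
 */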
1272 static void multipath_postsuspend(struct dm_target *ti)
1273 {
1274         struct multipath *m = ti->private;
1275
1276         mutex_lock(&m->work_mutex);
1277         flush_multipath_work();
1278         mutex_unlock(&m->work_mutex);
1279 }
1280
1281 /*
1282  * Restore the queue_if_no_path setting.
1283  */
1284 static void multipath_resume(struct dm_target *ti)
1285 {
1286         struct multipath *m = (struct multipath *) ti->private;
1287         unsigned long flags;
1288
1289         spin_lock_irqsave(&m->lock, flags);
1290         m->queue_if_no_path = m->saved_queue_if_no_path;
1291         spin_unlock_irqrestore(&m->lock, flags);
1292 }
1293
1294 /*
1295  * Info output has the following format:
1296  * num_multipath_feature_args [multipath_feature_args]*
1297  * num_handler_status_args [handler_status_args]*
1298  * num_groups init_group_number
1299  *            [A|D|E num_ps_status_args [ps_status_args]*
1300  *             num_paths num_selector_args
1301  *             [path_dev A|F fail_count [selector_args]* ]+ ]+
1302  *
1303  * Table output has the following format (identical to the constructor string):
1304  * num_feature_args [features_args]*
1305  * num_handler_args hw_handler [hw_handler_args]*
1306  * num_groups init_group_number
1307  *     [priority selector-name num_ps_args [ps_args]*
1308  *      num_paths num_selector_args [path_dev [selector_args]* ]+ ]+
1309  */
1310 static int multipath_status(struct dm_target *ti, status_type_t type,
1311                             char *result, unsigned int maxlen)
1312 {
1313         int sz = 0;
1314         unsigned long flags;
1315         struct multipath *m = (struct multipath *) ti->private;
1316         struct priority_group *pg;
1317         struct pgpath *p;
1318         unsigned pg_num;
1319         char state;
1320
1321         spin_lock_irqsave(&m->lock, flags);
1322
1323         /* Features */
1324         if (type == STATUSTYPE_INFO)
1325                 DMEMIT("2 %u %u ", m->queue_size, m->pg_init_count);
1326         else {
1327                 DMEMIT("%u ", m->queue_if_no_path +
1328                               (m->pg_init_retries > 0) * 2);
1329                 if (m->queue_if_no_path)
1330                         DMEMIT("queue_if_no_path ");
1331                 if (m->pg_init_retries)
1332                         DMEMIT("pg_init_retries %u ", m->pg_init_retries);
1333         }
1334
1335         if (!m->hw_handler_name || type == STATUSTYPE_INFO)
1336                 DMEMIT("0 ");
1337         else
1338                 DMEMIT("1 %s ", m->hw_handler_name);
1339
1340         DMEMIT("%u ", m->nr_priority_groups);
1341
1342         if (m->next_pg)
1343                 pg_num = m->next_pg->pg_num;
1344         else if (m->current_pg)
1345                 pg_num = m->current_pg->pg_num;
1346         else
1347                 pg_num = 1;
1348
1349         DMEMIT("%u ", pg_num);
1350
1351         switch (type) {
1352         case STATUSTYPE_INFO:
1353                 list_for_each_entry(pg, &m->priority_groups, list) {
1354                         if (pg->bypassed)
1355                                 state = 'D';    /* Disabled */
1356                         else if (pg == m->current_pg)
1357                                 state = 'A';    /* Currently Active */
1358                         else
1359                                 state = 'E';    /* Enabled */
1360
1361                         DMEMIT("%c ", state);
1362
1363                         if (pg->ps.type->status)
1364                                 sz += pg->ps.type->status(&pg->ps, NULL, type,
1365                                                           result + sz,
1366                                                           maxlen - sz);
1367                         else
1368                                 DMEMIT("0 ");
1369
1370                         DMEMIT("%u %u ", pg->nr_pgpaths,
1371                                pg->ps.type->info_args);
1372
1373                         list_for_each_entry(p, &pg->pgpaths, list) {
1374                                 DMEMIT("%s %s %u ", p->path.dev->name,
1375                                        p->is_active ? "A" : "F",
1376                                        p->fail_count);
1377                                 if (pg->ps.type->status)
1378                                         sz += pg->ps.type->status(&pg->ps,
1379                                               &p->path, type, result + sz,
1380                                               maxlen - sz);
1381                         }
1382                 }
1383                 break;
1384
1385         case STATUSTYPE_TABLE:
1386                 list_for_each_entry(pg, &m->priority_groups, list) {
1387                         DMEMIT("%s ", pg->ps.type->name);
1388
1389                         if (pg->ps.type->status)
1390                                 sz += pg->ps.type->status(&pg->ps, NULL, type,
1391                                                           result + sz,
1392                                                           maxlen - sz);
1393                         else
1394                                 DMEMIT("0 ");
1395
1396                         DMEMIT("%u %u ", pg->nr_pgpaths,
1397                                pg->ps.type->table_args);
1398
1399                         list_for_each_entry(p, &pg->pgpaths, list) {
1400                                 DMEMIT("%s ", p->path.dev->name);
1401                                 if (pg->ps.type->status)
1402                                         sz += pg->ps.type->status(&pg->ps,
1403                                               &p->path, type, result + sz,
1404                                               maxlen - sz);
1405                         }
1406                 }
1407                 break;
1408         }
1409
1410         spin_unlock_irqrestore(&m->lock, flags);
1411
1412         return 0;
1413 }
1414
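/*
 * Handle "dmsetup message" requests.  With a hypothetical map named mpath0
 * and a path device 8:32, the recognized messages look like:
 *
 *   dmsetup message mpath0 0 queue_if_no_path
 *   dmsetup message mpath0 0 fail_if_no_path
 *   dmsetup message mpath0 0 disable_group 2
 *   dmsetup message mpath0 0 enable_group 2
 *   dmsetup message mpath0 0 switch_group 1
 *   dmsetup message mpath0 0 fail_path 8:32
 *   dmsetup message mpath0 0 reinstate_path 8:32
 *
 * (the "0" is the sector used to locate the multipath target in the table).
 * Everything below runs under work_mutex, so it cannot race with
 * multipath_postsuspend() flushing the workqueues.
 */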
1415 static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
1416 {
1417         int r = -EINVAL;
1418         struct dm_dev *dev;
1419         struct multipath *m = (struct multipath *) ti->private;
1420         action_fn action;
1421
1422         mutex_lock(&m->work_mutex);
1423
1424         if (argc == 1) {
1425                 if (!strnicmp(argv[0], MESG_STR("queue_if_no_path"))) {
1426                         r = queue_if_no_path(m, 1, 0);
1427                         goto out;
1428                 } else if (!strnicmp(argv[0], MESG_STR("fail_if_no_path"))) {
1429                         r = queue_if_no_path(m, 0, 0);
1430                         goto out;
1431                 }
1432         }
1433
1434         if (argc != 2) {
1435                 DMWARN("Unrecognised multipath message received.");
1436                 goto out;
1437         }
1438
1439         if (!strnicmp(argv[0], MESG_STR("disable_group"))) {
1440                 r = bypass_pg_num(m, argv[1], 1);
1441                 goto out;
1442         } else if (!strnicmp(argv[0], MESG_STR("enable_group"))) {
1443                 r = bypass_pg_num(m, argv[1], 0);
1444                 goto out;
1445         } else if (!strnicmp(argv[0], MESG_STR("switch_group"))) {
1446                 r = switch_pg_num(m, argv[1]);
1447                 goto out;
1448         } else if (!strnicmp(argv[0], MESG_STR("reinstate_path")))
1449                 action = reinstate_path;
1450         else if (!strnicmp(argv[0], MESG_STR("fail_path")))
1451                 action = fail_path;
1452         else {
1453                 DMWARN("Unrecognised multipath message received.");
1454                 goto out;
1455         }
1456
1457         r = dm_get_device(ti, argv[1], ti->begin, ti->len,
1458                           dm_table_get_mode(ti->table), &dev);
1459         if (r) {
1460                 DMWARN("message: error getting device %s",
1461                        argv[1]);
1462                 goto out;
1463         }
1464
1465         r = action_dev(m, dev, action);
1466
1467         dm_put_device(ti, dev);
1468
1469 out:
1470         mutex_unlock(&m->work_mutex);
1471         return r;
1472 }
1473
1474 static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
1475                            unsigned long arg)
1476 {
1477         struct multipath *m = (struct multipath *) ti->private;
1478         struct block_device *bdev = NULL;
1479         fmode_t mode = 0;
1480         unsigned long flags;
1481         int r = 0;
1482
1483         spin_lock_irqsave(&m->lock, flags);
1484
1485         if (!m->current_pgpath)
1486                 __choose_pgpath(m, 0);
1487
1488         if (m->current_pgpath) {
1489                 bdev = m->current_pgpath->path.dev->bdev;
1490                 mode = m->current_pgpath->path.dev->mode;
1491         }
1492
1493         if (m->queue_io)
1494                 r = -EAGAIN;
1495         else if (!bdev)
1496                 r = -EIO;
1497
1498         spin_unlock_irqrestore(&m->lock, flags);
1499
1500         return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg);
1501 }
1502
1503 static int multipath_iterate_devices(struct dm_target *ti,
1504                                      iterate_devices_callout_fn fn, void *data)
1505 {
1506         struct multipath *m = ti->private;
1507         struct priority_group *pg;
1508         struct pgpath *p;
1509         int ret = 0;
1510
1511         list_for_each_entry(pg, &m->priority_groups, list) {
1512                 list_for_each_entry(p, &pg->pgpaths, list) {
1513                         ret = fn(ti, p->path.dev, ti->begin, ti->len, data);
1514                         if (ret)
1515                                 goto out;
1516                 }
1517         }
1518
1519 out:
1520         return ret;
1521 }
1522
1523 static int __pgpath_busy(struct pgpath *pgpath)
1524 {
1525         struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);
1526
1527         return dm_underlying_device_busy(q);
1528 }
1529
1530 /*
1531  * We return "busy" only when we can map I/Os but the underlying devices
1532  * are busy (so even if we map I/Os now, the I/Os will wait on
1533  * the underlying queue).
1534  * In other words, if we want to kill I/Os or queue them inside us
1535  * due to map unavailability, we don't return "busy".  Otherwise,
1536  * dm core won't give us the I/Os and we can't do what we want.
1537  */
1538 static int multipath_busy(struct dm_target *ti)
1539 {
1540         int busy = 0, has_active = 0;
1541         struct multipath *m = ti->private;
1542         struct priority_group *pg;
1543         struct pgpath *pgpath;
1544         unsigned long flags;
1545
1546         spin_lock_irqsave(&m->lock, flags);
1547
1548         /* Guess which priority_group will be used at next mapping time */
1549         if (unlikely(!m->current_pgpath && m->next_pg))
1550                 pg = m->next_pg;
1551         else if (likely(m->current_pg))
1552                 pg = m->current_pg;
1553         else
1554                 /*
1555                  * We don't know which pg will be used at next mapping time.
1556                  * We don't call __choose_pgpath() here to avoid triggering
1557                  * pg_init merely because of a busy check.
1558                  * So we don't know whether underlying devices we will be using
1559                  * at next mapping time are busy or not. Just try mapping.
1560                  */
1561                 goto out;
1562
1563         /*
1564          * If there is one non-busy active path at least, the path selector
1565          * will be able to select it. So we consider such a pg as not busy.
1566          */
1567         busy = 1;
1568         list_for_each_entry(pgpath, &pg->pgpaths, list)
1569                 if (pgpath->is_active) {
1570                         has_active = 1;
1571
1572                         if (!__pgpath_busy(pgpath)) {
1573                                 busy = 0;
1574                                 break;
1575                         }
1576                 }
1577
1578         if (!has_active)
1579                 /*
1580                  * No active path in this pg, so this pg won't be used and
1581                  * the current_pg will be changed at next mapping time.
1582                  * We need to try mapping to determine it.
1583                  */
1584                 busy = 0;
1585
1586 out:
1587         spin_unlock_irqrestore(&m->lock, flags);
1588
1589         return busy;
1590 }
1591
1592 /*-----------------------------------------------------------------
1593  * Module setup
1594  *---------------------------------------------------------------*/
1595 static struct target_type multipath_target = {
1596         .name = "multipath",
1597         .version = {1, 1, 1},
1598         .module = THIS_MODULE,
1599         .ctr = multipath_ctr,
1600         .dtr = multipath_dtr,
1601         .map_rq = multipath_map,
1602         .rq_end_io = multipath_end_io,
1603         .presuspend = multipath_presuspend,
1604         .postsuspend = multipath_postsuspend,
1605         .resume = multipath_resume,
1606         .status = multipath_status,
1607         .message = multipath_message,
1608         .ioctl  = multipath_ioctl,
1609         .iterate_devices = multipath_iterate_devices,
1610         .busy = multipath_busy,
1611 };
1612
1613 static int __init dm_multipath_init(void)
1614 {
1615         int r;
1616
1617         /* allocate a slab for the dm_ios */
1618         _mpio_cache = KMEM_CACHE(dm_mpath_io, 0);
1619         if (!_mpio_cache)
1620                 return -ENOMEM;
1621
1622         r = dm_register_target(&multipath_target);
1623         if (r < 0) {
1624                 DMERR("register failed %d", r);
1625                 kmem_cache_destroy(_mpio_cache);
1626                 return -EINVAL;
1627         }
1628
1629         kmultipathd = create_workqueue("kmpathd");
1630         if (!kmultipathd) {
1631                 DMERR("failed to create workqueue kmpathd");
1632                 dm_unregister_target(&multipath_target);
1633                 kmem_cache_destroy(_mpio_cache);
1634                 return -ENOMEM;
1635         }
1636
1637         /*
1638          * A separate workqueue is used to handle the device handlers
1639          * to avoid overloading the existing workqueue. Overloading the
1640          * old workqueue would also create a bottleneck in the
1641          * path of the storage hardware device activation.
1642          */
1643         kmpath_handlerd = create_singlethread_workqueue("kmpath_handlerd");
1644         if (!kmpath_handlerd) {
1645                 DMERR("failed to create workqueue kmpath_handlerd");
1646                 destroy_workqueue(kmultipathd);
1647                 dm_unregister_target(&multipath_target);
1648                 kmem_cache_destroy(_mpio_cache);
1649                 return -ENOMEM;
1650         }
1651
1652         DMINFO("version %u.%u.%u loaded",
1653                multipath_target.version[0], multipath_target.version[1],
1654                multipath_target.version[2]);
1655
1656         return r;
1657 }
1658
1659 static void __exit dm_multipath_exit(void)
1660 {
1661         destroy_workqueue(kmpath_handlerd);
1662         destroy_workqueue(kmultipathd);
1663
1664         dm_unregister_target(&multipath_target);
1665         kmem_cache_destroy(_mpio_cache);
1666 }
1667
1668 module_init(dm_multipath_init);
1669 module_exit(dm_multipath_exit);
1670
1671 MODULE_DESCRIPTION(DM_NAME " multipath target");
1672 MODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>");
1673 MODULE_LICENSE("GPL");