dm mpath: avoid storing private suspended state
drivers/md/dm-mpath.c
1 /*
2  * Copyright (C) 2003 Sistina Software Limited.
3  * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4  *
5  * This file is released under the GPL.
6  */
7
8 #include <linux/device-mapper.h>
9
10 #include "dm-path-selector.h"
11 #include "dm-uevent.h"
12
13 #include <linux/ctype.h>
14 #include <linux/init.h>
15 #include <linux/mempool.h>
16 #include <linux/module.h>
17 #include <linux/pagemap.h>
18 #include <linux/slab.h>
19 #include <linux/time.h>
20 #include <linux/workqueue.h>
21 #include <scsi/scsi_dh.h>
22 #include <asm/atomic.h>
23
24 #define DM_MSG_PREFIX "multipath"
25 #define MESG_STR(x) x, sizeof(x)
26
27 /* Path properties */
28 struct pgpath {
29         struct list_head list;
30
31         struct priority_group *pg;      /* Owning PG */
32         unsigned is_active;             /* Path status */
33         unsigned fail_count;            /* Cumulative failure count */
34
35         struct dm_path path;
36         struct work_struct deactivate_path;
37         struct work_struct activate_path;
38 };
39
40 #define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)
41
42 /*
43  * Paths are grouped into Priority Groups and numbered from 1 upwards.
44  * Each has a path selector which controls which path gets used.
45  */
46 struct priority_group {
47         struct list_head list;
48
49         struct multipath *m;            /* Owning multipath instance */
50         struct path_selector ps;
51
52         unsigned pg_num;                /* Reference number */
53         unsigned bypassed;              /* Temporarily bypass this PG? */
54
55         unsigned nr_pgpaths;            /* Number of paths in PG */
56         struct list_head pgpaths;
57 };
58
59 /* Multipath context */
60 struct multipath {
61         struct list_head list;
62         struct dm_target *ti;
63
64         spinlock_t lock;
65
66         const char *hw_handler_name;
67         char *hw_handler_params;
68         unsigned nr_priority_groups;
69         struct list_head priority_groups;
70         unsigned pg_init_required;      /* pg_init needs calling? */
71         unsigned pg_init_in_progress;   /* Only one pg_init allowed at once */
72
73         unsigned nr_valid_paths;        /* Total number of usable paths */
74         struct pgpath *current_pgpath;
75         struct priority_group *current_pg;
76         struct priority_group *next_pg; /* Switch to this PG if set */
77         unsigned repeat_count;          /* I/Os left before calling PS again */
78
79         unsigned queue_io;              /* Must we queue all I/O? */
80         unsigned queue_if_no_path;      /* Queue I/O if last path fails? */
81         unsigned saved_queue_if_no_path;/* Saved state during suspension */
82         unsigned pg_init_retries;       /* Number of times to retry pg_init */
83         unsigned pg_init_count;         /* Number of times pg_init called */
84
85         struct work_struct process_queued_ios;
86         struct list_head queued_ios;
87         unsigned queue_size;
88
89         struct work_struct trigger_event;
90
91         /*
92          * We must use a mempool of dm_mpath_io structs so that we
93          * can resubmit I/O on error.
94          */
95         mempool_t *mpio_pool;
96
97         struct mutex work_mutex;
98 };
99
100 /*
101  * Context information attached to each clone request we process.
102  */
103 struct dm_mpath_io {
104         struct pgpath *pgpath;
105         size_t nr_bytes;
106 };
107
108 typedef int (*action_fn) (struct pgpath *pgpath);
109
110 #define MIN_IOS 256     /* Mempool size */
111
112 static struct kmem_cache *_mpio_cache;
113
114 static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
115 static void process_queued_ios(struct work_struct *work);
116 static void trigger_event(struct work_struct *work);
117 static void activate_path(struct work_struct *work);
118 static void deactivate_path(struct work_struct *work);
119
120
121 /*-----------------------------------------------
122  * Allocation routines
123  *-----------------------------------------------*/
124
125 static struct pgpath *alloc_pgpath(void)
126 {
127         struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL);
128
129         if (pgpath) {
130                 pgpath->is_active = 1;
131                 INIT_WORK(&pgpath->deactivate_path, deactivate_path);
132                 INIT_WORK(&pgpath->activate_path, activate_path);
133         }
134
135         return pgpath;
136 }
137
138 static void free_pgpath(struct pgpath *pgpath)
139 {
140         kfree(pgpath);
141 }
142
143 static void deactivate_path(struct work_struct *work)
144 {
145         struct pgpath *pgpath =
146                 container_of(work, struct pgpath, deactivate_path);
147
148         blk_abort_queue(pgpath->path.dev->bdev->bd_disk->queue);
149 }
150
151 static struct priority_group *alloc_priority_group(void)
152 {
153         struct priority_group *pg;
154
155         pg = kzalloc(sizeof(*pg), GFP_KERNEL);
156
157         if (pg)
158                 INIT_LIST_HEAD(&pg->pgpaths);
159
160         return pg;
161 }
162
163 static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
164 {
165         struct pgpath *pgpath, *tmp;
166         struct multipath *m = ti->private;
167
168         list_for_each_entry_safe(pgpath, tmp, pgpaths, list) {
169                 list_del(&pgpath->list);
170                 if (m->hw_handler_name)
171                         scsi_dh_detach(bdev_get_queue(pgpath->path.dev->bdev));
172                 dm_put_device(ti, pgpath->path.dev);
173                 free_pgpath(pgpath);
174         }
175 }
176
177 static void free_priority_group(struct priority_group *pg,
178                                 struct dm_target *ti)
179 {
180         struct path_selector *ps = &pg->ps;
181
182         if (ps->type) {
183                 ps->type->destroy(ps);
184                 dm_put_path_selector(ps->type);
185         }
186
187         free_pgpaths(&pg->pgpaths, ti);
188         kfree(pg);
189 }
190
191 static struct multipath *alloc_multipath(struct dm_target *ti)
192 {
193         struct multipath *m;
194
195         m = kzalloc(sizeof(*m), GFP_KERNEL);
196         if (m) {
197                 INIT_LIST_HEAD(&m->priority_groups);
198                 INIT_LIST_HEAD(&m->queued_ios);
199                 spin_lock_init(&m->lock);
200                 m->queue_io = 1;
201                 INIT_WORK(&m->process_queued_ios, process_queued_ios);
202                 INIT_WORK(&m->trigger_event, trigger_event);
203                 mutex_init(&m->work_mutex);
204                 m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
205                 if (!m->mpio_pool) {
206                         kfree(m);
207                         return NULL;
208                 }
209                 m->ti = ti;
210                 ti->private = m;
211         }
212
213         return m;
214 }
215
216 static void free_multipath(struct multipath *m)
217 {
218         struct priority_group *pg, *tmp;
219
220         list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) {
221                 list_del(&pg->list);
222                 free_priority_group(pg, m->ti);
223         }
224
225         kfree(m->hw_handler_name);
226         kfree(m->hw_handler_params);
227         mempool_destroy(m->mpio_pool);
228         kfree(m);
229 }
230
231
232 /*-----------------------------------------------
233  * Path selection
234  *-----------------------------------------------*/
235
236 static void __switch_pg(struct multipath *m, struct pgpath *pgpath)
237 {
238         m->current_pg = pgpath->pg;
239
240         /* Must we initialise the PG first, and queue I/O till it's ready? */
241         if (m->hw_handler_name) {
242                 m->pg_init_required = 1;
243                 m->queue_io = 1;
244         } else {
245                 m->pg_init_required = 0;
246                 m->queue_io = 0;
247         }
248
249         m->pg_init_count = 0;
250 }
251
252 static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg,
253                                size_t nr_bytes)
254 {
255         struct dm_path *path;
256
257         path = pg->ps.type->select_path(&pg->ps, &m->repeat_count, nr_bytes);
258         if (!path)
259                 return -ENXIO;
260
261         m->current_pgpath = path_to_pgpath(path);
262
263         if (m->current_pg != pg)
264                 __switch_pg(m, m->current_pgpath);
265
266         return 0;
267 }
268
269 static void __choose_pgpath(struct multipath *m, size_t nr_bytes)
270 {
271         struct priority_group *pg;
272         unsigned bypassed = 1;
273
274         if (!m->nr_valid_paths)
275                 goto failed;
276
277         /* Were we instructed to switch PG? */
278         if (m->next_pg) {
279                 pg = m->next_pg;
280                 m->next_pg = NULL;
281                 if (!__choose_path_in_pg(m, pg, nr_bytes))
282                         return;
283         }
284
285         /* Don't change PG until it has no remaining paths */
286         if (m->current_pg && !__choose_path_in_pg(m, m->current_pg, nr_bytes))
287                 return;
288
289         /*
290          * Loop through priority groups until we find a valid path.
291          * First time we skip PGs marked 'bypassed'.
292          * Second time we only try the ones we skipped.
293          */
294         do {
295                 list_for_each_entry(pg, &m->priority_groups, list) {
296                         if (pg->bypassed == bypassed)
297                                 continue;
298                         if (!__choose_path_in_pg(m, pg, nr_bytes))
299                                 return;
300                 }
301         } while (bypassed--);
302
303 failed:
304         m->current_pgpath = NULL;
305         m->current_pg = NULL;
306 }
307
308 /*
309  * Check whether bios must be queued in the device-mapper core rather
310  * than here in the target.
311  *
312  * m->lock must be held on entry.
313  *
314  * If m->queue_if_no_path and m->saved_queue_if_no_path hold the
315  * same value then we are not between multipath_presuspend()
316  * and multipath_resume() calls and we have no need to check
317  * for the DMF_NOFLUSH_SUSPENDING flag.
318  */
319 static int __must_push_back(struct multipath *m)
320 {
321         return (m->queue_if_no_path != m->saved_queue_if_no_path &&
322                 dm_noflush_suspending(m->ti));
323 }
324
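/*
 * Map a cloned request to a path.  Returns DM_MAPIO_SUBMITTED when the
 * clone has been queued internally (pg_init pending or queue_if_no_path),
 * DM_MAPIO_REMAPPED when it has been mapped to the current path's request
 * queue, DM_MAPIO_REQUEUE to push the request back to the dm core during
 * a noflush suspend, or -EIO when no usable path exists.
 */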
325 static int map_io(struct multipath *m, struct request *clone,
326                   struct dm_mpath_io *mpio, unsigned was_queued)
327 {
328         int r = DM_MAPIO_REMAPPED;
329         size_t nr_bytes = blk_rq_bytes(clone);
330         unsigned long flags;
331         struct pgpath *pgpath;
332         struct block_device *bdev;
333
334         spin_lock_irqsave(&m->lock, flags);
335
336         /* Do we need to select a new pgpath? */
337         if (!m->current_pgpath ||
338             (!m->queue_io && (m->repeat_count && --m->repeat_count == 0)))
339                 __choose_pgpath(m, nr_bytes);
340
341         pgpath = m->current_pgpath;
342
343         if (was_queued)
344                 m->queue_size--;
345
346         if ((pgpath && m->queue_io) ||
347             (!pgpath && m->queue_if_no_path)) {
348                 /* Queue for the daemon to resubmit */
349                 list_add_tail(&clone->queuelist, &m->queued_ios);
350                 m->queue_size++;
351                 if ((m->pg_init_required && !m->pg_init_in_progress) ||
352                     !m->queue_io)
353                         queue_work(kmultipathd, &m->process_queued_ios);
354                 pgpath = NULL;
355                 r = DM_MAPIO_SUBMITTED;
356         } else if (pgpath) {
357                 bdev = pgpath->path.dev->bdev;
358                 clone->q = bdev_get_queue(bdev);
359                 clone->rq_disk = bdev->bd_disk;
360         } else if (__must_push_back(m))
361                 r = DM_MAPIO_REQUEUE;
362         else
363                 r = -EIO;       /* Failed */
364
365         mpio->pgpath = pgpath;
366         mpio->nr_bytes = nr_bytes;
367
368         if (r == DM_MAPIO_REMAPPED && pgpath->pg->ps.type->start_io)
369                 pgpath->pg->ps.type->start_io(&pgpath->pg->ps, &pgpath->path,
370                                               nr_bytes);
371
372         spin_unlock_irqrestore(&m->lock, flags);
373
374         return r;
375 }
376
377 /*
378  * If we run out of usable paths, should we queue I/O or error it?
379  */
380 static int queue_if_no_path(struct multipath *m, unsigned queue_if_no_path,
381                             unsigned save_old_value)
382 {
383         unsigned long flags;
384
385         spin_lock_irqsave(&m->lock, flags);
386
387         if (save_old_value)
388                 m->saved_queue_if_no_path = m->queue_if_no_path;
389         else
390                 m->saved_queue_if_no_path = queue_if_no_path;
391         m->queue_if_no_path = queue_if_no_path;
392         if (!m->queue_if_no_path && m->queue_size)
393                 queue_work(kmultipathd, &m->process_queued_ios);
394
395         spin_unlock_irqrestore(&m->lock, flags);
396
397         return 0;
398 }
399
400 /*-----------------------------------------------------------------
401  * The multipath daemon is responsible for resubmitting queued ios.
402  *---------------------------------------------------------------*/
403
404 static void dispatch_queued_ios(struct multipath *m)
405 {
406         int r;
407         unsigned long flags;
408         struct dm_mpath_io *mpio;
409         union map_info *info;
410         struct request *clone, *n;
411         LIST_HEAD(cl);
412
413         spin_lock_irqsave(&m->lock, flags);
414         list_splice_init(&m->queued_ios, &cl);
415         spin_unlock_irqrestore(&m->lock, flags);
416
417         list_for_each_entry_safe(clone, n, &cl, queuelist) {
418                 list_del_init(&clone->queuelist);
419
420                 info = dm_get_rq_mapinfo(clone);
421                 mpio = info->ptr;
422
423                 r = map_io(m, clone, mpio, 1);
424                 if (r < 0) {
425                         mempool_free(mpio, m->mpio_pool);
426                         dm_kill_unmapped_request(clone, r);
427                 } else if (r == DM_MAPIO_REMAPPED)
428                         dm_dispatch_request(clone);
429                 else if (r == DM_MAPIO_REQUEUE) {
430                         mempool_free(mpio, m->mpio_pool);
431                         dm_requeue_unmapped_request(clone);
432                 }
433         }
434 }
435
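/*
 * Worker for m->process_queued_ios.  If the current priority group still
 * needs initialisation, kick off activate_path work for each of its
 * active paths; once queueing is no longer required, resubmit the held
 * clones via dispatch_queued_ios().
 */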
436 static void process_queued_ios(struct work_struct *work)
437 {
438         struct multipath *m =
439                 container_of(work, struct multipath, process_queued_ios);
440         struct pgpath *pgpath = NULL, *tmp;
441         unsigned must_queue = 1;
442         unsigned long flags;
443
444         spin_lock_irqsave(&m->lock, flags);
445
446         if (!m->queue_size)
447                 goto out;
448
449         if (!m->current_pgpath)
450                 __choose_pgpath(m, 0);
451
452         pgpath = m->current_pgpath;
453
454         if ((pgpath && !m->queue_io) ||
455             (!pgpath && !m->queue_if_no_path))
456                 must_queue = 0;
457
458         if (m->pg_init_required && !m->pg_init_in_progress && pgpath) {
459                 m->pg_init_count++;
460                 m->pg_init_required = 0;
461                 list_for_each_entry(tmp, &pgpath->pg->pgpaths, list) {
462                         /* Skip failed paths */
463                         if (!tmp->is_active)
464                                 continue;
465                         if (queue_work(kmpath_handlerd, &tmp->activate_path))
466                                 m->pg_init_in_progress++;
467                 }
468         }
469 out:
470         spin_unlock_irqrestore(&m->lock, flags);
471         if (!must_queue)
472                 dispatch_queued_ios(m);
473 }
474
475 /*
476  * An event is triggered whenever a path is taken out of use.
477  * Includes path failure and PG bypass.
478  */
479 static void trigger_event(struct work_struct *work)
480 {
481         struct multipath *m =
482                 container_of(work, struct multipath, trigger_event);
483
484         dm_table_event(m->ti->table);
485 }
486
487 /*-----------------------------------------------------------------
488  * Constructor/argument parsing:
489  * <#multipath feature args> [<arg>]*
490  * <#hw_handler args> [hw_handler [<arg>]*]
491  * <#priority groups>
492  * <initial priority group>
493  *     [<selector> <#selector args> [<arg>]*
494  *      <#paths> <#per-path selector args>
495  *         [<path> [<arg>]* ]+ ]+
496  *---------------------------------------------------------------*/
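/*
 * Illustrative constructor strings matching the format above (device
 * numbers, handler name and repeat counts are only examples).
 *
 * Two round-robin priority groups of two paths each, no feature or
 * hardware handler arguments, starting with group 1 (one line, wrapped
 * here for readability):
 *
 *     0 0 2 1 round-robin 0 2 1 8:16 1000 8:32 1000
 *             round-robin 0 2 1 8:48 1000 8:64 1000
 *
 * A single group with queue_if_no_path set and a hardware handler:
 *
 *     1 queue_if_no_path 1 emc 1 1 round-robin 0 2 1 8:16 1000 8:32 1000
 */
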
497 struct param {
498         unsigned min;
499         unsigned max;
500         char *error;
501 };
502
503 static int read_param(struct param *param, char *str, unsigned *v, char **error)
504 {
505         if (!str ||
506             (sscanf(str, "%u", v) != 1) ||
507             (*v < param->min) ||
508             (*v > param->max)) {
509                 *error = param->error;
510                 return -EINVAL;
511         }
512
513         return 0;
514 }
515
516 struct arg_set {
517         unsigned argc;
518         char **argv;
519 };
520
521 static char *shift(struct arg_set *as)
522 {
523         char *r;
524
525         if (as->argc) {
526                 as->argc--;
527                 r = *as->argv;
528                 as->argv++;
529                 return r;
530         }
531
532         return NULL;
533 }
534
535 static void consume(struct arg_set *as, unsigned n)
536 {
537         BUG_ON(as->argc < n);
538         as->argc -= n;
539         as->argv += n;
540 }
541
542 static int parse_path_selector(struct arg_set *as, struct priority_group *pg,
543                                struct dm_target *ti)
544 {
545         int r;
546         struct path_selector_type *pst;
547         unsigned ps_argc;
548
549         static struct param _params[] = {
550                 {0, 1024, "invalid number of path selector args"},
551         };
552
553         pst = dm_get_path_selector(shift(as));
554         if (!pst) {
555                 ti->error = "unknown path selector type";
556                 return -EINVAL;
557         }
558
559         r = read_param(_params, shift(as), &ps_argc, &ti->error);
560         if (r) {
561                 dm_put_path_selector(pst);
562                 return -EINVAL;
563         }
564
565         if (ps_argc > as->argc) {
566                 dm_put_path_selector(pst);
567                 ti->error = "not enough arguments for path selector";
568                 return -EINVAL;
569         }
570
571         r = pst->create(&pg->ps, ps_argc, as->argv);
572         if (r) {
573                 dm_put_path_selector(pst);
574                 ti->error = "path selector constructor failed";
575                 return r;
576         }
577
578         pg->ps.type = pst;
579         consume(as, ps_argc);
580
581         return 0;
582 }
583
584 static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
585                                struct dm_target *ti)
586 {
587         int r;
588         struct pgpath *p;
589         struct multipath *m = ti->private;
590
591         /* we need at least a path arg */
592         if (as->argc < 1) {
593                 ti->error = "no device given";
594                 return ERR_PTR(-EINVAL);
595         }
596
597         p = alloc_pgpath();
598         if (!p)
599                 return ERR_PTR(-ENOMEM);
600
601         r = dm_get_device(ti, shift(as), ti->begin, ti->len,
602                           dm_table_get_mode(ti->table), &p->path.dev);
603         if (r) {
604                 ti->error = "error getting device";
605                 goto bad;
606         }
607
608         if (m->hw_handler_name) {
609                 struct request_queue *q = bdev_get_queue(p->path.dev->bdev);
610
611                 r = scsi_dh_attach(q, m->hw_handler_name);
612                 if (r == -EBUSY) {
613                         /*
614                          * Already attached to different hw_handler,
615                          * try to reattach with correct one.
616                          */
617                         scsi_dh_detach(q);
618                         r = scsi_dh_attach(q, m->hw_handler_name);
619                 }
620
621                 if (r < 0) {
622                         ti->error = "error attaching hardware handler";
623                         dm_put_device(ti, p->path.dev);
624                         goto bad;
625                 }
626
627                 if (m->hw_handler_params) {
628                         r = scsi_dh_set_params(q, m->hw_handler_params);
629                         if (r < 0) {
630                                 ti->error = "unable to set hardware "
631                                                         "handler parameters";
632                                 scsi_dh_detach(q);
633                                 dm_put_device(ti, p->path.dev);
634                                 goto bad;
635                         }
636                 }
637         }
638
639         r = ps->type->add_path(ps, &p->path, as->argc, as->argv, &ti->error);
640         if (r) {
641                 dm_put_device(ti, p->path.dev);
642                 goto bad;
643         }
644
645         return p;
646
647  bad:
648         free_pgpath(p);
649         return ERR_PTR(r);
650 }
651
652 static struct priority_group *parse_priority_group(struct arg_set *as,
653                                                    struct multipath *m)
654 {
655         static struct param _params[] = {
656                 {1, 1024, "invalid number of paths"},
657                 {0, 1024, "invalid number of selector args"}
658         };
659
660         int r;
661         unsigned i, nr_selector_args, nr_params;
662         struct priority_group *pg;
663         struct dm_target *ti = m->ti;
664
665         if (as->argc < 2) {
666                 as->argc = 0;
667                 ti->error = "not enough priority group arguments";
668                 return ERR_PTR(-EINVAL);
669         }
670
671         pg = alloc_priority_group();
672         if (!pg) {
673                 ti->error = "couldn't allocate priority group";
674                 return ERR_PTR(-ENOMEM);
675         }
676         pg->m = m;
677
678         r = parse_path_selector(as, pg, ti);
679         if (r)
680                 goto bad;
681
682         /*
683          * read the paths
684          */
685         r = read_param(_params, shift(as), &pg->nr_pgpaths, &ti->error);
686         if (r)
687                 goto bad;
688
689         r = read_param(_params + 1, shift(as), &nr_selector_args, &ti->error);
690         if (r)
691                 goto bad;
692
693         nr_params = 1 + nr_selector_args;
694         for (i = 0; i < pg->nr_pgpaths; i++) {
695                 struct pgpath *pgpath;
696                 struct arg_set path_args;
697
698                 if (as->argc < nr_params) {
699                         ti->error = "not enough path parameters";
700                         goto bad;
701                 }
702
703                 path_args.argc = nr_params;
704                 path_args.argv = as->argv;
705
706                 pgpath = parse_path(&path_args, &pg->ps, ti);
707                 if (IS_ERR(pgpath)) {
708                         r = PTR_ERR(pgpath);
709                         goto bad;
710                 }
711
712                 pgpath->pg = pg;
713                 list_add_tail(&pgpath->list, &pg->pgpaths);
714                 consume(as, nr_params);
715         }
716
717         return pg;
718
719  bad:
720         free_priority_group(pg, ti);
721         return ERR_PTR(r);
722 }
723
724 static int parse_hw_handler(struct arg_set *as, struct multipath *m)
725 {
726         unsigned hw_argc;
727         int ret;
728         struct dm_target *ti = m->ti;
729
730         static struct param _params[] = {
731                 {0, 1024, "invalid number of hardware handler args"},
732         };
733
734         if (read_param(_params, shift(as), &hw_argc, &ti->error))
735                 return -EINVAL;
736
737         if (!hw_argc)
738                 return 0;
739
740         if (hw_argc > as->argc) {
741                 ti->error = "not enough arguments for hardware handler";
742                 return -EINVAL;
743         }
744
745         m->hw_handler_name = kstrdup(shift(as), GFP_KERNEL);
746         request_module("scsi_dh_%s", m->hw_handler_name);
747         if (scsi_dh_handler_exist(m->hw_handler_name) == 0) {
748                 ti->error = "unknown hardware handler type";
749                 ret = -EINVAL;
750                 goto fail;
751         }
752
753         if (hw_argc > 1) {
754                 char *p;
755                 int i, j, len = 4;
756
757                 for (i = 0; i <= hw_argc - 2; i++)
758                         len += strlen(as->argv[i]) + 1;
759                 p = m->hw_handler_params = kzalloc(len, GFP_KERNEL);
760                 if (!p) {
761                         ti->error = "memory allocation failed";
762                         ret = -ENOMEM;
763                         goto fail;
764                 }
765                 j = sprintf(p, "%d", hw_argc - 1);
766                 for (i = 0, p+=j+1; i <= hw_argc - 2; i++, p+=j+1)
767                         j = sprintf(p, "%s", as->argv[i]);
768         }
769         consume(as, hw_argc - 1);
770
771         return 0;
772 fail:
773         kfree(m->hw_handler_name);
774         m->hw_handler_name = NULL;
775         return ret;
776 }
777
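/*
 * Illustrative feature argument strings (values are only examples):
 *
 *     0                                       no features
 *     1 queue_if_no_path                      queue I/O if all paths fail
 *     3 queue_if_no_path pg_init_retries 5    both features; the leading
 *                                             count is the total number
 *                                             of words that follow
 */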
778 static int parse_features(struct arg_set *as, struct multipath *m)
779 {
780         int r;
781         unsigned argc;
782         struct dm_target *ti = m->ti;
783         const char *param_name;
784
785         static struct param _params[] = {
786                 {0, 3, "invalid number of feature args"},
787                 {1, 50, "pg_init_retries must be between 1 and 50"},
788         };
789
790         r = read_param(_params, shift(as), &argc, &ti->error);
791         if (r)
792                 return -EINVAL;
793
794         if (!argc)
795                 return 0;
796
797         do {
798                 param_name = shift(as);
799                 argc--;
800
801                 if (!strnicmp(param_name, MESG_STR("queue_if_no_path"))) {
802                         r = queue_if_no_path(m, 1, 0);
803                         continue;
804                 }
805
806                 if (!strnicmp(param_name, MESG_STR("pg_init_retries")) &&
807                     (argc >= 1)) {
808                         r = read_param(_params + 1, shift(as),
809                                        &m->pg_init_retries, &ti->error);
810                         argc--;
811                         continue;
812                 }
813
814                 ti->error = "Unrecognised multipath feature request";
815                 r = -EINVAL;
816         } while (argc && !r);
817
818         return r;
819 }
820
821 static int multipath_ctr(struct dm_target *ti, unsigned int argc,
822                          char **argv)
823 {
824         /* target parameters */
825         static struct param _params[] = {
826                 {1, 1024, "invalid number of priority groups"},
827                 {1, 1024, "invalid initial priority group number"},
828         };
829
830         int r;
831         struct multipath *m;
832         struct arg_set as;
833         unsigned pg_count = 0;
834         unsigned next_pg_num;
835
836         as.argc = argc;
837         as.argv = argv;
838
839         m = alloc_multipath(ti);
840         if (!m) {
841                 ti->error = "can't allocate multipath";
842                 return -EINVAL;
843         }
844
845         r = parse_features(&as, m);
846         if (r)
847                 goto bad;
848
849         r = parse_hw_handler(&as, m);
850         if (r)
851                 goto bad;
852
853         r = read_param(_params, shift(&as), &m->nr_priority_groups, &ti->error);
854         if (r)
855                 goto bad;
856
857         r = read_param(_params + 1, shift(&as), &next_pg_num, &ti->error);
858         if (r)
859                 goto bad;
860
861         /* parse the priority groups */
862         while (as.argc) {
863                 struct priority_group *pg;
864
865                 pg = parse_priority_group(&as, m);
866                 if (IS_ERR(pg)) {
867                         r = PTR_ERR(pg);
868                         goto bad;
869                 }
870
871                 m->nr_valid_paths += pg->nr_pgpaths;
872                 list_add_tail(&pg->list, &m->priority_groups);
873                 pg_count++;
874                 pg->pg_num = pg_count;
875                 if (!--next_pg_num)
876                         m->next_pg = pg;
877         }
878
879         if (pg_count != m->nr_priority_groups) {
880                 ti->error = "priority group count mismatch";
881                 r = -EINVAL;
882                 goto bad;
883         }
884
885         ti->num_flush_requests = 1;
886
887         return 0;
888
889  bad:
890         free_multipath(m);
891         return r;
892 }
893
894 static void flush_multipath_work(void)
895 {
896         flush_workqueue(kmpath_handlerd);
897         flush_workqueue(kmultipathd);
898         flush_scheduled_work();
899 }
900
901 static void multipath_dtr(struct dm_target *ti)
902 {
903         struct multipath *m = ti->private;
904
905         flush_multipath_work();
906         free_multipath(m);
907 }
908
909 /*
910  * Map cloned requests
911  */
912 static int multipath_map(struct dm_target *ti, struct request *clone,
913                          union map_info *map_context)
914 {
915         int r;
916         struct dm_mpath_io *mpio;
917         struct multipath *m = (struct multipath *) ti->private;
918
919         mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC);
920         if (!mpio)
921                 /* ENOMEM, requeue */
922                 return DM_MAPIO_REQUEUE;
923         memset(mpio, 0, sizeof(*mpio));
924
925         map_context->ptr = mpio;
926         clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
927         r = map_io(m, clone, mpio, 0);
928         if (r < 0 || r == DM_MAPIO_REQUEUE)
929                 mempool_free(mpio, m->mpio_pool);
930
931         return r;
932 }
933
934 /*
935  * Take a path out of use.
936  */
937 static int fail_path(struct pgpath *pgpath)
938 {
939         unsigned long flags;
940         struct multipath *m = pgpath->pg->m;
941
942         spin_lock_irqsave(&m->lock, flags);
943
944         if (!pgpath->is_active)
945                 goto out;
946
947         DMWARN("Failing path %s.", pgpath->path.dev->name);
948
949         pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path);
950         pgpath->is_active = 0;
951         pgpath->fail_count++;
952
953         m->nr_valid_paths--;
954
955         if (pgpath == m->current_pgpath)
956                 m->current_pgpath = NULL;
957
958         dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
959                       pgpath->path.dev->name, m->nr_valid_paths);
960
961         schedule_work(&m->trigger_event);
962         queue_work(kmultipathd, &pgpath->deactivate_path);
963
964 out:
965         spin_unlock_irqrestore(&m->lock, flags);
966
967         return 0;
968 }
969
970 /*
971  * Reinstate a previously-failed path
972  */
973 static int reinstate_path(struct pgpath *pgpath)
974 {
975         int r = 0;
976         unsigned long flags;
977         struct multipath *m = pgpath->pg->m;
978
979         spin_lock_irqsave(&m->lock, flags);
980
981         if (pgpath->is_active)
982                 goto out;
983
984         if (!pgpath->pg->ps.type->reinstate_path) {
985                 DMWARN("Reinstate path not supported by path selector %s",
986                        pgpath->pg->ps.type->name);
987                 r = -EINVAL;
988                 goto out;
989         }
990
991         r = pgpath->pg->ps.type->reinstate_path(&pgpath->pg->ps, &pgpath->path);
992         if (r)
993                 goto out;
994
995         pgpath->is_active = 1;
996
997         if (!m->nr_valid_paths++ && m->queue_size) {
998                 m->current_pgpath = NULL;
999                 queue_work(kmultipathd, &m->process_queued_ios);
1000         } else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) {
1001                 if (queue_work(kmpath_handlerd, &pgpath->activate_path))
1002                         m->pg_init_in_progress++;
1003         }
1004
1005         dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
1006                       pgpath->path.dev->name, m->nr_valid_paths);
1007
1008         schedule_work(&m->trigger_event);
1009
1010 out:
1011         spin_unlock_irqrestore(&m->lock, flags);
1012
1013         return r;
1014 }
1015
1016 /*
1017  * Fail or reinstate all paths that match the provided struct dm_dev.
1018  */
1019 static int action_dev(struct multipath *m, struct dm_dev *dev,
1020                       action_fn action)
1021 {
1022         int r = 0;
1023         struct pgpath *pgpath;
1024         struct priority_group *pg;
1025
1026         list_for_each_entry(pg, &m->priority_groups, list) {
1027                 list_for_each_entry(pgpath, &pg->pgpaths, list) {
1028                         if (pgpath->path.dev == dev)
1029                                 r = action(pgpath);
1030                 }
1031         }
1032
1033         return r;
1034 }
1035
1036 /*
1037  * Temporarily try to avoid having to use the specified PG
1038  */
1039 static void bypass_pg(struct multipath *m, struct priority_group *pg,
1040                       int bypassed)
1041 {
1042         unsigned long flags;
1043
1044         spin_lock_irqsave(&m->lock, flags);
1045
1046         pg->bypassed = bypassed;
1047         m->current_pgpath = NULL;
1048         m->current_pg = NULL;
1049
1050         spin_unlock_irqrestore(&m->lock, flags);
1051
1052         schedule_work(&m->trigger_event);
1053 }
1054
1055 /*
1056  * Switch to using the specified PG from the next I/O that gets mapped
1057  */
1058 static int switch_pg_num(struct multipath *m, const char *pgstr)
1059 {
1060         struct priority_group *pg;
1061         unsigned pgnum;
1062         unsigned long flags;
1063
1064         if (!pgstr || (sscanf(pgstr, "%u", &pgnum) != 1) || !pgnum ||
1065             (pgnum > m->nr_priority_groups)) {
1066                 DMWARN("invalid PG number supplied to switch_pg_num");
1067                 return -EINVAL;
1068         }
1069
1070         spin_lock_irqsave(&m->lock, flags);
1071         list_for_each_entry(pg, &m->priority_groups, list) {
1072                 pg->bypassed = 0;
1073                 if (--pgnum)
1074                         continue;
1075
1076                 m->current_pgpath = NULL;
1077                 m->current_pg = NULL;
1078                 m->next_pg = pg;
1079         }
1080         spin_unlock_irqrestore(&m->lock, flags);
1081
1082         schedule_work(&m->trigger_event);
1083         return 0;
1084 }
1085
1086 /*
1087  * Set/clear bypassed status of a PG.
1088  * PGs are numbered upwards from 1 in the order they were declared.
1089  */
1090 static int bypass_pg_num(struct multipath *m, const char *pgstr, int bypassed)
1091 {
1092         struct priority_group *pg;
1093         unsigned pgnum;
1094
1095         if (!pgstr || (sscanf(pgstr, "%u", &pgnum) != 1) || !pgnum ||
1096             (pgnum > m->nr_priority_groups)) {
1097                 DMWARN("invalid PG number supplied to bypass_pg");
1098                 return -EINVAL;
1099         }
1100
1101         list_for_each_entry(pg, &m->priority_groups, list) {
1102                 if (!--pgnum)
1103                         break;
1104         }
1105
1106         bypass_pg(m, pg, bypassed);
1107         return 0;
1108 }
1109
1110 /*
1111  * Should we retry pg_init immediately?
1112  */
1113 static int pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath)
1114 {
1115         unsigned long flags;
1116         int limit_reached = 0;
1117
1118         spin_lock_irqsave(&m->lock, flags);
1119
1120         if (m->pg_init_count <= m->pg_init_retries)
1121                 m->pg_init_required = 1;
1122         else
1123                 limit_reached = 1;
1124
1125         spin_unlock_irqrestore(&m->lock, flags);
1126
1127         return limit_reached;
1128 }
1129
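/*
 * Completion callback passed to scsi_dh_activate().  Interprets the
 * hardware handler result, failing or bypassing paths as necessary, and
 * once the last outstanding activation for this pg_init finishes,
 * requeues process_queued_ios() so that held I/O can be resubmitted.
 */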
1130 static void pg_init_done(void *data, int errors)
1131 {
1132         struct pgpath *pgpath = data;
1133         struct priority_group *pg = pgpath->pg;
1134         struct multipath *m = pg->m;
1135         unsigned long flags;
1136
1137         /* device or driver problems */
1138         switch (errors) {
1139         case SCSI_DH_OK:
1140                 break;
1141         case SCSI_DH_NOSYS:
1142                 if (!m->hw_handler_name) {
1143                         errors = 0;
1144                         break;
1145                 }
1146                 DMERR("Could not failover the device: Handler scsi_dh_%s "
1147                       "Error %d.", m->hw_handler_name, errors);
1148                 /*
1149                  * Fail path for now, so we do not ping pong
1150                  */
1151                 fail_path(pgpath);
1152                 break;
1153         case SCSI_DH_DEV_TEMP_BUSY:
1154                 /*
1155                  * Probably doing something like FW upgrade on the
1156                  * controller so try the other pg.
1157                  */
1158                 bypass_pg(m, pg, 1);
1159                 break;
1160         /* TODO: For SCSI_DH_RETRY we should wait a couple seconds */
1161         case SCSI_DH_RETRY:
1162         case SCSI_DH_IMM_RETRY:
1163         case SCSI_DH_RES_TEMP_UNAVAIL:
1164                 if (pg_init_limit_reached(m, pgpath))
1165                         fail_path(pgpath);
1166                 errors = 0;
1167                 break;
1168         default:
1169                 /*
1170                  * We probably do not want to fail the path for a device
1171                  * error, but this is what the old dm did. In future
1172                  * patches we can do more advanced handling.
1173                  */
1174                 fail_path(pgpath);
1175         }
1176
1177         spin_lock_irqsave(&m->lock, flags);
1178         if (errors) {
1179                 if (pgpath == m->current_pgpath) {
1180                         DMERR("Could not failover device. Error %d.", errors);
1181                         m->current_pgpath = NULL;
1182                         m->current_pg = NULL;
1183                 }
1184         } else if (!m->pg_init_required) {
1185                 m->queue_io = 0;
1186                 pg->bypassed = 0;
1187         }
1188
1189         m->pg_init_in_progress--;
1190         if (!m->pg_init_in_progress)
1191                 queue_work(kmultipathd, &m->process_queued_ios);
1192         spin_unlock_irqrestore(&m->lock, flags);
1193 }
1194
1195 static void activate_path(struct work_struct *work)
1196 {
1197         struct pgpath *pgpath =
1198                 container_of(work, struct pgpath, activate_path);
1199
1200         scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev),
1201                                 pg_init_done, pgpath);
1202 }
1203
1204 /*
1205  * end_io handling
1206  */
1207 static int do_end_io(struct multipath *m, struct request *clone,
1208                      int error, struct dm_mpath_io *mpio)
1209 {
1210         /*
1211          * We don't queue any clone request inside the multipath target
1212          * during end I/O handling, since those clone requests don't have
1213          * bio clones.  If we queue them inside the multipath target,
1214          * we need to make bio clones, that requires memory allocation.
1215          * (See drivers/md/dm.c:end_clone_bio() about why the clone requests
1216          *  don't have bio clones.)
1217          * Instead of queueing the clone request here, we queue the original
1218          * request into dm core, which will remake a clone request and
1219          * clone bios for it and resubmit it later.
1220          */
1221         int r = DM_ENDIO_REQUEUE;
1222         unsigned long flags;
1223
1224         if (!error && !clone->errors)
1225                 return 0;       /* I/O complete */
1226
1227         if (error == -EOPNOTSUPP)
1228                 return error;
1229
1230         if (mpio->pgpath)
1231                 fail_path(mpio->pgpath);
1232
1233         spin_lock_irqsave(&m->lock, flags);
1234         if (!m->nr_valid_paths && !m->queue_if_no_path && !__must_push_back(m))
1235                 r = -EIO;
1236         spin_unlock_irqrestore(&m->lock, flags);
1237
1238         return r;
1239 }
1240
1241 static int multipath_end_io(struct dm_target *ti, struct request *clone,
1242                             int error, union map_info *map_context)
1243 {
1244         struct multipath *m = ti->private;
1245         struct dm_mpath_io *mpio = map_context->ptr;
1246         struct pgpath *pgpath = mpio->pgpath;
1247         struct path_selector *ps;
1248         int r;
1249
1250         r  = do_end_io(m, clone, error, mpio);
1251         if (pgpath) {
1252                 ps = &pgpath->pg->ps;
1253                 if (ps->type->end_io)
1254                         ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
1255         }
1256         mempool_free(mpio, m->mpio_pool);
1257
1258         return r;
1259 }
1260
1261 /*
1262  * Suspend can't complete until all the I/O is processed so if
1263  * the last path fails we must error any remaining I/O.
1264  * Note that if the freeze_bdev fails while suspending, the
1265  * queue_if_no_path state is lost - userspace should reset it.
1266  */
1267 static void multipath_presuspend(struct dm_target *ti)
1268 {
1269         struct multipath *m = (struct multipath *) ti->private;
1270
1271         queue_if_no_path(m, 0, 1);
1272 }
1273
1274 static void multipath_postsuspend(struct dm_target *ti)
1275 {
1276         struct multipath *m = ti->private;
1277
1278         mutex_lock(&m->work_mutex);
1279         flush_multipath_work();
1280         mutex_unlock(&m->work_mutex);
1281 }
1282
1283 /*
1284  * Restore the queue_if_no_path setting.
1285  */
1286 static void multipath_resume(struct dm_target *ti)
1287 {
1288         struct multipath *m = (struct multipath *) ti->private;
1289         unsigned long flags;
1290
1291         spin_lock_irqsave(&m->lock, flags);
1292         m->queue_if_no_path = m->saved_queue_if_no_path;
1293         spin_unlock_irqrestore(&m->lock, flags);
1294 }
1295
1296 /*
1297  * Info output has the following format:
1298  * num_multipath_feature_args [multipath_feature_args]*
1299  * num_handler_status_args [handler_status_args]*
1300  * num_groups init_group_number
1301  *            [A|D|E num_ps_status_args [ps_status_args]*
1302  *             num_paths num_selector_args
1303  *             [path_dev A|F fail_count [selector_args]* ]+ ]+
1304  *
1305  * Table output has the following format (identical to the constructor string):
1306  * num_feature_args [features_args]*
1307  * num_handler_args hw_handler [hw_handler_args]*
1308  * num_groups init_group_number
1309  *     [priority selector-name num_ps_args [ps_args]*
1310  *      num_paths num_selector_args [path_dev [selector_args]* ]+ ]+
1311  */
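/*
 * Illustrative STATUSTYPE_INFO output (values are only examples) for a
 * map with no feature args, no hardware handler and one group of two
 * active paths, assuming a path selector that reports no status args:
 *
 *     2 0 0 0 1 1 A 0 2 0 8:16 A 0 8:32 A 0
 *
 * i.e. 2 feature values (queue_size 0, pg_init_count 0), no handler
 * status, 1 group with initial group 1, group state A, 0 selector status
 * args, 2 paths with 0 per-path selector args, each path active with a
 * fail count of 0.
 */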
1312 static int multipath_status(struct dm_target *ti, status_type_t type,
1313                             char *result, unsigned int maxlen)
1314 {
1315         int sz = 0;
1316         unsigned long flags;
1317         struct multipath *m = (struct multipath *) ti->private;
1318         struct priority_group *pg;
1319         struct pgpath *p;
1320         unsigned pg_num;
1321         char state;
1322
1323         spin_lock_irqsave(&m->lock, flags);
1324
1325         /* Features */
1326         if (type == STATUSTYPE_INFO)
1327                 DMEMIT("2 %u %u ", m->queue_size, m->pg_init_count);
1328         else {
1329                 DMEMIT("%u ", m->queue_if_no_path +
1330                               (m->pg_init_retries > 0) * 2);
1331                 if (m->queue_if_no_path)
1332                         DMEMIT("queue_if_no_path ");
1333                 if (m->pg_init_retries)
1334                         DMEMIT("pg_init_retries %u ", m->pg_init_retries);
1335         }
1336
1337         if (!m->hw_handler_name || type == STATUSTYPE_INFO)
1338                 DMEMIT("0 ");
1339         else
1340                 DMEMIT("1 %s ", m->hw_handler_name);
1341
1342         DMEMIT("%u ", m->nr_priority_groups);
1343
1344         if (m->next_pg)
1345                 pg_num = m->next_pg->pg_num;
1346         else if (m->current_pg)
1347                 pg_num = m->current_pg->pg_num;
1348         else
1349                 pg_num = 1;
1350
1351         DMEMIT("%u ", pg_num);
1352
1353         switch (type) {
1354         case STATUSTYPE_INFO:
1355                 list_for_each_entry(pg, &m->priority_groups, list) {
1356                         if (pg->bypassed)
1357                                 state = 'D';    /* Disabled */
1358                         else if (pg == m->current_pg)
1359                                 state = 'A';    /* Currently Active */
1360                         else
1361                                 state = 'E';    /* Enabled */
1362
1363                         DMEMIT("%c ", state);
1364
1365                         if (pg->ps.type->status)
1366                                 sz += pg->ps.type->status(&pg->ps, NULL, type,
1367                                                           result + sz,
1368                                                           maxlen - sz);
1369                         else
1370                                 DMEMIT("0 ");
1371
1372                         DMEMIT("%u %u ", pg->nr_pgpaths,
1373                                pg->ps.type->info_args);
1374
1375                         list_for_each_entry(p, &pg->pgpaths, list) {
1376                                 DMEMIT("%s %s %u ", p->path.dev->name,
1377                                        p->is_active ? "A" : "F",
1378                                        p->fail_count);
1379                                 if (pg->ps.type->status)
1380                                         sz += pg->ps.type->status(&pg->ps,
1381                                               &p->path, type, result + sz,
1382                                               maxlen - sz);
1383                         }
1384                 }
1385                 break;
1386
1387         case STATUSTYPE_TABLE:
1388                 list_for_each_entry(pg, &m->priority_groups, list) {
1389                         DMEMIT("%s ", pg->ps.type->name);
1390
1391                         if (pg->ps.type->status)
1392                                 sz += pg->ps.type->status(&pg->ps, NULL, type,
1393                                                           result + sz,
1394                                                           maxlen - sz);
1395                         else
1396                                 DMEMIT("0 ");
1397
1398                         DMEMIT("%u %u ", pg->nr_pgpaths,
1399                                pg->ps.type->table_args);
1400
1401                         list_for_each_entry(p, &pg->pgpaths, list) {
1402                                 DMEMIT("%s ", p->path.dev->name);
1403                                 if (pg->ps.type->status)
1404                                         sz += pg->ps.type->status(&pg->ps,
1405                                               &p->path, type, result + sz,
1406                                               maxlen - sz);
1407                         }
1408                 }
1409                 break;
1410         }
1411
1412         spin_unlock_irqrestore(&m->lock, flags);
1413
1414         return 0;
1415 }
1416
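/*
 * Messages are typically sent with "dmsetup message <device> 0 <text>";
 * the device and path names below are only examples:
 *
 *     dmsetup message mpath0 0 fail_path 8:32
 *     dmsetup message mpath0 0 reinstate_path /dev/sdc
 *     dmsetup message mpath0 0 disable_group 2
 *     dmsetup message mpath0 0 enable_group 2
 *     dmsetup message mpath0 0 switch_group 1
 *     dmsetup message mpath0 0 queue_if_no_path
 *     dmsetup message mpath0 0 fail_if_no_path
 */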
1417 static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
1418 {
1419         int r = -EINVAL;
1420         struct dm_dev *dev;
1421         struct multipath *m = (struct multipath *) ti->private;
1422         action_fn action;
1423
1424         mutex_lock(&m->work_mutex);
1425
1426         if (dm_suspended(ti)) {
1427                 r = -EBUSY;
1428                 goto out;
1429         }
1430
1431         if (argc == 1) {
1432                 if (!strnicmp(argv[0], MESG_STR("queue_if_no_path"))) {
1433                         r = queue_if_no_path(m, 1, 0);
1434                         goto out;
1435                 } else if (!strnicmp(argv[0], MESG_STR("fail_if_no_path"))) {
1436                         r = queue_if_no_path(m, 0, 0);
1437                         goto out;
1438                 }
1439         }
1440
1441         if (argc != 2) {
1442                 DMWARN("Unrecognised multipath message received.");
1443                 goto out;
1444         }
1445
1446         if (!strnicmp(argv[0], MESG_STR("disable_group"))) {
1447                 r = bypass_pg_num(m, argv[1], 1);
1448                 goto out;
1449         } else if (!strnicmp(argv[0], MESG_STR("enable_group"))) {
1450                 r = bypass_pg_num(m, argv[1], 0);
1451                 goto out;
1452         } else if (!strnicmp(argv[0], MESG_STR("switch_group"))) {
1453                 r = switch_pg_num(m, argv[1]);
1454                 goto out;
1455         } else if (!strnicmp(argv[0], MESG_STR("reinstate_path")))
1456                 action = reinstate_path;
1457         else if (!strnicmp(argv[0], MESG_STR("fail_path")))
1458                 action = fail_path;
1459         else {
1460                 DMWARN("Unrecognised multipath message received.");
1461                 goto out;
1462         }
1463
1464         r = dm_get_device(ti, argv[1], ti->begin, ti->len,
1465                           dm_table_get_mode(ti->table), &dev);
1466         if (r) {
1467                 DMWARN("message: error getting device %s",
1468                        argv[1]);
1469                 goto out;
1470         }
1471
1472         r = action_dev(m, dev, action);
1473
1474         dm_put_device(ti, dev);
1475
1476 out:
1477         mutex_unlock(&m->work_mutex);
1478         return r;
1479 }
1480
1481 static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
1482                            unsigned long arg)
1483 {
1484         struct multipath *m = (struct multipath *) ti->private;
1485         struct block_device *bdev = NULL;
1486         fmode_t mode = 0;
1487         unsigned long flags;
1488         int r = 0;
1489
1490         spin_lock_irqsave(&m->lock, flags);
1491
1492         if (!m->current_pgpath)
1493                 __choose_pgpath(m, 0);
1494
1495         if (m->current_pgpath) {
1496                 bdev = m->current_pgpath->path.dev->bdev;
1497                 mode = m->current_pgpath->path.dev->mode;
1498         }
1499
1500         if (m->queue_io)
1501                 r = -EAGAIN;
1502         else if (!bdev)
1503                 r = -EIO;
1504
1505         spin_unlock_irqrestore(&m->lock, flags);
1506
1507         return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg);
1508 }
1509
1510 static int multipath_iterate_devices(struct dm_target *ti,
1511                                      iterate_devices_callout_fn fn, void *data)
1512 {
1513         struct multipath *m = ti->private;
1514         struct priority_group *pg;
1515         struct pgpath *p;
1516         int ret = 0;
1517
1518         list_for_each_entry(pg, &m->priority_groups, list) {
1519                 list_for_each_entry(p, &pg->pgpaths, list) {
1520                         ret = fn(ti, p->path.dev, ti->begin, ti->len, data);
1521                         if (ret)
1522                                 goto out;
1523                 }
1524         }
1525
1526 out:
1527         return ret;
1528 }
1529
1530 static int __pgpath_busy(struct pgpath *pgpath)
1531 {
1532         struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);
1533
1534         return dm_underlying_device_busy(q);
1535 }
1536
1537 /*
1538  * We return "busy", only when we can map I/Os but underlying devices
1539  * are busy (so even if we map I/Os now, the I/Os will wait on
1540  * the underlying queue).
1541  * In other words, if we want to kill I/Os or queue them inside us
1542  * due to map unavailability, we don't return "busy".  Otherwise,
1543  * dm core won't give us the I/Os and we can't do what we want.
1544  */
1545 static int multipath_busy(struct dm_target *ti)
1546 {
1547         int busy = 0, has_active = 0;
1548         struct multipath *m = ti->private;
1549         struct priority_group *pg;
1550         struct pgpath *pgpath;
1551         unsigned long flags;
1552
1553         spin_lock_irqsave(&m->lock, flags);
1554
1555         /* Guess which priority_group will be used at next mapping time */
1556         if (unlikely(!m->current_pgpath && m->next_pg))
1557                 pg = m->next_pg;
1558         else if (likely(m->current_pg))
1559                 pg = m->current_pg;
1560         else
1561                 /*
1562                  * We don't know which pg will be used at next mapping time.
1563                  * We don't call __choose_pgpath() here to avoid to trigger
1564                  * pg_init just by busy checking.
1565                  * So we don't know whether underlying devices we will be using
1566                  * at next mapping time are busy or not. Just try mapping.
1567                  */
1568                 goto out;
1569
1570         /*
1571          * If there is one non-busy active path at least, the path selector
1572          * will be able to select it. So we consider such a pg as not busy.
1573          */
1574         busy = 1;
1575         list_for_each_entry(pgpath, &pg->pgpaths, list)
1576                 if (pgpath->is_active) {
1577                         has_active = 1;
1578
1579                         if (!__pgpath_busy(pgpath)) {
1580                                 busy = 0;
1581                                 break;
1582                         }
1583                 }
1584
1585         if (!has_active)
1586                 /*
1587                  * No active path in this pg, so this pg won't be used and
1588                  * the current_pg will be changed at next mapping time.
1589                  * We need to try mapping to determine it.
1590                  */
1591                 busy = 0;
1592
1593 out:
1594         spin_unlock_irqrestore(&m->lock, flags);
1595
1596         return busy;
1597 }
1598
1599 /*-----------------------------------------------------------------
1600  * Module setup
1601  *---------------------------------------------------------------*/
1602 static struct target_type multipath_target = {
1603         .name = "multipath",
1604         .version = {1, 1, 1},
1605         .module = THIS_MODULE,
1606         .ctr = multipath_ctr,
1607         .dtr = multipath_dtr,
1608         .map_rq = multipath_map,
1609         .rq_end_io = multipath_end_io,
1610         .presuspend = multipath_presuspend,
1611         .postsuspend = multipath_postsuspend,
1612         .resume = multipath_resume,
1613         .status = multipath_status,
1614         .message = multipath_message,
1615         .ioctl  = multipath_ioctl,
1616         .iterate_devices = multipath_iterate_devices,
1617         .busy = multipath_busy,
1618 };
1619
1620 static int __init dm_multipath_init(void)
1621 {
1622         int r;
1623
1624         /* allocate a slab for the dm_ios */
1625         _mpio_cache = KMEM_CACHE(dm_mpath_io, 0);
1626         if (!_mpio_cache)
1627                 return -ENOMEM;
1628
1629         r = dm_register_target(&multipath_target);
1630         if (r < 0) {
1631                 DMERR("register failed %d", r);
1632                 kmem_cache_destroy(_mpio_cache);
1633                 return -EINVAL;
1634         }
1635
1636         kmultipathd = create_workqueue("kmpathd");
1637         if (!kmultipathd) {
1638                 DMERR("failed to create workqueue kmpathd");
1639                 dm_unregister_target(&multipath_target);
1640                 kmem_cache_destroy(_mpio_cache);
1641                 return -ENOMEM;
1642         }
1643
1644         /*
1645          * A separate workqueue is used to handle the device handlers
1646          * to avoid overloading the existing workqueue. Overloading it
1647          * would create a bottleneck in the path of storage hardware
1648          * device activation.
1649          */
1650         kmpath_handlerd = create_singlethread_workqueue("kmpath_handlerd");
1651         if (!kmpath_handlerd) {
1652                 DMERR("failed to create workqueue kmpath_handlerd");
1653                 destroy_workqueue(kmultipathd);
1654                 dm_unregister_target(&multipath_target);
1655                 kmem_cache_destroy(_mpio_cache);
1656                 return -ENOMEM;
1657         }
1658
1659         DMINFO("version %u.%u.%u loaded",
1660                multipath_target.version[0], multipath_target.version[1],
1661                multipath_target.version[2]);
1662
1663         return r;
1664 }
1665
1666 static void __exit dm_multipath_exit(void)
1667 {
1668         destroy_workqueue(kmpath_handlerd);
1669         destroy_workqueue(kmultipathd);
1670
1671         dm_unregister_target(&multipath_target);
1672         kmem_cache_destroy(_mpio_cache);
1673 }
1674
1675 module_init(dm_multipath_init);
1676 module_exit(dm_multipath_exit);
1677
1678 MODULE_DESCRIPTION(DM_NAME " multipath target");
1679 MODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>");
1680 MODULE_LICENSE("GPL");