- patches.suse/dm-mpath-evaluate-request-result-and-sense:
drivers/md/dm-mpath.c
1 /*
2  * Copyright (C) 2003 Sistina Software Limited.
3  * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4  *
5  * This file is released under the GPL.
6  */
7
8 #include <linux/device-mapper.h>
9
10 #include "dm-path-selector.h"
11 #include "dm-uevent.h"
12
13 #include <linux/ctype.h>
14 #include <linux/init.h>
15 #include <linux/mempool.h>
16 #include <linux/module.h>
17 #include <linux/pagemap.h>
18 #include <linux/slab.h>
19 #include <linux/time.h>
20 #include <linux/workqueue.h>
21 #include <scsi/scsi_dh.h>
22 #include <scsi/scsi_eh.h>
23 #include <asm/atomic.h>
24
25 #define DM_MSG_PREFIX "multipath"
26 #define MESG_STR(x) x, sizeof(x)
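/* MESG_STR() expands to a literal and its size, for the strnicmp() matches below. */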
27
28 /* Path properties */
29 struct pgpath {
30         struct list_head list;
31
32         struct priority_group *pg;      /* Owning PG */
33         unsigned is_active;             /* Path status */
34         unsigned fail_count;            /* Cumulative failure count */
35
36         struct dm_path path;
37         struct work_struct deactivate_path;
38         struct work_struct activate_path;
39 };
40
41 #define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)
42
43 /*
44  * Paths are grouped into Priority Groups and numbered from 1 upwards.
45  * Each has a path selector which controls which path gets used.
46  */
47 struct priority_group {
48         struct list_head list;
49
50         struct multipath *m;            /* Owning multipath instance */
51         struct path_selector ps;
52
53         unsigned pg_num;                /* Reference number */
54         unsigned bypassed;              /* Temporarily bypass this PG? */
55
56         unsigned nr_pgpaths;            /* Number of paths in PG */
57         struct list_head pgpaths;
58 };
59
60 /* Multipath context */
61 struct multipath {
62         struct list_head list;
63         struct dm_target *ti;
64
65         spinlock_t lock;
66
67         const char *hw_handler_name;
68         char *hw_handler_params;
69         unsigned nr_priority_groups;
70         struct list_head priority_groups;
71         unsigned pg_init_required;      /* pg_init needs calling? */
72         unsigned pg_init_in_progress;   /* Only one pg_init allowed at once */
73
74         unsigned nr_valid_paths;        /* Total number of usable paths */
75         struct pgpath *current_pgpath;
76         struct priority_group *current_pg;
77         struct priority_group *next_pg; /* Switch to this PG if set */
78         unsigned repeat_count;          /* I/Os left before calling PS again */
79
80         unsigned queue_io;              /* Must we queue all I/O? */
81         unsigned queue_if_no_path;      /* Queue I/O if last path fails? */
82         unsigned saved_queue_if_no_path;/* Saved state during suspension */
83         unsigned pg_init_retries;       /* Number of times to retry pg_init */
84         unsigned pg_init_count;         /* Number of times pg_init called */
85
86         struct work_struct process_queued_ios;
87         struct list_head queued_ios;
88         unsigned queue_size;
89
90         struct work_struct trigger_event;
91
92         /*
93          * We must use a mempool of dm_mpath_io structs so that we
94          * can resubmit requests on error.
95          */
96         mempool_t *mpio_pool;
97 };
98
99 /*
100  * Context information attached to each request we process.
101  */
102 struct dm_mpath_io {
103         struct pgpath *pgpath;
104         size_t nr_bytes;
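        /*
         * Local sense buffer; multipath_map() points clone->sense at it when
         * the clone has none, so do_end_io() can inspect sense data later.
         */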
105         char sense[SCSI_SENSE_BUFFERSIZE];
106 };
107
108 typedef int (*action_fn) (struct pgpath *pgpath);
109
110 #define MIN_IOS 256     /* Mempool size */
111
112 static struct kmem_cache *_mpio_cache;
113
114 static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
115 static void process_queued_ios(struct work_struct *work);
116 static void trigger_event(struct work_struct *work);
117 static void activate_path(struct work_struct *work);
118 static void deactivate_path(struct work_struct *work);
119
120
121 /*-----------------------------------------------
122  * Allocation routines
123  *-----------------------------------------------*/
124
125 static struct pgpath *alloc_pgpath(void)
126 {
127         struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL);
128
129         if (pgpath) {
130                 pgpath->is_active = 1;
131                 INIT_WORK(&pgpath->deactivate_path, deactivate_path);
132                 INIT_WORK(&pgpath->activate_path, activate_path);
133         }
134
135         return pgpath;
136 }
137
138 static void free_pgpath(struct pgpath *pgpath)
139 {
140         kfree(pgpath);
141 }
142
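/*
 * Abort any requests still outstanding on a failed path's device queue so
 * they complete (and can be retried on another path) without waiting for
 * the normal timeouts.
 */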
143 static void deactivate_path(struct work_struct *work)
144 {
145         struct pgpath *pgpath =
146                 container_of(work, struct pgpath, deactivate_path);
147
148         if (pgpath->path.dev)
149                 blk_abort_queue(pgpath->path.dev->bdev->bd_disk->queue);
150 }
151
152 static struct priority_group *alloc_priority_group(void)
153 {
154         struct priority_group *pg;
155
156         pg = kzalloc(sizeof(*pg), GFP_KERNEL);
157
158         if (pg)
159                 INIT_LIST_HEAD(&pg->pgpaths);
160
161         return pg;
162 }
163
164 static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
165 {
166         struct pgpath *pgpath, *tmp;
167         struct multipath *m = ti->private;
168
169         list_for_each_entry_safe(pgpath, tmp, pgpaths, list) {
170                 list_del(&pgpath->list);
171                 dm_put_device(ti, pgpath->path.dev);
172                 free_pgpath(pgpath);
173         }
174 }
175
176 static void free_priority_group(struct priority_group *pg,
177                                 struct dm_target *ti)
178 {
179         struct path_selector *ps = &pg->ps;
180
181         if (ps->type) {
182                 ps->type->destroy(ps);
183                 dm_put_path_selector(ps->type);
184         }
185
186         free_pgpaths(&pg->pgpaths, ti);
187         kfree(pg);
188 }
189
190 static struct multipath *alloc_multipath(struct dm_target *ti)
191 {
192         struct multipath *m;
193
194         m = kzalloc(sizeof(*m), GFP_KERNEL);
195         if (m) {
196                 INIT_LIST_HEAD(&m->priority_groups);
197                 INIT_LIST_HEAD(&m->queued_ios);
198                 spin_lock_init(&m->lock);
199                 m->queue_io = 1;
200                 INIT_WORK(&m->process_queued_ios, process_queued_ios);
201                 INIT_WORK(&m->trigger_event, trigger_event);
202                 m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
203                 if (!m->mpio_pool) {
204                         kfree(m);
205                         return NULL;
206                 }
207                 m->ti = ti;
208                 ti->private = m;
209         }
210
211         return m;
212 }
213
214 static void free_multipath(struct multipath *m)
215 {
216         struct priority_group *pg, *tmp;
217
218         list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) {
219                 list_del(&pg->list);
220                 free_priority_group(pg, m->ti);
221         }
222
223         kfree(m->hw_handler_name);
224         kfree(m->hw_handler_params);
225         mempool_destroy(m->mpio_pool);
226         kfree(m);
227 }
228
229
230 /*-----------------------------------------------
231  * Path selection
232  *-----------------------------------------------*/
233
234 static void __switch_pg(struct multipath *m, struct pgpath *pgpath)
235 {
236         m->current_pg = pgpath->pg;
237
238         /* Must we initialise the PG first, and queue I/O till it's ready? */
239         if (m->hw_handler_name) {
240                 m->pg_init_required = 1;
241                 m->queue_io = 1;
242         } else {
243                 m->pg_init_required = 0;
244                 m->queue_io = 0;
245         }
246
247         m->pg_init_count = 0;
248 }
249
250 static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg,
251                                size_t nr_bytes)
252 {
253         struct dm_path *path;
254
255         path = pg->ps.type->select_path(&pg->ps, &m->repeat_count, nr_bytes);
256         if (!path)
257                 return -ENXIO;
258
259         m->current_pgpath = path_to_pgpath(path);
260
261         if (!m->current_pgpath->path.dev) {
262                 m->current_pgpath = NULL;
263                 return -ENODEV;
264         }
265
266         if (m->current_pg != pg)
267                 __switch_pg(m, m->current_pgpath);
268
269         return 0;
270 }
271
272 static void __choose_pgpath(struct multipath *m, size_t nr_bytes)
273 {
274         struct priority_group *pg;
275         unsigned bypassed = 1;
276
277         if (!m->nr_valid_paths)
278                 goto failed;
279
280         /* Were we instructed to switch PG? */
281         if (m->next_pg) {
282                 pg = m->next_pg;
283                 m->next_pg = NULL;
284                 if (!__choose_path_in_pg(m, pg, nr_bytes))
285                         return;
286         }
287
288         /* Don't change PG until it has no remaining paths */
289         if (m->current_pg && !__choose_path_in_pg(m, m->current_pg, nr_bytes))
290                 return;
291
292         /*
293          * Loop through priority groups until we find a valid path.
294          * First time we skip PGs marked 'bypassed'.
295          * Second time we only try the ones we skipped.
296          */
297         do {
298                 list_for_each_entry(pg, &m->priority_groups, list) {
299                         if (pg->bypassed == bypassed)
300                                 continue;
301                         if (!__choose_path_in_pg(m, pg, nr_bytes))
302                                 return;
303                 }
304         } while (bypassed--);
305
306 failed:
307         m->current_pgpath = NULL;
308         m->current_pg = NULL;
309 }
310
311 /*
312  * Check whether bios must be queued in the device-mapper core rather
313  * than here in the target.
314  *
315  * m->lock must be held on entry.
316  *
317  * If m->queue_if_no_path and m->saved_queue_if_no_path hold the
318  * same value then we are not between multipath_presuspend()
319  * and multipath_resume() calls and we have no need to check
320  * for the DMF_NOFLUSH_SUSPENDING flag.
321  */
322 static int __must_push_back(struct multipath *m)
323 {
324         return (m->queue_if_no_path != m->saved_queue_if_no_path &&
325                 dm_noflush_suspending(m->ti));
326 }
327
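/*
 * Map a cloned request onto the current path.  If the active path group
 * still needs pg_init, or no path is usable but queue_if_no_path is set,
 * the clone is parked on m->queued_ios for process_queued_ios() to
 * resubmit later.
 */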
328 static int map_io(struct multipath *m, struct request *clone,
329                   struct dm_mpath_io *mpio, unsigned was_queued)
330 {
331         int r = DM_MAPIO_REMAPPED;
332         size_t nr_bytes = blk_rq_bytes(clone);
333         unsigned long flags;
334         struct pgpath *pgpath;
335         struct block_device *bdev;
336
337         spin_lock_irqsave(&m->lock, flags);
338
339         /* Do we need to select a new pgpath? */
340         if (!m->current_pgpath ||
341             (!m->queue_io && (m->repeat_count && --m->repeat_count == 0)))
342                 __choose_pgpath(m, nr_bytes);
343
344         pgpath = m->current_pgpath;
345
346         if (was_queued)
347                 m->queue_size--;
348
349         if ((pgpath && m->queue_io) ||
350             (!pgpath && m->queue_if_no_path)) {
351                 /* Queue for the daemon to resubmit */
352                 list_add_tail(&clone->queuelist, &m->queued_ios);
353                 m->queue_size++;
354                 if ((m->pg_init_required && !m->pg_init_in_progress) ||
355                     !m->queue_io)
356                         queue_work(kmultipathd, &m->process_queued_ios);
357                 pgpath = NULL;
358                 r = DM_MAPIO_SUBMITTED;
359         } else if (pgpath) {
360                 bdev = pgpath->path.dev->bdev;
361                 clone->q = bdev_get_queue(bdev);
362                 clone->rq_disk = bdev->bd_disk;
363         } else if (__must_push_back(m))
364                 r = DM_MAPIO_REQUEUE;
365         else
366                 r = -EIO;       /* Failed */
367
368         mpio->pgpath = pgpath;
369         mpio->nr_bytes = nr_bytes;
370
371         if (r == DM_MAPIO_REMAPPED && pgpath->pg->ps.type->start_io)
372                 pgpath->pg->ps.type->start_io(&pgpath->pg->ps, &pgpath->path,
373                                               nr_bytes);
374
375         spin_unlock_irqrestore(&m->lock, flags);
376
377         return r;
378 }
379
380 /*
381  * If we run out of usable paths, should we queue I/O or error it?
382  */
383 static int queue_if_no_path(struct multipath *m, unsigned queue_if_no_path,
384                             unsigned save_old_value)
385 {
386         unsigned long flags;
387
388         spin_lock_irqsave(&m->lock, flags);
389
390         if (save_old_value)
391                 m->saved_queue_if_no_path = m->queue_if_no_path;
392         else
393                 m->saved_queue_if_no_path = queue_if_no_path;
394         m->queue_if_no_path = queue_if_no_path;
395         if (!m->queue_if_no_path && m->queue_size)
396                 queue_work(kmultipathd, &m->process_queued_ios);
397
398         spin_unlock_irqrestore(&m->lock, flags);
399
400         return 0;
401 }
402
403 /*-----------------------------------------------------------------
404  * The multipath daemon is responsible for resubmitting queued ios.
405  *---------------------------------------------------------------*/
406
407 static void dispatch_queued_ios(struct multipath *m)
408 {
409         int r;
410         unsigned long flags;
411         struct dm_mpath_io *mpio;
412         union map_info *info;
413         struct request *clone, *n;
414         LIST_HEAD(cl);
415
416         spin_lock_irqsave(&m->lock, flags);
417         list_splice_init(&m->queued_ios, &cl);
418         spin_unlock_irqrestore(&m->lock, flags);
419
420         list_for_each_entry_safe(clone, n, &cl, queuelist) {
421                 list_del_init(&clone->queuelist);
422
423                 info = dm_get_rq_mapinfo(clone);
424                 mpio = info->ptr;
425
426                 r = map_io(m, clone, mpio, 1);
427                 if (r < 0) {
428                         mempool_free(mpio, m->mpio_pool);
429                         dm_kill_unmapped_request(clone, r);
430                 } else if (r == DM_MAPIO_REMAPPED)
431                         dm_dispatch_request(clone);
432                 else if (r == DM_MAPIO_REQUEUE) {
433                         mempool_free(mpio, m->mpio_pool);
434                         dm_requeue_unmapped_request(clone);
435                 }
436         }
437 }
438
439 static void process_queued_ios(struct work_struct *work)
440 {
441         struct multipath *m =
442                 container_of(work, struct multipath, process_queued_ios);
443         struct pgpath *pgpath = NULL, *tmp;
444         unsigned must_queue = 1;
445         unsigned long flags;
446
447         spin_lock_irqsave(&m->lock, flags);
448
449         if (!m->queue_size)
450                 goto out;
451
452         if (!m->current_pgpath)
453                 __choose_pgpath(m, 0);
454
455         pgpath = m->current_pgpath;
456
457         if ((pgpath && !m->queue_io) ||
458             (!pgpath && !m->queue_if_no_path))
459                 must_queue = 0;
460
461         if (m->pg_init_required && !m->pg_init_in_progress && pgpath) {
462                 m->pg_init_count++;
463                 m->pg_init_required = 0;
464                 list_for_each_entry(tmp, &pgpath->pg->pgpaths, list) {
465                         if (queue_work(kmpath_handlerd, &tmp->activate_path))
466                                 m->pg_init_in_progress++;
467                 }
468         }
469 out:
470         spin_unlock_irqrestore(&m->lock, flags);
471         if (!must_queue)
472                 dispatch_queued_ios(m);
473 }
474
475 /*
476  * An event is triggered whenever a path is taken out of use.
477  * Includes path failure and PG bypass.
478  */
479 static void trigger_event(struct work_struct *work)
480 {
481         struct multipath *m =
482                 container_of(work, struct multipath, trigger_event);
483
484         dm_table_event(m->ti->table);
485 }
486
487 /*-----------------------------------------------------------------
488  * Constructor/argument parsing:
489  * <#multipath feature args> [<arg>]*
490  * <#hw_handler args> [hw_handler [<arg>]*]
491  * <#priority groups>
492  * <initial priority group>
493  *     [<selector> <#selector args> [<arg>]*
494  *      <#paths> <#per-path selector args>
495  *         [<path> [<arg>]* ]+ ]+
496  *---------------------------------------------------------------*/
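/*
 * Purely illustrative (device numbers and repeat count are made up): a
 * table line for two paths in one round-robin group would look like
 *
 *   1 queue_if_no_path 0 1 1 round-robin 0 2 1 8:16 1000 8:32 1000
 *
 * i.e. one feature arg, no hardware handler, one priority group (initial
 * group 1), selector "round-robin" with no selector args, two paths with
 * one per-path selector arg (the repeat count) each.
 */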
497 struct param {
498         unsigned min;
499         unsigned max;
500         char *error;
501 };
502
503 static int read_param(struct param *param, char *str, unsigned *v, char **error)
504 {
505         if (!str ||
506             (sscanf(str, "%u", v) != 1) ||
507             (*v < param->min) ||
508             (*v > param->max)) {
509                 *error = param->error;
510                 return -EINVAL;
511         }
512
513         return 0;
514 }
515
516 struct arg_set {
517         unsigned argc;
518         char **argv;
519 };
520
521 static char *shift(struct arg_set *as)
522 {
523         char *r;
524
525         if (as->argc) {
526                 as->argc--;
527                 r = *as->argv;
528                 as->argv++;
529                 return r;
530         }
531
532         return NULL;
533 }
534
535 static void consume(struct arg_set *as, unsigned n)
536 {
537         BUG_ON(as->argc < n);
538         as->argc -= n;
539         as->argv += n;
540 }
541
542 static int parse_path_selector(struct arg_set *as, struct priority_group *pg,
543                                struct dm_target *ti)
544 {
545         int r;
546         struct path_selector_type *pst;
547         unsigned ps_argc;
548
549         static struct param _params[] = {
550                 {0, 1024, "invalid number of path selector args"},
551         };
552
553         pst = dm_get_path_selector(shift(as));
554         if (!pst) {
555                 ti->error = "unknown path selector type";
556                 return -EINVAL;
557         }
558
559         r = read_param(_params, shift(as), &ps_argc, &ti->error);
560         if (r) {
561                 dm_put_path_selector(pst);
562                 return -EINVAL;
563         }
564
565         if (ps_argc > as->argc) {
566                 dm_put_path_selector(pst);
567                 ti->error = "not enough arguments for path selector";
568                 return -EINVAL;
569         }
570
571         r = pst->create(&pg->ps, ps_argc, as->argv);
572         if (r) {
573                 dm_put_path_selector(pst);
574                 ti->error = "path selector constructor failed";
575                 return r;
576         }
577
578         pg->ps.type = pst;
579         consume(as, ps_argc);
580
581         return 0;
582 }
583
584 static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
585                                struct dm_target *ti)
586 {
587         int r;
588         struct pgpath *p;
589         char *path;
590         struct multipath *m = ti->private;
591
592         /* we need at least a path arg */
593         if (as->argc < 1) {
594                 ti->error = "no device given";
595                 return ERR_PTR(-EINVAL);
596         }
597
598         p = alloc_pgpath();
599         if (!p)
600                 return ERR_PTR(-ENOMEM);
601
602         path = shift(as);
603         r = dm_get_device(ti, path, ti->begin, ti->len,
604                           dm_table_get_mode(ti->table), &p->path.dev);
605         if (r) {
606                 unsigned major, minor;
607
608                 /* Try to add a failed device */
609                 if (r == -ENXIO && sscanf(path, "%u:%u", &major, &minor) == 2) {
610                         dev_t dev;
611
612                         /* Extract the major/minor numbers */
613                         dev = MKDEV(major, minor);
614                         if (MAJOR(dev) != major || MINOR(dev) != minor) {
615                                 /* Nice try, didn't work */
616                                 DMWARN("Invalid device path %s", path);
617                                 ti->error = "error converting devnum";
618                                 goto bad;
619                         }
620                         DMWARN("adding disabled device %d:%d", major, minor);
621                         p->path.dev = NULL;
622                         format_dev_t(p->path.pdev, dev);
623                         p->is_active = 0;
624                 } else {
625                         ti->error = "error getting device";
626                         goto bad;
627                 }
628         } else {
629                 memcpy(p->path.pdev, p->path.dev->name, 16);
630         }
631
632         if (p->path.dev) {
633                 struct request_queue *q = bdev_get_queue(p->path.dev->bdev);
634
635                 if (m->hw_handler_name) {
636                         r = scsi_dh_attach(q, m->hw_handler_name);
637                         if (r == -EBUSY) {
638                                 /*
639                                  * Already attached to different hw_handler,
640                                  * try to reattach with correct one.
641                                  */
642                                 scsi_dh_detach(q);
643                                 r = scsi_dh_attach(q, m->hw_handler_name);
644                         }
645                         if (r < 0) {
646                                 ti->error = "error attaching hardware handler";
647                                 dm_put_device(ti, p->path.dev);
648                                 goto bad;
649                         }
650                 } else {
651                         /* Play safe and detach hardware handler */
652                         scsi_dh_detach(q);
653                 }
654
655                 if (m->hw_handler_params) {
656                         r = scsi_dh_set_params(q, m->hw_handler_params);
657                         if (r < 0) {
658                                 ti->error = "unable to set hardware "
659                                                         "handler parameters";
660                                 scsi_dh_detach(q);
661                                 dm_put_device(ti, p->path.dev);
662                                 goto bad;
663                         }
664                 }
665         }
666
667         r = ps->type->add_path(ps, &p->path, as->argc, as->argv, &ti->error);
668         if (r) {
669                 dm_put_device(ti, p->path.dev);
670                 goto bad;
671         }
672
673         if (!p->is_active) {
674                 ps->type->fail_path(ps, &p->path);
675                 p->fail_count++;
676                 m->nr_valid_paths--;
677         }
678         return p;
679
680  bad:
681         free_pgpath(p);
682         return ERR_PTR(r);
683 }
684
685 static struct priority_group *parse_priority_group(struct arg_set *as,
686                                                    struct multipath *m)
687 {
688         static struct param _params[] = {
689                 {1, 1024, "invalid number of paths"},
690                 {0, 1024, "invalid number of selector args"}
691         };
692
693         int r;
694         unsigned i, nr_selector_args, nr_params;
695         struct priority_group *pg;
696         struct dm_target *ti = m->ti;
697
698         if (as->argc < 2) {
699                 as->argc = 0;
700                 ti->error = "not enough priority group arguments";
701                 return ERR_PTR(-EINVAL);
702         }
703
704         pg = alloc_priority_group();
705         if (!pg) {
706                 ti->error = "couldn't allocate priority group";
707                 return ERR_PTR(-ENOMEM);
708         }
709         pg->m = m;
710
711         r = parse_path_selector(as, pg, ti);
712         if (r)
713                 goto bad;
714
715         /*
716          * read the paths
717          */
718         r = read_param(_params, shift(as), &pg->nr_pgpaths, &ti->error);
719         if (r)
720                 goto bad;
721
722         r = read_param(_params + 1, shift(as), &nr_selector_args, &ti->error);
723         if (r)
724                 goto bad;
725
726         nr_params = 1 + nr_selector_args;
727         for (i = 0; i < pg->nr_pgpaths; i++) {
728                 struct pgpath *pgpath;
729                 struct arg_set path_args;
730
731                 if (as->argc < nr_params) {
732                         ti->error = "not enough path parameters";
733                         goto bad;
734                 }
735
736                 path_args.argc = nr_params;
737                 path_args.argv = as->argv;
738
739                 pgpath = parse_path(&path_args, &pg->ps, ti);
740                 if (IS_ERR(pgpath)) {
741                         r = PTR_ERR(pgpath);
742                         goto bad;
743                 }
744
745                 pgpath->pg = pg;
746                 list_add_tail(&pgpath->list, &pg->pgpaths);
747                 consume(as, nr_params);
748         }
749
750         return pg;
751
752  bad:
753         free_priority_group(pg, ti);
754         return ERR_PTR(r);
755 }
756
757 static int parse_hw_handler(struct arg_set *as, struct multipath *m)
758 {
759         unsigned hw_argc;
760         int ret;
761         struct dm_target *ti = m->ti;
762
763         static struct param _params[] = {
764                 {0, 1024, "invalid number of hardware handler args"},
765         };
766
767         if (read_param(_params, shift(as), &hw_argc, &ti->error))
768                 return -EINVAL;
769
770         if (!hw_argc)
771                 return 0;
772
773         if (hw_argc > as->argc) {
774                 ti->error = "not enough arguments for hardware handler";
775                 return -EINVAL;
776         }
777
778         m->hw_handler_name = kstrdup(shift(as), GFP_KERNEL);
779         request_module("scsi_dh_%s", m->hw_handler_name);
780         if (scsi_dh_handler_exist(m->hw_handler_name) == 0) {
781                 ti->error = "unknown hardware handler type";
782                 ret = -EINVAL;
783                 goto fail;
784         }
785
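        /*
         * Any further handler arguments are packed as the argument count
         * followed by each argument, '\0'-separated, and later handed to
         * scsi_dh_set_params() in parse_path().
         */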
786         if (hw_argc > 1) {
787                 char *p;
788                 int i, j, len = 4;
789
790                 for (i = 0; i <= hw_argc - 2; i++)
791                         len += strlen(as->argv[i]) + 1;
792                 p = m->hw_handler_params = kzalloc(len, GFP_KERNEL);
793                 if (!p) {
794                         ti->error = "memory allocation failed";
795                         ret = -ENOMEM;
796                         goto fail;
797                 }
798                 j = sprintf(p, "%d", hw_argc - 1);
799                 for (i = 0, p+=j+1; i <= hw_argc - 2; i++, p+=j+1)
800                         j = sprintf(p, "%s", as->argv[i]);
801         }
802         consume(as, hw_argc - 1);
803
804         return 0;
805 fail:
806         kfree(m->hw_handler_name);
807         m->hw_handler_name = NULL;
808         return ret;
809 }
810
811 static int parse_features(struct arg_set *as, struct multipath *m)
812 {
813         int r;
814         unsigned argc;
815         struct dm_target *ti = m->ti;
816         const char *param_name;
817
818         static struct param _params[] = {
819                 {0, 3, "invalid number of feature args"},
820                 {1, 50, "pg_init_retries must be between 1 and 50"},
821         };
822
823         r = read_param(_params, shift(as), &argc, &ti->error);
824         if (r)
825                 return -EINVAL;
826
827         if (!argc)
828                 return 0;
829
830         do {
831                 param_name = shift(as);
832                 argc--;
833
834                 if (!strnicmp(param_name, MESG_STR("queue_if_no_path"))) {
835                         r = queue_if_no_path(m, 1, 0);
836                         continue;
837                 }
838
839                 if (!strnicmp(param_name, MESG_STR("pg_init_retries")) &&
840                     (argc >= 1)) {
841                         r = read_param(_params + 1, shift(as),
842                                        &m->pg_init_retries, &ti->error);
843                         argc--;
844                         continue;
845                 }
846
847                 ti->error = "Unrecognised multipath feature request";
848                 r = -EINVAL;
849         } while (argc && !r);
850
851         return r;
852 }
853
854 static int multipath_ctr(struct dm_target *ti, unsigned int argc,
855                          char **argv)
856 {
857         /* target parameters */
858         static struct param _params[] = {
859                 {0, 1024, "invalid number of priority groups"},
860                 {0, 1024, "invalid initial priority group number"},
861         };
862
863         int r;
864         struct multipath *m;
865         struct arg_set as;
866         unsigned pg_count = 0;
867         unsigned next_pg_num;
868
869         as.argc = argc;
870         as.argv = argv;
871
872         m = alloc_multipath(ti);
873         if (!m) {
874                 ti->error = "can't allocate multipath";
875                 return -EINVAL;
876         }
877
878         r = parse_features(&as, m);
879         if (r)
880                 goto bad;
881
882         r = parse_hw_handler(&as, m);
883         if (r)
884                 goto bad;
885
886         r = read_param(_params, shift(&as), &m->nr_priority_groups, &ti->error);
887         if (r)
888                 goto bad;
889
890         r = read_param(_params + 1, shift(&as), &next_pg_num, &ti->error);
891         if (r)
892                 goto bad;
893
894         /* parse the priority groups */
895         while (as.argc) {
896                 struct priority_group *pg;
897
898                 pg = parse_priority_group(&as, m);
899                 if (IS_ERR(pg)) {
900                         r = PTR_ERR(pg);
901                         goto bad;
902                 }
903
904                 m->nr_valid_paths += pg->nr_pgpaths;
905                 list_add_tail(&pg->list, &m->priority_groups);
906                 pg_count++;
907                 pg->pg_num = pg_count;
908                 if (!--next_pg_num)
909                         m->next_pg = pg;
910         }
911
912         if (pg_count != m->nr_priority_groups) {
913                 ti->error = "priority group count mismatch";
914                 r = -EINVAL;
915                 goto bad;
916         }
917
918         ti->num_flush_requests = 1;
919
920         return 0;
921
922  bad:
923         free_multipath(m);
924         return r;
925 }
926
927 static void multipath_dtr(struct dm_target *ti)
928 {
929         struct multipath *m = (struct multipath *) ti->private;
930
931         flush_workqueue(kmpath_handlerd);
932         flush_workqueue(kmultipathd);
933         flush_scheduled_work();
934         free_multipath(m);
935 }
936
937 /*
938  * Map cloned requests
939  */
940 static int multipath_map(struct dm_target *ti, struct request *clone,
941                          union map_info *map_context)
942 {
943         int r;
944         struct dm_mpath_io *mpio;
945         struct multipath *m = (struct multipath *) ti->private;
946
947         mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC);
948         if (!mpio)
949                 /* ENOMEM, requeue */
950                 return DM_MAPIO_REQUEUE;
951         memset(mpio, 0, sizeof(*mpio));
952
953         map_context->ptr = mpio;
954         clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
955         /* Always attach a sense buffer */
956         if (!clone->sense)
957                 clone->sense = mpio->sense;
958         r = map_io(m, clone, mpio, 0);
959         if (r < 0 || r == DM_MAPIO_REQUEUE)
960                 mempool_free(mpio, m->mpio_pool);
961
962         return r;
963 }
964
965 /*
966  * Take a path out of use.
967  */
968 static int fail_path(struct pgpath *pgpath)
969 {
970         unsigned long flags;
971         struct multipath *m = pgpath->pg->m;
972
973         spin_lock_irqsave(&m->lock, flags);
974
975         if (!pgpath->is_active)
976                 goto out;
977
978         DMWARN("Failing path %s.", pgpath->path.pdev);
979
980         pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path);
981         pgpath->is_active = 0;
982         pgpath->fail_count++;
983
984         m->nr_valid_paths--;
985
986         if (pgpath == m->current_pgpath)
987                 m->current_pgpath = NULL;
988
989         dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
990                        pgpath->path.pdev, m->nr_valid_paths);
991
992         schedule_work(&m->trigger_event);
993         queue_work(kmultipathd, &pgpath->deactivate_path);
994
995 out:
996         spin_unlock_irqrestore(&m->lock, flags);
997
998         return 0;
999 }
1000
1001 /*
1002  * Reinstate a previously-failed path
1003  */
1004 static int reinstate_path(struct pgpath *pgpath)
1005 {
1006         int r = 0;
1007         unsigned long flags;
1008         struct multipath *m = pgpath->pg->m;
1009
1010         spin_lock_irqsave(&m->lock, flags);
1011
1012         if (pgpath->is_active)
1013                 goto out;
1014
1015         if (!pgpath->path.dev) {
1016                 DMWARN("Cannot reinstate disabled path %s", pgpath->path.pdev);
1017                 r = -ENODEV;
1018                 goto out;
1019         }
1020
1021         if (!pgpath->pg->ps.type->reinstate_path) {
1022                 DMWARN("Reinstate path not supported by path selector %s",
1023                        pgpath->pg->ps.type->name);
1024                 r = -EINVAL;
1025                 goto out;
1026         }
1027
1028         r = pgpath->pg->ps.type->reinstate_path(&pgpath->pg->ps, &pgpath->path);
1029         if (r)
1030                 goto out;
1031
1032         pgpath->is_active = 1;
1033
1034         if (!m->nr_valid_paths++ && m->queue_size) {
1035                 m->current_pgpath = NULL;
1036                 queue_work(kmultipathd, &m->process_queued_ios);
1037         } else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) {
1038                 if (queue_work(kmpath_handlerd, &pgpath->activate_path))
1039                         m->pg_init_in_progress++;
1040         }
1041
1042         dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
1043                        pgpath->path.pdev, m->nr_valid_paths);
1044
1045         schedule_work(&m->trigger_event);
1046
1047 out:
1048         spin_unlock_irqrestore(&m->lock, flags);
1049
1050         return r;
1051 }
1052
1053 /*
1054  * Fail or reinstate all paths that match the provided struct dm_dev.
1055  */
1056 static int action_dev(struct multipath *m, struct dm_dev *dev,
1057                       action_fn action)
1058 {
1059         int r = 0;
1060         struct pgpath *pgpath;
1061         struct priority_group *pg;
1062
1063         if (!dev)
1064                 return 0;
1065
1066         list_for_each_entry(pg, &m->priority_groups, list) {
1067                 list_for_each_entry(pgpath, &pg->pgpaths, list) {
1068                         if (pgpath->path.dev == dev)
1069                                 r = action(pgpath);
1070                 }
1071         }
1072
1073         return r;
1074 }
1075
1076 /*
1077  * Temporarily try to avoid having to use the specified PG
1078  */
1079 static void bypass_pg(struct multipath *m, struct priority_group *pg,
1080                       int bypassed)
1081 {
1082         unsigned long flags;
1083
1084         spin_lock_irqsave(&m->lock, flags);
1085
1086         pg->bypassed = bypassed;
1087         m->current_pgpath = NULL;
1088         m->current_pg = NULL;
1089
1090         spin_unlock_irqrestore(&m->lock, flags);
1091
1092         schedule_work(&m->trigger_event);
1093 }
1094
1095 /*
1096  * Switch to using the specified PG from the next I/O that gets mapped
1097  */
1098 static int switch_pg_num(struct multipath *m, const char *pgstr)
1099 {
1100         struct priority_group *pg;
1101         unsigned pgnum;
1102         unsigned long flags;
1103
1104         if (!pgstr || (sscanf(pgstr, "%u", &pgnum) != 1) || !pgnum ||
1105             (pgnum > m->nr_priority_groups)) {
1106                 DMWARN("invalid PG number supplied to switch_pg_num");
1107                 return -EINVAL;
1108         }
1109
1110         spin_lock_irqsave(&m->lock, flags);
1111         list_for_each_entry(pg, &m->priority_groups, list) {
1112                 pg->bypassed = 0;
1113                 if (--pgnum)
1114                         continue;
1115
1116                 m->current_pgpath = NULL;
1117                 m->current_pg = NULL;
1118                 m->next_pg = pg;
1119         }
1120         spin_unlock_irqrestore(&m->lock, flags);
1121
1122         schedule_work(&m->trigger_event);
1123         return 0;
1124 }
1125
1126 /*
1127  * Set/clear bypassed status of a PG.
1128  * PGs are numbered upwards from 1 in the order they were declared.
1129  */
1130 static int bypass_pg_num(struct multipath *m, const char *pgstr, int bypassed)
1131 {
1132         struct priority_group *pg;
1133         unsigned pgnum;
1134
1135         if (!pgstr || (sscanf(pgstr, "%u", &pgnum) != 1) || !pgnum ||
1136             (pgnum > m->nr_priority_groups)) {
1137                 DMWARN("invalid PG number supplied to bypass_pg");
1138                 return -EINVAL;
1139         }
1140
1141         list_for_each_entry(pg, &m->priority_groups, list) {
1142                 if (!--pgnum)
1143                         break;
1144         }
1145
1146         bypass_pg(m, pg, bypassed);
1147         return 0;
1148 }
1149
1150 /*
1151  * Should we retry pg_init immediately?
1152  */
1153 static int pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath)
1154 {
1155         unsigned long flags;
1156         int limit_reached = 0;
1157
1158         spin_lock_irqsave(&m->lock, flags);
1159
1160         if (m->pg_init_count <= m->pg_init_retries)
1161                 m->pg_init_required = 1;
1162         else
1163                 limit_reached = 1;
1164
1165         spin_unlock_irqrestore(&m->lock, flags);
1166
1167         return limit_reached;
1168 }
1169
1170 static void pg_init_done(void *data, int errors)
1171 {
1172         struct dm_path *path = data;
1173         struct pgpath *pgpath = path_to_pgpath(path);
1174         struct priority_group *pg = pgpath->pg;
1175         struct multipath *m = pg->m;
1176         unsigned long flags;
1177
1178         /* device or driver problems */
1179         switch (errors) {
1180         case SCSI_DH_OK:
1181                 break;
1182         case SCSI_DH_NOSYS:
1183                 if (!m->hw_handler_name) {
1184                         errors = 0;
1185                         break;
1186                 }
1187                 DMERR("Cannot failover device because scsi_dh_%s was not "
1188                       "loaded.", m->hw_handler_name);
1189                 /*
1190                  * Fail path for now, so we do not ping pong
1191                  */
1192                 fail_path(pgpath);
1193                 break;
1194         case SCSI_DH_DEV_TEMP_BUSY:
1195                 /*
1196                  * Probably doing something like FW upgrade on the
1197                  * controller so try the other pg.
1198                  */
1199                 bypass_pg(m, pg, 1);
1200                 break;
1201         /* TODO: For SCSI_DH_RETRY we should wait a couple seconds */
1202         case SCSI_DH_RETRY:
1203         case SCSI_DH_IMM_RETRY:
1204         case SCSI_DH_RES_TEMP_UNAVAIL:
1205                 if (pg_init_limit_reached(m, pgpath))
1206                         fail_path(pgpath);
1207                 errors = 0;
1208                 break;
1209         default:
1210                 /*
1211                  * We probably do not want to fail the path for a device
1212                  * error, but this is what the old dm did. In future
1213                  * patches we can do more advanced handling.
1214                  */
1215                 fail_path(pgpath);
1216         }
1217
1218         spin_lock_irqsave(&m->lock, flags);
1219         if (errors) {
1220                 if (pgpath == m->current_pgpath) {
1221                         DMERR("Could not failover device. Error %d.", errors);
1222                         m->current_pgpath = NULL;
1223                         m->current_pg = NULL;
1224                 }
1225         } else if (!m->pg_init_required) {
1226                 m->queue_io = 0;
1227                 pg->bypassed = 0;
1228         }
1229
1230         m->pg_init_in_progress--;
1231         if (!m->pg_init_in_progress)
1232                 queue_work(kmultipathd, &m->process_queued_ios);
1233         spin_unlock_irqrestore(&m->lock, flags);
1234 }
1235
1236 static void activate_path(struct work_struct *work)
1237 {
1238         struct pgpath *pgpath =
1239                 container_of(work, struct pgpath, activate_path);
1240
1241         if (pgpath->path.dev)
1242                 scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev),
1243                                  pg_init_done, &pgpath->path);
1244 }
1245
1246 /*
1247  * Evaluate scsi return code
1248  */
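/*
 * host_byte(), msg_byte() and status_byte() unpack the transport, message
 * and SCSI status parts of the result word.  Only a reservation conflict
 * or a sense key reporting a target/media problem becomes a hard -EIO;
 * everything else stays DM_ENDIO_REQUEUE so it can be retried on another
 * path.
 */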
1249 static int eval_scsi_error(int result, char *sense, int sense_len)
1250 {
1251         struct scsi_sense_hdr sshdr;
1252         int r = DM_ENDIO_REQUEUE;
1253
1254         if (host_byte(result) != DID_OK)
1255                 return r;
1256
1257         if (msg_byte(result) != COMMAND_COMPLETE)
1258                 return r;
1259
1260         if (status_byte(result) == RESERVATION_CONFLICT)
1261                 /* Do not retry here, possible data corruption */
1262                 return -EIO;
1263
1264 #if defined(CONFIG_SCSI) || defined(CONFIG_SCSI_MODULE)
1265         if (status_byte(result) == CHECK_CONDITION &&
1266             scsi_normalize_sense(sense, sense_len, &sshdr)) {
1267
1268                 switch (sshdr.sense_key) {
1269                 case MEDIUM_ERROR:
1270                 case DATA_PROTECT:
1271                 case BLANK_CHECK:
1272                 case COPY_ABORTED:
1273                 case VOLUME_OVERFLOW:
1274                 case MISCOMPARE:
1275                         r = -EIO;
1276                         break;
1277                 }
1278         }
1279 #endif
1280
1281         return r;
1282 }
1283
1284 /*
1285  * end_io handling
1286  */
1287 static int do_end_io(struct multipath *m, struct request *clone,
1288                      int error, struct dm_mpath_io *mpio)
1289 {
1290         /*
1291          * We don't queue any clone request inside the multipath target
1292          * during end I/O handling, since those clone requests don't have
1293          * bio clones.  If we queue them inside the multipath target,
1294          * we need to make bio clones, that requires memory allocation.
1295          * (See drivers/md/dm.c:end_clone_bio() about why the clone requests
1296          *  don't have bio clones.)
1297          * Instead of queueing the clone request here, we queue the original
1298          * request into dm core, which will remake a clone request and
1299          * clone bios for it and resubmit it later.
1300          */
1301         int r = DM_ENDIO_REQUEUE;
1302         unsigned long flags;
1303
1304         if (!error && !clone->errors)
1305                 return 0;       /* I/O complete */
1306
1307         if (error == -EOPNOTSUPP)
1308                 return error;
1309
1310         r = eval_scsi_error(clone->errors, clone->sense, clone->sense_len);
1311         if (r != DM_ENDIO_REQUEUE)
1312                 return r;
1313
1314         if (mpio->pgpath)
1315                 fail_path(mpio->pgpath);
1316
1317         spin_lock_irqsave(&m->lock, flags);
1318         if (!m->nr_valid_paths && !m->queue_if_no_path && !__must_push_back(m))
1319                 r = -EIO;
1320         spin_unlock_irqrestore(&m->lock, flags);
1321
1322         return r;
1323 }
1324
1325 static int multipath_end_io(struct dm_target *ti, struct request *clone,
1326                             int error, union map_info *map_context)
1327 {
1328         struct multipath *m = ti->private;
1329         struct dm_mpath_io *mpio = map_context->ptr;
1330         struct pgpath *pgpath = mpio->pgpath;
1331         struct path_selector *ps;
1332         int r;
1333
1334         r  = do_end_io(m, clone, error, mpio);
1335         if (pgpath) {
1336                 ps = &pgpath->pg->ps;
1337                 if (ps->type->end_io)
1338                         ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
1339         }
1340         if (clone->sense == mpio->sense) {
1341                 clone->sense = NULL;
1342                 clone->sense_len = 0;
1343         }
1344         mempool_free(mpio, m->mpio_pool);
1345
1346         return r;
1347 }
1348
1349 /*
1350  * Suspend can't complete until all the I/O is processed so if
1351  * the last path fails we must error any remaining I/O.
1352  * Note that if the freeze_bdev fails while suspending, the
1353  * queue_if_no_path state is lost - userspace should reset it.
1354  */
1355 static void multipath_presuspend(struct dm_target *ti)
1356 {
1357         struct multipath *m = (struct multipath *) ti->private;
1358
1359         queue_if_no_path(m, 0, 1);
1360 }
1361
1362 /*
1363  * Restore the queue_if_no_path setting.
1364  */
1365 static void multipath_resume(struct dm_target *ti)
1366 {
1367         struct multipath *m = (struct multipath *) ti->private;
1368         unsigned long flags;
1369
1370         spin_lock_irqsave(&m->lock, flags);
1371         m->queue_if_no_path = m->saved_queue_if_no_path;
1372         spin_unlock_irqrestore(&m->lock, flags);
1373 }
1374
1375 /*
1376  * Info output has the following format:
1377  * num_multipath_feature_args [multipath_feature_args]*
1378  * num_handler_status_args [handler_status_args]*
1379  * num_groups init_group_number
1380  *            [A|D|E num_ps_status_args [ps_status_args]*
1381  *             num_paths num_selector_args
1382  *             [path_dev A|F fail_count [selector_args]* ]+ ]+
1383  *
1384  * Table output has the following format (identical to the constructor string):
1385  * num_feature_args [features_args]*
1386  * num_handler_args hw_handler [hw_handler_args]*
1387  * num_groups init_group_number
1388  *     [priority selector-name num_ps_args [ps_args]*
1389  *      num_paths num_selector_args [path_dev [selector_args]* ]+ ]+
1390  */
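/*
 * Illustrative only: for a single round-robin group with two made-up
 * paths 8:16 and 8:32 (repeat count 1000) and queue_if_no_path set, the
 * table status reads back as
 *   1 queue_if_no_path 0 1 1 round-robin 0 2 1 8:16 1000 8:32 1000
 */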
1391 static int multipath_status(struct dm_target *ti, status_type_t type,
1392                             char *result, unsigned int maxlen)
1393 {
1394         int sz = 0;
1395         unsigned long flags;
1396         struct multipath *m = (struct multipath *) ti->private;
1397         struct priority_group *pg;
1398         struct pgpath *p;
1399         unsigned pg_num;
1400         char state;
1401
1402         spin_lock_irqsave(&m->lock, flags);
1403
1404         /* Features */
1405         if (type == STATUSTYPE_INFO)
1406                 DMEMIT("2 %u %u ", m->queue_size, m->pg_init_count);
1407         else {
1408                 DMEMIT("%u ", m->queue_if_no_path +
1409                               (m->pg_init_retries > 0) * 2);
1410                 if (m->queue_if_no_path)
1411                         DMEMIT("queue_if_no_path ");
1412                 if (m->pg_init_retries)
1413                         DMEMIT("pg_init_retries %u ", m->pg_init_retries);
1414         }
1415
1416         if (!m->hw_handler_name || type == STATUSTYPE_INFO)
1417                 DMEMIT("0 ");
1418         else
1419                 DMEMIT("1 %s ", m->hw_handler_name);
1420
1421         DMEMIT("%u ", m->nr_priority_groups);
1422
1423         if (m->next_pg)
1424                 pg_num = m->next_pg->pg_num;
1425         else if (m->current_pg)
1426                 pg_num = m->current_pg->pg_num;
1427         else
1428                 pg_num = 1;
1429
1430         DMEMIT("%u ", pg_num);
1431
1432         switch (type) {
1433         case STATUSTYPE_INFO:
1434                 list_for_each_entry(pg, &m->priority_groups, list) {
1435                         if (pg->bypassed)
1436                                 state = 'D';    /* Disabled */
1437                         else if (pg == m->current_pg)
1438                                 state = 'A';    /* Currently Active */
1439                         else
1440                                 state = 'E';    /* Enabled */
1441
1442                         DMEMIT("%c ", state);
1443
1444                         if (pg->ps.type->status)
1445                                 sz += pg->ps.type->status(&pg->ps, NULL, type,
1446                                                           result + sz,
1447                                                           maxlen - sz);
1448                         else
1449                                 DMEMIT("0 ");
1450
1451                         DMEMIT("%u %u ", pg->nr_pgpaths,
1452                                pg->ps.type->info_args);
1453
1454                         list_for_each_entry(p, &pg->pgpaths, list) {
1455                                 DMEMIT("%s %s %u ", p->path.pdev,
1456                                        p->is_active ? "A" : "F",
1457                                        p->fail_count);
1458                                 if (pg->ps.type->status)
1459                                         sz += pg->ps.type->status(&pg->ps,
1460                                               &p->path, type, result + sz,
1461                                               maxlen - sz);
1462                         }
1463                 }
1464                 break;
1465
1466         case STATUSTYPE_TABLE:
1467                 list_for_each_entry(pg, &m->priority_groups, list) {
1468                         DMEMIT("%s ", pg->ps.type->name);
1469
1470                         if (pg->ps.type->status)
1471                                 sz += pg->ps.type->status(&pg->ps, NULL, type,
1472                                                           result + sz,
1473                                                           maxlen - sz);
1474                         else
1475                                 DMEMIT("0 ");
1476
1477                         DMEMIT("%u %u ", pg->nr_pgpaths,
1478                                pg->ps.type->table_args);
1479
1480                         list_for_each_entry(p, &pg->pgpaths, list) {
1481                                 DMEMIT("%s ", p->path.pdev);
1482                                 if (pg->ps.type->status)
1483                                         sz += pg->ps.type->status(&pg->ps,
1484                                               &p->path, type, result + sz,
1485                                               maxlen - sz);
1486                         }
1487                 }
1488                 break;
1489         }
1490
1491         spin_unlock_irqrestore(&m->lock, flags);
1492
1493         return 0;
1494 }
1495
1496 static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
1497 {
1498         int r;
1499         struct dm_dev *dev;
1500         struct multipath *m = (struct multipath *) ti->private;
1501         action_fn action;
1502
1503         if (argc == 1) {
1504                 if (!strnicmp(argv[0], MESG_STR("queue_if_no_path")))
1505                         return queue_if_no_path(m, 1, 0);
1506                 else if (!strnicmp(argv[0], MESG_STR("fail_if_no_path")))
1507                         return queue_if_no_path(m, 0, 0);
1508         }
1509
1510         if (argc != 2)
1511                 goto error;
1512
1513         if (!strnicmp(argv[0], MESG_STR("disable_group")))
1514                 return bypass_pg_num(m, argv[1], 1);
1515         else if (!strnicmp(argv[0], MESG_STR("enable_group")))
1516                 return bypass_pg_num(m, argv[1], 0);
1517         else if (!strnicmp(argv[0], MESG_STR("switch_group")))
1518                 return switch_pg_num(m, argv[1]);
1519         else if (!strnicmp(argv[0], MESG_STR("reinstate_path")))
1520                 action = reinstate_path;
1521         else if (!strnicmp(argv[0], MESG_STR("fail_path")))
1522                 action = fail_path;
1523         else
1524                 goto error;
1525
1526         r = dm_get_device(ti, argv[1], ti->begin, ti->len,
1527                           dm_table_get_mode(ti->table), &dev);
1528         if (r) {
1529                 DMWARN("message: error getting device %s",
1530                        argv[1]);
1531                 return -EINVAL;
1532         }
1533
1534         r = action_dev(m, dev, action);
1535
1536         dm_put_device(ti, dev);
1537
1538         return r;
1539
1540 error:
1541         DMWARN("Unrecognised multipath message received.");
1542         return -EINVAL;
1543 }
1544
1545 static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
1546                            unsigned long arg)
1547 {
1548         struct multipath *m = (struct multipath *) ti->private;
1549         struct block_device *bdev = NULL;
1550         fmode_t mode = 0;
1551         unsigned long flags;
1552         int r = 0;
1553
1554         spin_lock_irqsave(&m->lock, flags);
1555
1556         if (!m->current_pgpath)
1557                 __choose_pgpath(m, 0);
1558
1559         if (m->current_pgpath && m->current_pgpath->path.dev) {
1560                 bdev = m->current_pgpath->path.dev->bdev;
1561                 mode = m->current_pgpath->path.dev->mode;
1562         }
1563
1564         if (m->queue_io)
1565                 r = -EAGAIN;
1566         else if (!bdev)
1567                 r = -EIO;
1568
1569         spin_unlock_irqrestore(&m->lock, flags);
1570
1571         return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg);
1572 }
1573
1574 static int multipath_iterate_devices(struct dm_target *ti,
1575                                      iterate_devices_callout_fn fn, void *data)
1576 {
1577         struct multipath *m = ti->private;
1578         struct priority_group *pg;
1579         struct pgpath *p;
1580         int ret = 0;
1581
1582         list_for_each_entry(pg, &m->priority_groups, list) {
1583                 list_for_each_entry(p, &pg->pgpaths, list) {
1584                         ret = fn(ti, p->path.dev, ti->begin, ti->len, data);
1585                         if (ret)
1586                                 goto out;
1587                 }
1588         }
1589
1590 out:
1591         return ret;
1592 }
1593
1594 static int __pgpath_busy(struct pgpath *pgpath)
1595 {
1596         struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);
1597
1598         return dm_underlying_device_busy(q);
1599 }
1600
1601 /*
1602  * We return "busy", only when we can map I/Os but underlying devices
1603  * are busy (so even if we map I/Os now, the I/Os will wait on
1604  * the underlying queue).
1605  * In other words, if we want to kill I/Os or queue them inside us
1606  * due to map unavailability, we don't return "busy".  Otherwise,
1607  * dm core won't give us the I/Os and we can't do what we want.
1608  */
1609 static int multipath_busy(struct dm_target *ti)
1610 {
1611         int busy = 0, has_active = 0;
1612         struct multipath *m = ti->private;
1613         struct priority_group *pg;
1614         struct pgpath *pgpath;
1615         unsigned long flags;
1616
1617         spin_lock_irqsave(&m->lock, flags);
1618
1619         /* Guess which priority_group will be used at next mapping time */
1620         if (unlikely(!m->current_pgpath && m->next_pg))
1621                 pg = m->next_pg;
1622         else if (likely(m->current_pg))
1623                 pg = m->current_pg;
1624         else
1625                 /*
1626                  * We don't know which pg will be used at next mapping time.
1627                  * We don't call __choose_pgpath() here to avoid to trigger
1628                  * pg_init just by busy checking.
1629                  * So we don't know whether underlying devices we will be using
1630                  * at next mapping time are busy or not. Just try mapping.
1631                  */
1632                 goto out;
1633
1634         /*
1635          * If there is one non-busy active path at least, the path selector
1636          * will be able to select it. So we consider such a pg as not busy.
1637          */
1638         busy = 1;
1639         list_for_each_entry(pgpath, &pg->pgpaths, list)
1640                 if (pgpath->is_active) {
1641                         has_active = 1;
1642
1643                         if (!__pgpath_busy(pgpath)) {
1644                                 busy = 0;
1645                                 break;
1646                         }
1647                 }
1648
1649         if (!has_active)
1650                 /*
1651                  * No active path in this pg, so this pg won't be used and
1652                  * the current_pg will be changed at next mapping time.
1653                  * We need to try mapping to determine it.
1654                  */
1655                 busy = 0;
1656
1657 out:
1658         spin_unlock_irqrestore(&m->lock, flags);
1659
1660         return busy;
1661 }
1662
1663 /*-----------------------------------------------------------------
1664  * Module setup
1665  *---------------------------------------------------------------*/
1666 static struct target_type multipath_target = {
1667         .name = "multipath",
1668         .version = {1, 1, 0},
1669         .module = THIS_MODULE,
1670         .ctr = multipath_ctr,
1671         .dtr = multipath_dtr,
1672         .map_rq = multipath_map,
1673         .rq_end_io = multipath_end_io,
1674         .presuspend = multipath_presuspend,
1675         .resume = multipath_resume,
1676         .status = multipath_status,
1677         .message = multipath_message,
1678         .ioctl  = multipath_ioctl,
1679         .iterate_devices = multipath_iterate_devices,
1680         .busy = multipath_busy,
1681 };
1682
1683 static int __init dm_multipath_init(void)
1684 {
1685         int r;
1686
1687         /* allocate a slab for the dm_ios */
1688         _mpio_cache = KMEM_CACHE(dm_mpath_io, 0);
1689         if (!_mpio_cache)
1690                 return -ENOMEM;
1691
1692         r = dm_register_target(&multipath_target);
1693         if (r < 0) {
1694                 DMERR("register failed %d", r);
1695                 kmem_cache_destroy(_mpio_cache);
1696                 return -EINVAL;
1697         }
1698
1699         kmultipathd = create_workqueue("kmpathd");
1700         if (!kmultipathd) {
1701                 DMERR("failed to create workqueue kmpathd");
1702                 dm_unregister_target(&multipath_target);
1703                 kmem_cache_destroy(_mpio_cache);
1704                 return -ENOMEM;
1705         }
1706
1707         /*
1708          * A separate workqueue is used to handle the device handlers
1709          * to avoid overloading existing workqueue. Overloading the
1710          * old workqueue would also create a bottleneck in the
1711          * path of the storage hardware device activation.
1712          */
1713         kmpath_handlerd = create_singlethread_workqueue("kmpath_handlerd");
1714         if (!kmpath_handlerd) {
1715                 DMERR("failed to create workqueue kmpath_handlerd");
1716                 destroy_workqueue(kmultipathd);
1717                 dm_unregister_target(&multipath_target);
1718                 kmem_cache_destroy(_mpio_cache);
1719                 return -ENOMEM;
1720         }
1721
1722         DMINFO("version %u.%u.%u loaded",
1723                multipath_target.version[0], multipath_target.version[1],
1724                multipath_target.version[2]);
1725
1726         return r;
1727 }
1728
1729 static void __exit dm_multipath_exit(void)
1730 {
1731         destroy_workqueue(kmpath_handlerd);
1732         destroy_workqueue(kmultipathd);
1733
1734         dm_unregister_target(&multipath_target);
1735         kmem_cache_destroy(_mpio_cache);
1736 }
1737
1738 module_init(dm_multipath_init);
1739 module_exit(dm_multipath_exit);
1740
1741 MODULE_DESCRIPTION(DM_NAME " multipath target");
1742 MODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>");
1743 MODULE_LICENSE("GPL");