dm mpath: remove is_active from struct dm_path
drivers/md/dm-mpath.c
/*
 * Copyright (C) 2003 Sistina Software Limited.
 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm.h"
#include "dm-path-selector.h"
#include "dm-bio-list.h"
#include "dm-bio-record.h"
#include "dm-uevent.h"

#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/workqueue.h>
#include <scsi/scsi_dh.h>
#include <asm/atomic.h>

#define DM_MSG_PREFIX "multipath"
#define MESG_STR(x) x, sizeof(x)

/* Path properties */
struct pgpath {
        struct list_head list;

        struct priority_group *pg;      /* Owning PG */
        unsigned is_active;             /* Path status */
        unsigned fail_count;            /* Cumulative failure count */

        struct dm_path path;
};

#define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)

/*
 * Paths are grouped into Priority Groups and numbered from 1 upwards.
 * Each has a path selector which controls which path gets used.
 */
struct priority_group {
        struct list_head list;

        struct multipath *m;            /* Owning multipath instance */
        struct path_selector ps;

        unsigned pg_num;                /* Reference number */
        unsigned bypassed;              /* Temporarily bypass this PG? */

        unsigned nr_pgpaths;            /* Number of paths in PG */
        struct list_head pgpaths;
};

/* Multipath context */
struct multipath {
        struct list_head list;
        struct dm_target *ti;

        spinlock_t lock;

        const char *hw_handler_name;
        struct work_struct activate_path;
        struct pgpath *pgpath_to_activate;
        unsigned nr_priority_groups;
        struct list_head priority_groups;
        unsigned pg_init_required;      /* pg_init needs calling? */
        unsigned pg_init_in_progress;   /* Only one pg_init allowed at once */

        unsigned nr_valid_paths;        /* Total number of usable paths */
        struct pgpath *current_pgpath;
        struct priority_group *current_pg;
        struct priority_group *next_pg; /* Switch to this PG if set */
        unsigned repeat_count;          /* I/Os left before calling PS again */

        unsigned queue_io;              /* Must we queue all I/O? */
        unsigned queue_if_no_path;      /* Queue I/O if last path fails? */
        unsigned saved_queue_if_no_path;/* Saved state during suspension */
        unsigned pg_init_retries;       /* Number of times to retry pg_init */
        unsigned pg_init_count;         /* Number of times pg_init called */

        struct work_struct process_queued_ios;
        struct bio_list queued_ios;
        unsigned queue_size;

        struct work_struct trigger_event;

        /*
         * We must use a mempool of dm_mpath_io structs so that we
         * can resubmit bios on error.
         */
        mempool_t *mpio_pool;
};

/*
 * Context information attached to each bio we process.
 */
struct dm_mpath_io {
        struct pgpath *pgpath;
        struct dm_bio_details details;
};

typedef int (*action_fn) (struct pgpath *pgpath);

#define MIN_IOS 256     /* Mempool size */

static struct kmem_cache *_mpio_cache;

static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
static void process_queued_ios(struct work_struct *work);
static void trigger_event(struct work_struct *work);
static void activate_path(struct work_struct *work);


/*-----------------------------------------------
 * Allocation routines
 *-----------------------------------------------*/

static struct pgpath *alloc_pgpath(void)
{
        struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL);

        if (pgpath)
                pgpath->is_active = 1;

        return pgpath;
}

static void free_pgpath(struct pgpath *pgpath)
{
        kfree(pgpath);
}

static struct priority_group *alloc_priority_group(void)
{
        struct priority_group *pg;

        pg = kzalloc(sizeof(*pg), GFP_KERNEL);

        if (pg)
                INIT_LIST_HEAD(&pg->pgpaths);

        return pg;
}

static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
{
        unsigned long flags;
        struct pgpath *pgpath, *tmp;
        struct multipath *m = ti->private;

        list_for_each_entry_safe(pgpath, tmp, pgpaths, list) {
                list_del(&pgpath->list);
                if (m->hw_handler_name)
                        scsi_dh_detach(bdev_get_queue(pgpath->path.dev->bdev));
                dm_put_device(ti, pgpath->path.dev);
                spin_lock_irqsave(&m->lock, flags);
                if (m->pgpath_to_activate == pgpath)
                        m->pgpath_to_activate = NULL;
                spin_unlock_irqrestore(&m->lock, flags);
                free_pgpath(pgpath);
        }
}

static void free_priority_group(struct priority_group *pg,
                                struct dm_target *ti)
{
        struct path_selector *ps = &pg->ps;

        if (ps->type) {
                ps->type->destroy(ps);
                dm_put_path_selector(ps->type);
        }

        free_pgpaths(&pg->pgpaths, ti);
        kfree(pg);
}

static struct multipath *alloc_multipath(struct dm_target *ti)
{
        struct multipath *m;

        m = kzalloc(sizeof(*m), GFP_KERNEL);
        if (m) {
                INIT_LIST_HEAD(&m->priority_groups);
                spin_lock_init(&m->lock);
                m->queue_io = 1;
                INIT_WORK(&m->process_queued_ios, process_queued_ios);
                INIT_WORK(&m->trigger_event, trigger_event);
                INIT_WORK(&m->activate_path, activate_path);
                m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
                if (!m->mpio_pool) {
                        kfree(m);
                        return NULL;
                }
                m->ti = ti;
                ti->private = m;
        }

        return m;
}

static void free_multipath(struct multipath *m)
{
        struct priority_group *pg, *tmp;

        list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) {
                list_del(&pg->list);
                free_priority_group(pg, m->ti);
        }

        kfree(m->hw_handler_name);
        mempool_destroy(m->mpio_pool);
        kfree(m);
}


/*-----------------------------------------------
 * Path selection
 *-----------------------------------------------*/

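/*
 * Make pgpath's PG the current PG.  If a hardware handler is configured,
 * the PG must be initialised first, so flag pg_init as required and
 * queue I/O until it completes.
 */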
static void __switch_pg(struct multipath *m, struct pgpath *pgpath)
{
        m->current_pg = pgpath->pg;

        /* Must we initialise the PG first, and queue I/O till it's ready? */
        if (m->hw_handler_name) {
                m->pg_init_required = 1;
                m->queue_io = 1;
        } else {
                m->pg_init_required = 0;
                m->queue_io = 0;
        }

        m->pg_init_count = 0;
}

static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg)
{
        struct dm_path *path;

        path = pg->ps.type->select_path(&pg->ps, &m->repeat_count);
        if (!path)
                return -ENXIO;

        m->current_pgpath = path_to_pgpath(path);

        if (m->current_pg != pg)
                __switch_pg(m, m->current_pgpath);

        return 0;
}

static void __choose_pgpath(struct multipath *m)
{
        struct priority_group *pg;
        unsigned bypassed = 1;

        if (!m->nr_valid_paths)
                goto failed;

        /* Were we instructed to switch PG? */
        if (m->next_pg) {
                pg = m->next_pg;
                m->next_pg = NULL;
                if (!__choose_path_in_pg(m, pg))
                        return;
        }

        /* Don't change PG until it has no remaining paths */
        if (m->current_pg && !__choose_path_in_pg(m, m->current_pg))
                return;

        /*
         * Loop through priority groups until we find a valid path.
         * First time we skip PGs marked 'bypassed'.
         * Second time we only try the ones we skipped.
         */
        do {
                list_for_each_entry(pg, &m->priority_groups, list) {
                        if (pg->bypassed == bypassed)
                                continue;
                        if (!__choose_path_in_pg(m, pg))
                                return;
                }
        } while (bypassed--);

failed:
        m->current_pgpath = NULL;
        m->current_pg = NULL;
}

/*
 * Check whether bios must be queued in the device-mapper core rather
 * than here in the target.
 *
 * m->lock must be held on entry.
 *
 * If m->queue_if_no_path and m->saved_queue_if_no_path hold the
 * same value then we are not between multipath_presuspend()
 * and multipath_resume() calls and we have no need to check
 * for the DMF_NOFLUSH_SUSPENDING flag.
 */
static int __must_push_back(struct multipath *m)
{
        return (m->queue_if_no_path != m->saved_queue_if_no_path &&
                dm_noflush_suspending(m->ti));
}

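/*
 * Map one bio onto the current path, (re)selecting a path if necessary.
 * Returns DM_MAPIO_SUBMITTED if the bio was queued for the daemon,
 * DM_MAPIO_REMAPPED if it can be issued to the chosen device,
 * DM_MAPIO_REQUEUE to push it back to the dm core, or -EIO on failure.
 */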
static int map_io(struct multipath *m, struct bio *bio,
                  struct dm_mpath_io *mpio, unsigned was_queued)
{
        int r = DM_MAPIO_REMAPPED;
        unsigned long flags;
        struct pgpath *pgpath;

        spin_lock_irqsave(&m->lock, flags);

        /* Do we need to select a new pgpath? */
        if (!m->current_pgpath ||
            (!m->queue_io && (m->repeat_count && --m->repeat_count == 0)))
                __choose_pgpath(m);

        pgpath = m->current_pgpath;

        if (was_queued)
                m->queue_size--;

        if ((pgpath && m->queue_io) ||
            (!pgpath && m->queue_if_no_path)) {
                /* Queue for the daemon to resubmit */
                bio_list_add(&m->queued_ios, bio);
                m->queue_size++;
                if ((m->pg_init_required && !m->pg_init_in_progress) ||
                    !m->queue_io)
                        queue_work(kmultipathd, &m->process_queued_ios);
                pgpath = NULL;
                r = DM_MAPIO_SUBMITTED;
        } else if (pgpath)
                bio->bi_bdev = pgpath->path.dev->bdev;
        else if (__must_push_back(m))
                r = DM_MAPIO_REQUEUE;
        else
                r = -EIO;       /* Failed */

        mpio->pgpath = pgpath;

        spin_unlock_irqrestore(&m->lock, flags);

        return r;
}

/*
 * If we run out of usable paths, should we queue I/O or error it?
 */
static int queue_if_no_path(struct multipath *m, unsigned queue_if_no_path,
                            unsigned save_old_value)
{
        unsigned long flags;

        spin_lock_irqsave(&m->lock, flags);

        if (save_old_value)
                m->saved_queue_if_no_path = m->queue_if_no_path;
        else
                m->saved_queue_if_no_path = queue_if_no_path;
        m->queue_if_no_path = queue_if_no_path;
        if (!m->queue_if_no_path && m->queue_size)
                queue_work(kmultipathd, &m->process_queued_ios);

        spin_unlock_irqrestore(&m->lock, flags);

        return 0;
}

/*-----------------------------------------------------------------
 * The multipath daemon is responsible for resubmitting queued ios.
 *---------------------------------------------------------------*/

static void dispatch_queued_ios(struct multipath *m)
{
        int r;
        unsigned long flags;
        struct bio *bio = NULL, *next;
        struct dm_mpath_io *mpio;
        union map_info *info;

        spin_lock_irqsave(&m->lock, flags);
        bio = bio_list_get(&m->queued_ios);
        spin_unlock_irqrestore(&m->lock, flags);

        while (bio) {
                next = bio->bi_next;
                bio->bi_next = NULL;

                info = dm_get_mapinfo(bio);
                mpio = info->ptr;

                r = map_io(m, bio, mpio, 1);
                if (r < 0)
                        bio_endio(bio, r);
                else if (r == DM_MAPIO_REMAPPED)
                        generic_make_request(bio);
                else if (r == DM_MAPIO_REQUEUE)
                        bio_endio(bio, -EIO);

                bio = next;
        }
}

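/*
 * Worker: choose a path if none is current, kick off pg_init on the
 * handler workqueue when required, and dispatch queued bios once
 * queueing is no longer necessary.
 */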
static void process_queued_ios(struct work_struct *work)
{
        struct multipath *m =
                container_of(work, struct multipath, process_queued_ios);
        struct pgpath *pgpath = NULL;
        unsigned init_required = 0, must_queue = 1;
        unsigned long flags;

        spin_lock_irqsave(&m->lock, flags);

        if (!m->queue_size)
                goto out;

        if (!m->current_pgpath)
                __choose_pgpath(m);

        pgpath = m->current_pgpath;
        m->pgpath_to_activate = m->current_pgpath;

        if ((pgpath && !m->queue_io) ||
            (!pgpath && !m->queue_if_no_path))
                must_queue = 0;

        if (m->pg_init_required && !m->pg_init_in_progress) {
                m->pg_init_count++;
                m->pg_init_required = 0;
                m->pg_init_in_progress = 1;
                init_required = 1;
        }

out:
        spin_unlock_irqrestore(&m->lock, flags);

        if (init_required)
                queue_work(kmpath_handlerd, &m->activate_path);

        if (!must_queue)
                dispatch_queued_ios(m);
}

/*
 * An event is triggered whenever a path is taken out of use.
 * Includes path failure and PG bypass.
 */
static void trigger_event(struct work_struct *work)
{
        struct multipath *m =
                container_of(work, struct multipath, trigger_event);

        dm_table_event(m->ti->table);
}

/*-----------------------------------------------------------------
 * Constructor/argument parsing:
 * <#multipath feature args> [<arg>]*
 * <#hw_handler args> [hw_handler [<arg>]*]
 * <#priority groups>
 * <initial priority group>
 *     [<selector> <#selector args> [<arg>]*
 *      <#paths> <#per-path selector args>
 *         [<path> [<arg>]* ]+ ]+
 *---------------------------------------------------------------*/
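/*
 * Purely for illustration (hypothetical device numbers and a 1 GiB
 * mapping), a minimal table line in this format might look like:
 *
 *   0 2097152 multipath 0 0 1 1 round-robin 0 2 1 8:16 1000 8:32 1000
 *
 * i.e. no feature args, no hardware handler, one priority group using
 * round-robin with two paths and one per-path selector arg (the repeat
 * count).
 */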
struct param {
        unsigned min;
        unsigned max;
        char *error;
};

static int read_param(struct param *param, char *str, unsigned *v, char **error)
{
        if (!str ||
            (sscanf(str, "%u", v) != 1) ||
            (*v < param->min) ||
            (*v > param->max)) {
                *error = param->error;
                return -EINVAL;
        }

        return 0;
}

struct arg_set {
        unsigned argc;
        char **argv;
};

static char *shift(struct arg_set *as)
{
        char *r;

        if (as->argc) {
                as->argc--;
                r = *as->argv;
                as->argv++;
                return r;
        }

        return NULL;
}

static void consume(struct arg_set *as, unsigned n)
{
        BUG_ON(as->argc < n);
        as->argc -= n;
        as->argv += n;
}

static int parse_path_selector(struct arg_set *as, struct priority_group *pg,
                               struct dm_target *ti)
{
        int r;
        struct path_selector_type *pst;
        unsigned ps_argc;

        static struct param _params[] = {
                {0, 1024, "invalid number of path selector args"},
        };

        pst = dm_get_path_selector(shift(as));
        if (!pst) {
                ti->error = "unknown path selector type";
                return -EINVAL;
        }

        r = read_param(_params, shift(as), &ps_argc, &ti->error);
        if (r) {
                dm_put_path_selector(pst);
                return -EINVAL;
        }

        r = pst->create(&pg->ps, ps_argc, as->argv);
        if (r) {
                dm_put_path_selector(pst);
                ti->error = "path selector constructor failed";
                return r;
        }

        pg->ps.type = pst;
        consume(as, ps_argc);

        return 0;
}

static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
                                 struct dm_target *ti)
{
        int r;
        struct pgpath *p;
        struct multipath *m = ti->private;

        /* we need at least a path arg */
        if (as->argc < 1) {
                ti->error = "no device given";
                return ERR_PTR(-EINVAL);
        }

        p = alloc_pgpath();
        if (!p)
                return ERR_PTR(-ENOMEM);

        r = dm_get_device(ti, shift(as), ti->begin, ti->len,
                          dm_table_get_mode(ti->table), &p->path.dev);
        if (r) {
                ti->error = "error getting device";
                goto bad;
        }

        if (m->hw_handler_name) {
                r = scsi_dh_attach(bdev_get_queue(p->path.dev->bdev),
                                   m->hw_handler_name);
                if (r < 0) {
                        dm_put_device(ti, p->path.dev);
                        goto bad;
                }
        }

        r = ps->type->add_path(ps, &p->path, as->argc, as->argv, &ti->error);
        if (r) {
                dm_put_device(ti, p->path.dev);
                goto bad;
        }

        return p;

 bad:
        free_pgpath(p);
        return ERR_PTR(r);
}

static struct priority_group *parse_priority_group(struct arg_set *as,
                                                   struct multipath *m)
{
        static struct param _params[] = {
                {1, 1024, "invalid number of paths"},
                {0, 1024, "invalid number of selector args"}
        };

        int r;
        unsigned i, nr_selector_args, nr_params;
        struct priority_group *pg;
        struct dm_target *ti = m->ti;

        if (as->argc < 2) {
                as->argc = 0;
                ti->error = "not enough priority group arguments";
                return ERR_PTR(-EINVAL);
        }

        pg = alloc_priority_group();
        if (!pg) {
                ti->error = "couldn't allocate priority group";
                return ERR_PTR(-ENOMEM);
        }
        pg->m = m;

        r = parse_path_selector(as, pg, ti);
        if (r)
                goto bad;

        /*
         * read the paths
         */
        r = read_param(_params, shift(as), &pg->nr_pgpaths, &ti->error);
        if (r)
                goto bad;

        r = read_param(_params + 1, shift(as), &nr_selector_args, &ti->error);
        if (r)
                goto bad;

        nr_params = 1 + nr_selector_args;
        for (i = 0; i < pg->nr_pgpaths; i++) {
                struct pgpath *pgpath;
                struct arg_set path_args;

                if (as->argc < nr_params) {
                        ti->error = "not enough path parameters";
                        goto bad;
                }

                path_args.argc = nr_params;
                path_args.argv = as->argv;

                pgpath = parse_path(&path_args, &pg->ps, ti);
                if (IS_ERR(pgpath)) {
                        r = PTR_ERR(pgpath);
                        goto bad;
                }

                pgpath->pg = pg;
                list_add_tail(&pgpath->list, &pg->pgpaths);
                consume(as, nr_params);
        }

        return pg;

 bad:
        free_priority_group(pg, ti);
        return ERR_PTR(r);
}

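/*
 * Parse the hardware handler part of the table - "0" for none, or for
 * example "1 emc" (handler name shown for illustration) - and load the
 * matching scsi_dh_* module.
 */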
static int parse_hw_handler(struct arg_set *as, struct multipath *m)
{
        unsigned hw_argc;
        struct dm_target *ti = m->ti;

        static struct param _params[] = {
                {0, 1024, "invalid number of hardware handler args"},
        };

        if (read_param(_params, shift(as), &hw_argc, &ti->error))
                return -EINVAL;

        if (!hw_argc)
                return 0;

        m->hw_handler_name = kstrdup(shift(as), GFP_KERNEL);
        request_module("scsi_dh_%s", m->hw_handler_name);
        if (scsi_dh_handler_exist(m->hw_handler_name) == 0) {
                ti->error = "unknown hardware handler type";
                kfree(m->hw_handler_name);
                m->hw_handler_name = NULL;
                return -EINVAL;
        }
        consume(as, hw_argc - 1);

        return 0;
}

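/*
 * Parse the optional feature arguments: "0" for none, or for example
 * "3 queue_if_no_path pg_init_retries 5" (illustrative values) to queue
 * I/O when no path is usable and allow up to five pg_init retries.
 */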
static int parse_features(struct arg_set *as, struct multipath *m)
{
        int r;
        unsigned argc;
        struct dm_target *ti = m->ti;
        const char *param_name;

        static struct param _params[] = {
                {0, 3, "invalid number of feature args"},
                {1, 50, "pg_init_retries must be between 1 and 50"},
        };

        r = read_param(_params, shift(as), &argc, &ti->error);
        if (r)
                return -EINVAL;

        if (!argc)
                return 0;

        do {
                param_name = shift(as);
                argc--;

                if (!strnicmp(param_name, MESG_STR("queue_if_no_path"))) {
                        r = queue_if_no_path(m, 1, 0);
                        continue;
                }

                if (!strnicmp(param_name, MESG_STR("pg_init_retries")) &&
                    (argc >= 1)) {
                        r = read_param(_params + 1, shift(as),
                                       &m->pg_init_retries, &ti->error);
                        argc--;
                        continue;
                }

                ti->error = "Unrecognised multipath feature request";
                r = -EINVAL;
        } while (argc && !r);

        return r;
}

static int multipath_ctr(struct dm_target *ti, unsigned int argc,
                         char **argv)
{
        /* target parameters */
        static struct param _params[] = {
                {1, 1024, "invalid number of priority groups"},
                {1, 1024, "invalid initial priority group number"},
        };

        int r;
        struct multipath *m;
        struct arg_set as;
        unsigned pg_count = 0;
        unsigned next_pg_num;

        as.argc = argc;
        as.argv = argv;

        m = alloc_multipath(ti);
        if (!m) {
                ti->error = "can't allocate multipath";
                return -EINVAL;
        }

        r = parse_features(&as, m);
        if (r)
                goto bad;

        r = parse_hw_handler(&as, m);
        if (r)
                goto bad;

        r = read_param(_params, shift(&as), &m->nr_priority_groups, &ti->error);
        if (r)
                goto bad;

        r = read_param(_params + 1, shift(&as), &next_pg_num, &ti->error);
        if (r)
                goto bad;

        /* parse the priority groups */
        while (as.argc) {
                struct priority_group *pg;

                pg = parse_priority_group(&as, m);
                if (IS_ERR(pg)) {
                        r = PTR_ERR(pg);
                        goto bad;
                }

                m->nr_valid_paths += pg->nr_pgpaths;
                list_add_tail(&pg->list, &m->priority_groups);
                pg_count++;
                pg->pg_num = pg_count;
                if (!--next_pg_num)
                        m->next_pg = pg;
        }

        if (pg_count != m->nr_priority_groups) {
                ti->error = "priority group count mismatch";
                r = -EINVAL;
                goto bad;
        }

        return 0;

 bad:
        free_multipath(m);
        return r;
}

static void multipath_dtr(struct dm_target *ti)
{
        struct multipath *m = (struct multipath *) ti->private;

        flush_workqueue(kmpath_handlerd);
        flush_workqueue(kmultipathd);
        free_multipath(m);
}

/*
 * Map bios, recording original fields for later in case we have to resubmit
 */
static int multipath_map(struct dm_target *ti, struct bio *bio,
                         union map_info *map_context)
{
        int r;
        struct dm_mpath_io *mpio;
        struct multipath *m = (struct multipath *) ti->private;

        mpio = mempool_alloc(m->mpio_pool, GFP_NOIO);
        dm_bio_record(&mpio->details, bio);

        map_context->ptr = mpio;
        bio->bi_rw |= (1 << BIO_RW_FAILFAST);
        r = map_io(m, bio, mpio, 0);
        if (r < 0 || r == DM_MAPIO_REQUEUE)
                mempool_free(mpio, m->mpio_pool);

        return r;
}

/*
 * Take a path out of use.
 */
static int fail_path(struct pgpath *pgpath)
{
        unsigned long flags;
        struct multipath *m = pgpath->pg->m;

        spin_lock_irqsave(&m->lock, flags);

        if (!pgpath->is_active)
                goto out;

        DMWARN("Failing path %s.", pgpath->path.dev->name);

        pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path);
        pgpath->is_active = 0;
        pgpath->fail_count++;

        m->nr_valid_paths--;

        if (pgpath == m->current_pgpath)
                m->current_pgpath = NULL;

        dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
                       pgpath->path.dev->name, m->nr_valid_paths);

        queue_work(kmultipathd, &m->trigger_event);

out:
        spin_unlock_irqrestore(&m->lock, flags);

        return 0;
}

/*
 * Reinstate a previously-failed path
 */
static int reinstate_path(struct pgpath *pgpath)
{
        int r = 0;
        unsigned long flags;
        struct multipath *m = pgpath->pg->m;

        spin_lock_irqsave(&m->lock, flags);

        if (pgpath->is_active)
                goto out;

        if (!pgpath->pg->ps.type->reinstate_path) {
                DMWARN("Reinstate path not supported by path selector %s",
                       pgpath->pg->ps.type->name);
                r = -EINVAL;
                goto out;
        }

        r = pgpath->pg->ps.type->reinstate_path(&pgpath->pg->ps, &pgpath->path);
        if (r)
                goto out;

        pgpath->is_active = 1;

        m->current_pgpath = NULL;
        if (!m->nr_valid_paths++ && m->queue_size)
                queue_work(kmultipathd, &m->process_queued_ios);

        dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
                       pgpath->path.dev->name, m->nr_valid_paths);

        queue_work(kmultipathd, &m->trigger_event);

out:
        spin_unlock_irqrestore(&m->lock, flags);

        return r;
}

/*
 * Fail or reinstate all paths that match the provided struct dm_dev.
 */
static int action_dev(struct multipath *m, struct dm_dev *dev,
                      action_fn action)
{
        int r = 0;
        struct pgpath *pgpath;
        struct priority_group *pg;

        list_for_each_entry(pg, &m->priority_groups, list) {
                list_for_each_entry(pgpath, &pg->pgpaths, list) {
                        if (pgpath->path.dev == dev)
                                r = action(pgpath);
                }
        }

        return r;
}

/*
 * Temporarily try to avoid having to use the specified PG
 */
static void bypass_pg(struct multipath *m, struct priority_group *pg,
                      int bypassed)
{
        unsigned long flags;

        spin_lock_irqsave(&m->lock, flags);

        pg->bypassed = bypassed;
        m->current_pgpath = NULL;
        m->current_pg = NULL;

        spin_unlock_irqrestore(&m->lock, flags);

        queue_work(kmultipathd, &m->trigger_event);
}

/*
 * Switch to using the specified PG from the next I/O that gets mapped
 */
static int switch_pg_num(struct multipath *m, const char *pgstr)
{
        struct priority_group *pg;
        unsigned pgnum;
        unsigned long flags;

        if (!pgstr || (sscanf(pgstr, "%u", &pgnum) != 1) || !pgnum ||
            (pgnum > m->nr_priority_groups)) {
                DMWARN("invalid PG number supplied to switch_pg_num");
                return -EINVAL;
        }

        spin_lock_irqsave(&m->lock, flags);
        list_for_each_entry(pg, &m->priority_groups, list) {
                pg->bypassed = 0;
                if (--pgnum)
                        continue;

                m->current_pgpath = NULL;
                m->current_pg = NULL;
                m->next_pg = pg;
        }
        spin_unlock_irqrestore(&m->lock, flags);

        queue_work(kmultipathd, &m->trigger_event);
        return 0;
}

/*
 * Set/clear bypassed status of a PG.
 * PGs are numbered upwards from 1 in the order they were declared.
 */
static int bypass_pg_num(struct multipath *m, const char *pgstr, int bypassed)
{
        struct priority_group *pg;
        unsigned pgnum;

        if (!pgstr || (sscanf(pgstr, "%u", &pgnum) != 1) || !pgnum ||
            (pgnum > m->nr_priority_groups)) {
                DMWARN("invalid PG number supplied to bypass_pg");
                return -EINVAL;
        }

        list_for_each_entry(pg, &m->priority_groups, list) {
                if (!--pgnum)
                        break;
        }

        bypass_pg(m, pg, bypassed);
        return 0;
}

/*
 * Should we retry pg_init immediately?
 */
static int pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath)
{
        unsigned long flags;
        int limit_reached = 0;

        spin_lock_irqsave(&m->lock, flags);

        if (m->pg_init_count <= m->pg_init_retries)
                m->pg_init_required = 1;
        else
                limit_reached = 1;

        spin_unlock_irqrestore(&m->lock, flags);

        return limit_reached;
}

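/*
 * Handle the result of a path activation: on success clear queue_io so
 * queued bios can be dispatched; on transient errors retry pg_init
 * (within the retry limit) or bypass the PG; otherwise fail the path.
 */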
static void pg_init_done(struct dm_path *path, int errors)
{
        struct pgpath *pgpath = path_to_pgpath(path);
        struct priority_group *pg = pgpath->pg;
        struct multipath *m = pg->m;
        unsigned long flags;

        /* device or driver problems */
        switch (errors) {
        case SCSI_DH_OK:
                break;
        case SCSI_DH_NOSYS:
                if (!m->hw_handler_name) {
                        errors = 0;
                        break;
                }
                DMERR("Cannot failover device because scsi_dh_%s was not "
                      "loaded.", m->hw_handler_name);
                /*
                 * Fail path for now, so we do not ping pong
                 */
                fail_path(pgpath);
                break;
        case SCSI_DH_DEV_TEMP_BUSY:
                /*
                 * Probably doing something like FW upgrade on the
                 * controller so try the other pg.
                 */
                bypass_pg(m, pg, 1);
                break;
        /* TODO: For SCSI_DH_RETRY we should wait a couple seconds */
        case SCSI_DH_RETRY:
        case SCSI_DH_IMM_RETRY:
        case SCSI_DH_RES_TEMP_UNAVAIL:
                if (pg_init_limit_reached(m, pgpath))
                        fail_path(pgpath);
                errors = 0;
                break;
        default:
                /*
                 * We probably do not want to fail the path for a device
                 * error, but this is what the old dm did. In future
                 * patches we can do more advanced handling.
                 */
                fail_path(pgpath);
        }

        spin_lock_irqsave(&m->lock, flags);
        if (errors) {
                DMERR("Could not failover device. Error %d.", errors);
                m->current_pgpath = NULL;
                m->current_pg = NULL;
        } else if (!m->pg_init_required) {
                m->queue_io = 0;
                pg->bypassed = 0;
        }

        m->pg_init_in_progress = 0;
        queue_work(kmultipathd, &m->process_queued_ios);
        spin_unlock_irqrestore(&m->lock, flags);
}

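/*
 * Worker run on kmpath_handlerd: activate the path chosen by
 * process_queued_ios() via the SCSI device handler and report the
 * result to pg_init_done().
 */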
static void activate_path(struct work_struct *work)
{
        int ret;
        struct multipath *m =
                container_of(work, struct multipath, activate_path);
        struct dm_path *path;
        unsigned long flags;

        spin_lock_irqsave(&m->lock, flags);
        /* pgpath_to_activate may already have been cleared or freed */
        path = m->pgpath_to_activate ? &m->pgpath_to_activate->path : NULL;
        m->pgpath_to_activate = NULL;
        spin_unlock_irqrestore(&m->lock, flags);
        if (!path)
                return;
        ret = scsi_dh_activate(bdev_get_queue(path->dev->bdev));
        pg_init_done(path, ret);
}

/*
 * end_io handling
 */
static int do_end_io(struct multipath *m, struct bio *bio,
                     int error, struct dm_mpath_io *mpio)
{
        unsigned long flags;

        if (!error)
                return 0;       /* I/O complete */

        if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio))
                return error;

        if (error == -EOPNOTSUPP)
                return error;

        spin_lock_irqsave(&m->lock, flags);
        if (!m->nr_valid_paths) {
                if (__must_push_back(m)) {
                        spin_unlock_irqrestore(&m->lock, flags);
                        return DM_ENDIO_REQUEUE;
                } else if (!m->queue_if_no_path) {
                        spin_unlock_irqrestore(&m->lock, flags);
                        return -EIO;
                } else {
                        spin_unlock_irqrestore(&m->lock, flags);
                        goto requeue;
                }
        }
        spin_unlock_irqrestore(&m->lock, flags);

        if (mpio->pgpath)
                fail_path(mpio->pgpath);

requeue:
        dm_bio_restore(&mpio->details, bio);

        /* queue for the daemon to resubmit or fail */
        spin_lock_irqsave(&m->lock, flags);
        bio_list_add(&m->queued_ios, bio);
        m->queue_size++;
        if (!m->queue_io)
                queue_work(kmultipathd, &m->process_queued_ios);
        spin_unlock_irqrestore(&m->lock, flags);

        return DM_ENDIO_INCOMPLETE;     /* io not complete */
}

static int multipath_end_io(struct dm_target *ti, struct bio *bio,
                            int error, union map_info *map_context)
{
        struct multipath *m = ti->private;
        struct dm_mpath_io *mpio = map_context->ptr;
        struct pgpath *pgpath = mpio->pgpath;
        struct path_selector *ps;
        int r;

        r = do_end_io(m, bio, error, mpio);
        if (pgpath) {
                ps = &pgpath->pg->ps;
                if (ps->type->end_io)
                        ps->type->end_io(ps, &pgpath->path);
        }
        if (r != DM_ENDIO_INCOMPLETE)
                mempool_free(mpio, m->mpio_pool);

        return r;
}

/*
 * Suspend can't complete until all the I/O is processed so if
 * the last path fails we must error any remaining I/O.
 * Note that if the freeze_bdev fails while suspending, the
 * queue_if_no_path state is lost - userspace should reset it.
 */
static void multipath_presuspend(struct dm_target *ti)
{
        struct multipath *m = (struct multipath *) ti->private;

        queue_if_no_path(m, 0, 1);
}

/*
 * Restore the queue_if_no_path setting.
 */
static void multipath_resume(struct dm_target *ti)
{
        struct multipath *m = (struct multipath *) ti->private;
        unsigned long flags;

        spin_lock_irqsave(&m->lock, flags);
        m->queue_if_no_path = m->saved_queue_if_no_path;
        spin_unlock_irqrestore(&m->lock, flags);
}

/*
 * Info output has the following format:
 * num_multipath_feature_args [multipath_feature_args]*
 * num_handler_status_args [handler_status_args]*
 * num_groups init_group_number
 *            [A|D|E num_ps_status_args [ps_status_args]*
 *             num_paths num_selector_args
 *             [path_dev A|F fail_count [selector_args]* ]+ ]+
 *
 * Table output has the following format (identical to the constructor string):
 * num_feature_args [features_args]*
 * num_handler_args hw_handler [hw_handler_args]*
 * num_groups init_group_number
 *     [priority selector-name num_ps_args [ps_args]*
 *      num_paths num_selector_args [path_dev [selector_args]* ]+ ]+
 */
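/*
 * As a rough illustration (hypothetical devices, round-robin selector
 * reporting no per-path status args), an info line for the example
 * table above might look like:
 *
 *   2 0 0 0 1 1 A 0 2 0 8:16 A 0 8:32 A 0
 */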
static int multipath_status(struct dm_target *ti, status_type_t type,
                            char *result, unsigned int maxlen)
{
        int sz = 0;
        unsigned long flags;
        struct multipath *m = (struct multipath *) ti->private;
        struct priority_group *pg;
        struct pgpath *p;
        unsigned pg_num;
        char state;

        spin_lock_irqsave(&m->lock, flags);

        /* Features */
        if (type == STATUSTYPE_INFO)
                DMEMIT("2 %u %u ", m->queue_size, m->pg_init_count);
        else {
                DMEMIT("%u ", m->queue_if_no_path +
                              (m->pg_init_retries > 0) * 2);
                if (m->queue_if_no_path)
                        DMEMIT("queue_if_no_path ");
                if (m->pg_init_retries)
                        DMEMIT("pg_init_retries %u ", m->pg_init_retries);
        }

        if (!m->hw_handler_name || type == STATUSTYPE_INFO)
                DMEMIT("0 ");
        else
                DMEMIT("1 %s ", m->hw_handler_name);

        DMEMIT("%u ", m->nr_priority_groups);

        if (m->next_pg)
                pg_num = m->next_pg->pg_num;
        else if (m->current_pg)
                pg_num = m->current_pg->pg_num;
        else
                pg_num = 1;

        DMEMIT("%u ", pg_num);

        switch (type) {
        case STATUSTYPE_INFO:
                list_for_each_entry(pg, &m->priority_groups, list) {
                        if (pg->bypassed)
                                state = 'D';    /* Disabled */
                        else if (pg == m->current_pg)
                                state = 'A';    /* Currently Active */
                        else
                                state = 'E';    /* Enabled */

                        DMEMIT("%c ", state);

                        if (pg->ps.type->status)
                                sz += pg->ps.type->status(&pg->ps, NULL, type,
                                                          result + sz,
                                                          maxlen - sz);
                        else
                                DMEMIT("0 ");

                        DMEMIT("%u %u ", pg->nr_pgpaths,
                               pg->ps.type->info_args);

                        list_for_each_entry(p, &pg->pgpaths, list) {
                                DMEMIT("%s %s %u ", p->path.dev->name,
                                       p->is_active ? "A" : "F",
                                       p->fail_count);
                                if (pg->ps.type->status)
                                        sz += pg->ps.type->status(&pg->ps,
                                              &p->path, type, result + sz,
                                              maxlen - sz);
                        }
                }
                break;

        case STATUSTYPE_TABLE:
                list_for_each_entry(pg, &m->priority_groups, list) {
                        DMEMIT("%s ", pg->ps.type->name);

                        if (pg->ps.type->status)
                                sz += pg->ps.type->status(&pg->ps, NULL, type,
                                                          result + sz,
                                                          maxlen - sz);
                        else
                                DMEMIT("0 ");

                        DMEMIT("%u %u ", pg->nr_pgpaths,
                               pg->ps.type->table_args);

                        list_for_each_entry(p, &pg->pgpaths, list) {
                                DMEMIT("%s ", p->path.dev->name);
                                if (pg->ps.type->status)
                                        sz += pg->ps.type->status(&pg->ps,
                                              &p->path, type, result + sz,
                                              maxlen - sz);
                        }
                }
                break;
        }

        spin_unlock_irqrestore(&m->lock, flags);

        return 0;
}

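/*
 * Message interface, driven from userspace with e.g.
 * "dmsetup message <mapped-device> 0 fail_path <path-device>"
 * (device names here are placeholders).
 */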
static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
{
        int r;
        struct dm_dev *dev;
        struct multipath *m = (struct multipath *) ti->private;
        action_fn action;

        if (argc == 1) {
                if (!strnicmp(argv[0], MESG_STR("queue_if_no_path")))
                        return queue_if_no_path(m, 1, 0);
                else if (!strnicmp(argv[0], MESG_STR("fail_if_no_path")))
                        return queue_if_no_path(m, 0, 0);
        }

        if (argc != 2)
                goto error;

        if (!strnicmp(argv[0], MESG_STR("disable_group")))
                return bypass_pg_num(m, argv[1], 1);
        else if (!strnicmp(argv[0], MESG_STR("enable_group")))
                return bypass_pg_num(m, argv[1], 0);
        else if (!strnicmp(argv[0], MESG_STR("switch_group")))
                return switch_pg_num(m, argv[1]);
        else if (!strnicmp(argv[0], MESG_STR("reinstate_path")))
                action = reinstate_path;
        else if (!strnicmp(argv[0], MESG_STR("fail_path")))
                action = fail_path;
        else
                goto error;

        r = dm_get_device(ti, argv[1], ti->begin, ti->len,
                          dm_table_get_mode(ti->table), &dev);
        if (r) {
                DMWARN("message: error getting device %s",
                       argv[1]);
                return -EINVAL;
        }

        r = action_dev(m, dev, action);

        dm_put_device(ti, dev);

        return r;

error:
        DMWARN("Unrecognised multipath message received.");
        return -EINVAL;
}

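/*
 * Pass an ioctl through to the block device backing the current path;
 * returns -EAGAIN while I/O is being queued, or -EIO if no path exists.
 */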
static int multipath_ioctl(struct dm_target *ti, struct inode *inode,
                           struct file *filp, unsigned int cmd,
                           unsigned long arg)
{
        struct multipath *m = (struct multipath *) ti->private;
        struct block_device *bdev = NULL;
        unsigned long flags;
        struct file fake_file = {};
        struct dentry fake_dentry = {};
        int r = 0;

        fake_file.f_path.dentry = &fake_dentry;

        spin_lock_irqsave(&m->lock, flags);

        if (!m->current_pgpath)
                __choose_pgpath(m);

        if (m->current_pgpath) {
                bdev = m->current_pgpath->path.dev->bdev;
                fake_dentry.d_inode = bdev->bd_inode;
                fake_file.f_mode = m->current_pgpath->path.dev->mode;
        }

        if (m->queue_io)
                r = -EAGAIN;
        else if (!bdev)
                r = -EIO;

        spin_unlock_irqrestore(&m->lock, flags);

        return r ? : blkdev_driver_ioctl(bdev->bd_inode, &fake_file,
                                         bdev->bd_disk, cmd, arg);
}

/*-----------------------------------------------------------------
 * Module setup
 *---------------------------------------------------------------*/
static struct target_type multipath_target = {
        .name = "multipath",
        .version = {1, 0, 5},
        .module = THIS_MODULE,
        .ctr = multipath_ctr,
        .dtr = multipath_dtr,
        .map = multipath_map,
        .end_io = multipath_end_io,
        .presuspend = multipath_presuspend,
        .resume = multipath_resume,
        .status = multipath_status,
        .message = multipath_message,
        .ioctl  = multipath_ioctl,
};

static int __init dm_multipath_init(void)
{
        int r;

        /* allocate a slab for the dm_ios */
        _mpio_cache = KMEM_CACHE(dm_mpath_io, 0);
        if (!_mpio_cache)
                return -ENOMEM;

        r = dm_register_target(&multipath_target);
        if (r < 0) {
                DMERR("register failed %d", r);
                kmem_cache_destroy(_mpio_cache);
                return -EINVAL;
        }

        kmultipathd = create_workqueue("kmpathd");
        if (!kmultipathd) {
                DMERR("failed to create workqueue kmpathd");
                dm_unregister_target(&multipath_target);
                kmem_cache_destroy(_mpio_cache);
                return -ENOMEM;
        }

        /*
         * A separate workqueue is used to handle the device handlers
         * to avoid overloading existing workqueue. Overloading the
         * old workqueue would also create a bottleneck in the
         * path of the storage hardware device activation.
         */
        kmpath_handlerd = create_singlethread_workqueue("kmpath_handlerd");
        if (!kmpath_handlerd) {
                DMERR("failed to create workqueue kmpath_handlerd");
                destroy_workqueue(kmultipathd);
                dm_unregister_target(&multipath_target);
                kmem_cache_destroy(_mpio_cache);
                return -ENOMEM;
        }

        DMINFO("version %u.%u.%u loaded",
               multipath_target.version[0], multipath_target.version[1],
               multipath_target.version[2]);

        return r;
}

static void __exit dm_multipath_exit(void)
{
        int r;

        destroy_workqueue(kmpath_handlerd);
        destroy_workqueue(kmultipathd);

        r = dm_unregister_target(&multipath_target);
        if (r < 0)
                DMERR("target unregister failed %d", r);
        kmem_cache_destroy(_mpio_cache);
}

module_init(dm_multipath_init);
module_exit(dm_multipath_exit);

MODULE_DESCRIPTION(DM_NAME " multipath target");
MODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");