/*
 * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
 *
 * This file is released under the GPL.
 */

#include "dm.h"

#include <linux/init.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/blkpg.h>
#include <linux/bio.h>
#include <linux/mempool.h>
#include <linux/slab.h>

static const char *_name = DM_NAME;

#define MAX_DEVICES 1024

static unsigned int major = 0;
static unsigned int _major = 0;

/*
 * One of these is allocated per original bio.
 */
struct dm_io {
	struct mapped_device *md;
	int error;
	struct bio *bio;
	atomic_t io_count;
};

/*
 * One of these is allocated per bio deferred while suspended.
 */
struct deferred_io {
	struct bio *bio;
	struct deferred_io *next;
};

/*
 * Bits for the md->flags field.
 */
#define DMF_BLOCK_IO 0
#define DMF_SUSPENDED 1

struct mapped_device {
	struct rw_semaphore lock;
	atomic_t holders;

	unsigned long flags;

	request_queue_t *queue;
	struct gendisk *disk;

	/*
	 * A list of ios that arrived while we were suspended.
	 */
	atomic_t pending;
	wait_queue_head_t wait;
	struct deferred_io *deferred;

	/*
	 * The current mapping.
	 */
	struct dm_table *map;

	/*
	 * io objects are allocated from here.
	 */
	mempool_t *io_pool;

	/*
	 * Event handling.
	 */
	uint32_t event_nr;
	wait_queue_head_t eventq;
};

#define MIN_IOS 256
static kmem_cache_t *_io_cache;

static __init int local_init(void)
{
	int r;

	/* allocate a slab for the dm_ios */
	_io_cache = kmem_cache_create("dm_io",
				      sizeof(struct dm_io), 0, 0, NULL, NULL);
	if (!_io_cache)
		return -ENOMEM;

	_major = major;
	r = register_blkdev(_major, _name);
	if (r < 0) {
		kmem_cache_destroy(_io_cache);
		return r;
	}

	/* a major of 0 asks the block layer to pick one for us */
	if (!_major)
		_major = r;

	return 0;
}

static void local_exit(void)
{
	kmem_cache_destroy(_io_cache);

	if (unregister_blkdev(_major, _name) < 0)
		DMERR("unregister_blkdev failed");

	_major = 0;

	DMINFO("cleaned up");
}

/*
 * We have a lot of init/exit functions, so it seems easier to
 * store them in an array.  The disposable macro 'xx'
 * expands a prefix into a pair of function names.
 */
static struct {
	int (*init) (void);
	void (*exit) (void);
} _inits[] = {
#define xx(n) {n ## _init, n ## _exit},
	xx(local)
	xx(dm_target)
	xx(dm_linear)
	xx(dm_stripe)
	xx(dm_interface)
#undef xx
};
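
/*
 * For illustration: token pasting means an entry such as 'xx(local)'
 * above expands to the initialiser
 *
 *	{ local_init, local_exit },
 *
 * so dm_init() below can walk the array forwards on load, and dm_exit()
 * can tear the subsystems down in reverse order.
 */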

static int __init dm_init(void)
{
	const int count = ARRAY_SIZE(_inits);

	int r, i;

	for (i = 0; i < count; i++) {
		r = _inits[i].init();
		if (r)
			goto bad;
	}

	return 0;

      bad:
	/* unwind whatever was initialised so far */
	while (i--)
		_inits[i].exit();

	return r;
}

static void __exit dm_exit(void)
{
	int i = ARRAY_SIZE(_inits);

	while (i--)
		_inits[i].exit();
}

/*
 * Block device functions.
 */
static int dm_blk_open(struct inode *inode, struct file *file)
{
	struct mapped_device *md;

	md = inode->i_bdev->bd_disk->private_data;
	dm_get(md);
	return 0;
}

static int dm_blk_close(struct inode *inode, struct file *file)
{
	struct mapped_device *md;

	md = inode->i_bdev->bd_disk->private_data;
	dm_put(md);
	return 0;
}

static inline struct dm_io *alloc_io(struct mapped_device *md)
{
	return mempool_alloc(md->io_pool, GFP_NOIO);
}

static inline void free_io(struct mapped_device *md, struct dm_io *io)
{
	mempool_free(io, md->io_pool);
}

static inline struct deferred_io *alloc_deferred(void)
{
	return kmalloc(sizeof(struct deferred_io), GFP_NOIO);
}

static inline void free_deferred(struct deferred_io *di)
{
	kfree(di);
}

/*
 * Add the bio to the list of deferred io.
 */
static int queue_io(struct mapped_device *md, struct bio *bio)
{
	struct deferred_io *di;

	di = alloc_deferred();
	if (!di)
		return -ENOMEM;

	down_write(&md->lock);

	if (!test_bit(DMF_BLOCK_IO, &md->flags)) {
		/* the device was resumed under us, submit normally */
		up_write(&md->lock);
		free_deferred(di);
		return 1;
	}

	di->bio = bio;
	di->next = md->deferred;
	md->deferred = di;

	up_write(&md->lock);
	return 0;		/* deferred successfully */
}
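
/*
 * A sketch of the data structure rather than extra behaviour:
 * md->deferred is a simple LIFO stack.  After deferring bios A then B
 * the list looks like
 *
 *	md->deferred -> B -> A -> NULL
 *
 * and flush_deferred_io() (below) resubmits from the head, i.e. in
 * reverse order of arrival.
 */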

/*-----------------------------------------------------------------
 * CRUD START:
 *   A more elegant solution is in the works that uses the queue
 *   merge fn, unfortunately there are a couple of changes to
 *   the block layer that I want to make for this.  So in the
 *   interests of getting something for people to use I give
 *   you this clearly demarcated crap.
 *---------------------------------------------------------------*/

static inline sector_t to_sector(unsigned int bytes)
{
	return bytes >> SECTOR_SHIFT;
}

static inline unsigned int to_bytes(sector_t sector)
{
	return sector << SECTOR_SHIFT;
}
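
/*
 * Worked example, assuming the usual SECTOR_SHIFT of 9 (512-byte
 * sectors): to_sector(4096) == 4096 >> 9 == 8, and to_bytes(8) ==
 * 8 << 9 == 4096.  Byte counts passed to to_sector() are expected to
 * be sector-aligned, since the shift silently discards the low bits.
 */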

/*
 * Decrements the number of outstanding ios that a bio has been
 * cloned into, completing the original io if necessary.
 */
static inline void dec_pending(struct dm_io *io, int error)
{
	static spinlock_t _uptodate_lock = SPIN_LOCK_UNLOCKED;
	unsigned long flags;

	/* record the error for the original bio */
	if (error) {
		spin_lock_irqsave(&_uptodate_lock, flags);
		io->error = error;
		spin_unlock_irqrestore(&_uptodate_lock, flags);
	}

	if (atomic_dec_and_test(&io->io_count)) {
		if (atomic_dec_and_test(&io->md->pending))
			/* nudge anyone waiting on suspend queue */
			wake_up(&io->md->wait);

		bio_endio(io->bio, io->bio->bi_size, io->error);
		free_io(io->md, io);
	}
}

static int clone_endio(struct bio *bio, unsigned int done, int error)
{
	struct dm_io *io = bio->bi_private;

	/* wait until the clone has fully completed */
	if (bio->bi_size)
		return 1;

	dec_pending(io, error);
	bio_put(bio);
	return 0;
}
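
/*
 * A rough walk-through of the io_count accounting (a sketch of the
 * mechanism, not extra behaviour), for a bio that __split_bio() below
 * splits into two clones:
 *
 *	alloc_io() + atomic_set:   io_count = 1   (extra reference)
 *	__map_bio() twice:         io_count = 3
 *	clone_endio() twice:       io_count = 1
 *	final dec_pending(io, 0):  io_count = 0 -> bio_endio() on the
 *	                           original bio
 *
 * The extra reference keeps the original bio from completing while
 * clones are still being created and mapped.
 */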

static sector_t max_io_len(struct mapped_device *md,
			   sector_t sector, struct dm_target *ti)
{
	sector_t offset = sector - ti->begin;
	sector_t len = ti->len - offset;

	/*
	 * Does the target need to split even further ?
	 */
	if (ti->split_io) {
		sector_t boundary;
		boundary = dm_round_up(offset + 1, ti->split_io) - offset;

		if (len > boundary)
			len = boundary;
	}

	return len;
}

static void __map_bio(struct dm_target *ti, struct bio *clone, struct dm_io *io)
{
	int r;

	/*
	 * Sanity checks.
	 */
	BUG_ON(!clone->bi_size);

	clone->bi_end_io = clone_endio;
	clone->bi_private = io;

	/*
	 * Map the clone.  If r == 0 we don't need to do
	 * anything, the target has assumed ownership of
	 * this io.
	 */
	atomic_inc(&io->io_count);
	r = ti->type->map(ti, clone);
	if (r > 0)
		/* the bio has been remapped so dispatch it */
		generic_make_request(clone);

	else if (r < 0) {
		/* error the io and bail out */
		dec_pending(io, -EIO);
		bio_put(clone);
	}
}

struct clone_info {
	struct mapped_device *md;
	struct bio *bio;
	struct dm_io *io;
	sector_t sector;
	sector_t sector_count;
	unsigned short idx;
};

/*
 * Creates a little bio that just does part of a bvec.
 */
static struct bio *split_bvec(struct bio *bio, sector_t sector,
			      unsigned short idx, unsigned int offset,
			      unsigned int len)
{
	struct bio *clone;
	struct bio_vec *bv = bio->bi_io_vec + idx;

	clone = bio_alloc(GFP_NOIO, 1);
	memcpy(clone->bi_io_vec, bv, sizeof(*bv));

	clone->bi_sector = sector;
	clone->bi_bdev = bio->bi_bdev;
	clone->bi_rw = bio->bi_rw;
	clone->bi_vcnt = 1;
	clone->bi_size = to_bytes(len);
	clone->bi_io_vec->bv_offset = offset;
	clone->bi_io_vec->bv_len = clone->bi_size;

	return clone;
}

/*
 * Creates a bio that consists of a range of complete bvecs.
 */
static struct bio *clone_bio(struct bio *bio, sector_t sector,
			     unsigned short idx, unsigned short bv_count,
			     unsigned int len)
{
	struct bio *clone;

	clone = bio_clone(bio, GFP_NOIO);
	clone->bi_sector = sector;
	clone->bi_idx = idx;
	clone->bi_vcnt = idx + bv_count;
	clone->bi_size = to_bytes(len);

	return clone;
}

static void __clone_and_map(struct clone_info *ci)
{
	struct bio *clone, *bio = ci->bio;
	struct dm_target *ti = dm_table_find_target(ci->md->map, ci->sector);
	sector_t len = 0, max = max_io_len(ci->md, ci->sector, ti);

	if (ci->sector_count <= max) {
		/*
		 * Optimise for the simple case where we can do all of
		 * the remaining io with a single clone.
		 */
		clone = clone_bio(bio, ci->sector, ci->idx,
				  bio->bi_vcnt - ci->idx, ci->sector_count);
		__map_bio(ti, clone, ci->io);
		ci->sector_count = 0;

	} else if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) {
		/*
		 * There are some bvecs that don't span targets.
		 * Do as many of these as possible.
		 */
		int i;
		sector_t remaining = max;
		sector_t bv_len;

		for (i = ci->idx; remaining && (i < bio->bi_vcnt); i++) {
			bv_len = to_sector(bio->bi_io_vec[i].bv_len);

			if (bv_len > remaining)
				break;

			remaining -= bv_len;
			len += bv_len;
		}

		clone = clone_bio(bio, ci->sector, ci->idx, i - ci->idx, len);
		__map_bio(ti, clone, ci->io);

		ci->sector += len;
		ci->sector_count -= len;
		ci->idx = i;

	} else {
		/*
		 * Create two copy bios to deal with io that has
		 * been split across a target.
		 */
		struct bio_vec *bv = bio->bi_io_vec + ci->idx;

		clone = split_bvec(bio, ci->sector, ci->idx,
				   bv->bv_offset, max);
		__map_bio(ti, clone, ci->io);

		ci->sector += max;
		ci->sector_count -= max;
		ti = dm_table_find_target(ci->md->map, ci->sector);

		len = to_sector(bv->bv_len) - max;
		clone = split_bvec(bio, ci->sector, ci->idx,
				   bv->bv_offset + to_bytes(max), len);
		__map_bio(ti, clone, ci->io);

		ci->sector += len;
		ci->sector_count -= len;
		ci->idx++;
	}
}
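
/*
 * Worked example with hypothetical numbers: take a bio of three 4k
 * bvecs, i.e. 8 sectors each, 24 sectors in total.
 *
 *   - max >= 24: the first case, a single clone_bio() covers the
 *     whole remainder.
 *   - max == 16: the second case, bvecs 0 and 1 (16 sectors) are
 *     cloned together and the next iteration starts at bvec 2.
 *   - max == 2 while an 8-sector bvec sits at ci->idx: the third
 *     case, split_bvec() sends the first 2 sectors to this target
 *     and the remaining 6 to the next one.
 */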

/*
 * Split the bio into several clones.
 */
static void __split_bio(struct mapped_device *md, struct bio *bio)
{
	struct clone_info ci;

	ci.md = md;
	ci.bio = bio;
	ci.io = alloc_io(md);
	ci.io->error = 0;
	atomic_set(&ci.io->io_count, 1);
	ci.io->bio = bio;
	ci.io->md = md;
	ci.sector = bio->bi_sector;
	ci.sector_count = bio_sectors(bio);
	ci.idx = bio->bi_idx;

	atomic_inc(&md->pending);
	while (ci.sector_count)
		__clone_and_map(&ci);

	/* drop the extra reference count */
	dec_pending(ci.io, 0);
}

/*-----------------------------------------------------------------
 * CRUD END
 *---------------------------------------------------------------*/

/*
 * The request function that just remaps the bio built up by
 * dm_merge_bvec.
 */
static int dm_request(request_queue_t *q, struct bio *bio)
{
	int r;
	struct mapped_device *md = q->queuedata;

	down_read(&md->lock);

	/*
	 * If we're suspended we have to queue
	 * this io for later.
	 */
	while (test_bit(DMF_BLOCK_IO, &md->flags)) {
		up_read(&md->lock);

		if (bio_rw(bio) == READA) {
			bio_io_error(bio, bio->bi_size);
			return 0;
		}

		r = queue_io(md, bio);
		if (r < 0) {
			bio_io_error(bio, bio->bi_size);
			return 0;

		} else if (r == 0)
			return 0;	/* deferred successfully */

		/*
		 * We're in a while loop, because someone could suspend
		 * before we get to the following read lock.
		 */
		down_read(&md->lock);
	}

	if (!md->map) {
		up_read(&md->lock);
		bio_io_error(bio, bio->bi_size);
		return 0;
	}

	__split_bio(md, bio);
	up_read(&md->lock);
	return 0;
}

/*-----------------------------------------------------------------
 * A bitset is used to keep track of allocated minor numbers.
 *---------------------------------------------------------------*/
static spinlock_t _minor_lock = SPIN_LOCK_UNLOCKED;
static unsigned long _minor_bits[MAX_DEVICES / BITS_PER_LONG];

static void free_minor(unsigned int minor)
{
	spin_lock(&_minor_lock);
	clear_bit(minor, _minor_bits);
	spin_unlock(&_minor_lock);
}

/*
 * See if the device with a specific minor # is free.
 */
static int specific_minor(unsigned int minor)
{
	int r = -EBUSY;

	if (minor >= MAX_DEVICES) {
		DMWARN("request for a mapped_device beyond MAX_DEVICES (%d)",
		       MAX_DEVICES);
		return -EINVAL;
	}

	spin_lock(&_minor_lock);
	if (!test_and_set_bit(minor, _minor_bits))
		r = 0;
	spin_unlock(&_minor_lock);

	return r;
}

static int next_free_minor(unsigned int *minor)
{
	int r = -EBUSY;
	unsigned int m;

	spin_lock(&_minor_lock);
	m = find_first_zero_bit(_minor_bits, MAX_DEVICES);
	if (m != MAX_DEVICES) {
		set_bit(m, _minor_bits);
		*minor = m;
		r = 0;
	}
	spin_unlock(&_minor_lock);

	return r;
}

/*
 * Allocate and initialise a blank device with a given minor.
 */
static struct mapped_device *alloc_dev(unsigned int minor, int persistent)
{
	int r;
	struct mapped_device *md = kmalloc(sizeof(*md), GFP_KERNEL);

	if (!md) {
		DMWARN("unable to allocate device, out of memory.");
		return NULL;
	}

	/* get a minor number for the dev */
	r = persistent ? specific_minor(minor) : next_free_minor(&minor);
	if (r < 0) {
		kfree(md);
		return NULL;
	}

	memset(md, 0, sizeof(*md));
	init_rwsem(&md->lock);
	atomic_set(&md->holders, 1);

	md->queue = blk_alloc_queue(GFP_KERNEL);
	if (!md->queue) {
		free_minor(minor);
		kfree(md);
		return NULL;
	}

	md->queue->queuedata = md;
	blk_queue_make_request(md->queue, dm_request);

	md->io_pool = mempool_create(MIN_IOS, mempool_alloc_slab,
				     mempool_free_slab, _io_cache);
	if (!md->io_pool) {
		free_minor(minor);
		blk_put_queue(md->queue);
		kfree(md);
		return NULL;
	}

	md->disk = alloc_disk(1);
	if (!md->disk) {
		mempool_destroy(md->io_pool);
		free_minor(minor);
		blk_put_queue(md->queue);
		kfree(md);
		return NULL;
	}

	md->disk->major = _major;
	md->disk->first_minor = minor;
	md->disk->fops = &dm_blk_dops;
	md->disk->queue = md->queue;
	md->disk->private_data = md;
	sprintf(md->disk->disk_name, "dm-%d", minor);
	add_disk(md->disk);

	atomic_set(&md->pending, 0);
	init_waitqueue_head(&md->wait);
	init_waitqueue_head(&md->eventq);

	return md;
}

static void free_dev(struct mapped_device *md)
{
	free_minor(md->disk->first_minor);
	mempool_destroy(md->io_pool);
	del_gendisk(md->disk);
	put_disk(md->disk);
	blk_put_queue(md->queue);
	kfree(md);
}

/*
 * Bind a table to the device.
 */
static void event_callback(void *context)
{
	struct mapped_device *md = (struct mapped_device *) context;

	down_write(&md->lock);
	md->event_nr++;
	wake_up_interruptible(&md->eventq);
	up_write(&md->lock);
}

static void __set_size(struct gendisk *disk, sector_t size)
{
	struct block_device *bdev;

	set_capacity(disk, size);
	bdev = bdget_disk(disk, 0);
	if (bdev) {
		down(&bdev->bd_inode->i_sem);
		i_size_write(bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
		up(&bdev->bd_inode->i_sem);
		bdput(bdev);
	}
}

static int __bind(struct mapped_device *md, struct dm_table *t)
{
	request_queue_t *q = md->queue;
	sector_t size;

	md->map = t;
	size = dm_table_get_size(t);
	__set_size(md->disk, size);
	if (size == 0)
		return 0;

	dm_table_event_callback(md->map, event_callback, md);

	dm_table_get(t);
	dm_table_set_restrictions(t, q);
	return 0;
}

static void __unbind(struct mapped_device *md)
{
	if (!md->map)
		return;

	dm_table_event_callback(md->map, NULL, NULL);
	dm_table_put(md->map);
	md->map = NULL;
}

/*
 * Constructor for a new device.
 */
static int create_aux(unsigned int minor, int persistent,
		      struct mapped_device **result)
{
	struct mapped_device *md;

	md = alloc_dev(minor, persistent);
	if (!md)
		return -ENXIO;

	*result = md;
	return 0;
}

int dm_create(struct mapped_device **result)
{
	return create_aux(0, 0, result);
}

int dm_create_with_minor(unsigned int minor, struct mapped_device **result)
{
	return create_aux(minor, 1, result);
}
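
/*
 * A minimal sketch of how a caller (in-tree, the dm-ioctl interface)
 * might drive the constructors together with the suspend/swap/resume
 * functions below.  'example_create_and_load' and its error handling
 * are hypothetical, and 'table' is assumed to be a fully built
 * struct dm_table:
 */
#if 0
static int example_create_and_load(struct dm_table *table,
				   struct mapped_device **result)
{
	struct mapped_device *md;
	int r;

	r = dm_create(&md);		/* grabs the first free minor */
	if (r)
		return r;

	r = dm_suspend(md);		/* block io, drain what's in flight */
	if (!r)
		r = dm_swap_table(md, table);	/* bind the new mapping */
	if (!r)
		r = dm_resume(md);	/* resubmit any deferred bios */

	if (r) {
		dm_put(md);		/* drops the last ref, frees md */
		return r;
	}

	*result = md;			/* caller keeps the reference */
	return 0;
}
#endif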

void dm_get(struct mapped_device *md)
{
	atomic_inc(&md->holders);
}

void dm_put(struct mapped_device *md)
{
	if (atomic_dec_and_test(&md->holders)) {
		if (!test_bit(DMF_SUSPENDED, &md->flags) && md->map)
			dm_table_suspend_targets(md->map);
		__unbind(md);
		free_dev(md);
	}
}

/*
 * Requeue the deferred bios by calling generic_make_request.
 */
static void flush_deferred_io(struct deferred_io *c)
{
	struct deferred_io *n;

	while (c) {
		n = c->next;
		generic_make_request(c->bio);
		free_deferred(c);
		c = n;
	}
}

/*
 * Swap in a new table (destroying the old one).
 */
int dm_swap_table(struct mapped_device *md, struct dm_table *table)
{
	int r;

	down_write(&md->lock);

	/* device must be suspended */
	if (!test_bit(DMF_SUSPENDED, &md->flags)) {
		up_write(&md->lock);
		return -EPERM;
	}

	__unbind(md);
	r = __bind(md, table);

	up_write(&md->lock);
	return r;
}

/*
 * We need to be able to change a mapping table under a mounted
 * filesystem.  For example we might want to move some data in
 * the background.  Before the table can be swapped with
 * dm_swap_table, dm_suspend must be called to flush any in
 * flight bios and ensure that any further io gets deferred.
 */
int dm_suspend(struct mapped_device *md)
{
	DECLARE_WAITQUEUE(wait, current);

	down_write(&md->lock);

	/*
	 * First we set the BLOCK_IO flag so no more ios will be
	 * mapped.
	 */
	if (test_bit(DMF_BLOCK_IO, &md->flags)) {
		/* already blocked: a suspend is in progress */
		up_write(&md->lock);
		return -EINVAL;
	}

	set_bit(DMF_BLOCK_IO, &md->flags);
	add_wait_queue(&md->wait, &wait);
	up_write(&md->lock);

	/*
	 * Then we wait for the already mapped ios to
	 * complete.
	 */
	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);

		if (!atomic_read(&md->pending))
			break;

		io_schedule();
	}
	set_current_state(TASK_RUNNING);

	down_write(&md->lock);
	remove_wait_queue(&md->wait, &wait);
	set_bit(DMF_SUSPENDED, &md->flags);
	if (md->map)
		dm_table_suspend_targets(md->map);
	up_write(&md->lock);

	return 0;
}

int dm_resume(struct mapped_device *md)
{
	struct deferred_io *def;

	down_write(&md->lock);
	if (!md->map ||
	    !test_bit(DMF_SUSPENDED, &md->flags) ||
	    !dm_table_get_size(md->map)) {
		up_write(&md->lock);
		return -EINVAL;
	}

	dm_table_resume_targets(md->map);
	clear_bit(DMF_SUSPENDED, &md->flags);
	clear_bit(DMF_BLOCK_IO, &md->flags);

	def = md->deferred;
	md->deferred = NULL;
	up_write(&md->lock);

	flush_deferred_io(def);
	blk_run_queues();

	return 0;
}
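
/*
 * Putting the pieces together, the intended table-swap protocol (a
 * sketch of the sequence, not extra API) looks like:
 *
 *	dm_suspend(md)           BLOCK_IO set, in-flight io drained,
 *	                         SUSPENDED set
 *	dm_swap_table(md, t)     old table unbound, new one bound
 *	dm_resume(md)            SUSPENDED and BLOCK_IO cleared,
 *	                         deferred bios resubmitted
 *
 * Any bio that arrives between suspend and resume waits on the
 * md->deferred list.
 */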

/*-----------------------------------------------------------------
 * Event notification.
 *---------------------------------------------------------------*/
uint32_t dm_get_event_nr(struct mapped_device *md)
{
	uint32_t r;

	down_read(&md->lock);
	r = md->event_nr;
	up_read(&md->lock);

	return r;
}

int dm_add_wait_queue(struct mapped_device *md, wait_queue_t *wq,
		      uint32_t event_nr)
{
	down_write(&md->lock);

	/* the event counter has already moved on */
	if (event_nr != md->event_nr) {
		up_write(&md->lock);
		return 1;
	}

	add_wait_queue(&md->eventq, wq);
	up_write(&md->lock);

	return 0;
}
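
/*
 * A minimal sketch (simplified: no signal handling, and ignoring the
 * narrow wakeup race after the queue is armed) of how a caller might
 * wait for the event counter to move past a previously sampled value.
 * 'example_wait_for_event' is hypothetical, not part of this file:
 */
#if 0
static int example_wait_for_event(struct mapped_device *md)
{
	DECLARE_WAITQUEUE(wq, current);
	uint32_t ev = dm_get_event_nr(md);

	if (dm_add_wait_queue(md, &wq, ev))
		return 0;	/* an event has already occurred */

	/* still at 'ev': sleep until event_callback() wakes us */
	set_current_state(TASK_INTERRUPTIBLE);
	schedule();
	set_current_state(TASK_RUNNING);
	dm_remove_wait_queue(md, &wq);

	return 0;
}
#endif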

void dm_remove_wait_queue(struct mapped_device *md, wait_queue_t *wq)
{
	down_write(&md->lock);
	remove_wait_queue(&md->eventq, wq);
	up_write(&md->lock);
}

/*
 * The gendisk is only valid as long as you have a reference
 * count on 'md'.
 */
struct gendisk *dm_disk(struct mapped_device *md)
{
	return md->disk;
}

struct dm_table *dm_get_table(struct mapped_device *md)
{
	struct dm_table *t;

	down_read(&md->lock);
	t = md->map;
	if (t)
		dm_table_get(t);
	up_read(&md->lock);

	return t;
}

int dm_suspended(struct mapped_device *md)
{
	return test_bit(DMF_SUSPENDED, &md->flags);
}

struct block_device_operations dm_blk_dops = {
	.open = dm_blk_open,
	.release = dm_blk_close,
	.owner = THIS_MODULE
};

/*
 * module hooks
 */
module_init(dm_init);
module_exit(dm_exit);

module_param(major, uint, 0);
MODULE_PARM_DESC(major, "The major number of the device mapper");
MODULE_DESCRIPTION(DM_NAME " driver");
MODULE_AUTHOR("Joe Thornber <thornber@sistina.com>");
MODULE_LICENSE("GPL");