#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/major.h>
-#include <linux/smp_lock.h>
#include <linux/device_cgroup.h>
#include <linux/highmem.h>
#include <linux/blkdev.h>
return &ei->vfs_inode;
}
-static void bdev_destroy_inode(struct inode *inode)
+static void bdev_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
struct bdev_inode *bdi = BDEV_I(inode);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(bdev_cachep, bdi);
}
+static void bdev_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, bdev_i_callback);
+}
+
static void init_once(void *foo)
{
struct bdev_inode *ei = (struct bdev_inode *) foo;
mutex_init(&bdev->bd_mutex);
INIT_LIST_HEAD(&bdev->bd_inodes);
INIT_LIST_HEAD(&bdev->bd_list);
+#ifdef CONFIG_SYSFS
+ INIT_LIST_HEAD(&bdev->bd_holder_disks);
+#endif
inode_init_once(&ei->vfs_inode);
/* Initialize mutex for freeze. */
mutex_init(&bdev->bd_fsfreeze_mutex);
static struct dentry *bd_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
- return mount_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576);
+ return mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, 0x62646576);
}
static struct file_system_type bd_type = {
}
}
-/* releases bdev_lock */
-static void __bd_abort_claiming(struct block_device *whole, void *holder)
-{
- BUG_ON(whole->bd_claiming != holder);
- whole->bd_claiming = NULL;
- wake_up_bit(&whole->bd_claiming, 0);
-
- spin_unlock(&bdev_lock);
- bdput(whole);
-}
-
-/**
- * bd_abort_claiming - abort claiming a block device
- * @whole: whole block device returned by bd_start_claiming()
- * @holder: holder trying to claim @bdev
- *
- * Abort a claiming block started by bd_start_claiming(). Note that
- * @whole is not the block device to be claimed but the whole device
- * returned by bd_start_claiming().
- *
- * CONTEXT:
- * Grabs and releases bdev_lock.
- */
-static void bd_abort_claiming(struct block_device *whole, void *holder)
-{
- spin_lock(&bdev_lock);
- __bd_abort_claiming(whole, holder); /* releases bdev_lock */
-}
-
-/* increment holders when we have a legitimate claim. requires bdev_lock */
-static void __bd_claim(struct block_device *bdev, struct block_device *whole,
- void *holder)
-{
- /* note that for a whole device bd_holders
- * will be incremented twice, and bd_holder will
- * be set to bd_may_claim before being set to holder
- */
- whole->bd_holders++;
- whole->bd_holder = bd_may_claim;
- bdev->bd_holders++;
- bdev->bd_holder = holder;
-}
+#ifdef CONFIG_SYSFS
+struct bd_holder_disk {
+ struct list_head list;
+ struct gendisk *disk;
+ int refcnt;
+};
-/**
- * bd_finish_claiming - finish claiming a block device
- * @bdev: block device of interest (passed to bd_start_claiming())
- * @whole: whole block device returned by bd_start_claiming()
- * @holder: holder trying to claim @bdev
- *
- * Finish a claiming block started by bd_start_claiming().
- *
- * CONTEXT:
- * Grabs and releases bdev_lock.
- */
-static void bd_finish_claiming(struct block_device *bdev,
- struct block_device *whole, void *holder)
+static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
+ struct gendisk *disk)
{
- spin_lock(&bdev_lock);
- BUG_ON(!bd_may_claim(bdev, whole, holder));
- __bd_claim(bdev, whole, holder);
- __bd_abort_claiming(whole, holder); /* not actually an abort */
-}
+ struct bd_holder_disk *holder;
-static void bd_release(struct block_device *bdev)
-{
- spin_lock(&bdev_lock);
- if (!--bdev->bd_contains->bd_holders)
- bdev->bd_contains->bd_holder = NULL;
- if (!--bdev->bd_holders)
- bdev->bd_holder = NULL;
- spin_unlock(&bdev_lock);
+ list_for_each_entry(holder, &bdev->bd_holder_disks, list)
+ if (holder->disk == disk)
+ return holder;
+ return NULL;
}
-#ifdef CONFIG_SYSFS
static int add_symlink(struct kobject *from, struct kobject *to)
{
return sysfs_create_link(from, to, kobject_name(to));
* @bdev: the claimed slave bdev
* @disk: the holding disk
*
+ * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
+ *
* This functions creates the following sysfs symlinks.
*
* - from "slaves" directory of the holder @disk to the claimed @bdev
*/
int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
+ struct bd_holder_disk *holder;
int ret = 0;
mutex_lock(&bdev->bd_mutex);
- WARN_ON_ONCE(!bdev->bd_holder || bdev->bd_holder_disk);
+ WARN_ON_ONCE(!bdev->bd_holder);
/* FIXME: remove the following once add_disk() handles errors */
if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir))
goto out_unlock;
- ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
- if (ret)
+ holder = bd_find_holder_disk(bdev, disk);
+ if (holder) {
+ holder->refcnt++;
goto out_unlock;
+ }
- ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
- if (ret) {
- del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
+ holder = kzalloc(sizeof(*holder), GFP_KERNEL);
+ if (!holder) {
+ ret = -ENOMEM;
goto out_unlock;
}
- bdev->bd_holder_disk = disk;
+ INIT_LIST_HEAD(&holder->list);
+ holder->disk = disk;
+ holder->refcnt = 1;
+
+ ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
+ if (ret)
+ goto out_free;
+
+ ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
+ if (ret)
+ goto out_del;
+ /*
+ * bdev could be deleted beneath us which would implicitly destroy
+ * the holder directory. Hold on to it.
+ */
+ kobject_get(bdev->bd_part->holder_dir);
+
+ list_add(&holder->list, &bdev->bd_holder_disks);
+ goto out_unlock;
+
+out_del:
+ del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
+out_free:
+ kfree(holder);
out_unlock:
mutex_unlock(&bdev->bd_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(bd_link_disk_holder);
-static void bd_unlink_disk_holder(struct block_device *bdev)
+/**
+ * bd_unlink_disk_holder - destroy symlinks created by bd_link_disk_holder()
+ * @bdev: the calimed slave bdev
+ * @disk: the holding disk
+ *
+ * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
+ *
+ * CONTEXT:
+ * Might sleep.
+ */
+void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
- struct gendisk *disk = bdev->bd_holder_disk;
+ struct bd_holder_disk *holder;
- bdev->bd_holder_disk = NULL;
- if (!disk)
- return;
+ mutex_lock(&bdev->bd_mutex);
- del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
- del_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
-}
-#else
-static inline void bd_unlink_disk_holder(struct block_device *bdev)
-{ }
-#endif
+ holder = bd_find_holder_disk(bdev, disk);
-/*
- * Tries to open block device by device number. Use it ONLY if you
- * really do not have anything better - i.e. when you are behind a
- * truly sucky interface and all you are given is a device number. _Never_
- * to be used for internal purposes. If you ever need it - reconsider
- * your API.
- */
-struct block_device *open_by_devnum(dev_t dev, fmode_t mode, void *holder)
-{
- struct block_device *bdev = bdget(dev);
- int err = -ENOMEM;
- if (bdev)
- err = blkdev_get(bdev, mode, holder);
- return err ? ERR_PTR(err) : bdev;
-}
+ if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
+ del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
+ del_symlink(bdev->bd_part->holder_dir,
+ &disk_to_dev(disk)->kobj);
+ kobject_put(bdev->bd_part->holder_dir);
+ list_del_init(&holder->list);
+ kfree(holder);
+ }
-EXPORT_SYMBOL(open_by_devnum);
+ mutex_unlock(&bdev->bd_mutex);
+}
+EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
+#endif
/**
* flush_disk - invalidates all buffer-cache entries on a disk
*
* @bdev: struct block device to be flushed
+ * @kill_dirty: flag to guide handling of dirty inodes
*
* Invalidates all buffer-cache entries on a disk. It should be called
* when a disk has been changed -- either by a media change or online
* resize.
*/
-static void flush_disk(struct block_device *bdev)
+static void flush_disk(struct block_device *bdev, bool kill_dirty)
{
- if (__invalidate_device(bdev)) {
+ if (__invalidate_device(bdev, kill_dirty)) {
char name[BDEVNAME_SIZE] = "";
if (bdev->bd_disk)
"%s: detected capacity change from %lld to %lld\n",
name, bdev_size, disk_size);
i_size_write(bdev->bd_inode, disk_size);
- flush_disk(bdev);
+ flush_disk(bdev, false);
}
}
EXPORT_SYMBOL(check_disk_size_change);
{
struct gendisk *disk = bdev->bd_disk;
const struct block_device_operations *bdops = disk->fops;
+ unsigned int events;
- if (!bdops->media_changed)
- return 0;
- if (!bdops->media_changed(bdev->bd_disk))
+ events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE |
+ DISK_EVENT_EJECT_REQUEST);
+ if (!(events & DISK_EVENT_MEDIA_CHANGE))
return 0;
- flush_disk(bdev);
+ flush_disk(bdev, true);
if (bdops->revalidate_disk)
bdops->revalidate_disk(bdev->bd_disk);
return 1;
if (!bdev->bd_part)
goto out_clear;
+ ret = 0;
if (disk->fops->open) {
ret = disk->fops->open(bdev, mode);
if (ret == -ERESTARTSYS) {
mutex_unlock(&bdev->bd_mutex);
goto restart;
}
- if (ret)
- goto out_clear;
}
- if (!bdev->bd_openers) {
+
+ if (!ret && !bdev->bd_openers) {
bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
bdi = blk_get_backing_dev_info(bdev);
if (bdi == NULL)
bdi = &default_backing_dev_info;
bdev_inode_switch_bdi(bdev->bd_inode, bdi);
}
- if (bdev->bd_invalidated)
+
+ /*
+ * If the device is invalidated, rescan partition
+ * if open succeeded or failed with -ENOMEDIUM.
+ * The latter is necessary to prevent ghost
+ * partitions on a removed medium.
+ */
+ if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
rescan_partitions(disk, bdev);
+ if (ret)
+ goto out_clear;
} else {
struct block_device *whole;
whole = bdget_disk(disk, 0);
put_disk(disk);
disk = NULL;
if (bdev->bd_contains == bdev) {
- if (bdev->bd_disk->fops->open) {
+ ret = 0;
+ if (bdev->bd_disk->fops->open)
ret = bdev->bd_disk->fops->open(bdev, mode);
- if (ret)
- goto out_unlock_bdev;
- }
- if (bdev->bd_invalidated)
+ /* the same as first opener case, read comment there */
+ if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
rescan_partitions(bdev->bd_disk, bdev);
+ if (ret)
+ goto out_unlock_bdev;
}
}
bdev->bd_openers++;
return ret;
}
+/**
+ * blkdev_get - open a block device
+ * @bdev: block_device to open
+ * @mode: FMODE_* mask
+ * @holder: exclusive holder identifier
+ *
+ * Open @bdev with @mode. If @mode includes %FMODE_EXCL, @bdev is
+ * open with exclusive access. Specifying %FMODE_EXCL with %NULL
+ * @holder is invalid. Exclusive opens may nest for the same @holder.
+ *
+ * On success, the reference count of @bdev is unchanged. On failure,
+ * @bdev is put.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
{
struct block_device *whole = NULL;
res = __blkdev_get(bdev, mode, 0);
if (whole) {
- if (res == 0)
- bd_finish_claiming(bdev, whole, holder);
- else
- bd_abort_claiming(whole, holder);
+ struct gendisk *disk = whole->bd_disk;
+
+ /* finish claiming */
+ mutex_lock(&bdev->bd_mutex);
+ spin_lock(&bdev_lock);
+
+ if (!res) {
+ BUG_ON(!bd_may_claim(bdev, whole, holder));
+ /*
+ * Note that for a whole device bd_holders
+ * will be incremented twice, and bd_holder
+ * will be set to bd_may_claim before being
+ * set to holder
+ */
+ whole->bd_holders++;
+ whole->bd_holder = bd_may_claim;
+ bdev->bd_holders++;
+ bdev->bd_holder = holder;
+ }
+
+ /* tell others that we're done */
+ BUG_ON(whole->bd_claiming != holder);
+ whole->bd_claiming = NULL;
+ wake_up_bit(&whole->bd_claiming, 0);
+
+ spin_unlock(&bdev_lock);
+
+ /*
+ * Block event polling for write claims if requested. Any
+ * write holder makes the write_holder state stick until
+ * all are released. This is good enough and tracking
+ * individual writeable reference is too fragile given the
+ * way @mode is used in blkdev_get/put().
+ */
+ if ((disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE) &&
+ !res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) {
+ bdev->bd_write_holder = true;
+ disk_block_events(disk);
+ }
+
+ mutex_unlock(&bdev->bd_mutex);
+ bdput(whole);
}
return res;
}
EXPORT_SYMBOL(blkdev_get);
+/**
+ * blkdev_get_by_path - open a block device by name
+ * @path: path to the block device to open
+ * @mode: FMODE_* mask
+ * @holder: exclusive holder identifier
+ *
+ * Open the blockdevice described by the device file at @path. @mode
+ * and @holder are identical to blkdev_get().
+ *
+ * On success, the returned block_device has reference count of one.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * Pointer to block_device on success, ERR_PTR(-errno) on failure.
+ */
+struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
+ void *holder)
+{
+ struct block_device *bdev;
+ int err;
+
+ bdev = lookup_bdev(path);
+ if (IS_ERR(bdev))
+ return bdev;
+
+ err = blkdev_get(bdev, mode, holder);
+ if (err)
+ return ERR_PTR(err);
+
+ if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
+ blkdev_put(bdev, mode);
+ return ERR_PTR(-EACCES);
+ }
+
+ return bdev;
+}
+EXPORT_SYMBOL(blkdev_get_by_path);
+
+/**
+ * blkdev_get_by_dev - open a block device by device number
+ * @dev: device number of block device to open
+ * @mode: FMODE_* mask
+ * @holder: exclusive holder identifier
+ *
+ * Open the blockdevice described by device number @dev. @mode and
+ * @holder are identical to blkdev_get().
+ *
+ * Use it ONLY if you really do not have anything better - i.e. when
+ * you are behind a truly sucky interface and all you are given is a
+ * device number. _Never_ to be used for internal purposes. If you
+ * ever need it - reconsider your API.
+ *
+ * On success, the returned block_device has reference count of one.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * Pointer to block_device on success, ERR_PTR(-errno) on failure.
+ */
+struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
+{
+ struct block_device *bdev;
+ int err;
+
+ bdev = bdget(dev);
+ if (!bdev)
+ return ERR_PTR(-ENOMEM);
+
+ err = blkdev_get(bdev, mode, holder);
+ if (err)
+ return ERR_PTR(err);
+
+ return bdev;
+}
+EXPORT_SYMBOL(blkdev_get_by_dev);
+
static int blkdev_open(struct inode * inode, struct file * filp)
{
struct block_device *bdev;
bdev->bd_part_count--;
if (!--bdev->bd_openers) {
+ WARN_ON_ONCE(bdev->bd_holders);
sync_blockdev(bdev);
kill_bdev(bdev);
}
int blkdev_put(struct block_device *bdev, fmode_t mode)
{
if (mode & FMODE_EXCL) {
+ bool bdev_free;
+
+ /*
+ * Release a claim on the device. The holder fields
+ * are protected with bdev_lock. bd_mutex is to
+ * synchronize disk_holder unlinking.
+ */
mutex_lock(&bdev->bd_mutex);
- bd_release(bdev);
- if (!bdev->bd_holders)
- bd_unlink_disk_holder(bdev);
+ spin_lock(&bdev_lock);
+
+ WARN_ON_ONCE(--bdev->bd_holders < 0);
+ WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0);
+
+ /* bd_contains might point to self, check in a separate step */
+ if ((bdev_free = !bdev->bd_holders))
+ bdev->bd_holder = NULL;
+ if (!bdev->bd_contains->bd_holders)
+ bdev->bd_contains->bd_holder = NULL;
+
+ spin_unlock(&bdev_lock);
+
+ /*
+ * If this was the last claim, remove holder link and
+ * unblock evpoll if it was a write holder.
+ */
+ if (bdev_free) {
+ if (bdev->bd_write_holder) {
+ disk_unblock_events(bdev->bd_disk);
+ bdev->bd_write_holder = false;
+ } else
+ disk_check_events(bdev->bd_disk);
+ }
+
mutex_unlock(&bdev->bd_mutex);
- }
+ } else
+ disk_check_events(bdev->bd_disk);
+
return __blkdev_put(bdev, mode, 0);
}
EXPORT_SYMBOL(blkdev_put);
}
EXPORT_SYMBOL(lookup_bdev);
-/**
- * open_bdev_exclusive - open a block device by name and set it up for use
- *
- * @path: special file representing the block device
- * @mode: FMODE_... combination to pass be used
- * @holder: owner for exclusion
- *
- * Open the blockdevice described by the special file at @path, claim it
- * for the @holder.
- */
-struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder)
-{
- struct block_device *bdev;
- int error;
-
- bdev = lookup_bdev(path);
- if (IS_ERR(bdev))
- return bdev;
-
- error = blkdev_get(bdev, mode | FMODE_EXCL, holder);
- if (error)
- return ERR_PTR(error);
-
- if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
- blkdev_put(bdev, mode);
- return ERR_PTR(-EACCES);
- }
-
- return bdev;
-}
-
-EXPORT_SYMBOL(open_bdev_exclusive);
-
-int __invalidate_device(struct block_device *bdev)
+int __invalidate_device(struct block_device *bdev, bool kill_dirty)
{
struct super_block *sb = get_super(bdev);
int res = 0;
* hold).
*/
shrink_dcache_sb(sb);
- res = invalidate_inodes(sb);
+ res = invalidate_inodes(sb, kill_dirty);
drop_super(sb);
}
invalidate_bdev(bdev);